1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_map.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * Virtual memory mapping module.
64 */
65
66 #include <task_swapper.h>
67 #include <mach_assert.h>
68
69 #include <vm/vm_options.h>
70
71 #include <libkern/OSAtomic.h>
72
73 #include <mach/kern_return.h>
74 #include <mach/port.h>
75 #include <mach/vm_attributes.h>
76 #include <mach/vm_param.h>
77 #include <mach/vm_behavior.h>
78 #include <mach/vm_statistics.h>
79 #include <mach/memory_object.h>
80 #include <mach/mach_vm.h>
81 #include <machine/cpu_capabilities.h>
82 #include <mach/sdt.h>
83
84 #include <kern/assert.h>
85 #include <kern/backtrace.h>
86 #include <kern/counters.h>
87 #include <kern/exc_guard.h>
88 #include <kern/kalloc.h>
89 #include <kern/zalloc_internal.h>
90
91 #include <vm/cpm.h>
92 #include <vm/vm_compressor.h>
93 #include <vm/vm_compressor_pager.h>
94 #include <vm/vm_init.h>
95 #include <vm/vm_fault.h>
96 #include <vm/vm_map.h>
97 #include <vm/vm_object.h>
98 #include <vm/vm_page.h>
99 #include <vm/vm_pageout.h>
100 #include <vm/pmap.h>
101 #include <vm/vm_kern.h>
102 #include <ipc/ipc_port.h>
103 #include <kern/sched_prim.h>
104 #include <kern/misc_protos.h>
105
106 #include <mach/vm_map_server.h>
107 #include <mach/mach_host_server.h>
108 #include <vm/vm_protos.h>
109 #include <vm/vm_purgeable_internal.h>
110
112 #include <vm/vm_shared_region.h>
113 #include <vm/vm_map_store.h>
114
115 #include <san/kasan.h>
116
117 #include <sys/codesign.h>
118 #include <sys/mman.h>
119
120 #include <libkern/section_keywords.h>
121 #if DEVELOPMENT || DEBUG
122 extern int proc_selfcsflags(void);
123 int panic_on_unsigned_execute = 0;
124 #endif /* DEVELOPMENT || DEBUG */
125
126 #if MACH_ASSERT
127 int debug4k_filter = 0;
128 char debug4k_proc_name[1024] = "";
129 int debug4k_proc_filter = (int)-1 & ~(1 << __DEBUG4K_FAULT);
130 int debug4k_panic_on_misaligned_sharing = 0;
131 const char *debug4k_category_name[] = {
132 "error", /* 0 */
133 "life", /* 1 */
134 "load", /* 2 */
135 "fault", /* 3 */
136 "copy", /* 4 */
137 "share", /* 5 */
138 "adjust", /* 6 */
139 "pmap", /* 7 */
140 "mementry", /* 8 */
141 "iokit", /* 9 */
142 "upl", /* 10 */
143 "exc", /* 11 */
144 "vfs" /* 12 */
145 };
146 #endif /* MACH_ASSERT */
147 int debug4k_no_cow_copyin = 0;
148
149
150 #if __arm64__
151 extern const int fourk_binary_compatibility_unsafe;
152 extern const int fourk_binary_compatibility_allow_wx;
153 #endif /* __arm64__ */
154 extern int proc_selfpid(void);
155 extern char *proc_name_address(void *p);
156
157 #if VM_MAP_DEBUG_APPLE_PROTECT
158 int vm_map_debug_apple_protect = 0;
159 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
160 #if VM_MAP_DEBUG_FOURK
161 int vm_map_debug_fourk = 0;
162 #endif /* VM_MAP_DEBUG_FOURK */
163
164 SECURITY_READ_ONLY_LATE(int) vm_map_executable_immutable = 1;
165 int vm_map_executable_immutable_verbose = 0;
166
167 os_refgrp_decl(static, map_refgrp, "vm_map", NULL);
168
169 extern u_int32_t random(void); /* from <libkern/libkern.h> */
170 /* Internal prototypes
171 */
172
173 static void vm_map_simplify_range(
174 vm_map_t map,
175 vm_map_offset_t start,
176 vm_map_offset_t end); /* forward */
177
178 static boolean_t vm_map_range_check(
179 vm_map_t map,
180 vm_map_offset_t start,
181 vm_map_offset_t end,
182 vm_map_entry_t *entry);
183
184 static vm_map_entry_t _vm_map_entry_create(
185 struct vm_map_header *map_header, boolean_t map_locked);
186
187 static void _vm_map_entry_dispose(
188 struct vm_map_header *map_header,
189 vm_map_entry_t entry);
190
191 static void vm_map_pmap_enter(
192 vm_map_t map,
193 vm_map_offset_t addr,
194 vm_map_offset_t end_addr,
195 vm_object_t object,
196 vm_object_offset_t offset,
197 vm_prot_t protection);
198
199 static void _vm_map_clip_end(
200 struct vm_map_header *map_header,
201 vm_map_entry_t entry,
202 vm_map_offset_t end);
203
204 static void _vm_map_clip_start(
205 struct vm_map_header *map_header,
206 vm_map_entry_t entry,
207 vm_map_offset_t start);
208
209 static void vm_map_entry_delete(
210 vm_map_t map,
211 vm_map_entry_t entry);
212
213 static kern_return_t vm_map_delete(
214 vm_map_t map,
215 vm_map_offset_t start,
216 vm_map_offset_t end,
217 int flags,
218 vm_map_t zap_map);
219
220 static void vm_map_copy_insert(
221 vm_map_t map,
222 vm_map_entry_t after_where,
223 vm_map_copy_t copy);
224
225 static kern_return_t vm_map_copy_overwrite_unaligned(
226 vm_map_t dst_map,
227 vm_map_entry_t entry,
228 vm_map_copy_t copy,
229 vm_map_address_t start,
230 boolean_t discard_on_success);
231
232 static kern_return_t vm_map_copy_overwrite_aligned(
233 vm_map_t dst_map,
234 vm_map_entry_t tmp_entry,
235 vm_map_copy_t copy,
236 vm_map_offset_t start,
237 pmap_t pmap);
238
239 static kern_return_t vm_map_copyin_kernel_buffer(
240 vm_map_t src_map,
241 vm_map_address_t src_addr,
242 vm_map_size_t len,
243 boolean_t src_destroy,
244 vm_map_copy_t *copy_result); /* OUT */
245
246 static kern_return_t vm_map_copyout_kernel_buffer(
247 vm_map_t map,
248 vm_map_address_t *addr, /* IN/OUT */
249 vm_map_copy_t copy,
250 vm_map_size_t copy_size,
251 boolean_t overwrite,
252 boolean_t consume_on_success);
253
254 static void vm_map_fork_share(
255 vm_map_t old_map,
256 vm_map_entry_t old_entry,
257 vm_map_t new_map);
258
259 static boolean_t vm_map_fork_copy(
260 vm_map_t old_map,
261 vm_map_entry_t *old_entry_p,
262 vm_map_t new_map,
263 int vm_map_copyin_flags);
264
265 static kern_return_t vm_map_wire_nested(
266 vm_map_t map,
267 vm_map_offset_t start,
268 vm_map_offset_t end,
269 vm_prot_t caller_prot,
270 vm_tag_t tag,
271 boolean_t user_wire,
272 pmap_t map_pmap,
273 vm_map_offset_t pmap_addr,
274 ppnum_t *physpage_p);
275
276 static kern_return_t vm_map_unwire_nested(
277 vm_map_t map,
278 vm_map_offset_t start,
279 vm_map_offset_t end,
280 boolean_t user_wire,
281 pmap_t map_pmap,
282 vm_map_offset_t pmap_addr);
283
284 static kern_return_t vm_map_overwrite_submap_recurse(
285 vm_map_t dst_map,
286 vm_map_offset_t dst_addr,
287 vm_map_size_t dst_size);
288
289 static kern_return_t vm_map_copy_overwrite_nested(
290 vm_map_t dst_map,
291 vm_map_offset_t dst_addr,
292 vm_map_copy_t copy,
293 boolean_t interruptible,
294 pmap_t pmap,
295 boolean_t discard_on_success);
296
297 static kern_return_t vm_map_remap_extract(
298 vm_map_t map,
299 vm_map_offset_t addr,
300 vm_map_size_t size,
301 vm_prot_t required_protection,
302 boolean_t copy,
303 struct vm_map_header *map_header,
304 vm_prot_t *cur_protection,
305 vm_prot_t *max_protection,
306 vm_inherit_t inheritance,
307 vm_map_kernel_flags_t vmk_flags);
308
309 static kern_return_t vm_map_remap_range_allocate(
310 vm_map_t map,
311 vm_map_address_t *address,
312 vm_map_size_t size,
313 vm_map_offset_t mask,
314 int flags,
315 vm_map_kernel_flags_t vmk_flags,
316 vm_tag_t tag,
317 vm_map_entry_t *map_entry);
318
319 static void vm_map_region_look_for_page(
320 vm_map_t map,
321 vm_map_offset_t va,
322 vm_object_t object,
323 vm_object_offset_t offset,
324 int max_refcnt,
325 unsigned short depth,
326 vm_region_extended_info_t extended,
327 mach_msg_type_number_t count);
328
329 static int vm_map_region_count_obj_refs(
330 vm_map_entry_t entry,
331 vm_object_t object);
332
333
334 static kern_return_t vm_map_willneed(
335 vm_map_t map,
336 vm_map_offset_t start,
337 vm_map_offset_t end);
338
339 static kern_return_t vm_map_reuse_pages(
340 vm_map_t map,
341 vm_map_offset_t start,
342 vm_map_offset_t end);
343
344 static kern_return_t vm_map_reusable_pages(
345 vm_map_t map,
346 vm_map_offset_t start,
347 vm_map_offset_t end);
348
349 static kern_return_t vm_map_can_reuse(
350 vm_map_t map,
351 vm_map_offset_t start,
352 vm_map_offset_t end);
353
354 #if MACH_ASSERT
355 static kern_return_t vm_map_pageout(
356 vm_map_t map,
357 vm_map_offset_t start,
358 vm_map_offset_t end);
359 #endif /* MACH_ASSERT */
360
361 kern_return_t vm_map_corpse_footprint_collect(
362 vm_map_t old_map,
363 vm_map_entry_t old_entry,
364 vm_map_t new_map);
365 void vm_map_corpse_footprint_collect_done(
366 vm_map_t new_map);
367 void vm_map_corpse_footprint_destroy(
368 vm_map_t map);
369 kern_return_t vm_map_corpse_footprint_query_page_info(
370 vm_map_t map,
371 vm_map_offset_t va,
372 int *disposition_p);
373 void vm_map_footprint_query_page_info(
374 vm_map_t map,
375 vm_map_entry_t map_entry,
376 vm_map_offset_t curr_s_offset,
377 int *disposition_p);
378
379 static const struct vm_map_entry vm_map_entry_template = {
380 .behavior = VM_BEHAVIOR_DEFAULT,
381 .inheritance = VM_INHERIT_DEFAULT,
382 };
383
384 pid_t find_largest_process_vm_map_entries(void);
385
386 /*
387 * Macros to copy a vm_map_entry. We must be careful to correctly
388 * manage the wired page count. vm_map_entry_copy() creates a new
389 * map entry to the same memory - the wired count in the new entry
390 * must be set to zero. vm_map_entry_copy_full() creates a new
391 * entry that is identical to the old entry. This preserves the
392 * wire count; it's used for map splitting and zone changing in
393 * vm_map_copyout.
394 */
395
396 static inline void
397 vm_map_entry_copy_pmap_cs_assoc(
398 vm_map_t map __unused,
399 vm_map_entry_t new __unused,
400 vm_map_entry_t old __unused)
401 {
402 #if PMAP_CS
403 /* when pmap_cs is enabled, we want to reset on copy */
404 new->pmap_cs_associated = FALSE;
405 #else /* PMAP_CS */
406 /* when pmap_cs is not enabled, assert as a sanity check */
407 assert(new->pmap_cs_associated == FALSE);
408 #endif /* PMAP_CS */
409 }
410
411 /*
412 * The "used_for_jit" flag was copied from OLD to NEW in vm_map_entry_copy().
413 * But for security reasons on some platforms, we don't want the
414 * new mapping to be "used for jit", so we reset the flag here.
415 */
416 static inline void
417 vm_map_entry_copy_code_signing(
418 vm_map_t map,
419 vm_map_entry_t new,
420 vm_map_entry_t old __unused)
421 {
422 if (VM_MAP_POLICY_ALLOW_JIT_COPY(map)) {
423 assert(new->used_for_jit == old->used_for_jit);
424 } else {
425 new->used_for_jit = FALSE;
426 }
427 }
428
429 static inline void
430 vm_map_entry_copy(
431 vm_map_t map,
432 vm_map_entry_t new,
433 vm_map_entry_t old)
434 {
435 boolean_t _vmec_reserved = new->from_reserved_zone;
436 *new = *old;
437 new->is_shared = FALSE;
438 new->needs_wakeup = FALSE;
439 new->in_transition = FALSE;
440 new->wired_count = 0;
441 new->user_wired_count = 0;
442 new->permanent = FALSE;
443 vm_map_entry_copy_code_signing(map, new, old);
444 vm_map_entry_copy_pmap_cs_assoc(map, new, old);
445 new->from_reserved_zone = _vmec_reserved;
446 if (new->iokit_acct) {
447 assertf(!new->use_pmap, "old %p new %p\n", old, new);
448 new->iokit_acct = FALSE;
449 new->use_pmap = TRUE;
450 }
451 new->vme_resilient_codesign = FALSE;
452 new->vme_resilient_media = FALSE;
453 new->vme_atomic = FALSE;
454 new->vme_no_copy_on_read = FALSE;
455 }
456
457 static inline void
458 vm_map_entry_copy_full(
459 vm_map_entry_t new,
460 vm_map_entry_t old)
461 {
462 boolean_t _vmecf_reserved = new->from_reserved_zone;
463 *new = *old;
464 new->from_reserved_zone = _vmecf_reserved;
465 }
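/*
 * Illustrative sketch (not part of the original source): contrasts the two
 * copy helpers above.  The entries "src" and "dst" and the wrapper function
 * are hypothetical; only the wired-count behavior is being demonstrated.
 */
#if 0 /* example only, not compiled */
static void
vm_map_entry_copy_example(vm_map_t map, vm_map_entry_t dst, vm_map_entry_t src)
{
	vm_map_entry_copy(map, dst, src);	/* new mapping of the same memory */
	assert(dst->wired_count == 0);		/* wiring is never inherited */
	assert(dst->user_wired_count == 0);

	vm_map_entry_copy_full(dst, src);	/* identical clone of the entry */
	assert(dst->wired_count == src->wired_count);
}
#endif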
466
467 /*
468 * Normal lock_read_to_write() returns FALSE/0 on failure.
469 * These functions evaluate to zero on success and a non-zero value on failure.
470 */
471 __attribute__((always_inline))
472 int
473 vm_map_lock_read_to_write(vm_map_t map)
474 {
475 if (lck_rw_lock_shared_to_exclusive(&(map)->lock)) {
476 DTRACE_VM(vm_map_lock_upgrade);
477 return 0;
478 }
479 return 1;
480 }
481
482 __attribute__((always_inline))
483 boolean_t
484 vm_map_try_lock(vm_map_t map)
485 {
486 if (lck_rw_try_lock_exclusive(&(map)->lock)) {
487 DTRACE_VM(vm_map_lock_w);
488 return TRUE;
489 }
490 return FALSE;
491 }
492
493 __attribute__((always_inline))
494 boolean_t
495 vm_map_try_lock_read(vm_map_t map)
496 {
497 if (lck_rw_try_lock_shared(&(map)->lock)) {
498 DTRACE_VM(vm_map_lock_r);
499 return TRUE;
500 }
501 return FALSE;
502 }
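/*
 * Illustrative sketch (not part of the original source): the usual calling
 * pattern for vm_map_lock_read_to_write().  The wrapper function is
 * hypothetical.  On an upgrade failure the shared lock has already been
 * dropped, so the caller must take the write lock explicitly and re-validate
 * anything it looked up under the read lock.
 */
#if 0 /* example only, not compiled */
static void
vm_map_lock_upgrade_example(vm_map_t map)
{
	vm_map_lock_read(map);
	/* ... inspect the map under the read lock ... */
	if (vm_map_lock_read_to_write(map)) {
		/* upgrade failed: the read lock was released */
		vm_map_lock(map);
		/* ... re-validate state before modifying the map ... */
	}
	/* write lock held here */
	vm_map_unlock(map);
}
#endif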
503
504 /*
505 * Routines to get the page size the caller should
506 * use while inspecting the target address space.
507 * Use the "_safely" variant if the caller is dealing with a user-provided
508 * array whose size depends on the page size, to avoid any overflow or
509 * underflow of a user-allocated buffer.
510 */
511 int
512 vm_self_region_page_shift_safely(
513 vm_map_t target_map)
514 {
515 int effective_page_shift = 0;
516
517 if (PAGE_SIZE == (4096)) {
518 /* x86_64 and 4k watches: always use 4k */
519 return PAGE_SHIFT;
520 }
521 /* did caller provide an explicit page size for this thread to use? */
522 effective_page_shift = thread_self_region_page_shift();
523 if (effective_page_shift) {
524 /* use the explicitly-provided page size */
525 return effective_page_shift;
526 }
527 /* no explicit page size: use the caller's page size... */
528 effective_page_shift = VM_MAP_PAGE_SHIFT(current_map());
529 if (effective_page_shift == VM_MAP_PAGE_SHIFT(target_map)) {
530 /* page size match: safe to use */
531 return effective_page_shift;
532 }
533 /* page size mismatch */
534 return -1;
535 }
536 int
537 vm_self_region_page_shift(
538 vm_map_t target_map)
539 {
540 int effective_page_shift;
541
542 effective_page_shift = vm_self_region_page_shift_safely(target_map);
543 if (effective_page_shift == -1) {
544 /* no safe value but OK to guess for caller */
545 effective_page_shift = MIN(VM_MAP_PAGE_SHIFT(current_map()),
546 VM_MAP_PAGE_SHIFT(target_map));
547 }
548 return effective_page_shift;
549 }
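/*
 * Illustrative sketch (not part of the original source): a caller sizing a
 * user-visible, per-page array would use the "_safely" variant above and
 * fail rather than guess when the page sizes don't match.  The wrapper
 * function and its parameters are hypothetical.
 */
#if 0 /* example only, not compiled */
static kern_return_t
region_page_count_example(
	vm_map_t	target_map,
	vm_map_size_t	region_size,
	uint64_t	*page_count)
{
	int shift = vm_self_region_page_shift_safely(target_map);

	if (shift == -1) {
		/* caller and target disagree on page size: bail out */
		return KERN_INVALID_ARGUMENT;
	}
	*page_count = region_size >> shift;	/* one array slot per page */
	return KERN_SUCCESS;
}
#endif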
550
551
552 /*
553 * Decide if we want to allow processes to execute from their data or stack areas.
554 * override_nx() returns true if we do. Data/stack execution can be enabled independently
555 * for 32 and 64 bit processes. Set the VM_ABI_32 or VM_ABI_64 flags in allow_data_exec
556 * or allow_stack_exec to enable data execution for that type of data area for that particular
557 * ABI (or both by or'ing the flags together). These are initialized in the architecture
558 * specific pmap files since the default behavior varies according to architecture. The
559 * main reason it varies is the need to provide binary compatibility with old
560 * applications that were written before these restrictions came into being. In the old
561 * days, an app could execute anything it could read, but this has slowly been tightened
562 * up over time. The default behavior is:
563 *
564 * 32-bit PPC apps may execute from both stack and data areas
565 * 32-bit Intel apps may execute from data areas but not stack
566 * 64-bit PPC/Intel apps may not execute from either data or stack
567 *
568 * An application on any architecture may override these defaults by explicitly
569 * adding PROT_EXEC permission to the page in question with the mprotect(2)
570 * system call. This code here just determines what happens when an app tries to
571 * execute from a page that lacks execute permission.
572 *
573 * Note that allow_data_exec or allow_stack_exec may also be modified by sysctl to change the
574 * default behavior for both 32 and 64 bit apps on a system-wide basis. Furthermore,
575 * a Mach-O header flag bit (MH_NO_HEAP_EXECUTION) can be used to forcibly disallow
576 * execution from data areas for a particular binary even if the arch normally permits it. As
577 * a final wrinkle, a posix_spawn attribute flag can be used to negate this opt-in header bit
578 * to support some complicated use cases, notably browsers with out-of-process plugins that
579 * are not all NX-safe.
580 */
581
582 extern int allow_data_exec, allow_stack_exec;
583
584 int
585 override_nx(vm_map_t map, uint32_t user_tag) /* map unused on arm */
586 {
587 int current_abi;
588
589 if (map->pmap == kernel_pmap) {
590 return FALSE;
591 }
592
593 /*
594 * Determine if the app is running in 32 or 64 bit mode.
595 */
596
597 if (vm_map_is_64bit(map)) {
598 current_abi = VM_ABI_64;
599 } else {
600 current_abi = VM_ABI_32;
601 }
602
603 /*
604 * Determine if we should allow the execution based on whether it's a
605 * stack or data area and the current architecture.
606 */
607
608 if (user_tag == VM_MEMORY_STACK) {
609 return allow_stack_exec & current_abi;
610 }
611
612 return (allow_data_exec & current_abi) && (map->map_disallow_data_exec == FALSE);
613 }
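/*
 * Illustrative sketch (not part of the original source): how the ABI masks
 * used by override_nx() combine.  The policy value below is hypothetical;
 * only the bit test matters.
 */
#if 0 /* example only, not compiled */
static void
override_nx_mask_example(void)
{
	/* e.g. a policy that allows data execution for 32-bit processes only */
	int data_exec_policy = VM_ABI_32;

	assert((data_exec_policy & VM_ABI_32) != 0);	/* 32-bit app: allowed */
	assert((data_exec_policy & VM_ABI_64) == 0);	/* 64-bit app: denied */
}
#endif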
614
615
616 /*
617 * Virtual memory maps provide for the mapping, protection,
618 * and sharing of virtual memory objects. In addition,
619 * this module provides for an efficient virtual copy of
620 * memory from one map to another.
621 *
622 * Synchronization is required prior to most operations.
623 *
624 * Maps consist of an ordered doubly-linked list of simple
625 * entries; a single hint is used to speed up lookups.
626 *
627 * Sharing maps have been deleted from this version of Mach.
628 * All shared objects are now mapped directly into the respective
629 * maps. This requires a change in the copy on write strategy;
630 * the asymmetric (delayed) strategy is used for shared temporary
631 * objects instead of the symmetric (shadow) strategy. All maps
632 * are now "top level" maps (either task map, kernel map or submap
633 * of the kernel map).
634 *
635 * Since portions of maps are specified by start/end addresses,
636 * which may not align with existing map entries, all
637 * routines merely "clip" entries to these start/end values.
638 * [That is, an entry is split into two, bordering at a
639 * start or end value.] Note that these clippings may not
640 * always be necessary (as the two resulting entries are then
641 * not changed); however, the clipping is done for convenience.
642 * No attempt is currently made to "glue back together" two
643 * abutting entries.
644 *
645 * The symmetric (shadow) copy strategy implements virtual copy
646 * by copying VM object references from one map to
647 * another, and then marking both regions as copy-on-write.
648 * It is important to note that only one writeable reference
649 * to a VM object region exists in any map when this strategy
650 * is used -- this means that shadow object creation can be
651 * delayed until a write operation occurs. The asymmetric (delayed)
652 * strategy allows multiple maps to have writeable references to
653 * the same region of a vm object, and hence cannot delay creating
654 * its copy objects. See vm_object_copy_quickly() in vm_object.c.
655 * Copying of permanent objects is completely different; see
656 * vm_object_copy_strategically() in vm_object.c.
657 */
658
659 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_zone; /* zone for vm_map structures */
660 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_reserved_zone; /* zone with reserve for non-blocking allocations */
661 static SECURITY_READ_ONLY_LATE(zone_t) vm_map_copy_zone; /* zone for vm_map_copy structures */
662
663 SECURITY_READ_ONLY_LATE(zone_t) vm_map_entry_zone; /* zone for vm_map_entry structures */
664 SECURITY_READ_ONLY_LATE(zone_t) vm_map_holes_zone; /* zone for vm map holes (vm_map_links) structures */
665
666 #define VM_MAP_ZONE_NAME "maps"
667 #define VM_MAP_ZFLAGS ( \
668 ZC_NOENCRYPT | \
669 ZC_NOGC | \
670 ZC_NOGZALLOC | \
671 ZC_ALLOW_FOREIGN)
672
673 #define VME_RESERVED_ZONE_NAME "Reserved VM map entries"
674 #define VM_MAP_RESERVED_ZFLAGS ( \
675 ZC_NOENCRYPT | \
676 ZC_ALLOW_FOREIGN | \
677 ZC_NOCALLOUT | \
678 ZC_NOGZALLOC | \
679 ZC_KASAN_NOQUARANTINE | \
680 ZC_NOGC)
681
682 #define VM_MAP_HOLES_ZONE_NAME "VM map holes"
683 #define VM_MAP_HOLES_ZFLAGS ( \
684 ZC_NOENCRYPT | \
685 ZC_NOGC | \
686 ZC_NOGZALLOC | \
687 ZC_ALLOW_FOREIGN)
688
689 /*
690 * Asserts that a vm_map_copy object is coming from the
691 * vm_map_copy_zone to ensure that it isn't a fake constructed
692 * anywhere else.
693 */
694 static inline void
695 vm_map_copy_require(struct vm_map_copy *copy)
696 {
697 zone_id_require(ZONE_ID_VM_MAP_COPY, sizeof(struct vm_map_copy), copy);
698 }
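/*
 * Illustrative sketch (not part of the original source): code that receives
 * a vm_map_copy_t from an untrusted path is expected to validate it along
 * these lines before trusting its contents.  The wrapper is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
vm_map_copy_require_example(vm_map_copy_t copy)
{
	vm_map_copy_require(copy);	/* panics unless it came from vm_map_copy_zone */
	/* safe to inspect the copy object (type, size, ...) from here on */
}
#endif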
699
700 /*
701 * Placeholder object for submap operations. This object is dropped
702 * into the range by a call to vm_map_find, and removed when
703 * vm_map_submap creates the submap.
704 */
705
706 vm_object_t vm_submap_object;
707
708 static __startup_data vm_offset_t map_data;
709 static __startup_data vm_size_t map_data_size;
710 static __startup_data vm_offset_t kentry_data;
711 static __startup_data vm_size_t kentry_data_size;
712 static __startup_data vm_offset_t map_holes_data;
713 static __startup_data vm_size_t map_holes_data_size;
714
715 #if XNU_TARGET_OS_OSX
716 #define NO_COALESCE_LIMIT ((1024 * 128) - 1)
717 #else /* XNU_TARGET_OS_OSX */
718 #define NO_COALESCE_LIMIT 0
719 #endif /* XNU_TARGET_OS_OSX */
720
721 /* Skip acquiring locks if we're in the midst of a kernel core dump */
722 unsigned int not_in_kdp = 1;
723
724 unsigned int vm_map_set_cache_attr_count = 0;
725
726 kern_return_t
727 vm_map_set_cache_attr(
728 vm_map_t map,
729 vm_map_offset_t va)
730 {
731 vm_map_entry_t map_entry;
732 vm_object_t object;
733 kern_return_t kr = KERN_SUCCESS;
734
735 vm_map_lock_read(map);
736
737 if (!vm_map_lookup_entry(map, va, &map_entry) ||
738 map_entry->is_sub_map) {
739 /*
740 * that memory is not properly mapped
741 */
742 kr = KERN_INVALID_ARGUMENT;
743 goto done;
744 }
745 object = VME_OBJECT(map_entry);
746
747 if (object == VM_OBJECT_NULL) {
748 /*
749 * there should be a VM object here at this point
750 */
751 kr = KERN_INVALID_ARGUMENT;
752 goto done;
753 }
754 vm_object_lock(object);
755 object->set_cache_attr = TRUE;
756 vm_object_unlock(object);
757
758 vm_map_set_cache_attr_count++;
759 done:
760 vm_map_unlock_read(map);
761
762 return kr;
763 }
764
765
766 #if CONFIG_CODE_DECRYPTION
767 /*
768 * vm_map_apple_protected:
769 * This remaps the requested part of the object with an object backed by
770 * the decrypting pager.
771 * crypt_info contains entry points and session data for the crypt module.
772 * The crypt_info block will be copied by vm_map_apple_protected. The data structures
773 * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
774 */
775 kern_return_t
776 vm_map_apple_protected(
777 vm_map_t map,
778 vm_map_offset_t start,
779 vm_map_offset_t end,
780 vm_object_offset_t crypto_backing_offset,
781 struct pager_crypt_info *crypt_info,
782 uint32_t cryptid)
783 {
784 boolean_t map_locked;
785 kern_return_t kr;
786 vm_map_entry_t map_entry;
787 struct vm_map_entry tmp_entry;
788 memory_object_t unprotected_mem_obj;
789 vm_object_t protected_object;
790 vm_map_offset_t map_addr;
791 vm_map_offset_t start_aligned, end_aligned;
792 vm_object_offset_t crypto_start, crypto_end;
793 int vm_flags;
794 vm_map_kernel_flags_t vmk_flags;
795
796 vm_flags = 0;
797 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
798
799 map_locked = FALSE;
800 unprotected_mem_obj = MEMORY_OBJECT_NULL;
801
802 start_aligned = vm_map_trunc_page(start, PAGE_MASK_64);
803 end_aligned = vm_map_round_page(end, PAGE_MASK_64);
804 start_aligned = vm_map_trunc_page(start_aligned, VM_MAP_PAGE_MASK(map));
805 end_aligned = vm_map_round_page(end_aligned, VM_MAP_PAGE_MASK(map));
806
807 #if __arm64__
808 /*
809 * "start" and "end" might be 4K-aligned but not 16K-aligned,
810 * so we might have to loop and establish up to 3 mappings:
811 *
812 * + the first 16K-page, which might overlap with the previous
813 * 4K-aligned mapping,
814 * + the center,
815 * + the last 16K-page, which might overlap with the next
816 * 4K-aligned mapping.
817 * Each of these mappings might be backed by a vnode pager (if
818 * properly page-aligned) or a "fourk_pager", itself backed by a
819 * vnode pager (if 4K-aligned but not page-aligned).
820 */
821 #endif /* __arm64__ */
822
823 map_addr = start_aligned;
824 for (map_addr = start_aligned;
825 map_addr < end;
826 map_addr = tmp_entry.vme_end) {
827 vm_map_lock(map);
828 map_locked = TRUE;
829
830 /* lookup the protected VM object */
831 if (!vm_map_lookup_entry(map,
832 map_addr,
833 &map_entry) ||
834 map_entry->is_sub_map ||
835 VME_OBJECT(map_entry) == VM_OBJECT_NULL) {
836 /* that memory is not properly mapped */
837 kr = KERN_INVALID_ARGUMENT;
838 goto done;
839 }
840
841 /* ensure mapped memory is mapped as executable, except
842 * for the model decryption flow */
843 if ((cryptid != CRYPTID_MODEL_ENCRYPTION) &&
844 !(map_entry->protection & VM_PROT_EXECUTE)) {
845 kr = KERN_INVALID_ARGUMENT;
846 goto done;
847 }
848
849 /* get the protected object to be decrypted */
850 protected_object = VME_OBJECT(map_entry);
851 if (protected_object == VM_OBJECT_NULL) {
852 /* there should be a VM object here at this point */
853 kr = KERN_INVALID_ARGUMENT;
854 goto done;
855 }
856 /* ensure protected object stays alive while map is unlocked */
857 vm_object_reference(protected_object);
858
859 /* limit the map entry to the area we want to cover */
860 vm_map_clip_start(map, map_entry, start_aligned);
861 vm_map_clip_end(map, map_entry, end_aligned);
862
863 tmp_entry = *map_entry;
864 map_entry = VM_MAP_ENTRY_NULL; /* not valid after unlocking map */
865 vm_map_unlock(map);
866 map_locked = FALSE;
867
868 /*
869 * This map entry might be only partially encrypted
870 * (if not fully "page-aligned").
871 */
872 crypto_start = 0;
873 crypto_end = tmp_entry.vme_end - tmp_entry.vme_start;
874 if (tmp_entry.vme_start < start) {
875 if (tmp_entry.vme_start != start_aligned) {
876 kr = KERN_INVALID_ADDRESS;
877 }
878 crypto_start += (start - tmp_entry.vme_start);
879 }
880 if (tmp_entry.vme_end > end) {
881 if (tmp_entry.vme_end != end_aligned) {
882 kr = KERN_INVALID_ADDRESS;
883 }
884 crypto_end -= (tmp_entry.vme_end - end);
885 }
886
887 /*
888 * This "extra backing offset" is needed to get the decryption
889 * routine to use the right key. It adjusts for the possibly
890 * relative offset of an interposed "4K" pager...
891 */
892 if (crypto_backing_offset == (vm_object_offset_t) -1) {
893 crypto_backing_offset = VME_OFFSET(&tmp_entry);
894 }
895
896 /*
897 * Lookup (and create if necessary) the protected memory object
898 * matching that VM object.
899 * If successful, this also grabs a reference on the memory object,
900 * to guarantee that it doesn't go away before we get a chance to map
901 * it.
902 */
903 unprotected_mem_obj = apple_protect_pager_setup(
904 protected_object,
905 VME_OFFSET(&tmp_entry),
906 crypto_backing_offset,
907 crypt_info,
908 crypto_start,
909 crypto_end);
910
911 /* release extra ref on protected object */
912 vm_object_deallocate(protected_object);
913
914 if (unprotected_mem_obj == NULL) {
915 kr = KERN_FAILURE;
916 goto done;
917 }
918
919 vm_flags = VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
920 /* can overwrite an immutable mapping */
921 vmk_flags.vmkf_overwrite_immutable = TRUE;
922 #if __arm64__
923 if (tmp_entry.used_for_jit &&
924 (VM_MAP_PAGE_SHIFT(map) != FOURK_PAGE_SHIFT ||
925 PAGE_SHIFT != FOURK_PAGE_SHIFT) &&
926 fourk_binary_compatibility_unsafe &&
927 fourk_binary_compatibility_allow_wx) {
928 printf("** FOURK_COMPAT [%d]: "
929 "allowing write+execute at 0x%llx\n",
930 proc_selfpid(), tmp_entry.vme_start);
931 vmk_flags.vmkf_map_jit = TRUE;
932 }
933 #endif /* __arm64__ */
934
935 /* map this memory object in place of the current one */
936 map_addr = tmp_entry.vme_start;
937 kr = vm_map_enter_mem_object(map,
938 &map_addr,
939 (tmp_entry.vme_end -
940 tmp_entry.vme_start),
941 (mach_vm_offset_t) 0,
942 vm_flags,
943 vmk_flags,
944 VM_KERN_MEMORY_NONE,
945 (ipc_port_t)(uintptr_t) unprotected_mem_obj,
946 0,
947 TRUE,
948 tmp_entry.protection,
949 tmp_entry.max_protection,
950 tmp_entry.inheritance);
951 assertf(kr == KERN_SUCCESS,
952 "kr = 0x%x\n", kr);
953 assertf(map_addr == tmp_entry.vme_start,
954 "map_addr=0x%llx vme_start=0x%llx tmp_entry=%p\n",
955 (uint64_t)map_addr,
956 (uint64_t) tmp_entry.vme_start,
957 &tmp_entry);
958
959 #if VM_MAP_DEBUG_APPLE_PROTECT
960 if (vm_map_debug_apple_protect) {
961 printf("APPLE_PROTECT: map %p [0x%llx:0x%llx] pager %p:"
962 " backing:[object:%p,offset:0x%llx,"
963 "crypto_backing_offset:0x%llx,"
964 "crypto_start:0x%llx,crypto_end:0x%llx]\n",
965 map,
966 (uint64_t) map_addr,
967 (uint64_t) (map_addr + (tmp_entry.vme_end -
968 tmp_entry.vme_start)),
969 unprotected_mem_obj,
970 protected_object,
971 VME_OFFSET(&tmp_entry),
972 crypto_backing_offset,
973 crypto_start,
974 crypto_end);
975 }
976 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
977
978 /*
979 * Release the reference obtained by
980 * apple_protect_pager_setup().
981 * The mapping (if it succeeded) is now holding a reference on
982 * the memory object.
983 */
984 memory_object_deallocate(unprotected_mem_obj);
985 unprotected_mem_obj = MEMORY_OBJECT_NULL;
986
987 /* continue with next map entry */
988 crypto_backing_offset += (tmp_entry.vme_end -
989 tmp_entry.vme_start);
990 crypto_backing_offset -= crypto_start;
991 }
992 kr = KERN_SUCCESS;
993
994 done:
995 if (map_locked) {
996 vm_map_unlock(map);
997 }
998 return kr;
999 }
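/*
 * Illustrative sketch (not part of the original source): the shape of a call
 * to vm_map_apple_protected().  The wrapper and its arguments are
 * hypothetical; the crypt_info block comes from the crypt module and, per
 * the contract above, must stay valid until its crypt_end() is invoked.
 */
#if 0 /* example only, not compiled */
static kern_return_t
apple_protected_example(
	vm_map_t		map,
	vm_map_offset_t		start,
	vm_map_offset_t		end,
	struct pager_crypt_info	*crypt_info,
	uint32_t		cryptid)
{
	/*
	 * Passing -1 as crypto_backing_offset asks vm_map_apple_protected()
	 * to fall back to the mapped entry's own VME_OFFSET (see above).
	 */
	return vm_map_apple_protected(map, start, end,
	    (vm_object_offset_t)-1, crypt_info, cryptid);
}
#endif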
1000 #endif /* CONFIG_CODE_DECRYPTION */
1001
1002
1003 LCK_GRP_DECLARE(vm_map_lck_grp, "vm_map");
1004 LCK_ATTR_DECLARE(vm_map_lck_attr, 0, 0);
1005 LCK_ATTR_DECLARE(vm_map_lck_rw_attr, 0, LCK_ATTR_DEBUG);
1006
1007 #if XNU_TARGET_OS_OSX
1008 int malloc_no_cow = 0;
1009 #else /* XNU_TARGET_OS_OSX */
1010 int malloc_no_cow = 1;
1011 #endif /* XNU_TARGET_OS_OSX */
1012 uint64_t vm_memory_malloc_no_cow_mask = 0ULL;
1013 #if DEBUG
1014 int vm_check_map_sanity = 0;
1015 #endif
1016
1017 /*
1018 * vm_map_init:
1019 *
1020 * Initialize the vm_map module. Must be called before
1021 * any other vm_map routines.
1022 *
1023 * Map and entry structures are allocated from zones -- we must
1024 * initialize those zones.
1025 *
1026 * There are three zones of interest:
1027 *
1028 * vm_map_zone: used to allocate maps.
1029 * vm_map_entry_zone: used to allocate map entries.
1030 * vm_map_entry_reserved_zone: fallback zone for kernel map entries
1031 *
1032 * The kernel allocates map entries from a special zone that is initially
1033 * "crammed" with memory. It would be difficult (perhaps impossible) for
1034 * the kernel to allocate more memory to an entry zone when it became
1035 * empty since the very act of allocating memory implies the creation
1036 * of a new entry.
1037 */
1038 __startup_func
1039 void
1040 vm_map_init(void)
1041 {
1042 const char *mez_name = "VM map entries";
1043
1044
1045 #if MACH_ASSERT
1046 PE_parse_boot_argn("debug4k_filter", &debug4k_filter,
1047 sizeof(debug4k_filter));
1048 #endif /* MACH_ASSERT */
1049
1050 vm_map_zone = zone_create(VM_MAP_ZONE_NAME, sizeof(struct _vm_map),
1051 VM_MAP_ZFLAGS);
1052
1053 vm_map_entry_zone = zone_create(mez_name, sizeof(struct vm_map_entry),
1054 ZC_NOENCRYPT | ZC_NOGZALLOC | ZC_NOCALLOUT);
1055
1056 /*
1057 * Don't quarantine because we always need elements available
1058 * Disallow GC on this zone... to aid the GC.
1059 */
1060 vm_map_entry_reserved_zone = zone_create_ext(VME_RESERVED_ZONE_NAME,
1061 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1062 ZONE_ID_ANY, ^(zone_t z) {
1063 zone_set_noexpand(z, 64 * kentry_data_size);
1064 });
1065
1066 vm_map_copy_zone = zone_create_ext("VM map copies", sizeof(struct vm_map_copy),
1067 ZC_NOENCRYPT | ZC_CACHING, ZONE_ID_VM_MAP_COPY, NULL);
1068
1069 vm_map_holes_zone = zone_create(VM_MAP_HOLES_ZONE_NAME,
1070 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS);
1071
1072 /*
1073 * Add the stolen memory to zones, adjust zone size and stolen counts.
1074 */
1075 zcram(vm_map_zone, map_data, map_data_size);
1076 zcram(vm_map_entry_reserved_zone, kentry_data, kentry_data_size);
1077 zcram(vm_map_holes_zone, map_holes_data, map_holes_data_size);
1078
1079 /*
1080 * Since these are covered by zones, remove them from stolen page accounting.
1081 */
1082 VM_PAGE_MOVE_STOLEN(atop_64(map_data_size) + atop_64(kentry_data_size) + atop_64(map_holes_data_size));
1083
1084 #if VM_MAP_DEBUG_APPLE_PROTECT
1085 PE_parse_boot_argn("vm_map_debug_apple_protect",
1086 &vm_map_debug_apple_protect,
1087 sizeof(vm_map_debug_apple_protect));
1088 #endif /* VM_MAP_DEBUG_APPLE_PROTECT */
1089 #if VM_MAP_DEBUG_FOURK
1090 PE_parse_boot_argn("vm_map_debug_fourk",
1091 &vm_map_debug_fourk,
1092 sizeof(vm_map_debug_fourk));
1093 #endif /* VM_MAP_DEBUG_FOURK */
1094 PE_parse_boot_argn("vm_map_executable_immutable",
1095 &vm_map_executable_immutable,
1096 sizeof(vm_map_executable_immutable));
1097 PE_parse_boot_argn("vm_map_executable_immutable_verbose",
1098 &vm_map_executable_immutable_verbose,
1099 sizeof(vm_map_executable_immutable_verbose));
1100
1101 PE_parse_boot_argn("malloc_no_cow",
1102 &malloc_no_cow,
1103 sizeof(malloc_no_cow));
1104 if (malloc_no_cow) {
1105 vm_memory_malloc_no_cow_mask = 0ULL;
1106 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC;
1107 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_SMALL;
1108 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_MEDIUM;
1109 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE;
1110 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_HUGE;
1111 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_REALLOC;
1112 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_TINY;
1113 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSABLE;
1114 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_LARGE_REUSED;
1115 vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_MALLOC_NANO;
1116 // vm_memory_malloc_no_cow_mask |= 1ULL << VM_MEMORY_TCMALLOC;
1117 PE_parse_boot_argn("vm_memory_malloc_no_cow_mask",
1118 &vm_memory_malloc_no_cow_mask,
1119 sizeof(vm_memory_malloc_no_cow_mask));
1120 }
1121
1122 #if DEBUG
1123 PE_parse_boot_argn("vm_check_map_sanity", &vm_check_map_sanity, sizeof(vm_check_map_sanity));
1124 if (vm_check_map_sanity) {
1125 kprintf("VM sanity checking enabled\n");
1126 } else {
1127 kprintf("VM sanity checking disabled. Set bootarg vm_check_map_sanity=1 to enable\n");
1128 }
1129 #endif /* DEBUG */
1130
1131 #if DEVELOPMENT || DEBUG
1132 PE_parse_boot_argn("panic_on_unsigned_execute",
1133 &panic_on_unsigned_execute,
1134 sizeof(panic_on_unsigned_execute));
1135 #endif /* DEVELOPMENT || DEBUG */
1136 }
1137
1138 __startup_func
1139 static void
1140 vm_map_steal_memory(void)
1141 {
1142 uint16_t kentry_initial_pages;
1143
1144 map_data_size = zone_get_foreign_alloc_size(VM_MAP_ZONE_NAME,
1145 sizeof(struct _vm_map), VM_MAP_ZFLAGS, 1);
1146
1147 /*
1148 * kentry_initial_pages corresponds to the number of kernel map entries
1149 * required during bootstrap until the asynchronous replenishment
1150 * scheme is activated and/or entries are available from the general
1151 * map entry pool.
1152 */
1153 #if defined(__LP64__)
1154 kentry_initial_pages = 10;
1155 #else
1156 kentry_initial_pages = 6;
1157 #endif
1158
1159 #if CONFIG_GZALLOC
1160 /* If using the guard allocator, reserve more memory for the kernel
1161 * reserved map entry pool.
1162 */
1163 if (gzalloc_enabled()) {
1164 kentry_initial_pages *= 1024;
1165 }
1166 #endif
1167
1168 kentry_data_size = zone_get_foreign_alloc_size(VME_RESERVED_ZONE_NAME,
1169 sizeof(struct vm_map_entry), VM_MAP_RESERVED_ZFLAGS,
1170 kentry_initial_pages);
1171
1172 map_holes_data_size = zone_get_foreign_alloc_size(VM_MAP_HOLES_ZONE_NAME,
1173 sizeof(struct vm_map_links), VM_MAP_HOLES_ZFLAGS,
1174 kentry_initial_pages);
1175
1176 /*
1177 * Steal a contiguous range of memory so that a simple range check
1178 * can validate foreign addresses being freed/crammed to these
1179 * zones
1180 */
1181 vm_size_t total_size;
1182 if (os_add3_overflow(map_data_size, kentry_data_size,
1183 map_holes_data_size, &total_size)) {
1184 panic("vm_map_steal_memory: overflow in amount of memory requested");
1185 }
1186 map_data = zone_foreign_mem_init(total_size);
1187 kentry_data = map_data + map_data_size;
1188 map_holes_data = kentry_data + kentry_data_size;
1189 }
1190 STARTUP(PMAP_STEAL, STARTUP_RANK_FIRST, vm_map_steal_memory);
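/*
 * Illustrative sketch (not part of the original source): because the three
 * startup allocations above are stolen as one contiguous block, validating
 * that an address belongs to this bootstrap memory reduces to a single
 * range check, roughly as below (hypothetical helper, not the zone code's
 * actual test).
 */
#if 0 /* example only, not compiled */
static boolean_t
vm_map_bootstrap_range_example(vm_offset_t addr)
{
	vm_offset_t base = map_data;
	vm_offset_t limit = map_data + map_data_size +
	    kentry_data_size + map_holes_data_size;

	return addr >= base && addr < limit;
}
#endif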
1191
1192 boolean_t vm_map_supports_hole_optimization = FALSE;
1193
1194 void
1195 vm_kernel_reserved_entry_init(void)
1196 {
1197 zone_prio_refill_configure(vm_map_entry_reserved_zone);
1198
1199 /*
1200 * Once we have our replenish thread set up, we can start using the vm_map_holes zone.
1201 */
1202 zone_prio_refill_configure(vm_map_holes_zone);
1203 vm_map_supports_hole_optimization = TRUE;
1204 }
1205
1206 void
1207 vm_map_disable_hole_optimization(vm_map_t map)
1208 {
1209 vm_map_entry_t head_entry, hole_entry, next_hole_entry;
1210
1211 if (map->holelistenabled) {
1212 head_entry = hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1213
1214 while (hole_entry != NULL) {
1215 next_hole_entry = hole_entry->vme_next;
1216
1217 hole_entry->vme_next = NULL;
1218 hole_entry->vme_prev = NULL;
1219 zfree(vm_map_holes_zone, hole_entry);
1220
1221 if (next_hole_entry == head_entry) {
1222 hole_entry = NULL;
1223 } else {
1224 hole_entry = next_hole_entry;
1225 }
1226 }
1227
1228 map->holes_list = NULL;
1229 map->holelistenabled = FALSE;
1230
1231 map->first_free = vm_map_first_entry(map);
1232 SAVE_HINT_HOLE_WRITE(map, NULL);
1233 }
1234 }
1235
1236 boolean_t
1237 vm_kernel_map_is_kernel(vm_map_t map)
1238 {
1239 return map->pmap == kernel_pmap;
1240 }
1241
1242 /*
1243 * vm_map_create:
1244 *
1245 * Creates and returns a new empty VM map with
1246 * the given physical map structure, and having
1247 * the given lower and upper address bounds.
1248 */
1249
1250 vm_map_t
1251 vm_map_create(
1252 pmap_t pmap,
1253 vm_map_offset_t min,
1254 vm_map_offset_t max,
1255 boolean_t pageable)
1256 {
1257 int options;
1258
1259 options = 0;
1260 if (pageable) {
1261 options |= VM_MAP_CREATE_PAGEABLE;
1262 }
1263 return vm_map_create_options(pmap, min, max, options);
1264 }
1265
1266 vm_map_t
1267 vm_map_create_options(
1268 pmap_t pmap,
1269 vm_map_offset_t min,
1270 vm_map_offset_t max,
1271 int options)
1272 {
1273 vm_map_t result;
1274 struct vm_map_links *hole_entry = NULL;
1275
1276 if (options & ~(VM_MAP_CREATE_ALL_OPTIONS)) {
1277 /* unknown option */
1278 return VM_MAP_NULL;
1279 }
1280
1281 result = (vm_map_t) zalloc(vm_map_zone);
1282 if (result == VM_MAP_NULL) {
1283 panic("vm_map_create");
1284 }
1285
1286 vm_map_first_entry(result) = vm_map_to_entry(result);
1287 vm_map_last_entry(result) = vm_map_to_entry(result);
1288 result->hdr.nentries = 0;
1289 if (options & VM_MAP_CREATE_PAGEABLE) {
1290 result->hdr.entries_pageable = TRUE;
1291 } else {
1292 result->hdr.entries_pageable = FALSE;
1293 }
1294
1295 vm_map_store_init( &(result->hdr));
1296
1297 result->hdr.page_shift = PAGE_SHIFT;
1298
1299 result->size = 0;
1300 result->user_wire_limit = MACH_VM_MAX_ADDRESS; /* default limit is unlimited */
1301 result->user_wire_size = 0;
1302 #if XNU_TARGET_OS_OSX
1303 result->vmmap_high_start = 0;
1304 #endif
1305 os_ref_init_count(&result->map_refcnt, &map_refgrp, 1);
1306 #if TASK_SWAPPER
1307 result->res_count = 1;
1308 result->sw_state = MAP_SW_IN;
1309 #endif /* TASK_SWAPPER */
1310 result->pmap = pmap;
1311 result->min_offset = min;
1312 result->max_offset = max;
1313 result->wiring_required = FALSE;
1314 result->no_zero_fill = FALSE;
1315 result->mapped_in_other_pmaps = FALSE;
1316 result->wait_for_space = FALSE;
1317 result->switch_protect = FALSE;
1318 result->disable_vmentry_reuse = FALSE;
1319 result->map_disallow_data_exec = FALSE;
1320 result->is_nested_map = FALSE;
1321 result->map_disallow_new_exec = FALSE;
1322 result->terminated = FALSE;
1323 result->cs_enforcement = FALSE;
1324 result->highest_entry_end = 0;
1325 result->first_free = vm_map_to_entry(result);
1326 result->hint = vm_map_to_entry(result);
1327 result->jit_entry_exists = FALSE;
1328 result->is_alien = FALSE;
1329 result->reserved_regions = FALSE;
1330
1331 /* "has_corpse_footprint" and "holelistenabled" are mutually exclusive */
1332 if (options & VM_MAP_CREATE_CORPSE_FOOTPRINT) {
1333 result->has_corpse_footprint = TRUE;
1334 result->holelistenabled = FALSE;
1335 result->vmmap_corpse_footprint = NULL;
1336 } else {
1337 result->has_corpse_footprint = FALSE;
1338 if (vm_map_supports_hole_optimization) {
1339 hole_entry = zalloc(vm_map_holes_zone);
1340
1341 hole_entry->start = min;
1342 #if defined(__arm__) || defined(__arm64__)
1343 hole_entry->end = result->max_offset;
1344 #else
1345 hole_entry->end = (max > (vm_map_offset_t)MACH_VM_MAX_ADDRESS) ? max : (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
1346 #endif
1347 result->holes_list = result->hole_hint = hole_entry;
1348 hole_entry->prev = hole_entry->next = CAST_TO_VM_MAP_ENTRY(hole_entry);
1349 result->holelistenabled = TRUE;
1350 } else {
1351 result->holelistenabled = FALSE;
1352 }
1353 }
1354
1355 vm_map_lock_init(result);
1356 lck_mtx_init_ext(&result->s_lock, &result->s_lock_ext, &vm_map_lck_grp, &vm_map_lck_attr);
1357
1358 return result;
1359 }
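/*
 * Illustrative sketch (not part of the original source): a minimal caller of
 * vm_map_create().  The pmap is assumed to have been created elsewhere and
 * the address bounds are placeholders.
 */
#if 0 /* example only, not compiled */
static vm_map_t
vm_map_create_example(pmap_t new_pmap)
{
	/* pageable map covering the full user address range */
	return vm_map_create(new_pmap,
	    (vm_map_offset_t)MACH_VM_MIN_ADDRESS,
	    (vm_map_offset_t)MACH_VM_MAX_ADDRESS,
	    TRUE);
}
#endif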
1360
1361 vm_map_size_t
1362 vm_map_adjusted_size(vm_map_t map)
1363 {
1364 struct vm_reserved_region *regions = NULL;
1365 size_t num_regions = 0;
1366 mach_vm_size_t reserved_size = 0, map_size = 0;
1367
1368 if (map == NULL || (map->size == 0)) {
1369 return 0;
1370 }
1371
1372 map_size = map->size;
1373
1374 if (map->reserved_regions == FALSE || !vm_map_is_exotic(map) || map->terminated) {
1375 /*
1376 * No special reserved regions or not an exotic map or the task
1377 * is terminating and these special regions might have already
1378 * been deallocated.
1379 */
1380 return map_size;
1381 }
1382
1383 num_regions = ml_get_vm_reserved_regions(vm_map_is_64bit(map), &regions);
1384 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
1385
1386 while (num_regions) {
1387 reserved_size += regions[--num_regions].vmrr_size;
1388 }
1389
1390 /*
1391 * There are a few places where the map is being switched out due to
1392 * 'termination' without that bit being set (e.g. exec and corpse purging).
1393 * In those cases, we could have the map's regions being deallocated on
1394 * a core while some accounting process is trying to get the map's size.
1395 * So this assert can't be enabled till all those places are uniform in
1396 * their use of the 'map->terminated' bit.
1397 *
1398 * assert(map_size >= reserved_size);
1399 */
1400
1401 return (map_size >= reserved_size) ? (map_size - reserved_size) : map_size;
1402 }
1403
1404 /*
1405 * vm_map_entry_create: [ internal use only ]
1406 *
1407 * Allocates a VM map entry for insertion in the
1408 * given map (or map copy). No fields are filled.
1409 */
1410 #define vm_map_entry_create(map, map_locked) _vm_map_entry_create(&(map)->hdr, map_locked)
1411
1412 #define vm_map_copy_entry_create(copy, map_locked) \
1413 _vm_map_entry_create(&(copy)->cpy_hdr, map_locked)
1414 unsigned reserved_zalloc_count, nonreserved_zalloc_count;
1415
1416 static vm_map_entry_t
1417 _vm_map_entry_create(
1418 struct vm_map_header *map_header, boolean_t __unused map_locked)
1419 {
1420 zone_t zone;
1421 vm_map_entry_t entry;
1422
1423 zone = vm_map_entry_zone;
1424
1425 assert(map_header->entries_pageable ? !map_locked : TRUE);
1426
1427 if (map_header->entries_pageable) {
1428 entry = (vm_map_entry_t) zalloc(zone);
1429 } else {
1430 entry = (vm_map_entry_t) zalloc_noblock(zone);
1431
1432 if (entry == VM_MAP_ENTRY_NULL) {
1433 zone = vm_map_entry_reserved_zone;
1434 entry = (vm_map_entry_t) zalloc(zone);
1435 OSAddAtomic(1, &reserved_zalloc_count);
1436 } else {
1437 OSAddAtomic(1, &nonreserved_zalloc_count);
1438 }
1439 }
1440
1441 if (entry == VM_MAP_ENTRY_NULL) {
1442 panic("vm_map_entry_create");
1443 }
1444 *entry = vm_map_entry_template;
1445 entry->from_reserved_zone = (zone == vm_map_entry_reserved_zone);
1446
1447 vm_map_store_update((vm_map_t) NULL, entry, VM_MAP_ENTRY_CREATE);
1448 #if MAP_ENTRY_CREATION_DEBUG
1449 entry->vme_creation_maphdr = map_header;
1450 backtrace(&entry->vme_creation_bt[0],
1451 (sizeof(entry->vme_creation_bt) / sizeof(uintptr_t)), NULL);
1452 #endif
1453 return entry;
1454 }
1455
1456 /*
1457 * vm_map_entry_dispose: [ internal use only ]
1458 *
1459 * Inverse of vm_map_entry_create.
1460 *
1461 * The write map lock is held, so there is no need to
1462 * do anything special to ensure the correctness
1463 * of the stores.
1464 */
1465 #define vm_map_entry_dispose(map, entry) \
1466 _vm_map_entry_dispose(&(map)->hdr, (entry))
1467
1468 #define vm_map_copy_entry_dispose(copy, entry) \
1469 _vm_map_entry_dispose(&(copy)->cpy_hdr, (entry))
1470
1471 static void
1472 _vm_map_entry_dispose(
1473 struct vm_map_header *map_header,
1474 vm_map_entry_t entry)
1475 {
1476 zone_t zone;
1477
1478 if (map_header->entries_pageable || !(entry->from_reserved_zone)) {
1479 zone = vm_map_entry_zone;
1480 } else {
1481 zone = vm_map_entry_reserved_zone;
1482 }
1483
1484 if (!map_header->entries_pageable) {
1485 if (zone == vm_map_entry_zone) {
1486 OSAddAtomic(-1, &nonreserved_zalloc_count);
1487 } else {
1488 OSAddAtomic(-1, &reserved_zalloc_count);
1489 }
1490 }
1491
1492 zfree(zone, entry);
1493 }
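/*
 * Illustrative sketch (not part of the original source): entry allocation
 * and disposal always go through the macros above so the reserved-zone
 * accounting stays balanced.  The wrapper function is hypothetical.
 */
#if 0 /* example only, not compiled */
static void
vm_map_entry_lifecycle_example(vm_map_t map)
{
	vm_map_entry_t entry;

	/* allocate while unlocked: pageable headers may block in zalloc() */
	entry = vm_map_entry_create(map, FALSE /* map_locked */);

	/* ... initialize and link the entry under the map lock, or ... */

	vm_map_entry_dispose(map, entry);
}
#endif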
1494
1495 #if MACH_ASSERT
1496 static boolean_t first_free_check = FALSE;
1497 boolean_t
1498 first_free_is_valid(
1499 vm_map_t map)
1500 {
1501 if (!first_free_check) {
1502 return TRUE;
1503 }
1504
1505 return first_free_is_valid_store( map );
1506 }
1507 #endif /* MACH_ASSERT */
1508
1509
1510 #define vm_map_copy_entry_link(copy, after_where, entry) \
1511 _vm_map_store_entry_link(&(copy)->cpy_hdr, after_where, (entry))
1512
1513 #define vm_map_copy_entry_unlink(copy, entry) \
1514 _vm_map_store_entry_unlink(&(copy)->cpy_hdr, (entry))
1515
1516 #if MACH_ASSERT && TASK_SWAPPER
1517 /*
1518 * vm_map_res_reference:
1519 *
1520 * Adds another valid residence count to the given map.
1521 *
1522 * Map is locked so this function can be called from
1523 * vm_map_swapin.
1524 *
1525 */
1526 void
1527 vm_map_res_reference(vm_map_t map)
1528 {
1529 /* assert map is locked */
1530 assert(map->res_count >= 0);
1531 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1532 if (map->res_count == 0) {
1533 lck_mtx_unlock(&map->s_lock);
1534 vm_map_lock(map);
1535 vm_map_swapin(map);
1536 lck_mtx_lock(&map->s_lock);
1537 ++map->res_count;
1538 vm_map_unlock(map);
1539 } else {
1540 ++map->res_count;
1541 }
1542 }
1543
1544 /*
1545 * vm_map_reference_swap:
1546 *
1547 * Adds valid reference and residence counts to the given map.
1548 *
1549 * The map may not be in memory (i.e. zero residence count).
1550 *
1551 */
1552 void
1553 vm_map_reference_swap(vm_map_t map)
1554 {
1555 assert(map != VM_MAP_NULL);
1556 lck_mtx_lock(&map->s_lock);
1557 assert(map->res_count >= 0);
1558 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1559 os_ref_retain_locked(&map->map_refcnt);
1560 vm_map_res_reference(map);
1561 lck_mtx_unlock(&map->s_lock);
1562 }
1563
1564 /*
1565 * vm_map_res_deallocate:
1566 *
1567 * Decrement residence count on a map; possibly causing swapout.
1568 *
1569 * The map must be in memory (i.e. non-zero residence count).
1570 *
1571 * The map is locked, so this function is callable from vm_map_deallocate.
1572 *
1573 */
1574 void
1575 vm_map_res_deallocate(vm_map_t map)
1576 {
1577 assert(map->res_count > 0);
1578 if (--map->res_count == 0) {
1579 lck_mtx_unlock(&map->s_lock);
1580 vm_map_lock(map);
1581 vm_map_swapout(map);
1582 vm_map_unlock(map);
1583 lck_mtx_lock(&map->s_lock);
1584 }
1585 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
1586 }
1587 #endif /* MACH_ASSERT && TASK_SWAPPER */
1588
1589 /*
1590 * vm_map_destroy:
1591 *
1592 * Actually destroy a map.
1593 */
1594 void
1595 vm_map_destroy(
1596 vm_map_t map,
1597 int flags)
1598 {
1599 vm_map_lock(map);
1600
1601 /* final cleanup: no need to unnest shared region */
1602 flags |= VM_MAP_REMOVE_NO_UNNESTING;
1603 /* final cleanup: ok to remove immutable mappings */
1604 flags |= VM_MAP_REMOVE_IMMUTABLE;
1605 /* final cleanup: allow gaps in range */
1606 flags |= VM_MAP_REMOVE_GAPS_OK;
1607
1608 /* clean up regular map entries */
1609 (void) vm_map_delete(map, map->min_offset, map->max_offset,
1610 flags, VM_MAP_NULL);
1611 /* clean up leftover special mappings (commpage, GPU carveout, etc...) */
1612 #if !defined(__arm__)
1613 (void) vm_map_delete(map, 0x0, 0xFFFFFFFFFFFFF000ULL,
1614 flags, VM_MAP_NULL);
1615 #endif /* !__arm__ */
1616
1617 vm_map_disable_hole_optimization(map);
1618 vm_map_corpse_footprint_destroy(map);
1619
1620 vm_map_unlock(map);
1621
1622 assert(map->hdr.nentries == 0);
1623
1624 if (map->pmap) {
1625 pmap_destroy(map->pmap);
1626 }
1627
1628 if (vm_map_lck_attr.lck_attr_val & LCK_ATTR_DEBUG) {
1629 /*
1630 * If lock debugging is enabled the mutexes get tagged as LCK_MTX_TAG_INDIRECT.
1631 * And this is regardless of whether the lck_mtx_ext_t is embedded in the
1632 * structure or kalloc'ed via lck_mtx_init.
1633 * An example is s_lock_ext within struct _vm_map.
1634 *
1635 * A lck_mtx_destroy on such a mutex will attempt a kfree and panic. We
1636 * can add another tag to detect embedded vs alloc'ed indirect external
1637 * mutexes but that'll be additional checks in the lock path and require
1638 * updating dependencies for the old vs new tag.
1639 *
1640 * Since the kfree() is for LCK_MTX_TAG_INDIRECT mutexes and that tag is applied
1641 * just when lock debugging is ON, we choose to forego explicitly destroying
1642 * the vm_map mutex and rw lock and, as a consequence, will overflow the reference
1643 * count on vm_map_lck_grp, which has no serious side-effect.
1644 */
1645 } else {
1646 lck_rw_destroy(&(map)->lock, &vm_map_lck_grp);
1647 lck_mtx_destroy(&(map)->s_lock, &vm_map_lck_grp);
1648 }
1649
1650 zfree(vm_map_zone, map);
1651 }
1652
1653 /*
1654 * Returns pid of the task with the largest number of VM map entries.
1655 * Used in the zone-map-exhaustion jetsam path.
1656 */
1657 pid_t
1658 find_largest_process_vm_map_entries(void)
1659 {
1660 pid_t victim_pid = -1;
1661 int max_vm_map_entries = 0;
1662 task_t task = TASK_NULL;
1663 queue_head_t *task_list = &tasks;
1664
1665 lck_mtx_lock(&tasks_threads_lock);
1666 queue_iterate(task_list, task, task_t, tasks) {
1667 if (task == kernel_task || !task->active) {
1668 continue;
1669 }
1670
1671 vm_map_t task_map = task->map;
1672 if (task_map != VM_MAP_NULL) {
1673 int task_vm_map_entries = task_map->hdr.nentries;
1674 if (task_vm_map_entries > max_vm_map_entries) {
1675 max_vm_map_entries = task_vm_map_entries;
1676 victim_pid = pid_from_task(task);
1677 }
1678 }
1679 }
1680 lck_mtx_unlock(&tasks_threads_lock);
1681
1682 printf("zone_map_exhaustion: victim pid %d, vm region count: %d\n", victim_pid, max_vm_map_entries);
1683 return victim_pid;
1684 }
1685
1686 #if TASK_SWAPPER
1687 /*
1688 * vm_map_swapin/vm_map_swapout
1689 *
1690 * Swap a map in and out, either referencing or releasing its resources.
1691 * These functions are internal use only; however, they must be exported
1692 * because they may be called from macros, which are exported.
1693 *
1694 * In the case of swapout, there could be races on the residence count,
1695 * so if the residence count is up, we return, assuming that a
1696 * vm_map_deallocate() call in the near future will bring us back.
1697 *
1698 * Locking:
1699 * -- We use the map write lock for synchronization among races.
1700 * -- The map write lock, and not the simple s_lock, protects the
1701 * swap state of the map.
1702 * -- If a map entry is a share map, then we hold both locks, in
1703 * hierarchical order.
1704 *
1705 * Synchronization Notes:
1706 * 1) If a vm_map_swapin() call happens while swapout in progress, it
1707 * will block on the map lock and proceed when swapout is through.
1708 * 2) A vm_map_reference() call at this time is illegal, and will
1709 * cause a panic. vm_map_reference() is only allowed on resident
1710 * maps, since it refuses to block.
1711 * 3) A vm_map_swapin() call during a swapin will block, and
1712 * proceed when the first swapin is done, turning into a nop.
1713 * This is the reason the res_count is not incremented until
1714 * after the swapin is complete.
1715 * 4) There is a timing hole after the checks of the res_count, before
1716 * the map lock is taken, during which a swapin may get the lock
1717 * before a swapout about to happen. If this happens, the swapin
1718 * will detect the state and increment the reference count, causing
1719 * the swapout to be a nop, thereby delaying it until a later
1720 * vm_map_deallocate. If the swapout gets the lock first, then
1721 * the swapin will simply block until the swapout is done, and
1722 * then proceed.
1723 *
1724 * Because vm_map_swapin() is potentially an expensive operation, it
1725 * should be used with caution.
1726 *
1727 * Invariants:
1728 * 1) A map with a residence count of zero is either swapped, or
1729 * being swapped.
1730 * 2) A map with a non-zero residence count is either resident,
1731 * or being swapped in.
1732 */
1733
1734 int vm_map_swap_enable = 1;
1735
1736 void
1737 vm_map_swapin(vm_map_t map)
1738 {
1739 vm_map_entry_t entry;
1740
1741 if (!vm_map_swap_enable) { /* debug */
1742 return;
1743 }
1744
1745 /*
1746 * Map is locked
1747 * First deal with various races.
1748 */
1749 if (map->sw_state == MAP_SW_IN) {
1750 /*
1751 * we raced with swapout and won. Returning will incr.
1752 * the res_count, turning the swapout into a nop.
1753 */
1754 return;
1755 }
1756
1757 /*
1758 * The residence count must be zero. If we raced with another
1759 * swapin, the state would have been IN; if we raced with a
1760 * swapout (after another competing swapin), we must have lost
1761 * the race to get here (see above comment), in which case
1762 * res_count is still 0.
1763 */
1764 assert(map->res_count == 0);
1765
1766 /*
1767 * There are no intermediate states of a map going out or
1768 * coming in, since the map is locked during the transition.
1769 */
1770 assert(map->sw_state == MAP_SW_OUT);
1771
1772 /*
1773 * We now operate upon each map entry. If the entry is a sub-
1774 * or share-map, we call vm_map_res_reference upon it.
1775 * If the entry is an object, we call vm_object_res_reference
1776 * (this may iterate through the shadow chain).
1777 * Note that we hold the map locked the entire time,
1778 * even if we get back here via a recursive call in
1779 * vm_map_res_reference.
1780 */
1781 entry = vm_map_first_entry(map);
1782
1783 while (entry != vm_map_to_entry(map)) {
1784 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1785 if (entry->is_sub_map) {
1786 vm_map_t lmap = VME_SUBMAP(entry);
1787 lck_mtx_lock(&lmap->s_lock);
1788 vm_map_res_reference(lmap);
1789 lck_mtx_unlock(&lmap->s_lock);
1790 } else {
1791 vm_object_t object = VME_OBJECT(entry);
1792 vm_object_lock(object);
1793 /*
1794 * This call may iterate through the
1795 * shadow chain.
1796 */
1797 vm_object_res_reference(object);
1798 vm_object_unlock(object);
1799 }
1800 }
1801 entry = entry->vme_next;
1802 }
1803 assert(map->sw_state == MAP_SW_OUT);
1804 map->sw_state = MAP_SW_IN;
1805 }
1806
1807 void
1808 vm_map_swapout(vm_map_t map)
1809 {
1810 vm_map_entry_t entry;
1811
1812 /*
1813 * Map is locked
1814 * First deal with various races.
1815 * If we raced with a swapin and lost, the residence count
1816 * will have been incremented to 1, and we simply return.
1817 */
1818 lck_mtx_lock(&map->s_lock);
1819 if (map->res_count != 0) {
1820 lck_mtx_unlock(&map->s_lock);
1821 return;
1822 }
1823 lck_mtx_unlock(&map->s_lock);
1824
1825 /*
1826 * There are no intermediate states of a map going out or
1827 * coming in, since the map is locked during the transition.
1828 */
1829 assert(map->sw_state == MAP_SW_IN);
1830
1831 if (!vm_map_swap_enable) {
1832 return;
1833 }
1834
1835 /*
1836 * We now operate upon each map entry. If the entry is a sub-
1837 * or share-map, we call vm_map_res_deallocate upon it.
1838 * If the entry is an object, we call vm_object_res_deallocate
1839 * (this may iterate through the shadow chain).
1840 * Note that we hold the map locked the entire time,
1841 * even if we get back here via a recursive call in
1842 * vm_map_res_deallocate.
1843 */
1844 entry = vm_map_first_entry(map);
1845
1846 while (entry != vm_map_to_entry(map)) {
1847 if (VME_OBJECT(entry) != VM_OBJECT_NULL) {
1848 if (entry->is_sub_map) {
1849 vm_map_t lmap = VME_SUBMAP(entry);
1850 lck_mtx_lock(&lmap->s_lock);
1851 vm_map_res_deallocate(lmap);
1852 lck_mtx_unlock(&lmap->s_lock);
1853 } else {
1854 vm_object_t object = VME_OBJECT(entry);
1855 vm_object_lock(object);
1856 /*
1857 * This call may take a long time,
1858 * since it could actively push
1859 * out pages (if we implement it
1860 * that way).
1861 */
1862 vm_object_res_deallocate(object);
1863 vm_object_unlock(object);
1864 }
1865 }
1866 entry = entry->vme_next;
1867 }
1868 assert(map->sw_state == MAP_SW_IN);
1869 map->sw_state = MAP_SW_OUT;
1870 }
1871
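/*
 * Illustrative sketch (not part of the original source): the checks that
 * make a map a candidate for vm_map_swapout(), mirroring the tests the
 * routine itself performs.  Assumes the caller holds the map write lock,
 * which protects sw_state; res_count is sampled under the simple s_lock,
 * as above.
 */
static __unused boolean_t
vm_map_swapout_candidate(vm_map_t map)
{
        boolean_t res_zero;

        lck_mtx_lock(&map->s_lock);
        res_zero = (map->res_count == 0);
        lck_mtx_unlock(&map->s_lock);

        /* swappable only if no residence references and currently resident */
        return res_zero && (map->sw_state == MAP_SW_IN);
}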
1872 #endif /* TASK_SWAPPER */
1873
1874 /*
1875 * vm_map_lookup_entry: [ internal use only ]
1876 *
1877 * Calls into the vm map store layer to find the map
1878 * entry containing (or immediately preceding) the
1879 * specified address in the given map; the entry is returned
1880 * in the "entry" parameter. The boolean
1881 * result indicates whether the address is
1882 * actually contained in the map.
1883 */
1884 boolean_t
1885 vm_map_lookup_entry(
1886 vm_map_t map,
1887 vm_map_offset_t address,
1888 vm_map_entry_t *entry) /* OUT */
1889 {
1890 return vm_map_store_lookup_entry( map, address, entry );
1891 }
1892
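/*
 * Illustrative sketch (not part of the original source): typical use of
 * vm_map_lookup_entry() with the map lock held for read.  "some_address"
 * is a placeholder; the boolean result tells whether the address falls
 * inside the returned entry or only after it.
 */
static __unused boolean_t
vm_map_address_is_mapped(vm_map_t map, vm_map_offset_t some_address)
{
        vm_map_entry_t entry;
        boolean_t mapped;

        vm_map_lock_read(map);
        mapped = vm_map_lookup_entry(map, some_address, &entry);
        vm_map_unlock_read(map);
        return mapped;
}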
1893 /*
1894 * Routine: vm_map_find_space
1895 * Purpose:
1896 * Allocate a range in the specified virtual address map,
1897 * returning the entry allocated for that range.
1898 * Used by kmem_alloc, etc.
1899 *
1900 * The map must NOT be locked. It will be returned locked
1901 * on KERN_SUCCESS, unlocked on failure.
1902 *
1903 * If an entry is allocated, the object/offset fields
1904 * are initialized to zero.
1905 *
1906 * If the VM_MAP_FIND_LAST_FREE flag is set, allocate from the end of the
1907 * map. This is currently only used for allocating memory for zones backing
1908 * one of the kalloc heaps. (rdar://65832263)
1909 */
1910 kern_return_t
1911 vm_map_find_space(
1912 vm_map_t map,
1913 vm_map_offset_t *address, /* OUT */
1914 vm_map_size_t size,
1915 vm_map_offset_t mask,
1916 int flags,
1917 vm_map_kernel_flags_t vmk_flags,
1918 vm_tag_t tag,
1919 vm_map_entry_t *o_entry) /* OUT */
1920 {
1921 vm_map_entry_t entry, new_entry, hole_entry;
1922 vm_map_offset_t start;
1923 vm_map_offset_t end;
1924
1925 if (size == 0) {
1926 *address = 0;
1927 return KERN_INVALID_ARGUMENT;
1928 }
1929
1930 new_entry = vm_map_entry_create(map, FALSE);
1931 vm_map_lock(map);
1932
1933 if (flags & VM_MAP_FIND_LAST_FREE) {
1934 assert(!map->disable_vmentry_reuse);
1935 /* TODO: Make backward lookup generic and support guard pages */
1936 assert(!vmk_flags.vmkf_guard_after && !vmk_flags.vmkf_guard_before);
1937 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
1938
1939 /* Allocate space from end of map */
1940 vm_map_store_find_last_free(map, &entry);
1941
1942 if (!entry) {
1943 goto noSpace;
1944 }
1945
1946 if (entry == vm_map_to_entry(map)) {
1947 end = map->max_offset;
1948 } else {
1949 end = entry->vme_start;
1950 }
1951
1952 while (TRUE) {
1953 vm_map_entry_t prev;
1954
1955 start = end - size;
1956
1957 if ((start < map->min_offset) || end < start) {
1958 goto noSpace;
1959 }
1960
1961 prev = entry->vme_prev;
1962 entry = prev;
1963
1964 if (prev == vm_map_to_entry(map)) {
1965 break;
1966 }
1967
1968 if (prev->vme_end <= start) {
1969 break;
1970 }
1971
1972 /*
1973 * Didn't fit -- move to the next entry.
1974 */
1975
1976 end = entry->vme_start;
1977 }
1978 } else {
1979 if (vmk_flags.vmkf_guard_after) {
1980 /* account for the back guard page in the size */
1981 size += VM_MAP_PAGE_SIZE(map);
1982 }
1983
1984 /*
1985 * Look for the first possible address; if there's already
1986 * something at this address, we have to start after it.
1987 */
1988
1989 if (map->disable_vmentry_reuse == TRUE) {
1990 VM_MAP_HIGHEST_ENTRY(map, entry, start);
1991 } else {
1992 if (map->holelistenabled) {
1993 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
1994
1995 if (hole_entry == NULL) {
1996 /*
1997 * No more space in the map?
1998 */
1999 goto noSpace;
2000 }
2001
2002 entry = hole_entry;
2003 start = entry->vme_start;
2004 } else {
2005 assert(first_free_is_valid(map));
2006 if ((entry = map->first_free) == vm_map_to_entry(map)) {
2007 start = map->min_offset;
2008 } else {
2009 start = entry->vme_end;
2010 }
2011 }
2012 }
2013
2014 /*
2015 * In any case, the "entry" always precedes
2016 * the proposed new region throughout the loop:
2017 */
2018
2019 while (TRUE) {
2020 vm_map_entry_t next;
2021
2022 /*
2023 * Find the end of the proposed new region.
2024 * Be sure we didn't go beyond the end, or
2025 * wrap around the address.
2026 */
2027
2028 if (vmk_flags.vmkf_guard_before) {
2029 /* reserve space for the front guard page */
2030 start += VM_MAP_PAGE_SIZE(map);
2031 }
2032 end = ((start + mask) & ~mask);
2033
2034 if (end < start) {
2035 goto noSpace;
2036 }
2037 start = end;
2038 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
2039 end += size;
2040 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
2041
2042 if ((end > map->max_offset) || (end < start)) {
2043 goto noSpace;
2044 }
2045
2046 next = entry->vme_next;
2047
2048 if (map->holelistenabled) {
2049 if (entry->vme_end >= end) {
2050 break;
2051 }
2052 } else {
2053 /*
2054 * If there are no more entries, we must win.
2055 *
2056 * OR
2057 *
2058 * If there is another entry, it must be
2059 * after the end of the potential new region.
2060 */
2061
2062 if (next == vm_map_to_entry(map)) {
2063 break;
2064 }
2065
2066 if (next->vme_start >= end) {
2067 break;
2068 }
2069 }
2070
2071 /*
2072 * Didn't fit -- move to the next entry.
2073 */
2074
2075 entry = next;
2076
2077 if (map->holelistenabled) {
2078 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2079 /*
2080 * Wrapped around
2081 */
2082 goto noSpace;
2083 }
2084 start = entry->vme_start;
2085 } else {
2086 start = entry->vme_end;
2087 }
2088 }
2089
2090 if (vmk_flags.vmkf_guard_before) {
2091 /* go back for the front guard page */
2092 start -= VM_MAP_PAGE_SIZE(map);
2093 }
2094 }
2095
2096 if (map->holelistenabled) {
2097 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2098 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2099 }
2100 }
2101
2102 /*
2103 * At this point,
2104 * "start" and "end" should define the endpoints of the
2105 * available new range, and
2106 * "entry" should refer to the region before the new
2107 * range, and
2108 *
2109 * the map should be locked.
2110 */
2111
2112 *address = start;
2113
2114 assert(start < end);
2115 new_entry->vme_start = start;
2116 new_entry->vme_end = end;
2117 assert(page_aligned(new_entry->vme_start));
2118 assert(page_aligned(new_entry->vme_end));
2119 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
2120 VM_MAP_PAGE_MASK(map)));
2121 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
2122 VM_MAP_PAGE_MASK(map)));
2123
2124 new_entry->is_shared = FALSE;
2125 new_entry->is_sub_map = FALSE;
2126 new_entry->use_pmap = TRUE;
2127 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
2128 VME_OFFSET_SET(new_entry, (vm_object_offset_t) 0);
2129
2130 new_entry->needs_copy = FALSE;
2131
2132 new_entry->inheritance = VM_INHERIT_DEFAULT;
2133 new_entry->protection = VM_PROT_DEFAULT;
2134 new_entry->max_protection = VM_PROT_ALL;
2135 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
2136 new_entry->wired_count = 0;
2137 new_entry->user_wired_count = 0;
2138
2139 new_entry->in_transition = FALSE;
2140 new_entry->needs_wakeup = FALSE;
2141 new_entry->no_cache = FALSE;
2142 new_entry->permanent = FALSE;
2143 new_entry->superpage_size = FALSE;
2144 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
2145 new_entry->map_aligned = TRUE;
2146 } else {
2147 new_entry->map_aligned = FALSE;
2148 }
2149
2150 new_entry->used_for_jit = FALSE;
2151 new_entry->pmap_cs_associated = FALSE;
2152 new_entry->zero_wired_pages = FALSE;
2153 new_entry->iokit_acct = FALSE;
2154 new_entry->vme_resilient_codesign = FALSE;
2155 new_entry->vme_resilient_media = FALSE;
2156 if (vmk_flags.vmkf_atomic_entry) {
2157 new_entry->vme_atomic = TRUE;
2158 } else {
2159 new_entry->vme_atomic = FALSE;
2160 }
2161
2162 VME_ALIAS_SET(new_entry, tag);
2163
2164 /*
2165 * Insert the new entry into the list
2166 */
2167
2168 vm_map_store_entry_link(map, entry, new_entry, VM_MAP_KERNEL_FLAGS_NONE);
2169
2170 map->size += size;
2171
2172 /*
2173 * Update the lookup hint
2174 */
2175 SAVE_HINT_MAP_WRITE(map, new_entry);
2176
2177 *o_entry = new_entry;
2178 return KERN_SUCCESS;
2179
2180 noSpace:
2181
2182 vm_map_entry_dispose(map, new_entry);
2183 vm_map_unlock(map);
2184 return KERN_NO_SPACE;
2185 }
2186
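/*
 * Illustrative sketch (not part of the original source): a minimal
 * allocation via vm_map_find_space().  On KERN_SUCCESS the map is
 * returned locked, so the caller finishes setting up the entry and then
 * unlocks.  "size" and "addr_out" are placeholders supplied by the caller.
 */
static __unused kern_return_t
vm_map_find_space_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *addr_out)
{
        vm_map_entry_t entry;
        kern_return_t kr;

        kr = vm_map_find_space(map, addr_out, size, (vm_map_offset_t)0,
            0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE, &entry);
        if (kr == KERN_SUCCESS) {
                /* object/offset of "entry" are zeroed; map is still locked */
                vm_map_unlock(map);
        }
        return kr;
}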
2187 int vm_map_pmap_enter_print = FALSE;
2188 int vm_map_pmap_enter_enable = FALSE;
2189
2190 /*
2191 * Routine: vm_map_pmap_enter [internal only]
2192 *
2193 * Description:
2194 * Force pages from the specified object to be entered into
2195 * the pmap at the specified address if they are present.
2196 * As soon as a page is not found in the object, the scan ends.
2197 *
2198 * Returns:
2199 * Nothing.
2200 *
2201 * In/out conditions:
2202 * The source map should not be locked on entry.
2203 */
2204 __unused static void
2205 vm_map_pmap_enter(
2206 vm_map_t map,
2207 vm_map_offset_t addr,
2208 vm_map_offset_t end_addr,
2209 vm_object_t object,
2210 vm_object_offset_t offset,
2211 vm_prot_t protection)
2212 {
2213 int type_of_fault;
2214 kern_return_t kr;
2215 struct vm_object_fault_info fault_info = {};
2216
2217 if (map->pmap == 0) {
2218 return;
2219 }
2220
2221 assert(VM_MAP_PAGE_SHIFT(map) == PAGE_SHIFT);
2222
2223 while (addr < end_addr) {
2224 vm_page_t m;
2225
2226
2227 /*
2228 * TODO:
2229 * From vm_map_enter(), we come into this function without the map
2230 * lock held or the object lock held.
2231 * We haven't taken a reference on the object either.
2232 * We should do a proper lookup on the map to make sure
2233 * that things are sane before we go locking objects that
2234 * could have been deallocated from under us.
2235 */
2236
2237 vm_object_lock(object);
2238
2239 m = vm_page_lookup(object, offset);
2240
2241 if (m == VM_PAGE_NULL || m->vmp_busy || m->vmp_fictitious ||
2242 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_absent))) {
2243 vm_object_unlock(object);
2244 return;
2245 }
2246
2247 if (vm_map_pmap_enter_print) {
2248 printf("vm_map_pmap_enter:");
2249 printf("map: %p, addr: %llx, object: %p, offset: %llx\n",
2250 map, (unsigned long long)addr, object, (unsigned long long)offset);
2251 }
2252 type_of_fault = DBG_CACHE_HIT_FAULT;
2253 kr = vm_fault_enter(m, map->pmap,
2254 addr,
2255 PAGE_SIZE, 0,
2256 protection, protection,
2257 VM_PAGE_WIRED(m),
2258 FALSE, /* change_wiring */
2259 VM_KERN_MEMORY_NONE, /* tag - not wiring */
2260 &fault_info,
2261 NULL, /* need_retry */
2262 &type_of_fault);
2263
2264 vm_object_unlock(object);
2265
2266 offset += PAGE_SIZE_64;
2267 addr += PAGE_SIZE;
2268 }
2269 }
2270
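/*
 * Illustrative sketch (not part of the original source): how a caller
 * could prefault an already-mapped, resident range with
 * vm_map_pmap_enter().  The routine is currently unused ("__unused"
 * above), so this only shows its intended calling pattern.
 */
static __unused void
vm_map_pmap_enter_example(vm_map_t map, vm_map_offset_t start,
    vm_map_size_t size, vm_object_t object, vm_object_offset_t offset)
{
        /* enter any resident pages read-only; stops at the first absent page */
        vm_map_pmap_enter(map, start, start + size, object, offset,
            VM_PROT_READ);
}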
2271 boolean_t vm_map_pmap_is_empty(
2272 vm_map_t map,
2273 vm_map_offset_t start,
2274 vm_map_offset_t end);
2275 boolean_t
2276 vm_map_pmap_is_empty(
2277 vm_map_t map,
2278 vm_map_offset_t start,
2279 vm_map_offset_t end)
2280 {
2281 #ifdef MACHINE_PMAP_IS_EMPTY
2282 return pmap_is_empty(map->pmap, start, end);
2283 #else /* MACHINE_PMAP_IS_EMPTY */
2284 vm_map_offset_t offset;
2285 ppnum_t phys_page;
2286
2287 if (map->pmap == NULL) {
2288 return TRUE;
2289 }
2290
2291 for (offset = start;
2292 offset < end;
2293 offset += PAGE_SIZE) {
2294 phys_page = pmap_find_phys(map->pmap, offset);
2295 if (phys_page) {
2296 kprintf("vm_map_pmap_is_empty(%p,0x%llx,0x%llx): "
2297 "page %d at 0x%llx\n",
2298 map, (long long)start, (long long)end,
2299 phys_page, (long long)offset);
2300 return FALSE;
2301 }
2302 }
2303 return TRUE;
2304 #endif /* MACHINE_PMAP_IS_EMPTY */
2305 }
2306
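/*
 * Illustrative sketch (not part of the original source): the DEBUG-style
 * sanity check used later in vm_map_enter(), asserting that no physical
 * pages are already entered in the pmap for a range that is about to be
 * mapped.
 */
static __unused void
vm_map_assert_range_unmapped(vm_map_t map, vm_map_offset_t start,
    vm_map_offset_t end)
{
        assert(vm_map_pmap_is_empty(map, start, end));
}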
2307 #define MAX_TRIES_TO_GET_RANDOM_ADDRESS 1000
2308 kern_return_t
2309 vm_map_random_address_for_size(
2310 vm_map_t map,
2311 vm_map_offset_t *address,
2312 vm_map_size_t size)
2313 {
2314 kern_return_t kr = KERN_SUCCESS;
2315 int tries = 0;
2316 vm_map_offset_t random_addr = 0;
2317 vm_map_offset_t hole_end;
2318
2319 vm_map_entry_t next_entry = VM_MAP_ENTRY_NULL;
2320 vm_map_entry_t prev_entry = VM_MAP_ENTRY_NULL;
2321 vm_map_size_t vm_hole_size = 0;
2322 vm_map_size_t addr_space_size;
2323
2324 addr_space_size = vm_map_max(map) - vm_map_min(map);
2325
2326 assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
2327
2328 while (tries < MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2329 random_addr = ((vm_map_offset_t)random()) << VM_MAP_PAGE_SHIFT(map);
2330 random_addr = vm_map_trunc_page(
2331 vm_map_min(map) + (random_addr % addr_space_size),
2332 VM_MAP_PAGE_MASK(map));
2333
2334 if (vm_map_lookup_entry(map, random_addr, &prev_entry) == FALSE) {
2335 if (prev_entry == vm_map_to_entry(map)) {
2336 next_entry = vm_map_first_entry(map);
2337 } else {
2338 next_entry = prev_entry->vme_next;
2339 }
2340 if (next_entry == vm_map_to_entry(map)) {
2341 hole_end = vm_map_max(map);
2342 } else {
2343 hole_end = next_entry->vme_start;
2344 }
2345 vm_hole_size = hole_end - random_addr;
2346 if (vm_hole_size >= size) {
2347 *address = random_addr;
2348 break;
2349 }
2350 }
2351 tries++;
2352 }
2353
2354 if (tries == MAX_TRIES_TO_GET_RANDOM_ADDRESS) {
2355 kr = KERN_NO_SPACE;
2356 }
2357 return kr;
2358 }
2359
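/*
 * Illustrative sketch (not part of the original source): picking a
 * randomized start address for an allocation, as vm_map_enter() does for
 * VM_FLAGS_RANDOM_ADDR.  Assumes the caller already holds the map lock,
 * since the routine walks the map with vm_map_lookup_entry().
 */
static __unused kern_return_t
vm_map_pick_random_start(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *start_out)
{
        /* size must be aligned to the map's page size */
        assert(VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map)));
        return vm_map_random_address_for_size(map, start_out, size);
}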
2360 static boolean_t
2361 vm_memory_malloc_no_cow(
2362 int alias)
2363 {
2364 uint64_t alias_mask;
2365
2366 if (alias > 63) {
2367 return FALSE;
2368 }
2369
2370 alias_mask = 1ULL << alias;
2371 if (alias_mask & vm_memory_malloc_no_cow_mask) {
2372 return TRUE;
2373 }
2374 return FALSE;
2375 }
2376
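/*
 * Illustrative sketch (not part of the original source): how the
 * vm_memory_malloc_no_cow_mask bitmap is consulted.  Each bit position
 * corresponds to a VM_MEMORY_* alias (0..63); for example, testing the
 * VM_MEMORY_MALLOC alias:
 */
static __unused boolean_t
vm_memory_malloc_no_cow_example(void)
{
        /* TRUE only if bit VM_MEMORY_MALLOC is set in vm_memory_malloc_no_cow_mask */
        return vm_memory_malloc_no_cow(VM_MEMORY_MALLOC);
}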
2377 /*
2378 * Routine: vm_map_enter
2379 *
2380 * Description:
2381 * Allocate a range in the specified virtual address map.
2382 * The resulting range will refer to memory defined by
2383 * the given memory object and offset into that object.
2384 *
2385 * Arguments are as defined in the vm_map call.
2386 */
2387 static unsigned int vm_map_enter_restore_successes = 0;
2388 static unsigned int vm_map_enter_restore_failures = 0;
2389 kern_return_t
2390 vm_map_enter(
2391 vm_map_t map,
2392 vm_map_offset_t *address, /* IN/OUT */
2393 vm_map_size_t size,
2394 vm_map_offset_t mask,
2395 int flags,
2396 vm_map_kernel_flags_t vmk_flags,
2397 vm_tag_t alias,
2398 vm_object_t object,
2399 vm_object_offset_t offset,
2400 boolean_t needs_copy,
2401 vm_prot_t cur_protection,
2402 vm_prot_t max_protection,
2403 vm_inherit_t inheritance)
2404 {
2405 vm_map_entry_t entry, new_entry;
2406 vm_map_offset_t start, tmp_start, tmp_offset;
2407 vm_map_offset_t end, tmp_end;
2408 vm_map_offset_t tmp2_start, tmp2_end;
2409 vm_map_offset_t desired_empty_end;
2410 vm_map_offset_t step;
2411 kern_return_t result = KERN_SUCCESS;
2412 vm_map_t zap_old_map = VM_MAP_NULL;
2413 vm_map_t zap_new_map = VM_MAP_NULL;
2414 boolean_t map_locked = FALSE;
2415 boolean_t pmap_empty = TRUE;
2416 boolean_t new_mapping_established = FALSE;
2417 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
2418 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
2419 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
2420 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
2421 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
2422 boolean_t is_submap = vmk_flags.vmkf_submap;
2423 boolean_t permanent = vmk_flags.vmkf_permanent;
2424 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
2425 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
2426 boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
2427 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
2428 boolean_t resilient_codesign = ((flags & VM_FLAGS_RESILIENT_CODESIGN) != 0);
2429 boolean_t resilient_media = ((flags & VM_FLAGS_RESILIENT_MEDIA) != 0);
2430 boolean_t random_address = ((flags & VM_FLAGS_RANDOM_ADDR) != 0);
2431 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
2432 vm_tag_t user_alias;
2433 vm_map_offset_t effective_min_offset, effective_max_offset;
2434 kern_return_t kr;
2435 boolean_t clear_map_aligned = FALSE;
2436 vm_map_entry_t hole_entry;
2437 vm_map_size_t chunk_size = 0;
2438
2439 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
2440
2441 if (flags & VM_FLAGS_4GB_CHUNK) {
2442 #if defined(__LP64__)
2443 chunk_size = (4ULL * 1024 * 1024 * 1024); /* max. 4GB chunks for the new allocation */
2444 #else /* __LP64__ */
2445 chunk_size = ANON_CHUNK_SIZE;
2446 #endif /* __LP64__ */
2447 } else {
2448 chunk_size = ANON_CHUNK_SIZE;
2449 }
2450
2451 if (superpage_size) {
2452 switch (superpage_size) {
2453 /*
2454 * Note that the current implementation only supports
2455 * a single size for superpages, SUPERPAGE_SIZE, per
2456 * architecture. As soon as more sizes are supposed
2457 * to be supported, SUPERPAGE_SIZE has to be replaced
2458 * with a lookup of the size depending on superpage_size.
2459 */
2460 #ifdef __x86_64__
2461 case SUPERPAGE_SIZE_ANY:
2462 /* handle it like 2 MB and round up to page size */
2463 size = (size + 2 * 1024 * 1024 - 1) & ~(2 * 1024 * 1024 - 1);
2464 OS_FALLTHROUGH;
2465 case SUPERPAGE_SIZE_2MB:
2466 break;
2467 #endif
2468 default:
2469 return KERN_INVALID_ARGUMENT;
2470 }
2471 mask = SUPERPAGE_SIZE - 1;
2472 if (size & (SUPERPAGE_SIZE - 1)) {
2473 return KERN_INVALID_ARGUMENT;
2474 }
2475 inheritance = VM_INHERIT_NONE; /* fork() children won't inherit superpages */
2476 }
2477
2478
2479 if ((cur_protection & VM_PROT_WRITE) &&
2480 (cur_protection & VM_PROT_EXECUTE) &&
2481 #if XNU_TARGET_OS_OSX
2482 map->pmap != kernel_pmap &&
2483 (cs_process_global_enforcement() ||
2484 (vmk_flags.vmkf_cs_enforcement_override
2485 ? vmk_flags.vmkf_cs_enforcement
2486 : (vm_map_cs_enforcement(map)
2487 #if __arm64__
2488 || !VM_MAP_IS_EXOTIC(map)
2489 #endif /* __arm64__ */
2490 ))) &&
2491 #endif /* XNU_TARGET_OS_OSX */
2492 #if PMAP_CS
2493 !pmap_cs_exempt(map->pmap) &&
2494 #endif
2495 (VM_MAP_POLICY_WX_FAIL(map) ||
2496 VM_MAP_POLICY_WX_STRIP_X(map)) &&
2497 !entry_for_jit) {
2498 boolean_t vm_protect_wx_fail = VM_MAP_POLICY_WX_FAIL(map);
2499
2500 DTRACE_VM3(cs_wx,
2501 uint64_t, 0,
2502 uint64_t, 0,
2503 vm_prot_t, cur_protection);
2504 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. %s\n",
2505 proc_selfpid(),
2506 (current_task()->bsd_info
2507 ? proc_name_address(current_task()->bsd_info)
2508 : "?"),
2509 __FUNCTION__,
2510 (vm_protect_wx_fail ? "failing" : "turning off execute"));
2511 cur_protection &= ~VM_PROT_EXECUTE;
2512 if (vm_protect_wx_fail) {
2513 return KERN_PROTECTION_FAILURE;
2514 }
2515 }
2516
2517 /*
2518 * If the task has requested executable lockdown,
2519 * deny any new executable mapping.
2520 */
2521 if (map->map_disallow_new_exec == TRUE) {
2522 if (cur_protection & VM_PROT_EXECUTE) {
2523 return KERN_PROTECTION_FAILURE;
2524 }
2525 }
2526
2527 if (resilient_codesign) {
2528 assert(!is_submap);
2529 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
2530 if ((cur_protection | max_protection) & reject_prot) {
2531 return KERN_PROTECTION_FAILURE;
2532 }
2533 }
2534
2535 if (resilient_media) {
2536 assert(!is_submap);
2537 // assert(!needs_copy);
2538 if (object != VM_OBJECT_NULL &&
2539 !object->internal) {
2540 /*
2541 * This mapping is directly backed by an external
2542 * memory manager (e.g. a vnode pager for a file):
2543 * we would not have any safe place to inject
2544 * a zero-filled page if an actual page is not
2545 * available, without possibly impacting the actual
2546 * contents of the mapped object (e.g. the file),
2547 * so we can't provide any media resiliency here.
2548 */
2549 return KERN_INVALID_ARGUMENT;
2550 }
2551 }
2552
2553 if (is_submap) {
2554 if (purgable) {
2555 /* submaps can not be purgeable */
2556 return KERN_INVALID_ARGUMENT;
2557 }
2558 if (object == VM_OBJECT_NULL) {
2559 /* submaps can not be created lazily */
2560 return KERN_INVALID_ARGUMENT;
2561 }
2562 }
2563 if (vmk_flags.vmkf_already) {
2564 /*
2565 * VM_FLAGS_ALREADY says that it's OK if the same mapping
2566 * is already present. For it to be meaningful, the requested
2567 * mapping has to be at a fixed address (!VM_FLAGS_ANYWHERE) and
2568 * we shouldn't try and remove what was mapped there first
2569 * (!VM_FLAGS_OVERWRITE).
2570 */
2571 if ((flags & VM_FLAGS_ANYWHERE) ||
2572 (flags & VM_FLAGS_OVERWRITE)) {
2573 return KERN_INVALID_ARGUMENT;
2574 }
2575 }
2576
2577 effective_min_offset = map->min_offset;
2578
2579 if (vmk_flags.vmkf_beyond_max) {
2580 /*
2581 * Allow an insertion beyond the map's max offset.
2582 */
2583 #if !defined(__arm__)
2584 if (vm_map_is_64bit(map)) {
2585 effective_max_offset = 0xFFFFFFFFFFFFF000ULL;
2586 } else
2587 #endif /* __arm__ */
2588 effective_max_offset = 0x00000000FFFFF000ULL;
2589 } else {
2590 #if XNU_TARGET_OS_OSX
2591 if (__improbable(vmk_flags.vmkf_32bit_map_va)) {
2592 effective_max_offset = MIN(map->max_offset, 0x00000000FFFFF000ULL);
2593 } else {
2594 effective_max_offset = map->max_offset;
2595 }
2596 #else /* XNU_TARGET_OS_OSX */
2597 effective_max_offset = map->max_offset;
2598 #endif /* XNU_TARGET_OS_OSX */
2599 }
2600
2601 if (size == 0 ||
2602 (offset & MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK_64)) != 0) {
2603 *address = 0;
2604 return KERN_INVALID_ARGUMENT;
2605 }
2606
2607 if (map->pmap == kernel_pmap) {
2608 user_alias = VM_KERN_MEMORY_NONE;
2609 } else {
2610 user_alias = alias;
2611 }
2612
2613 if (user_alias == VM_MEMORY_MALLOC_MEDIUM) {
2614 chunk_size = MALLOC_MEDIUM_CHUNK_SIZE;
2615 }
2616
2617 #define RETURN(value) { result = value; goto BailOut; }
2618
2619 assertf(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK), "0x%llx", (uint64_t)*address);
2620 assertf(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK), "0x%llx", (uint64_t)size);
2621 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
2622 assertf(page_aligned(*address), "0x%llx", (uint64_t)*address);
2623 assertf(page_aligned(size), "0x%llx", (uint64_t)size);
2624 }
2625
2626 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2627 !VM_MAP_PAGE_ALIGNED(size, VM_MAP_PAGE_MASK(map))) {
2628 /*
2629 * In most cases, the caller rounds the size up to the
2630 * map's page size.
2631 * If we get a size that is explicitly not map-aligned here,
2632 * we'll have to respect the caller's wish and mark the
2633 * mapping as "not map-aligned" to avoid tripping the
2634 * map alignment checks later.
2635 */
2636 clear_map_aligned = TRUE;
2637 }
2638 if (!anywhere &&
2639 VM_MAP_PAGE_MASK(map) >= PAGE_MASK &&
2640 !VM_MAP_PAGE_ALIGNED(*address, VM_MAP_PAGE_MASK(map))) {
2641 /*
2642 * We've been asked to map at a fixed address and that
2643 * address is not aligned to the map's specific alignment.
2644 * The caller should know what it's doing (i.e. most likely
2645 * mapping some fragmented copy map, transferring memory from
2646 * a VM map with a different alignment), so clear map_aligned
2647 * for this new VM map entry and proceed.
2648 */
2649 clear_map_aligned = TRUE;
2650 }
2651
2652 /*
2653 * Only zero-fill objects are allowed to be purgable.
2654 * LP64todo - limit purgable objects to 32-bits for now
2655 */
2656 if (purgable &&
2657 (offset != 0 ||
2658 (object != VM_OBJECT_NULL &&
2659 (object->vo_size != size ||
2660 object->purgable == VM_PURGABLE_DENY))
2661 || size > ANON_MAX_SIZE)) { /* LP64todo: remove when dp capable */
2662 return KERN_INVALID_ARGUMENT;
2663 }
2664
2665 if (!anywhere && overwrite) {
2666 /*
2667 * Create a temporary VM map to hold the old mappings in the
2668 * affected area while we create the new one.
2669 * This avoids releasing the VM map lock in
2670 * vm_map_entry_delete() and allows atomicity
2671 * when we want to replace some mappings with a new one.
2672 * It also allows us to restore the old VM mappings if the
2673 * new mapping fails.
2674 */
2675 zap_old_map = vm_map_create(PMAP_NULL,
2676 *address,
2677 *address + size,
2678 map->hdr.entries_pageable);
2679 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
2680 vm_map_disable_hole_optimization(zap_old_map);
2681 }
2682
2683 StartAgain:;
2684
2685 start = *address;
2686
2687 if (anywhere) {
2688 vm_map_lock(map);
2689 map_locked = TRUE;
2690
2691 if (entry_for_jit) {
2692 if (map->jit_entry_exists &&
2693 !VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
2694 result = KERN_INVALID_ARGUMENT;
2695 goto BailOut;
2696 }
2697 if (VM_MAP_POLICY_ALLOW_JIT_RANDOM_ADDRESS(map)) {
2698 random_address = TRUE;
2699 }
2700 }
2701
2702 if (random_address) {
2703 /*
2704 * Get a random start address.
2705 */
2706 result = vm_map_random_address_for_size(map, address, size);
2707 if (result != KERN_SUCCESS) {
2708 goto BailOut;
2709 }
2710 start = *address;
2711 }
2712 #if XNU_TARGET_OS_OSX
2713 else if ((start == 0 || start == vm_map_min(map)) &&
2714 !map->disable_vmentry_reuse &&
2715 map->vmmap_high_start != 0) {
2716 start = map->vmmap_high_start;
2717 }
2718 #endif /* XNU_TARGET_OS_OSX */
2719
2720
2721 /*
2722 * Calculate the first possible address.
2723 */
2724
2725 if (start < effective_min_offset) {
2726 start = effective_min_offset;
2727 }
2728 if (start > effective_max_offset) {
2729 RETURN(KERN_NO_SPACE);
2730 }
2731
2732 /*
2733 * Look for the first possible address;
2734 * if there's already something at this
2735 * address, we have to start after it.
2736 */
2737
2738 if (map->disable_vmentry_reuse == TRUE) {
2739 VM_MAP_HIGHEST_ENTRY(map, entry, start);
2740 } else {
2741 if (map->holelistenabled) {
2742 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
2743
2744 if (hole_entry == NULL) {
2745 /*
2746 * No more space in the map?
2747 */
2748 result = KERN_NO_SPACE;
2749 goto BailOut;
2750 } else {
2751 boolean_t found_hole = FALSE;
2752
2753 do {
2754 if (hole_entry->vme_start >= start) {
2755 start = hole_entry->vme_start;
2756 found_hole = TRUE;
2757 break;
2758 }
2759
2760 if (hole_entry->vme_end > start) {
2761 found_hole = TRUE;
2762 break;
2763 }
2764 hole_entry = hole_entry->vme_next;
2765 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
2766
2767 if (found_hole == FALSE) {
2768 result = KERN_NO_SPACE;
2769 goto BailOut;
2770 }
2771
2772 entry = hole_entry;
2773
2774 if (start == 0) {
2775 start += PAGE_SIZE_64;
2776 }
2777 }
2778 } else {
2779 assert(first_free_is_valid(map));
2780
2781 entry = map->first_free;
2782
2783 if (entry == vm_map_to_entry(map)) {
2784 entry = NULL;
2785 } else {
2786 if (entry->vme_next == vm_map_to_entry(map)) {
2787 /*
2788 * Hole at the end of the map.
2789 */
2790 entry = NULL;
2791 } else {
2792 if (start < (entry->vme_next)->vme_start) {
2793 start = entry->vme_end;
2794 start = vm_map_round_page(start,
2795 VM_MAP_PAGE_MASK(map));
2796 } else {
2797 /*
2798 * Need to do a lookup.
2799 */
2800 entry = NULL;
2801 }
2802 }
2803 }
2804
2805 if (entry == NULL) {
2806 vm_map_entry_t tmp_entry;
2807 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
2808 assert(!entry_for_jit);
2809 start = tmp_entry->vme_end;
2810 start = vm_map_round_page(start,
2811 VM_MAP_PAGE_MASK(map));
2812 }
2813 entry = tmp_entry;
2814 }
2815 }
2816 }
2817
2818 /*
2819 * In any case, the "entry" always precedes
2820 * the proposed new region throughout the
2821 * loop:
2822 */
2823
2824 while (TRUE) {
2825 vm_map_entry_t next;
2826
2827 /*
2828 * Find the end of the proposed new region.
2829 * Be sure we didn't go beyond the end, or
2830 * wrap around the address.
2831 */
2832
2833 end = ((start + mask) & ~mask);
2834 end = vm_map_round_page(end,
2835 VM_MAP_PAGE_MASK(map));
2836 if (end < start) {
2837 RETURN(KERN_NO_SPACE);
2838 }
2839 start = end;
2840 assert(VM_MAP_PAGE_ALIGNED(start,
2841 VM_MAP_PAGE_MASK(map)));
2842 end += size;
2843
2844 /* We want an entire page of empty space, but don't increase the allocation size. */
2845 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
2846
2847 if ((desired_empty_end > effective_max_offset) || (desired_empty_end < start)) {
2848 if (map->wait_for_space) {
2849 assert(!keep_map_locked);
2850 if (size <= (effective_max_offset -
2851 effective_min_offset)) {
2852 assert_wait((event_t)map,
2853 THREAD_ABORTSAFE);
2854 vm_map_unlock(map);
2855 map_locked = FALSE;
2856 thread_block(THREAD_CONTINUE_NULL);
2857 goto StartAgain;
2858 }
2859 }
2860 RETURN(KERN_NO_SPACE);
2861 }
2862
2863 next = entry->vme_next;
2864
2865 if (map->holelistenabled) {
2866 if (entry->vme_end >= desired_empty_end) {
2867 break;
2868 }
2869 } else {
2870 /*
2871 * If there are no more entries, we must win.
2872 *
2873 * OR
2874 *
2875 * If there is another entry, it must be
2876 * after the end of the potential new region.
2877 */
2878
2879 if (next == vm_map_to_entry(map)) {
2880 break;
2881 }
2882
2883 if (next->vme_start >= desired_empty_end) {
2884 break;
2885 }
2886 }
2887
2888 /*
2889 * Didn't fit -- move to the next entry.
2890 */
2891
2892 entry = next;
2893
2894 if (map->holelistenabled) {
2895 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
2896 /*
2897 * Wrapped around
2898 */
2899 result = KERN_NO_SPACE;
2900 goto BailOut;
2901 }
2902 start = entry->vme_start;
2903 } else {
2904 start = entry->vme_end;
2905 }
2906
2907 start = vm_map_round_page(start,
2908 VM_MAP_PAGE_MASK(map));
2909 }
2910
2911 if (map->holelistenabled) {
2912 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
2913 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
2914 }
2915 }
2916
2917 *address = start;
2918 assert(VM_MAP_PAGE_ALIGNED(*address,
2919 VM_MAP_PAGE_MASK(map)));
2920 } else {
2921 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT &&
2922 !overwrite &&
2923 user_alias == VM_MEMORY_REALLOC) {
2924 /*
2925 * Force realloc() to switch to a new allocation,
2926 * to prevent 4k-fragmented virtual ranges.
2927 */
2928 // DEBUG4K_ERROR("no realloc in place");
2929 return KERN_NO_SPACE;
2930 }
2931
2932 /*
2933 * Verify that:
2934 * the address doesn't itself violate
2935 * the mask requirement.
2936 */
2937
2938 vm_map_lock(map);
2939 map_locked = TRUE;
2940 if ((start & mask) != 0) {
2941 RETURN(KERN_NO_SPACE);
2942 }
2943
2944 /*
2945 * ... the address is within bounds
2946 */
2947
2948 end = start + size;
2949
2950 if ((start < effective_min_offset) ||
2951 (end > effective_max_offset) ||
2952 (start >= end)) {
2953 RETURN(KERN_INVALID_ADDRESS);
2954 }
2955
2956 if (overwrite && zap_old_map != VM_MAP_NULL) {
2957 int remove_flags;
2958 /*
2959 * Fixed mapping and "overwrite" flag: attempt to
2960 * remove all existing mappings in the specified
2961 * address range, saving them in our "zap_old_map".
2962 */
2963 remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES;
2964 remove_flags |= VM_MAP_REMOVE_NO_MAP_ALIGN;
2965 if (vmk_flags.vmkf_overwrite_immutable) {
2966 /* we can overwrite immutable mappings */
2967 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
2968 }
2969 (void) vm_map_delete(map, start, end,
2970 remove_flags,
2971 zap_old_map);
2972 }
2973
2974 /*
2975 * ... the starting address isn't allocated
2976 */
2977
2978 if (vm_map_lookup_entry(map, start, &entry)) {
2979 if (!(vmk_flags.vmkf_already)) {
2980 RETURN(KERN_NO_SPACE);
2981 }
2982 /*
2983 * Check if what's already there is what we want.
2984 */
2985 tmp_start = start;
2986 tmp_offset = offset;
2987 if (entry->vme_start < start) {
2988 tmp_start -= start - entry->vme_start;
2989 tmp_offset -= start - entry->vme_start;
2990 }
2991 for (; entry->vme_start < end;
2992 entry = entry->vme_next) {
2993 /*
2994 * Check if the mapping's attributes
2995 * match the existing map entry.
2996 */
2997 if (entry == vm_map_to_entry(map) ||
2998 entry->vme_start != tmp_start ||
2999 entry->is_sub_map != is_submap ||
3000 VME_OFFSET(entry) != tmp_offset ||
3001 entry->needs_copy != needs_copy ||
3002 entry->protection != cur_protection ||
3003 entry->max_protection != max_protection ||
3004 entry->inheritance != inheritance ||
3005 entry->iokit_acct != iokit_acct ||
3006 VME_ALIAS(entry) != alias) {
3007 /* not the same mapping ! */
3008 RETURN(KERN_NO_SPACE);
3009 }
3010 /*
3011 * Check if the same object is being mapped.
3012 */
3013 if (is_submap) {
3014 if (VME_SUBMAP(entry) !=
3015 (vm_map_t) object) {
3016 /* not the same submap */
3017 RETURN(KERN_NO_SPACE);
3018 }
3019 } else {
3020 if (VME_OBJECT(entry) != object) {
3021 /* not the same VM object... */
3022 vm_object_t obj2;
3023
3024 obj2 = VME_OBJECT(entry);
3025 if ((obj2 == VM_OBJECT_NULL ||
3026 obj2->internal) &&
3027 (object == VM_OBJECT_NULL ||
3028 object->internal)) {
3029 /*
3030 * ... but both are
3031 * anonymous memory,
3032 * so equivalent.
3033 */
3034 } else {
3035 RETURN(KERN_NO_SPACE);
3036 }
3037 }
3038 }
3039
3040 tmp_offset += entry->vme_end - entry->vme_start;
3041 tmp_start += entry->vme_end - entry->vme_start;
3042 if (entry->vme_end >= end) {
3043 /* reached the end of our mapping */
3044 break;
3045 }
3046 }
3047 /* it all matches: let's use what's already there ! */
3048 RETURN(KERN_MEMORY_PRESENT);
3049 }
3050
3051 /*
3052 * ... the next region doesn't overlap the
3053 * end point.
3054 */
3055
3056 if ((entry->vme_next != vm_map_to_entry(map)) &&
3057 (entry->vme_next->vme_start < end)) {
3058 RETURN(KERN_NO_SPACE);
3059 }
3060 }
3061
3062 /*
3063 * At this point,
3064 * "start" and "end" should define the endpoints of the
3065 * available new range, and
3066 * "entry" should refer to the region before the new
3067 * range, and
3068 *
3069 * the map should be locked.
3070 */
3071
3072 /*
3073 * See whether we can avoid creating a new entry (and object) by
3074 * extending one of our neighbors. [So far, we only attempt to
3075 * extend from below.] Note that we can never extend/join
3076 * purgable objects because they need to remain distinct
3077 * entities in order to implement their "volatile object"
3078 * semantics.
3079 */
3080
3081 if (purgable ||
3082 entry_for_jit ||
3083 vm_memory_malloc_no_cow(user_alias)) {
3084 if (object == VM_OBJECT_NULL) {
3085 object = vm_object_allocate(size);
3086 object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3087 object->true_share = FALSE;
3088 if (purgable) {
3089 task_t owner;
3090 object->purgable = VM_PURGABLE_NONVOLATILE;
3091 if (map->pmap == kernel_pmap) {
3092 /*
3093 * Purgeable mappings made in a kernel
3094 * map are "owned" by the kernel itself
3095 * rather than the current user task
3096 * because they're likely to be used by
3097 * more than this user task (see
3098 * execargs_purgeable_allocate(), for
3099 * example).
3100 */
3101 owner = kernel_task;
3102 } else {
3103 owner = current_task();
3104 }
3105 assert(object->vo_owner == NULL);
3106 assert(object->resident_page_count == 0);
3107 assert(object->wired_page_count == 0);
3108 vm_object_lock(object);
3109 vm_purgeable_nonvolatile_enqueue(object, owner);
3110 vm_object_unlock(object);
3111 }
3112 offset = (vm_object_offset_t)0;
3113 }
3114 } else if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
3115 /* no coalescing if address space uses sub-pages */
3116 } else if ((is_submap == FALSE) &&
3117 (object == VM_OBJECT_NULL) &&
3118 (entry != vm_map_to_entry(map)) &&
3119 (entry->vme_end == start) &&
3120 (!entry->is_shared) &&
3121 (!entry->is_sub_map) &&
3122 (!entry->in_transition) &&
3123 (!entry->needs_wakeup) &&
3124 (entry->behavior == VM_BEHAVIOR_DEFAULT) &&
3125 (entry->protection == cur_protection) &&
3126 (entry->max_protection == max_protection) &&
3127 (entry->inheritance == inheritance) &&
3128 ((user_alias == VM_MEMORY_REALLOC) ||
3129 (VME_ALIAS(entry) == alias)) &&
3130 (entry->no_cache == no_cache) &&
3131 (entry->permanent == permanent) &&
3132 /* no coalescing for immutable executable mappings */
3133 !((entry->protection & VM_PROT_EXECUTE) &&
3134 entry->permanent) &&
3135 (!entry->superpage_size && !superpage_size) &&
3136 /*
3137 * No coalescing if not map-aligned, to avoid propagating
3138 * that condition any further than needed:
3139 */
3140 (!entry->map_aligned || !clear_map_aligned) &&
3141 (!entry->zero_wired_pages) &&
3142 (!entry->used_for_jit && !entry_for_jit) &&
3143 (!entry->pmap_cs_associated) &&
3144 (entry->iokit_acct == iokit_acct) &&
3145 (!entry->vme_resilient_codesign) &&
3146 (!entry->vme_resilient_media) &&
3147 (!entry->vme_atomic) &&
3148 (entry->vme_no_copy_on_read == no_copy_on_read) &&
3149
3150 ((entry->vme_end - entry->vme_start) + size <=
3151 (user_alias == VM_MEMORY_REALLOC ?
3152 ANON_CHUNK_SIZE :
3153 NO_COALESCE_LIMIT)) &&
3154
3155 (entry->wired_count == 0)) { /* implies user_wired_count == 0 */
3156 if (vm_object_coalesce(VME_OBJECT(entry),
3157 VM_OBJECT_NULL,
3158 VME_OFFSET(entry),
3159 (vm_object_offset_t) 0,
3160 (vm_map_size_t)(entry->vme_end - entry->vme_start),
3161 (vm_map_size_t)(end - entry->vme_end))) {
3162 /*
3163 * Coalesced the two objects - can extend
3164 * the previous map entry to include the
3165 * new range.
3166 */
3167 map->size += (end - entry->vme_end);
3168 assert(entry->vme_start < end);
3169 assert(VM_MAP_PAGE_ALIGNED(end,
3170 VM_MAP_PAGE_MASK(map)));
3171 if (__improbable(vm_debug_events)) {
3172 DTRACE_VM5(map_entry_extend, vm_map_t, map, vm_map_entry_t, entry, vm_address_t, entry->vme_start, vm_address_t, entry->vme_end, vm_address_t, end);
3173 }
3174 entry->vme_end = end;
3175 if (map->holelistenabled) {
3176 vm_map_store_update_first_free(map, entry, TRUE);
3177 } else {
3178 vm_map_store_update_first_free(map, map->first_free, TRUE);
3179 }
3180 new_mapping_established = TRUE;
3181 RETURN(KERN_SUCCESS);
3182 }
3183 }
3184
3185 step = superpage_size ? SUPERPAGE_SIZE : (end - start);
3186 new_entry = NULL;
3187
3188 for (tmp2_start = start; tmp2_start < end; tmp2_start += step) {
3189 tmp2_end = tmp2_start + step;
3190 /*
3191 * Create a new entry
3192 *
3193 * XXX FBDP
3194 * The reserved "page zero" in each process's address space can
3195 * be arbitrarily large. Splitting it into separate objects and
3196 * therefore different VM map entries serves no purpose and just
3197 * slows down operations on the VM map, so let's not split the
3198 * allocation into chunks if the max protection is NONE. That
3199 * memory should never be accessible, so it will never get to the
3200 * default pager.
3201 */
3202 tmp_start = tmp2_start;
3203 if (object == VM_OBJECT_NULL &&
3204 size > chunk_size &&
3205 max_protection != VM_PROT_NONE &&
3206 superpage_size == 0) {
3207 tmp_end = tmp_start + chunk_size;
3208 } else {
3209 tmp_end = tmp2_end;
3210 }
3211 do {
3212 new_entry = vm_map_entry_insert(map,
3213 entry, tmp_start, tmp_end,
3214 object, offset, needs_copy,
3215 FALSE, FALSE,
3216 cur_protection, max_protection,
3217 VM_BEHAVIOR_DEFAULT,
3218 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3219 VM_INHERIT_NONE : inheritance),
3220 0,
3221 no_cache,
3222 permanent,
3223 no_copy_on_read,
3224 superpage_size,
3225 clear_map_aligned,
3226 is_submap,
3227 entry_for_jit,
3228 alias,
3229 translated_allow_execute);
3230
3231 assert((object != kernel_object) || (VM_KERN_MEMORY_NONE != alias));
3232
3233 if (resilient_codesign) {
3234 int reject_prot = (needs_copy ? VM_PROT_EXECUTE : (VM_PROT_WRITE | VM_PROT_EXECUTE));
3235 if (!((cur_protection | max_protection) & reject_prot)) {
3236 new_entry->vme_resilient_codesign = TRUE;
3237 }
3238 }
3239
3240 if (resilient_media &&
3241 (object == VM_OBJECT_NULL ||
3242 object->internal)) {
3243 new_entry->vme_resilient_media = TRUE;
3244 }
3245
3246 assert(!new_entry->iokit_acct);
3247 if (!is_submap &&
3248 object != VM_OBJECT_NULL &&
3249 (object->purgable != VM_PURGABLE_DENY ||
3250 object->vo_ledger_tag)) {
3251 assert(new_entry->use_pmap);
3252 assert(!new_entry->iokit_acct);
3253 /*
3254 * Turn off pmap accounting since
3255 * purgeable (or tagged) objects have their
3256 * own ledgers.
3257 */
3258 new_entry->use_pmap = FALSE;
3259 } else if (!is_submap &&
3260 iokit_acct &&
3261 object != VM_OBJECT_NULL &&
3262 object->internal) {
3263 /* alternate accounting */
3264 assert(!new_entry->iokit_acct);
3265 assert(new_entry->use_pmap);
3266 new_entry->iokit_acct = TRUE;
3267 new_entry->use_pmap = FALSE;
3268 DTRACE_VM4(
3269 vm_map_iokit_mapped_region,
3270 vm_map_t, map,
3271 vm_map_offset_t, new_entry->vme_start,
3272 vm_map_offset_t, new_entry->vme_end,
3273 int, VME_ALIAS(new_entry));
3274 vm_map_iokit_mapped_region(
3275 map,
3276 (new_entry->vme_end -
3277 new_entry->vme_start));
3278 } else if (!is_submap) {
3279 assert(!new_entry->iokit_acct);
3280 assert(new_entry->use_pmap);
3281 }
3282
3283 if (is_submap) {
3284 vm_map_t submap;
3285 boolean_t submap_is_64bit;
3286 boolean_t use_pmap;
3287
3288 assert(new_entry->is_sub_map);
3289 assert(!new_entry->use_pmap);
3290 assert(!new_entry->iokit_acct);
3291 submap = (vm_map_t) object;
3292 submap_is_64bit = vm_map_is_64bit(submap);
3293 use_pmap = vmk_flags.vmkf_nested_pmap;
3294 #ifndef NO_NESTED_PMAP
3295 if (use_pmap && submap->pmap == NULL) {
3296 ledger_t ledger = map->pmap->ledger;
3297 /* we need a sub pmap to nest... */
3298 submap->pmap = pmap_create_options(ledger, 0,
3299 submap_is_64bit ? PMAP_CREATE_64BIT : 0);
3300 if (submap->pmap == NULL) {
3301 /* let's proceed without nesting... */
3302 }
3303 #if defined(__arm__) || defined(__arm64__)
3304 else {
3305 pmap_set_nested(submap->pmap);
3306 }
3307 #endif
3308 }
3309 if (use_pmap && submap->pmap != NULL) {
3310 if (VM_MAP_PAGE_SHIFT(map) != VM_MAP_PAGE_SHIFT(submap)) {
3311 DEBUG4K_ERROR("map %p (%d) submap %p (%d): incompatible page sizes\n", map, VM_MAP_PAGE_SHIFT(map), submap, VM_MAP_PAGE_SHIFT(submap));
3312 kr = KERN_FAILURE;
3313 } else {
3314 kr = pmap_nest(map->pmap,
3315 submap->pmap,
3316 tmp_start,
3317 tmp_end - tmp_start);
3318 }
3319 if (kr != KERN_SUCCESS) {
3320 printf("vm_map_enter: "
3321 "pmap_nest(0x%llx,0x%llx) "
3322 "error 0x%x\n",
3323 (long long)tmp_start,
3324 (long long)tmp_end,
3325 kr);
3326 } else {
3327 /* we're now nested ! */
3328 new_entry->use_pmap = TRUE;
3329 pmap_empty = FALSE;
3330 }
3331 }
3332 #endif /* NO_NESTED_PMAP */
3333 }
3334 entry = new_entry;
3335
3336 if (superpage_size) {
3337 vm_page_t pages, m;
3338 vm_object_t sp_object;
3339 vm_object_offset_t sp_offset;
3340
3341 VME_OFFSET_SET(entry, 0);
3342
3343 /* allocate one superpage */
3344 kr = cpm_allocate(SUPERPAGE_SIZE, &pages, 0, SUPERPAGE_NBASEPAGES - 1, TRUE, 0);
3345 if (kr != KERN_SUCCESS) {
3346 /* deallocate whole range... */
3347 new_mapping_established = TRUE;
3348 /* ... but only up to "tmp_end" */
3349 size -= end - tmp_end;
3350 RETURN(kr);
3351 }
3352
3353 /* create one vm_object per superpage */
3354 sp_object = vm_object_allocate((vm_map_size_t)(entry->vme_end - entry->vme_start));
3355 sp_object->phys_contiguous = TRUE;
3356 sp_object->vo_shadow_offset = (vm_object_offset_t)VM_PAGE_GET_PHYS_PAGE(pages) * PAGE_SIZE;
3357 VME_OBJECT_SET(entry, sp_object);
3358 assert(entry->use_pmap);
3359
3360 /* enter the base pages into the object */
3361 vm_object_lock(sp_object);
3362 for (sp_offset = 0;
3363 sp_offset < SUPERPAGE_SIZE;
3364 sp_offset += PAGE_SIZE) {
3365 m = pages;
3366 pmap_zero_page(VM_PAGE_GET_PHYS_PAGE(m));
3367 pages = NEXT_PAGE(m);
3368 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
3369 vm_page_insert_wired(m, sp_object, sp_offset, VM_KERN_MEMORY_OSFMK);
3370 }
3371 vm_object_unlock(sp_object);
3372 }
3373 } while (tmp_end != tmp2_end &&
3374 (tmp_start = tmp_end) &&
3375 (tmp_end = (tmp2_end - tmp_end > chunk_size) ?
3376 tmp_end + chunk_size : tmp2_end));
3377 }
3378
3379 new_mapping_established = TRUE;
3380
3381 BailOut:
3382 assert(map_locked == TRUE);
3383
3384 if (result == KERN_SUCCESS) {
3385 vm_prot_t pager_prot;
3386 memory_object_t pager;
3387
3388 #if DEBUG
3389 if (pmap_empty &&
3390 !(vmk_flags.vmkf_no_pmap_check)) {
3391 assert(vm_map_pmap_is_empty(map,
3392 *address,
3393 *address + size));
3394 }
3395 #endif /* DEBUG */
3396
3397 /*
3398 * For "named" VM objects, let the pager know that the
3399 * memory object is being mapped. Some pagers need to keep
3400 * track of this, to know when they can reclaim the memory
3401 * object, for example.
3402 * VM calls memory_object_map() for each mapping (specifying
3403 * the protection of each mapping) and calls
3404 * memory_object_last_unmap() when all the mappings are gone.
3405 */
3406 pager_prot = max_protection;
3407 if (needs_copy) {
3408 /*
3409 * Copy-On-Write mapping: won't modify
3410 * the memory object.
3411 */
3412 pager_prot &= ~VM_PROT_WRITE;
3413 }
3414 if (!is_submap &&
3415 object != VM_OBJECT_NULL &&
3416 object->named &&
3417 object->pager != MEMORY_OBJECT_NULL) {
3418 vm_object_lock(object);
3419 pager = object->pager;
3420 if (object->named &&
3421 pager != MEMORY_OBJECT_NULL) {
3422 assert(object->pager_ready);
3423 vm_object_mapping_wait(object, THREAD_UNINT);
3424 vm_object_mapping_begin(object);
3425 vm_object_unlock(object);
3426
3427 kr = memory_object_map(pager, pager_prot);
3428 assert(kr == KERN_SUCCESS);
3429
3430 vm_object_lock(object);
3431 vm_object_mapping_end(object);
3432 }
3433 vm_object_unlock(object);
3434 }
3435 }
3436
3437 assert(map_locked == TRUE);
3438
3439 if (!keep_map_locked) {
3440 vm_map_unlock(map);
3441 map_locked = FALSE;
3442 }
3443
3444 /*
3445 * We can't hold the map lock if we enter this block.
3446 */
3447
3448 if (result == KERN_SUCCESS) {
3449 /* Wire down the new entry if the user
3450 * requested all new map entries be wired.
3451 */
3452 if ((map->wiring_required) || (superpage_size)) {
3453 assert(!keep_map_locked);
3454 pmap_empty = FALSE; /* pmap won't be empty */
3455 kr = vm_map_wire_kernel(map, start, end,
3456 new_entry->protection, VM_KERN_MEMORY_MLOCK,
3457 TRUE);
3458 result = kr;
3459 }
3460
3461 }
3462
3463 if (result != KERN_SUCCESS) {
3464 if (new_mapping_established) {
3465 /*
3466 * We have to get rid of the new mappings since we
3467 * won't make them available to the user.
3468 * Try to do that atomically, to minimize the risk
3469 * that someone else creates new mappings in that range.
3470 */
3471 zap_new_map = vm_map_create(PMAP_NULL,
3472 *address,
3473 *address + size,
3474 map->hdr.entries_pageable);
3475 vm_map_set_page_shift(zap_new_map,
3476 VM_MAP_PAGE_SHIFT(map));
3477 vm_map_disable_hole_optimization(zap_new_map);
3478
3479 if (!map_locked) {
3480 vm_map_lock(map);
3481 map_locked = TRUE;
3482 }
3483 (void) vm_map_delete(map, *address, *address + size,
3484 (VM_MAP_REMOVE_SAVE_ENTRIES |
3485 VM_MAP_REMOVE_NO_MAP_ALIGN),
3486 zap_new_map);
3487 }
3488 if (zap_old_map != VM_MAP_NULL &&
3489 zap_old_map->hdr.nentries != 0) {
3490 vm_map_entry_t entry1, entry2;
3491
3492 /*
3493 * The new mapping failed. Attempt to restore
3494 * the old mappings, saved in the "zap_old_map".
3495 */
3496 if (!map_locked) {
3497 vm_map_lock(map);
3498 map_locked = TRUE;
3499 }
3500
3501 /* first check if the coast is still clear */
3502 start = vm_map_first_entry(zap_old_map)->vme_start;
3503 end = vm_map_last_entry(zap_old_map)->vme_end;
3504 if (vm_map_lookup_entry(map, start, &entry1) ||
3505 vm_map_lookup_entry(map, end, &entry2) ||
3506 entry1 != entry2) {
3507 /*
3508 * Part of that range has already been
3509 * re-mapped: we can't restore the old
3510 * mappings...
3511 */
3512 vm_map_enter_restore_failures++;
3513 } else {
3514 /*
3515 * Transfer the saved map entries from
3516 * "zap_old_map" to the original "map",
3517 * inserting them all after "entry1".
3518 */
3519 for (entry2 = vm_map_first_entry(zap_old_map);
3520 entry2 != vm_map_to_entry(zap_old_map);
3521 entry2 = vm_map_first_entry(zap_old_map)) {
3522 vm_map_size_t entry_size;
3523
3524 entry_size = (entry2->vme_end -
3525 entry2->vme_start);
3526 vm_map_store_entry_unlink(zap_old_map,
3527 entry2);
3528 zap_old_map->size -= entry_size;
3529 vm_map_store_entry_link(map, entry1, entry2,
3530 VM_MAP_KERNEL_FLAGS_NONE);
3531 map->size += entry_size;
3532 entry1 = entry2;
3533 }
3534 if (map->wiring_required) {
3535 /*
3536 * XXX TODO: we should rewire the
3537 * old pages here...
3538 */
3539 }
3540 vm_map_enter_restore_successes++;
3541 }
3542 }
3543 }
3544
3545 /*
3546 * The caller is responsible for releasing the lock if it requested to
3547 * keep the map locked.
3548 */
3549 if (map_locked && !keep_map_locked) {
3550 vm_map_unlock(map);
3551 }
3552
3553 /*
3554 * Get rid of the "zap_maps" and all the map entries that
3555 * they may still contain.
3556 */
3557 if (zap_old_map != VM_MAP_NULL) {
3558 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3559 zap_old_map = VM_MAP_NULL;
3560 }
3561 if (zap_new_map != VM_MAP_NULL) {
3562 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
3563 zap_new_map = VM_MAP_NULL;
3564 }
3565
3566 return result;
3567
3568 #undef RETURN
3569 }
3570
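/*
 * Illustrative sketch (not part of the original source): a minimal
 * anonymous, zero-fill allocation through vm_map_enter() with
 * VM_FLAGS_ANYWHERE.  All names are placeholders; real callers normally
 * go through the vm_map() / mach_vm_map() wrappers instead.
 */
static __unused kern_return_t
vm_map_enter_anywhere_example(vm_map_t map, vm_map_size_t size,
    vm_map_offset_t *addr_out)
{
        *addr_out = 0;
        return vm_map_enter(map, addr_out, size, (vm_map_offset_t)0,
            VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
            VM_OBJECT_NULL, (vm_object_offset_t)0, FALSE,
            VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
}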
3571 #if __arm64__
3572 extern const struct memory_object_pager_ops fourk_pager_ops;
3573 kern_return_t
3574 vm_map_enter_fourk(
3575 vm_map_t map,
3576 vm_map_offset_t *address, /* IN/OUT */
3577 vm_map_size_t size,
3578 vm_map_offset_t mask,
3579 int flags,
3580 vm_map_kernel_flags_t vmk_flags,
3581 vm_tag_t alias,
3582 vm_object_t object,
3583 vm_object_offset_t offset,
3584 boolean_t needs_copy,
3585 vm_prot_t cur_protection,
3586 vm_prot_t max_protection,
3587 vm_inherit_t inheritance)
3588 {
3589 vm_map_entry_t entry, new_entry;
3590 vm_map_offset_t start, fourk_start;
3591 vm_map_offset_t end, fourk_end;
3592 vm_map_size_t fourk_size;
3593 kern_return_t result = KERN_SUCCESS;
3594 vm_map_t zap_old_map = VM_MAP_NULL;
3595 vm_map_t zap_new_map = VM_MAP_NULL;
3596 boolean_t map_locked = FALSE;
3597 boolean_t pmap_empty = TRUE;
3598 boolean_t new_mapping_established = FALSE;
3599 boolean_t keep_map_locked = vmk_flags.vmkf_keep_map_locked;
3600 boolean_t anywhere = ((flags & VM_FLAGS_ANYWHERE) != 0);
3601 boolean_t purgable = ((flags & VM_FLAGS_PURGABLE) != 0);
3602 boolean_t overwrite = ((flags & VM_FLAGS_OVERWRITE) != 0);
3603 boolean_t no_cache = ((flags & VM_FLAGS_NO_CACHE) != 0);
3604 boolean_t is_submap = vmk_flags.vmkf_submap;
3605 boolean_t permanent = vmk_flags.vmkf_permanent;
3606 boolean_t no_copy_on_read = vmk_flags.vmkf_no_copy_on_read;
3607 boolean_t entry_for_jit = vmk_flags.vmkf_map_jit;
3608 // boolean_t iokit_acct = vmk_flags.vmkf_iokit_acct;
3609 boolean_t translated_allow_execute = vmk_flags.vmkf_translated_allow_execute;
3610 unsigned int superpage_size = ((flags & VM_FLAGS_SUPERPAGE_MASK) >> VM_FLAGS_SUPERPAGE_SHIFT);
3611 vm_map_offset_t effective_min_offset, effective_max_offset;
3612 kern_return_t kr;
3613 boolean_t clear_map_aligned = FALSE;
3614 memory_object_t fourk_mem_obj;
3615 vm_object_t fourk_object;
3616 vm_map_offset_t fourk_pager_offset;
3617 int fourk_pager_index_start, fourk_pager_index_num;
3618 int cur_idx;
3619 boolean_t fourk_copy;
3620 vm_object_t copy_object;
3621 vm_object_offset_t copy_offset;
3622
3623 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
3624 panic("%s:%d\n", __FUNCTION__, __LINE__);
3625 }
3626 fourk_mem_obj = MEMORY_OBJECT_NULL;
3627 fourk_object = VM_OBJECT_NULL;
3628
3629 if (superpage_size) {
3630 return KERN_NOT_SUPPORTED;
3631 }
3632
3633 if ((cur_protection & VM_PROT_WRITE) &&
3634 (cur_protection & VM_PROT_EXECUTE) &&
3635 #if XNU_TARGET_OS_OSX
3636 map->pmap != kernel_pmap &&
3637 (vm_map_cs_enforcement(map)
3638 #if __arm64__
3639 || !VM_MAP_IS_EXOTIC(map)
3640 #endif /* __arm64__ */
3641 ) &&
3642 #endif /* XNU_TARGET_OS_OSX */
3643 #if PMAP_CS
3644 !pmap_cs_exempt(map->pmap) &&
3645 #endif
3646 !entry_for_jit) {
3647 DTRACE_VM3(cs_wx,
3648 uint64_t, 0,
3649 uint64_t, 0,
3650 vm_prot_t, cur_protection);
3651 printf("CODE SIGNING: %d[%s] %s: curprot cannot be write+execute. "
3652 "turning off execute\n",
3653 proc_selfpid(),
3654 (current_task()->bsd_info
3655 ? proc_name_address(current_task()->bsd_info)
3656 : "?"),
3657 __FUNCTION__);
3658 cur_protection &= ~VM_PROT_EXECUTE;
3659 }
3660
3661 /*
3662 * If the task has requested executable lockdown,
3663 * deny any new executable mapping.
3664 */
3665 if (map->map_disallow_new_exec == TRUE) {
3666 if (cur_protection & VM_PROT_EXECUTE) {
3667 return KERN_PROTECTION_FAILURE;
3668 }
3669 }
3670
3671 if (is_submap) {
3672 return KERN_NOT_SUPPORTED;
3673 }
3674 if (vmk_flags.vmkf_already) {
3675 return KERN_NOT_SUPPORTED;
3676 }
3677 if (purgable || entry_for_jit) {
3678 return KERN_NOT_SUPPORTED;
3679 }
3680
3681 effective_min_offset = map->min_offset;
3682
3683 if (vmk_flags.vmkf_beyond_max) {
3684 return KERN_NOT_SUPPORTED;
3685 } else {
3686 effective_max_offset = map->max_offset;
3687 }
3688
3689 if (size == 0 ||
3690 (offset & FOURK_PAGE_MASK) != 0) {
3691 *address = 0;
3692 return KERN_INVALID_ARGUMENT;
3693 }
3694
3695 #define RETURN(value) { result = value; goto BailOut; }
3696
3697 assert(VM_MAP_PAGE_ALIGNED(*address, FOURK_PAGE_MASK));
3698 assert(VM_MAP_PAGE_ALIGNED(size, FOURK_PAGE_MASK));
3699
3700 if (!anywhere && overwrite) {
3701 return KERN_NOT_SUPPORTED;
3702 }
3703 if (!anywhere && overwrite) {
3704 /*
3705 * Create a temporary VM map to hold the old mappings in the
3706 * affected area while we create the new one.
3707 * This avoids releasing the VM map lock in
3708 * vm_map_entry_delete() and allows atomicity
3709 * when we want to replace some mappings with a new one.
3710 * It also allows us to restore the old VM mappings if the
3711 * new mapping fails.
3712 */
3713 zap_old_map = vm_map_create(PMAP_NULL,
3714 *address,
3715 *address + size,
3716 map->hdr.entries_pageable);
3717 vm_map_set_page_shift(zap_old_map, VM_MAP_PAGE_SHIFT(map));
3718 vm_map_disable_hole_optimization(zap_old_map);
3719 }
3720
3721 fourk_start = *address;
3722 fourk_size = size;
3723 fourk_end = fourk_start + fourk_size;
3724
3725 start = vm_map_trunc_page(*address, VM_MAP_PAGE_MASK(map));
3726 end = vm_map_round_page(fourk_end, VM_MAP_PAGE_MASK(map));
3727 size = end - start;
3728
3729 if (anywhere) {
3730 return KERN_NOT_SUPPORTED;
3731 } else {
3732 /*
3733 * Verify that:
3734 * the address doesn't itself violate
3735 * the mask requirement.
3736 */
3737
3738 vm_map_lock(map);
3739 map_locked = TRUE;
3740 if ((start & mask) != 0) {
3741 RETURN(KERN_NO_SPACE);
3742 }
3743
3744 /*
3745 * ... the address is within bounds
3746 */
3747
3748 end = start + size;
3749
3750 if ((start < effective_min_offset) ||
3751 (end > effective_max_offset) ||
3752 (start >= end)) {
3753 RETURN(KERN_INVALID_ADDRESS);
3754 }
3755
3756 if (overwrite && zap_old_map != VM_MAP_NULL) {
3757 /*
3758 * Fixed mapping and "overwrite" flag: attempt to
3759 * remove all existing mappings in the specified
3760 * address range, saving them in our "zap_old_map".
3761 */
3762 (void) vm_map_delete(map, start, end,
3763 (VM_MAP_REMOVE_SAVE_ENTRIES |
3764 VM_MAP_REMOVE_NO_MAP_ALIGN),
3765 zap_old_map);
3766 }
3767
3768 /*
3769 * ... the starting address isn't allocated
3770 */
3771 if (vm_map_lookup_entry(map, start, &entry)) {
3772 vm_object_t cur_object, shadow_object;
3773
3774 /*
3775 * We might already have some 4K mappings
3776 * in a 16K page here.
3777 */
3778
3779 if (entry->vme_end - entry->vme_start
3780 != SIXTEENK_PAGE_SIZE) {
3781 RETURN(KERN_NO_SPACE);
3782 }
3783 if (entry->is_sub_map) {
3784 RETURN(KERN_NO_SPACE);
3785 }
3786 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
3787 RETURN(KERN_NO_SPACE);
3788 }
3789
3790 /* go all the way down the shadow chain */
3791 cur_object = VME_OBJECT(entry);
3792 vm_object_lock(cur_object);
3793 while (cur_object->shadow != VM_OBJECT_NULL) {
3794 shadow_object = cur_object->shadow;
3795 vm_object_lock(shadow_object);
3796 vm_object_unlock(cur_object);
3797 cur_object = shadow_object;
3798 shadow_object = VM_OBJECT_NULL;
3799 }
3800 if (cur_object->internal ||
3801 cur_object->pager == NULL) {
3802 vm_object_unlock(cur_object);
3803 RETURN(KERN_NO_SPACE);
3804 }
3805 if (cur_object->pager->mo_pager_ops
3806 != &fourk_pager_ops) {
3807 vm_object_unlock(cur_object);
3808 RETURN(KERN_NO_SPACE);
3809 }
3810 fourk_object = cur_object;
3811 fourk_mem_obj = fourk_object->pager;
3812
3813 /* keep the "4K" object alive */
3814 vm_object_reference_locked(fourk_object);
3815 memory_object_reference(fourk_mem_obj);
3816 vm_object_unlock(fourk_object);
3817
3818 /* merge permissions */
3819 entry->protection |= cur_protection;
3820 entry->max_protection |= max_protection;
3821 if ((entry->protection & (VM_PROT_WRITE |
3822 VM_PROT_EXECUTE)) ==
3823 (VM_PROT_WRITE | VM_PROT_EXECUTE) &&
3824 fourk_binary_compatibility_unsafe &&
3825 fourk_binary_compatibility_allow_wx) {
3826 /* write+execute: need to be "jit" */
3827 entry->used_for_jit = TRUE;
3828 }
3829 goto map_in_fourk_pager;
3830 }
3831
3832 /*
3833 * ... the next region doesn't overlap the
3834 * end point.
3835 */
3836
3837 if ((entry->vme_next != vm_map_to_entry(map)) &&
3838 (entry->vme_next->vme_start < end)) {
3839 RETURN(KERN_NO_SPACE);
3840 }
3841 }
3842
3843 /*
3844 * At this point,
3845 * "start" and "end" should define the endpoints of the
3846 * available new range, and
3847 * "entry" should refer to the region before the new
3848 * range, and
3849 *
3850 * the map should be locked.
3851 */
3852
3853 /* create a new "4K" pager */
3854 fourk_mem_obj = fourk_pager_create();
3855 fourk_object = fourk_pager_to_vm_object(fourk_mem_obj);
3856 assert(fourk_object);
3857
3858 /* keep the "4K" object alive */
3859 vm_object_reference(fourk_object);
3860
3861 /* create a "copy" object, to map the "4K" object copy-on-write */
3862 fourk_copy = TRUE;
3863 result = vm_object_copy_strategically(fourk_object,
3864 0,
3865 end - start,
3866 &copy_object,
3867 &copy_offset,
3868 &fourk_copy);
3869 assert(result == KERN_SUCCESS);
3870 assert(copy_object != VM_OBJECT_NULL);
3871 assert(copy_offset == 0);
3872
3873 /* map the "4K" pager's copy object */
3874 new_entry =
3875 vm_map_entry_insert(map, entry,
3876 vm_map_trunc_page(start,
3877 VM_MAP_PAGE_MASK(map)),
3878 vm_map_round_page(end,
3879 VM_MAP_PAGE_MASK(map)),
3880 copy_object,
3881 0, /* offset */
3882 FALSE, /* needs_copy */
3883 FALSE,
3884 FALSE,
3885 cur_protection, max_protection,
3886 VM_BEHAVIOR_DEFAULT,
3887 (entry_for_jit && !VM_MAP_POLICY_ALLOW_JIT_INHERIT(map) ?
3888 VM_INHERIT_NONE : inheritance),
3889 0,
3890 no_cache,
3891 permanent,
3892 no_copy_on_read,
3893 superpage_size,
3894 clear_map_aligned,
3895 is_submap,
3896 FALSE, /* jit */
3897 alias,
3898 translated_allow_execute);
3899 entry = new_entry;
3900
3901 #if VM_MAP_DEBUG_FOURK
3902 if (vm_map_debug_fourk) {
3903 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] new pager %p\n",
3904 map,
3905 (uint64_t) entry->vme_start,
3906 (uint64_t) entry->vme_end,
3907 fourk_mem_obj);
3908 }
3909 #endif /* VM_MAP_DEBUG_FOURK */
3910
3911 new_mapping_established = TRUE;
3912
3913 map_in_fourk_pager:
3914 /* "map" the original "object" where it belongs in the "4K" pager */
3915 fourk_pager_offset = (fourk_start & SIXTEENK_PAGE_MASK);
3916 fourk_pager_index_start = (int) (fourk_pager_offset / FOURK_PAGE_SIZE);
3917 if (fourk_size > SIXTEENK_PAGE_SIZE) {
3918 fourk_pager_index_num = 4;
3919 } else {
3920 fourk_pager_index_num = (int) (fourk_size / FOURK_PAGE_SIZE);
3921 }
3922 if (fourk_pager_index_start + fourk_pager_index_num > 4) {
3923 fourk_pager_index_num = 4 - fourk_pager_index_start;
3924 }
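/*
 * The 16K page backing this mapping holds four 4K slots;
 * "fourk_pager_index_start"/"fourk_pager_index_num" select which of
 * those slots get (re)populated below, clamped to the pager's 4 slots.
 */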
3925 for (cur_idx = 0;
3926 cur_idx < fourk_pager_index_num;
3927 cur_idx++) {
3928 vm_object_t old_object;
3929 vm_object_offset_t old_offset;
3930
3931 kr = fourk_pager_populate(fourk_mem_obj,
3932 TRUE, /* overwrite */
3933 fourk_pager_index_start + cur_idx,
3934 object,
3935 (object
3936 ? (offset +
3937 (cur_idx * FOURK_PAGE_SIZE))
3938 : 0),
3939 &old_object,
3940 &old_offset);
3941 #if VM_MAP_DEBUG_FOURK
3942 if (vm_map_debug_fourk) {
3943 if (old_object == (vm_object_t) -1 &&
3944 old_offset == (vm_object_offset_t) -1) {
3945 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3946 "pager [%p:0x%llx] "
3947 "populate[%d] "
3948 "[object:%p,offset:0x%llx]\n",
3949 map,
3950 (uint64_t) entry->vme_start,
3951 (uint64_t) entry->vme_end,
3952 fourk_mem_obj,
3953 VME_OFFSET(entry),
3954 fourk_pager_index_start + cur_idx,
3955 object,
3956 (object
3957 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3958 : 0));
3959 } else {
3960 printf("FOURK_PAGER: map %p [0x%llx:0x%llx] "
3961 "pager [%p:0x%llx] "
3962 "populate[%d] [object:%p,offset:0x%llx] "
3963 "old [%p:0x%llx]\n",
3964 map,
3965 (uint64_t) entry->vme_start,
3966 (uint64_t) entry->vme_end,
3967 fourk_mem_obj,
3968 VME_OFFSET(entry),
3969 fourk_pager_index_start + cur_idx,
3970 object,
3971 (object
3972 ? (offset + (cur_idx * FOURK_PAGE_SIZE))
3973 : 0),
3974 old_object,
3975 old_offset);
3976 }
3977 }
3978 #endif /* VM_MAP_DEBUG_FOURK */
3979
3980 assert(kr == KERN_SUCCESS);
3981 if (object != old_object &&
3982 object != VM_OBJECT_NULL &&
3983 object != (vm_object_t) -1) {
3984 vm_object_reference(object);
3985 }
3986 if (object != old_object &&
3987 old_object != VM_OBJECT_NULL &&
3988 old_object != (vm_object_t) -1) {
3989 vm_object_deallocate(old_object);
3990 }
3991 }
3992
3993 BailOut:
3994 assert(map_locked == TRUE);
3995
3996 if (result == KERN_SUCCESS) {
3997 vm_prot_t pager_prot;
3998 memory_object_t pager;
3999
4000 #if DEBUG
4001 if (pmap_empty &&
4002 !(vmk_flags.vmkf_no_pmap_check)) {
4003 assert(vm_map_pmap_is_empty(map,
4004 *address,
4005 *address + size));
4006 }
4007 #endif /* DEBUG */
4008
4009 /*
4010 * For "named" VM objects, let the pager know that the
4011 * memory object is being mapped. Some pagers need to keep
4012 * track of this, to know when they can reclaim the memory
4013 * object, for example.
4014 * VM calls memory_object_map() for each mapping (specifying
4015 * the protection of each mapping) and calls
4016 * memory_object_last_unmap() when all the mappings are gone.
4017 */
4018 pager_prot = max_protection;
4019 if (needs_copy) {
4020 /*
4021 * Copy-On-Write mapping: won't modify
4022 * the memory object.
4023 */
4024 pager_prot &= ~VM_PROT_WRITE;
4025 }
4026 if (!is_submap &&
4027 object != VM_OBJECT_NULL &&
4028 object->named &&
4029 object->pager != MEMORY_OBJECT_NULL) {
4030 vm_object_lock(object);
4031 pager = object->pager;
4032 if (object->named &&
4033 pager != MEMORY_OBJECT_NULL) {
4034 assert(object->pager_ready);
4035 vm_object_mapping_wait(object, THREAD_UNINT);
4036 vm_object_mapping_begin(object);
4037 vm_object_unlock(object);
4038
4039 kr = memory_object_map(pager, pager_prot);
4040 assert(kr == KERN_SUCCESS);
4041
4042 vm_object_lock(object);
4043 vm_object_mapping_end(object);
4044 }
4045 vm_object_unlock(object);
4046 }
4047 if (!is_submap &&
4048 fourk_object != VM_OBJECT_NULL &&
4049 fourk_object->named &&
4050 fourk_object->pager != MEMORY_OBJECT_NULL) {
4051 vm_object_lock(fourk_object);
4052 pager = fourk_object->pager;
4053 if (fourk_object->named &&
4054 pager != MEMORY_OBJECT_NULL) {
4055 assert(fourk_object->pager_ready);
4056 vm_object_mapping_wait(fourk_object,
4057 THREAD_UNINT);
4058 vm_object_mapping_begin(fourk_object);
4059 vm_object_unlock(fourk_object);
4060
4061 kr = memory_object_map(pager, VM_PROT_READ);
4062 assert(kr == KERN_SUCCESS);
4063
4064 vm_object_lock(fourk_object);
4065 vm_object_mapping_end(fourk_object);
4066 }
4067 vm_object_unlock(fourk_object);
4068 }
4069 }
4070
4071 if (fourk_object != VM_OBJECT_NULL) {
4072 vm_object_deallocate(fourk_object);
4073 fourk_object = VM_OBJECT_NULL;
4074 memory_object_deallocate(fourk_mem_obj);
4075 fourk_mem_obj = MEMORY_OBJECT_NULL;
4076 }
4077
4078 assert(map_locked == TRUE);
4079
4080 if (!keep_map_locked) {
4081 vm_map_unlock(map);
4082 map_locked = FALSE;
4083 }
4084
4085 /*
4086 * We can't hold the map lock if we enter this block.
4087 */
4088
4089 if (result == KERN_SUCCESS) {
4090 /* Wire down the new entry if the user
4091 * requested all new map entries be wired.
4092 */
4093 if ((map->wiring_required) || (superpage_size)) {
4094 assert(!keep_map_locked);
4095 pmap_empty = FALSE; /* pmap won't be empty */
4096 kr = vm_map_wire_kernel(map, start, end,
4097 new_entry->protection, VM_KERN_MEMORY_MLOCK,
4098 TRUE);
4099 result = kr;
4100 }
4101
4102 }
4103
4104 if (result != KERN_SUCCESS) {
4105 if (new_mapping_established) {
4106 /*
4107 * We have to get rid of the new mappings since we
4108 * won't make them available to the user.
4109 * Try to do that atomically, to minimize the risk
4110 * that someone else creates new mappings in that range.
4111 */
4112 zap_new_map = vm_map_create(PMAP_NULL,
4113 *address,
4114 *address + size,
4115 map->hdr.entries_pageable);
4116 vm_map_set_page_shift(zap_new_map,
4117 VM_MAP_PAGE_SHIFT(map));
4118 vm_map_disable_hole_optimization(zap_new_map);
4119
4120 if (!map_locked) {
4121 vm_map_lock(map);
4122 map_locked = TRUE;
4123 }
4124 (void) vm_map_delete(map, *address, *address + size,
4125 (VM_MAP_REMOVE_SAVE_ENTRIES |
4126 VM_MAP_REMOVE_NO_MAP_ALIGN),
4127 zap_new_map);
4128 }
4129 if (zap_old_map != VM_MAP_NULL &&
4130 zap_old_map->hdr.nentries != 0) {
4131 vm_map_entry_t entry1, entry2;
4132
4133 /*
4134 * The new mapping failed. Attempt to restore
4135 * the old mappings, saved in the "zap_old_map".
4136 */
4137 if (!map_locked) {
4138 vm_map_lock(map);
4139 map_locked = TRUE;
4140 }
4141
4142 /* first check if the coast is still clear */
4143 start = vm_map_first_entry(zap_old_map)->vme_start;
4144 end = vm_map_last_entry(zap_old_map)->vme_end;
4145 if (vm_map_lookup_entry(map, start, &entry1) ||
4146 vm_map_lookup_entry(map, end, &entry2) ||
4147 entry1 != entry2) {
4148 /*
4149 * Part of that range has already been
4150 * re-mapped: we can't restore the old
4151 * mappings...
4152 */
4153 vm_map_enter_restore_failures++;
4154 } else {
4155 /*
4156 * Transfer the saved map entries from
4157 * "zap_old_map" to the original "map",
4158 * inserting them all after "entry1".
4159 */
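/*
 * Each pass unlinks the head entry of "zap_old_map" and relinks
 * it into "map" right after the previously restored entry, so the
 * original ordering is preserved and "zap_old_map" drains to empty.
 */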
4160 for (entry2 = vm_map_first_entry(zap_old_map);
4161 entry2 != vm_map_to_entry(zap_old_map);
4162 entry2 = vm_map_first_entry(zap_old_map)) {
4163 vm_map_size_t entry_size;
4164
4165 entry_size = (entry2->vme_end -
4166 entry2->vme_start);
4167 vm_map_store_entry_unlink(zap_old_map,
4168 entry2);
4169 zap_old_map->size -= entry_size;
4170 vm_map_store_entry_link(map, entry1, entry2,
4171 VM_MAP_KERNEL_FLAGS_NONE);
4172 map->size += entry_size;
4173 entry1 = entry2;
4174 }
4175 if (map->wiring_required) {
4176 /*
4177 * XXX TODO: we should rewire the
4178 * old pages here...
4179 */
4180 }
4181 vm_map_enter_restore_successes++;
4182 }
4183 }
4184 }
4185
4186 /*
4187 * The caller is responsible for releasing the lock if it requested to
4188 * keep the map locked.
4189 */
4190 if (map_locked && !keep_map_locked) {
4191 vm_map_unlock(map);
4192 }
4193
4194 /*
4195 * Get rid of the "zap_maps" and all the map entries that
4196 * they may still contain.
4197 */
4198 if (zap_old_map != VM_MAP_NULL) {
4199 vm_map_destroy(zap_old_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4200 zap_old_map = VM_MAP_NULL;
4201 }
4202 if (zap_new_map != VM_MAP_NULL) {
4203 vm_map_destroy(zap_new_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
4204 zap_new_map = VM_MAP_NULL;
4205 }
4206
4207 return result;
4208
4209 #undef RETURN
4210 }
4211 #endif /* __arm64__ */
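/*
 * Illustrative sketch (not part of the original source): kernel code
 * would typically reach vm_map_enter_fourk() indirectly, by mapping a
 * memory entry with vmk_flags.vmkf_fourk set and a fixed address
 * (the 4K path above rejects VM_FLAGS_ANYWHERE, overwrite, purgable
 * and JIT requests). "addr", "size" and "mem_entry_port" are
 * hypothetical locals here:
 *
 *	vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
 *	vmk_flags.vmkf_fourk = TRUE;
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *	    VM_FLAGS_FIXED, vmk_flags, VM_KERN_MEMORY_NONE,
 *	    mem_entry_port, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
 *	    VM_INHERIT_DEFAULT);
 */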
4212
4213 /*
4214 * Counters for the prefault optimization.
4215 */
4216 int64_t vm_prefault_nb_pages = 0;
4217 int64_t vm_prefault_nb_bailout = 0;
4218
4219 static kern_return_t
4220 vm_map_enter_mem_object_helper(
4221 vm_map_t target_map,
4222 vm_map_offset_t *address,
4223 vm_map_size_t initial_size,
4224 vm_map_offset_t mask,
4225 int flags,
4226 vm_map_kernel_flags_t vmk_flags,
4227 vm_tag_t tag,
4228 ipc_port_t port,
4229 vm_object_offset_t offset,
4230 boolean_t copy,
4231 vm_prot_t cur_protection,
4232 vm_prot_t max_protection,
4233 vm_inherit_t inheritance,
4234 upl_page_list_ptr_t page_list,
4235 unsigned int page_list_count)
4236 {
4237 vm_map_address_t map_addr;
4238 vm_map_size_t map_size;
4239 vm_object_t object;
4240 vm_object_size_t size;
4241 kern_return_t result;
4242 boolean_t mask_cur_protection, mask_max_protection;
4243 boolean_t kernel_prefault, try_prefault = (page_list_count != 0);
4244 vm_map_offset_t offset_in_mapping = 0;
4245 #if __arm64__
4246 boolean_t fourk = vmk_flags.vmkf_fourk;
4247 #endif /* __arm64__ */
4248
4249 if (VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4250 /* XXX TODO4K prefaulting depends on page size... */
4251 try_prefault = FALSE;
4252 }
4253
4254 assertf(vmk_flags.__vmkf_unused == 0, "vmk_flags unused=0x%x\n", vmk_flags.__vmkf_unused);
4255
4256 mask_cur_protection = cur_protection & VM_PROT_IS_MASK;
4257 mask_max_protection = max_protection & VM_PROT_IS_MASK;
4258 cur_protection &= ~VM_PROT_IS_MASK;
4259 max_protection &= ~VM_PROT_IS_MASK;
4260
4261 /*
4262 * Check arguments for validity
4263 */
4264 if ((target_map == VM_MAP_NULL) ||
4265 (cur_protection & ~VM_PROT_ALL) ||
4266 (max_protection & ~VM_PROT_ALL) ||
4267 (inheritance > VM_INHERIT_LAST_VALID) ||
4268 (try_prefault && (copy || !page_list)) ||
4269 initial_size == 0) {
4270 return KERN_INVALID_ARGUMENT;
4271 }
4272
4273 #if __arm64__
4274 if (fourk && VM_MAP_PAGE_SHIFT(target_map) < PAGE_SHIFT) {
4275 /* no "fourk" if map is using a sub-page page size */
4276 fourk = FALSE;
4277 }
4278 if (fourk) {
4279 map_addr = vm_map_trunc_page(*address, FOURK_PAGE_MASK);
4280 map_size = vm_map_round_page(initial_size, FOURK_PAGE_MASK);
4281 } else
4282 #endif /* __arm64__ */
4283 {
4284 map_addr = vm_map_trunc_page(*address,
4285 VM_MAP_PAGE_MASK(target_map));
4286 map_size = vm_map_round_page(initial_size,
4287 VM_MAP_PAGE_MASK(target_map));
4288 }
4289 size = vm_object_round_page(initial_size);
4290
4291 /*
4292 * Find the vm object (if any) corresponding to this port.
4293 */
4294 if (!IP_VALID(port)) {
4295 object = VM_OBJECT_NULL;
4296 offset = 0;
4297 copy = FALSE;
4298 } else if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
4299 vm_named_entry_t named_entry;
4300 vm_object_offset_t data_offset;
4301
4302 named_entry = (vm_named_entry_t) ip_get_kobject(port);
4303
4304 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4305 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4306 data_offset = named_entry->data_offset;
4307 offset += named_entry->data_offset;
4308 } else {
4309 data_offset = 0;
4310 }
4311
4312 /* a few checks to make sure the user is obeying the rules */
4313 if (size == 0) {
4314 if (offset >= named_entry->size) {
4315 return KERN_INVALID_RIGHT;
4316 }
4317 size = named_entry->size - offset;
4318 }
4319 if (mask_max_protection) {
4320 max_protection &= named_entry->protection;
4321 }
4322 if (mask_cur_protection) {
4323 cur_protection &= named_entry->protection;
4324 }
4325 if ((named_entry->protection & max_protection) !=
4326 max_protection) {
4327 return KERN_INVALID_RIGHT;
4328 }
4329 if ((named_entry->protection & cur_protection) !=
4330 cur_protection) {
4331 return KERN_INVALID_RIGHT;
4332 }
4333 if (offset + size < offset) {
4334 /* overflow */
4335 return KERN_INVALID_ARGUMENT;
4336 }
4337 if (named_entry->size < (offset + initial_size)) {
4338 return KERN_INVALID_ARGUMENT;
4339 }
4340
4341 if (named_entry->is_copy) {
4342 /* for a vm_map_copy, we can only map it whole */
4343 if ((size != named_entry->size) &&
4344 (vm_map_round_page(size,
4345 VM_MAP_PAGE_MASK(target_map)) ==
4346 named_entry->size)) {
4347 /* XXX FBDP use the rounded size... */
4348 size = vm_map_round_page(
4349 size,
4350 VM_MAP_PAGE_MASK(target_map));
4351 }
4352 }
4353
4354 /* the caller's "offset" parameter is the offset from the start */
4355 /* of the named entry; add the named entry's own offset in the object */
4356 offset = offset + named_entry->offset;
4357
4358 if (!VM_MAP_PAGE_ALIGNED(size,
4359 VM_MAP_PAGE_MASK(target_map))) {
4360 /*
4361 * Let's not map more than requested;
4362 * vm_map_enter() will handle this "not map-aligned"
4363 * case.
4364 */
4365 map_size = size;
4366 }
4367
4368 named_entry_lock(named_entry);
4369 if (named_entry->is_sub_map) {
4370 vm_map_t submap;
4371
4372 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4373 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4374 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for submap.");
4375 }
4376
4377 submap = named_entry->backing.map;
4378 vm_map_reference(submap);
4379 named_entry_unlock(named_entry);
4380
4381 vmk_flags.vmkf_submap = TRUE;
4382
4383 result = vm_map_enter(target_map,
4384 &map_addr,
4385 map_size,
4386 mask,
4387 flags,
4388 vmk_flags,
4389 tag,
4390 (vm_object_t)(uintptr_t) submap,
4391 offset,
4392 copy,
4393 cur_protection,
4394 max_protection,
4395 inheritance);
4396 if (result != KERN_SUCCESS) {
4397 vm_map_deallocate(submap);
4398 } else {
4399 /*
4400 * No need to lock "submap" just to check its
4401 * "mapped" flag: that flag is never reset
4402 * once it's been set and if we race, we'll
4403 * just end up setting it twice, which is OK.
4404 */
4405 if (submap->mapped_in_other_pmaps == FALSE &&
4406 vm_map_pmap(submap) != PMAP_NULL &&
4407 vm_map_pmap(submap) !=
4408 vm_map_pmap(target_map)) {
4409 /*
4410 * This submap is being mapped in a map
4411 * that uses a different pmap.
4412 * Set its "mapped_in_other_pmaps" flag
4413 * to indicate that we now need to
4414 * remove mappings from all pmaps rather
4415 * than just the submap's pmap.
4416 */
4417 vm_map_lock(submap);
4418 submap->mapped_in_other_pmaps = TRUE;
4419 vm_map_unlock(submap);
4420 }
4421 *address = map_addr;
4422 }
4423 return result;
4424 } else if (named_entry->is_copy) {
4425 kern_return_t kr;
4426 vm_map_copy_t copy_map;
4427 vm_map_entry_t copy_entry;
4428 vm_map_offset_t copy_addr;
4429 vm_map_copy_t target_copy_map;
4430 vm_map_offset_t overmap_start, overmap_end;
4431 vm_map_offset_t trimmed_start;
4432 vm_map_size_t target_size;
4433
4434 if (flags & ~(VM_FLAGS_FIXED |
4435 VM_FLAGS_ANYWHERE |
4436 VM_FLAGS_OVERWRITE |
4437 VM_FLAGS_RETURN_4K_DATA_ADDR |
4438 VM_FLAGS_RETURN_DATA_ADDR |
4439 VM_FLAGS_ALIAS_MASK)) {
4440 named_entry_unlock(named_entry);
4441 return KERN_INVALID_ARGUMENT;
4442 }
4443
4444 copy_map = named_entry->backing.copy;
4445 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
4446 if (copy_map->type != VM_MAP_COPY_ENTRY_LIST) {
4447 /* unsupported type; should not happen */
4448 printf("vm_map_enter_mem_object: "
4449 "memory_entry->backing.copy "
4450 "unsupported type 0x%x\n",
4451 copy_map->type);
4452 named_entry_unlock(named_entry);
4453 return KERN_INVALID_ARGUMENT;
4454 }
4455
4456 if (VM_MAP_PAGE_SHIFT(target_map) != copy_map->cpy_hdr.page_shift) {
4457 DEBUG4K_SHARE("copy_map %p offset %llx size 0x%llx pgshift %d -> target_map %p pgshift %d\n", copy_map, offset, (uint64_t)map_size, copy_map->cpy_hdr.page_shift, target_map, VM_MAP_PAGE_SHIFT(target_map));
4458 }
4459
4460 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4461 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4462 offset_in_mapping = offset & VM_MAP_PAGE_MASK(target_map);
4463 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4464 offset_in_mapping &= ~((signed)(0xFFF));
4465 }
4466 }
4467
4468 target_copy_map = VM_MAP_COPY_NULL;
4469 target_size = copy_map->size;
4470 overmap_start = 0;
4471 overmap_end = 0;
4472 trimmed_start = 0;
4473 if (copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(target_map)) {
4474 DEBUG4K_ADJUST("adjusting...\n");
4475 kr = vm_map_copy_adjust_to_target(
4476 copy_map,
4477 offset /* includes data_offset */,
4478 initial_size,
4479 target_map,
4480 copy,
4481 &target_copy_map,
4482 &overmap_start,
4483 &overmap_end,
4484 &trimmed_start);
4485 if (kr != KERN_SUCCESS) {
4486 named_entry_unlock(named_entry);
4487 return kr;
4488 }
4489 target_size = target_copy_map->size;
4490 if (trimmed_start >= data_offset) {
4491 data_offset = offset & VM_MAP_PAGE_MASK(target_map);
4492 } else {
4493 data_offset -= trimmed_start;
4494 }
4495 } else {
4496 target_copy_map = copy_map;
4497 }
4498
4499 /* reserve a contiguous range */
4500 kr = vm_map_enter(target_map,
4501 &map_addr,
4502 vm_map_round_page(target_size, VM_MAP_PAGE_MASK(target_map)),
4503 mask,
4504 flags & (VM_FLAGS_ANYWHERE |
4505 VM_FLAGS_OVERWRITE |
4506 VM_FLAGS_RETURN_4K_DATA_ADDR |
4507 VM_FLAGS_RETURN_DATA_ADDR),
4508 vmk_flags,
4509 tag,
4510 VM_OBJECT_NULL,
4511 0,
4512 FALSE, /* copy */
4513 cur_protection,
4514 max_protection,
4515 inheritance);
4516 if (kr != KERN_SUCCESS) {
4517 DEBUG4K_ERROR("kr 0x%x\n", kr);
4518 if (target_copy_map != copy_map) {
4519 vm_map_copy_discard(target_copy_map);
4520 target_copy_map = VM_MAP_COPY_NULL;
4521 }
4522 named_entry_unlock(named_entry);
4523 return kr;
4524 }
4525
4526 copy_addr = map_addr;
4527
4528 for (copy_entry = vm_map_copy_first_entry(target_copy_map);
4529 copy_entry != vm_map_copy_to_entry(target_copy_map);
4530 copy_entry = copy_entry->vme_next) {
4531 int remap_flags;
4532 vm_map_kernel_flags_t vmk_remap_flags;
4533 vm_map_t copy_submap;
4534 vm_object_t copy_object;
4535 vm_map_size_t copy_size;
4536 vm_object_offset_t copy_offset;
4537 int copy_vm_alias;
4538
4539 remap_flags = 0;
4540 vmk_remap_flags = VM_MAP_KERNEL_FLAGS_NONE;
4541
4542 copy_object = VME_OBJECT(copy_entry);
4543 copy_offset = VME_OFFSET(copy_entry);
4544 copy_size = (copy_entry->vme_end -
4545 copy_entry->vme_start);
4546 VM_GET_FLAGS_ALIAS(flags, copy_vm_alias);
4547 if (copy_vm_alias == 0) {
4548 /*
4549 * Caller does not want a specific
4550 * alias for this new mapping: use
4551 * the alias of the original mapping.
4552 */
4553 copy_vm_alias = VME_ALIAS(copy_entry);
4554 }
4555
4556 /* sanity check */
4557 if ((copy_addr + copy_size) >
4558 (map_addr +
4559 overmap_start + overmap_end +
4560 named_entry->size /* XXX full size */)) {
4561 /* over-mapping too much !? */
4562 kr = KERN_INVALID_ARGUMENT;
4563 DEBUG4K_ERROR("kr 0x%x\n", kr);
4564 /* abort */
4565 break;
4566 }
4567
4568 /* take a reference on the object */
4569 if (copy_entry->is_sub_map) {
4570 vmk_remap_flags.vmkf_submap = TRUE;
4571 copy_submap = VME_SUBMAP(copy_entry);
4572 vm_map_lock(copy_submap);
4573 vm_map_reference(copy_submap);
4574 vm_map_unlock(copy_submap);
4575 copy_object = (vm_object_t)(uintptr_t) copy_submap;
4576 } else if (!copy &&
4577 copy_object != VM_OBJECT_NULL &&
4578 (copy_entry->needs_copy ||
4579 copy_object->shadowed ||
4580 (!copy_object->true_share &&
4581 !copy_entry->is_shared &&
4582 copy_object->vo_size > copy_size))) {
4583 /*
4584 * We need to resolve our side of this
4585 * "symmetric" copy-on-write now; we
4586 * need a new object to map and share,
4587 * instead of the current one which
4588 * might still be shared with the
4589 * original mapping.
4590 *
4591 * Note: A "vm_map_copy_t" does not
4592 * have a lock but we're protected by
4593 * the named entry's lock here.
4594 */
4595 // assert(copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC);
4596 VME_OBJECT_SHADOW(copy_entry, copy_size);
4597 if (!copy_entry->needs_copy &&
4598 copy_entry->protection & VM_PROT_WRITE) {
4599 vm_prot_t prot;
4600
4601 prot = copy_entry->protection & ~VM_PROT_WRITE;
4602 vm_object_pmap_protect(copy_object,
4603 copy_offset,
4604 copy_size,
4605 PMAP_NULL,
4606 PAGE_SIZE,
4607 0,
4608 prot);
4609 }
4610
4611 copy_entry->needs_copy = FALSE;
4612 copy_entry->is_shared = TRUE;
4613 copy_object = VME_OBJECT(copy_entry);
4614 copy_offset = VME_OFFSET(copy_entry);
4615 vm_object_lock(copy_object);
4616 vm_object_reference_locked(copy_object);
4617 if (copy_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
4618 /* we're about to make a shared mapping of this object */
4619 copy_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
4620 copy_object->true_share = TRUE;
4621 }
4622 vm_object_unlock(copy_object);
4623 } else {
4624 /*
4625 * We already have the right object
4626 * to map.
4627 */
4628 copy_object = VME_OBJECT(copy_entry);
4629 vm_object_reference(copy_object);
4630 }
4631
4632 /* over-map the object into destination */
4633 remap_flags |= flags;
4634 remap_flags |= VM_FLAGS_FIXED;
4635 remap_flags |= VM_FLAGS_OVERWRITE;
4636 remap_flags &= ~VM_FLAGS_ANYWHERE;
4637 if (!copy && !copy_entry->is_sub_map) {
4638 /*
4639 * copy-on-write should have been
4640 * resolved at this point, or we would
4641 * end up sharing instead of copying.
4642 */
4643 assert(!copy_entry->needs_copy);
4644 }
4645 #if XNU_TARGET_OS_OSX
4646 if (copy_entry->used_for_jit) {
4647 vmk_remap_flags.vmkf_map_jit = TRUE;
4648 }
4649 #endif /* XNU_TARGET_OS_OSX */
4650
4651 assertf((copy_vm_alias & VME_ALIAS_MASK) == copy_vm_alias,
4652 "VM Tag truncated from 0x%x to 0x%x\n", copy_vm_alias, (copy_vm_alias & VME_ALIAS_MASK));
4653 kr = vm_map_enter(target_map,
4654 &copy_addr,
4655 copy_size,
4656 (vm_map_offset_t) 0,
4657 remap_flags,
4658 vmk_remap_flags,
4659 (vm_tag_t) copy_vm_alias, /* see comment at end of vm_fault_unwire re. cast*/
4660 copy_object,
4661 copy_offset,
4662 ((copy_object == NULL) ? FALSE : copy),
4663 cur_protection,
4664 max_protection,
4665 inheritance);
4666 if (kr != KERN_SUCCESS) {
4667 DEBUG4K_SHARE("failed kr 0x%x\n", kr);
4668 if (copy_entry->is_sub_map) {
4669 vm_map_deallocate(copy_submap);
4670 } else {
4671 vm_object_deallocate(copy_object);
4672 }
4673 /* abort */
4674 break;
4675 }
4676
4677 /* next mapping */
4678 copy_addr += copy_size;
4679 }
4680
4681 if (kr == KERN_SUCCESS) {
4682 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4683 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4684 *address = map_addr + offset_in_mapping;
4685 } else {
4686 *address = map_addr;
4687 }
4688 if (overmap_start) {
4689 *address += overmap_start;
4690 DEBUG4K_SHARE("map %p map_addr 0x%llx offset_in_mapping 0x%llx overmap_start 0x%llx -> *address 0x%llx\n", target_map, (uint64_t)map_addr, (uint64_t) offset_in_mapping, (uint64_t)overmap_start, (uint64_t)*address);
4691 }
4692 }
4693 named_entry_unlock(named_entry);
4694 if (target_copy_map != copy_map) {
4695 vm_map_copy_discard(target_copy_map);
4696 target_copy_map = VM_MAP_COPY_NULL;
4697 }
4698
4699 if (kr != KERN_SUCCESS) {
4700 if (!(flags & VM_FLAGS_OVERWRITE)) {
4701 /* deallocate the contiguous range */
4702 (void) vm_deallocate(target_map,
4703 map_addr,
4704 map_size);
4705 }
4706 }
4707
4708 return kr;
4709 }
4710
4711 if (named_entry->is_object) {
4712 unsigned int access;
4713 vm_prot_t protections;
4714 unsigned int wimg_mode;
4715
4716 /* we are mapping a VM object */
4717
4718 protections = named_entry->protection & VM_PROT_ALL;
4719 access = GET_MAP_MEM(named_entry->protection);
4720
4721 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4722 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4723 offset_in_mapping = offset - VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4724 if (flags & VM_FLAGS_RETURN_4K_DATA_ADDR) {
4725 offset_in_mapping &= ~((signed)(0xFFF));
4726 }
4727 offset = VM_MAP_TRUNC_PAGE(offset, VM_MAP_PAGE_MASK(target_map));
4728 map_size = VM_MAP_ROUND_PAGE((offset + offset_in_mapping + initial_size) - offset, VM_MAP_PAGE_MASK(target_map));
4729 }
4730
4731 object = vm_named_entry_to_vm_object(named_entry);
4732 assert(object != VM_OBJECT_NULL);
4733 vm_object_lock(object);
4734 named_entry_unlock(named_entry);
4735
4736 vm_object_reference_locked(object);
4737
4738 wimg_mode = object->wimg_bits;
4739 vm_prot_to_wimg(access, &wimg_mode);
4740 if (object->wimg_bits != wimg_mode) {
4741 vm_object_change_wimg_mode(object, wimg_mode);
4742 }
4743
4744 vm_object_unlock(object);
4745 } else {
4746 panic("invalid VM named entry %p", named_entry);
4747 }
4748 } else if (ip_kotype(port) == IKOT_MEMORY_OBJECT) {
4749 /*
4750 * JMM - This is temporary until we unify named entries
4751 * and raw memory objects.
4752 *
4753 * Detected fake ip_kotype for a memory object. In
4754 * this case, the port isn't really a port at all, but
4755 * instead is just a raw memory object.
4756 */
4757 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4758 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4759 panic("VM_FLAGS_RETURN_DATA_ADDR not expected for raw memory object.");
4760 }
4761
4762 object = memory_object_to_vm_object((memory_object_t)port);
4763 if (object == VM_OBJECT_NULL) {
4764 return KERN_INVALID_OBJECT;
4765 }
4766 vm_object_reference(object);
4767
4768 /* wait for object (if any) to be ready */
4769 if (object != VM_OBJECT_NULL) {
4770 if (object == kernel_object) {
4771 printf("Warning: Attempt to map kernel object"
4772 " by a non-private kernel entity\n");
4773 return KERN_INVALID_OBJECT;
4774 }
4775 if (!object->pager_ready) {
4776 vm_object_lock(object);
4777
4778 while (!object->pager_ready) {
4779 vm_object_wait(object,
4780 VM_OBJECT_EVENT_PAGER_READY,
4781 THREAD_UNINT);
4782 vm_object_lock(object);
4783 }
4784 vm_object_unlock(object);
4785 }
4786 }
4787 } else {
4788 return KERN_INVALID_OBJECT;
4789 }
4790
4791 if (object != VM_OBJECT_NULL &&
4792 object->named &&
4793 object->pager != MEMORY_OBJECT_NULL &&
4794 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4795 memory_object_t pager;
4796 vm_prot_t pager_prot;
4797 kern_return_t kr;
4798
4799 /*
4800 * For "named" VM objects, let the pager know that the
4801 * memory object is being mapped. Some pagers need to keep
4802 * track of this, to know when they can reclaim the memory
4803 * object, for example.
4804 * VM calls memory_object_map() for each mapping (specifying
4805 * the protection of each mapping) and calls
4806 * memory_object_last_unmap() when all the mappings are gone.
4807 */
4808 pager_prot = max_protection;
4809 if (copy) {
4810 /*
4811 * Copy-On-Write mapping: won't modify the
4812 * memory object.
4813 */
4814 pager_prot &= ~VM_PROT_WRITE;
4815 }
4816 vm_object_lock(object);
4817 pager = object->pager;
4818 if (object->named &&
4819 pager != MEMORY_OBJECT_NULL &&
4820 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
4821 assert(object->pager_ready);
4822 vm_object_mapping_wait(object, THREAD_UNINT);
4823 vm_object_mapping_begin(object);
4824 vm_object_unlock(object);
4825
4826 kr = memory_object_map(pager, pager_prot);
4827 assert(kr == KERN_SUCCESS);
4828
4829 vm_object_lock(object);
4830 vm_object_mapping_end(object);
4831 }
4832 vm_object_unlock(object);
4833 }
4834
4835 /*
4836 * Perform the copy if requested
4837 */
4838
4839 if (copy) {
4840 vm_object_t new_object;
4841 vm_object_offset_t new_offset;
4842
4843 result = vm_object_copy_strategically(object, offset,
4844 map_size,
4845 &new_object, &new_offset,
4846 &copy);
4847
4848
4849 if (result == KERN_MEMORY_RESTART_COPY) {
4850 boolean_t success;
4851 boolean_t src_needs_copy;
4852
4853 /*
4854 * XXX
4855 * We currently ignore src_needs_copy.
4856 * This really is the issue of how to make
4857 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
4858 * non-kernel users to use. Solution forthcoming.
4859 * In the meantime, since we don't allow non-kernel
4860 * memory managers to specify symmetric copy,
4861 * we won't run into problems here.
4862 */
4863 new_object = object;
4864 new_offset = offset;
4865 success = vm_object_copy_quickly(&new_object,
4866 new_offset,
4867 map_size,
4868 &src_needs_copy,
4869 &copy);
4870 assert(success);
4871 result = KERN_SUCCESS;
4872 }
4873 /*
4874 * Throw away the reference to the
4875 * original object, as it won't be mapped.
4876 */
4877
4878 vm_object_deallocate(object);
4879
4880 if (result != KERN_SUCCESS) {
4881 return result;
4882 }
4883
4884 object = new_object;
4885 offset = new_offset;
4886 }
4887
4888 /*
4889 * If non-kernel users want to try to prefault pages, the mapping and prefault
4890 * need to be atomic.
4891 */
4892 kernel_prefault = (try_prefault && vm_kernel_map_is_kernel(target_map));
4893 vmk_flags.vmkf_keep_map_locked = (try_prefault && !kernel_prefault);
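/*
 * With vmkf_keep_map_locked set, vm_map_enter()/vm_map_enter_fourk()
 * return with the map still locked on success, so the prefault loop
 * below runs before any other thread can touch the new mapping; the
 * lock is dropped at the end of that loop.
 */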
4894
4895 #if __arm64__
4896 if (fourk) {
4897 /* map this object in a "4K" pager */
4898 result = vm_map_enter_fourk(target_map,
4899 &map_addr,
4900 map_size,
4901 (vm_map_offset_t) mask,
4902 flags,
4903 vmk_flags,
4904 tag,
4905 object,
4906 offset,
4907 copy,
4908 cur_protection,
4909 max_protection,
4910 inheritance);
4911 } else
4912 #endif /* __arm64__ */
4913 {
4914 result = vm_map_enter(target_map,
4915 &map_addr, map_size,
4916 (vm_map_offset_t)mask,
4917 flags,
4918 vmk_flags,
4919 tag,
4920 object, offset,
4921 copy,
4922 cur_protection, max_protection,
4923 inheritance);
4924 }
4925 if (result != KERN_SUCCESS) {
4926 vm_object_deallocate(object);
4927 }
4928
4929 /*
4930 * Try to prefault, and do not forget to release the vm map lock.
4931 */
4932 if (result == KERN_SUCCESS && try_prefault) {
4933 mach_vm_address_t va = map_addr;
4934 kern_return_t kr = KERN_SUCCESS;
4935 unsigned int i = 0;
4936 int pmap_options;
4937
4938 pmap_options = kernel_prefault ? 0 : PMAP_OPTIONS_NOWAIT;
4939 if (object->internal) {
4940 pmap_options |= PMAP_OPTIONS_INTERNAL;
4941 }
4942
4943 for (i = 0; i < page_list_count; ++i) {
4944 if (!UPL_VALID_PAGE(page_list, i)) {
4945 if (kernel_prefault) {
4946 assertf(FALSE, "kernel_prefault && !UPL_VALID_PAGE");
4947 result = KERN_MEMORY_ERROR;
4948 break;
4949 }
4950 } else {
4951 /*
4952 * If this call fails, we should stop trying
4953 * to optimize: other calls are likely going
4954 * to fail too.
4955 *
4956 * We do not report an error for such a
4957 * failure, though; this is an optimization,
4958 * not something critical.
4959 */
4960 kr = pmap_enter_options(target_map->pmap,
4961 va, UPL_PHYS_PAGE(page_list, i),
4962 cur_protection, VM_PROT_NONE,
4963 0, TRUE, pmap_options, NULL);
4964 if (kr != KERN_SUCCESS) {
4965 OSIncrementAtomic64(&vm_prefault_nb_bailout);
4966 if (kernel_prefault) {
4967 result = kr;
4968 }
4969 break;
4970 }
4971 OSIncrementAtomic64(&vm_prefault_nb_pages);
4972 }
4973
4974 /* Next virtual address */
4975 va += PAGE_SIZE;
4976 }
4977 if (vmk_flags.vmkf_keep_map_locked) {
4978 vm_map_unlock(target_map);
4979 }
4980 }
4981
4982 if (flags & (VM_FLAGS_RETURN_DATA_ADDR |
4983 VM_FLAGS_RETURN_4K_DATA_ADDR)) {
4984 *address = map_addr + offset_in_mapping;
4985 } else {
4986 *address = map_addr;
4987 }
4988 return result;
4989 }
4990
4991 kern_return_t
4992 vm_map_enter_mem_object(
4993 vm_map_t target_map,
4994 vm_map_offset_t *address,
4995 vm_map_size_t initial_size,
4996 vm_map_offset_t mask,
4997 int flags,
4998 vm_map_kernel_flags_t vmk_flags,
4999 vm_tag_t tag,
5000 ipc_port_t port,
5001 vm_object_offset_t offset,
5002 boolean_t copy,
5003 vm_prot_t cur_protection,
5004 vm_prot_t max_protection,
5005 vm_inherit_t inheritance)
5006 {
5007 kern_return_t ret;
5008
5009 ret = vm_map_enter_mem_object_helper(target_map,
5010 address,
5011 initial_size,
5012 mask,
5013 flags,
5014 vmk_flags,
5015 tag,
5016 port,
5017 offset,
5018 copy,
5019 cur_protection,
5020 max_protection,
5021 inheritance,
5022 NULL,
5023 0);
5024
5025 #if KASAN
5026 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5027 kasan_notify_address(*address, initial_size);
5028 }
5029 #endif
5030
5031 return ret;
5032 }
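/*
 * Illustrative sketch (not part of the original source), assuming
 * "port" is a memory-entry port, e.g. from mach_make_memory_entry_64():
 *
 *	vm_map_offset_t addr = 0;
 *	kern_return_t kr;
 *
 *	kr = vm_map_enter_mem_object(target_map, &addr, size, 0,
 *	    VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, port, 0, FALSE,
 *	    VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
 */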
5033
5034 kern_return_t
5035 vm_map_enter_mem_object_prefault(
5036 vm_map_t target_map,
5037 vm_map_offset_t *address,
5038 vm_map_size_t initial_size,
5039 vm_map_offset_t mask,
5040 int flags,
5041 vm_map_kernel_flags_t vmk_flags,
5042 vm_tag_t tag,
5043 ipc_port_t port,
5044 vm_object_offset_t offset,
5045 vm_prot_t cur_protection,
5046 vm_prot_t max_protection,
5047 upl_page_list_ptr_t page_list,
5048 unsigned int page_list_count)
5049 {
5050 kern_return_t ret;
5051
5052 ret = vm_map_enter_mem_object_helper(target_map,
5053 address,
5054 initial_size,
5055 mask,
5056 flags,
5057 vmk_flags,
5058 tag,
5059 port,
5060 offset,
5061 FALSE,
5062 cur_protection,
5063 max_protection,
5064 VM_INHERIT_DEFAULT,
5065 page_list,
5066 page_list_count);
5067
5068 #if KASAN
5069 if (ret == KERN_SUCCESS && address && target_map->pmap == kernel_pmap) {
5070 kasan_notify_address(*address, initial_size);
5071 }
5072 #endif
5073
5074 return ret;
5075 }
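/*
 * Note: this variant always maps without copy semantics and with
 * VM_INHERIT_DEFAULT (see the FALSE/VM_INHERIT_DEFAULT arguments
 * above); "page_list"/"page_list_count" come from UPL page info and
 * drive the pmap_enter_options() prefault loop in the helper.
 */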
5076
5077
5078 kern_return_t
5079 vm_map_enter_mem_object_control(
5080 vm_map_t target_map,
5081 vm_map_offset_t *address,
5082 vm_map_size_t initial_size,
5083 vm_map_offset_t mask,
5084 int flags,
5085 vm_map_kernel_flags_t vmk_flags,
5086 vm_tag_t tag,
5087 memory_object_control_t control,
5088 vm_object_offset_t offset,
5089 boolean_t copy,
5090 vm_prot_t cur_protection,
5091 vm_prot_t max_protection,
5092 vm_inherit_t inheritance)
5093 {
5094 vm_map_address_t map_addr;
5095 vm_map_size_t map_size;
5096 vm_object_t object;
5097 vm_object_size_t size;
5098 kern_return_t result;
5099 memory_object_t pager;
5100 vm_prot_t pager_prot;
5101 kern_return_t kr;
5102 #if __arm64__
5103 boolean_t fourk = vmk_flags.vmkf_fourk;
5104 #endif /* __arm64__ */
5105
5106 /*
5107 * Check arguments for validity
5108 */
5109 if ((target_map == VM_MAP_NULL) ||
5110 (cur_protection & ~VM_PROT_ALL) ||
5111 (max_protection & ~VM_PROT_ALL) ||
5112 (inheritance > VM_INHERIT_LAST_VALID) ||
5113 initial_size == 0) {
5114 return KERN_INVALID_ARGUMENT;
5115 }
5116
5117 #if __arm64__
5118 if (fourk && VM_MAP_PAGE_MASK(target_map) < PAGE_MASK) {
5119 fourk = FALSE;
5120 }
5121
5122 if (fourk) {
5123 map_addr = vm_map_trunc_page(*address,
5124 FOURK_PAGE_MASK);
5125 map_size = vm_map_round_page(initial_size,
5126 FOURK_PAGE_MASK);
5127 } else
5128 #endif /* __arm64__ */
5129 {
5130 map_addr = vm_map_trunc_page(*address,
5131 VM_MAP_PAGE_MASK(target_map));
5132 map_size = vm_map_round_page(initial_size,
5133 VM_MAP_PAGE_MASK(target_map));
5134 }
5135 size = vm_object_round_page(initial_size);
5136
5137 object = memory_object_control_to_vm_object(control);
5138
5139 if (object == VM_OBJECT_NULL) {
5140 return KERN_INVALID_OBJECT;
5141 }
5142
5143 if (object == kernel_object) {
5144 printf("Warning: Attempt to map kernel object"
5145 " by a non-private kernel entity\n");
5146 return KERN_INVALID_OBJECT;
5147 }
5148
5149 vm_object_lock(object);
5150 object->ref_count++;
5151 vm_object_res_reference(object);
5152
5153 /*
5154 * For "named" VM objects, let the pager know that the
5155 * memory object is being mapped. Some pagers need to keep
5156 * track of this, to know when they can reclaim the memory
5157 * object, for example.
5158 * VM calls memory_object_map() for each mapping (specifying
5159 * the protection of each mapping) and calls
5160 * memory_object_last_unmap() when all the mappings are gone.
5161 */
5162 pager_prot = max_protection;
5163 if (copy) {
5164 pager_prot &= ~VM_PROT_WRITE;
5165 }
5166 pager = object->pager;
5167 if (object->named &&
5168 pager != MEMORY_OBJECT_NULL &&
5169 object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
5170 assert(object->pager_ready);
5171 vm_object_mapping_wait(object, THREAD_UNINT);
5172 vm_object_mapping_begin(object);
5173 vm_object_unlock(object);
5174
5175 kr = memory_object_map(pager, pager_prot);
5176 assert(kr == KERN_SUCCESS);
5177
5178 vm_object_lock(object);
5179 vm_object_mapping_end(object);
5180 }
5181 vm_object_unlock(object);
5182
5183 /*
5184 * Perform the copy if requested
5185 */
5186
5187 if (copy) {
5188 vm_object_t new_object;
5189 vm_object_offset_t new_offset;
5190
5191 result = vm_object_copy_strategically(object, offset, size,
5192 &new_object, &new_offset,
5193 &copy);
5194
5195
5196 if (result == KERN_MEMORY_RESTART_COPY) {
5197 boolean_t success;
5198 boolean_t src_needs_copy;
5199
5200 /*
5201 * XXX
5202 * We currently ignore src_needs_copy.
5203 * This really is the issue of how to make
5204 * MEMORY_OBJECT_COPY_SYMMETRIC safe for
5205 * non-kernel users to use. Solution forthcoming.
5206 * In the meantime, since we don't allow non-kernel
5207 * memory managers to specify symmetric copy,
5208 * we won't run into problems here.
5209 */
5210 new_object = object;
5211 new_offset = offset;
5212 success = vm_object_copy_quickly(&new_object,
5213 new_offset, size,
5214 &src_needs_copy,
5215 &copy);
5216 assert(success);
5217 result = KERN_SUCCESS;
5218 }
5219 /*
5220 * Throw away the reference to the
5221 * original object, as it won't be mapped.
5222 */
5223
5224 vm_object_deallocate(object);
5225
5226 if (result != KERN_SUCCESS) {
5227 return result;
5228 }
5229
5230 object = new_object;
5231 offset = new_offset;
5232 }
5233
5234 #if __arm64__
5235 if (fourk) {
5236 result = vm_map_enter_fourk(target_map,
5237 &map_addr,
5238 map_size,
5239 (vm_map_offset_t)mask,
5240 flags,
5241 vmk_flags,
5242 tag,
5243 object, offset,
5244 copy,
5245 cur_protection, max_protection,
5246 inheritance);
5247 } else
5248 #endif /* __arm64__ */
5249 {
5250 result = vm_map_enter(target_map,
5251 &map_addr, map_size,
5252 (vm_map_offset_t)mask,
5253 flags,
5254 vmk_flags,
5255 tag,
5256 object, offset,
5257 copy,
5258 cur_protection, max_protection,
5259 inheritance);
5260 }
5261 if (result != KERN_SUCCESS) {
5262 vm_object_deallocate(object);
5263 }
5264 *address = map_addr;
5265
5266 return result;
5267 }
5268
5269
5270 #if VM_CPM
5271
5272 #ifdef MACH_ASSERT
5273 extern pmap_paddr_t avail_start, avail_end;
5274 #endif
5275
5276 /*
5277 * Allocate memory in the specified map, with the caveat that
5278 * the memory is physically contiguous. This call may fail
5279 * if the system can't find sufficient contiguous memory.
5280 * This call may cause or lead to heart-stopping amounts of
5281 * paging activity.
5282 *
5283 * Memory obtained from this call should be freed in the
5284 * normal way, viz., via vm_deallocate.
5285 */
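/*
 * Illustrative sketch (not part of the original source), on a kernel
 * built with VM_CPM:
 *
 *	vm_map_offset_t addr = 0;
 *	kr = vm_map_enter_cpm(kernel_map, &addr, 4 * PAGE_SIZE,
 *	    VM_FLAGS_ANYWHERE);
 *	if (kr == KERN_SUCCESS) {
 *		... use the physically contiguous range ...
 *		(void) vm_deallocate(kernel_map, addr, 4 * PAGE_SIZE);
 *	}
 */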
5286 kern_return_t
5287 vm_map_enter_cpm(
5288 vm_map_t map,
5289 vm_map_offset_t *addr,
5290 vm_map_size_t size,
5291 int flags)
5292 {
5293 vm_object_t cpm_obj;
5294 pmap_t pmap;
5295 vm_page_t m, pages;
5296 kern_return_t kr;
5297 vm_map_offset_t va, start, end, offset;
5298 #if MACH_ASSERT
5299 vm_map_offset_t prev_addr = 0;
5300 #endif /* MACH_ASSERT */
5301
5302 boolean_t anywhere = ((VM_FLAGS_ANYWHERE & flags) != 0);
5303 vm_tag_t tag;
5304
5305 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
5306 /* XXX TODO4K do we need to support this? */
5307 *addr = 0;
5308 return KERN_NOT_SUPPORTED;
5309 }
5310
5311 VM_GET_FLAGS_ALIAS(flags, tag);
5312
5313 if (size == 0) {
5314 *addr = 0;
5315 return KERN_SUCCESS;
5316 }
5317 if (anywhere) {
5318 *addr = vm_map_min(map);
5319 } else {
5320 *addr = vm_map_trunc_page(*addr,
5321 VM_MAP_PAGE_MASK(map));
5322 }
5323 size = vm_map_round_page(size,
5324 VM_MAP_PAGE_MASK(map));
5325
5326 /*
5327 * LP64todo - cpm_allocate should probably allow
5328 * allocations of >4GB, but not with the current
5329 * algorithm, so just cast down the size for now.
5330 */
5331 if (size > VM_MAX_ADDRESS) {
5332 return KERN_RESOURCE_SHORTAGE;
5333 }
5334 if ((kr = cpm_allocate(CAST_DOWN(vm_size_t, size),
5335 &pages, 0, 0, TRUE, flags)) != KERN_SUCCESS) {
5336 return kr;
5337 }
5338
5339 cpm_obj = vm_object_allocate((vm_object_size_t)size);
5340 assert(cpm_obj != VM_OBJECT_NULL);
5341 assert(cpm_obj->internal);
5342 assert(cpm_obj->vo_size == (vm_object_size_t)size);
5343 assert(cpm_obj->can_persist == FALSE);
5344 assert(cpm_obj->pager_created == FALSE);
5345 assert(cpm_obj->pageout == FALSE);
5346 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5347
5348 /*
5349 * Insert pages into object.
5350 */
5351
5352 vm_object_lock(cpm_obj);
5353 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5354 m = pages;
5355 pages = NEXT_PAGE(m);
5356 *(NEXT_PAGE_PTR(m)) = VM_PAGE_NULL;
5357
5358 assert(!m->vmp_gobbled);
5359 assert(!m->vmp_wanted);
5360 assert(!m->vmp_pageout);
5361 assert(!m->vmp_tabled);
5362 assert(VM_PAGE_WIRED(m));
5363 assert(m->vmp_busy);
5364 assert(VM_PAGE_GET_PHYS_PAGE(m) >= (avail_start >> PAGE_SHIFT) && VM_PAGE_GET_PHYS_PAGE(m) <= (avail_end >> PAGE_SHIFT));
5365
5366 m->vmp_busy = FALSE;
5367 vm_page_insert(m, cpm_obj, offset);
5368 }
5369 assert(cpm_obj->resident_page_count == size / PAGE_SIZE);
5370 vm_object_unlock(cpm_obj);
5371
5372 /*
5373 * Hang onto a reference on the object in case a
5374 * multi-threaded application for some reason decides
5375 * to deallocate the portion of the address space into
5376 * which we will insert this object.
5377 *
5378 * Unfortunately, we must insert the object now before
5379 * we can talk to the pmap module about which addresses
5380 * must be wired down. Hence, the race with a multi-
5381 * threaded app.
5382 */
5383 vm_object_reference(cpm_obj);
5384
5385 /*
5386 * Insert object into map.
5387 */
5388
5389 kr = vm_map_enter(
5390 map,
5391 addr,
5392 size,
5393 (vm_map_offset_t)0,
5394 flags,
5395 VM_MAP_KERNEL_FLAGS_NONE,
5396 cpm_obj,
5397 (vm_object_offset_t)0,
5398 FALSE,
5399 VM_PROT_ALL,
5400 VM_PROT_ALL,
5401 VM_INHERIT_DEFAULT);
5402
5403 if (kr != KERN_SUCCESS) {
5404 /*
5405 * A CPM object doesn't have can_persist set,
5406 * so all we have to do is deallocate it to
5407 * free up these pages.
5408 */
5409 assert(cpm_obj->pager_created == FALSE);
5410 assert(cpm_obj->can_persist == FALSE);
5411 assert(cpm_obj->pageout == FALSE);
5412 assert(cpm_obj->shadow == VM_OBJECT_NULL);
5413 vm_object_deallocate(cpm_obj); /* kill acquired ref */
5414 vm_object_deallocate(cpm_obj); /* kill creation ref */
5415 }
5416
5417 /*
5418 * Inform the physical mapping system that the
5419 * range of addresses may not fault, so that
5420 * page tables and such can be locked down as well.
5421 */
5422 start = *addr;
5423 end = start + size;
5424 pmap = vm_map_pmap(map);
5425 pmap_pageable(pmap, start, end, FALSE);
5426
5427 /*
5428 * Enter each page into the pmap, to avoid faults.
5429 * Note that this loop could be coded more efficiently,
5430 * if the need arose, rather than looking up each page
5431 * again.
5432 */
5433 for (offset = 0, va = start; offset < size;
5434 va += PAGE_SIZE, offset += PAGE_SIZE) {
5435 int type_of_fault;
5436
5437 vm_object_lock(cpm_obj);
5438 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5439 assert(m != VM_PAGE_NULL);
5440
5441 vm_page_zero_fill(m);
5442
5443 type_of_fault = DBG_ZERO_FILL_FAULT;
5444
5445 vm_fault_enter(m, pmap, va,
5446 PAGE_SIZE, 0,
5447 VM_PROT_ALL, VM_PROT_WRITE,
5448 VM_PAGE_WIRED(m),
5449 FALSE, /* change_wiring */
5450 VM_KERN_MEMORY_NONE, /* tag - not wiring */
5451 FALSE, /* no_cache */
5452 FALSE, /* cs_bypass */
5453 0, /* user_tag */
5454 0, /* pmap_options */
5455 NULL, /* need_retry */
5456 &type_of_fault);
5457
5458 vm_object_unlock(cpm_obj);
5459 }
5460
5461 #if MACH_ASSERT
5462 /*
5463 * Verify ordering in address space.
5464 */
5465 for (offset = 0; offset < size; offset += PAGE_SIZE) {
5466 vm_object_lock(cpm_obj);
5467 m = vm_page_lookup(cpm_obj, (vm_object_offset_t)offset);
5468 vm_object_unlock(cpm_obj);
5469 if (m == VM_PAGE_NULL) {
5470 panic("vm_allocate_cpm: obj %p off 0x%llx no page",
5471 cpm_obj, (uint64_t)offset);
5472 }
5473 assert(m->vmp_tabled);
5474 assert(!m->vmp_busy);
5475 assert(!m->vmp_wanted);
5476 assert(!m->vmp_fictitious);
5477 assert(!m->vmp_private);
5478 assert(!m->vmp_absent);
5479 assert(!m->vmp_error);
5480 assert(!m->vmp_cleaning);
5481 assert(!m->vmp_laundry);
5482 assert(!m->vmp_precious);
5483 assert(!m->vmp_clustered);
5484 if (offset != 0) {
5485 if (VM_PAGE_GET_PHYS_PAGE(m) != prev_addr + 1) {
5486 printf("start 0x%llx end 0x%llx va 0x%llx\n",
5487 (uint64_t)start, (uint64_t)end, (uint64_t)va);
5488 printf("obj %p off 0x%llx\n", cpm_obj, (uint64_t)offset);
5489 printf("m %p prev_address 0x%llx\n", m, (uint64_t)prev_addr);
5490 panic("vm_allocate_cpm: pages not contig!");
5491 }
5492 }
5493 prev_addr = VM_PAGE_GET_PHYS_PAGE(m);
5494 }
5495 #endif /* MACH_ASSERT */
5496
5497 vm_object_deallocate(cpm_obj); /* kill extra ref */
5498
5499 return kr;
5500 }
5501
5502
5503 #else /* VM_CPM */
5504
5505 /*
5506 * Interface is defined in all cases, but unless the kernel
5507 * is built explicitly for this option, the interface does
5508 * nothing.
5509 */
5510
5511 kern_return_t
5512 vm_map_enter_cpm(
5513 __unused vm_map_t map,
5514 __unused vm_map_offset_t *addr,
5515 __unused vm_map_size_t size,
5516 __unused int flags)
5517 {
5518 return KERN_FAILURE;
5519 }
5520 #endif /* VM_CPM */
5521
5522 /* Not used without nested pmaps */
5523 #ifndef NO_NESTED_PMAP
5524 /*
5525 * Clip and unnest a portion of a nested submap mapping.
5526 */
5527
5528
5529 static void
5530 vm_map_clip_unnest(
5531 vm_map_t map,
5532 vm_map_entry_t entry,
5533 vm_map_offset_t start_unnest,
5534 vm_map_offset_t end_unnest)
5535 {
5536 vm_map_offset_t old_start_unnest = start_unnest;
5537 vm_map_offset_t old_end_unnest = end_unnest;
5538
5539 assert(entry->is_sub_map);
5540 assert(VME_SUBMAP(entry) != NULL);
5541 assert(entry->use_pmap);
5542
5543 /*
5544 * Query the platform for the optimal unnest range.
5545 * DRK: There's some duplication of effort here, since
5546 * callers may have adjusted the range to some extent. This
5547 * routine was introduced to support 1GiB subtree nesting
5548 * for x86 platforms, which can also nest on 2MiB boundaries
5549 * depending on size/alignment.
5550 */
5551 if (pmap_adjust_unnest_parameters(map->pmap, &start_unnest, &end_unnest)) {
5552 assert(VME_SUBMAP(entry)->is_nested_map);
5553 assert(!VME_SUBMAP(entry)->disable_vmentry_reuse);
5554 log_unnest_badness(map,
5555 old_start_unnest,
5556 old_end_unnest,
5557 VME_SUBMAP(entry)->is_nested_map,
5558 (entry->vme_start +
5559 VME_SUBMAP(entry)->lowest_unnestable_start -
5560 VME_OFFSET(entry)));
5561 }
5562
5563 if (entry->vme_start > start_unnest ||
5564 entry->vme_end < end_unnest) {
5565 panic("vm_map_clip_unnest(0x%llx,0x%llx): "
5566 "bad nested entry: start=0x%llx end=0x%llx\n",
5567 (long long)start_unnest, (long long)end_unnest,
5568 (long long)entry->vme_start, (long long)entry->vme_end);
5569 }
5570
5571 if (start_unnest > entry->vme_start) {
5572 _vm_map_clip_start(&map->hdr,
5573 entry,
5574 start_unnest);
5575 if (map->holelistenabled) {
5576 vm_map_store_update_first_free(map, NULL, FALSE);
5577 } else {
5578 vm_map_store_update_first_free(map, map->first_free, FALSE);
5579 }
5580 }
5581 if (entry->vme_end > end_unnest) {
5582 _vm_map_clip_end(&map->hdr,
5583 entry,
5584 end_unnest);
5585 if (map->holelistenabled) {
5586 vm_map_store_update_first_free(map, NULL, FALSE);
5587 } else {
5588 vm_map_store_update_first_free(map, map->first_free, FALSE);
5589 }
5590 }
5591
5592 pmap_unnest(map->pmap,
5593 entry->vme_start,
5594 entry->vme_end - entry->vme_start);
5595 if ((map->mapped_in_other_pmaps) && os_ref_get_count(&map->map_refcnt) != 0) {
5596 /* clean up parent map/maps */
5597 vm_map_submap_pmap_clean(
5598 map, entry->vme_start,
5599 entry->vme_end,
5600 VME_SUBMAP(entry),
5601 VME_OFFSET(entry));
5602 }
5603 entry->use_pmap = FALSE;
5604 if ((map->pmap != kernel_pmap) &&
5605 (VME_ALIAS(entry) == VM_MEMORY_SHARED_PMAP)) {
5606 VME_ALIAS_SET(entry, VM_MEMORY_UNSHARED_PMAP);
5607 }
5608 }
5609 #endif /* NO_NESTED_PMAP */
5610
5611 /*
5612 * vm_map_clip_start: [ internal use only ]
5613 *
5614 * Asserts that the given entry begins at or after
5615 * the specified address; if necessary,
5616 * it splits the entry into two.
5617 */
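/*
 * Example (illustrative): clipping an entry covering [0x1000, 0x5000)
 * at startaddr 0x3000 inserts a new entry for [0x1000, 0x3000) before
 * it and leaves this entry as [0x3000, 0x5000), with its object offset
 * advanced by 0x2000; the backing object or submap gains a reference
 * for the new entry.
 */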
5618 void
5619 vm_map_clip_start(
5620 vm_map_t map,
5621 vm_map_entry_t entry,
5622 vm_map_offset_t startaddr)
5623 {
5624 #ifndef NO_NESTED_PMAP
5625 if (entry->is_sub_map &&
5626 entry->use_pmap &&
5627 startaddr >= entry->vme_start) {
5628 vm_map_offset_t start_unnest, end_unnest;
5629
5630 /*
5631 * Make sure "startaddr" is no longer in a nested range
5632 * before we clip. Unnest only the minimum range the platform
5633 * can handle.
5634 * vm_map_clip_unnest may perform additional adjustments to
5635 * the unnest range.
5636 */
5637 start_unnest = startaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
5638 end_unnest = start_unnest + pmap_shared_region_size_min(map->pmap);
5639 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5640 }
5641 #endif /* NO_NESTED_PMAP */
5642 if (startaddr > entry->vme_start) {
5643 if (VME_OBJECT(entry) &&
5644 !entry->is_sub_map &&
5645 VME_OBJECT(entry)->phys_contiguous) {
5646 pmap_remove(map->pmap,
5647 (addr64_t)(entry->vme_start),
5648 (addr64_t)(entry->vme_end));
5649 }
5650 if (entry->vme_atomic) {
5651 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5652 }
5653
5654 DTRACE_VM5(
5655 vm_map_clip_start,
5656 vm_map_t, map,
5657 vm_map_offset_t, entry->vme_start,
5658 vm_map_offset_t, entry->vme_end,
5659 vm_map_offset_t, startaddr,
5660 int, VME_ALIAS(entry));
5661
5662 _vm_map_clip_start(&map->hdr, entry, startaddr);
5663 if (map->holelistenabled) {
5664 vm_map_store_update_first_free(map, NULL, FALSE);
5665 } else {
5666 vm_map_store_update_first_free(map, map->first_free, FALSE);
5667 }
5668 }
5669 }
5670
5671
5672 #define vm_map_copy_clip_start(copy, entry, startaddr) \
5673 MACRO_BEGIN \
5674 if ((startaddr) > (entry)->vme_start) \
5675 _vm_map_clip_start(&(copy)->cpy_hdr,(entry),(startaddr)); \
5676 MACRO_END
5677
5678 /*
5679 * This routine is called only when it is known that
5680 * the entry must be split.
5681 */
5682 static void
5683 _vm_map_clip_start(
5684 struct vm_map_header *map_header,
5685 vm_map_entry_t entry,
5686 vm_map_offset_t start)
5687 {
5688 vm_map_entry_t new_entry;
5689
5690 /*
5691 * Split off the front portion --
5692 * note that we must insert the new
5693 * entry BEFORE this one, so that
5694 * this entry has the specified starting
5695 * address.
5696 */
5697
5698 if (entry->map_aligned) {
5699 assert(VM_MAP_PAGE_ALIGNED(start,
5700 VM_MAP_HDR_PAGE_MASK(map_header)));
5701 }
5702
5703 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5704 vm_map_entry_copy_full(new_entry, entry);
5705
5706 new_entry->vme_end = start;
5707 assert(new_entry->vme_start < new_entry->vme_end);
5708 VME_OFFSET_SET(entry, VME_OFFSET(entry) + (start - entry->vme_start));
5709 assert(start < entry->vme_end);
5710 entry->vme_start = start;
5711
5712 _vm_map_store_entry_link(map_header, entry->vme_prev, new_entry);
5713
5714 if (entry->is_sub_map) {
5715 vm_map_reference(VME_SUBMAP(new_entry));
5716 } else {
5717 vm_object_reference(VME_OBJECT(new_entry));
5718 }
5719 }
5720
5721
5722 /*
5723 * vm_map_clip_end: [ internal use only ]
5724 *
5725 * Asserts that the given entry ends at or before
5726 * the specified address; if necessary,
5727 * it splits the entry into two.
5728 */
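/*
 * Example (illustrative): clipping an entry covering [0x1000, 0x5000)
 * at endaddr 0x3000 leaves this entry as [0x1000, 0x3000) and inserts
 * a new entry for [0x3000, 0x5000) after it, with the new entry's
 * object offset advanced by 0x2000.
 */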
5729 void
5730 vm_map_clip_end(
5731 vm_map_t map,
5732 vm_map_entry_t entry,
5733 vm_map_offset_t endaddr)
5734 {
5735 if (endaddr > entry->vme_end) {
5736 /*
5737 * Within the scope of this clipping, limit "endaddr" to
5738 * the end of this map entry...
5739 */
5740 endaddr = entry->vme_end;
5741 }
5742 #ifndef NO_NESTED_PMAP
5743 if (entry->is_sub_map && entry->use_pmap) {
5744 vm_map_offset_t start_unnest, end_unnest;
5745
5746 /*
5747 * Make sure the range between the start of this entry and
5748 * the new "endaddr" is no longer nested before we clip.
5749 * Unnest only the minimum range the platform can handle.
5750 * vm_map_clip_unnest may perform additional adjustments to
5751 * the unnest range.
5752 */
5753 start_unnest = entry->vme_start;
5754 end_unnest =
5755 (endaddr + pmap_shared_region_size_min(map->pmap) - 1) &
5756 ~(pmap_shared_region_size_min(map->pmap) - 1);
5757 vm_map_clip_unnest(map, entry, start_unnest, end_unnest);
5758 }
5759 #endif /* NO_NESTED_PMAP */
5760 if (endaddr < entry->vme_end) {
5761 if (VME_OBJECT(entry) &&
5762 !entry->is_sub_map &&
5763 VME_OBJECT(entry)->phys_contiguous) {
5764 pmap_remove(map->pmap,
5765 (addr64_t)(entry->vme_start),
5766 (addr64_t)(entry->vme_end));
5767 }
5768 if (entry->vme_atomic) {
5769 panic("Attempting to clip an atomic VM entry! (map: %p, entry: %p)\n", map, entry);
5770 }
5771 DTRACE_VM5(
5772 vm_map_clip_end,
5773 vm_map_t, map,
5774 vm_map_offset_t, entry->vme_start,
5775 vm_map_offset_t, entry->vme_end,
5776 vm_map_offset_t, endaddr,
5777 int, VME_ALIAS(entry));
5778
5779 _vm_map_clip_end(&map->hdr, entry, endaddr);
5780 if (map->holelistenabled) {
5781 vm_map_store_update_first_free(map, NULL, FALSE);
5782 } else {
5783 vm_map_store_update_first_free(map, map->first_free, FALSE);
5784 }
5785 }
5786 }
5787
5788
5789 #define vm_map_copy_clip_end(copy, entry, endaddr) \
5790 MACRO_BEGIN \
5791 if ((endaddr) < (entry)->vme_end) \
5792 _vm_map_clip_end(&(copy)->cpy_hdr,(entry),(endaddr)); \
5793 MACRO_END
5794
5795 /*
5796 * This routine is called only when it is known that
5797 * the entry must be split.
5798 */
5799 static void
5800 _vm_map_clip_end(
5801 struct vm_map_header *map_header,
5802 vm_map_entry_t entry,
5803 vm_map_offset_t end)
5804 {
5805 vm_map_entry_t new_entry;
5806
5807 /*
5808 * Create a new entry and insert it
5809 * AFTER the specified entry
5810 */
5811
5812 if (entry->map_aligned) {
5813 assert(VM_MAP_PAGE_ALIGNED(end,
5814 VM_MAP_HDR_PAGE_MASK(map_header)));
5815 }
5816
5817 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
5818 vm_map_entry_copy_full(new_entry, entry);
5819
5820 assert(entry->vme_start < end);
5821 new_entry->vme_start = entry->vme_end = end;
5822 VME_OFFSET_SET(new_entry,
5823 VME_OFFSET(new_entry) + (end - entry->vme_start));
5824 assert(new_entry->vme_start < new_entry->vme_end);
5825
5826 _vm_map_store_entry_link(map_header, entry, new_entry);
5827
5828 if (entry->is_sub_map) {
5829 vm_map_reference(VME_SUBMAP(new_entry));
5830 } else {
5831 vm_object_reference(VME_OBJECT(new_entry));
5832 }
5833 }
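
/*
 * Illustrative worked example (added for clarity, not part of the original
 * source; addresses are hypothetical): if an entry covers [0x1000, 0x5000)
 * with VME_OFFSET == 0 and _vm_map_clip_end() is called with end == 0x3000,
 * the existing entry shrinks to [0x1000, 0x3000) and a new entry covering
 * [0x3000, 0x5000) with VME_OFFSET == 0x2000 is linked in AFTER it, taking
 * an extra reference on the shared object or submap.
 */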
5834
5835
5836 /*
5837 * VM_MAP_RANGE_CHECK: [ internal use only ]
5838 *
5839 * Asserts that the starting and ending region
5840 * addresses fall within the valid range of the map.
5841 */
5842 #define VM_MAP_RANGE_CHECK(map, start, end) \
5843 MACRO_BEGIN \
5844 if (start < vm_map_min(map)) \
5845 start = vm_map_min(map); \
5846 if (end > vm_map_max(map)) \
5847 end = vm_map_max(map); \
5848 if (start > end) \
5849 start = end; \
5850 MACRO_END
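
/*
 * Worked example (added for clarity, values hypothetical): with
 * vm_map_min(map) == 0x100000 and vm_map_max(map) == 0x7fff00000000,
 * VM_MAP_RANGE_CHECK clamps start == 0x1000 up to 0x100000 and leaves a
 * valid end untouched; if clamping ever inverts the range, start is snapped
 * to end, leaving an empty but well-formed range.
 */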
5851
5852 /*
5853 * vm_map_range_check: [ internal use only ]
5854 *
5855 * Check that the region defined by the specified start and
5856 * end addresses is wholly contained within a single map
5857 * entry or set of adjacent map entries of the specified map,
5858 * i.e. the specified region contains no unmapped space.
5859 * If any or all of the region is unmapped, FALSE is returned.
5860 * Otherwise, TRUE is returned and if the output argument 'entry'
5861 * is not NULL it points to the map entry containing the start
5862 * of the region.
5863 *
5864 * The map is locked for reading on entry and is left locked.
5865 */
5866 static boolean_t
5867 vm_map_range_check(
5868 vm_map_t map,
5869 vm_map_offset_t start,
5870 vm_map_offset_t end,
5871 vm_map_entry_t *entry)
5872 {
5873 vm_map_entry_t cur;
5874 vm_map_offset_t prev;
5875
5876 /*
5877 * Basic sanity checks first
5878 */
5879 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
5880 return FALSE;
5881 }
5882
5883 /*
5884 * Check first if the region starts within a valid
5885 * mapping for the map.
5886 */
5887 if (!vm_map_lookup_entry(map, start, &cur)) {
5888 return FALSE;
5889 }
5890
5891 /*
5892 * Optimize for the case that the region is contained
5893 * in a single map entry.
5894 */
5895 if (entry != (vm_map_entry_t *) NULL) {
5896 *entry = cur;
5897 }
5898 if (end <= cur->vme_end) {
5899 return TRUE;
5900 }
5901
5902 /*
5903 * If the region is not wholly contained within a
5904 * single entry, walk the entries looking for holes.
5905 */
5906 prev = cur->vme_end;
5907 cur = cur->vme_next;
5908 while ((cur != vm_map_to_entry(map)) && (prev == cur->vme_start)) {
5909 if (end <= cur->vme_end) {
5910 return TRUE;
5911 }
5912 prev = cur->vme_end;
5913 cur = cur->vme_next;
5914 }
5915 return FALSE;
5916 }
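
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): callers take the map lock for reading and, on
 * TRUE, may use the returned entry as the start of the contiguous run that
 * covers [start, end).
 */
#if 0 /* illustrative only, not compiled */
static boolean_t
example_range_is_mapped(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t  entry;
	boolean_t       contiguous;

	vm_map_lock_read(map);
	contiguous = vm_map_range_check(map, start, end, &entry);
	vm_map_unlock_read(map);
	return contiguous;
}
#endif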
5917
5918 /*
5919 * vm_map_submap: [ kernel use only ]
5920 *
5921 * Mark the given range as handled by a subordinate map.
5922 *
5923 * This range must have been created with vm_map_find using
5924 * the vm_submap_object, and no other operations may have been
5925 * performed on this range prior to calling vm_map_submap.
5926 *
5927 * Only a limited number of operations can be performed
5928 * within this range after calling vm_map_submap:
5929 * vm_fault
5930 * [Don't try vm_map_copyin!]
5931 *
5932 * To remove a submapping, one must first remove the
5933 * range from the superior map, and then destroy the
5934 * submap (if desired). [Better yet, don't try it.]
5935 */
5936 kern_return_t
5937 vm_map_submap(
5938 vm_map_t map,
5939 vm_map_offset_t start,
5940 vm_map_offset_t end,
5941 vm_map_t submap,
5942 vm_map_offset_t offset,
5943 #ifdef NO_NESTED_PMAP
5944 __unused
5945 #endif /* NO_NESTED_PMAP */
5946 boolean_t use_pmap)
5947 {
5948 vm_map_entry_t entry;
5949 kern_return_t result = KERN_INVALID_ARGUMENT;
5950 vm_object_t object;
5951
5952 vm_map_lock(map);
5953
5954 if (!vm_map_lookup_entry(map, start, &entry)) {
5955 entry = entry->vme_next;
5956 }
5957
5958 if (entry == vm_map_to_entry(map) ||
5959 entry->is_sub_map) {
5960 vm_map_unlock(map);
5961 return KERN_INVALID_ARGUMENT;
5962 }
5963
5964 vm_map_clip_start(map, entry, start);
5965 vm_map_clip_end(map, entry, end);
5966
5967 if ((entry->vme_start == start) && (entry->vme_end == end) &&
5968 (!entry->is_sub_map) &&
5969 ((object = VME_OBJECT(entry)) == vm_submap_object) &&
5970 (object->resident_page_count == 0) &&
5971 (object->copy == VM_OBJECT_NULL) &&
5972 (object->shadow == VM_OBJECT_NULL) &&
5973 (!object->pager_created)) {
5974 VME_OFFSET_SET(entry, (vm_object_offset_t)offset);
5975 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
5976 vm_object_deallocate(object);
5977 entry->is_sub_map = TRUE;
5978 entry->use_pmap = FALSE;
5979 VME_SUBMAP_SET(entry, submap);
5980 vm_map_reference(submap);
5981 if (submap->mapped_in_other_pmaps == FALSE &&
5982 vm_map_pmap(submap) != PMAP_NULL &&
5983 vm_map_pmap(submap) != vm_map_pmap(map)) {
5984 /*
5985 * This submap is being mapped in a map
5986 * that uses a different pmap.
5987 * Set its "mapped_in_other_pmaps" flag
5988 * to indicate that we now need to
5989 * remove mappings from all pmaps rather
5990 * than just the submap's pmap.
5991 */
5992 submap->mapped_in_other_pmaps = TRUE;
5993 }
5994
5995 #ifndef NO_NESTED_PMAP
5996 if (use_pmap) {
5997 /* nest if platform code will allow */
5998 if (submap->pmap == NULL) {
5999 ledger_t ledger = map->pmap->ledger;
6000 submap->pmap = pmap_create_options(ledger,
6001 (vm_map_size_t) 0, 0);
6002 if (submap->pmap == PMAP_NULL) {
6003 vm_map_unlock(map);
6004 return KERN_NO_SPACE;
6005 }
6006 #if defined(__arm__) || defined(__arm64__)
6007 pmap_set_nested(submap->pmap);
6008 #endif
6009 }
6010 result = pmap_nest(map->pmap,
6011 (VME_SUBMAP(entry))->pmap,
6012 (addr64_t)start,
6013 (uint64_t)(end - start));
6014 if (result) {
6015 panic("vm_map_submap: pmap_nest failed, rc = %08X\n", result);
6016 }
6017 entry->use_pmap = TRUE;
6018 }
6019 #else /* NO_NESTED_PMAP */
6020 pmap_remove(map->pmap, (addr64_t)start, (addr64_t)end);
6021 #endif /* NO_NESTED_PMAP */
6022 result = KERN_SUCCESS;
6023 }
6024 vm_map_unlock(map);
6025
6026 return result;
6027 }
6028
6029 /*
6030 * vm_map_protect:
6031 *
6032 * Sets the protection of the specified address
6033 * region in the target map. If "set_max" is
6034 * specified, the maximum protection is to be set;
6035 * otherwise, only the current protection is affected.
6036 */
6037 kern_return_t
6038 vm_map_protect(
6039 vm_map_t map,
6040 vm_map_offset_t start,
6041 vm_map_offset_t end,
6042 vm_prot_t new_prot,
6043 boolean_t set_max)
6044 {
6045 vm_map_entry_t current;
6046 vm_map_offset_t prev;
6047 vm_map_entry_t entry;
6048 vm_prot_t new_max;
6049 int pmap_options = 0;
6050 kern_return_t kr;
6051
6052 if (new_prot & VM_PROT_COPY) {
6053 vm_map_offset_t new_start;
6054 vm_prot_t cur_prot, max_prot;
6055 vm_map_kernel_flags_t kflags;
6056
6057 /* LP64todo - see below */
6058 if (start >= map->max_offset) {
6059 return KERN_INVALID_ADDRESS;
6060 }
6061
6062 if ((new_prot & VM_PROT_EXECUTE) &&
6063 map->pmap != kernel_pmap &&
6064 (vm_map_cs_enforcement(map)
6065 #if XNU_TARGET_OS_OSX && __arm64__
6066 || !VM_MAP_IS_EXOTIC(map)
6067 #endif /* XNU_TARGET_OS_OSX && __arm64__ */
6068 ) &&
6069 VM_MAP_POLICY_WX_FAIL(map)) {
6070 DTRACE_VM3(cs_wx,
6071 uint64_t, (uint64_t) start,
6072 uint64_t, (uint64_t) end,
6073 vm_prot_t, new_prot);
6074 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6075 proc_selfpid(),
6076 (current_task()->bsd_info
6077 ? proc_name_address(current_task()->bsd_info)
6078 : "?"),
6079 __FUNCTION__);
6080 return KERN_PROTECTION_FAILURE;
6081 }
6082
6083 /*
6084 * Let vm_map_remap_extract() know that it will need to:
6085 * + make a copy of the mapping
6086 * + add VM_PROT_WRITE to the max protections
6087 * + remove any protections that are no longer allowed from the
6088 * max protections (to avoid any WRITE/EXECUTE conflict, for
6089 * example).
6090 * Note that "max_prot" is an IN/OUT parameter only for this
6091 * specific (VM_PROT_COPY) case. It's usually an OUT parameter
6092 * only.
6093 */
6094 max_prot = new_prot & VM_PROT_ALL;
6095 kflags = VM_MAP_KERNEL_FLAGS_NONE;
6096 kflags.vmkf_remap_prot_copy = TRUE;
6097 kflags.vmkf_overwrite_immutable = TRUE;
6098 new_start = start;
6099 kr = vm_map_remap(map,
6100 &new_start,
6101 end - start,
6102 0, /* mask */
6103 VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE,
6104 kflags,
6105 0,
6106 map,
6107 start,
6108 TRUE, /* copy-on-write remapping! */
6109 &cur_prot,
6110 &max_prot,
6111 VM_INHERIT_DEFAULT);
6112 if (kr != KERN_SUCCESS) {
6113 return kr;
6114 }
6115 new_prot &= ~VM_PROT_COPY;
6116 }
6117
6118 vm_map_lock(map);
6119
6120 /* LP64todo - remove this check when vm_map_commpage64()
6121 * no longer has to stuff in a map_entry for the commpage
6122 * above the map's max_offset.
6123 */
6124 if (start >= map->max_offset) {
6125 vm_map_unlock(map);
6126 return KERN_INVALID_ADDRESS;
6127 }
6128
6129 while (1) {
6130 /*
6131 * Lookup the entry. If it doesn't start in a valid
6132 * entry, return an error.
6133 */
6134 if (!vm_map_lookup_entry(map, start, &entry)) {
6135 vm_map_unlock(map);
6136 return KERN_INVALID_ADDRESS;
6137 }
6138
6139 if (entry->superpage_size && (start & (SUPERPAGE_SIZE - 1))) { /* extend request to whole entry */
6140 start = SUPERPAGE_ROUND_DOWN(start);
6141 continue;
6142 }
6143 break;
6144 }
6145 if (entry->superpage_size) {
6146 end = SUPERPAGE_ROUND_UP(end);
6147 }
6148
6149 /*
6150 * Make a first pass to check for protection and address
6151 * violations.
6152 */
6153
6154 current = entry;
6155 prev = current->vme_start;
6156 while ((current != vm_map_to_entry(map)) &&
6157 (current->vme_start < end)) {
6158 /*
6159 * If there is a hole, return an error.
6160 */
6161 if (current->vme_start != prev) {
6162 vm_map_unlock(map);
6163 return KERN_INVALID_ADDRESS;
6164 }
6165
6166 new_max = current->max_protection;
6167 #if PMAP_CS
6168 if (set_max && (new_prot & VM_PROT_EXECUTE) && pmap_cs_exempt(map->pmap)) {
6169 new_max |= VM_PROT_EXECUTE;
6170 }
6171 #endif
6172 if ((new_prot & new_max) != new_prot) {
6173 vm_map_unlock(map);
6174 return KERN_PROTECTION_FAILURE;
6175 }
6176
6177 if ((new_prot & VM_PROT_WRITE) &&
6178 (new_prot & VM_PROT_EXECUTE) &&
6179 #if XNU_TARGET_OS_OSX
6180 map->pmap != kernel_pmap &&
6181 (vm_map_cs_enforcement(map)
6182 #if __arm64__
6183 || !VM_MAP_IS_EXOTIC(map)
6184 #endif /* __arm64__ */
6185 ) &&
6186 #endif /* XNU_TARGET_OS_OSX */
6187 #if PMAP_CS
6188 !pmap_cs_exempt(map->pmap) &&
6189 #endif
6190 !(current->used_for_jit)) {
6191 DTRACE_VM3(cs_wx,
6192 uint64_t, (uint64_t) current->vme_start,
6193 uint64_t, (uint64_t) current->vme_end,
6194 vm_prot_t, new_prot);
6195 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
6196 proc_selfpid(),
6197 (current_task()->bsd_info
6198 ? proc_name_address(current_task()->bsd_info)
6199 : "?"),
6200 __FUNCTION__);
6201 new_prot &= ~VM_PROT_EXECUTE;
6202 if (VM_MAP_POLICY_WX_FAIL(map)) {
6203 vm_map_unlock(map);
6204 return KERN_PROTECTION_FAILURE;
6205 }
6206 }
6207
6208 /*
6209 * If the task has requested executable lockdown, deny any
6210 * request that would:
6211 * - add executable protections, or
6212 * - add write protections to an existing executable mapping.
6213 */
6214 if (map->map_disallow_new_exec == TRUE) {
6215 if ((new_prot & VM_PROT_EXECUTE) ||
6216 ((current->protection & VM_PROT_EXECUTE) && (new_prot & VM_PROT_WRITE))) {
6217 vm_map_unlock(map);
6218 return KERN_PROTECTION_FAILURE;
6219 }
6220 }
6221
6222 prev = current->vme_end;
6223 current = current->vme_next;
6224 }
6225
6226 #if __arm64__
6227 if (end > prev &&
6228 end == vm_map_round_page(prev, VM_MAP_PAGE_MASK(map))) {
6229 vm_map_entry_t prev_entry;
6230
6231 prev_entry = current->vme_prev;
6232 if (prev_entry != vm_map_to_entry(map) &&
6233 !prev_entry->map_aligned &&
6234 (vm_map_round_page(prev_entry->vme_end,
6235 VM_MAP_PAGE_MASK(map))
6236 == end)) {
6237 /*
6238 * The last entry in our range is not "map-aligned"
6239 * but it would have reached all the way to "end"
6240 * if it had been map-aligned, so this is not really
6241 * a hole in the range and we can proceed.
6242 */
6243 prev = end;
6244 }
6245 }
6246 #endif /* __arm64__ */
6247
6248 if (end > prev) {
6249 vm_map_unlock(map);
6250 return KERN_INVALID_ADDRESS;
6251 }
6252
6253 /*
6254 * Go back and fix up protections.
6255 * Clip to start here if the range starts within
6256 * the entry.
6257 */
6258
6259 current = entry;
6260 if (current != vm_map_to_entry(map)) {
6261 /* clip and unnest if necessary */
6262 vm_map_clip_start(map, current, start);
6263 }
6264
6265 while ((current != vm_map_to_entry(map)) &&
6266 (current->vme_start < end)) {
6267 vm_prot_t old_prot;
6268
6269 vm_map_clip_end(map, current, end);
6270
6271 if (current->is_sub_map) {
6272 /* clipping did unnest if needed */
6273 assert(!current->use_pmap);
6274 }
6275
6276 old_prot = current->protection;
6277
6278 if (set_max) {
6279 current->max_protection = new_prot;
6280 current->protection = new_prot & old_prot;
6281 } else {
6282 current->protection = new_prot;
6283 }
6284
6285 /*
6286 * Update physical map if necessary.
6287 * If the request is to turn off write protection,
6288 * we won't do it for real (in pmap). This is because
6289 * it would cause copy-on-write to fail. We've already
6290 * set the new protection in the map, so if a
6291 * write-protect fault occurs, it will be fixed up
6292 * properly, COW or not.
6293 */
6294 if (current->protection != old_prot) {
6295 /* Look one level in: we support nested pmaps */
6296 /* from mapped submaps which are direct entries */
6297 /* in our map */
6298
6299 vm_prot_t prot;
6300
6301 prot = current->protection;
6302 if (current->is_sub_map || (VME_OBJECT(current) == NULL) || (VME_OBJECT(current) != compressor_object)) {
6303 prot &= ~VM_PROT_WRITE;
6304 } else {
6305 assert(!VME_OBJECT(current)->code_signed);
6306 assert(VME_OBJECT(current)->copy_strategy == MEMORY_OBJECT_COPY_NONE);
6307 }
6308
6309 if (override_nx(map, VME_ALIAS(current)) && prot) {
6310 prot |= VM_PROT_EXECUTE;
6311 }
6312
6313 #if DEVELOPMENT || DEBUG
6314 if (!(old_prot & VM_PROT_EXECUTE) &&
6315 (prot & VM_PROT_EXECUTE) &&
6316 panic_on_unsigned_execute &&
6317 (proc_selfcsflags() & CS_KILL)) {
6318 panic("vm_map_protect(%p,0x%llx,0x%llx) old=0x%x new=0x%x - <rdar://23770418> code-signing bypass?\n", map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, old_prot, prot);
6319 }
6320 #endif /* DEVELOPMENT || DEBUG */
6321
6322 if (pmap_has_prot_policy(map->pmap, current->translated_allow_execute, prot)) {
6323 if (current->wired_count) {
6324 panic("vm_map_protect(%p,0x%llx,0x%llx) new=0x%x wired=%x\n",
6325 map, (uint64_t)current->vme_start, (uint64_t)current->vme_end, prot, current->wired_count);
6326 }
6327
6328 /* If the pmap layer cares about this
6329 * protection type, force a fault for
6330 * each page so that vm_fault will
6331 * repopulate the page with the full
6332 * set of protections.
6333 */
6334 /*
6335 * TODO: We don't seem to need this,
6336 * but this is due to an internal
6337 * implementation detail of
6338 * pmap_protect. Do we want to rely
6339 * on this?
6340 */
6341 prot = VM_PROT_NONE;
6342 }
6343
6344 if (current->is_sub_map && current->use_pmap) {
6345 pmap_protect(VME_SUBMAP(current)->pmap,
6346 current->vme_start,
6347 current->vme_end,
6348 prot);
6349 } else {
6350 if (prot & VM_PROT_WRITE) {
6351 if (VME_OBJECT(current) == compressor_object) {
6352 /*
6353 * For write requests on the
6354 * compressor, we will ask the
6355 * pmap layer to prevent us from
6356 * taking a write fault when we
6357 * attempt to access the mapping
6358 * next.
6359 */
6360 pmap_options |= PMAP_OPTIONS_PROTECT_IMMEDIATE;
6361 }
6362 }
6363
6364 pmap_protect_options(map->pmap,
6365 current->vme_start,
6366 current->vme_end,
6367 prot,
6368 pmap_options,
6369 NULL);
6370 }
6371 }
6372 current = current->vme_next;
6373 }
6374
6375 current = entry;
6376 while ((current != vm_map_to_entry(map)) &&
6377 (current->vme_start <= end)) {
6378 vm_map_simplify_entry(map, current);
6379 current = current->vme_next;
6380 }
6381
6382 vm_map_unlock(map);
6383 return KERN_SUCCESS;
6384 }
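
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): drop a range to read-only for the current
 * protection first, then clamp the maximum protection as well so it cannot
 * be raised again.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_make_readonly(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	/* set_max == FALSE: only the current protection changes */
	kr = vm_map_protect(map, start, end, VM_PROT_READ, FALSE);
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* set_max == TRUE: max_protection becomes VM_PROT_READ too */
	return vm_map_protect(map, start, end, VM_PROT_READ, TRUE);
}
#endif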
6385
6386 /*
6387 * vm_map_inherit:
6388 *
6389 * Sets the inheritance of the specified address
6390 * range in the target map. Inheritance
6391 * affects how the map will be shared with
6392 * child maps at the time of vm_map_fork.
6393 */
6394 kern_return_t
6395 vm_map_inherit(
6396 vm_map_t map,
6397 vm_map_offset_t start,
6398 vm_map_offset_t end,
6399 vm_inherit_t new_inheritance)
6400 {
6401 vm_map_entry_t entry;
6402 vm_map_entry_t temp_entry;
6403
6404 vm_map_lock(map);
6405
6406 VM_MAP_RANGE_CHECK(map, start, end);
6407
6408 if (vm_map_lookup_entry(map, start, &temp_entry)) {
6409 entry = temp_entry;
6410 } else {
6411 temp_entry = temp_entry->vme_next;
6412 entry = temp_entry;
6413 }
6414
6415 /* first check entire range for submaps which can't support the */
6416 /* given inheritance. */
6417 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6418 if (entry->is_sub_map) {
6419 if (new_inheritance == VM_INHERIT_COPY) {
6420 vm_map_unlock(map);
6421 return KERN_INVALID_ARGUMENT;
6422 }
6423 }
6424
6425 entry = entry->vme_next;
6426 }
6427
6428 entry = temp_entry;
6429 if (entry != vm_map_to_entry(map)) {
6430 /* clip and unnest if necessary */
6431 vm_map_clip_start(map, entry, start);
6432 }
6433
6434 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
6435 vm_map_clip_end(map, entry, end);
6436 if (entry->is_sub_map) {
6437 /* clip did unnest if needed */
6438 assert(!entry->use_pmap);
6439 }
6440
6441 entry->inheritance = new_inheritance;
6442
6443 entry = entry->vme_next;
6444 }
6445
6446 vm_map_unlock(map);
6447 return KERN_SUCCESS;
6448 }
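
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper is hypothetical): mark a range so it is not passed on to child
 * tasks at vm_map_fork() time.  As checked above, VM_INHERIT_COPY would be
 * rejected for any part of the range backed by a submap.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_exclude_from_fork(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	return vm_map_inherit(map, start, end, VM_INHERIT_NONE);
}
#endif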
6449
6450 /*
6451 * Update the accounting for the amount of wired memory in this map. If the user has
6452 * exceeded the defined limits, then we fail. Wiring on behalf of the kernel never fails.
6453 */
6454
6455 static kern_return_t
6456 add_wire_counts(
6457 vm_map_t map,
6458 vm_map_entry_t entry,
6459 boolean_t user_wire)
6460 {
6461 vm_map_size_t size;
6462
6463 if (user_wire) {
6464 unsigned int total_wire_count = vm_page_wire_count + vm_lopage_free_count;
6465
6466 /*
6467 * We're wiring memory at the request of the user. Check if this is the first time the user is wiring
6468 * this map entry.
6469 */
6470
6471 if (entry->user_wired_count == 0) {
6472 size = entry->vme_end - entry->vme_start;
6473
6474 /*
6475 * Since this is the first time the user is wiring this map entry, check to see if we're
6476 * exceeding the user wire limits. There is a per-map limit, which is the smaller of the
6477 * process's rlimit and the global vm_per_task_user_wire_limit. There is also
6478 * a system-wide limit on the amount of memory all users can wire. If the user is over either
6479 * limit, then we fail.
6480 */
6481
6482 if (size + map->user_wire_size > MIN(map->user_wire_limit, vm_per_task_user_wire_limit) ||
6483 size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6484 if (size + ptoa_64(total_wire_count) > vm_global_user_wire_limit) {
6485 os_atomic_inc(&vm_add_wire_count_over_global_limit, relaxed);
6486 } else {
6487 os_atomic_inc(&vm_add_wire_count_over_user_limit, relaxed);
6488 }
6489 return KERN_RESOURCE_SHORTAGE;
6490 }
6491
6492 /*
6493 * The first time the user wires an entry, we also increment the wired_count and add this to
6494 * the total that has been wired in the map.
6495 */
6496
6497 if (entry->wired_count >= MAX_WIRE_COUNT) {
6498 return KERN_FAILURE;
6499 }
6500
6501 entry->wired_count++;
6502 map->user_wire_size += size;
6503 }
6504
6505 if (entry->user_wired_count >= MAX_WIRE_COUNT) {
6506 return KERN_FAILURE;
6507 }
6508
6509 entry->user_wired_count++;
6510 } else {
6511 /*
6512 * The kernel's wiring the memory. Just bump the count and continue.
6513 */
6514
6515 if (entry->wired_count >= MAX_WIRE_COUNT) {
6516 panic("vm_map_wire: too many wirings");
6517 }
6518
6519 entry->wired_count++;
6520 }
6521
6522 return KERN_SUCCESS;
6523 }
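
/*
 * Worked example of the per-map user limit check above (added for clarity,
 * values hypothetical): for a 16 KB entry being user-wired for the first
 * time, with map->user_wire_size == 48 KB and an effective limit of
 * MIN(map->user_wire_limit, vm_per_task_user_wire_limit) == 56 KB, the call
 * fails with KERN_RESOURCE_SHORTAGE because 16 KB + 48 KB > 56 KB, and
 * vm_add_wire_count_over_user_limit is bumped.  The global check against
 * vm_global_user_wire_limit works the same way on the system-wide total of
 * wired pages.
 */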
6524
6525 /*
6526 * Update the memory wiring accounting now that the given map entry is being unwired.
6527 */
6528
6529 static void
6530 subtract_wire_counts(
6531 vm_map_t map,
6532 vm_map_entry_t entry,
6533 boolean_t user_wire)
6534 {
6535 if (user_wire) {
6536 /*
6537 * We're unwiring memory at the request of the user. See if we're removing the last user wire reference.
6538 */
6539
6540 if (entry->user_wired_count == 1) {
6541 /*
6542 * We're removing the last user wire reference. Decrement the wired_count and the total
6543 * user wired memory for this map.
6544 */
6545
6546 assert(entry->wired_count >= 1);
6547 entry->wired_count--;
6548 map->user_wire_size -= entry->vme_end - entry->vme_start;
6549 }
6550
6551 assert(entry->user_wired_count >= 1);
6552 entry->user_wired_count--;
6553 } else {
6554 /*
6555 * The kernel is unwiring the memory. Just update the count.
6556 */
6557
6558 assert(entry->wired_count >= 1);
6559 entry->wired_count--;
6560 }
6561 }
6562
6563 int cs_executable_wire = 0;
6564
6565 /*
6566 * vm_map_wire:
6567 *
6568 * Sets the pageability of the specified address range in the
6569 * target map as wired. Regions specified as not pageable require
6570 * locked-down physical memory and physical page maps. The
6571 * access_type variable indicates types of accesses that must not
6572 * generate page faults. This is checked against protection of
6573 * memory being locked-down.
6574 *
6575 * The map must not be locked, but a reference must remain to the
6576 * map throughout the call.
6577 */
6578 static kern_return_t
6579 vm_map_wire_nested(
6580 vm_map_t map,
6581 vm_map_offset_t start,
6582 vm_map_offset_t end,
6583 vm_prot_t caller_prot,
6584 vm_tag_t tag,
6585 boolean_t user_wire,
6586 pmap_t map_pmap,
6587 vm_map_offset_t pmap_addr,
6588 ppnum_t *physpage_p)
6589 {
6590 vm_map_entry_t entry;
6591 vm_prot_t access_type;
6592 struct vm_map_entry *first_entry, tmp_entry;
6593 vm_map_t real_map;
6594 vm_map_offset_t s, e;
6595 kern_return_t rc;
6596 boolean_t need_wakeup;
6597 boolean_t main_map = FALSE;
6598 wait_interrupt_t interruptible_state;
6599 thread_t cur_thread;
6600 unsigned int last_timestamp;
6601 vm_map_size_t size;
6602 boolean_t wire_and_extract;
6603 vm_prot_t extra_prots;
6604
6605 extra_prots = VM_PROT_COPY;
6606 extra_prots |= VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6607 #if XNU_TARGET_OS_OSX
6608 if (map->pmap == kernel_pmap ||
6609 !vm_map_cs_enforcement(map)) {
6610 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6611 }
6612 #endif /* XNU_TARGET_OS_OSX */
6613 #if PMAP_CS
6614 if (pmap_cs_exempt(map->pmap)) {
6615 extra_prots &= ~VM_PROT_COPY_FAIL_IF_EXECUTABLE;
6616 }
6617 #endif /* PMAP_CS */
6618
6619 access_type = (caller_prot & VM_PROT_ALL);
6620
6621 wire_and_extract = FALSE;
6622 if (physpage_p != NULL) {
6623 /*
6624 * The caller wants the physical page number of the
6625 * wired page. We return only one physical page number
6626 * so this works for only one page at a time.
6627 */
6628 if ((end - start) != PAGE_SIZE) {
6629 return KERN_INVALID_ARGUMENT;
6630 }
6631 wire_and_extract = TRUE;
6632 *physpage_p = 0;
6633 }
6634
6635 vm_map_lock(map);
6636 if (map_pmap == NULL) {
6637 main_map = TRUE;
6638 }
6639 last_timestamp = map->timestamp;
6640
6641 VM_MAP_RANGE_CHECK(map, start, end);
6642 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
6643 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
6644
6645 if (start == end) {
6646 /* We wired what the caller asked for, zero pages */
6647 vm_map_unlock(map);
6648 return KERN_SUCCESS;
6649 }
6650
6651 need_wakeup = FALSE;
6652 cur_thread = current_thread();
6653
6654 s = start;
6655 rc = KERN_SUCCESS;
6656
6657 if (vm_map_lookup_entry(map, s, &first_entry)) {
6658 entry = first_entry;
6659 /*
6660 * vm_map_clip_start will be done later.
6661 * We don't want to unnest any nested submaps here !
6662 */
6663 } else {
6664 /* Start address is not in map */
6665 rc = KERN_INVALID_ADDRESS;
6666 goto done;
6667 }
6668
6669 while ((entry != vm_map_to_entry(map)) && (s < end)) {
6670 /*
6671 * At this point, we have wired from "start" to "s".
6672 * We still need to wire from "s" to "end".
6673 *
6674 * "entry" hasn't been clipped, so it could start before "s"
6675 * and/or end after "end".
6676 */
6677
6678 /* "e" is how far we want to wire in this entry */
6679 e = entry->vme_end;
6680 if (e > end) {
6681 e = end;
6682 }
6683
6684 /*
6685 * If another thread is wiring/unwiring this entry then
6686 * block after informing the other thread to wake us up.
6687 */
6688 if (entry->in_transition) {
6689 wait_result_t wait_result;
6690
6691 /*
6692 * We have not clipped the entry. Make sure that
6693 * the start address is in range so that the lookup
6694 * below will succeed.
6695 * "s" is the current starting point: we've already
6696 * wired from "start" to "s" and we still have
6697 * to wire from "s" to "end".
6698 */
6699
6700 entry->needs_wakeup = TRUE;
6701
6702 /*
6703 * wake up anybody waiting on entries that we have
6704 * already wired.
6705 */
6706 if (need_wakeup) {
6707 vm_map_entry_wakeup(map);
6708 need_wakeup = FALSE;
6709 }
6710 /*
6711 * User wiring is interruptible
6712 */
6713 wait_result = vm_map_entry_wait(map,
6714 (user_wire) ? THREAD_ABORTSAFE :
6715 THREAD_UNINT);
6716 if (user_wire && wait_result == THREAD_INTERRUPTED) {
6717 /*
6718 * undo the wirings we have done so far
6719 * We do not clear the needs_wakeup flag,
6720 * because we cannot tell if we were the
6721 * only one waiting.
6722 */
6723 rc = KERN_FAILURE;
6724 goto done;
6725 }
6726
6727 /*
6728 * Cannot avoid a lookup here. reset timestamp.
6729 */
6730 last_timestamp = map->timestamp;
6731
6732 /*
6733 * The entry could have been clipped, so look it up again.
6734 * The worst that can happen is that it no longer exists.
6735 */
6736 if (!vm_map_lookup_entry(map, s, &first_entry)) {
6737 /*
6738 * User: undo everything up to the previous
6739 * entry. Let vm_map_unwire worry about
6740 * checking the validity of the range.
6741 */
6742 rc = KERN_FAILURE;
6743 goto done;
6744 }
6745 entry = first_entry;
6746 continue;
6747 }
6748
6749 if (entry->is_sub_map) {
6750 vm_map_offset_t sub_start;
6751 vm_map_offset_t sub_end;
6752 vm_map_offset_t local_start;
6753 vm_map_offset_t local_end;
6754 pmap_t pmap;
6755
6756 if (wire_and_extract) {
6757 /*
6758 * Wiring would result in copy-on-write
6759 * which would not be compatible with
6760 * the sharing we have with the original
6761 * provider of this memory.
6762 */
6763 rc = KERN_INVALID_ARGUMENT;
6764 goto done;
6765 }
6766
6767 vm_map_clip_start(map, entry, s);
6768 vm_map_clip_end(map, entry, end);
6769
6770 sub_start = VME_OFFSET(entry);
6771 sub_end = entry->vme_end;
6772 sub_end += VME_OFFSET(entry) - entry->vme_start;
6773
6774 local_end = entry->vme_end;
6775 if (map_pmap == NULL) {
6776 vm_object_t object;
6777 vm_object_offset_t offset;
6778 vm_prot_t prot;
6779 boolean_t wired;
6780 vm_map_entry_t local_entry;
6781 vm_map_version_t version;
6782 vm_map_t lookup_map;
6783
6784 if (entry->use_pmap) {
6785 pmap = VME_SUBMAP(entry)->pmap;
6786 /* ppc implementation requires that */
6787 /* submaps' pmap address ranges line */
6788 /* up with parent map */
6789 #ifdef notdef
6790 pmap_addr = sub_start;
6791 #endif
6792 pmap_addr = s;
6793 } else {
6794 pmap = map->pmap;
6795 pmap_addr = s;
6796 }
6797
6798 if (entry->wired_count) {
6799 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6800 goto done;
6801 }
6802
6803 /*
6804 * The map was not unlocked:
6805 * no need to goto re-lookup.
6806 * Just go directly to next entry.
6807 */
6808 entry = entry->vme_next;
6809 s = entry->vme_start;
6810 continue;
6811 }
6812
6813 /* call vm_map_lookup_locked to */
6814 /* cause any needs copy to be */
6815 /* evaluated */
6816 local_start = entry->vme_start;
6817 lookup_map = map;
6818 vm_map_lock_write_to_read(map);
6819 rc = vm_map_lookup_locked(
6820 &lookup_map, local_start,
6821 (access_type | extra_prots),
6822 OBJECT_LOCK_EXCLUSIVE,
6823 &version, &object,
6824 &offset, &prot, &wired,
6825 NULL,
6826 &real_map, NULL);
6827 if (rc != KERN_SUCCESS) {
6828 vm_map_unlock_read(lookup_map);
6829 assert(map_pmap == NULL);
6830 vm_map_unwire(map, start,
6831 s, user_wire);
6832 return rc;
6833 }
6834 vm_object_unlock(object);
6835 if (real_map != lookup_map) {
6836 vm_map_unlock(real_map);
6837 }
6838 vm_map_unlock_read(lookup_map);
6839 vm_map_lock(map);
6840
6841 /* we unlocked, so must re-lookup */
6842 if (!vm_map_lookup_entry(map,
6843 local_start,
6844 &local_entry)) {
6845 rc = KERN_FAILURE;
6846 goto done;
6847 }
6848
6849 /*
6850 * entry could have been "simplified",
6851 * so re-clip
6852 */
6853 entry = local_entry;
6854 assert(s == local_start);
6855 vm_map_clip_start(map, entry, s);
6856 vm_map_clip_end(map, entry, end);
6857 /* re-compute "e" */
6858 e = entry->vme_end;
6859 if (e > end) {
6860 e = end;
6861 }
6862
6863 /* did we have a change of type? */
6864 if (!entry->is_sub_map) {
6865 last_timestamp = map->timestamp;
6866 continue;
6867 }
6868 } else {
6869 local_start = entry->vme_start;
6870 pmap = map_pmap;
6871 }
6872
6873 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6874 goto done;
6875 }
6876
6877 entry->in_transition = TRUE;
6878
6879 vm_map_unlock(map);
6880 rc = vm_map_wire_nested(VME_SUBMAP(entry),
6881 sub_start, sub_end,
6882 caller_prot, tag,
6883 user_wire, pmap, pmap_addr,
6884 NULL);
6885 vm_map_lock(map);
6886
6887 /*
6888 * Find the entry again. It could have been clipped
6889 * after we unlocked the map.
6890 */
6891 if (!vm_map_lookup_entry(map, local_start,
6892 &first_entry)) {
6893 panic("vm_map_wire: re-lookup failed");
6894 }
6895 entry = first_entry;
6896
6897 assert(local_start == s);
6898 /* re-compute "e" */
6899 e = entry->vme_end;
6900 if (e > end) {
6901 e = end;
6902 }
6903
6904 last_timestamp = map->timestamp;
6905 while ((entry != vm_map_to_entry(map)) &&
6906 (entry->vme_start < e)) {
6907 assert(entry->in_transition);
6908 entry->in_transition = FALSE;
6909 if (entry->needs_wakeup) {
6910 entry->needs_wakeup = FALSE;
6911 need_wakeup = TRUE;
6912 }
6913 if (rc != KERN_SUCCESS) {/* from vm_*_wire */
6914 subtract_wire_counts(map, entry, user_wire);
6915 }
6916 entry = entry->vme_next;
6917 }
6918 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
6919 goto done;
6920 }
6921
6922 /* no need to relookup again */
6923 s = entry->vme_start;
6924 continue;
6925 }
6926
6927 /*
6928 * If this entry is already wired then increment
6929 * the appropriate wire reference count.
6930 */
6931 if (entry->wired_count) {
6932 if ((entry->protection & access_type) != access_type) {
6933 /* found a protection problem */
6934
6935 /*
6936 * XXX FBDP
6937 * We should always return an error
6938 * in this case but since we didn't
6939 * enforce it before, let's do
6940 * it only for the new "wire_and_extract"
6941 * code path for now...
6942 */
6943 if (wire_and_extract) {
6944 rc = KERN_PROTECTION_FAILURE;
6945 goto done;
6946 }
6947 }
6948
6949 /*
6950 * entry is already wired down, get our reference
6951 * after clipping to our range.
6952 */
6953 vm_map_clip_start(map, entry, s);
6954 vm_map_clip_end(map, entry, end);
6955
6956 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
6957 goto done;
6958 }
6959
6960 if (wire_and_extract) {
6961 vm_object_t object;
6962 vm_object_offset_t offset;
6963 vm_page_t m;
6964
6965 /*
6966 * We don't have to "wire" the page again
6967 * but we still have to "extract" its
6968 * physical page number, after some sanity
6969 * checks.
6970 */
6971 assert((entry->vme_end - entry->vme_start)
6972 == PAGE_SIZE);
6973 assert(!entry->needs_copy);
6974 assert(!entry->is_sub_map);
6975 assert(VME_OBJECT(entry));
6976 if (((entry->vme_end - entry->vme_start)
6977 != PAGE_SIZE) ||
6978 entry->needs_copy ||
6979 entry->is_sub_map ||
6980 VME_OBJECT(entry) == VM_OBJECT_NULL) {
6981 rc = KERN_INVALID_ARGUMENT;
6982 goto done;
6983 }
6984
6985 object = VME_OBJECT(entry);
6986 offset = VME_OFFSET(entry);
6987 /* need exclusive lock to update m->dirty */
6988 if (entry->protection & VM_PROT_WRITE) {
6989 vm_object_lock(object);
6990 } else {
6991 vm_object_lock_shared(object);
6992 }
6993 m = vm_page_lookup(object, offset);
6994 assert(m != VM_PAGE_NULL);
6995 assert(VM_PAGE_WIRED(m));
6996 if (m != VM_PAGE_NULL && VM_PAGE_WIRED(m)) {
6997 *physpage_p = VM_PAGE_GET_PHYS_PAGE(m);
6998 if (entry->protection & VM_PROT_WRITE) {
6999 vm_object_lock_assert_exclusive(
7000 object);
7001 m->vmp_dirty = TRUE;
7002 }
7003 } else {
7004 /* not already wired !? */
7005 *physpage_p = 0;
7006 }
7007 vm_object_unlock(object);
7008 }
7009
7010 /* map was not unlocked: no need to relookup */
7011 entry = entry->vme_next;
7012 s = entry->vme_start;
7013 continue;
7014 }
7015
7016 /*
7017 * Unwired entry or wire request transmitted via submap
7018 */
7019
7020 /*
7021 * Wiring would copy the pages to the shadow object.
7022 * The shadow object would not be code-signed so
7023 * attempting to execute code from these copied pages
7024 * would trigger a code-signing violation.
7025 */
7026
7027 if ((entry->protection & VM_PROT_EXECUTE)
7028 #if XNU_TARGET_OS_OSX
7029 &&
7030 map->pmap != kernel_pmap &&
7031 (vm_map_cs_enforcement(map)
7032 #if __arm64__
7033 || !VM_MAP_IS_EXOTIC(map)
7034 #endif /* __arm64__ */
7035 )
7036 #endif /* XNU_TARGET_OS_OSX */
7037 #if PMAP_CS
7038 &&
7039 !pmap_cs_exempt(map->pmap)
7040 #endif
7041 ) {
7042 #if MACH_ASSERT
7043 printf("pid %d[%s] wiring executable range from "
7044 "0x%llx to 0x%llx: rejected to preserve "
7045 "code-signing\n",
7046 proc_selfpid(),
7047 (current_task()->bsd_info
7048 ? proc_name_address(current_task()->bsd_info)
7049 : "?"),
7050 (uint64_t) entry->vme_start,
7051 (uint64_t) entry->vme_end);
7052 #endif /* MACH_ASSERT */
7053 DTRACE_VM2(cs_executable_wire,
7054 uint64_t, (uint64_t)entry->vme_start,
7055 uint64_t, (uint64_t)entry->vme_end);
7056 cs_executable_wire++;
7057 rc = KERN_PROTECTION_FAILURE;
7058 goto done;
7059 }
7060
7061 /*
7062 * Perform actions of vm_map_lookup that need the write
7063 * lock on the map: create a shadow object for a
7064 * copy-on-write region, or an object for a zero-fill
7065 * region.
7066 */
7067 size = entry->vme_end - entry->vme_start;
7068 /*
7069 * If wiring a copy-on-write page, we need to copy it now
7070 * even if we're only (currently) requesting read access.
7071 * This is aggressive, but once it's wired we can't move it.
7072 */
7073 if (entry->needs_copy) {
7074 if (wire_and_extract) {
7075 /*
7076 * We're supposed to share with the original
7077 * provider so should not be "needs_copy"
7078 */
7079 rc = KERN_INVALID_ARGUMENT;
7080 goto done;
7081 }
7082
7083 VME_OBJECT_SHADOW(entry, size);
7084 entry->needs_copy = FALSE;
7085 } else if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
7086 if (wire_and_extract) {
7087 /*
7088 * We're supposed to share with the original
7089 * provider so should already have an object.
7090 */
7091 rc = KERN_INVALID_ARGUMENT;
7092 goto done;
7093 }
7094 VME_OBJECT_SET(entry, vm_object_allocate(size));
7095 VME_OFFSET_SET(entry, (vm_object_offset_t)0);
7096 assert(entry->use_pmap);
7097 }
7098
7099 vm_map_clip_start(map, entry, s);
7100 vm_map_clip_end(map, entry, end);
7101
7102 /* re-compute "e" */
7103 e = entry->vme_end;
7104 if (e > end) {
7105 e = end;
7106 }
7107
7108 /*
7109 * Check for holes and protection mismatch.
7110 * Holes: Next entry should be contiguous unless this
7111 * is the end of the region.
7112 * Protection: Access requested must be allowed, unless
7113 * wiring is by protection class
7114 */
7115 if ((entry->vme_end < end) &&
7116 ((entry->vme_next == vm_map_to_entry(map)) ||
7117 (entry->vme_next->vme_start > entry->vme_end))) {
7118 /* found a hole */
7119 rc = KERN_INVALID_ADDRESS;
7120 goto done;
7121 }
7122 if ((entry->protection & access_type) != access_type) {
7123 /* found a protection problem */
7124 rc = KERN_PROTECTION_FAILURE;
7125 goto done;
7126 }
7127
7128 assert(entry->wired_count == 0 && entry->user_wired_count == 0);
7129
7130 if ((rc = add_wire_counts(map, entry, user_wire)) != KERN_SUCCESS) {
7131 goto done;
7132 }
7133
7134 entry->in_transition = TRUE;
7135
7136 /*
7137 * This entry might get split once we unlock the map.
7138 * In vm_fault_wire(), we need the current range as
7139 * defined by this entry. In order for this to work
7140 * along with a simultaneous clip operation, we make a
7141 * temporary copy of this entry and use that for the
7142 * wiring. Note that the underlying objects do not
7143 * change during a clip.
7144 */
7145 tmp_entry = *entry;
7146
7147 /*
7148 * The in_transition state guarantees that the entry
7149 * (or entries for this range, if a split occurred) will be
7150 * there when the map lock is acquired for the second time.
7151 */
7152 vm_map_unlock(map);
7153
7154 if (!user_wire && cur_thread != THREAD_NULL) {
7155 interruptible_state = thread_interrupt_level(THREAD_UNINT);
7156 } else {
7157 interruptible_state = THREAD_UNINT;
7158 }
7159
7160 if (map_pmap) {
7161 rc = vm_fault_wire(map,
7162 &tmp_entry, caller_prot, tag, map_pmap, pmap_addr,
7163 physpage_p);
7164 } else {
7165 rc = vm_fault_wire(map,
7166 &tmp_entry, caller_prot, tag, map->pmap,
7167 tmp_entry.vme_start,
7168 physpage_p);
7169 }
7170
7171 if (!user_wire && cur_thread != THREAD_NULL) {
7172 thread_interrupt_level(interruptible_state);
7173 }
7174
7175 vm_map_lock(map);
7176
7177 if (last_timestamp + 1 != map->timestamp) {
7178 /*
7179 * Find the entry again. It could have been clipped
7180 * after we unlocked the map.
7181 */
7182 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7183 &first_entry)) {
7184 panic("vm_map_wire: re-lookup failed");
7185 }
7186
7187 entry = first_entry;
7188 }
7189
7190 last_timestamp = map->timestamp;
7191
7192 while ((entry != vm_map_to_entry(map)) &&
7193 (entry->vme_start < tmp_entry.vme_end)) {
7194 assert(entry->in_transition);
7195 entry->in_transition = FALSE;
7196 if (entry->needs_wakeup) {
7197 entry->needs_wakeup = FALSE;
7198 need_wakeup = TRUE;
7199 }
7200 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7201 subtract_wire_counts(map, entry, user_wire);
7202 }
7203 entry = entry->vme_next;
7204 }
7205
7206 if (rc != KERN_SUCCESS) { /* from vm_*_wire */
7207 goto done;
7208 }
7209
7210 if ((entry != vm_map_to_entry(map)) && /* we still have entries in the map */
7211 (tmp_entry.vme_end != end) && /* AND, we are not at the end of the requested range */
7212 (entry->vme_start != tmp_entry.vme_end)) { /* AND, the next entry is not contiguous. */
7213 /* found a "new" hole */
7214 s = tmp_entry.vme_end;
7215 rc = KERN_INVALID_ADDRESS;
7216 goto done;
7217 }
7218
7219 s = entry->vme_start;
7220 } /* end while loop through map entries */
7221
7222 done:
7223 if (rc == KERN_SUCCESS) {
7224 /* repair any damage we may have made to the VM map */
7225 vm_map_simplify_range(map, start, end);
7226 }
7227
7228 vm_map_unlock(map);
7229
7230 /*
7231 * wake up anybody waiting on entries we wired.
7232 */
7233 if (need_wakeup) {
7234 vm_map_entry_wakeup(map);
7235 }
7236
7237 if (rc != KERN_SUCCESS) {
7238 /* undo what has been wired so far */
7239 vm_map_unwire_nested(map, start, s, user_wire,
7240 map_pmap, pmap_addr);
7241 if (physpage_p) {
7242 *physpage_p = 0;
7243 }
7244 }
7245
7246 return rc;
7247 }
7248
7249 kern_return_t
7250 vm_map_wire_external(
7251 vm_map_t map,
7252 vm_map_offset_t start,
7253 vm_map_offset_t end,
7254 vm_prot_t caller_prot,
7255 boolean_t user_wire)
7256 {
7257 kern_return_t kret;
7258
7259 kret = vm_map_wire_nested(map, start, end, caller_prot, vm_tag_bt(),
7260 user_wire, (pmap_t)NULL, 0, NULL);
7261 return kret;
7262 }
7263
7264 kern_return_t
7265 vm_map_wire_kernel(
7266 vm_map_t map,
7267 vm_map_offset_t start,
7268 vm_map_offset_t end,
7269 vm_prot_t caller_prot,
7270 vm_tag_t tag,
7271 boolean_t user_wire)
7272 {
7273 kern_return_t kret;
7274
7275 kret = vm_map_wire_nested(map, start, end, caller_prot, tag,
7276 user_wire, (pmap_t)NULL, 0, NULL);
7277 return kret;
7278 }
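
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper and the VM_KERN_MEMORY_OSFMK tag choice are illustrative
 * assumptions): wire a page-aligned range for kernel use under an
 * accounting tag, then release the wiring with vm_map_unwire().  Kernel
 * wirings pass user_wire == FALSE, so the user wire limits enforced in
 * add_wire_counts() do not apply.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_wire_range(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	kern_return_t kr;

	kr = vm_map_wire_kernel(map, start, end,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,
	    FALSE);             /* kernel wiring, not a user wiring */
	if (kr != KERN_SUCCESS) {
		return kr;
	}
	/* ... access the wired range ... */
	return vm_map_unwire(map, start, end, FALSE);
}
#endif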
7279
7280 kern_return_t
7281 vm_map_wire_and_extract_external(
7282 vm_map_t map,
7283 vm_map_offset_t start,
7284 vm_prot_t caller_prot,
7285 boolean_t user_wire,
7286 ppnum_t *physpage_p)
7287 {
7288 kern_return_t kret;
7289
7290 kret = vm_map_wire_nested(map,
7291 start,
7292 start + VM_MAP_PAGE_SIZE(map),
7293 caller_prot,
7294 vm_tag_bt(),
7295 user_wire,
7296 (pmap_t)NULL,
7297 0,
7298 physpage_p);
7299 if (kret != KERN_SUCCESS &&
7300 physpage_p != NULL) {
7301 *physpage_p = 0;
7302 }
7303 return kret;
7304 }
7305
7306 kern_return_t
7307 vm_map_wire_and_extract_kernel(
7308 vm_map_t map,
7309 vm_map_offset_t start,
7310 vm_prot_t caller_prot,
7311 vm_tag_t tag,
7312 boolean_t user_wire,
7313 ppnum_t *physpage_p)
7314 {
7315 kern_return_t kret;
7316
7317 kret = vm_map_wire_nested(map,
7318 start,
7319 start + VM_MAP_PAGE_SIZE(map),
7320 caller_prot,
7321 tag,
7322 user_wire,
7323 (pmap_t)NULL,
7324 0,
7325 physpage_p);
7326 if (kret != KERN_SUCCESS &&
7327 physpage_p != NULL) {
7328 *physpage_p = 0;
7329 }
7330 return kret;
7331 }
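
/*
 * Minimal usage sketch (not part of the original source; the example_*
 * wrapper and tag choice are illustrative assumptions): wire exactly one
 * VM-map page and retrieve its physical page number.  As enforced in
 * vm_map_wire_nested(), the wire-and-extract path operates on a single page
 * at a time.
 */
#if 0 /* illustrative only, not compiled */
static kern_return_t
example_wire_one_page(
	vm_map_t        map,
	vm_map_offset_t addr,
	ppnum_t         *ppnum_out)
{
	vm_map_offset_t page_start;

	page_start = vm_map_trunc_page(addr, VM_MAP_PAGE_MASK(map));
	return vm_map_wire_and_extract_kernel(map, page_start,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_KERN_MEMORY_OSFMK,
	    FALSE,              /* kernel wiring, not a user wiring */
	    ppnum_out);
}
#endif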
7332
7333 /*
7334 * vm_map_unwire:
7335 *
7336 * Sets the pageability of the specified address range in the target map
7337 * as pageable. Regions specified must have been wired previously.
7338 *
7339 * The map must not be locked, but a reference must remain to the map
7340 * throughout the call.
7341 *
7342 * Kernel will panic on failures. User unwire ignores holes and
7343 * unwired and in-transition entries to avoid losing memory by leaving
7344 * it unwired.
7345 */
7346 static kern_return_t
7347 vm_map_unwire_nested(
7348 vm_map_t map,
7349 vm_map_offset_t start,
7350 vm_map_offset_t end,
7351 boolean_t user_wire,
7352 pmap_t map_pmap,
7353 vm_map_offset_t pmap_addr)
7354 {
7355 vm_map_entry_t entry;
7356 struct vm_map_entry *first_entry, tmp_entry;
7357 boolean_t need_wakeup;
7358 boolean_t main_map = FALSE;
7359 unsigned int last_timestamp;
7360
7361 vm_map_lock(map);
7362 if (map_pmap == NULL) {
7363 main_map = TRUE;
7364 }
7365 last_timestamp = map->timestamp;
7366
7367 VM_MAP_RANGE_CHECK(map, start, end);
7368 assert(VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)));
7369 assert(VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)));
7370
7371 if (start == end) {
7372 /* We unwired what the caller asked for: zero pages */
7373 vm_map_unlock(map);
7374 return KERN_SUCCESS;
7375 }
7376
7377 if (vm_map_lookup_entry(map, start, &first_entry)) {
7378 entry = first_entry;
7379 /*
7380 * vm_map_clip_start will be done later.
7381 * We don't want to unnest any nested sub maps here !
7382 */
7383 } else {
7384 if (!user_wire) {
7385 panic("vm_map_unwire: start not found");
7386 }
7387 /* Start address is not in map. */
7388 vm_map_unlock(map);
7389 return KERN_INVALID_ADDRESS;
7390 }
7391
7392 if (entry->superpage_size) {
7393 /* superpages are always wired */
7394 vm_map_unlock(map);
7395 return KERN_INVALID_ADDRESS;
7396 }
7397
7398 need_wakeup = FALSE;
7399 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
7400 if (entry->in_transition) {
7401 /*
7402 * 1)
7403 * Another thread is wiring down this entry. Note
7404 * that if it were not for the other thread, we would
7405 * be unwiring an unwired entry. This is not
7406 * permitted. If we wait, we will be unwiring memory
7407 * we did not wire.
7408 *
7409 * 2)
7410 * Another thread is unwiring this entry. We did not
7411 * have a reference to it, because if we did, this
7412 * entry will not be getting unwired now.
7413 */
7414 if (!user_wire) {
7415 /*
7416 * XXX FBDP
7417 * This could happen: there could be some
7418 * overlapping vslock/vsunlock operations
7419 * going on.
7420 * We should probably just wait and retry,
7421 * but then we have to be careful that this
7422 * entry could get "simplified" after
7423 * "in_transition" gets unset and before
7424 * we re-lookup the entry, so we would
7425 * have to re-clip the entry to avoid
7426 * re-unwiring what we have already unwired...
7427 * See vm_map_wire_nested().
7428 *
7429 * Or we could just ignore "in_transition"
7430 * here and proceed to decrement the wired
7431 * count(s) on this entry. That should be fine
7432 * as long as "wired_count" doesn't drop all
7433 * the way to 0 (and we should panic if THAT
7434 * happens).
7435 */
7436 panic("vm_map_unwire: in_transition entry");
7437 }
7438
7439 entry = entry->vme_next;
7440 continue;
7441 }
7442
7443 if (entry->is_sub_map) {
7444 vm_map_offset_t sub_start;
7445 vm_map_offset_t sub_end;
7446 vm_map_offset_t local_end;
7447 pmap_t pmap;
7448
7449 vm_map_clip_start(map, entry, start);
7450 vm_map_clip_end(map, entry, end);
7451
7452 sub_start = VME_OFFSET(entry);
7453 sub_end = entry->vme_end - entry->vme_start;
7454 sub_end += VME_OFFSET(entry);
7455 local_end = entry->vme_end;
7456 if (map_pmap == NULL) {
7457 if (entry->use_pmap) {
7458 pmap = VME_SUBMAP(entry)->pmap;
7459 pmap_addr = sub_start;
7460 } else {
7461 pmap = map->pmap;
7462 pmap_addr = start;
7463 }
7464 if (entry->wired_count == 0 ||
7465 (user_wire && entry->user_wired_count == 0)) {
7466 if (!user_wire) {
7467 panic("vm_map_unwire: entry is unwired");
7468 }
7469 entry = entry->vme_next;
7470 continue;
7471 }
7472
7473 /*
7474 * Check for holes
7475 * Holes: Next entry should be contiguous unless
7476 * this is the end of the region.
7477 */
7478 if (((entry->vme_end < end) &&
7479 ((entry->vme_next == vm_map_to_entry(map)) ||
7480 (entry->vme_next->vme_start
7481 > entry->vme_end)))) {
7482 if (!user_wire) {
7483 panic("vm_map_unwire: non-contiguous region");
7484 }
7485 /*
7486 * entry = entry->vme_next;
7487 * continue;
7488 */
7489 }
7490
7491 subtract_wire_counts(map, entry, user_wire);
7492
7493 if (entry->wired_count != 0) {
7494 entry = entry->vme_next;
7495 continue;
7496 }
7497
7498 entry->in_transition = TRUE;
7499 tmp_entry = *entry;/* see comment in vm_map_wire() */
7500
7501 /*
7502 * We can unlock the map now. The in_transition state
7503 * guarantees existence of the entry.
7504 */
7505 vm_map_unlock(map);
7506 vm_map_unwire_nested(VME_SUBMAP(entry),
7507 sub_start, sub_end, user_wire, pmap, pmap_addr);
7508 vm_map_lock(map);
7509
7510 if (last_timestamp + 1 != map->timestamp) {
7511 /*
7512 * Find the entry again. It could have been
7513 * clipped or deleted after we unlocked the map.
7514 */
7515 if (!vm_map_lookup_entry(map,
7516 tmp_entry.vme_start,
7517 &first_entry)) {
7518 if (!user_wire) {
7519 panic("vm_map_unwire: re-lookup failed");
7520 }
7521 entry = first_entry->vme_next;
7522 } else {
7523 entry = first_entry;
7524 }
7525 }
7526 last_timestamp = map->timestamp;
7527
7528 /*
7529 * clear transition bit for all constituent entries
7530 * that were in the original entry (saved in
7531 * tmp_entry). Also check for waiters.
7532 */
7533 while ((entry != vm_map_to_entry(map)) &&
7534 (entry->vme_start < tmp_entry.vme_end)) {
7535 assert(entry->in_transition);
7536 entry->in_transition = FALSE;
7537 if (entry->needs_wakeup) {
7538 entry->needs_wakeup = FALSE;
7539 need_wakeup = TRUE;
7540 }
7541 entry = entry->vme_next;
7542 }
7543 continue;
7544 } else {
7545 vm_map_unlock(map);
7546 vm_map_unwire_nested(VME_SUBMAP(entry),
7547 sub_start, sub_end, user_wire, map_pmap,
7548 pmap_addr);
7549 vm_map_lock(map);
7550
7551 if (last_timestamp + 1 != map->timestamp) {
7552 /*
7553 * Find the entry again. It could have been
7554 * clipped or deleted after we unlocked the map.
7555 */
7556 if (!vm_map_lookup_entry(map,
7557 tmp_entry.vme_start,
7558 &first_entry)) {
7559 if (!user_wire) {
7560 panic("vm_map_unwire: re-lookup failed");
7561 }
7562 entry = first_entry->vme_next;
7563 } else {
7564 entry = first_entry;
7565 }
7566 }
7567 last_timestamp = map->timestamp;
7568 }
7569 }
7570
7571
7572 if ((entry->wired_count == 0) ||
7573 (user_wire && entry->user_wired_count == 0)) {
7574 if (!user_wire) {
7575 panic("vm_map_unwire: entry is unwired");
7576 }
7577
7578 entry = entry->vme_next;
7579 continue;
7580 }
7581
7582 assert(entry->wired_count > 0 &&
7583 (!user_wire || entry->user_wired_count > 0));
7584
7585 vm_map_clip_start(map, entry, start);
7586 vm_map_clip_end(map, entry, end);
7587
7588 /*
7589 * Check for holes
7590 * Holes: Next entry should be contiguous unless
7591 * this is the end of the region.
7592 */
7593 if (((entry->vme_end < end) &&
7594 ((entry->vme_next == vm_map_to_entry(map)) ||
7595 (entry->vme_next->vme_start > entry->vme_end)))) {
7596 if (!user_wire) {
7597 panic("vm_map_unwire: non-contiguous region");
7598 }
7599 entry = entry->vme_next;
7600 continue;
7601 }
7602
7603 subtract_wire_counts(map, entry, user_wire);
7604
7605 if (entry->wired_count != 0) {
7606 entry = entry->vme_next;
7607 continue;
7608 }
7609
7610 if (entry->zero_wired_pages) {
7611 entry->zero_wired_pages = FALSE;
7612 }
7613
7614 entry->in_transition = TRUE;
7615 tmp_entry = *entry; /* see comment in vm_map_wire() */
7616
7617 /*
7618 * We can unlock the map now. The in_transition state
7619 * guarantees existence of the entry.
7620 */
7621 vm_map_unlock(map);
7622 if (map_pmap) {
7623 vm_fault_unwire(map,
7624 &tmp_entry, FALSE, map_pmap, pmap_addr);
7625 } else {
7626 vm_fault_unwire(map,
7627 &tmp_entry, FALSE, map->pmap,
7628 tmp_entry.vme_start);
7629 }
7630 vm_map_lock(map);
7631
7632 if (last_timestamp + 1 != map->timestamp) {
7633 /*
7634 * Find the entry again. It could have been clipped
7635 * or deleted after we unlocked the map.
7636 */
7637 if (!vm_map_lookup_entry(map, tmp_entry.vme_start,
7638 &first_entry)) {
7639 if (!user_wire) {
7640 panic("vm_map_unwire: re-lookup failed");
7641 }
7642 entry = first_entry->vme_next;
7643 } else {
7644 entry = first_entry;
7645 }
7646 }
7647 last_timestamp = map->timestamp;
7648
7649 /*
7650 * clear transition bit for all constituent entries that
7651 * were in the original entry (saved in tmp_entry). Also
7652 * check for waiters.
7653 */
7654 while ((entry != vm_map_to_entry(map)) &&
7655 (entry->vme_start < tmp_entry.vme_end)) {
7656 assert(entry->in_transition);
7657 entry->in_transition = FALSE;
7658 if (entry->needs_wakeup) {
7659 entry->needs_wakeup = FALSE;
7660 need_wakeup = TRUE;
7661 }
7662 entry = entry->vme_next;
7663 }
7664 }
7665
7666 /*
7667 * We might have fragmented the address space when we wired this
7668 * range of addresses. Attempt to re-coalesce these VM map entries
7669 * with their neighbors now that they're no longer wired.
7670 * Under some circumstances, address space fragmentation can
7671 * prevent VM object shadow chain collapsing, which can cause
7672 * swap space leaks.
7673 */
7674 vm_map_simplify_range(map, start, end);
7675
7676 vm_map_unlock(map);
7677 /*
7678 * wake up anybody waiting on entries that we have unwired.
7679 */
7680 if (need_wakeup) {
7681 vm_map_entry_wakeup(map);
7682 }
7683 return KERN_SUCCESS;
7684 }
7685
7686 kern_return_t
7687 vm_map_unwire(
7688 vm_map_t map,
7689 vm_map_offset_t start,
7690 vm_map_offset_t end,
7691 boolean_t user_wire)
7692 {
7693 return vm_map_unwire_nested(map, start, end,
7694 user_wire, (pmap_t)NULL, 0);
7695 }
7696
7697
7698 /*
7699 * vm_map_entry_delete: [ internal use only ]
7700 *
7701 * Deallocate the given entry from the target map.
7702 */
7703 static void
7704 vm_map_entry_delete(
7705 vm_map_t map,
7706 vm_map_entry_t entry)
7707 {
7708 vm_map_offset_t s, e;
7709 vm_object_t object;
7710 vm_map_t submap;
7711
7712 s = entry->vme_start;
7713 e = entry->vme_end;
7714 assert(VM_MAP_PAGE_ALIGNED(s, FOURK_PAGE_MASK));
7715 assert(VM_MAP_PAGE_ALIGNED(e, FOURK_PAGE_MASK));
7716 if (VM_MAP_PAGE_MASK(map) >= PAGE_MASK) {
7717 assert(page_aligned(s));
7718 assert(page_aligned(e));
7719 }
7720 if (entry->map_aligned == TRUE) {
7721 assert(VM_MAP_PAGE_ALIGNED(s, VM_MAP_PAGE_MASK(map)));
7722 assert(VM_MAP_PAGE_ALIGNED(e, VM_MAP_PAGE_MASK(map)));
7723 }
7724 assert(entry->wired_count == 0);
7725 assert(entry->user_wired_count == 0);
7726 assert(!entry->permanent);
7727
7728 if (entry->is_sub_map) {
7729 object = NULL;
7730 submap = VME_SUBMAP(entry);
7731 } else {
7732 submap = NULL;
7733 object = VME_OBJECT(entry);
7734 }
7735
7736 vm_map_store_entry_unlink(map, entry);
7737 map->size -= e - s;
7738
7739 vm_map_entry_dispose(map, entry);
7740
7741 vm_map_unlock(map);
7742 /*
7743 * Deallocate the object only after removing all
7744 * pmap entries pointing to its pages.
7745 */
7746 if (submap) {
7747 vm_map_deallocate(submap);
7748 } else {
7749 vm_object_deallocate(object);
7750 }
7751 }
7752
7753 void
7754 vm_map_submap_pmap_clean(
7755 vm_map_t map,
7756 vm_map_offset_t start,
7757 vm_map_offset_t end,
7758 vm_map_t sub_map,
7759 vm_map_offset_t offset)
7760 {
7761 vm_map_offset_t submap_start;
7762 vm_map_offset_t submap_end;
7763 vm_map_size_t remove_size;
7764 vm_map_entry_t entry;
7765
7766 submap_end = offset + (end - start);
7767 submap_start = offset;
7768
7769 vm_map_lock_read(sub_map);
7770 if (vm_map_lookup_entry(sub_map, offset, &entry)) {
7771 remove_size = (entry->vme_end - entry->vme_start);
7772 if (offset > entry->vme_start) {
7773 remove_size -= offset - entry->vme_start;
7774 }
7775
7776
7777 if (submap_end < entry->vme_end) {
7778 remove_size -=
7779 entry->vme_end - submap_end;
7780 }
7781 if (entry->is_sub_map) {
7782 vm_map_submap_pmap_clean(
7783 sub_map,
7784 start,
7785 start + remove_size,
7786 VME_SUBMAP(entry),
7787 VME_OFFSET(entry));
7788 } else {
7789 if (map->mapped_in_other_pmaps &&
7790 os_ref_get_count(&map->map_refcnt) != 0 &&
7791 VME_OBJECT(entry) != NULL) {
7792 vm_object_pmap_protect_options(
7793 VME_OBJECT(entry),
7794 (VME_OFFSET(entry) +
7795 offset -
7796 entry->vme_start),
7797 remove_size,
7798 PMAP_NULL,
7799 PAGE_SIZE,
7800 entry->vme_start,
7801 VM_PROT_NONE,
7802 PMAP_OPTIONS_REMOVE);
7803 } else {
7804 pmap_remove(map->pmap,
7805 (addr64_t)start,
7806 (addr64_t)(start + remove_size));
7807 }
7808 }
7809 }
7810
7811 entry = entry->vme_next;
7812
7813 while ((entry != vm_map_to_entry(sub_map))
7814 && (entry->vme_start < submap_end)) {
7815 remove_size = (entry->vme_end - entry->vme_start);
7816 if (submap_end < entry->vme_end) {
7817 remove_size -= entry->vme_end - submap_end;
7818 }
7819 if (entry->is_sub_map) {
7820 vm_map_submap_pmap_clean(
7821 sub_map,
7822 (start + entry->vme_start) - offset,
7823 ((start + entry->vme_start) - offset) + remove_size,
7824 VME_SUBMAP(entry),
7825 VME_OFFSET(entry));
7826 } else {
7827 if (map->mapped_in_other_pmaps &&
7828 os_ref_get_count(&map->map_refcnt) != 0 &&
7829 VME_OBJECT(entry) != NULL) {
7830 vm_object_pmap_protect_options(
7831 VME_OBJECT(entry),
7832 VME_OFFSET(entry),
7833 remove_size,
7834 PMAP_NULL,
7835 PAGE_SIZE,
7836 entry->vme_start,
7837 VM_PROT_NONE,
7838 PMAP_OPTIONS_REMOVE);
7839 } else {
7840 pmap_remove(map->pmap,
7841 (addr64_t)((start + entry->vme_start)
7842 - offset),
7843 (addr64_t)(((start + entry->vme_start)
7844 - offset) + remove_size));
7845 }
7846 }
7847 entry = entry->vme_next;
7848 }
7849 vm_map_unlock_read(sub_map);
7850 return;
7851 }
7852
7853 /*
7854 * virt_memory_guard_ast:
7855 *
7856 * Handle the AST callout for a virtual memory guard:
7857 * raise an EXC_GUARD exception and terminate the task
7858 * if configured to do so.
7859 */
7860 void
7861 virt_memory_guard_ast(
7862 thread_t thread,
7863 mach_exception_data_type_t code,
7864 mach_exception_data_type_t subcode)
7865 {
7866 task_t task = thread->task;
7867 assert(task != kernel_task);
7868 assert(task == current_task());
7869 uint32_t behavior;
7870
7871 behavior = task->task_exc_guard;
7872
7873 /* Is delivery enabled */
7874 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7875 return;
7876 }
7877
7878 /* If only once, make sure we're that once */
7879 while (behavior & TASK_EXC_GUARD_VM_ONCE) {
7880 uint32_t new_behavior = behavior & ~TASK_EXC_GUARD_VM_DELIVER;
7881
7882 if (OSCompareAndSwap(behavior, new_behavior, &task->task_exc_guard)) {
7883 break;
7884 }
7885 behavior = task->task_exc_guard;
7886 if ((behavior & TASK_EXC_GUARD_VM_DELIVER) == 0) {
7887 return;
7888 }
7889 }
7890
7891 /* Raise exception via corpse fork or synchronously */
7892 if ((task->task_exc_guard & TASK_EXC_GUARD_VM_CORPSE) &&
7893 (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) == 0) {
7894 task_violated_guard(code, subcode, NULL);
7895 } else {
7896 task_exception_notify(EXC_GUARD, code, subcode);
7897 }
7898
7899 /* Terminate the task if desired */
7900 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7901 task_bsdtask_kill(current_task());
7902 }
7903 }
7904
7905 /*
7906 * vm_map_guard_exception:
7907 *
7908 * Generate a GUARD_TYPE_VIRTUAL_MEMORY EXC_GUARD exception.
7909 *
7910 * Right now, we do this when we find nothing mapped, or a
7911 * gap in the mapping when a user address space deallocation
7912 * was requested. We report the address of the first gap found.
7913 */
7914 static void
7915 vm_map_guard_exception(
7916 vm_map_offset_t gap_start,
7917 unsigned reason)
7918 {
7919 mach_exception_code_t code = 0;
7920 unsigned int guard_type = GUARD_TYPE_VIRT_MEMORY;
7921 unsigned int target = 0; /* should we pass in pid associated with map? */
7922 mach_exception_data_type_t subcode = (uint64_t)gap_start;
7923 boolean_t fatal = FALSE;
7924
7925 task_t task = current_task();
7926
7927 /* Can't deliver exceptions to kernel task */
7928 if (task == kernel_task) {
7929 return;
7930 }
7931
7932 EXC_GUARD_ENCODE_TYPE(code, guard_type);
7933 EXC_GUARD_ENCODE_FLAVOR(code, reason);
7934 EXC_GUARD_ENCODE_TARGET(code, target);
7935
7936 if (task->task_exc_guard & TASK_EXC_GUARD_VM_FATAL) {
7937 fatal = TRUE;
7938 }
7939 thread_guard_violation(current_thread(), code, subcode, fatal);
7940 }
7941
7942 /*
7943 * vm_map_delete: [ internal use only ]
7944 *
7945 * Deallocates the given address range from the target map.
7946 * Removes all user wirings. Unwires one kernel wiring if
7947 * VM_MAP_REMOVE_KUNWIRE is set. Waits for kernel wirings to go
7948 * away if VM_MAP_REMOVE_WAIT_FOR_KWIRE is set. Sleeps
7949 * interruptibly if VM_MAP_REMOVE_INTERRUPTIBLE is set.
7950 *
7951 * This routine is called with map locked and leaves map locked.
7952 */
7953 static kern_return_t
7954 vm_map_delete(
7955 vm_map_t map,
7956 vm_map_offset_t start,
7957 vm_map_offset_t end,
7958 int flags,
7959 vm_map_t zap_map)
7960 {
7961 vm_map_entry_t entry, next;
7962 struct vm_map_entry *first_entry, tmp_entry;
7963 vm_map_offset_t s;
7964 vm_object_t object;
7965 boolean_t need_wakeup;
7966 unsigned int last_timestamp = ~0; /* unlikely value */
7967 int interruptible;
7968 vm_map_offset_t gap_start;
7969 __unused vm_map_offset_t save_start = start;
7970 __unused vm_map_offset_t save_end = end;
7971 const vm_map_offset_t FIND_GAP = 1; /* a not page aligned value */
7972 const vm_map_offset_t GAPS_OK = 2; /* a different not page aligned value */
7973
7974 if (map != kernel_map && !(flags & VM_MAP_REMOVE_GAPS_OK) && !map->terminated) {
7975 gap_start = FIND_GAP;
7976 } else {
7977 gap_start = GAPS_OK;
7978 }
7979
7980 interruptible = (flags & VM_MAP_REMOVE_INTERRUPTIBLE) ?
7981 THREAD_ABORTSAFE : THREAD_UNINT;
7982
7983 /*
7984 * All our DMA I/O operations in IOKit are currently done by
7985 * wiring through the map entries of the task requesting the I/O.
7986 * Because of this, we must always wait for kernel wirings
7987 * to go away on the entries before deleting them.
7988 *
7989 * Any caller who wants to actually remove a kernel wiring
7990 * should explicitly set the VM_MAP_REMOVE_KUNWIRE flag to
7991 * properly remove one wiring instead of blasting through
7992 * them all.
7993 */
7994 flags |= VM_MAP_REMOVE_WAIT_FOR_KWIRE;
7995
7996 while (1) {
7997 /*
7998 * Find the start of the region, and clip it
7999 */
8000 if (vm_map_lookup_entry(map, start, &first_entry)) {
8001 entry = first_entry;
8002 if (map == kalloc_map &&
8003 (entry->vme_start != start ||
8004 entry->vme_end != end)) {
8005 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8006 "mismatched entry %p [0x%llx:0x%llx]\n",
8007 map,
8008 (uint64_t)start,
8009 (uint64_t)end,
8010 entry,
8011 (uint64_t)entry->vme_start,
8012 (uint64_t)entry->vme_end);
8013 }
8014
8015 /*
8016 * If in a superpage, extend the range to include the start of the mapping.
8017 */
8018 if (entry->superpage_size && (start & ~SUPERPAGE_MASK)) {
8019 start = SUPERPAGE_ROUND_DOWN(start);
8020 continue;
8021 }
8022
8023 if (start == entry->vme_start) {
8024 /*
8025 * No need to clip. We don't want to cause
8026 * any unnecessary unnesting in this case...
8027 */
8028 } else {
8029 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8030 entry->map_aligned &&
8031 !VM_MAP_PAGE_ALIGNED(
8032 start,
8033 VM_MAP_PAGE_MASK(map))) {
8034 /*
8035 * The entry will no longer be
8036 * map-aligned after clipping
8037 * and the caller said it's OK.
8038 */
8039 entry->map_aligned = FALSE;
8040 }
8041 if (map == kalloc_map) {
8042 panic("vm_map_delete(%p,0x%llx,0x%llx):"
8043 " clipping %p at 0x%llx\n",
8044 map,
8045 (uint64_t)start,
8046 (uint64_t)end,
8047 entry,
8048 (uint64_t)start);
8049 }
8050 vm_map_clip_start(map, entry, start);
8051 }
8052
8053 /*
8054 * Fix the lookup hint now, rather than each
8055 * time through the loop.
8056 */
8057 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8058 } else {
8059 if (map->pmap == kernel_pmap &&
8060 os_ref_get_count(&map->map_refcnt) != 0) {
8061 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8062 "no map entry at 0x%llx\n",
8063 map,
8064 (uint64_t)start,
8065 (uint64_t)end,
8066 (uint64_t)start);
8067 }
8068 entry = first_entry->vme_next;
8069 if (gap_start == FIND_GAP) {
8070 gap_start = start;
8071 }
8072 }
8073 break;
8074 }
8075 if (entry->superpage_size) {
8076 end = SUPERPAGE_ROUND_UP(end);
8077 }
8078
8079 need_wakeup = FALSE;
8080 /*
8081 * Step through all entries in this region
8082 */
8083 s = entry->vme_start;
8084 while ((entry != vm_map_to_entry(map)) && (s < end)) {
8085 /*
8086 * At this point, we have deleted all the memory entries
8087 * between "start" and "s". We still need to delete
8088 * all memory entries between "s" and "end".
8089 * While we were blocked and the map was unlocked, some
8090 * new memory entries could have been re-allocated between
8091 * "start" and "s" and we don't want to mess with those.
8092 * Some of those entries could even have been re-assembled
8093 * with an entry after "s" (in vm_map_simplify_entry()), so
8094 * we may have to vm_map_clip_start() again.
8095 */
8096
8097 if (entry->vme_start >= s) {
8098 /*
8099 * This entry starts on or after "s"
8100 * so no need to clip its start.
8101 */
8102 } else {
8103 /*
8104 * This entry has been re-assembled by a
8105 * vm_map_simplify_entry(). We need to
8106 * re-clip its start.
8107 */
8108 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8109 entry->map_aligned &&
8110 !VM_MAP_PAGE_ALIGNED(s,
8111 VM_MAP_PAGE_MASK(map))) {
8112 /*
8113 * The entry will no longer be map-aligned
8114 * after clipping and the caller said it's OK.
8115 */
8116 entry->map_aligned = FALSE;
8117 }
8118 if (map == kalloc_map) {
8119 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8120 "clipping %p at 0x%llx\n",
8121 map,
8122 (uint64_t)start,
8123 (uint64_t)end,
8124 entry,
8125 (uint64_t)s);
8126 }
8127 vm_map_clip_start(map, entry, s);
8128 }
8129 if (entry->vme_end <= end) {
8130 /*
8131 * This entry is going away completely, so no need
8132 * to clip and possibly cause an unnecessary unnesting.
8133 */
8134 } else {
8135 if ((flags & VM_MAP_REMOVE_NO_MAP_ALIGN) &&
8136 entry->map_aligned &&
8137 !VM_MAP_PAGE_ALIGNED(end,
8138 VM_MAP_PAGE_MASK(map))) {
8139 /*
8140 * The entry will no longer be map-aligned
8141 * after clipping and the caller said it's OK.
8142 */
8143 entry->map_aligned = FALSE;
8144 }
8145 if (map == kalloc_map) {
8146 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8147 "clipping %p at 0x%llx\n",
8148 map,
8149 (uint64_t)start,
8150 (uint64_t)end,
8151 entry,
8152 (uint64_t)end);
8153 }
8154 vm_map_clip_end(map, entry, end);
8155 }
8156
8157 if (entry->permanent) {
8158 if (map->pmap == kernel_pmap) {
8159 panic("%s(%p,0x%llx,0x%llx): "
8160 "attempt to remove permanent "
8161 "VM map entry "
8162 "%p [0x%llx:0x%llx]\n",
8163 __FUNCTION__,
8164 map,
8165 (uint64_t) start,
8166 (uint64_t) end,
8167 entry,
8168 (uint64_t) entry->vme_start,
8169 (uint64_t) entry->vme_end);
8170 } else if (flags & VM_MAP_REMOVE_IMMUTABLE) {
8171 // printf("FBDP %d[%s] removing permanent entry %p [0x%llx:0x%llx] prot 0x%x/0x%x\n", proc_selfpid(), (current_task()->bsd_info ? proc_name_address(current_task()->bsd_info) : "?"), entry, (uint64_t)entry->vme_start, (uint64_t)entry->vme_end, entry->protection, entry->max_protection);
8172 entry->permanent = FALSE;
8173 #if PMAP_CS
8174 } else if ((entry->protection & VM_PROT_EXECUTE) && !pmap_cs_enforced(map->pmap)) {
8175 entry->permanent = FALSE;
8176
8177 printf("%d[%s] %s(0x%llx,0x%llx): "
8178 "pmap_cs disabled, allowing for permanent executable entry [0x%llx:0x%llx] "
8179 "prot 0x%x/0x%x\n",
8180 proc_selfpid(),
8181 (current_task()->bsd_info
8182 ? proc_name_address(current_task()->bsd_info)
8183 : "?"),
8184 __FUNCTION__,
8185 (uint64_t) start,
8186 (uint64_t) end,
8187 (uint64_t)entry->vme_start,
8188 (uint64_t)entry->vme_end,
8189 entry->protection,
8190 entry->max_protection);
8191 #endif
8192 } else {
8193 if (vm_map_executable_immutable_verbose) {
8194 printf("%d[%s] %s(0x%llx,0x%llx): "
8195 "permanent entry [0x%llx:0x%llx] "
8196 "prot 0x%x/0x%x\n",
8197 proc_selfpid(),
8198 (current_task()->bsd_info
8199 ? proc_name_address(current_task()->bsd_info)
8200 : "?"),
8201 __FUNCTION__,
8202 (uint64_t) start,
8203 (uint64_t) end,
8204 (uint64_t)entry->vme_start,
8205 (uint64_t)entry->vme_end,
8206 entry->protection,
8207 entry->max_protection);
8208 }
8209 /*
8210 * dtrace -n 'vm_map_delete_permanent { print("start=0x%llx end=0x%llx prot=0x%x/0x%x\n", arg0, arg1, arg2, arg3); stack(); ustack(); }'
8211 */
8212 DTRACE_VM5(vm_map_delete_permanent,
8213 vm_map_offset_t, entry->vme_start,
8214 vm_map_offset_t, entry->vme_end,
8215 vm_prot_t, entry->protection,
8216 vm_prot_t, entry->max_protection,
8217 int, VME_ALIAS(entry));
8218 }
8219 }
8220
8221
8222 if (entry->in_transition) {
8223 wait_result_t wait_result;
8224
8225 /*
8226 * Another thread is wiring/unwiring this entry.
8227 * Let the other thread know we are waiting.
8228 */
8229 assert(s == entry->vme_start);
8230 entry->needs_wakeup = TRUE;
8231
8232 /*
8233 * wake up anybody waiting on entries that we have
8234 * already unwired/deleted.
8235 */
8236 if (need_wakeup) {
8237 vm_map_entry_wakeup(map);
8238 need_wakeup = FALSE;
8239 }
8240
8241 wait_result = vm_map_entry_wait(map, interruptible);
8242
8243 if (interruptible &&
8244 wait_result == THREAD_INTERRUPTED) {
8245 /*
8246 * We do not clear the needs_wakeup flag,
8247 * since we cannot tell if we were the only one.
8248 */
8249 return KERN_ABORTED;
8250 }
8251
8252 /*
8253 * The entry could have been clipped or it
8254 * may not exist anymore. Look it up again.
8255 */
8256 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8257 /*
8258 * User: use the next entry
8259 */
8260 if (gap_start == FIND_GAP) {
8261 gap_start = s;
8262 }
8263 entry = first_entry->vme_next;
8264 s = entry->vme_start;
8265 } else {
8266 entry = first_entry;
8267 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8268 }
8269 last_timestamp = map->timestamp;
8270 continue;
8271 } /* end in_transition */
8272
8273 if (entry->wired_count) {
8274 boolean_t user_wire;
8275
8276 user_wire = entry->user_wired_count > 0;
8277
8278 /*
8279 * Remove a kernel wiring if requested
8280 */
8281 if (flags & VM_MAP_REMOVE_KUNWIRE) {
8282 entry->wired_count--;
8283 }
8284
8285 /*
8286 * Remove all user wirings for proper accounting
8287 */
8288 if (entry->user_wired_count > 0) {
8289 while (entry->user_wired_count) {
8290 subtract_wire_counts(map, entry, user_wire);
8291 }
8292 }
8293
8294 if (entry->wired_count != 0) {
8295 assert(map != kernel_map);
8296 /*
8297 * Cannot continue. Typical case is when
8298 * a user thread has physical I/O pending
8299 * on this page. Either wait for the
8300 * kernel wiring to go away or return an
8301 * error.
8302 */
8303 if (flags & VM_MAP_REMOVE_WAIT_FOR_KWIRE) {
8304 wait_result_t wait_result;
8305
8306 assert(s == entry->vme_start);
8307 entry->needs_wakeup = TRUE;
8308 wait_result = vm_map_entry_wait(map,
8309 interruptible);
8310
8311 if (interruptible &&
8312 wait_result == THREAD_INTERRUPTED) {
8313 /*
8314 * We do not clear the
8315 * needs_wakeup flag, since we
8316 * cannot tell if we were the
8317 * only one.
8318 */
8319 return KERN_ABORTED;
8320 }
8321
8322 /*
8323 * The entry could have been clipped or
8324 * it may not exist anymore. Look it
8325 * up again.
8326 */
8327 if (!vm_map_lookup_entry(map, s,
8328 &first_entry)) {
8329 assert(map != kernel_map);
8330 /*
8331 * User: use the next entry
8332 */
8333 if (gap_start == FIND_GAP) {
8334 gap_start = s;
8335 }
8336 entry = first_entry->vme_next;
8337 s = entry->vme_start;
8338 } else {
8339 entry = first_entry;
8340 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8341 }
8342 last_timestamp = map->timestamp;
8343 continue;
8344 } else {
8345 return KERN_FAILURE;
8346 }
8347 }
8348
8349 entry->in_transition = TRUE;
8350 /*
8351 * copy current entry. see comment in vm_map_wire()
8352 */
8353 tmp_entry = *entry;
8354 assert(s == entry->vme_start);
8355
8356 /*
8357 * We can unlock the map now. The in_transition
8358 * state guarantees existence of the entry.
8359 */
8360 vm_map_unlock(map);
8361
8362 if (tmp_entry.is_sub_map) {
8363 vm_map_t sub_map;
8364 vm_map_offset_t sub_start, sub_end;
8365 pmap_t pmap;
8366 vm_map_offset_t pmap_addr;
8367
8368
8369 sub_map = VME_SUBMAP(&tmp_entry);
8370 sub_start = VME_OFFSET(&tmp_entry);
8371 sub_end = sub_start + (tmp_entry.vme_end -
8372 tmp_entry.vme_start);
8373 if (tmp_entry.use_pmap) {
8374 pmap = sub_map->pmap;
8375 pmap_addr = tmp_entry.vme_start;
8376 } else {
8377 pmap = map->pmap;
8378 pmap_addr = tmp_entry.vme_start;
8379 }
8380 (void) vm_map_unwire_nested(sub_map,
8381 sub_start, sub_end,
8382 user_wire,
8383 pmap, pmap_addr);
8384 } else {
8385 if (VME_OBJECT(&tmp_entry) == kernel_object) {
8386 pmap_protect_options(
8387 map->pmap,
8388 tmp_entry.vme_start,
8389 tmp_entry.vme_end,
8390 VM_PROT_NONE,
8391 PMAP_OPTIONS_REMOVE,
8392 NULL);
8393 }
8394 vm_fault_unwire(map, &tmp_entry,
8395 VME_OBJECT(&tmp_entry) == kernel_object,
8396 map->pmap, tmp_entry.vme_start);
8397 }
8398
8399 vm_map_lock(map);
8400
8401 if (last_timestamp + 1 != map->timestamp) {
8402 /*
8403 * Find the entry again. It could have
8404 * been clipped after we unlocked the map.
8405 */
8406 if (!vm_map_lookup_entry(map, s, &first_entry)) {
8407 assert((map != kernel_map) &&
8408 (!entry->is_sub_map));
8409 if (gap_start == FIND_GAP) {
8410 gap_start = s;
8411 }
8412 first_entry = first_entry->vme_next;
8413 s = first_entry->vme_start;
8414 } else {
8415 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8416 }
8417 } else {
8418 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8419 first_entry = entry;
8420 }
8421
8422 last_timestamp = map->timestamp;
8423
8424 entry = first_entry;
8425 while ((entry != vm_map_to_entry(map)) &&
8426 (entry->vme_start < tmp_entry.vme_end)) {
8427 assert(entry->in_transition);
8428 entry->in_transition = FALSE;
8429 if (entry->needs_wakeup) {
8430 entry->needs_wakeup = FALSE;
8431 need_wakeup = TRUE;
8432 }
8433 entry = entry->vme_next;
8434 }
8435 /*
8436 * We have unwired the entry(s). Go back and
8437 * delete them.
8438 */
8439 entry = first_entry;
8440 continue;
8441 }
8442
8443 /* entry is unwired */
8444 assert(entry->wired_count == 0);
8445 assert(entry->user_wired_count == 0);
8446
8447 assert(s == entry->vme_start);
8448
8449 if (flags & VM_MAP_REMOVE_NO_PMAP_CLEANUP) {
8450 /*
8451 * XXX with the VM_MAP_REMOVE_SAVE_ENTRIES flag to
8452 * vm_map_delete(), some map entries might have been
8453 * transferred to a "zap_map", which doesn't have a
8454 * pmap. The original pmap has already been flushed
8455 * in the vm_map_delete() call targeting the original
8456 * map, but when we get to destroying the "zap_map",
8457 * we don't have any pmap to flush, so let's just skip
8458 * all this.
8459 */
8460 } else if (entry->is_sub_map) {
8461 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
8462 "map %p (%d) entry %p submap %p (%d)\n",
8463 map, VM_MAP_PAGE_SHIFT(map), entry,
8464 VME_SUBMAP(entry),
8465 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8466 if (entry->use_pmap) {
8467 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) == VM_MAP_PAGE_SHIFT(map),
8468 "map %p (%d) entry %p submap %p (%d)\n",
8469 map, VM_MAP_PAGE_SHIFT(map), entry,
8470 VME_SUBMAP(entry),
8471 VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
8472 #ifndef NO_NESTED_PMAP
8473 int pmap_flags;
8474
8475 if (flags & VM_MAP_REMOVE_NO_UNNESTING) {
8476 /*
8477 * This is the final cleanup of the
8478 * address space being terminated.
8479 * No new mappings are expected and
8480 * we don't really need to unnest the
8481 * shared region (and lose the "global"
8482 * pmap mappings, if applicable).
8483 *
8484 * Tell the pmap layer that we're
8485 * "clean" wrt nesting.
8486 */
8487 pmap_flags = PMAP_UNNEST_CLEAN;
8488 } else {
8489 /*
8490 * We're unmapping part of the nested
8491 * shared region, so we can't keep the
8492 * nested pmap.
8493 */
8494 pmap_flags = 0;
8495 }
8496 pmap_unnest_options(
8497 map->pmap,
8498 (addr64_t)entry->vme_start,
8499 entry->vme_end - entry->vme_start,
8500 pmap_flags);
8501 #endif /* NO_NESTED_PMAP */
8502 if (map->mapped_in_other_pmaps &&
8503 os_ref_get_count(&map->map_refcnt) != 0) {
8504 /* clean up parent map/maps */
8505 vm_map_submap_pmap_clean(
8506 map, entry->vme_start,
8507 entry->vme_end,
8508 VME_SUBMAP(entry),
8509 VME_OFFSET(entry));
8510 }
8511 } else {
8512 vm_map_submap_pmap_clean(
8513 map, entry->vme_start, entry->vme_end,
8514 VME_SUBMAP(entry),
8515 VME_OFFSET(entry));
8516 }
8517 } else if (VME_OBJECT(entry) != kernel_object &&
8518 VME_OBJECT(entry) != compressor_object) {
8519 object = VME_OBJECT(entry);
8520 if (map->mapped_in_other_pmaps &&
8521 os_ref_get_count(&map->map_refcnt) != 0) {
8522 vm_object_pmap_protect_options(
8523 object, VME_OFFSET(entry),
8524 entry->vme_end - entry->vme_start,
8525 PMAP_NULL,
8526 PAGE_SIZE,
8527 entry->vme_start,
8528 VM_PROT_NONE,
8529 PMAP_OPTIONS_REMOVE);
8530 } else if ((VME_OBJECT(entry) != VM_OBJECT_NULL) ||
8531 (map->pmap == kernel_pmap)) {
8532 /* Remove translations associated
8533 * with this range unless the entry
8534 * does not have an object, or
8535 * it's the kernel map or a descendant
8536 * since the platform could potentially
8537 * create "backdoor" mappings invisible
8538 * to the VM. It is expected that
8539 * objectless, non-kernel ranges
8540 * do not have such VM invisible
8541 * translations.
8542 */
8543 pmap_remove_options(map->pmap,
8544 (addr64_t)entry->vme_start,
8545 (addr64_t)entry->vme_end,
8546 PMAP_OPTIONS_REMOVE);
8547 }
8548 }
8549
8550 if (entry->iokit_acct) {
8551 /* alternate accounting */
8552 DTRACE_VM4(vm_map_iokit_unmapped_region,
8553 vm_map_t, map,
8554 vm_map_offset_t, entry->vme_start,
8555 vm_map_offset_t, entry->vme_end,
8556 int, VME_ALIAS(entry));
8557 vm_map_iokit_unmapped_region(map,
8558 (entry->vme_end -
8559 entry->vme_start));
8560 entry->iokit_acct = FALSE;
8561 entry->use_pmap = FALSE;
8562 }
8563
8564 /*
8565 * All pmap mappings for this map entry must have been
8566 * cleared by now.
8567 */
8568 #if DEBUG
8569 assert(vm_map_pmap_is_empty(map,
8570 entry->vme_start,
8571 entry->vme_end));
8572 #endif /* DEBUG */
8573
8574 next = entry->vme_next;
8575
8576 if (map->pmap == kernel_pmap &&
8577 os_ref_get_count(&map->map_refcnt) != 0 &&
8578 entry->vme_end < end &&
8579 (next == vm_map_to_entry(map) ||
8580 next->vme_start != entry->vme_end)) {
8581 panic("vm_map_delete(%p,0x%llx,0x%llx): "
8582 "hole after %p at 0x%llx\n",
8583 map,
8584 (uint64_t)start,
8585 (uint64_t)end,
8586 entry,
8587 (uint64_t)entry->vme_end);
8588 }
8589
8590 /*
8591 * If the desired range didn't end with "entry", then there is a gap if
8592 * we wrapped around to the start of the map or if "entry" and "next"
8593 * aren't contiguous.
8594 *
8595 * The vm_map_round_page() is needed since an entry can be less than VM_MAP_PAGE_MASK() sized.
8596 * For example, on devices with h/w 4K pages, map entry sizes are now all 16K.
8597 */
8598 if (gap_start == FIND_GAP &&
8599 vm_map_round_page(entry->vme_end, VM_MAP_PAGE_MASK(map)) < end &&
8600 (next == vm_map_to_entry(map) || entry->vme_end != next->vme_start)) {
8601 gap_start = entry->vme_end;
8602 }
8603 s = next->vme_start;
8604 last_timestamp = map->timestamp;
8605
8606 if (entry->permanent) {
8607 /*
8608 * A permanent entry cannot be removed, so leave it
8609 * in place but remove all access permissions.
8610 */
8611 entry->protection = VM_PROT_NONE;
8612 entry->max_protection = VM_PROT_NONE;
8613 } else if ((flags & VM_MAP_REMOVE_SAVE_ENTRIES) &&
8614 zap_map != VM_MAP_NULL) {
8615 vm_map_size_t entry_size;
8616 /*
8617 * The caller wants to save the affected VM map entries
8618 * into the "zap_map". The caller will take care of
8619 * these entries.
8620 */
8621 /* unlink the entry from "map" ... */
8622 vm_map_store_entry_unlink(map, entry);
8623 /* ... and add it to the end of the "zap_map" */
8624 vm_map_store_entry_link(zap_map,
8625 vm_map_last_entry(zap_map),
8626 entry,
8627 VM_MAP_KERNEL_FLAGS_NONE);
8628 entry_size = entry->vme_end - entry->vme_start;
8629 map->size -= entry_size;
8630 zap_map->size += entry_size;
8631 /* we didn't unlock the map, so no timestamp increase */
8632 last_timestamp--;
8633 } else {
8634 vm_map_entry_delete(map, entry);
8635 /* vm_map_entry_delete unlocks the map */
8636 vm_map_lock(map);
8637 }
8638
8639 entry = next;
8640
8641 if (entry == vm_map_to_entry(map)) {
8642 break;
8643 }
8644 if (last_timestamp + 1 != map->timestamp) {
8645 /*
8646 * We are responsible for deleting everything
8647 * from the given space. If someone has interfered,
8648 * we pick up where we left off. Back fills should
8649 * be all right for anyone, except map_delete, and
8650 * we have to assume that the task has been fully
8651 * disabled before we get here
8652 */
8653 if (!vm_map_lookup_entry(map, s, &entry)) {
8654 entry = entry->vme_next;
8655
8656 /*
8657 * Nothing found for s. If we weren't already done, then there is a gap.
8658 */
8659 if (gap_start == FIND_GAP && s < end) {
8660 gap_start = s;
8661 }
8662 s = entry->vme_start;
8663 } else {
8664 SAVE_HINT_MAP_WRITE(map, entry->vme_prev);
8665 }
8666 /*
8667 * Others can not only allocate behind us, entries can
8668 * also coalesce while we don't hold the map lock.
8669 */
8670 if (entry == vm_map_to_entry(map)) {
8671 break;
8672 }
8673 }
8674 last_timestamp = map->timestamp;
8675 }
8676
8677 if (map->wait_for_space) {
8678 thread_wakeup((event_t) map);
8679 }
8680 /*
8681 * wake up anybody waiting on entries that we have already deleted.
8682 */
8683 if (need_wakeup) {
8684 vm_map_entry_wakeup(map);
8685 }
8686
8687 if (gap_start != FIND_GAP && gap_start != GAPS_OK) {
8688 DTRACE_VM3(kern_vm_deallocate_gap,
8689 vm_map_offset_t, gap_start,
8690 vm_map_offset_t, save_start,
8691 vm_map_offset_t, save_end);
8692 if (!(flags & VM_MAP_REMOVE_GAPS_OK)) {
8693 vm_map_guard_exception(gap_start, kGUARD_EXC_DEALLOC_GAP);
8694 }
8695 }
8696
8697 return KERN_SUCCESS;
8698 }
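
/*
 * A minimal sketch of the VM_MAP_REMOVE_SAVE_ENTRIES / "zap map" path
 * described in the comments inside vm_map_delete() above. The range is
 * hypothetical, and vm_map_create()/vm_map_destroy() are assumed to have
 * the signatures declared in vm_map.h.
 */
#if 0	/* illustrative sketch only -- not compiled */
static void
vm_map_delete_zap_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	vm_map_t	zap_map;

	/* An empty map that will receive the unlinked entries. */
	zap_map = vm_map_create(PMAP_NULL, start, end,
	    map->hdr.entries_pageable);

	vm_map_lock(map);
	/* Entries are moved to "zap_map" instead of being freed here. */
	(void) vm_map_delete(map, start, end,
	    VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN,
	    zap_map);
	vm_map_unlock(map);

	/*
	 * The original map's pmap was already flushed above, so skip
	 * the pmap cleanup while tearing down the zap map (see the
	 * VM_MAP_REMOVE_NO_PMAP_CLEANUP comment in vm_map_delete()).
	 */
	vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
}
#endif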
8699
8700
8701 /*
8702 * vm_map_terminate:
8703 *
8704 * Clean out a task's map.
8705 */
8706 kern_return_t
8707 vm_map_terminate(
8708 vm_map_t map)
8709 {
8710 vm_map_lock(map);
8711 map->terminated = TRUE;
8712 vm_map_unlock(map);
8713
8714 return vm_map_remove(map,
8715 map->min_offset,
8716 map->max_offset,
8717 /*
8718 * Final cleanup:
8719 * + no unnesting
8720 * + remove immutable mappings
8721 * + allow gaps in range
8722 */
8723 (VM_MAP_REMOVE_NO_UNNESTING |
8724 VM_MAP_REMOVE_IMMUTABLE |
8725 VM_MAP_REMOVE_GAPS_OK));
8726 }
8727
8728 /*
8729 * vm_map_remove:
8730 *
8731 * Remove the given address range from the target map.
8732 * This is the exported form of vm_map_delete.
8733 */
8734 kern_return_t
8735 vm_map_remove(
8736 vm_map_t map,
8737 vm_map_offset_t start,
8738 vm_map_offset_t end,
8739 boolean_t flags)
8740 {
8741 kern_return_t result;
8742
8743 vm_map_lock(map);
8744 VM_MAP_RANGE_CHECK(map, start, end);
8745 /*
8746 * For the zone maps, the kernel controls the allocation/freeing of memory.
8747 * Any free to the zone maps should be within the bounds of the map and
8748 * should free up memory. If the VM_MAP_RANGE_CHECK() silently converts a
8749 * free to the zone maps into a no-op, there is a problem and we should
8750 * panic.
8751 */
8752 if ((start == end) && zone_maps_owned(start, 1)) {
8753 panic("Nothing being freed to a zone map. start = end = %p\n", (void *)start);
8754 }
8755 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8756 vm_map_unlock(map);
8757
8758 return result;
8759 }
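
/*
 * A minimal caller-side sketch of the removal flags described above
 * vm_map_delete(). The range, and the assumption that this caller owns
 * exactly one kernel wiring on it, are hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
vm_map_remove_kwired_example(
	vm_map_t	map,
	vm_map_offset_t	start,
	vm_map_offset_t	end)
{
	/*
	 * VM_MAP_REMOVE_KUNWIRE drops the one kernel wiring we took on
	 * this range; VM_MAP_REMOVE_INTERRUPTIBLE lets the wait for any
	 * remaining kernel wirings be aborted, in which case KERN_ABORTED
	 * is returned. All user wirings are removed unconditionally.
	 */
	return vm_map_remove(map, start, end,
	    VM_MAP_REMOVE_KUNWIRE | VM_MAP_REMOVE_INTERRUPTIBLE);
}
#endif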
8760
8761 /*
8762 * vm_map_remove_locked:
8763 *
8764 * Remove the given address range from the target locked map.
8765 * This is the exported form of vm_map_delete.
8766 */
8767 kern_return_t
8768 vm_map_remove_locked(
8769 vm_map_t map,
8770 vm_map_offset_t start,
8771 vm_map_offset_t end,
8772 boolean_t flags)
8773 {
8774 kern_return_t result;
8775
8776 VM_MAP_RANGE_CHECK(map, start, end);
8777 result = vm_map_delete(map, start, end, flags, VM_MAP_NULL);
8778 return result;
8779 }
8780
8781
8782 /*
8783 * Routine: vm_map_copy_allocate
8784 *
8785 * Description:
8786 * Allocates and initializes a map copy object.
8787 */
8788 static vm_map_copy_t
8789 vm_map_copy_allocate(void)
8790 {
8791 vm_map_copy_t new_copy;
8792
8793 new_copy = zalloc(vm_map_copy_zone);
8794 bzero(new_copy, sizeof(*new_copy));
8795 new_copy->c_u.hdr.rb_head_store.rbh_root = (void*)(int)SKIP_RB_TREE;
8796 vm_map_copy_first_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8797 vm_map_copy_last_entry(new_copy) = vm_map_copy_to_entry(new_copy);
8798 return new_copy;
8799 }
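
/*
 * A minimal sketch of how callers in this file pair vm_map_copy_allocate()
 * with the header fields they fill in themselves (see the entry-list copies
 * built later in vm_map_copy_overwrite_nested() and vm_map_copy_overwrite()).
 * The offset and page-shift values are hypothetical.
 */
#if 0	/* illustrative sketch only -- not compiled */
static vm_map_copy_t
vm_map_copy_entry_list_example(
	vm_map_offset_t	offset,
	int		page_shift)
{
	vm_map_copy_t	copy;

	copy = vm_map_copy_allocate();
	/* vm_map_copy_allocate() only zeroes the object and links the */
	/* empty entry list, so type, offset and page shift are up to us. */
	copy->type = VM_MAP_COPY_ENTRY_LIST;
	copy->offset = offset;
	copy->cpy_hdr.page_shift = page_shift;
	return copy;
}
#endif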
8800
8801 /*
8802 * Routine: vm_map_copy_discard
8803 *
8804 * Description:
8805 * Dispose of a map copy object (returned by
8806 * vm_map_copyin).
8807 */
8808 void
8809 vm_map_copy_discard(
8810 vm_map_copy_t copy)
8811 {
8812 if (copy == VM_MAP_COPY_NULL) {
8813 return;
8814 }
8815
8816 switch (copy->type) {
8817 case VM_MAP_COPY_ENTRY_LIST:
8818 while (vm_map_copy_first_entry(copy) !=
8819 vm_map_copy_to_entry(copy)) {
8820 vm_map_entry_t entry = vm_map_copy_first_entry(copy);
8821
8822 vm_map_copy_entry_unlink(copy, entry);
8823 if (entry->is_sub_map) {
8824 vm_map_deallocate(VME_SUBMAP(entry));
8825 } else {
8826 vm_object_deallocate(VME_OBJECT(entry));
8827 }
8828 vm_map_copy_entry_dispose(copy, entry);
8829 }
8830 break;
8831 case VM_MAP_COPY_OBJECT:
8832 vm_object_deallocate(copy->cpy_object);
8833 break;
8834 case VM_MAP_COPY_KERNEL_BUFFER:
8835
8836 /*
8837 * The vm_map_copy_t and possibly the data buffer were
8838 * allocated by a single call to kheap_alloc(), i.e. the
8839 * vm_map_copy_t was not allocated out of the zone.
8840 */
8841 if (copy->size > msg_ool_size_small || copy->offset) {
8842 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
8843 (long long)copy->size, (long long)copy->offset);
8844 }
8845 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy->size);
8846 }
8847 zfree(vm_map_copy_zone, copy);
8848 }
8849
8850 /*
8851 * Routine: vm_map_copy_copy
8852 *
8853 * Description:
8854 * Move the information in a map copy object to
8855 * a new map copy object, leaving the old one
8856 * empty.
8857 *
8858 * This is used by kernel routines that need
8859 * to look at out-of-line data (in copyin form)
8860 * before deciding whether to return SUCCESS.
8861 * If the routine returns FAILURE, the original
8862 * copy object will be deallocated; therefore,
8863 * these routines must make a copy of the copy
8864 * object and leave the original empty so that
8865 * deallocation will not fail.
8866 */
8867 vm_map_copy_t
8868 vm_map_copy_copy(
8869 vm_map_copy_t copy)
8870 {
8871 vm_map_copy_t new_copy;
8872
8873 if (copy == VM_MAP_COPY_NULL) {
8874 return VM_MAP_COPY_NULL;
8875 }
8876
8877 /*
8878 * Allocate a new copy object, and copy the information
8879 * from the old one into it.
8880 */
8881
8882 new_copy = (vm_map_copy_t) zalloc(vm_map_copy_zone);
8883 memcpy((void *) new_copy, (void *) copy, sizeof(struct vm_map_copy));
8884 #if __has_feature(ptrauth_calls)
8885 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
8886 new_copy->cpy_kdata = copy->cpy_kdata;
8887 }
8888 #endif
8889
8890 if (copy->type == VM_MAP_COPY_ENTRY_LIST) {
8891 /*
8892 * The links in the entry chain must be
8893 * changed to point to the new copy object.
8894 */
8895 vm_map_copy_first_entry(copy)->vme_prev
8896 = vm_map_copy_to_entry(new_copy);
8897 vm_map_copy_last_entry(copy)->vme_next
8898 = vm_map_copy_to_entry(new_copy);
8899 }
8900
8901 /*
8902 * Change the old copy object into one that contains
8903 * nothing to be deallocated.
8904 */
8905 copy->type = VM_MAP_COPY_OBJECT;
8906 copy->cpy_object = VM_OBJECT_NULL;
8907
8908 /*
8909 * Return the new object.
8910 */
8911 return new_copy;
8912 }
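
/*
 * A minimal sketch of the usage pattern described in the comment above
 * vm_map_copy_copy(): duplicate the copy object before inspecting the
 * out-of-line data, so that a failure return leaves the original empty and
 * safe to deallocate. inspect_ool_data() is a hypothetical stand-in for
 * whatever validation the kernel routine performs.
 */
#if 0	/* illustrative sketch only -- not compiled */
static kern_return_t
vm_map_copy_inspect_example(
	vm_map_copy_t	copy)
{
	vm_map_copy_t	dup;
	kern_return_t	kr;

	/* Move the contents to "dup"; "copy" is left empty. */
	dup = vm_map_copy_copy(copy);

	kr = inspect_ool_data(dup);	/* hypothetical validation step */
	if (kr != KERN_SUCCESS) {
		/*
		 * We own "dup" now; discard it on our failure path.
		 * Our caller can still deallocate the (empty) original.
		 */
		vm_map_copy_discard(dup);
		return kr;
	}
	/* On success, continue using "dup" in place of "copy". */
	return kr;
}
#endif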
8913
8914 static kern_return_t
8915 vm_map_overwrite_submap_recurse(
8916 vm_map_t dst_map,
8917 vm_map_offset_t dst_addr,
8918 vm_map_size_t dst_size)
8919 {
8920 vm_map_offset_t dst_end;
8921 vm_map_entry_t tmp_entry;
8922 vm_map_entry_t entry;
8923 kern_return_t result;
8924 boolean_t encountered_sub_map = FALSE;
8925
8926
8927
8928 /*
8929 * Verify that the destination is all writeable
8930 * initially. We have to trunc the destination
8931 * address and round the copy size or we'll end up
8932 * splitting entries in strange ways.
8933 */
8934
8935 dst_end = vm_map_round_page(dst_addr + dst_size,
8936 VM_MAP_PAGE_MASK(dst_map));
8937 vm_map_lock(dst_map);
8938
8939 start_pass_1:
8940 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
8941 vm_map_unlock(dst_map);
8942 return KERN_INVALID_ADDRESS;
8943 }
8944
8945 vm_map_clip_start(dst_map,
8946 tmp_entry,
8947 vm_map_trunc_page(dst_addr,
8948 VM_MAP_PAGE_MASK(dst_map)));
8949 if (tmp_entry->is_sub_map) {
8950 /* clipping did unnest if needed */
8951 assert(!tmp_entry->use_pmap);
8952 }
8953
8954 for (entry = tmp_entry;;) {
8955 vm_map_entry_t next;
8956
8957 next = entry->vme_next;
8958 while (entry->is_sub_map) {
8959 vm_map_offset_t sub_start;
8960 vm_map_offset_t sub_end;
8961 vm_map_offset_t local_end;
8962
8963 if (entry->in_transition) {
8964 /*
8965 * Say that we are waiting, and wait for entry.
8966 */
8967 entry->needs_wakeup = TRUE;
8968 vm_map_entry_wait(dst_map, THREAD_UNINT);
8969
8970 goto start_pass_1;
8971 }
8972
8973 encountered_sub_map = TRUE;
8974 sub_start = VME_OFFSET(entry);
8975
8976 if (entry->vme_end < dst_end) {
8977 sub_end = entry->vme_end;
8978 } else {
8979 sub_end = dst_end;
8980 }
8981 sub_end -= entry->vme_start;
8982 sub_end += VME_OFFSET(entry);
8983 local_end = entry->vme_end;
8984 vm_map_unlock(dst_map);
8985
8986 result = vm_map_overwrite_submap_recurse(
8987 VME_SUBMAP(entry),
8988 sub_start,
8989 sub_end - sub_start);
8990
8991 if (result != KERN_SUCCESS) {
8992 return result;
8993 }
8994 if (dst_end <= entry->vme_end) {
8995 return KERN_SUCCESS;
8996 }
8997 vm_map_lock(dst_map);
8998 if (!vm_map_lookup_entry(dst_map, local_end,
8999 &tmp_entry)) {
9000 vm_map_unlock(dst_map);
9001 return KERN_INVALID_ADDRESS;
9002 }
9003 entry = tmp_entry;
9004 next = entry->vme_next;
9005 }
9006
9007 if (!(entry->protection & VM_PROT_WRITE)) {
9008 vm_map_unlock(dst_map);
9009 return KERN_PROTECTION_FAILURE;
9010 }
9011
9012 /*
9013 * If the entry is in transition, we must wait
9014 * for it to exit that state. Anything could happen
9015 * when we unlock the map, so start over.
9016 */
9017 if (entry->in_transition) {
9018 /*
9019 * Say that we are waiting, and wait for entry.
9020 */
9021 entry->needs_wakeup = TRUE;
9022 vm_map_entry_wait(dst_map, THREAD_UNINT);
9023
9024 goto start_pass_1;
9025 }
9026
9027 /*
9028 * our range is contained completely within this map entry
9029 */
9030 if (dst_end <= entry->vme_end) {
9031 vm_map_unlock(dst_map);
9032 return KERN_SUCCESS;
9033 }
9034 /*
9035 * check that range specified is contiguous region
9036 */
9037 if ((next == vm_map_to_entry(dst_map)) ||
9038 (next->vme_start != entry->vme_end)) {
9039 vm_map_unlock(dst_map);
9040 return KERN_INVALID_ADDRESS;
9041 }
9042
9043 /*
9044 * Check for permanent objects in the destination.
9045 */
9046 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9047 ((!VME_OBJECT(entry)->internal) ||
9048 (VME_OBJECT(entry)->true_share))) {
9049 if (encountered_sub_map) {
9050 vm_map_unlock(dst_map);
9051 return KERN_FAILURE;
9052 }
9053 }
9054
9055
9056 entry = next;
9057 }/* for */
9058 vm_map_unlock(dst_map);
9059 return KERN_SUCCESS;
9060 }
9061
9062 /*
9063 * Routine: vm_map_copy_overwrite
9064 *
9065 * Description:
9066 * Copy the memory described by the map copy
9067 * object (copy; returned by vm_map_copyin) onto
9068 * the specified destination region (dst_map, dst_addr).
9069 * The destination must be writeable.
9070 *
9071 * Unlike vm_map_copyout, this routine actually
9072 * writes over previously-mapped memory. If the
9073 * previous mapping was to a permanent (user-supplied)
9074 * memory object, it is preserved.
9075 *
9076 * The attributes (protection and inheritance) of the
9077 * destination region are preserved.
9078 *
9079 * If successful, consumes the copy object.
9080 * Otherwise, the caller is responsible for it.
9081 *
9082 * Implementation notes:
9083 * To overwrite aligned temporary virtual memory, it is
9084 * sufficient to remove the previous mapping and insert
9085 * the new copy. This replacement is done either on
9086 * the whole region (if no permanent virtual memory
9087 * objects are embedded in the destination region) or
9088 * in individual map entries.
9089 *
9090 * To overwrite permanent virtual memory, it is necessary
9091 * to copy each page, as the external memory management
9092 * interface currently does not provide any optimizations.
9093 *
9094 * Unaligned memory also has to be copied. It is possible
9095 * to use 'vm_trickery' to copy the aligned data. This is
9096 * not done but not hard to implement.
9097 *
9098 * Once a page of permanent memory has been overwritten,
9099 * it is impossible to interrupt this function; otherwise,
9100 * the call would be neither atomic nor location-independent.
9101 * The kernel-state portion of a user thread must be
9102 * interruptible.
9103 *
9104 * It may be expensive to forward all requests that might
9105 * overwrite permanent memory (vm_write, vm_copy) to
9106 * uninterruptible kernel threads. This routine may be
9107 * called by interruptible threads; however, success is
9108 * not guaranteed -- if the request cannot be performed
9109 * atomically and interruptibly, an error indication is
9110 * returned.
9111 */
9112
9113 static kern_return_t
9114 vm_map_copy_overwrite_nested(
9115 vm_map_t dst_map,
9116 vm_map_address_t dst_addr,
9117 vm_map_copy_t copy,
9118 boolean_t interruptible,
9119 pmap_t pmap,
9120 boolean_t discard_on_success)
9121 {
9122 vm_map_offset_t dst_end;
9123 vm_map_entry_t tmp_entry;
9124 vm_map_entry_t entry;
9125 kern_return_t kr;
9126 boolean_t aligned = TRUE;
9127 boolean_t contains_permanent_objects = FALSE;
9128 boolean_t encountered_sub_map = FALSE;
9129 vm_map_offset_t base_addr;
9130 vm_map_size_t copy_size;
9131 vm_map_size_t total_size;
9132 int copy_page_shift;
9133
9134
9135 /*
9136 * Check for null copy object.
9137 */
9138
9139 if (copy == VM_MAP_COPY_NULL) {
9140 return KERN_SUCCESS;
9141 }
9142
9143 /*
9144 * Assert that the vm_map_copy is coming from the right
9145 * zone and hasn't been forged
9146 */
9147 vm_map_copy_require(copy);
9148
9149 /*
9150 * Check for special kernel buffer allocated
9151 * by new_ipc_kmsg_copyin.
9152 */
9153
9154 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
9155 return vm_map_copyout_kernel_buffer(
9156 dst_map, &dst_addr,
9157 copy, copy->size, TRUE, discard_on_success);
9158 }
9159
9160 /*
9161 * Only works for entry lists at the moment. Will
9162 * support page lists later.
9163 */
9164
9165 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9166
9167 if (copy->size == 0) {
9168 if (discard_on_success) {
9169 vm_map_copy_discard(copy);
9170 }
9171 return KERN_SUCCESS;
9172 }
9173
9174 copy_page_shift = copy->cpy_hdr.page_shift;
9175
9176 /*
9177 * Verify that the destination is all writeable
9178 * initially. We have to trunc the destination
9179 * address and round the copy size or we'll end up
9180 * splitting entries in strange ways.
9181 */
9182
9183 if (!VM_MAP_PAGE_ALIGNED(copy->size,
9184 VM_MAP_PAGE_MASK(dst_map)) ||
9185 !VM_MAP_PAGE_ALIGNED(copy->offset,
9186 VM_MAP_PAGE_MASK(dst_map)) ||
9187 !VM_MAP_PAGE_ALIGNED(dst_addr,
9188 VM_MAP_PAGE_MASK(dst_map)) ||
9189 copy_page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
9190 aligned = FALSE;
9191 dst_end = vm_map_round_page(dst_addr + copy->size,
9192 VM_MAP_PAGE_MASK(dst_map));
9193 } else {
9194 dst_end = dst_addr + copy->size;
9195 }
9196
9197 vm_map_lock(dst_map);
9198
9199 /* LP64todo - remove this check when vm_map_commpage64()
9200 * no longer has to stuff in a map_entry for the commpage
9201 * above the map's max_offset.
9202 */
9203 if (dst_addr >= dst_map->max_offset) {
9204 vm_map_unlock(dst_map);
9205 return KERN_INVALID_ADDRESS;
9206 }
9207
9208 start_pass_1:
9209 if (!vm_map_lookup_entry(dst_map, dst_addr, &tmp_entry)) {
9210 vm_map_unlock(dst_map);
9211 return KERN_INVALID_ADDRESS;
9212 }
9213 vm_map_clip_start(dst_map,
9214 tmp_entry,
9215 vm_map_trunc_page(dst_addr,
9216 VM_MAP_PAGE_MASK(dst_map)));
9217 for (entry = tmp_entry;;) {
9218 vm_map_entry_t next = entry->vme_next;
9219
9220 while (entry->is_sub_map) {
9221 vm_map_offset_t sub_start;
9222 vm_map_offset_t sub_end;
9223 vm_map_offset_t local_end;
9224
9225 if (entry->in_transition) {
9226 /*
9227 * Say that we are waiting, and wait for entry.
9228 */
9229 entry->needs_wakeup = TRUE;
9230 vm_map_entry_wait(dst_map, THREAD_UNINT);
9231
9232 goto start_pass_1;
9233 }
9234
9235 local_end = entry->vme_end;
9236 if (!(entry->needs_copy)) {
9237 /* if needs_copy we are a COW submap */
9238 /* in such a case we just replace so */
9239 /* there is no need for the follow- */
9240 /* ing check. */
9241 encountered_sub_map = TRUE;
9242 sub_start = VME_OFFSET(entry);
9243
9244 if (entry->vme_end < dst_end) {
9245 sub_end = entry->vme_end;
9246 } else {
9247 sub_end = dst_end;
9248 }
9249 sub_end -= entry->vme_start;
9250 sub_end += VME_OFFSET(entry);
9251 vm_map_unlock(dst_map);
9252
9253 kr = vm_map_overwrite_submap_recurse(
9254 VME_SUBMAP(entry),
9255 sub_start,
9256 sub_end - sub_start);
9257 if (kr != KERN_SUCCESS) {
9258 return kr;
9259 }
9260 vm_map_lock(dst_map);
9261 }
9262
9263 if (dst_end <= entry->vme_end) {
9264 goto start_overwrite;
9265 }
9266 if (!vm_map_lookup_entry(dst_map, local_end,
9267 &entry)) {
9268 vm_map_unlock(dst_map);
9269 return KERN_INVALID_ADDRESS;
9270 }
9271 next = entry->vme_next;
9272 }
9273
9274 if (!(entry->protection & VM_PROT_WRITE)) {
9275 vm_map_unlock(dst_map);
9276 return KERN_PROTECTION_FAILURE;
9277 }
9278
9279 /*
9280 * If the entry is in transition, we must wait
9281 * for it to exit that state. Anything could happen
9282 * when we unlock the map, so start over.
9283 */
9284 if (entry->in_transition) {
9285 /*
9286 * Say that we are waiting, and wait for entry.
9287 */
9288 entry->needs_wakeup = TRUE;
9289 vm_map_entry_wait(dst_map, THREAD_UNINT);
9290
9291 goto start_pass_1;
9292 }
9293
9294 /*
9295 * our range is contained completely within this map entry
9296 */
9297 if (dst_end <= entry->vme_end) {
9298 break;
9299 }
9300 /*
9301 * check that range specified is contiguous region
9302 */
9303 if ((next == vm_map_to_entry(dst_map)) ||
9304 (next->vme_start != entry->vme_end)) {
9305 vm_map_unlock(dst_map);
9306 return KERN_INVALID_ADDRESS;
9307 }
9308
9309
9310 /*
9311 * Check for permanent objects in the destination.
9312 */
9313 if ((VME_OBJECT(entry) != VM_OBJECT_NULL) &&
9314 ((!VME_OBJECT(entry)->internal) ||
9315 (VME_OBJECT(entry)->true_share))) {
9316 contains_permanent_objects = TRUE;
9317 }
9318
9319 entry = next;
9320 }/* for */
9321
9322 start_overwrite:
9323 /*
9324 * If there are permanent objects in the destination, then
9325 * the copy cannot be interrupted.
9326 */
9327
9328 if (interruptible && contains_permanent_objects) {
9329 vm_map_unlock(dst_map);
9330 return KERN_FAILURE; /* XXX */
9331 }
9332
9333 /*
9334 *
9335 * Make a second pass, overwriting the data
9336 * At the beginning of each loop iteration,
9337 * the next entry to be overwritten is "tmp_entry"
9338 * (initially, the value returned from the lookup above),
9339 * and the starting address expected in that entry
9340 * is "start".
9341 */
9342
9343 total_size = copy->size;
9344 if (encountered_sub_map) {
9345 copy_size = 0;
9346 /* re-calculate tmp_entry since we've had the map */
9347 /* unlocked */
9348 if (!vm_map_lookup_entry( dst_map, dst_addr, &tmp_entry)) {
9349 vm_map_unlock(dst_map);
9350 return KERN_INVALID_ADDRESS;
9351 }
9352 } else {
9353 copy_size = copy->size;
9354 }
9355
9356 base_addr = dst_addr;
9357 while (TRUE) {
9358 /* deconstruct the copy object and do in parts */
9359 /* only in sub_map, interruptible case */
9360 vm_map_entry_t copy_entry;
9361 vm_map_entry_t previous_prev = VM_MAP_ENTRY_NULL;
9362 vm_map_entry_t next_copy = VM_MAP_ENTRY_NULL;
9363 int nentries;
9364 int remaining_entries = 0;
9365 vm_map_offset_t new_offset = 0;
9366
9367 for (entry = tmp_entry; copy_size == 0;) {
9368 vm_map_entry_t next;
9369
9370 next = entry->vme_next;
9371
9372 /* tmp_entry and base address are moved along */
9373 /* each time we encounter a sub-map. Otherwise */
9374 /* entry can outpace tmp_entry, and the copy_size */
9375 /* may reflect the distance between them. */
9376 /* If the current entry is found to be in transition, */
9377 /* we will start over at the beginning or the last */
9378 /* encounter of a submap, as dictated by base_addr, */
9379 /* and we will zero copy_size accordingly. */
9380 if (entry->in_transition) {
9381 /*
9382 * Say that we are waiting, and wait for entry.
9383 */
9384 entry->needs_wakeup = TRUE;
9385 vm_map_entry_wait(dst_map, THREAD_UNINT);
9386
9387 if (!vm_map_lookup_entry(dst_map, base_addr,
9388 &tmp_entry)) {
9389 vm_map_unlock(dst_map);
9390 return KERN_INVALID_ADDRESS;
9391 }
9392 copy_size = 0;
9393 entry = tmp_entry;
9394 continue;
9395 }
9396 if (entry->is_sub_map) {
9397 vm_map_offset_t sub_start;
9398 vm_map_offset_t sub_end;
9399 vm_map_offset_t local_end;
9400
9401 if (entry->needs_copy) {
9402 /* if this is a COW submap */
9403 /* just back the range with an */
9404 /* anonymous entry */
9405 if (entry->vme_end < dst_end) {
9406 sub_end = entry->vme_end;
9407 } else {
9408 sub_end = dst_end;
9409 }
9410 if (entry->vme_start < base_addr) {
9411 sub_start = base_addr;
9412 } else {
9413 sub_start = entry->vme_start;
9414 }
9415 vm_map_clip_end(
9416 dst_map, entry, sub_end);
9417 vm_map_clip_start(
9418 dst_map, entry, sub_start);
9419 assert(!entry->use_pmap);
9420 assert(!entry->iokit_acct);
9421 entry->use_pmap = TRUE;
9422 entry->is_sub_map = FALSE;
9423 vm_map_deallocate(
9424 VME_SUBMAP(entry));
9425 VME_OBJECT_SET(entry, VM_OBJECT_NULL);
9426 VME_OFFSET_SET(entry, 0);
9427 entry->is_shared = FALSE;
9428 entry->needs_copy = FALSE;
9429 entry->protection = VM_PROT_DEFAULT;
9430 entry->max_protection = VM_PROT_ALL;
9431 entry->wired_count = 0;
9432 entry->user_wired_count = 0;
9433 if (entry->inheritance
9434 == VM_INHERIT_SHARE) {
9435 entry->inheritance = VM_INHERIT_COPY;
9436 }
9437 continue;
9438 }
9439 /* first take care of any non-sub_map */
9440 /* entries to send */
9441 if (base_addr < entry->vme_start) {
9442 /* stuff to send */
9443 copy_size =
9444 entry->vme_start - base_addr;
9445 break;
9446 }
9447 sub_start = VME_OFFSET(entry);
9448
9449 if (entry->vme_end < dst_end) {
9450 sub_end = entry->vme_end;
9451 } else {
9452 sub_end = dst_end;
9453 }
9454 sub_end -= entry->vme_start;
9455 sub_end += VME_OFFSET(entry);
9456 local_end = entry->vme_end;
9457 vm_map_unlock(dst_map);
9458 copy_size = sub_end - sub_start;
9459
9460 /* adjust the copy object */
9461 if (total_size > copy_size) {
9462 vm_map_size_t local_size = 0;
9463 vm_map_size_t entry_size;
9464
9465 nentries = 1;
9466 new_offset = copy->offset;
9467 copy_entry = vm_map_copy_first_entry(copy);
9468 while (copy_entry !=
9469 vm_map_copy_to_entry(copy)) {
9470 entry_size = copy_entry->vme_end -
9471 copy_entry->vme_start;
9472 if ((local_size < copy_size) &&
9473 ((local_size + entry_size)
9474 >= copy_size)) {
9475 vm_map_copy_clip_end(copy,
9476 copy_entry,
9477 copy_entry->vme_start +
9478 (copy_size - local_size));
9479 entry_size = copy_entry->vme_end -
9480 copy_entry->vme_start;
9481 local_size += entry_size;
9482 new_offset += entry_size;
9483 }
9484 if (local_size >= copy_size) {
9485 next_copy = copy_entry->vme_next;
9486 copy_entry->vme_next =
9487 vm_map_copy_to_entry(copy);
9488 previous_prev =
9489 copy->cpy_hdr.links.prev;
9490 copy->cpy_hdr.links.prev = copy_entry;
9491 copy->size = copy_size;
9492 remaining_entries =
9493 copy->cpy_hdr.nentries;
9494 remaining_entries -= nentries;
9495 copy->cpy_hdr.nentries = nentries;
9496 break;
9497 } else {
9498 local_size += entry_size;
9499 new_offset += entry_size;
9500 nentries++;
9501 }
9502 copy_entry = copy_entry->vme_next;
9503 }
9504 }
9505
9506 if ((entry->use_pmap) && (pmap == NULL)) {
9507 kr = vm_map_copy_overwrite_nested(
9508 VME_SUBMAP(entry),
9509 sub_start,
9510 copy,
9511 interruptible,
9512 VME_SUBMAP(entry)->pmap,
9513 TRUE);
9514 } else if (pmap != NULL) {
9515 kr = vm_map_copy_overwrite_nested(
9516 VME_SUBMAP(entry),
9517 sub_start,
9518 copy,
9519 interruptible, pmap,
9520 TRUE);
9521 } else {
9522 kr = vm_map_copy_overwrite_nested(
9523 VME_SUBMAP(entry),
9524 sub_start,
9525 copy,
9526 interruptible,
9527 dst_map->pmap,
9528 TRUE);
9529 }
9530 if (kr != KERN_SUCCESS) {
9531 if (next_copy != NULL) {
9532 copy->cpy_hdr.nentries +=
9533 remaining_entries;
9534 copy->cpy_hdr.links.prev->vme_next =
9535 next_copy;
9536 copy->cpy_hdr.links.prev
9537 = previous_prev;
9538 copy->size = total_size;
9539 }
9540 return kr;
9541 }
9542 if (dst_end <= local_end) {
9543 return KERN_SUCCESS;
9544 }
9545 /* otherwise copy no longer exists, it was */
9546 /* destroyed after successful copy_overwrite */
9547 copy = vm_map_copy_allocate();
9548 copy->type = VM_MAP_COPY_ENTRY_LIST;
9549 copy->offset = new_offset;
9550 copy->cpy_hdr.page_shift = copy_page_shift;
9551
9552 /*
9553 * XXX FBDP
9554 * this does not seem to deal with
9555 * the VM map store (R&B tree)
9556 */
9557
9558 total_size -= copy_size;
9559 copy_size = 0;
9560 /* put back remainder of copy in container */
9561 if (next_copy != NULL) {
9562 copy->cpy_hdr.nentries = remaining_entries;
9563 copy->cpy_hdr.links.next = next_copy;
9564 copy->cpy_hdr.links.prev = previous_prev;
9565 copy->size = total_size;
9566 next_copy->vme_prev =
9567 vm_map_copy_to_entry(copy);
9568 next_copy = NULL;
9569 }
9570 base_addr = local_end;
9571 vm_map_lock(dst_map);
9572 if (!vm_map_lookup_entry(dst_map,
9573 local_end, &tmp_entry)) {
9574 vm_map_unlock(dst_map);
9575 return KERN_INVALID_ADDRESS;
9576 }
9577 entry = tmp_entry;
9578 continue;
9579 }
9580 if (dst_end <= entry->vme_end) {
9581 copy_size = dst_end - base_addr;
9582 break;
9583 }
9584
9585 if ((next == vm_map_to_entry(dst_map)) ||
9586 (next->vme_start != entry->vme_end)) {
9587 vm_map_unlock(dst_map);
9588 return KERN_INVALID_ADDRESS;
9589 }
9590
9591 entry = next;
9592 }/* for */
9593
9594 next_copy = NULL;
9595 nentries = 1;
9596
9597 /* adjust the copy object */
9598 if (total_size > copy_size) {
9599 vm_map_size_t local_size = 0;
9600 vm_map_size_t entry_size;
9601
9602 new_offset = copy->offset;
9603 copy_entry = vm_map_copy_first_entry(copy);
9604 while (copy_entry != vm_map_copy_to_entry(copy)) {
9605 entry_size = copy_entry->vme_end -
9606 copy_entry->vme_start;
9607 if ((local_size < copy_size) &&
9608 ((local_size + entry_size)
9609 >= copy_size)) {
9610 vm_map_copy_clip_end(copy, copy_entry,
9611 copy_entry->vme_start +
9612 (copy_size - local_size));
9613 entry_size = copy_entry->vme_end -
9614 copy_entry->vme_start;
9615 local_size += entry_size;
9616 new_offset += entry_size;
9617 }
9618 if (local_size >= copy_size) {
9619 next_copy = copy_entry->vme_next;
9620 copy_entry->vme_next =
9621 vm_map_copy_to_entry(copy);
9622 previous_prev =
9623 copy->cpy_hdr.links.prev;
9624 copy->cpy_hdr.links.prev = copy_entry;
9625 copy->size = copy_size;
9626 remaining_entries =
9627 copy->cpy_hdr.nentries;
9628 remaining_entries -= nentries;
9629 copy->cpy_hdr.nentries = nentries;
9630 break;
9631 } else {
9632 local_size += entry_size;
9633 new_offset += entry_size;
9634 nentries++;
9635 }
9636 copy_entry = copy_entry->vme_next;
9637 }
9638 }
9639
9640 if (aligned) {
9641 pmap_t local_pmap;
9642
9643 if (pmap) {
9644 local_pmap = pmap;
9645 } else {
9646 local_pmap = dst_map->pmap;
9647 }
9648
9649 if ((kr = vm_map_copy_overwrite_aligned(
9650 dst_map, tmp_entry, copy,
9651 base_addr, local_pmap)) != KERN_SUCCESS) {
9652 if (next_copy != NULL) {
9653 copy->cpy_hdr.nentries +=
9654 remaining_entries;
9655 copy->cpy_hdr.links.prev->vme_next =
9656 next_copy;
9657 copy->cpy_hdr.links.prev =
9658 previous_prev;
9659 copy->size += copy_size;
9660 }
9661 return kr;
9662 }
9663 vm_map_unlock(dst_map);
9664 } else {
9665 /*
9666 * Performance gain:
9667 *
9668 * If the copy and dst address are misaligned but at the same
9669 * offset within the page, we can copy_not_aligned the
9670 * misaligned parts and copy aligned the rest. If they are
9671 * aligned but len is unaligned, we simply need to copy
9672 * the end bit unaligned. We'll need to split the misaligned
9673 * bits of the region in this case!
9674 */
9675 /* ALWAYS UNLOCKS THE dst_map MAP */
9676 kr = vm_map_copy_overwrite_unaligned(
9677 dst_map,
9678 tmp_entry,
9679 copy,
9680 base_addr,
9681 discard_on_success);
9682 if (kr != KERN_SUCCESS) {
9683 if (next_copy != NULL) {
9684 copy->cpy_hdr.nentries +=
9685 remaining_entries;
9686 copy->cpy_hdr.links.prev->vme_next =
9687 next_copy;
9688 copy->cpy_hdr.links.prev =
9689 previous_prev;
9690 copy->size += copy_size;
9691 }
9692 return kr;
9693 }
9694 }
9695 total_size -= copy_size;
9696 if (total_size == 0) {
9697 break;
9698 }
9699 base_addr += copy_size;
9700 copy_size = 0;
9701 copy->offset = new_offset;
9702 if (next_copy != NULL) {
9703 copy->cpy_hdr.nentries = remaining_entries;
9704 copy->cpy_hdr.links.next = next_copy;
9705 copy->cpy_hdr.links.prev = previous_prev;
9706 next_copy->vme_prev = vm_map_copy_to_entry(copy);
9707 copy->size = total_size;
9708 }
9709 vm_map_lock(dst_map);
9710 while (TRUE) {
9711 if (!vm_map_lookup_entry(dst_map,
9712 base_addr, &tmp_entry)) {
9713 vm_map_unlock(dst_map);
9714 return KERN_INVALID_ADDRESS;
9715 }
9716 if (tmp_entry->in_transition) {
9717 entry->needs_wakeup = TRUE;
9718 vm_map_entry_wait(dst_map, THREAD_UNINT);
9719 } else {
9720 break;
9721 }
9722 }
9723 vm_map_clip_start(dst_map,
9724 tmp_entry,
9725 vm_map_trunc_page(base_addr,
9726 VM_MAP_PAGE_MASK(dst_map)));
9727
9728 entry = tmp_entry;
9729 } /* while */
9730
9731 /*
9732 * Throw away the vm_map_copy object
9733 */
9734 if (discard_on_success) {
9735 vm_map_copy_discard(copy);
9736 }
9737
9738 return KERN_SUCCESS;
9739 }/* vm_map_copy_overwrite */
9740
9741 kern_return_t
9742 vm_map_copy_overwrite(
9743 vm_map_t dst_map,
9744 vm_map_offset_t dst_addr,
9745 vm_map_copy_t copy,
9746 vm_map_size_t copy_size,
9747 boolean_t interruptible)
9748 {
9749 vm_map_size_t head_size, tail_size;
9750 vm_map_copy_t head_copy, tail_copy;
9751 vm_map_offset_t head_addr, tail_addr;
9752 vm_map_entry_t entry;
9753 kern_return_t kr;
9754 vm_map_offset_t effective_page_mask, effective_page_size;
9755 int copy_page_shift;
9756
9757 head_size = 0;
9758 tail_size = 0;
9759 head_copy = NULL;
9760 tail_copy = NULL;
9761 head_addr = 0;
9762 tail_addr = 0;
9763
9764 if (interruptible ||
9765 copy == VM_MAP_COPY_NULL ||
9766 copy->type != VM_MAP_COPY_ENTRY_LIST) {
9767 /*
9768 * We can't split the "copy" map if we're interruptible
9769 * or if we don't have a "copy" map...
9770 */
9771 blunt_copy:
9772 return vm_map_copy_overwrite_nested(dst_map,
9773 dst_addr,
9774 copy,
9775 interruptible,
9776 (pmap_t) NULL,
9777 TRUE);
9778 }
9779
9780 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy);
9781 if (copy_page_shift < PAGE_SHIFT ||
9782 VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9783 goto blunt_copy;
9784 }
9785
9786 if (VM_MAP_PAGE_SHIFT(dst_map) < PAGE_SHIFT) {
9787 effective_page_mask = VM_MAP_PAGE_MASK(dst_map);
9788 } else {
9789 effective_page_mask = MAX(VM_MAP_PAGE_MASK(dst_map), PAGE_MASK);
9790 effective_page_mask = MAX(VM_MAP_COPY_PAGE_MASK(copy),
9791 effective_page_mask);
9792 }
9793 effective_page_size = effective_page_mask + 1;
9794
9795 if (copy_size < VM_MAP_COPY_OVERWRITE_OPTIMIZATION_THRESHOLD_PAGES * effective_page_size) {
9796 /*
9797 * Too small to bother with optimizing...
9798 */
9799 goto blunt_copy;
9800 }
9801
9802 if ((dst_addr & effective_page_mask) !=
9803 (copy->offset & effective_page_mask)) {
9804 /*
9805 * Incompatible mis-alignment of source and destination...
9806 */
9807 goto blunt_copy;
9808 }
9809
9810 /*
9811 * Proper alignment or identical mis-alignment at the beginning.
9812 * Let's try and do a small unaligned copy first (if needed)
9813 * and then an aligned copy for the rest.
9814 */
9815 if (!vm_map_page_aligned(dst_addr, effective_page_mask)) {
9816 head_addr = dst_addr;
9817 head_size = (effective_page_size -
9818 (copy->offset & effective_page_mask));
9819 head_size = MIN(head_size, copy_size);
9820 }
9821 if (!vm_map_page_aligned(copy->offset + copy_size,
9822 effective_page_mask)) {
9823 /*
9824 * Mis-alignment at the end.
9825 * Do an aligned copy up to the last page and
9826 * then an unaligned copy for the remaining bytes.
9827 */
9828 tail_size = ((copy->offset + copy_size) &
9829 effective_page_mask);
9830 tail_size = MIN(tail_size, copy_size);
9831 tail_addr = dst_addr + copy_size - tail_size;
9832 assert(tail_addr >= head_addr + head_size);
9833 }
9834 assert(head_size + tail_size <= copy_size);
9835
9836 if (head_size + tail_size == copy_size) {
9837 /*
9838 * It's all unaligned, no optimization possible...
9839 */
9840 goto blunt_copy;
9841 }
9842
9843 /*
9844 * Can't optimize if there are any submaps in the
9845 * destination due to the way we free the "copy" map
9846 * progressively in vm_map_copy_overwrite_nested()
9847 * in that case.
9848 */
9849 vm_map_lock_read(dst_map);
9850 if (!vm_map_lookup_entry(dst_map, dst_addr, &entry)) {
9851 vm_map_unlock_read(dst_map);
9852 goto blunt_copy;
9853 }
9854 for (;
9855 (entry != vm_map_copy_to_entry(copy) &&
9856 entry->vme_start < dst_addr + copy_size);
9857 entry = entry->vme_next) {
9858 if (entry->is_sub_map) {
9859 vm_map_unlock_read(dst_map);
9860 goto blunt_copy;
9861 }
9862 }
9863 vm_map_unlock_read(dst_map);
9864
9865 if (head_size) {
9866 /*
9867 * Unaligned copy of the first "head_size" bytes, to reach
9868 * a page boundary.
9869 */
9870
9871 /*
9872 * Extract "head_copy" out of "copy".
9873 */
9874 head_copy = vm_map_copy_allocate();
9875 head_copy->type = VM_MAP_COPY_ENTRY_LIST;
9876 head_copy->cpy_hdr.entries_pageable =
9877 copy->cpy_hdr.entries_pageable;
9878 vm_map_store_init(&head_copy->cpy_hdr);
9879 head_copy->cpy_hdr.page_shift = copy_page_shift;
9880
9881 entry = vm_map_copy_first_entry(copy);
9882 if (entry->vme_end < copy->offset + head_size) {
9883 head_size = entry->vme_end - copy->offset;
9884 }
9885
9886 head_copy->offset = copy->offset;
9887 head_copy->size = head_size;
9888 copy->offset += head_size;
9889 copy->size -= head_size;
9890 copy_size -= head_size;
9891 assert(copy_size > 0);
9892
9893 vm_map_copy_clip_end(copy, entry, copy->offset);
9894 vm_map_copy_entry_unlink(copy, entry);
9895 vm_map_copy_entry_link(head_copy,
9896 vm_map_copy_to_entry(head_copy),
9897 entry);
9898
9899 /*
9900 * Do the unaligned copy.
9901 */
9902 kr = vm_map_copy_overwrite_nested(dst_map,
9903 head_addr,
9904 head_copy,
9905 interruptible,
9906 (pmap_t) NULL,
9907 FALSE);
9908 if (kr != KERN_SUCCESS) {
9909 goto done;
9910 }
9911 }
9912
9913 if (tail_size) {
9914 /*
9915 * Extract "tail_copy" out of "copy".
9916 */
9917 tail_copy = vm_map_copy_allocate();
9918 tail_copy->type = VM_MAP_COPY_ENTRY_LIST;
9919 tail_copy->cpy_hdr.entries_pageable =
9920 copy->cpy_hdr.entries_pageable;
9921 vm_map_store_init(&tail_copy->cpy_hdr);
9922 tail_copy->cpy_hdr.page_shift = copy_page_shift;
9923
9924 tail_copy->offset = copy->offset + copy_size - tail_size;
9925 tail_copy->size = tail_size;
9926
9927 copy->size -= tail_size;
9928 copy_size -= tail_size;
9929 assert(copy_size > 0);
9930
9931 entry = vm_map_copy_last_entry(copy);
9932 vm_map_copy_clip_start(copy, entry, tail_copy->offset);
9933 entry = vm_map_copy_last_entry(copy);
9934 vm_map_copy_entry_unlink(copy, entry);
9935 vm_map_copy_entry_link(tail_copy,
9936 vm_map_copy_last_entry(tail_copy),
9937 entry);
9938 }
9939
9940 /*
9941 * If we are here from ipc_kmsg_copyout_ool_descriptor(),
9942 * we want to avoid TOCTOU issues w.r.t copy->size but
9943 * we don't need to change vm_map_copy_overwrite_nested()
9944 * and all other vm_map_copy_overwrite variants.
9945 *
9946 * So we assign the original copy_size that was passed into
9947 * this routine back to copy.
9948 *
9949 * This use of the local 'copy_size' passed into this routine is
9950 * to try to protect against TOCTOU attacks where the kernel
9951 * has been exploited. We don't expect this to be an issue
9952 * during normal system operation.
9953 */
9954 assertf(copy->size == copy_size,
9955 "Mismatch of copy sizes. Expected 0x%llx, Got 0x%llx\n", (uint64_t) copy_size, (uint64_t) copy->size);
9956 copy->size = copy_size;
9957
9958 /*
9959 * Copy most (or possibly all) of the data.
9960 */
9961 kr = vm_map_copy_overwrite_nested(dst_map,
9962 dst_addr + head_size,
9963 copy,
9964 interruptible,
9965 (pmap_t) NULL,
9966 FALSE);
9967 if (kr != KERN_SUCCESS) {
9968 goto done;
9969 }
9970
9971 if (tail_size) {
9972 kr = vm_map_copy_overwrite_nested(dst_map,
9973 tail_addr,
9974 tail_copy,
9975 interruptible,
9976 (pmap_t) NULL,
9977 FALSE);
9978 }
9979
9980 done:
9981 assert(copy->type == VM_MAP_COPY_ENTRY_LIST);
9982 if (kr == KERN_SUCCESS) {
9983 /*
9984 * Discard all the copy maps.
9985 */
9986 if (head_copy) {
9987 vm_map_copy_discard(head_copy);
9988 head_copy = NULL;
9989 }
9990 vm_map_copy_discard(copy);
9991 if (tail_copy) {
9992 vm_map_copy_discard(tail_copy);
9993 tail_copy = NULL;
9994 }
9995 } else {
9996 /*
9997 * Re-assemble the original copy map.
9998 */
9999 if (head_copy) {
10000 entry = vm_map_copy_first_entry(head_copy);
10001 vm_map_copy_entry_unlink(head_copy, entry);
10002 vm_map_copy_entry_link(copy,
10003 vm_map_copy_to_entry(copy),
10004 entry);
10005 copy->offset -= head_size;
10006 copy->size += head_size;
10007 vm_map_copy_discard(head_copy);
10008 head_copy = NULL;
10009 }
10010 if (tail_copy) {
10011 entry = vm_map_copy_last_entry(tail_copy);
10012 vm_map_copy_entry_unlink(tail_copy, entry);
10013 vm_map_copy_entry_link(copy,
10014 vm_map_copy_last_entry(copy),
10015 entry);
10016 copy->size += tail_size;
10017 vm_map_copy_discard(tail_copy);
10018 tail_copy = NULL;
10019 }
10020 }
10021 return kr;
10022 }
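/*
 * Illustrative sketch (not part of the original source): the head/tail
 * split performed above reduces to simple mask arithmetic.  Assuming
 * "effective_page_mask" is the destination map's effective page mask,
 * the tail is just the sub-page remainder at the end of the copy:
 *
 *	tail_size = (copy->offset + copy_size) & effective_page_mask;
 *	tail_size = MIN(tail_size, copy_size);
 *	tail_addr = dst_addr + copy_size - tail_size;
 *
 * The head is handled symmetrically (the bytes needed to reach the first
 * page boundary, clamped to copy_size); whatever is left in the middle is
 * page-aligned and goes through the optimized aligned path.
 */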
10023
10024
10025 /*
10026 * Routine: vm_map_copy_overwrite_unaligned [internal use only]
10027 *
10028 * Description:
10029 * Physically copy unaligned data
10030 *
10031 * Implementation:
10032 * Unaligned parts of pages have to be physically copied. We use
10033 * a modified form of vm_fault_copy (which understands non-aligned
10034 * page offsets and sizes) to do the copy. We attempt to copy as
10035 * much memory in one go as possible; however, vm_fault_copy copies
10036 * within one memory object, so we have to find the smallest of "amount left",
10037 * "source object data size" and "target object data size". With
10038 * unaligned data we don't need to split regions, therefore the source
10039 * (copy) object should be one map entry; the target range may be split
10040 * over multiple map entries, however. In any event we are pessimistic
10041 * about these assumptions (see the sketch below).
10042 *
10043 * Assumptions:
10044 * dst_map is locked on entry and is returned locked on success,
10045 * unlocked on error.
10046 */
10047
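/*
 * Illustrative sketch (not part of the original source): each pass of the
 * loop below copies the largest chunk that fits in both the current
 * destination entry and the current source copy entry, without exceeding
 * what is left to copy:
 *
 *	dst_size  = entry->vme_end - start;
 *	src_size  = copy_entry->vme_end - (copy_entry->vme_start + src_offset);
 *	copy_size = MIN(MIN(dst_size, src_size), amount_left);
 *
 * which is exactly the clamping done at the top of the while loop before
 * calling vm_fault_copy().
 */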
10048 static kern_return_t
10049 vm_map_copy_overwrite_unaligned(
10050 vm_map_t dst_map,
10051 vm_map_entry_t entry,
10052 vm_map_copy_t copy,
10053 vm_map_offset_t start,
10054 boolean_t discard_on_success)
10055 {
10056 vm_map_entry_t copy_entry;
10057 vm_map_entry_t copy_entry_next;
10058 vm_map_version_t version;
10059 vm_object_t dst_object;
10060 vm_object_offset_t dst_offset;
10061 vm_object_offset_t src_offset;
10062 vm_object_offset_t entry_offset;
10063 vm_map_offset_t entry_end;
10064 vm_map_size_t src_size,
10065 dst_size,
10066 copy_size,
10067 amount_left;
10068 kern_return_t kr = KERN_SUCCESS;
10069
10070
10071 copy_entry = vm_map_copy_first_entry(copy);
10072
10073 vm_map_lock_write_to_read(dst_map);
10074
10075 src_offset = copy->offset - trunc_page_mask_64(copy->offset, VM_MAP_COPY_PAGE_MASK(copy));
10076 amount_left = copy->size;
10077 /*
10078 * The copy is unaligned, so we never clipped this entry; we need the offset
10079 * into the vm_object, not just into the data.
10080 */
10081 while (amount_left > 0) {
10082 if (entry == vm_map_to_entry(dst_map)) {
10083 vm_map_unlock_read(dst_map);
10084 return KERN_INVALID_ADDRESS;
10085 }
10086
10087 /* "start" must be within the current map entry */
10088 assert((start >= entry->vme_start) && (start < entry->vme_end));
10089
10090 dst_offset = start - entry->vme_start;
10091
10092 dst_size = entry->vme_end - start;
10093
10094 src_size = copy_entry->vme_end -
10095 (copy_entry->vme_start + src_offset);
10096
10097 if (dst_size < src_size) {
10098 /*
10099 * we can only copy dst_size bytes before
10100 * we have to get the next destination entry
10101 */
10102 copy_size = dst_size;
10103 } else {
10104 /*
10105 * we can only copy src_size bytes before
10106 * we have to get the next source copy entry
10107 */
10108 copy_size = src_size;
10109 }
10110
10111 if (copy_size > amount_left) {
10112 copy_size = amount_left;
10113 }
10114 /*
10115 * Entry needs copy: create a shadow object for the
10116 * copy-on-write region.
10117 */
10118 if (entry->needs_copy &&
10119 ((entry->protection & VM_PROT_WRITE) != 0)) {
10120 if (vm_map_lock_read_to_write(dst_map)) {
10121 vm_map_lock_read(dst_map);
10122 goto RetryLookup;
10123 }
10124 VME_OBJECT_SHADOW(entry,
10125 (vm_map_size_t)(entry->vme_end
10126 - entry->vme_start));
10127 entry->needs_copy = FALSE;
10128 vm_map_lock_write_to_read(dst_map);
10129 }
10130 dst_object = VME_OBJECT(entry);
10131 /*
10132 * Unlike with the virtual (aligned) copy, we're going
10133 * to fault on this memory, so we need a target object.
10134 */
10135 if (dst_object == VM_OBJECT_NULL) {
10136 if (vm_map_lock_read_to_write(dst_map)) {
10137 vm_map_lock_read(dst_map);
10138 goto RetryLookup;
10139 }
10140 dst_object = vm_object_allocate((vm_map_size_t)
10141 entry->vme_end - entry->vme_start);
10142 VME_OBJECT_SET(entry, dst_object);
10143 VME_OFFSET_SET(entry, 0);
10144 assert(entry->use_pmap);
10145 vm_map_lock_write_to_read(dst_map);
10146 }
10147 /*
10148 * Take an object reference and unlock map. The "entry" may
10149 * disappear or change when the map is unlocked.
10150 */
10151 vm_object_reference(dst_object);
10152 version.main_timestamp = dst_map->timestamp;
10153 entry_offset = VME_OFFSET(entry);
10154 entry_end = entry->vme_end;
10155 vm_map_unlock_read(dst_map);
10156 /*
10157 * Copy as much as possible in one pass
10158 */
10159 kr = vm_fault_copy(
10160 VME_OBJECT(copy_entry),
10161 VME_OFFSET(copy_entry) + src_offset,
10162 &copy_size,
10163 dst_object,
10164 entry_offset + dst_offset,
10165 dst_map,
10166 &version,
10167 THREAD_UNINT );
10168
10169 start += copy_size;
10170 src_offset += copy_size;
10171 amount_left -= copy_size;
10172 /*
10173 * Release the object reference
10174 */
10175 vm_object_deallocate(dst_object);
10176 /*
10177 * If a hard error occurred, return it now
10178 */
10179 if (kr != KERN_SUCCESS) {
10180 return kr;
10181 }
10182
10183 if ((copy_entry->vme_start + src_offset) == copy_entry->vme_end
10184 || amount_left == 0) {
10185 /*
10186 * all done with this copy entry, dispose.
10187 */
10188 copy_entry_next = copy_entry->vme_next;
10189
10190 if (discard_on_success) {
10191 vm_map_copy_entry_unlink(copy, copy_entry);
10192 assert(!copy_entry->is_sub_map);
10193 vm_object_deallocate(VME_OBJECT(copy_entry));
10194 vm_map_copy_entry_dispose(copy, copy_entry);
10195 }
10196
10197 if (copy_entry_next == vm_map_copy_to_entry(copy) &&
10198 amount_left) {
10199 /*
10200 * not finished copying but ran out of source
10201 */
10202 return KERN_INVALID_ADDRESS;
10203 }
10204
10205 copy_entry = copy_entry_next;
10206
10207 src_offset = 0;
10208 }
10209
10210 if (amount_left == 0) {
10211 return KERN_SUCCESS;
10212 }
10213
10214 vm_map_lock_read(dst_map);
10215 if (version.main_timestamp == dst_map->timestamp) {
10216 if (start == entry_end) {
10217 /*
10218 * destination region is split. Use the version
10219 * information to avoid a lookup in the normal
10220 * case.
10221 */
10222 entry = entry->vme_next;
10223 /*
10224 * should be contiguous. Fail if we encounter
10225 * a hole in the destination.
10226 */
10227 if (start != entry->vme_start) {
10228 vm_map_unlock_read(dst_map);
10229 return KERN_INVALID_ADDRESS;
10230 }
10231 }
10232 } else {
10233 /*
10234 * Map version check failed.
10235 * we must look up the entry because somebody
10236 * might have changed the map behind our backs.
10237 */
10238 RetryLookup:
10239 if (!vm_map_lookup_entry(dst_map, start, &entry)) {
10240 vm_map_unlock_read(dst_map);
10241 return KERN_INVALID_ADDRESS;
10242 }
10243 }
10244 }/* while */
10245
10246 return KERN_SUCCESS;
10247 }/* vm_map_copy_overwrite_unaligned */
10248
10249 /*
10250 * Routine: vm_map_copy_overwrite_aligned [internal use only]
10251 *
10252 * Description:
10253 * Does all the vm_trickery possible for whole pages.
10254 *
10255 * Implementation:
10256 *
10257 * If there are no permanent objects in the destination,
10258 * and the source and destination map entry zones match,
10259 * and the destination map entry is not shared,
10260 * then the map entries can be deleted and replaced
10261 * with those from the copy. The following code is the
10262 * basic idea of what to do, but there are lots of annoying
10263 * little details about getting protection and inheritance
10264 * right. Should add protection, inheritance, and sharing checks
10265 * to the above pass and make sure that no wiring is involved.
10266 */
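/*
 * Illustrative sketch (not part of the original source): the "temporary
 * unshared memory" fast path below boils down to this eligibility test on
 * the destination entry and its backing object:
 *
 *	object = VME_OBJECT(entry);
 *	eligible = (!entry->is_shared &&
 *	            (object == VM_OBJECT_NULL ||
 *	             (object->internal && !object->true_share))) ||
 *	           entry->needs_copy;
 *
 * Eligible entries simply take over the copy entry's object and offset;
 * anything else falls back to a physical copy via vm_fault_copy().
 */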
10267
10268 int vm_map_copy_overwrite_aligned_src_not_internal = 0;
10269 int vm_map_copy_overwrite_aligned_src_not_symmetric = 0;
10270 int vm_map_copy_overwrite_aligned_src_large = 0;
10271
10272 static kern_return_t
10273 vm_map_copy_overwrite_aligned(
10274 vm_map_t dst_map,
10275 vm_map_entry_t tmp_entry,
10276 vm_map_copy_t copy,
10277 vm_map_offset_t start,
10278 __unused pmap_t pmap)
10279 {
10280 vm_object_t object;
10281 vm_map_entry_t copy_entry;
10282 vm_map_size_t copy_size;
10283 vm_map_size_t size;
10284 vm_map_entry_t entry;
10285
10286 while ((copy_entry = vm_map_copy_first_entry(copy))
10287 != vm_map_copy_to_entry(copy)) {
10288 copy_size = (copy_entry->vme_end - copy_entry->vme_start);
10289
10290 entry = tmp_entry;
10291 if (entry->is_sub_map) {
10292 /* unnested when clipped earlier */
10293 assert(!entry->use_pmap);
10294 }
10295 if (entry == vm_map_to_entry(dst_map)) {
10296 vm_map_unlock(dst_map);
10297 return KERN_INVALID_ADDRESS;
10298 }
10299 size = (entry->vme_end - entry->vme_start);
10300 /*
10301 * Make sure that no holes popped up in the
10302 * address map, and that the protection is
10303 * still valid, in case the map was unlocked
10304 * earlier.
10305 */
10306
10307 if ((entry->vme_start != start) || ((entry->is_sub_map)
10308 && !entry->needs_copy)) {
10309 vm_map_unlock(dst_map);
10310 return KERN_INVALID_ADDRESS;
10311 }
10312 assert(entry != vm_map_to_entry(dst_map));
10313
10314 /*
10315 * Check protection again
10316 */
10317
10318 if (!(entry->protection & VM_PROT_WRITE)) {
10319 vm_map_unlock(dst_map);
10320 return KERN_PROTECTION_FAILURE;
10321 }
10322
10323 /*
10324 * Adjust to source size first
10325 */
10326
10327 if (copy_size < size) {
10328 if (entry->map_aligned &&
10329 !VM_MAP_PAGE_ALIGNED(entry->vme_start + copy_size,
10330 VM_MAP_PAGE_MASK(dst_map))) {
10331 /* no longer map-aligned */
10332 entry->map_aligned = FALSE;
10333 }
10334 vm_map_clip_end(dst_map, entry, entry->vme_start + copy_size);
10335 size = copy_size;
10336 }
10337
10338 /*
10339 * Adjust to destination size
10340 */
10341
10342 if (size < copy_size) {
10343 vm_map_copy_clip_end(copy, copy_entry,
10344 copy_entry->vme_start + size);
10345 copy_size = size;
10346 }
10347
10348 assert((entry->vme_end - entry->vme_start) == size);
10349 assert((tmp_entry->vme_end - tmp_entry->vme_start) == size);
10350 assert((copy_entry->vme_end - copy_entry->vme_start) == size);
10351
10352 /*
10353 * If the destination contains temporary unshared memory,
10354 * we can perform the copy by throwing it away and
10355 * installing the source data.
10356 */
10357
10358 object = VME_OBJECT(entry);
10359 if ((!entry->is_shared &&
10360 ((object == VM_OBJECT_NULL) ||
10361 (object->internal && !object->true_share))) ||
10362 entry->needs_copy) {
10363 vm_object_t old_object = VME_OBJECT(entry);
10364 vm_object_offset_t old_offset = VME_OFFSET(entry);
10365 vm_object_offset_t offset;
10366
10367 /*
10368 * Ensure that the source and destination aren't
10369 * identical
10370 */
10371 if (old_object == VME_OBJECT(copy_entry) &&
10372 old_offset == VME_OFFSET(copy_entry)) {
10373 vm_map_copy_entry_unlink(copy, copy_entry);
10374 vm_map_copy_entry_dispose(copy, copy_entry);
10375
10376 if (old_object != VM_OBJECT_NULL) {
10377 vm_object_deallocate(old_object);
10378 }
10379
10380 start = tmp_entry->vme_end;
10381 tmp_entry = tmp_entry->vme_next;
10382 continue;
10383 }
10384
10385 #if XNU_TARGET_OS_OSX
10386 #define __TRADEOFF1_OBJ_SIZE (64 * 1024 * 1024) /* 64 MB */
10387 #define __TRADEOFF1_COPY_SIZE (128 * 1024) /* 128 KB */
10388 if (VME_OBJECT(copy_entry) != VM_OBJECT_NULL &&
10389 VME_OBJECT(copy_entry)->vo_size >= __TRADEOFF1_OBJ_SIZE &&
10390 copy_size <= __TRADEOFF1_COPY_SIZE) {
10391 /*
10392 * Virtual vs. Physical copy tradeoff #1.
10393 *
10394 * Copying only a few pages out of a large
10395 * object: do a physical copy instead of
10396 * a virtual copy, to avoid possibly keeping
10397 * the entire large object alive because of
10398 * those few copy-on-write pages.
10399 */
10400 vm_map_copy_overwrite_aligned_src_large++;
10401 goto slow_copy;
10402 }
10403 #endif /* XNU_TARGET_OS_OSX */
10404
10405 if ((dst_map->pmap != kernel_pmap) &&
10406 (VME_ALIAS(entry) >= VM_MEMORY_MALLOC) &&
10407 (VME_ALIAS(entry) <= VM_MEMORY_MALLOC_MEDIUM)) {
10408 vm_object_t new_object, new_shadow;
10409
10410 /*
10411 * We're about to map something over a mapping
10412 * established by malloc()...
10413 */
10414 new_object = VME_OBJECT(copy_entry);
10415 if (new_object != VM_OBJECT_NULL) {
10416 vm_object_lock_shared(new_object);
10417 }
10418 while (new_object != VM_OBJECT_NULL &&
10419 #if XNU_TARGET_OS_OSX
10420 !new_object->true_share &&
10421 new_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
10422 #endif /* XNU_TARGET_OS_OSX */
10423 new_object->internal) {
10424 new_shadow = new_object->shadow;
10425 if (new_shadow == VM_OBJECT_NULL) {
10426 break;
10427 }
10428 vm_object_lock_shared(new_shadow);
10429 vm_object_unlock(new_object);
10430 new_object = new_shadow;
10431 }
10432 if (new_object != VM_OBJECT_NULL) {
10433 if (!new_object->internal) {
10434 /*
10435 * The new mapping is backed
10436 * by an external object. We
10437 * don't want malloc'ed memory
10438 * to be replaced with such a
10439 * non-anonymous mapping, so
10440 * let's go off the optimized
10441 * path...
10442 */
10443 vm_map_copy_overwrite_aligned_src_not_internal++;
10444 vm_object_unlock(new_object);
10445 goto slow_copy;
10446 }
10447 #if XNU_TARGET_OS_OSX
10448 if (new_object->true_share ||
10449 new_object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
10450 /*
10451 * Same if there's a "true_share"
10452 * object in the shadow chain, or
10453 * an object with a non-default
10454 * (SYMMETRIC) copy strategy.
10455 */
10456 vm_map_copy_overwrite_aligned_src_not_symmetric++;
10457 vm_object_unlock(new_object);
10458 goto slow_copy;
10459 }
10460 #endif /* XNU_TARGET_OS_OSX */
10461 vm_object_unlock(new_object);
10462 }
10463 /*
10464 * The new mapping is still backed by
10465 * anonymous (internal) memory, so it's
10466 * OK to substitute it for the original
10467 * malloc() mapping.
10468 */
10469 }
10470
10471 if (old_object != VM_OBJECT_NULL) {
10472 if (entry->is_sub_map) {
10473 if (entry->use_pmap) {
10474 #ifndef NO_NESTED_PMAP
10475 pmap_unnest(dst_map->pmap,
10476 (addr64_t)entry->vme_start,
10477 entry->vme_end - entry->vme_start);
10478 #endif /* NO_NESTED_PMAP */
10479 if (dst_map->mapped_in_other_pmaps) {
10480 /* clean up parent */
10481 /* map/maps */
10482 vm_map_submap_pmap_clean(
10483 dst_map, entry->vme_start,
10484 entry->vme_end,
10485 VME_SUBMAP(entry),
10486 VME_OFFSET(entry));
10487 }
10488 } else {
10489 vm_map_submap_pmap_clean(
10490 dst_map, entry->vme_start,
10491 entry->vme_end,
10492 VME_SUBMAP(entry),
10493 VME_OFFSET(entry));
10494 }
10495 vm_map_deallocate(VME_SUBMAP(entry));
10496 } else {
10497 if (dst_map->mapped_in_other_pmaps) {
10498 vm_object_pmap_protect_options(
10499 VME_OBJECT(entry),
10500 VME_OFFSET(entry),
10501 entry->vme_end
10502 - entry->vme_start,
10503 PMAP_NULL,
10504 PAGE_SIZE,
10505 entry->vme_start,
10506 VM_PROT_NONE,
10507 PMAP_OPTIONS_REMOVE);
10508 } else {
10509 pmap_remove_options(
10510 dst_map->pmap,
10511 (addr64_t)(entry->vme_start),
10512 (addr64_t)(entry->vme_end),
10513 PMAP_OPTIONS_REMOVE);
10514 }
10515 vm_object_deallocate(old_object);
10516 }
10517 }
10518
10519 if (entry->iokit_acct) {
10520 /* keep using iokit accounting */
10521 entry->use_pmap = FALSE;
10522 } else {
10523 /* use pmap accounting */
10524 entry->use_pmap = TRUE;
10525 }
10526 entry->is_sub_map = FALSE;
10527 VME_OBJECT_SET(entry, VME_OBJECT(copy_entry));
10528 object = VME_OBJECT(entry);
10529 entry->needs_copy = copy_entry->needs_copy;
10530 entry->wired_count = 0;
10531 entry->user_wired_count = 0;
10532 offset = VME_OFFSET(copy_entry);
10533 VME_OFFSET_SET(entry, offset);
10534
10535 vm_map_copy_entry_unlink(copy, copy_entry);
10536 vm_map_copy_entry_dispose(copy, copy_entry);
10537
10538 /*
10539 * We could try to push pages into the pmap at this point, BUT
10540 * this optimization only saved on average 2 us per page if ALL
10541 * the pages in the source were currently mapped
10542 * and ALL the pages in the dest were touched; if fewer
10543 * than 2/3 of the pages were touched, this optimization actually cost more cycles.
10544 * It also puts a lot of pressure on the pmap layer w.r.t. mapping structures.
10545 */
10546
10547 /*
10548 * Set up for the next iteration. The map
10549 * has not been unlocked, so the next
10550 * address should be at the end of this
10551 * entry, and the next map entry should be
10552 * the one following it.
10553 */
10554
10555 start = tmp_entry->vme_end;
10556 tmp_entry = tmp_entry->vme_next;
10557 } else {
10558 vm_map_version_t version;
10559 vm_object_t dst_object;
10560 vm_object_offset_t dst_offset;
10561 kern_return_t r;
10562
10563 slow_copy:
10564 if (entry->needs_copy) {
10565 VME_OBJECT_SHADOW(entry,
10566 (entry->vme_end -
10567 entry->vme_start));
10568 entry->needs_copy = FALSE;
10569 }
10570
10571 dst_object = VME_OBJECT(entry);
10572 dst_offset = VME_OFFSET(entry);
10573
10574 /*
10575 * Take an object reference, and record
10576 * the map version information so that the
10577 * map can be safely unlocked.
10578 */
10579
10580 if (dst_object == VM_OBJECT_NULL) {
10581 /*
10582 * We would usually have just taken the
10583 * optimized path above if the destination
10584 * object has not been allocated yet. But we
10585 * now disable that optimization if the copy
10586 * entry's object is not backed by anonymous
10587 * memory to avoid replacing malloc'ed
10588 * (i.e. re-usable) anonymous memory with a
10589 * not-so-anonymous mapping.
10590 * So we have to handle this case here and
10591 * allocate a new VM object for this map entry.
10592 */
10593 dst_object = vm_object_allocate(
10594 entry->vme_end - entry->vme_start);
10595 dst_offset = 0;
10596 VME_OBJECT_SET(entry, dst_object);
10597 VME_OFFSET_SET(entry, dst_offset);
10598 assert(entry->use_pmap);
10599 }
10600
10601 vm_object_reference(dst_object);
10602
10603 /* account for unlock bumping up timestamp */
10604 version.main_timestamp = dst_map->timestamp + 1;
10605
10606 vm_map_unlock(dst_map);
10607
10608 /*
10609 * Copy as much as possible in one pass
10610 */
10611
10612 copy_size = size;
10613 r = vm_fault_copy(
10614 VME_OBJECT(copy_entry),
10615 VME_OFFSET(copy_entry),
10616 &copy_size,
10617 dst_object,
10618 dst_offset,
10619 dst_map,
10620 &version,
10621 THREAD_UNINT );
10622
10623 /*
10624 * Release the object reference
10625 */
10626
10627 vm_object_deallocate(dst_object);
10628
10629 /*
10630 * If a hard error occurred, return it now
10631 */
10632
10633 if (r != KERN_SUCCESS) {
10634 return r;
10635 }
10636
10637 if (copy_size != 0) {
10638 /*
10639 * Dispose of the copied region
10640 */
10641
10642 vm_map_copy_clip_end(copy, copy_entry,
10643 copy_entry->vme_start + copy_size);
10644 vm_map_copy_entry_unlink(copy, copy_entry);
10645 vm_object_deallocate(VME_OBJECT(copy_entry));
10646 vm_map_copy_entry_dispose(copy, copy_entry);
10647 }
10648
10649 /*
10650 * Pick up in the destination map where we left off.
10651 *
10652 * Use the version information to avoid a lookup
10653 * in the normal case.
10654 */
10655
10656 start += copy_size;
10657 vm_map_lock(dst_map);
10658 if (version.main_timestamp == dst_map->timestamp &&
10659 copy_size != 0) {
10660 /* We can safely use saved tmp_entry value */
10661
10662 if (tmp_entry->map_aligned &&
10663 !VM_MAP_PAGE_ALIGNED(
10664 start,
10665 VM_MAP_PAGE_MASK(dst_map))) {
10666 /* no longer map-aligned */
10667 tmp_entry->map_aligned = FALSE;
10668 }
10669 vm_map_clip_end(dst_map, tmp_entry, start);
10670 tmp_entry = tmp_entry->vme_next;
10671 } else {
10672 /* Must do lookup of tmp_entry */
10673
10674 if (!vm_map_lookup_entry(dst_map, start, &tmp_entry)) {
10675 vm_map_unlock(dst_map);
10676 return KERN_INVALID_ADDRESS;
10677 }
10678 if (tmp_entry->map_aligned &&
10679 !VM_MAP_PAGE_ALIGNED(
10680 start,
10681 VM_MAP_PAGE_MASK(dst_map))) {
10682 /* no longer map-aligned */
10683 tmp_entry->map_aligned = FALSE;
10684 }
10685 vm_map_clip_start(dst_map, tmp_entry, start);
10686 }
10687 }
10688 }/* while */
10689
10690 return KERN_SUCCESS;
10691 }/* vm_map_copy_overwrite_aligned */
10692
10693 /*
10694 * Routine: vm_map_copyin_kernel_buffer [internal use only]
10695 *
10696 * Description:
10697 * Copy in data to a kernel buffer from space in the
10698 * source map. The original space may be optionally
10699 * deallocated.
10700 *
10701 * If successful, returns a new copy object.
10702 */
10703 static kern_return_t
10704 vm_map_copyin_kernel_buffer(
10705 vm_map_t src_map,
10706 vm_map_offset_t src_addr,
10707 vm_map_size_t len,
10708 boolean_t src_destroy,
10709 vm_map_copy_t *copy_result)
10710 {
10711 kern_return_t kr;
10712 vm_map_copy_t copy;
10713
10714 if (len > msg_ool_size_small) {
10715 return KERN_INVALID_ARGUMENT;
10716 }
10717
10718 copy = zalloc_flags(vm_map_copy_zone, Z_WAITOK | Z_ZERO);
10719 if (copy == VM_MAP_COPY_NULL) {
10720 return KERN_RESOURCE_SHORTAGE;
10721 }
10722 copy->cpy_kdata = kheap_alloc(KHEAP_DATA_BUFFERS, len, Z_WAITOK);
10723 if (copy->cpy_kdata == NULL) {
10724 zfree(vm_map_copy_zone, copy);
10725 return KERN_RESOURCE_SHORTAGE;
10726 }
10727
10728 copy->type = VM_MAP_COPY_KERNEL_BUFFER;
10729 copy->size = len;
10730 copy->offset = 0;
10731
10732 kr = copyinmap(src_map, src_addr, copy->cpy_kdata, (vm_size_t)len);
10733 if (kr != KERN_SUCCESS) {
10734 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, len);
10735 zfree(vm_map_copy_zone, copy);
10736 return kr;
10737 }
10738 if (src_destroy) {
10739 (void) vm_map_remove(
10740 src_map,
10741 vm_map_trunc_page(src_addr,
10742 VM_MAP_PAGE_MASK(src_map)),
10743 vm_map_round_page(src_addr + len,
10744 VM_MAP_PAGE_MASK(src_map)),
10745 (VM_MAP_REMOVE_INTERRUPTIBLE |
10746 VM_MAP_REMOVE_WAIT_FOR_KWIRE |
10747 ((src_map == kernel_map) ? VM_MAP_REMOVE_KUNWIRE : VM_MAP_REMOVE_NO_FLAGS)));
10748 }
10749 *copy_result = copy;
10750 return KERN_SUCCESS;
10751 }
10752
10753 /*
10754 * Routine: vm_map_copyout_kernel_buffer [internal use only]
10755 *
10756 * Description:
10757 * Copy out data from a kernel buffer into space in the
10758 * destination map. The space may optionally be dynamically
10759 * allocated.
10760 *
10761 * If successful, consumes the copy object.
10762 * Otherwise, the caller is responsible for it.
10763 */
10764 static int vm_map_copyout_kernel_buffer_failures = 0;
10765 static kern_return_t
10766 vm_map_copyout_kernel_buffer(
10767 vm_map_t map,
10768 vm_map_address_t *addr, /* IN/OUT */
10769 vm_map_copy_t copy,
10770 vm_map_size_t copy_size,
10771 boolean_t overwrite,
10772 boolean_t consume_on_success)
10773 {
10774 kern_return_t kr = KERN_SUCCESS;
10775 thread_t thread = current_thread();
10776
10777 assert(copy->size == copy_size);
10778
10779 /*
10780 * check for corrupted vm_map_copy structure
10781 */
10782 if (copy_size > msg_ool_size_small || copy->offset) {
10783 panic("Invalid vm_map_copy_t sz:%lld, ofst:%lld",
10784 (long long)copy->size, (long long)copy->offset);
10785 }
10786
10787 if (!overwrite) {
10788 /*
10789 * Allocate space in the target map for the data
10790 */
10791 *addr = 0;
10792 kr = vm_map_enter(map,
10793 addr,
10794 vm_map_round_page(copy_size,
10795 VM_MAP_PAGE_MASK(map)),
10796 (vm_map_offset_t) 0,
10797 VM_FLAGS_ANYWHERE,
10798 VM_MAP_KERNEL_FLAGS_NONE,
10799 VM_KERN_MEMORY_NONE,
10800 VM_OBJECT_NULL,
10801 (vm_object_offset_t) 0,
10802 FALSE,
10803 VM_PROT_DEFAULT,
10804 VM_PROT_ALL,
10805 VM_INHERIT_DEFAULT);
10806 if (kr != KERN_SUCCESS) {
10807 return kr;
10808 }
10809 #if KASAN
10810 if (map->pmap == kernel_pmap) {
10811 kasan_notify_address(*addr, copy->size);
10812 }
10813 #endif
10814 }
10815
10816 /*
10817 * Copyout the data from the kernel buffer to the target map.
10818 */
10819 if (thread->map == map) {
10820 /*
10821 * If the target map is the current map, just do
10822 * the copy.
10823 */
10824 assert((vm_size_t)copy_size == copy_size);
10825 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10826 kr = KERN_INVALID_ADDRESS;
10827 }
10828 } else {
10829 vm_map_t oldmap;
10830
10831 /*
10832 * If the target map is another map, assume the
10833 * target's address space identity for the duration
10834 * of the copy.
10835 */
10836 vm_map_reference(map);
10837 oldmap = vm_map_switch(map);
10838
10839 assert((vm_size_t)copy_size == copy_size);
10840 if (copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)) {
10841 vm_map_copyout_kernel_buffer_failures++;
10842 kr = KERN_INVALID_ADDRESS;
10843 }
10844
10845 (void) vm_map_switch(oldmap);
10846 vm_map_deallocate(map);
10847 }
10848
10849 if (kr != KERN_SUCCESS) {
10850 /* the copy failed, clean up */
10851 if (!overwrite) {
10852 /*
10853 * Deallocate the space we allocated in the target map.
10854 */
10855 (void) vm_map_remove(
10856 map,
10857 vm_map_trunc_page(*addr,
10858 VM_MAP_PAGE_MASK(map)),
10859 vm_map_round_page((*addr +
10860 vm_map_round_page(copy_size,
10861 VM_MAP_PAGE_MASK(map))),
10862 VM_MAP_PAGE_MASK(map)),
10863 VM_MAP_REMOVE_NO_FLAGS);
10864 *addr = 0;
10865 }
10866 } else {
10867 /* copy was successful, discard the copy structure */
10868 if (consume_on_success) {
10869 kheap_free(KHEAP_DATA_BUFFERS, copy->cpy_kdata, copy_size);
10870 zfree(vm_map_copy_zone, copy);
10871 }
10872 }
10873
10874 return kr;
10875 }
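/*
 * Illustrative sketch (not part of the original source): when the target
 * map is not the current thread's map, the routine above temporarily
 * assumes the target's address space identity around the copyout:
 *
 *	vm_map_reference(map);
 *	oldmap = vm_map_switch(map);
 *	...copyout(copy->cpy_kdata, *addr, (vm_size_t)copy_size)...
 *	(void) vm_map_switch(oldmap);
 *	vm_map_deallocate(map);
 *
 * The extra reference keeps "map" alive for the duration of the switch.
 */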
10876
10877 /*
10878 * Routine: vm_map_copy_insert [internal use only]
10879 *
10880 * Description:
10881 * Link a copy chain ("copy") into a map at the
10882 * specified location (after "where").
10883 * Side effects:
10884 * The copy chain is destroyed.
10885 */
10886 static void
10887 vm_map_copy_insert(
10888 vm_map_t map,
10889 vm_map_entry_t after_where,
10890 vm_map_copy_t copy)
10891 {
10892 vm_map_entry_t entry;
10893
10894 while (vm_map_copy_first_entry(copy) != vm_map_copy_to_entry(copy)) {
10895 entry = vm_map_copy_first_entry(copy);
10896 vm_map_copy_entry_unlink(copy, entry);
10897 vm_map_store_entry_link(map, after_where, entry,
10898 VM_MAP_KERNEL_FLAGS_NONE);
10899 after_where = entry;
10900 }
10901 zfree(vm_map_copy_zone, copy);
10902 }
10903
10904 void
10905 vm_map_copy_remap(
10906 vm_map_t map,
10907 vm_map_entry_t where,
10908 vm_map_copy_t copy,
10909 vm_map_offset_t adjustment,
10910 vm_prot_t cur_prot,
10911 vm_prot_t max_prot,
10912 vm_inherit_t inheritance)
10913 {
10914 vm_map_entry_t copy_entry, new_entry;
10915
10916 for (copy_entry = vm_map_copy_first_entry(copy);
10917 copy_entry != vm_map_copy_to_entry(copy);
10918 copy_entry = copy_entry->vme_next) {
10919 /* get a new VM map entry for the map */
10920 new_entry = vm_map_entry_create(map,
10921 !map->hdr.entries_pageable);
10922 /* copy the "copy entry" to the new entry */
10923 vm_map_entry_copy(map, new_entry, copy_entry);
10924 /* adjust "start" and "end" */
10925 new_entry->vme_start += adjustment;
10926 new_entry->vme_end += adjustment;
10927 /* clear some attributes */
10928 new_entry->inheritance = inheritance;
10929 new_entry->protection = cur_prot;
10930 new_entry->max_protection = max_prot;
10931 new_entry->behavior = VM_BEHAVIOR_DEFAULT;
10932 /* take an extra reference on the entry's "object" */
10933 if (new_entry->is_sub_map) {
10934 assert(!new_entry->use_pmap); /* not nested */
10935 vm_map_lock(VME_SUBMAP(new_entry));
10936 vm_map_reference(VME_SUBMAP(new_entry));
10937 vm_map_unlock(VME_SUBMAP(new_entry));
10938 } else {
10939 vm_object_reference(VME_OBJECT(new_entry));
10940 }
10941 /* insert the new entry in the map */
10942 vm_map_store_entry_link(map, where, new_entry,
10943 VM_MAP_KERNEL_FLAGS_NONE);
10944 /* continue inserting the "copy entries" after the new entry */
10945 where = new_entry;
10946 }
10947 }
10948
10949
10950 /*
10951 * Returns true if *size matches (or is in the range of) copy->size.
10952 * Upon returning true, the *size field is updated with the actual size of the
10953 * copy object (may be different for VM_MAP_COPY_ENTRY_LIST types)
10954 */
10955 boolean_t
10956 vm_map_copy_validate_size(
10957 vm_map_t dst_map,
10958 vm_map_copy_t copy,
10959 vm_map_size_t *size)
10960 {
10961 if (copy == VM_MAP_COPY_NULL) {
10962 return FALSE;
10963 }
10964 vm_map_size_t copy_sz = copy->size;
10965 vm_map_size_t sz = *size;
10966 switch (copy->type) {
10967 case VM_MAP_COPY_OBJECT:
10968 case VM_MAP_COPY_KERNEL_BUFFER:
10969 if (sz == copy_sz) {
10970 return TRUE;
10971 }
10972 break;
10973 case VM_MAP_COPY_ENTRY_LIST:
10974 /*
10975 * potential page-size rounding prevents us from exactly
10976 * validating this flavor of vm_map_copy, but we can at least
10977 * assert that it's within a range.
10978 */
10979 if (copy_sz >= sz &&
10980 copy_sz <= vm_map_round_page(sz, VM_MAP_PAGE_MASK(dst_map))) {
10981 *size = copy_sz;
10982 return TRUE;
10983 }
10984 break;
10985 default:
10986 break;
10987 }
10988 return FALSE;
10989 }
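/*
 * Illustrative usage sketch (not part of the original source), assuming a
 * caller holding "copy" and a caller-supplied "size" (both hypothetical):
 *
 *	vm_map_size_t size = caller_supplied_size;
 *
 *	if (!vm_map_copy_validate_size(dst_map, copy, &size)) {
 *		return KERN_FAILURE;
 *	}
 *	kr = vm_map_copyout_size(dst_map, &dst_addr, copy, size);
 *
 * On success, "size" holds the exact copy size, which satisfies the size
 * check performed by vm_map_copyout_size()/vm_map_copyout_internal().
 */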
10990
10991 /*
10992 * Routine: vm_map_copyout_size
10993 *
10994 * Description:
10995 * Copy out a copy chain ("copy") into newly-allocated
10996 * space in the destination map. Uses a prevalidated
10997 * size for the copy object (vm_map_copy_validate_size).
10998 *
10999 * If successful, consumes the copy object.
11000 * Otherwise, the caller is responsible for it.
11001 */
11002 kern_return_t
11003 vm_map_copyout_size(
11004 vm_map_t dst_map,
11005 vm_map_address_t *dst_addr, /* OUT */
11006 vm_map_copy_t copy,
11007 vm_map_size_t copy_size)
11008 {
11009 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy_size,
11010 TRUE, /* consume_on_success */
11011 VM_PROT_DEFAULT,
11012 VM_PROT_ALL,
11013 VM_INHERIT_DEFAULT);
11014 }
11015
11016 /*
11017 * Routine: vm_map_copyout
11018 *
11019 * Description:
11020 * Copy out a copy chain ("copy") into newly-allocated
11021 * space in the destination map.
11022 *
11023 * If successful, consumes the copy object.
11024 * Otherwise, the caller is responsible for it.
11025 */
11026 kern_return_t
11027 vm_map_copyout(
11028 vm_map_t dst_map,
11029 vm_map_address_t *dst_addr, /* OUT */
11030 vm_map_copy_t copy)
11031 {
11032 return vm_map_copyout_internal(dst_map, dst_addr, copy, copy ? copy->size : 0,
11033 TRUE, /* consume_on_success */
11034 VM_PROT_DEFAULT,
11035 VM_PROT_ALL,
11036 VM_INHERIT_DEFAULT);
11037 }
11038
11039 kern_return_t
11040 vm_map_copyout_internal(
11041 vm_map_t dst_map,
11042 vm_map_address_t *dst_addr, /* OUT */
11043 vm_map_copy_t copy,
11044 vm_map_size_t copy_size,
11045 boolean_t consume_on_success,
11046 vm_prot_t cur_protection,
11047 vm_prot_t max_protection,
11048 vm_inherit_t inheritance)
11049 {
11050 vm_map_size_t size;
11051 vm_map_size_t adjustment;
11052 vm_map_offset_t start;
11053 vm_object_offset_t vm_copy_start;
11054 vm_map_entry_t last;
11055 vm_map_entry_t entry;
11056 vm_map_entry_t hole_entry;
11057 vm_map_copy_t original_copy;
11058
11059 /*
11060 * Check for null copy object.
11061 */
11062
11063 if (copy == VM_MAP_COPY_NULL) {
11064 *dst_addr = 0;
11065 return KERN_SUCCESS;
11066 }
11067
11068 /*
11069 * Assert that the vm_map_copy is coming from the right
11070 * zone and hasn't been forged
11071 */
11072 vm_map_copy_require(copy);
11073
11074 if (copy->size != copy_size) {
11075 *dst_addr = 0;
11076 return KERN_FAILURE;
11077 }
11078
11079 /*
11080 * Check for special copy object, created
11081 * by vm_map_copyin_object.
11082 */
11083
11084 if (copy->type == VM_MAP_COPY_OBJECT) {
11085 vm_object_t object = copy->cpy_object;
11086 kern_return_t kr;
11087 vm_object_offset_t offset;
11088
11089 offset = vm_object_trunc_page(copy->offset);
11090 size = vm_map_round_page((copy_size +
11091 (vm_map_size_t)(copy->offset -
11092 offset)),
11093 VM_MAP_PAGE_MASK(dst_map));
11094 *dst_addr = 0;
11095 kr = vm_map_enter(dst_map, dst_addr, size,
11096 (vm_map_offset_t) 0, VM_FLAGS_ANYWHERE,
11097 VM_MAP_KERNEL_FLAGS_NONE,
11098 VM_KERN_MEMORY_NONE,
11099 object, offset, FALSE,
11100 VM_PROT_DEFAULT, VM_PROT_ALL,
11101 VM_INHERIT_DEFAULT);
11102 if (kr != KERN_SUCCESS) {
11103 return kr;
11104 }
11105 /* Account for non-page-aligned copy object */
11106 *dst_addr += (vm_map_offset_t)(copy->offset - offset);
11107 if (consume_on_success) {
11108 zfree(vm_map_copy_zone, copy);
11109 }
11110 return KERN_SUCCESS;
11111 }
11112
11113 /*
11114 * Check for special kernel buffer allocated
11115 * by new_ipc_kmsg_copyin.
11116 */
11117
11118 if (copy->type == VM_MAP_COPY_KERNEL_BUFFER) {
11119 return vm_map_copyout_kernel_buffer(dst_map, dst_addr,
11120 copy, copy_size, FALSE,
11121 consume_on_success);
11122 }
11123
11124 original_copy = copy;
11125 if (copy->cpy_hdr.page_shift != VM_MAP_PAGE_SHIFT(dst_map)) {
11126 kern_return_t kr;
11127 vm_map_copy_t target_copy;
11128 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
11129
11130 target_copy = VM_MAP_COPY_NULL;
11131 DEBUG4K_ADJUST("adjusting...\n");
11132 kr = vm_map_copy_adjust_to_target(
11133 copy,
11134 0, /* offset */
11135 copy->size, /* size */
11136 dst_map,
11137 TRUE, /* copy */
11138 &target_copy,
11139 &overmap_start,
11140 &overmap_end,
11141 &trimmed_start);
11142 if (kr != KERN_SUCCESS) {
11143 DEBUG4K_COPY("adjust failed 0x%x\n", kr);
11144 return kr;
11145 }
11146 DEBUG4K_COPY("copy %p (%d 0x%llx 0x%llx) dst_map %p (%d) target_copy %p (%d 0x%llx 0x%llx) overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx\n", copy, copy->cpy_hdr.page_shift, copy->offset, (uint64_t)copy->size, dst_map, VM_MAP_PAGE_SHIFT(dst_map), target_copy, target_copy->cpy_hdr.page_shift, target_copy->offset, (uint64_t)target_copy->size, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start);
11147 if (target_copy != copy) {
11148 copy = target_copy;
11149 }
11150 copy_size = copy->size;
11151 }
11152
11153 /*
11154 * Find space for the data
11155 */
11156
11157 vm_copy_start = vm_map_trunc_page((vm_map_size_t)copy->offset,
11158 VM_MAP_COPY_PAGE_MASK(copy));
11159 size = vm_map_round_page((vm_map_size_t)copy->offset + copy_size,
11160 VM_MAP_COPY_PAGE_MASK(copy))
11161 - vm_copy_start;
11162
11163
11164 StartAgain:;
11165
11166 vm_map_lock(dst_map);
11167 if (dst_map->disable_vmentry_reuse == TRUE) {
11168 VM_MAP_HIGHEST_ENTRY(dst_map, entry, start);
11169 last = entry;
11170 } else {
11171 if (dst_map->holelistenabled) {
11172 hole_entry = CAST_TO_VM_MAP_ENTRY(dst_map->holes_list);
11173
11174 if (hole_entry == NULL) {
11175 /*
11176 * No more space in the map?
11177 */
11178 vm_map_unlock(dst_map);
11179 return KERN_NO_SPACE;
11180 }
11181
11182 last = hole_entry;
11183 start = last->vme_start;
11184 } else {
11185 assert(first_free_is_valid(dst_map));
11186 start = ((last = dst_map->first_free) == vm_map_to_entry(dst_map)) ?
11187 vm_map_min(dst_map) : last->vme_end;
11188 }
11189 start = vm_map_round_page(start,
11190 VM_MAP_PAGE_MASK(dst_map));
11191 }
11192
11193 while (TRUE) {
11194 vm_map_entry_t next = last->vme_next;
11195 vm_map_offset_t end = start + size;
11196
11197 if ((end > dst_map->max_offset) || (end < start)) {
11198 if (dst_map->wait_for_space) {
11199 if (size <= (dst_map->max_offset - dst_map->min_offset)) {
11200 assert_wait((event_t) dst_map,
11201 THREAD_INTERRUPTIBLE);
11202 vm_map_unlock(dst_map);
11203 thread_block(THREAD_CONTINUE_NULL);
11204 goto StartAgain;
11205 }
11206 }
11207 vm_map_unlock(dst_map);
11208 return KERN_NO_SPACE;
11209 }
11210
11211 if (dst_map->holelistenabled) {
11212 if (last->vme_end >= end) {
11213 break;
11214 }
11215 } else {
11216 /*
11217 * If there are no more entries, we must win.
11218 *
11219 * OR
11220 *
11221 * If there is another entry, it must be
11222 * after the end of the potential new region.
11223 */
11224
11225 if (next == vm_map_to_entry(dst_map)) {
11226 break;
11227 }
11228
11229 if (next->vme_start >= end) {
11230 break;
11231 }
11232 }
11233
11234 last = next;
11235
11236 if (dst_map->holelistenabled) {
11237 if (last == CAST_TO_VM_MAP_ENTRY(dst_map->holes_list)) {
11238 /*
11239 * Wrapped around
11240 */
11241 vm_map_unlock(dst_map);
11242 return KERN_NO_SPACE;
11243 }
11244 start = last->vme_start;
11245 } else {
11246 start = last->vme_end;
11247 }
11248 start = vm_map_round_page(start,
11249 VM_MAP_PAGE_MASK(dst_map));
11250 }
11251
11252 if (dst_map->holelistenabled) {
11253 if (vm_map_lookup_entry(dst_map, last->vme_start, &last)) {
11254 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", last, (unsigned long long)last->vme_start);
11255 }
11256 }
11257
11258
11259 adjustment = start - vm_copy_start;
11260 if (!consume_on_success) {
11261 /*
11262 * We're not allowed to consume "copy", so we'll have to
11263 * copy its map entries into the destination map below.
11264 * No need to re-allocate map entries from the correct
11265 * (pageable or not) zone, since we'll get new map entries
11266 * during the transfer.
11267 * We'll also adjust the map entries's "start" and "end"
11268 * during the transfer, to keep "copy"'s entries consistent
11269 * with its "offset".
11270 */
11271 goto after_adjustments;
11272 }
11273
11274 /*
11275 * Since we're going to just drop the map
11276 * entries from the copy into the destination
11277 * map, they must come from the same pool.
11278 */
11279
11280 if (copy->cpy_hdr.entries_pageable != dst_map->hdr.entries_pageable) {
11281 /*
11282 * Mismatches occur when dealing with the default
11283 * pager.
11284 */
11285 zone_t old_zone;
11286 vm_map_entry_t next, new;
11287
11288 /*
11289 * Find the zone that the copies were allocated from
11290 */
11291
11292 entry = vm_map_copy_first_entry(copy);
11293
11294 /*
11295 * Reinitialize the copy so that vm_map_copy_entry_link
11296 * will work.
11297 */
11298 vm_map_store_copy_reset(copy, entry);
11299 copy->cpy_hdr.entries_pageable = dst_map->hdr.entries_pageable;
11300
11301 /*
11302 * Copy each entry.
11303 */
11304 while (entry != vm_map_copy_to_entry(copy)) {
11305 new = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11306 vm_map_entry_copy_full(new, entry);
11307 new->vme_no_copy_on_read = FALSE;
11308 assert(!new->iokit_acct);
11309 if (new->is_sub_map) {
11310 /* clr address space specifics */
11311 new->use_pmap = FALSE;
11312 }
11313 vm_map_copy_entry_link(copy,
11314 vm_map_copy_last_entry(copy),
11315 new);
11316 next = entry->vme_next;
11317 old_zone = entry->from_reserved_zone ? vm_map_entry_reserved_zone : vm_map_entry_zone;
11318 zfree(old_zone, entry);
11319 entry = next;
11320 }
11321 }
11322
11323 /*
11324 * Adjust the addresses in the copy chain, and
11325 * reset the region attributes.
11326 */
11327
11328 for (entry = vm_map_copy_first_entry(copy);
11329 entry != vm_map_copy_to_entry(copy);
11330 entry = entry->vme_next) {
11331 if (VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT) {
11332 /*
11333 * We're injecting this copy entry into a map that
11334 * has the standard page alignment, so clear
11335 * "map_aligned" (which might have been inherited
11336 * from the original map entry).
11337 */
11338 entry->map_aligned = FALSE;
11339 }
11340
11341 entry->vme_start += adjustment;
11342 entry->vme_end += adjustment;
11343
11344 if (entry->map_aligned) {
11345 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start,
11346 VM_MAP_PAGE_MASK(dst_map)));
11347 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end,
11348 VM_MAP_PAGE_MASK(dst_map)));
11349 }
11350
11351 entry->inheritance = VM_INHERIT_DEFAULT;
11352 entry->protection = VM_PROT_DEFAULT;
11353 entry->max_protection = VM_PROT_ALL;
11354 entry->behavior = VM_BEHAVIOR_DEFAULT;
11355
11356 /*
11357 * If the entry is now wired,
11358 * map the pages into the destination map.
11359 */
11360 if (entry->wired_count != 0) {
11361 vm_map_offset_t va;
11362 vm_object_offset_t offset;
11363 vm_object_t object;
11364 vm_prot_t prot;
11365 int type_of_fault;
11366
11367 /* TODO4K would need to use actual page size */
11368 assert(VM_MAP_PAGE_SHIFT(dst_map) == PAGE_SHIFT);
11369
11370 object = VME_OBJECT(entry);
11371 offset = VME_OFFSET(entry);
11372 va = entry->vme_start;
11373
11374 pmap_pageable(dst_map->pmap,
11375 entry->vme_start,
11376 entry->vme_end,
11377 TRUE);
11378
11379 while (va < entry->vme_end) {
11380 vm_page_t m;
11381 struct vm_object_fault_info fault_info = {};
11382
11383 /*
11384 * Look up the page in the object.
11385 * Assert that the page will be found in the
11386 * top object:
11387 * either
11388 * the object was newly created by
11389 * vm_object_copy_slowly, and has
11390 * copies of all of the pages from
11391 * the source object
11392 * or
11393 * the object was moved from the old
11394 * map entry; because the old map
11395 * entry was wired, all of the pages
11396 * were in the top-level object.
11397 * (XXX not true if we wire pages for
11398 * reading)
11399 */
11400 vm_object_lock(object);
11401
11402 m = vm_page_lookup(object, offset);
11403 if (m == VM_PAGE_NULL || !VM_PAGE_WIRED(m) ||
11404 m->vmp_absent) {
11405 panic("vm_map_copyout: wiring %p", m);
11406 }
11407
11408 prot = entry->protection;
11409
11410 if (override_nx(dst_map, VME_ALIAS(entry)) &&
11411 prot) {
11412 prot |= VM_PROT_EXECUTE;
11413 }
11414
11415 type_of_fault = DBG_CACHE_HIT_FAULT;
11416
11417 fault_info.user_tag = VME_ALIAS(entry);
11418 fault_info.pmap_options = 0;
11419 if (entry->iokit_acct ||
11420 (!entry->is_sub_map && !entry->use_pmap)) {
11421 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
11422 }
11423
11424 vm_fault_enter(m,
11425 dst_map->pmap,
11426 va,
11427 PAGE_SIZE, 0,
11428 prot,
11429 prot,
11430 VM_PAGE_WIRED(m),
11431 FALSE, /* change_wiring */
11432 VM_KERN_MEMORY_NONE, /* tag - not wiring */
11433 &fault_info,
11434 NULL, /* need_retry */
11435 &type_of_fault);
11436
11437 vm_object_unlock(object);
11438
11439 offset += PAGE_SIZE_64;
11440 va += PAGE_SIZE;
11441 }
11442 }
11443 }
11444
11445 after_adjustments:
11446
11447 /*
11448 * Correct the page alignment for the result
11449 */
11450
11451 *dst_addr = start + (copy->offset - vm_copy_start);
11452
11453 #if KASAN
11454 kasan_notify_address(*dst_addr, size);
11455 #endif
11456
11457 /*
11458 * Update the hints and the map size
11459 */
11460
11461 if (consume_on_success) {
11462 SAVE_HINT_MAP_WRITE(dst_map, vm_map_copy_last_entry(copy));
11463 } else {
11464 SAVE_HINT_MAP_WRITE(dst_map, last);
11465 }
11466
11467 dst_map->size += size;
11468
11469 /*
11470 * Link in the copy
11471 */
11472
11473 if (consume_on_success) {
11474 vm_map_copy_insert(dst_map, last, copy);
11475 if (copy != original_copy) {
11476 vm_map_copy_discard(original_copy);
11477 original_copy = VM_MAP_COPY_NULL;
11478 }
11479 } else {
11480 vm_map_copy_remap(dst_map, last, copy, adjustment,
11481 cur_protection, max_protection,
11482 inheritance);
11483 if (copy != original_copy && original_copy != VM_MAP_COPY_NULL) {
11484 vm_map_copy_discard(copy);
11485 copy = original_copy;
11486 }
11487 }
11488
11489
11490 vm_map_unlock(dst_map);
11491
11492 /*
11493 * XXX If wiring_required, call vm_map_pageable
11494 */
11495
11496 return KERN_SUCCESS;
11497 }
11498
11499 /*
11500 * Routine: vm_map_copyin
11501 *
11502 * Description:
11503 * see vm_map_copyin_common. Exported via Unsupported.exports.
11504 *
11505 */
11506
11507 #undef vm_map_copyin
11508
11509 kern_return_t
11510 vm_map_copyin(
11511 vm_map_t src_map,
11512 vm_map_address_t src_addr,
11513 vm_map_size_t len,
11514 boolean_t src_destroy,
11515 vm_map_copy_t *copy_result) /* OUT */
11516 {
11517 return vm_map_copyin_common(src_map, src_addr, len, src_destroy,
11518 FALSE, copy_result, FALSE);
11519 }
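/*
 * Illustrative usage sketch (not part of the original source): a typical
 * in-kernel caller pairs vm_map_copyin() with vm_map_copyout() and must
 * discard the copy object itself if the copyout fails (the copy is only
 * consumed on success); "src_map", "src_addr", "len" and "dst_map" are
 * hypothetical here:
 *
 *	vm_map_copy_t copy;
 *	vm_map_address_t dst_addr;
 *	kern_return_t kr;
 *
 *	kr = vm_map_copyin(src_map, src_addr, len, FALSE, &copy);
 *	if (kr != KERN_SUCCESS) {
 *		return kr;
 *	}
 *	kr = vm_map_copyout(dst_map, &dst_addr, copy);
 *	if (kr != KERN_SUCCESS) {
 *		vm_map_copy_discard(copy);
 *	}
 */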
11520
11521 /*
11522 * Routine: vm_map_copyin_common
11523 *
11524 * Description:
11525 * Copy the specified region (src_addr, len) from the
11526 * source address space (src_map), possibly removing
11527 * the region from the source address space (src_destroy).
11528 *
11529 * Returns:
11530 * A vm_map_copy_t object (copy_result), suitable for
11531 * insertion into another address space (using vm_map_copyout),
11532 * copying over another address space region (using
11533 * vm_map_copy_overwrite). If the copy is unused, it
11534 * should be destroyed (using vm_map_copy_discard).
11535 *
11536 * In/out conditions:
11537 * The source map should not be locked on entry.
11538 */
11539
11540 typedef struct submap_map {
11541 vm_map_t parent_map;
11542 vm_map_offset_t base_start;
11543 vm_map_offset_t base_end;
11544 vm_map_size_t base_len;
11545 struct submap_map *next;
11546 } submap_map_t;
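/*
 * Note (illustrative, not part of the original source): submap_map_t
 * records form a LIFO stack that vm_map_copyin_internal() maintains while
 * descending through nested submaps; each descent pushes the parent map
 * and the base address range it covered:
 *
 *	ptr->next = parent_maps;
 *	parent_maps = ptr;
 *
 * The stack is torn down (dropping the extra map references) on the error
 * paths via the RETURN() cleanup macro defined further below, and as the
 * copy climbs back out of each submap.
 */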
11547
11548 kern_return_t
11549 vm_map_copyin_common(
11550 vm_map_t src_map,
11551 vm_map_address_t src_addr,
11552 vm_map_size_t len,
11553 boolean_t src_destroy,
11554 __unused boolean_t src_volatile,
11555 vm_map_copy_t *copy_result, /* OUT */
11556 boolean_t use_maxprot)
11557 {
11558 int flags;
11559
11560 flags = 0;
11561 if (src_destroy) {
11562 flags |= VM_MAP_COPYIN_SRC_DESTROY;
11563 }
11564 if (use_maxprot) {
11565 flags |= VM_MAP_COPYIN_USE_MAXPROT;
11566 }
11567 return vm_map_copyin_internal(src_map,
11568 src_addr,
11569 len,
11570 flags,
11571 copy_result);
11572 }
11573 kern_return_t
11574 vm_map_copyin_internal(
11575 vm_map_t src_map,
11576 vm_map_address_t src_addr,
11577 vm_map_size_t len,
11578 int flags,
11579 vm_map_copy_t *copy_result) /* OUT */
11580 {
11581 vm_map_entry_t tmp_entry; /* Result of last map lookup --
11582 * in multi-level lookup, this
11583 * entry contains the actual
11584 * vm_object/offset.
11585 */
11586 vm_map_entry_t new_entry = VM_MAP_ENTRY_NULL; /* Map entry for copy */
11587
11588 vm_map_offset_t src_start; /* Start of current entry --
11589 * where copy is taking place now
11590 */
11591 vm_map_offset_t src_end; /* End of entire region to be
11592 * copied */
11593 vm_map_offset_t src_base;
11594 vm_map_t base_map = src_map;
11595 boolean_t map_share = FALSE;
11596 submap_map_t *parent_maps = NULL;
11597
11598 vm_map_copy_t copy; /* Resulting copy */
11599 vm_map_address_t copy_addr;
11600 vm_map_size_t copy_size;
11601 boolean_t src_destroy;
11602 boolean_t use_maxprot;
11603 boolean_t preserve_purgeable;
11604 boolean_t entry_was_shared;
11605 vm_map_entry_t saved_src_entry;
11606
11607 if (flags & ~VM_MAP_COPYIN_ALL_FLAGS) {
11608 return KERN_INVALID_ARGUMENT;
11609 }
11610
11611 src_destroy = (flags & VM_MAP_COPYIN_SRC_DESTROY) ? TRUE : FALSE;
11612 use_maxprot = (flags & VM_MAP_COPYIN_USE_MAXPROT) ? TRUE : FALSE;
11613 preserve_purgeable =
11614 (flags & VM_MAP_COPYIN_PRESERVE_PURGEABLE) ? TRUE : FALSE;
11615
11616 /*
11617 * Check for copies of zero bytes.
11618 */
11619
11620 if (len == 0) {
11621 *copy_result = VM_MAP_COPY_NULL;
11622 return KERN_SUCCESS;
11623 }
11624
11625 /*
11626 * Check that the end address doesn't overflow
11627 */
11628 src_end = src_addr + len;
11629 if (src_end < src_addr) {
11630 return KERN_INVALID_ADDRESS;
11631 }
11632
11633 /*
11634 * Compute (page aligned) start and end of region
11635 */
11636 src_start = vm_map_trunc_page(src_addr,
11637 VM_MAP_PAGE_MASK(src_map));
11638 src_end = vm_map_round_page(src_end,
11639 VM_MAP_PAGE_MASK(src_map));
11640
11641 /*
11642 * If the copy is sufficiently small, use a kernel buffer instead
11643 * of making a virtual copy. The theory being that the cost of
11644 * setting up VM (and taking C-O-W faults) dominates the copy costs
11645 * for small regions.
11646 */
11647 if ((len < msg_ool_size_small) &&
11648 !use_maxprot &&
11649 !preserve_purgeable &&
11650 !(flags & VM_MAP_COPYIN_ENTRY_LIST) &&
11651 /*
11652 * Since the "msg_ool_size_small" threshold was increased and
11653 * vm_map_copyin_kernel_buffer() doesn't handle accesses beyond the
11654 * address space limits, we revert to doing a virtual copy if the
11655 * copied range goes beyond those limits. Otherwise, mach_vm_read()
11656 * of the commpage would now fail when it used to work.
11657 */
11658 (src_start >= vm_map_min(src_map) &&
11659 src_start < vm_map_max(src_map) &&
11660 src_end >= vm_map_min(src_map) &&
11661 src_end < vm_map_max(src_map))) {
11662 return vm_map_copyin_kernel_buffer(src_map, src_addr, len,
11663 src_destroy, copy_result);
11664 }
11665
11666 /*
11667 * Allocate a header element for the list.
11668 *
11669 * Use the start and end in the header to
11670 * remember the endpoints prior to rounding.
11671 */
11672
11673 copy = vm_map_copy_allocate();
11674 copy->type = VM_MAP_COPY_ENTRY_LIST;
11675 copy->cpy_hdr.entries_pageable = TRUE;
11676 copy->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(src_map);
11677
11678 vm_map_store_init( &(copy->cpy_hdr));
11679
11680 copy->offset = src_addr;
11681 copy->size = len;
11682
11683 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11684
11685 #define RETURN(x) \
11686 MACRO_BEGIN \
11687 vm_map_unlock(src_map); \
11688 if(src_map != base_map) \
11689 vm_map_deallocate(src_map); \
11690 if (new_entry != VM_MAP_ENTRY_NULL) \
11691 vm_map_copy_entry_dispose(copy,new_entry); \
11692 vm_map_copy_discard(copy); \
11693 { \
11694 submap_map_t *_ptr; \
11695 \
11696 for(_ptr = parent_maps; _ptr != NULL; _ptr = parent_maps) { \
11697 parent_maps=parent_maps->next; \
11698 if (_ptr->parent_map != base_map) \
11699 vm_map_deallocate(_ptr->parent_map); \
11700 kfree(_ptr, sizeof(submap_map_t)); \
11701 } \
11702 } \
11703 MACRO_RETURN(x); \
11704 MACRO_END
11705
11706 /*
11707 * Find the beginning of the region.
11708 */
11709
11710 vm_map_lock(src_map);
11711
11712 /*
11713 * Lookup the original "src_addr" rather than the truncated
11714 * "src_start", in case "src_start" falls in a non-map-aligned
11715 * map entry *before* the map entry that contains "src_addr"...
11716 */
11717 if (!vm_map_lookup_entry(src_map, src_addr, &tmp_entry)) {
11718 RETURN(KERN_INVALID_ADDRESS);
11719 }
11720 if (!tmp_entry->is_sub_map) {
11721 /*
11722 * ... but clip to the map-rounded "src_start" rather than
11723 * "src_addr" to preserve map-alignment. We'll adjust the
11724 * first copy entry at the end, if needed.
11725 */
11726 vm_map_clip_start(src_map, tmp_entry, src_start);
11727 }
11728 if (src_start < tmp_entry->vme_start) {
11729 /*
11730 * Move "src_start" up to the start of the
11731 * first map entry to copy.
11732 */
11733 src_start = tmp_entry->vme_start;
11734 }
11735 /* set for later submap fix-up */
11736 copy_addr = src_start;
11737
11738 /*
11739 * Go through entries until we get to the end.
11740 */
11741
11742 while (TRUE) {
11743 vm_map_entry_t src_entry = tmp_entry; /* Top-level entry */
11744 vm_map_size_t src_size; /* Size of source
11745 * map entry (in both
11746 * maps)
11747 */
11748
11749 vm_object_t src_object; /* Object to copy */
11750 vm_object_offset_t src_offset;
11751
11752 boolean_t src_needs_copy; /* Should source map
11753 * be made read-only
11754 * for copy-on-write?
11755 */
11756
11757 boolean_t new_entry_needs_copy; /* Will new entry be COW? */
11758
11759 boolean_t was_wired; /* Was source wired? */
11760 vm_map_version_t version; /* Version before locks
11761 * dropped to make copy
11762 */
11763 kern_return_t result; /* Return value from
11764 * copy_strategically.
11765 */
11766 while (tmp_entry->is_sub_map) {
11767 vm_map_size_t submap_len;
11768 submap_map_t *ptr;
11769
11770 ptr = (submap_map_t *)kalloc(sizeof(submap_map_t));
11771 ptr->next = parent_maps;
11772 parent_maps = ptr;
11773 ptr->parent_map = src_map;
11774 ptr->base_start = src_start;
11775 ptr->base_end = src_end;
11776 submap_len = tmp_entry->vme_end - src_start;
11777 if (submap_len > (src_end - src_start)) {
11778 submap_len = src_end - src_start;
11779 }
11780 ptr->base_len = submap_len;
11781
11782 src_start -= tmp_entry->vme_start;
11783 src_start += VME_OFFSET(tmp_entry);
11784 src_end = src_start + submap_len;
11785 src_map = VME_SUBMAP(tmp_entry);
11786 vm_map_lock(src_map);
11787 /* keep an outstanding reference for all maps in */
11788 /* the parent tree except the base map */
11789 vm_map_reference(src_map);
11790 vm_map_unlock(ptr->parent_map);
11791 if (!vm_map_lookup_entry(
11792 src_map, src_start, &tmp_entry)) {
11793 RETURN(KERN_INVALID_ADDRESS);
11794 }
11795 map_share = TRUE;
11796 if (!tmp_entry->is_sub_map) {
11797 vm_map_clip_start(src_map, tmp_entry, src_start);
11798 }
11799 src_entry = tmp_entry;
11800 }
11801 /* we are now in the lowest level submap... */
11802
11803 if ((VME_OBJECT(tmp_entry) != VM_OBJECT_NULL) &&
11804 (VME_OBJECT(tmp_entry)->phys_contiguous)) {
11805 /* This is not supported for now. In the */
11806 /* future we will need to detect the */
11807 /* phys_contiguous condition and upgrade */
11808 /* copy_slowly to do a physical copy from */
11809 /* the device-memory-based object. We can */
11810 /* piggy-back off of the was_wired boolean */
11811 /* to set up the proper handling. */
11812 RETURN(KERN_PROTECTION_FAILURE);
11813 }
11814 /*
11815 * Create a new address map entry to hold the result.
11816 * Fill in the fields from the appropriate source entries.
11817 * We must unlock the source map to do this if we need
11818 * to allocate a map entry.
11819 */
11820 if (new_entry == VM_MAP_ENTRY_NULL) {
11821 version.main_timestamp = src_map->timestamp;
11822 vm_map_unlock(src_map);
11823
11824 new_entry = vm_map_copy_entry_create(copy, !copy->cpy_hdr.entries_pageable);
11825
11826 vm_map_lock(src_map);
11827 if ((version.main_timestamp + 1) != src_map->timestamp) {
11828 if (!vm_map_lookup_entry(src_map, src_start,
11829 &tmp_entry)) {
11830 RETURN(KERN_INVALID_ADDRESS);
11831 }
11832 if (!tmp_entry->is_sub_map) {
11833 vm_map_clip_start(src_map, tmp_entry, src_start);
11834 }
11835 continue; /* restart w/ new tmp_entry */
11836 }
11837 }
11838
11839 /*
11840 * Verify that the region can be read.
11841 */
11842 if (((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE &&
11843 !use_maxprot) ||
11844 (src_entry->max_protection & VM_PROT_READ) == 0) {
11845 RETURN(KERN_PROTECTION_FAILURE);
11846 }
11847
11848 /*
11849 * Clip against the endpoints of the entire region.
11850 */
11851
11852 vm_map_clip_end(src_map, src_entry, src_end);
11853
11854 src_size = src_entry->vme_end - src_start;
11855 src_object = VME_OBJECT(src_entry);
11856 src_offset = VME_OFFSET(src_entry);
11857 was_wired = (src_entry->wired_count != 0);
11858
11859 vm_map_entry_copy(src_map, new_entry, src_entry);
11860 if (new_entry->is_sub_map) {
11861 /* clr address space specifics */
11862 new_entry->use_pmap = FALSE;
11863 } else {
11864 /*
11865 * We're dealing with a copy-on-write operation,
11866 * so the resulting mapping should not inherit the
11867 * original mapping's accounting settings.
11868 * "iokit_acct" should have been cleared in
11869 * vm_map_entry_copy().
11870 * "use_pmap" should be reset to its default (TRUE)
11871 * so that the new mapping gets accounted for in
11872 * the task's memory footprint.
11873 */
11874 assert(!new_entry->iokit_acct);
11875 new_entry->use_pmap = TRUE;
11876 }
11877
11878 /*
11879 * Attempt non-blocking copy-on-write optimizations.
11880 */
11881
11882 /*
11883 * If we are destroying the source, and the object
11884 * is internal, we could move the object reference
11885 * from the source to the copy. The copy is
11886 * copy-on-write only if the source is.
11887 * We make another reference to the object, because
11888 * destroying the source entry will deallocate it.
11889 *
11890 * This memory transfer has to be atomic (to prevent
11891 * the VM object from being shared or copied while
11892 * it's being moved here), so we could only do this
11893 * if we won't have to unlock the VM map until the
11894 * original mapping has been fully removed.
11895 */
11896
11897 RestartCopy:
11898 if ((src_object == VM_OBJECT_NULL ||
11899 (!was_wired && !map_share && !tmp_entry->is_shared
11900 && !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT))) &&
11901 vm_object_copy_quickly(
11902 VME_OBJECT_PTR(new_entry),
11903 src_offset,
11904 src_size,
11905 &src_needs_copy,
11906 &new_entry_needs_copy)) {
11907 new_entry->needs_copy = new_entry_needs_copy;
11908
11909 /*
11910 * Handle copy-on-write obligations
11911 */
11912
11913 if (src_needs_copy && !tmp_entry->needs_copy) {
11914 vm_prot_t prot;
11915
11916 prot = src_entry->protection & ~VM_PROT_WRITE;
11917
11918 if (override_nx(src_map, VME_ALIAS(src_entry))
11919 && prot) {
11920 prot |= VM_PROT_EXECUTE;
11921 }
11922
11923 vm_object_pmap_protect(
11924 src_object,
11925 src_offset,
11926 src_size,
11927 (src_entry->is_shared ?
11928 PMAP_NULL
11929 : src_map->pmap),
11930 VM_MAP_PAGE_SIZE(src_map),
11931 src_entry->vme_start,
11932 prot);
11933
11934 assert(tmp_entry->wired_count == 0);
11935 tmp_entry->needs_copy = TRUE;
11936 }
11937
11938 /*
11939 * The map has never been unlocked, so it's safe
11940 * to move to the next entry rather than doing
11941 * another lookup.
11942 */
11943
11944 goto CopySuccessful;
11945 }
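/*
 * Editor's note: an illustrative sketch, not part of the build. It restates
 * the eligibility test for the symmetric "quick" copy above as a standalone
 * predicate; "can_copy_quickly" is a hypothetical name introduced only for
 * this illustration -- the real code tests the condition inline.
 */
#if 0
static boolean_t
can_copy_quickly(
	vm_object_t     src_object,
	boolean_t       was_wired,
	boolean_t       map_share,
	vm_map_entry_t  tmp_entry,
	vm_map_t        src_map)
{
	/* a NULL source object can always take the quick path */
	if (src_object == VM_OBJECT_NULL) {
		return TRUE;
	}
	/* wired or shared mappings must use one of the slower paths */
	if (was_wired || map_share || tmp_entry->is_shared) {
		return FALSE;
	}
	/* the debug4k small-page case also forces the slow path */
	if (debug4k_no_cow_copyin &&
	    VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
		return FALSE;
	}
	return TRUE;
}
#endif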
11946
11947 entry_was_shared = tmp_entry->is_shared;
11948
11949 /*
11950 * Take an object reference, so that we may
11951 * release the map lock(s).
11952 */
11953
11954 assert(src_object != VM_OBJECT_NULL);
11955 vm_object_reference(src_object);
11956
11957 /*
11958 * Record the timestamp for later verification.
11959 * Unlock the map.
11960 */
11961
11962 version.main_timestamp = src_map->timestamp;
11963 vm_map_unlock(src_map); /* Increments timestamp once! */
11964 saved_src_entry = src_entry;
11965 tmp_entry = VM_MAP_ENTRY_NULL;
11966 src_entry = VM_MAP_ENTRY_NULL;
11967
11968 /*
11969 * Perform the copy
11970 */
11971
11972 if (was_wired ||
11973 (debug4k_no_cow_copyin &&
11974 VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
11975 CopySlowly:
11976 vm_object_lock(src_object);
11977 result = vm_object_copy_slowly(
11978 src_object,
11979 src_offset,
11980 src_size,
11981 THREAD_UNINT,
11982 VME_OBJECT_PTR(new_entry));
11983 VME_OFFSET_SET(new_entry,
11984 src_offset - vm_object_trunc_page(src_offset));
11985 new_entry->needs_copy = FALSE;
11986 } else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
11987 (entry_was_shared || map_share)) {
11988 vm_object_t new_object;
11989
11990 vm_object_lock_shared(src_object);
11991 new_object = vm_object_copy_delayed(
11992 src_object,
11993 src_offset,
11994 src_size,
11995 TRUE);
11996 if (new_object == VM_OBJECT_NULL) {
11997 goto CopySlowly;
11998 }
11999
12000 VME_OBJECT_SET(new_entry, new_object);
12001 assert(new_entry->wired_count == 0);
12002 new_entry->needs_copy = TRUE;
12003 assert(!new_entry->iokit_acct);
12004 assert(new_object->purgable == VM_PURGABLE_DENY);
12005 assertf(new_entry->use_pmap, "src_map %p new_entry %p\n", src_map, new_entry);
12006 result = KERN_SUCCESS;
12007 } else {
12008 vm_object_offset_t new_offset;
12009 new_offset = VME_OFFSET(new_entry);
12010 result = vm_object_copy_strategically(src_object,
12011 src_offset,
12012 src_size,
12013 VME_OBJECT_PTR(new_entry),
12014 &new_offset,
12015 &new_entry_needs_copy);
12016 if (new_offset != VME_OFFSET(new_entry)) {
12017 VME_OFFSET_SET(new_entry, new_offset);
12018 }
12019
12020 new_entry->needs_copy = new_entry_needs_copy;
12021 }
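/*
 * Editor's note: an illustrative sketch, not part of the build. It condenses
 * the copy-strategy decision made by the if/else chain above; the three arms
 * correspond to vm_object_copy_slowly(), vm_object_copy_delayed() and
 * vm_object_copy_strategically() respectively.
 */
#if 0
	if (was_wired ||
	    (debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT)) {
		/* wired (or forced-slow) source: copy the pages right now */
		/* vm_object_copy_slowly(...) */
	} else if (src_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC &&
	    (entry_was_shared || map_share)) {
		/* shared symmetric object: take a delayed (copy-on-write) copy, */
		/* falling back to the slow path if no copy object is returned */
		/* vm_object_copy_delayed(...) */
	} else {
		/* otherwise defer to the object's own copy strategy */
		/* vm_object_copy_strategically(...) */
	}
#endif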
12022
12023 if (result == KERN_SUCCESS &&
12024 ((preserve_purgeable &&
12025 src_object->purgable != VM_PURGABLE_DENY) ||
12026 new_entry->used_for_jit)) {
12027 /*
12028 * Purgeable objects should be COPY_NONE, true share;
12029 * this should be propagated to the copy.
12030 *
12031 * Also force mappings the pmap specially protects to
12032 * be COPY_NONE; trying to COW these mappings would
12033 * change the effective protections, which could have
12034 * side effects if the pmap layer relies on the
12035 * specified protections.
12036 */
12037
12038 vm_object_t new_object;
12039
12040 new_object = VME_OBJECT(new_entry);
12041 assert(new_object != src_object);
12042 vm_object_lock(new_object);
12043 assert(new_object->ref_count == 1);
12044 assert(new_object->shadow == VM_OBJECT_NULL);
12045 assert(new_object->copy == VM_OBJECT_NULL);
12046 assert(new_object->vo_owner == NULL);
12047
12048 new_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
12049
12050 if (preserve_purgeable &&
12051 src_object->purgable != VM_PURGABLE_DENY) {
12052 new_object->true_share = TRUE;
12053
12054 /* start as non-volatile with no owner... */
12055 new_object->purgable = VM_PURGABLE_NONVOLATILE;
12056 vm_purgeable_nonvolatile_enqueue(new_object, NULL);
12057 /* ... and move to src_object's purgeable state */
12058 if (src_object->purgable != VM_PURGABLE_NONVOLATILE) {
12059 int state;
12060 state = src_object->purgable;
12061 vm_object_purgable_control(
12062 new_object,
12063 VM_PURGABLE_SET_STATE_FROM_KERNEL,
12064 &state);
12065 }
12066 /* no pmap accounting for purgeable objects */
12067 new_entry->use_pmap = FALSE;
12068 }
12069
12070 vm_object_unlock(new_object);
12071 new_object = VM_OBJECT_NULL;
12072 }
12073
12074 if (result != KERN_SUCCESS &&
12075 result != KERN_MEMORY_RESTART_COPY) {
12076 vm_map_lock(src_map);
12077 RETURN(result);
12078 }
12079
12080 /*
12081 * Throw away the extra reference
12082 */
12083
12084 vm_object_deallocate(src_object);
12085
12086 /*
12087 * Verify that the map has not substantially
12088 * changed while the copy was being made.
12089 */
12090
12091 vm_map_lock(src_map);
12092
12093 if ((version.main_timestamp + 1) == src_map->timestamp) {
12094 /* src_map hasn't changed: src_entry is still valid */
12095 src_entry = saved_src_entry;
12096 goto VerificationSuccessful;
12097 }
12098
12099 /*
12100 * Simple version comparison failed.
12101 *
12102 * Retry the lookup and verify that the
12103 * same object/offset are still present.
12104 *
12105 * [Note: a memory manager that colludes with
12106 * the calling task can detect that we have
12107 * cheated. While the map was unlocked, the
12108 * mapping could have been changed and restored.]
12109 */
12110
12111 if (!vm_map_lookup_entry(src_map, src_start, &tmp_entry)) {
12112 if (result != KERN_MEMORY_RESTART_COPY) {
12113 vm_object_deallocate(VME_OBJECT(new_entry));
12114 VME_OBJECT_SET(new_entry, VM_OBJECT_NULL);
12115 /* reset accounting state */
12116 new_entry->iokit_acct = FALSE;
12117 new_entry->use_pmap = TRUE;
12118 }
12119 RETURN(KERN_INVALID_ADDRESS);
12120 }
12121
12122 src_entry = tmp_entry;
12123 vm_map_clip_start(src_map, src_entry, src_start);
12124
12125 if ((((src_entry->protection & VM_PROT_READ) == VM_PROT_NONE) &&
12126 !use_maxprot) ||
12127 ((src_entry->max_protection & VM_PROT_READ) == 0)) {
12128 goto VerificationFailed;
12129 }
12130
12131 if (src_entry->vme_end < new_entry->vme_end) {
12132 /*
12133 * This entry might have been shortened
12134 * (vm_map_clip_end) or been replaced with
12135 * an entry that ends closer to "src_start"
12136 * than before.
12137 * Adjust "new_entry" accordingly; copying
12138 * less memory would be correct but we also
12139 * redo the copy (see below) if the new entry
12140 * no longer points at the same object/offset.
12141 */
12142 assert(VM_MAP_PAGE_ALIGNED(src_entry->vme_end,
12143 VM_MAP_COPY_PAGE_MASK(copy)));
12144 new_entry->vme_end = src_entry->vme_end;
12145 src_size = new_entry->vme_end - src_start;
12146 } else if (src_entry->vme_end > new_entry->vme_end) {
12147 /*
12148 * This entry might have been extended
12149 * (vm_map_entry_simplify() or coalesce)
12150 * or been replaced with an entry that ends farther
12151 * from "src_start" than before.
12152 *
12153 * We've called vm_object_copy_*() only on
12154 * the previous <start:end> range, so we can't
12155 * just extend new_entry. We have to re-do
12156 * the copy based on the new entry as if it was
12157 * pointing at a different object/offset (see
12158 * "Verification failed" below).
12159 */
12160 }
12161
12162 if ((VME_OBJECT(src_entry) != src_object) ||
12163 (VME_OFFSET(src_entry) != src_offset) ||
12164 (src_entry->vme_end > new_entry->vme_end)) {
12165 /*
12166 * Verification failed.
12167 *
12168 * Start over with this top-level entry.
12169 */
12170
12171 VerificationFailed: ;
12172
12173 vm_object_deallocate(VME_OBJECT(new_entry));
12174 tmp_entry = src_entry;
12175 continue;
12176 }
12177
12178 /*
12179 * Verification succeeded.
12180 */
12181
12182 VerificationSuccessful:;
12183
12184 if (result == KERN_MEMORY_RESTART_COPY) {
12185 goto RestartCopy;
12186 }
12187
12188 /*
12189 * Copy succeeded.
12190 */
12191
12192 CopySuccessful: ;
12193
12194 /*
12195 * Link in the new copy entry.
12196 */
12197
12198 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy),
12199 new_entry);
12200
12201 /*
12202 * Determine whether the entire region
12203 * has been copied.
12204 */
12205 src_base = src_start;
12206 src_start = new_entry->vme_end;
12207 new_entry = VM_MAP_ENTRY_NULL;
12208 while ((src_start >= src_end) && (src_end != 0)) {
12209 submap_map_t *ptr;
12210
12211 if (src_map == base_map) {
12212 /* back to the top */
12213 break;
12214 }
12215
12216 ptr = parent_maps;
12217 assert(ptr != NULL);
12218 parent_maps = parent_maps->next;
12219
12220 /* fix up the damage we did in that submap */
12221 vm_map_simplify_range(src_map,
12222 src_base,
12223 src_end);
12224
12225 vm_map_unlock(src_map);
12226 vm_map_deallocate(src_map);
12227 vm_map_lock(ptr->parent_map);
12228 src_map = ptr->parent_map;
12229 src_base = ptr->base_start;
12230 src_start = ptr->base_start + ptr->base_len;
12231 src_end = ptr->base_end;
12232 if (!vm_map_lookup_entry(src_map,
12233 src_start,
12234 &tmp_entry) &&
12235 (src_end > src_start)) {
12236 RETURN(KERN_INVALID_ADDRESS);
12237 }
12238 kfree(ptr, sizeof(submap_map_t));
12239 if (parent_maps == NULL) {
12240 map_share = FALSE;
12241 }
12242 src_entry = tmp_entry->vme_prev;
12243 }
12244
12245 if ((VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT) &&
12246 (src_start >= src_addr + len) &&
12247 (src_addr + len != 0)) {
12248 /*
12249 * Stop copying now, even though we haven't reached
12250 * "src_end". We'll adjust the end of the last copy
12251 * entry at the end, if needed.
12252 *
12253 * If src_map's alignment is different from the
12254 * system's page-alignment, there could be
12255 * extra non-map-aligned map entries between
12256 * the original (non-rounded) "src_addr + len"
12257 * and the rounded "src_end".
12258 * We do not want to copy those map entries since
12259 * they're not part of the copied range.
12260 */
12261 break;
12262 }
12263
12264 if ((src_start >= src_end) && (src_end != 0)) {
12265 break;
12266 }
12267
12268 /*
12269 * Verify that there are no gaps in the region
12270 */
12271
12272 tmp_entry = src_entry->vme_next;
12273 if ((tmp_entry->vme_start != src_start) ||
12274 (tmp_entry == vm_map_to_entry(src_map))) {
12275 RETURN(KERN_INVALID_ADDRESS);
12276 }
12277 }
12278
12279 /*
12280 * If the source should be destroyed, do it now, since the
12281 * copy was successful.
12282 */
12283 if (src_destroy) {
12284 (void) vm_map_delete(
12285 src_map,
12286 vm_map_trunc_page(src_addr,
12287 VM_MAP_PAGE_MASK(src_map)),
12288 src_end,
12289 ((src_map == kernel_map) ?
12290 VM_MAP_REMOVE_KUNWIRE :
12291 VM_MAP_REMOVE_NO_FLAGS),
12292 VM_MAP_NULL);
12293 } else {
12294 /* fix up the damage we did in the base map */
12295 vm_map_simplify_range(
12296 src_map,
12297 vm_map_trunc_page(src_addr,
12298 VM_MAP_PAGE_MASK(src_map)),
12299 vm_map_round_page(src_end,
12300 VM_MAP_PAGE_MASK(src_map)));
12301 }
12302
12303 vm_map_unlock(src_map);
12304 tmp_entry = VM_MAP_ENTRY_NULL;
12305
12306 if (VM_MAP_PAGE_SHIFT(src_map) > PAGE_SHIFT &&
12307 VM_MAP_PAGE_SHIFT(src_map) != VM_MAP_COPY_PAGE_SHIFT(copy)) {
12308 vm_map_offset_t original_start, original_offset, original_end;
12309
12310 assert(VM_MAP_COPY_PAGE_MASK(copy) == PAGE_MASK);
12311
12312 /* adjust alignment of first copy_entry's "vme_start" */
12313 tmp_entry = vm_map_copy_first_entry(copy);
12314 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12315 vm_map_offset_t adjustment;
12316
12317 original_start = tmp_entry->vme_start;
12318 original_offset = VME_OFFSET(tmp_entry);
12319
12320 /* map-align the start of the first copy entry... */
12321 adjustment = (tmp_entry->vme_start -
12322 vm_map_trunc_page(
12323 tmp_entry->vme_start,
12324 VM_MAP_PAGE_MASK(src_map)));
12325 tmp_entry->vme_start -= adjustment;
12326 VME_OFFSET_SET(tmp_entry,
12327 VME_OFFSET(tmp_entry) - adjustment);
12328 copy_addr -= adjustment;
12329 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12330 /* ... adjust for mis-aligned start of copy range */
12331 adjustment =
12332 (vm_map_trunc_page(copy->offset,
12333 PAGE_MASK) -
12334 vm_map_trunc_page(copy->offset,
12335 VM_MAP_PAGE_MASK(src_map)));
12336 if (adjustment) {
12337 assert(page_aligned(adjustment));
12338 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12339 tmp_entry->vme_start += adjustment;
12340 VME_OFFSET_SET(tmp_entry,
12341 (VME_OFFSET(tmp_entry) +
12342 adjustment));
12343 copy_addr += adjustment;
12344 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12345 }
12346
12347 /*
12348 * Assert that the adjustments haven't exposed
12349 * more than was originally copied...
12350 */
12351 assert(tmp_entry->vme_start >= original_start);
12352 assert(VME_OFFSET(tmp_entry) >= original_offset);
12353 /*
12354 * ... and that it did not adjust outside of
12355 * a single 16K page.
12356 */
12357 assert(vm_map_trunc_page(tmp_entry->vme_start,
12358 VM_MAP_PAGE_MASK(src_map)) ==
12359 vm_map_trunc_page(original_start,
12360 VM_MAP_PAGE_MASK(src_map)));
12361 }
12362
12363 /* adjust alignment of last copy_entry's "vme_end" */
12364 tmp_entry = vm_map_copy_last_entry(copy);
12365 if (tmp_entry != vm_map_copy_to_entry(copy)) {
12366 vm_map_offset_t adjustment;
12367
12368 original_end = tmp_entry->vme_end;
12369
12370 /* map-align the end of the last copy entry... */
12371 tmp_entry->vme_end =
12372 vm_map_round_page(tmp_entry->vme_end,
12373 VM_MAP_PAGE_MASK(src_map));
12374 /* ... adjust for mis-aligned end of copy range */
12375 adjustment =
12376 (vm_map_round_page((copy->offset +
12377 copy->size),
12378 VM_MAP_PAGE_MASK(src_map)) -
12379 vm_map_round_page((copy->offset +
12380 copy->size),
12381 PAGE_MASK));
12382 if (adjustment) {
12383 assert(page_aligned(adjustment));
12384 assert(adjustment < VM_MAP_PAGE_SIZE(src_map));
12385 tmp_entry->vme_end -= adjustment;
12386 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12387 }
12388
12389 /*
12390 * Assert that the adjustments haven't exposed
12391 * more than was originally copied...
12392 */
12393 assert(tmp_entry->vme_end <= original_end);
12394 /*
12395 * ... and that it did not adjust outside of
12396 * a single 16K page.
12397 */
12398 assert(vm_map_round_page(tmp_entry->vme_end,
12399 VM_MAP_PAGE_MASK(src_map)) ==
12400 vm_map_round_page(original_end,
12401 VM_MAP_PAGE_MASK(src_map)));
12402 }
12403 }
12404
12405 /* Fix-up start and end points in copy. This is necessary */
12406 /* when the various entries in the copy object were picked */
12407 /* up from different sub-maps */
12408
12409 tmp_entry = vm_map_copy_first_entry(copy);
12410 copy_size = 0; /* compute actual size */
12411 while (tmp_entry != vm_map_copy_to_entry(copy)) {
12412 assert(VM_MAP_PAGE_ALIGNED(
12413 copy_addr + (tmp_entry->vme_end -
12414 tmp_entry->vme_start),
12415 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12416 assert(VM_MAP_PAGE_ALIGNED(
12417 copy_addr,
12418 MIN(VM_MAP_COPY_PAGE_MASK(copy), PAGE_MASK)));
12419
12420 /*
12421 * The copy_entries will be injected directly into the
12422 * destination map and might not be "map aligned" there...
12423 */
12424 tmp_entry->map_aligned = FALSE;
12425
12426 tmp_entry->vme_end = copy_addr +
12427 (tmp_entry->vme_end - tmp_entry->vme_start);
12428 tmp_entry->vme_start = copy_addr;
12429 assert(tmp_entry->vme_start < tmp_entry->vme_end);
12430 copy_addr += tmp_entry->vme_end - tmp_entry->vme_start;
12431 copy_size += tmp_entry->vme_end - tmp_entry->vme_start;
12432 tmp_entry = (struct vm_map_entry *)tmp_entry->vme_next;
12433 }
12434
12435 if (VM_MAP_PAGE_SHIFT(src_map) != PAGE_SHIFT &&
12436 copy_size < copy->size) {
12437 /*
12438 * The actual size of the VM map copy is smaller than what
12439 * was requested by the caller. This must be because some
12440 * PAGE_SIZE-sized pages are missing at the end of the last
12441 * VM_MAP_PAGE_SIZE(src_map)-sized chunk of the range.
12442 * The caller might not have been aware of those missing
12443 * pages and might not want to be aware of it, which is
12444 * fine as long as they don't try to access (and crash on)
12445 * those missing pages.
12446 * Let's adjust the size of the "copy", to avoid failing
12447 * in vm_map_copyout() or vm_map_copy_overwrite().
12448 */
12449 assert(vm_map_round_page(copy_size,
12450 VM_MAP_PAGE_MASK(src_map)) ==
12451 vm_map_round_page(copy->size,
12452 VM_MAP_PAGE_MASK(src_map)));
12453 copy->size = copy_size;
12454 }
12455
12456 *copy_result = copy;
12457 return KERN_SUCCESS;
12458
12459 #undef RETURN
12460 }
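/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * minimal, hypothetical caller of the copy-in routine above
 * (vm_map_copyin_internal(), as called from vm_map_fork_copy() below) and the
 * ownership rule it implements: on success the caller owns "copy" and must
 * either insert it into a destination map or discard it. "some_map",
 * "some_addr" and "some_len" are placeholder names.
 */
#if 0
	vm_map_copy_t   copy;
	kern_return_t   kr;

	kr = vm_map_copyin_internal(some_map, some_addr, some_len,
	    VM_MAP_COPYIN_USE_MAXPROT, &copy);
	if (kr == KERN_SUCCESS) {
		/* ... use "copy", e.g. vm_map_copy_insert() into a target map ... */
		vm_map_copy_discard(copy);      /* if the copy ends up unused */
	}
#endif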
12461
12462 kern_return_t
12463 vm_map_copy_extract(
12464 vm_map_t src_map,
12465 vm_map_address_t src_addr,
12466 vm_map_size_t len,
12467 vm_prot_t required_prot,
12468 boolean_t do_copy,
12469 vm_map_copy_t *copy_result, /* OUT */
12470 vm_prot_t *cur_prot, /* OUT */
12471 vm_prot_t *max_prot, /* OUT */
12472 vm_inherit_t inheritance,
12473 vm_map_kernel_flags_t vmk_flags)
12474 {
12475 vm_map_copy_t copy;
12476 kern_return_t kr;
12477
12478 /*
12479 * Check for copies of zero bytes.
12480 */
12481
12482 if (len == 0) {
12483 *copy_result = VM_MAP_COPY_NULL;
12484 return KERN_SUCCESS;
12485 }
12486
12487 /*
12488 * Check that the end address doesn't overflow
12489 */
12490 if (src_addr + len < src_addr) {
12491 return KERN_INVALID_ADDRESS;
12492 }
12493
12494 if (VM_MAP_PAGE_SIZE(src_map) < PAGE_SIZE) {
12495 DEBUG4K_SHARE("src_map %p src_addr 0x%llx src_end 0x%llx\n", src_map, (uint64_t)src_addr, (uint64_t)(src_addr + len));
12496 }
12497
12498 /*
12499 * Allocate a header element for the list.
12500 *
12501 * Use the start and end in the header to
12502 * remember the endpoints prior to rounding.
12503 */
12504
12505 copy = vm_map_copy_allocate();
12506 copy->type = VM_MAP_COPY_ENTRY_LIST;
12507 copy->cpy_hdr.entries_pageable = vmk_flags.vmkf_copy_pageable;
12508
12509 vm_map_store_init(&copy->cpy_hdr);
12510
12511 copy->offset = 0;
12512 copy->size = len;
12513
12514 kr = vm_map_remap_extract(src_map,
12515 src_addr,
12516 len,
12517 required_prot,
12518 do_copy, /* copy */
12519 &copy->cpy_hdr,
12520 cur_prot,
12521 max_prot,
12522 inheritance,
12523 vmk_flags);
12524 if (kr != KERN_SUCCESS) {
12525 vm_map_copy_discard(copy);
12526 return kr;
12527 }
12528 assert((*cur_prot & required_prot) == required_prot);
12529 assert((*max_prot & required_prot) == required_prot);
12530
12531 *copy_result = copy;
12532 return KERN_SUCCESS;
12533 }
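/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical call to vm_map_copy_extract() using the signature above;
 * "src_map", "addr" and "size" are placeholder names, and the asserts above
 * guarantee the OUT protections contain at least "required_prot".
 */
#if 0
	vm_map_copy_t   copy;
	vm_prot_t       cur_prot, max_prot;
	kern_return_t   kr;

	kr = vm_map_copy_extract(src_map,
	    addr,
	    size,
	    VM_PROT_READ,               /* required_prot */
	    FALSE,                      /* do_copy: share rather than copy */
	    &copy,
	    &cur_prot,
	    &max_prot,
	    VM_INHERIT_SHARE,
	    VM_MAP_KERNEL_FLAGS_NONE);
	if (kr == KERN_SUCCESS) {
		/* "copy" is an entry-list copy object; discard it when done */
		vm_map_copy_discard(copy);
	}
#endif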
12534
12535 /*
12536 * vm_map_copyin_object:
12537 *
12538 * Create a copy object from an object.
12539 * Our caller donates an object reference.
12540 */
12541
12542 kern_return_t
12543 vm_map_copyin_object(
12544 vm_object_t object,
12545 vm_object_offset_t offset, /* offset of region in object */
12546 vm_object_size_t size, /* size of region in object */
12547 vm_map_copy_t *copy_result) /* OUT */
12548 {
12549 vm_map_copy_t copy; /* Resulting copy */
12550
12551 /*
12552 * We drop the object into a special copy object
12553 * that contains the object directly.
12554 */
12555
12556 copy = vm_map_copy_allocate();
12557 copy->type = VM_MAP_COPY_OBJECT;
12558 copy->cpy_object = object;
12559 copy->offset = offset;
12560 copy->size = size;
12561
12562 *copy_result = copy;
12563 return KERN_SUCCESS;
12564 }
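/*
 * Editor's note: an illustrative sketch, not part of the build.
 * vm_map_copyin_object() consumes the object reference donated by the
 * caller, so a hypothetical caller looks like this ("obj" and "size" are
 * placeholder names for an object the caller holds a reference on and the
 * size of the region of interest).
 */
#if 0
	vm_map_copy_t   copy;

	vm_object_reference(obj);               /* this reference is donated below */
	(void) vm_map_copyin_object(obj, 0, size, &copy);
	/* "copy" now owns the reference; copy->type is VM_MAP_COPY_OBJECT */
#endif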
12565
12566 static void
12567 vm_map_fork_share(
12568 vm_map_t old_map,
12569 vm_map_entry_t old_entry,
12570 vm_map_t new_map)
12571 {
12572 vm_object_t object;
12573 vm_map_entry_t new_entry;
12574
12575 /*
12576 * New sharing code. The new map entry
12577 * references the original object. Internal
12578 * objects use an asynchronous copy algorithm for
12579 * future copies. First make sure we have
12580 * the right object. If we need a shadow,
12581 * or someone else already has one, then
12582 * make a new shadow and share it.
12583 */
12584
12585 object = VME_OBJECT(old_entry);
12586 if (old_entry->is_sub_map) {
12587 assert(old_entry->wired_count == 0);
12588 #ifndef NO_NESTED_PMAP
12589 if (old_entry->use_pmap) {
12590 kern_return_t result;
12591
12592 result = pmap_nest(new_map->pmap,
12593 (VME_SUBMAP(old_entry))->pmap,
12594 (addr64_t)old_entry->vme_start,
12595 (uint64_t)(old_entry->vme_end - old_entry->vme_start));
12596 if (result) {
12597 panic("vm_map_fork_share: pmap_nest failed!");
12598 }
12599 }
12600 #endif /* NO_NESTED_PMAP */
12601 } else if (object == VM_OBJECT_NULL) {
12602 object = vm_object_allocate((vm_map_size_t)(old_entry->vme_end -
12603 old_entry->vme_start));
12604 VME_OFFSET_SET(old_entry, 0);
12605 VME_OBJECT_SET(old_entry, object);
12606 old_entry->use_pmap = TRUE;
12607 // assert(!old_entry->needs_copy);
12608 } else if (object->copy_strategy !=
12609 MEMORY_OBJECT_COPY_SYMMETRIC) {
12610 /*
12611 * We are already using an asymmetric
12612 * copy, and therefore we already have
12613 * the right object.
12614 */
12615
12616 assert(!old_entry->needs_copy);
12617 } else if (old_entry->needs_copy || /* case 1 */
12618 object->shadowed || /* case 2 */
12619 (!object->true_share && /* case 3 */
12620 !old_entry->is_shared &&
12621 (object->vo_size >
12622 (vm_map_size_t)(old_entry->vme_end -
12623 old_entry->vme_start)))) {
12624 /*
12625 * We need to create a shadow.
12626 * There are three cases here.
12627 * In the first case, we need to
12628 * complete a deferred symmetrical
12629 * copy that we participated in.
12630 * In the second and third cases,
12631 * we need to create the shadow so
12632 * that changes that we make to the
12633 * object do not interfere with
12634 * any symmetrical copies which
12635 * have occurred (case 2) or which
12636 * might occur (case 3).
12637 *
12638 * The first case is when we had
12639 * deferred shadow object creation
12640 * via the entry->needs_copy mechanism.
12641 * This mechanism only works when
12642 * only one entry points to the source
12643 * object, and we are about to create
12644 * a second entry pointing to the
12645 * same object. The problem is that
12646 * there is no way of mapping from
12647 * an object to the entries pointing
12648 * to it. (Deferred shadow creation
12649 * works with one entry because it occurs
12650 * at fault time, and we walk from the
12651 * entry to the object when handling
12652 * the fault.)
12653 *
12654 * The second case is when the object
12655 * to be shared has already been copied
12656 * with a symmetric copy, but we point
12657 * directly to the object without
12658 * needs_copy set in our entry. (This
12659 * can happen because different ranges
12660 * of an object can be pointed to by
12661 * different entries. In particular,
12662 * a single entry pointing to an object
12663 * can be split by a call to vm_inherit,
12664 * which, combined with task_create, can
12665 * result in the different entries
12666 * having different needs_copy values.)
12667 * The shadowed flag in the object allows
12668 * us to detect this case. The problem
12669 * with this case is that if this object
12670 * has or will have shadows, then we
12671 * must not perform an asymmetric copy
12672 * of this object, since such a copy
12673 * allows the object to be changed, which
12674 * will break the previous symmetrical
12675 * copies (which rely upon the object
12676 * not changing). In a sense, the shadowed
12677 * flag says "don't change this object".
12678 * We fix this by creating a shadow
12679 * object for this object, and sharing
12680 * that. This works because we are free
12681 * to change the shadow object (and thus
12682 * to use an asymmetric copy strategy);
12683 * this is also semantically correct,
12684 * since this object is temporary, and
12685 * therefore a copy of the object is
12686 * as good as the object itself. (This
12687 * is not true for permanent objects,
12688 * since the pager needs to see changes,
12689 * which won't happen if the changes
12690 * are made to a copy.)
12691 *
12692 * The third case is when the object
12693 * to be shared has parts sticking
12694 * outside of the entry we're working
12695 * with, and thus may in the future
12696 * be subject to a symmetrical copy.
12697 * (This is a preemptive version of
12698 * case 2.)
12699 */
12700 VME_OBJECT_SHADOW(old_entry,
12701 (vm_map_size_t) (old_entry->vme_end -
12702 old_entry->vme_start));
12703
12704 /*
12705 * If we're making a shadow for other than
12706 * copy on write reasons, then we have
12707 * to remove write permission.
12708 */
12709
12710 if (!old_entry->needs_copy &&
12711 (old_entry->protection & VM_PROT_WRITE)) {
12712 vm_prot_t prot;
12713
12714 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
12715
12716 prot = old_entry->protection & ~VM_PROT_WRITE;
12717
12718 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
12719
12720 if (override_nx(old_map, VME_ALIAS(old_entry)) && prot) {
12721 prot |= VM_PROT_EXECUTE;
12722 }
12723
12724
12725 if (old_map->mapped_in_other_pmaps) {
12726 vm_object_pmap_protect(
12727 VME_OBJECT(old_entry),
12728 VME_OFFSET(old_entry),
12729 (old_entry->vme_end -
12730 old_entry->vme_start),
12731 PMAP_NULL,
12732 PAGE_SIZE,
12733 old_entry->vme_start,
12734 prot);
12735 } else {
12736 pmap_protect(old_map->pmap,
12737 old_entry->vme_start,
12738 old_entry->vme_end,
12739 prot);
12740 }
12741 }
12742
12743 old_entry->needs_copy = FALSE;
12744 object = VME_OBJECT(old_entry);
12745 }
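/*
 * Editor's note: an illustrative sketch, not part of the build. It restates
 * the three "needs a shadow" cases discussed in the long comment above as a
 * standalone predicate; "needs_new_shadow" is a hypothetical name used only
 * for this illustration.
 */
#if 0
static boolean_t
needs_new_shadow(
	vm_map_entry_t  old_entry,
	vm_object_t     object)
{
	if (old_entry->needs_copy) {
		return TRUE;    /* case 1: complete a deferred symmetric copy */
	}
	if (object->shadowed) {
		return TRUE;    /* case 2: object already symmetrically copied */
	}
	if (!object->true_share &&
	    !old_entry->is_shared &&
	    (object->vo_size >
	    (vm_map_size_t)(old_entry->vme_end - old_entry->vme_start))) {
		return TRUE;    /* case 3: object extends beyond this entry */
	}
	return FALSE;
}
#endif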
12746
12747
12748 /*
12749 * If the object was using a symmetric copy strategy,
12750 * change its copy strategy to the default
12751 * asymmetric copy strategy, which is copy_delay
12752 * in the non-norma case and copy_call in the
12753 * norma case. Bump the reference count for the
12754 * new entry.
12755 */
12756
12757 if (old_entry->is_sub_map) {
12758 vm_map_lock(VME_SUBMAP(old_entry));
12759 vm_map_reference(VME_SUBMAP(old_entry));
12760 vm_map_unlock(VME_SUBMAP(old_entry));
12761 } else {
12762 vm_object_lock(object);
12763 vm_object_reference_locked(object);
12764 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) {
12765 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
12766 }
12767 vm_object_unlock(object);
12768 }
12769
12770 /*
12771 * Clone the entry, using object ref from above.
12772 * Mark both entries as shared.
12773 */
12774
12775 new_entry = vm_map_entry_create(new_map, FALSE); /* Never the kernel
12776 * map or descendants */
12777 vm_map_entry_copy(old_map, new_entry, old_entry);
12778 old_entry->is_shared = TRUE;
12779 new_entry->is_shared = TRUE;
12780
12781 /*
12782 * We're dealing with a shared mapping, so the resulting mapping
12783 * should inherit some of the original mapping's accounting settings.
12784 * "iokit_acct" should have been cleared in vm_map_entry_copy().
12785 * "use_pmap" should stay the same as before (if it hasn't been reset
12786 * to TRUE when we cleared "iokit_acct").
12787 */
12788 assert(!new_entry->iokit_acct);
12789
12790 /*
12791 * If the old entry's inheritance is VM_INHERIT_NONE,
12792 * the new entry is for a corpse fork: remove the
12793 * write permission from the new entry.
12794 */
12795 if (old_entry->inheritance == VM_INHERIT_NONE) {
12796 new_entry->protection &= ~VM_PROT_WRITE;
12797 new_entry->max_protection &= ~VM_PROT_WRITE;
12798 }
12799
12800 /*
12801 * Insert the entry into the new map -- we
12802 * know we're inserting at the end of the new
12803 * map.
12804 */
12805
12806 vm_map_store_entry_link(new_map, vm_map_last_entry(new_map), new_entry,
12807 VM_MAP_KERNEL_FLAGS_NONE);
12808
12809 /*
12810 * Update the physical map
12811 */
12812
12813 if (old_entry->is_sub_map) {
12814 /* Bill Angell pmap support goes here */
12815 } else {
12816 pmap_copy(new_map->pmap, old_map->pmap, new_entry->vme_start,
12817 old_entry->vme_end - old_entry->vme_start,
12818 old_entry->vme_start);
12819 }
12820 }
12821
12822 static boolean_t
12823 vm_map_fork_copy(
12824 vm_map_t old_map,
12825 vm_map_entry_t *old_entry_p,
12826 vm_map_t new_map,
12827 int vm_map_copyin_flags)
12828 {
12829 vm_map_entry_t old_entry = *old_entry_p;
12830 vm_map_size_t entry_size = old_entry->vme_end - old_entry->vme_start;
12831 vm_map_offset_t start = old_entry->vme_start;
12832 vm_map_copy_t copy;
12833 vm_map_entry_t last = vm_map_last_entry(new_map);
12834
12835 vm_map_unlock(old_map);
12836 /*
12837 * Use maxprot version of copyin because we
12838 * care about whether this memory can ever
12839 * be accessed, not just whether it's accessible
12840 * right now.
12841 */
12842 vm_map_copyin_flags |= VM_MAP_COPYIN_USE_MAXPROT;
12843 if (vm_map_copyin_internal(old_map, start, entry_size,
12844 vm_map_copyin_flags, &copy)
12845 != KERN_SUCCESS) {
12846 /*
12847 * The map might have changed while it
12848 * was unlocked, check it again. Skip
12849 * any blank space or permanently
12850 * unreadable region.
12851 */
12852 vm_map_lock(old_map);
12853 if (!vm_map_lookup_entry(old_map, start, &last) ||
12854 (last->max_protection & VM_PROT_READ) == VM_PROT_NONE) {
12855 last = last->vme_next;
12856 }
12857 *old_entry_p = last;
12858
12859 /*
12860 * XXX For some error returns, want to
12861 * XXX skip to the next element. Note
12862 * that INVALID_ADDRESS and
12863 * PROTECTION_FAILURE are handled above.
12864 */
12865
12866 return FALSE;
12867 }
12868
12869 /*
12870 * Assert that the vm_map_copy is coming from the right
12871 * zone and hasn't been forged
12872 */
12873 vm_map_copy_require(copy);
12874
12875 /*
12876 * Insert the copy into the new map
12877 */
12878 vm_map_copy_insert(new_map, last, copy);
12879
12880 /*
12881 * Pick up the traversal at the end of
12882 * the copied region.
12883 */
12884
12885 vm_map_lock(old_map);
12886 start += entry_size;
12887 if (!vm_map_lookup_entry(old_map, start, &last)) {
12888 last = last->vme_next;
12889 } else {
12890 if (last->vme_start == start) {
12891 /*
12892 * No need to clip here and we don't
12893 * want to cause any unnecessary
12894 * unnesting...
12895 */
12896 } else {
12897 vm_map_clip_start(old_map, last, start);
12898 }
12899 }
12900 *old_entry_p = last;
12901
12902 return TRUE;
12903 }
12904
12905 /*
12906 * vm_map_fork:
12907 *
12908 * Create and return a new map based on the old
12909 * map, according to the inheritance values on the
12910 * regions in that map and the options.
12911 *
12912 * The source map must not be locked.
12913 */
12914 vm_map_t
12915 vm_map_fork(
12916 ledger_t ledger,
12917 vm_map_t old_map,
12918 int options)
12919 {
12920 pmap_t new_pmap;
12921 vm_map_t new_map;
12922 vm_map_entry_t old_entry;
12923 vm_map_size_t new_size = 0, entry_size;
12924 vm_map_entry_t new_entry;
12925 boolean_t src_needs_copy;
12926 boolean_t new_entry_needs_copy;
12927 boolean_t pmap_is64bit;
12928 int vm_map_copyin_flags;
12929 vm_inherit_t old_entry_inheritance;
12930 int map_create_options;
12931 kern_return_t footprint_collect_kr;
12932
12933 if (options & ~(VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
12934 VM_MAP_FORK_PRESERVE_PURGEABLE |
12935 VM_MAP_FORK_CORPSE_FOOTPRINT)) {
12936 /* unsupported option */
12937 return VM_MAP_NULL;
12938 }
12939
12940 pmap_is64bit =
12941 #if defined(__i386__) || defined(__x86_64__)
12942 old_map->pmap->pm_task_map != TASK_MAP_32BIT;
12943 #elif defined(__arm64__)
12944 old_map->pmap->max == MACH_VM_MAX_ADDRESS;
12945 #elif defined(__arm__)
12946 FALSE;
12947 #else
12948 #error Unknown architecture.
12949 #endif
12950
12951 unsigned int pmap_flags = 0;
12952 pmap_flags |= pmap_is64bit ? PMAP_CREATE_64BIT : 0;
12953 #if defined(HAS_APPLE_PAC)
12954 pmap_flags |= old_map->pmap->disable_jop ? PMAP_CREATE_DISABLE_JOP : 0;
12955 #endif
12956 #if PMAP_CREATE_FORCE_4K_PAGES
12957 if (VM_MAP_PAGE_SIZE(old_map) == FOURK_PAGE_SIZE &&
12958 PAGE_SIZE != FOURK_PAGE_SIZE) {
12959 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
12960 }
12961 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
12962 new_pmap = pmap_create_options(ledger, (vm_map_size_t) 0, pmap_flags);
12963
12964 vm_map_reference_swap(old_map);
12965 vm_map_lock(old_map);
12966
12967 map_create_options = 0;
12968 if (old_map->hdr.entries_pageable) {
12969 map_create_options |= VM_MAP_CREATE_PAGEABLE;
12970 }
12971 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
12972 map_create_options |= VM_MAP_CREATE_CORPSE_FOOTPRINT;
12973 footprint_collect_kr = KERN_SUCCESS;
12974 }
12975 new_map = vm_map_create_options(new_pmap,
12976 old_map->min_offset,
12977 old_map->max_offset,
12978 map_create_options);
12979 /* inherit cs_enforcement */
12980 vm_map_cs_enforcement_set(new_map, old_map->cs_enforcement);
12981 vm_map_lock(new_map);
12982 vm_commit_pagezero_status(new_map);
12983 /* inherit the parent map's page size */
12984 vm_map_set_page_shift(new_map, VM_MAP_PAGE_SHIFT(old_map));
12985 for (
12986 old_entry = vm_map_first_entry(old_map);
12987 old_entry != vm_map_to_entry(old_map);
12988 ) {
12989 entry_size = old_entry->vme_end - old_entry->vme_start;
12990
12991 old_entry_inheritance = old_entry->inheritance;
12992 /*
12993 * If the caller used the VM_MAP_FORK_SHARE_IF_INHERIT_NONE option,
12994 * share VM_INHERIT_NONE entries that are not backed by a
12995 * device pager.
12996 */
12997 if (old_entry_inheritance == VM_INHERIT_NONE &&
12998 (options & VM_MAP_FORK_SHARE_IF_INHERIT_NONE) &&
12999 (old_entry->protection & VM_PROT_READ) &&
13000 !(!old_entry->is_sub_map &&
13001 VME_OBJECT(old_entry) != NULL &&
13002 VME_OBJECT(old_entry)->pager != NULL &&
13003 is_device_pager_ops(
13004 VME_OBJECT(old_entry)->pager->mo_pager_ops))) {
13005 old_entry_inheritance = VM_INHERIT_SHARE;
13006 }
13007
13008 if (old_entry_inheritance != VM_INHERIT_NONE &&
13009 (options & VM_MAP_FORK_CORPSE_FOOTPRINT) &&
13010 footprint_collect_kr == KERN_SUCCESS) {
13011 /*
13012 * The corpse won't have old_map->pmap to query
13013 * footprint information, so collect that data now
13014 * and store it in new_map->vmmap_corpse_footprint
13015 * for later autopsy.
13016 */
13017 footprint_collect_kr =
13018 vm_map_corpse_footprint_collect(old_map,
13019 old_entry,
13020 new_map);
13021 }
13022
13023 switch (old_entry_inheritance) {
13024 case VM_INHERIT_NONE:
13025 break;
13026
13027 case VM_INHERIT_SHARE:
13028 vm_map_fork_share(old_map, old_entry, new_map);
13029 new_size += entry_size;
13030 break;
13031
13032 case VM_INHERIT_COPY:
13033
13034 /*
13035 * Inline the copy_quickly case;
13036 * upon failure, fall back on a call
13037 * to vm_map_fork_copy.
13038 */
13039
13040 if (old_entry->is_sub_map) {
13041 break;
13042 }
13043 if ((old_entry->wired_count != 0) ||
13044 ((VME_OBJECT(old_entry) != NULL) &&
13045 (VME_OBJECT(old_entry)->true_share))) {
13046 goto slow_vm_map_fork_copy;
13047 }
13048
13049 new_entry = vm_map_entry_create(new_map, FALSE); /* never the kernel map or descendants */
13050 vm_map_entry_copy(old_map, new_entry, old_entry);
13051
13052 if (new_entry->used_for_jit == TRUE && new_map->jit_entry_exists == FALSE) {
13053 new_map->jit_entry_exists = TRUE;
13054 }
13055
13056 if (new_entry->is_sub_map) {
13057 /* clear address space specifics */
13058 new_entry->use_pmap = FALSE;
13059 } else {
13060 /*
13061 * We're dealing with a copy-on-write operation,
13062 * so the resulting mapping should not inherit
13063 * the original mapping's accounting settings.
13064 * "iokit_acct" should have been cleared in
13065 * vm_map_entry_copy().
13066 * "use_pmap" should be reset to its default
13067 * (TRUE) so that the new mapping gets
13068 * accounted for in the task's memory footprint.
13069 */
13070 assert(!new_entry->iokit_acct);
13071 new_entry->use_pmap = TRUE;
13072 }
13073
13074 if (!vm_object_copy_quickly(
13075 VME_OBJECT_PTR(new_entry),
13076 VME_OFFSET(old_entry),
13077 (old_entry->vme_end -
13078 old_entry->vme_start),
13079 &src_needs_copy,
13080 &new_entry_needs_copy)) {
13081 vm_map_entry_dispose(new_map, new_entry);
13082 goto slow_vm_map_fork_copy;
13083 }
13084
13085 /*
13086 * Handle copy-on-write obligations
13087 */
13088
13089 if (src_needs_copy && !old_entry->needs_copy) {
13090 vm_prot_t prot;
13091
13092 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, old_entry->protection));
13093
13094 prot = old_entry->protection & ~VM_PROT_WRITE;
13095
13096 if (override_nx(old_map, VME_ALIAS(old_entry))
13097 && prot) {
13098 prot |= VM_PROT_EXECUTE;
13099 }
13100
13101 assert(!pmap_has_prot_policy(old_map->pmap, old_entry->translated_allow_execute, prot));
13102
13103 vm_object_pmap_protect(
13104 VME_OBJECT(old_entry),
13105 VME_OFFSET(old_entry),
13106 (old_entry->vme_end -
13107 old_entry->vme_start),
13108 ((old_entry->is_shared
13109 || old_map->mapped_in_other_pmaps)
13110 ? PMAP_NULL :
13111 old_map->pmap),
13112 VM_MAP_PAGE_SIZE(old_map),
13113 old_entry->vme_start,
13114 prot);
13115
13116 assert(old_entry->wired_count == 0);
13117 old_entry->needs_copy = TRUE;
13118 }
13119 new_entry->needs_copy = new_entry_needs_copy;
13120
13121 /*
13122 * Insert the entry at the end
13123 * of the map.
13124 */
13125
13126 vm_map_store_entry_link(new_map,
13127 vm_map_last_entry(new_map),
13128 new_entry,
13129 VM_MAP_KERNEL_FLAGS_NONE);
13130 new_size += entry_size;
13131 break;
13132
13133 slow_vm_map_fork_copy:
13134 vm_map_copyin_flags = 0;
13135 if (options & VM_MAP_FORK_PRESERVE_PURGEABLE) {
13136 vm_map_copyin_flags |=
13137 VM_MAP_COPYIN_PRESERVE_PURGEABLE;
13138 }
13139 if (vm_map_fork_copy(old_map,
13140 &old_entry,
13141 new_map,
13142 vm_map_copyin_flags)) {
13143 new_size += entry_size;
13144 }
13145 continue;
13146 }
13147 old_entry = old_entry->vme_next;
13148 }
13149
13150 #if defined(__arm64__)
13151 pmap_insert_sharedpage(new_map->pmap);
13152 #endif /* __arm64__ */
13153
13154 new_map->size = new_size;
13155
13156 if (options & VM_MAP_FORK_CORPSE_FOOTPRINT) {
13157 vm_map_corpse_footprint_collect_done(new_map);
13158 }
13159
13160 /* Propagate JIT entitlement for the pmap layer. */
13161 if (pmap_get_jit_entitled(old_map->pmap)) {
13162 /* Tell the pmap that it supports JIT. */
13163 pmap_set_jit_entitled(new_map->pmap);
13164 }
13165
13166 vm_map_unlock(new_map);
13167 vm_map_unlock(old_map);
13168 vm_map_deallocate(old_map);
13169
13170 return new_map;
13171 }
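/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical corpse-fork style call to vm_map_fork(), combining the option
 * bits handled above; "task_ledger" and "parent_map" are placeholder names.
 */
#if 0
	vm_map_t new_map;

	new_map = vm_map_fork(task_ledger,
	    parent_map,
	    VM_MAP_FORK_SHARE_IF_INHERIT_NONE |
	    VM_MAP_FORK_PRESERVE_PURGEABLE |
	    VM_MAP_FORK_CORPSE_FOOTPRINT);
	if (new_map == VM_MAP_NULL) {
		/* an unsupported option bit was passed */
	}
#endif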
13172
13173 /*
13174 * vm_map_exec:
13175 *
13176 * Setup the "new_map" with the proper execution environment according
13177 * to the type of executable (platform, 64bit, chroot environment).
13178 * Map the comm page and shared region, etc...
13179 */
13180 kern_return_t
13181 vm_map_exec(
13182 vm_map_t new_map,
13183 task_t task,
13184 boolean_t is64bit,
13185 void *fsroot,
13186 cpu_type_t cpu,
13187 cpu_subtype_t cpu_subtype,
13188 boolean_t reslide)
13189 {
13190 SHARED_REGION_TRACE_DEBUG(
13191 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): ->\n",
13192 (void *)VM_KERNEL_ADDRPERM(current_task()),
13193 (void *)VM_KERNEL_ADDRPERM(new_map),
13194 (void *)VM_KERNEL_ADDRPERM(task),
13195 (void *)VM_KERNEL_ADDRPERM(fsroot),
13196 cpu,
13197 cpu_subtype));
13198 (void) vm_commpage_enter(new_map, task, is64bit);
13199
13200 (void) vm_shared_region_enter(new_map, task, is64bit, fsroot, cpu, cpu_subtype, reslide);
13201
13202 SHARED_REGION_TRACE_DEBUG(
13203 ("shared_region: task %p: vm_map_exec(%p,%p,%p,0x%x,0x%x): <-\n",
13204 (void *)VM_KERNEL_ADDRPERM(current_task()),
13205 (void *)VM_KERNEL_ADDRPERM(new_map),
13206 (void *)VM_KERNEL_ADDRPERM(task),
13207 (void *)VM_KERNEL_ADDRPERM(fsroot),
13208 cpu,
13209 cpu_subtype));
13210
13211 /*
13212 * Some devices have region(s) of memory that shouldn't get allocated by
13213 * user processes. The following code creates dummy vm_map_entry_t's for each
13214 * of the regions that need to be reserved to prevent any allocations in
13215 * those regions.
13216 */
13217 kern_return_t kr = KERN_FAILURE;
13218 vm_map_kernel_flags_t vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
13219 vmk_flags.vmkf_permanent = TRUE;
13220 vmk_flags.vmkf_beyond_max = TRUE;
13221
13222 struct vm_reserved_region *regions = NULL;
13223 size_t num_regions = ml_get_vm_reserved_regions(is64bit, &regions);
13224 assert((num_regions == 0) || (num_regions > 0 && regions != NULL));
13225
13226 for (size_t i = 0; i < num_regions; ++i) {
13227 kr = vm_map_enter(
13228 new_map,
13229 &regions[i].vmrr_addr,
13230 regions[i].vmrr_size,
13231 (vm_map_offset_t)0,
13232 VM_FLAGS_FIXED,
13233 vmk_flags,
13234 VM_KERN_MEMORY_NONE,
13235 VM_OBJECT_NULL,
13236 (vm_object_offset_t)0,
13237 FALSE,
13238 VM_PROT_NONE,
13239 VM_PROT_NONE,
13240 VM_INHERIT_NONE);
13241
13242 if (kr != KERN_SUCCESS) {
13243 panic("Failed to reserve %s region in user map %p %d", regions[i].vmrr_name, new_map, kr);
13244 }
13245 }
13246
13247 new_map->reserved_regions = (num_regions ? TRUE : FALSE);
13248
13249 return KERN_SUCCESS;
13250 }
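/*
 * Editor's note: an illustrative sketch, not part of the build. It shows a
 * hypothetical call to vm_map_exec() with the signature above; "task",
 * "fsroot", "cpu" and "cpu_subtype" are placeholder names, and "fsroot" is
 * the filesystem root for the chroot environment mentioned in the comment
 * above.
 */
#if 0
	kern_return_t kr;

	kr = vm_map_exec(new_map,
	    task,
	    TRUE,               /* is64bit */
	    fsroot,
	    cpu,
	    cpu_subtype,
	    FALSE);             /* reslide */
#endif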
13251
13252 /*
13253 * vm_map_lookup_locked:
13254 *
13255 * Finds the VM object, offset, and
13256 * protection for a given virtual address in the
13257 * specified map, assuming a page fault of the
13258 * type specified.
13259 *
13260 * Returns the (object, offset, protection) for
13261 * this address, whether it is wired down, and whether
13262 * this map has the only reference to the data in question.
13263 * In order to later verify this lookup, a "version"
13264 * is returned.
13265 * If contended != NULL, *contended will be set to
13266 * true iff the thread had to spin or block to acquire
13267 * an exclusive lock.
13268 *
13269 * The map MUST be locked by the caller and WILL be
13270 * locked on exit. In order to guarantee the
13271 * existence of the returned object, it is returned
13272 * locked.
13273 *
13274 * If a lookup is requested with "write protection"
13275 * specified, the map may be changed to perform virtual
13276 * copying operations, although the data referenced will
13277 * remain the same.
13278 */
13279 kern_return_t
13280 vm_map_lookup_locked(
13281 vm_map_t *var_map, /* IN/OUT */
13282 vm_map_offset_t vaddr,
13283 vm_prot_t fault_type,
13284 int object_lock_type,
13285 vm_map_version_t *out_version, /* OUT */
13286 vm_object_t *object, /* OUT */
13287 vm_object_offset_t *offset, /* OUT */
13288 vm_prot_t *out_prot, /* OUT */
13289 boolean_t *wired, /* OUT */
13290 vm_object_fault_info_t fault_info, /* OUT */
13291 vm_map_t *real_map, /* OUT */
13292 bool *contended) /* OUT */
13293 {
13294 vm_map_entry_t entry;
13295 vm_map_t map = *var_map;
13296 vm_map_t old_map = *var_map;
13297 vm_map_t cow_sub_map_parent = VM_MAP_NULL;
13298 vm_map_offset_t cow_parent_vaddr = 0;
13299 vm_map_offset_t old_start = 0;
13300 vm_map_offset_t old_end = 0;
13301 vm_prot_t prot;
13302 boolean_t mask_protections;
13303 boolean_t force_copy;
13304 boolean_t no_force_copy_if_executable;
13305 vm_prot_t original_fault_type;
13306 vm_map_size_t fault_page_mask;
13307
13308 /*
13309 * VM_PROT_IS_MASK means that the caller wants us to use "fault_type"
13310 * as a mask against the mapping's actual protections, not as an
13311 * absolute value.
13312 */
13313 mask_protections = (fault_type & VM_PROT_IS_MASK) ? TRUE : FALSE;
13314 force_copy = (fault_type & VM_PROT_COPY) ? TRUE : FALSE;
13315 no_force_copy_if_executable = (fault_type & VM_PROT_COPY_FAIL_IF_EXECUTABLE) ? TRUE : FALSE;
13316 fault_type &= VM_PROT_ALL;
13317 original_fault_type = fault_type;
13318 if (contended) {
13319 *contended = false;
13320 }
13321
13322 *real_map = map;
13323
13324 fault_page_mask = MIN(VM_MAP_PAGE_MASK(map), PAGE_MASK);
13325 vaddr = VM_MAP_TRUNC_PAGE(vaddr, fault_page_mask);
13326
13327 RetryLookup:
13328 fault_type = original_fault_type;
13329
13330 /*
13331 * If the map has an interesting hint, try it before calling
13332 * full blown lookup routine.
13333 */
13334 entry = map->hint;
13335
13336 if ((entry == vm_map_to_entry(map)) ||
13337 (vaddr < entry->vme_start) || (vaddr >= entry->vme_end)) {
13338 vm_map_entry_t tmp_entry;
13339
13340 /*
13341 * Entry was either not a valid hint, or the vaddr
13342 * was not contained in the entry, so do a full lookup.
13343 */
13344 if (!vm_map_lookup_entry(map, vaddr, &tmp_entry)) {
13345 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13346 vm_map_unlock(cow_sub_map_parent);
13347 }
13348 if ((*real_map != map)
13349 && (*real_map != cow_sub_map_parent)) {
13350 vm_map_unlock(*real_map);
13351 }
13352 return KERN_INVALID_ADDRESS;
13353 }
13354
13355 entry = tmp_entry;
13356 }
13357 if (map == old_map) {
13358 old_start = entry->vme_start;
13359 old_end = entry->vme_end;
13360 }
13361
13362 /*
13363 * Handle submaps. Drop lock on upper map, submap is
13364 * returned locked.
13365 */
13366
13367 submap_recurse:
13368 if (entry->is_sub_map) {
13369 vm_map_offset_t local_vaddr;
13370 vm_map_offset_t end_delta;
13371 vm_map_offset_t start_delta;
13372 vm_map_entry_t submap_entry, saved_submap_entry;
13373 vm_object_offset_t submap_entry_offset;
13374 vm_object_size_t submap_entry_size;
13375 vm_prot_t subentry_protection;
13376 vm_prot_t subentry_max_protection;
13377 boolean_t subentry_no_copy_on_read;
13378 boolean_t mapped_needs_copy = FALSE;
13379 vm_map_version_t version;
13380
13381 assertf(VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)) >= VM_MAP_PAGE_SHIFT(map),
13382 "map %p (%d) entry %p submap %p (%d)\n",
13383 map, VM_MAP_PAGE_SHIFT(map), entry,
13384 VME_SUBMAP(entry), VM_MAP_PAGE_SHIFT(VME_SUBMAP(entry)));
13385
13386 local_vaddr = vaddr;
13387
13388 if ((entry->use_pmap &&
13389 !((fault_type & VM_PROT_WRITE) ||
13390 force_copy))) {
13391 /* if real_map equals map we unlock below */
13392 if ((*real_map != map) &&
13393 (*real_map != cow_sub_map_parent)) {
13394 vm_map_unlock(*real_map);
13395 }
13396 *real_map = VME_SUBMAP(entry);
13397 }
13398
13399 if (entry->needs_copy &&
13400 ((fault_type & VM_PROT_WRITE) ||
13401 force_copy)) {
13402 if (!mapped_needs_copy) {
13403 if (vm_map_lock_read_to_write(map)) {
13404 vm_map_lock_read(map);
13405 *real_map = map;
13406 goto RetryLookup;
13407 }
13408 vm_map_lock_read(VME_SUBMAP(entry));
13409 *var_map = VME_SUBMAP(entry);
13410 cow_sub_map_parent = map;
13411 /* reset base to map before cow object */
13412 /* this is the map which will accept */
13413 /* the new cow object */
13414 old_start = entry->vme_start;
13415 old_end = entry->vme_end;
13416 cow_parent_vaddr = vaddr;
13417 mapped_needs_copy = TRUE;
13418 } else {
13419 vm_map_lock_read(VME_SUBMAP(entry));
13420 *var_map = VME_SUBMAP(entry);
13421 if ((cow_sub_map_parent != map) &&
13422 (*real_map != map)) {
13423 vm_map_unlock(map);
13424 }
13425 }
13426 } else {
13427 vm_map_lock_read(VME_SUBMAP(entry));
13428 *var_map = VME_SUBMAP(entry);
13429 /* leave map locked if it is a target */
13430 /* cow sub_map above; otherwise, just */
13431 /* follow the maps down to the object. */
13432 /* Here we unlock knowing we are not */
13433 /* revisiting the map. */
13434 if ((*real_map != map) && (map != cow_sub_map_parent)) {
13435 vm_map_unlock_read(map);
13436 }
13437 }
13438
13439 map = *var_map;
13440
13441 /* calculate the offset in the submap for vaddr */
13442 local_vaddr = (local_vaddr - entry->vme_start) + VME_OFFSET(entry);
13443 assertf(VM_MAP_PAGE_ALIGNED(local_vaddr, fault_page_mask),
13444 "local_vaddr 0x%llx entry->vme_start 0x%llx fault_page_mask 0x%llx\n",
13445 (uint64_t)local_vaddr, (uint64_t)entry->vme_start, (uint64_t)fault_page_mask);
13446
13447 RetrySubMap:
13448 if (!vm_map_lookup_entry(map, local_vaddr, &submap_entry)) {
13449 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13450 vm_map_unlock(cow_sub_map_parent);
13451 }
13452 if ((*real_map != map)
13453 && (*real_map != cow_sub_map_parent)) {
13454 vm_map_unlock(*real_map);
13455 }
13456 *real_map = map;
13457 return KERN_INVALID_ADDRESS;
13458 }
13459
13460 /* find the attenuated shadow of the underlying object */
13461 /* on our target map */
13462
13463 /* In English: the submap object may extend beyond the */
13464 /* region mapped by the entry, or may only fill a portion */
13465 /* of it. For our purposes, we only care if the object */
13466 /* doesn't fill the entry. In this case the area which will */
13467 /* ultimately be clipped in the top map will only need */
13468 /* to be as big as the portion of the underlying entry */
13469 /* which is mapped */
13470 start_delta = submap_entry->vme_start > VME_OFFSET(entry) ?
13471 submap_entry->vme_start - VME_OFFSET(entry) : 0;
13472
13473 end_delta =
13474 (VME_OFFSET(entry) + start_delta + (old_end - old_start)) <=
13475 submap_entry->vme_end ?
13476 0 : (VME_OFFSET(entry) +
13477 (old_end - old_start))
13478 - submap_entry->vme_end;
13479
13480 old_start += start_delta;
13481 old_end -= end_delta;
13482
13483 if (submap_entry->is_sub_map) {
13484 entry = submap_entry;
13485 vaddr = local_vaddr;
13486 goto submap_recurse;
13487 }
13488
13489 if (((fault_type & VM_PROT_WRITE) ||
13490 force_copy)
13491 && cow_sub_map_parent) {
13492 vm_object_t sub_object, copy_object;
13493 vm_object_offset_t copy_offset;
13494 vm_map_offset_t local_start;
13495 vm_map_offset_t local_end;
13496 boolean_t copied_slowly = FALSE;
13497 vm_object_offset_t copied_slowly_phys_offset = 0;
13498 kern_return_t kr = KERN_SUCCESS;
13499
13500 if (vm_map_lock_read_to_write(map)) {
13501 vm_map_lock_read(map);
13502 old_start -= start_delta;
13503 old_end += end_delta;
13504 goto RetrySubMap;
13505 }
13506
13507
13508 sub_object = VME_OBJECT(submap_entry);
13509 if (sub_object == VM_OBJECT_NULL) {
13510 sub_object =
13511 vm_object_allocate(
13512 (vm_map_size_t)
13513 (submap_entry->vme_end -
13514 submap_entry->vme_start));
13515 VME_OBJECT_SET(submap_entry, sub_object);
13516 VME_OFFSET_SET(submap_entry, 0);
13517 assert(!submap_entry->is_sub_map);
13518 assert(submap_entry->use_pmap);
13519 }
13520 local_start = local_vaddr -
13521 (cow_parent_vaddr - old_start);
13522 local_end = local_vaddr +
13523 (old_end - cow_parent_vaddr);
13524 vm_map_clip_start(map, submap_entry, local_start);
13525 vm_map_clip_end(map, submap_entry, local_end);
13526 if (submap_entry->is_sub_map) {
13527 /* unnesting was done when clipping */
13528 assert(!submap_entry->use_pmap);
13529 }
13530
13531 /* This is the COW case; let's connect */
13532 /* an entry in our space to the underlying */
13533 /* object in the submap, bypassing the */
13534 /* submap. */
13535
13536 if (submap_entry->wired_count != 0 ||
13537 (sub_object->copy_strategy !=
13538 MEMORY_OBJECT_COPY_SYMMETRIC)) {
13539 if ((submap_entry->protection & VM_PROT_EXECUTE) &&
13540 no_force_copy_if_executable) {
13541 // printf("FBDP map %p entry %p start 0x%llx end 0x%llx wired %d strat %d\n", map, submap_entry, (uint64_t)local_start, (uint64_t)local_end, submap_entry->wired_count, sub_object->copy_strategy);
13542 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13543 vm_map_unlock(cow_sub_map_parent);
13544 }
13545 if ((*real_map != map)
13546 && (*real_map != cow_sub_map_parent)) {
13547 vm_map_unlock(*real_map);
13548 }
13549 *real_map = map;
13550 vm_map_lock_write_to_read(map);
13551 kr = KERN_PROTECTION_FAILURE;
13552 DTRACE_VM4(submap_no_copy_executable,
13553 vm_map_t, map,
13554 vm_object_offset_t, submap_entry_offset,
13555 vm_object_size_t, submap_entry_size,
13556 int, kr);
13557 return kr;
13558 }
13559
13560 vm_object_reference(sub_object);
13561
13562 assertf(VM_MAP_PAGE_ALIGNED(VME_OFFSET(submap_entry), VM_MAP_PAGE_MASK(map)),
13563 "submap_entry %p offset 0x%llx\n",
13564 submap_entry, VME_OFFSET(submap_entry));
13565 submap_entry_offset = VME_OFFSET(submap_entry);
13566 submap_entry_size = submap_entry->vme_end - submap_entry->vme_start;
13567
13568 DTRACE_VM6(submap_copy_slowly,
13569 vm_map_t, cow_sub_map_parent,
13570 vm_map_offset_t, vaddr,
13571 vm_map_t, map,
13572 vm_object_size_t, submap_entry_size,
13573 int, submap_entry->wired_count,
13574 int, sub_object->copy_strategy);
13575
13576 saved_submap_entry = submap_entry;
13577 version.main_timestamp = map->timestamp;
13578 vm_map_unlock(map); /* Increments timestamp by 1 */
13579 submap_entry = VM_MAP_ENTRY_NULL;
13580
13581 vm_object_lock(sub_object);
13582 kr = vm_object_copy_slowly(sub_object,
13583 submap_entry_offset,
13584 submap_entry_size,
13585 FALSE,
13586 &copy_object);
13587 copied_slowly = TRUE;
13588 /* 4k: account for extra offset in physical page */
13589 copied_slowly_phys_offset = submap_entry_offset - vm_object_trunc_page(submap_entry_offset);
13590 vm_object_deallocate(sub_object);
13591
13592 vm_map_lock(map);
13593
13594 if (kr != KERN_SUCCESS &&
13595 kr != KERN_MEMORY_RESTART_COPY) {
13596 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13597 vm_map_unlock(cow_sub_map_parent);
13598 }
13599 if ((*real_map != map)
13600 && (*real_map != cow_sub_map_parent)) {
13601 vm_map_unlock(*real_map);
13602 }
13603 *real_map = map;
13604 vm_object_deallocate(copy_object);
13605 copy_object = VM_OBJECT_NULL;
13606 vm_map_lock_write_to_read(map);
13607 DTRACE_VM4(submap_copy_slowly,
13608 vm_object_t, sub_object,
13609 vm_object_offset_t, submap_entry_offset,
13610 vm_object_size_t, submap_entry_size,
13611 int, kr);
13612 return kr;
13613 }
13614
13615 if ((kr == KERN_SUCCESS) &&
13616 (version.main_timestamp + 1) == map->timestamp) {
13617 submap_entry = saved_submap_entry;
13618 } else {
13619 saved_submap_entry = NULL;
13620 old_start -= start_delta;
13621 old_end += end_delta;
13622 vm_object_deallocate(copy_object);
13623 copy_object = VM_OBJECT_NULL;
13624 vm_map_lock_write_to_read(map);
13625 goto RetrySubMap;
13626 }
13627 } else {
13628 /* set up shadow object */
13629 copy_object = sub_object;
13630 vm_object_lock(sub_object);
13631 vm_object_reference_locked(sub_object);
13632 sub_object->shadowed = TRUE;
13633 vm_object_unlock(sub_object);
13634
13635 assert(submap_entry->wired_count == 0);
13636 submap_entry->needs_copy = TRUE;
13637
13638 prot = submap_entry->protection;
13639 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13640 prot = prot & ~VM_PROT_WRITE;
13641 assert(!pmap_has_prot_policy(map->pmap, submap_entry->translated_allow_execute, prot));
13642
13643 if (override_nx(old_map,
13644 VME_ALIAS(submap_entry))
13645 && prot) {
13646 prot |= VM_PROT_EXECUTE;
13647 }
13648
13649 vm_object_pmap_protect(
13650 sub_object,
13651 VME_OFFSET(submap_entry),
13652 submap_entry->vme_end -
13653 submap_entry->vme_start,
13654 (submap_entry->is_shared
13655 || map->mapped_in_other_pmaps) ?
13656 PMAP_NULL : map->pmap,
13657 VM_MAP_PAGE_SIZE(map),
13658 submap_entry->vme_start,
13659 prot);
13660 }
13661
13662 /*
13663 * Adjust the fault offset to the submap entry.
13664 */
13665 copy_offset = (local_vaddr -
13666 submap_entry->vme_start +
13667 VME_OFFSET(submap_entry));
13668
13669 /* This works differently from the */
13670 /* normal submap case. We go back */
13671 /* to the parent of the cow map and */
13672 /* clip out the target portion of */
13673 /* the sub_map, substituting the */
13674 /* new copy object. */
13675
13676 subentry_protection = submap_entry->protection;
13677 subentry_max_protection = submap_entry->max_protection;
13678 subentry_no_copy_on_read = submap_entry->vme_no_copy_on_read;
13679 vm_map_unlock(map);
13680 submap_entry = NULL; /* not valid after map unlock */
13681
13682 local_start = old_start;
13683 local_end = old_end;
13684 map = cow_sub_map_parent;
13685 *var_map = cow_sub_map_parent;
13686 vaddr = cow_parent_vaddr;
13687 cow_sub_map_parent = NULL;
13688
13689 if (!vm_map_lookup_entry(map,
13690 vaddr, &entry)) {
13691 if ((cow_sub_map_parent) && (cow_sub_map_parent != map)) {
13692 vm_map_unlock(cow_sub_map_parent);
13693 }
13694 if ((*real_map != map)
13695 && (*real_map != cow_sub_map_parent)) {
13696 vm_map_unlock(*real_map);
13697 }
13698 *real_map = map;
13699 vm_object_deallocate(
13700 copy_object);
13701 copy_object = VM_OBJECT_NULL;
13702 vm_map_lock_write_to_read(map);
13703 DTRACE_VM4(submap_lookup_post_unlock,
13704 uint64_t, (uint64_t)entry->vme_start,
13705 uint64_t, (uint64_t)entry->vme_end,
13706 vm_map_offset_t, vaddr,
13707 int, copied_slowly);
13708 return KERN_INVALID_ADDRESS;
13709 }
13710
13711 /*
13712 * Clip out the portion of space mapped by the sub map
13713 * which corresponds to the underlying object.
13714 */
13715
13716 /*
13717 * Clip (and unnest) the smallest nested chunk
13718 * possible around the faulting address...
13719 */
13720 local_start = vaddr & ~(pmap_shared_region_size_min(map->pmap) - 1);
13721 local_end = local_start + pmap_shared_region_size_min(map->pmap);
13722 /*
13723 * ... but don't go beyond the "old_start" to "old_end"
13724 * range, to avoid spanning over another VM region
13725 * with a possibly different VM object and/or offset.
13726 */
13727 if (local_start < old_start) {
13728 local_start = old_start;
13729 }
13730 if (local_end > old_end) {
13731 local_end = old_end;
13732 }
13733 /*
13734 * Adjust copy_offset to the start of the range.
13735 */
13736 copy_offset -= (vaddr - local_start);
13737
13738 vm_map_clip_start(map, entry, local_start);
13739 vm_map_clip_end(map, entry, local_end);
13740 if (entry->is_sub_map) {
13741 /* unnesting was done when clipping */
13742 assert(!entry->use_pmap);
13743 }
13744
13745 /* substitute copy object for */
13746 /* shared map entry */
13747 vm_map_deallocate(VME_SUBMAP(entry));
13748 assert(!entry->iokit_acct);
13749 entry->is_sub_map = FALSE;
13750 entry->use_pmap = TRUE;
13751 VME_OBJECT_SET(entry, copy_object);
13752
13753 /* propagate the submap entry's protections */
13754 if (entry->protection != VM_PROT_READ) {
13755 /*
13756 * Someone has already altered the top entry's
13757 * protections via vm_protect(VM_PROT_COPY).
13758 * Respect these new values and ignore the
13759 * submap entry's protections.
13760 */
13761 } else {
13762 /*
13763 * Regular copy-on-write: propagate the submap
13764 * entry's protections to the top map entry.
13765 */
13766 entry->protection |= subentry_protection;
13767 }
13768 entry->max_protection |= subentry_max_protection;
13769 /* propagate no_copy_on_read */
13770 entry->vme_no_copy_on_read = subentry_no_copy_on_read;
13771
13772 if ((entry->protection & VM_PROT_WRITE) &&
13773 (entry->protection & VM_PROT_EXECUTE) &&
13774 #if XNU_TARGET_OS_OSX
13775 map->pmap != kernel_pmap &&
13776 (vm_map_cs_enforcement(map)
13777 #if __arm64__
13778 || !VM_MAP_IS_EXOTIC(map)
13779 #endif /* __arm64__ */
13780 ) &&
13781 #endif /* XNU_TARGET_OS_OSX */
13782 #if PMAP_CS
13783 !pmap_cs_exempt(map->pmap) &&
13784 #endif
13785 !(entry->used_for_jit) &&
13786 VM_MAP_POLICY_WX_STRIP_X(map)) {
13787 DTRACE_VM3(cs_wx,
13788 uint64_t, (uint64_t)entry->vme_start,
13789 uint64_t, (uint64_t)entry->vme_end,
13790 vm_prot_t, entry->protection);
13791 printf("CODE SIGNING: %d[%s] %s can't have both write and exec at the same time\n",
13792 proc_selfpid(),
13793 (current_task()->bsd_info
13794 ? proc_name_address(current_task()->bsd_info)
13795 : "?"),
13796 __FUNCTION__);
13797 entry->protection &= ~VM_PROT_EXECUTE;
13798 }
13799
13800 if (copied_slowly) {
13801 VME_OFFSET_SET(entry, local_start - old_start + copied_slowly_phys_offset);
13802 entry->needs_copy = FALSE;
13803 entry->is_shared = FALSE;
13804 } else {
13805 VME_OFFSET_SET(entry, copy_offset);
13806 assert(entry->wired_count == 0);
13807 entry->needs_copy = TRUE;
13808 if (entry->inheritance == VM_INHERIT_SHARE) {
13809 entry->inheritance = VM_INHERIT_COPY;
13810 }
13811 if (map != old_map) {
13812 entry->is_shared = TRUE;
13813 }
13814 }
13815 if (entry->inheritance == VM_INHERIT_SHARE) {
13816 entry->inheritance = VM_INHERIT_COPY;
13817 }
13818
13819 vm_map_lock_write_to_read(map);
13820 } else {
13821 if ((cow_sub_map_parent)
13822 && (cow_sub_map_parent != *real_map)
13823 && (cow_sub_map_parent != map)) {
13824 vm_map_unlock(cow_sub_map_parent);
13825 }
13826 entry = submap_entry;
13827 vaddr = local_vaddr;
13828 }
13829 }
13830
13831 /*
13832 * Check whether this task is allowed to have
13833 * this page.
13834 */
13835
13836 prot = entry->protection;
13837
13838 if (override_nx(old_map, VME_ALIAS(entry)) && prot) {
13839 /*
13840 * HACK -- if not a stack, then allow execution
13841 */
13842 prot |= VM_PROT_EXECUTE;
13843 }
13844
13845 if (mask_protections) {
13846 fault_type &= prot;
13847 if (fault_type == VM_PROT_NONE) {
13848 goto protection_failure;
13849 }
13850 }
13851 if (((fault_type & prot) != fault_type)
13852 #if __arm64__
13853 /* prefetch abort in execute-only page */
13854 && !(prot == VM_PROT_EXECUTE && fault_type == (VM_PROT_READ | VM_PROT_EXECUTE))
13855 #endif
13856 ) {
13857 protection_failure:
13858 if (*real_map != map) {
13859 vm_map_unlock(*real_map);
13860 }
13861 *real_map = map;
13862
13863 if ((fault_type & VM_PROT_EXECUTE) && prot) {
13864 log_stack_execution_failure((addr64_t)vaddr, prot);
13865 }
13866
13867 DTRACE_VM2(prot_fault, int, 1, (uint64_t *), NULL);
13868 return KERN_PROTECTION_FAILURE;
13869 }
13870
13871 /*
13872 * If this page is not pageable, we have to get
13873 * it for all possible accesses.
13874 */
13875
13876 *wired = (entry->wired_count != 0);
13877 if (*wired) {
13878 fault_type = prot;
13879 }
13880
13881 /*
13882 * If the entry was copy-on-write, we either shadow it now (write fault) or demote the allowed permissions (read fault).
13883 */
13884
13885 if (entry->needs_copy) {
13886 /*
13887 * If we want to write the page, we may as well
13888 * handle that now since we've got the map locked.
13889 *
13890 * If we don't need to write the page, we just
13891 * demote the permissions allowed.
13892 */
13893
13894 if ((fault_type & VM_PROT_WRITE) || *wired || force_copy) {
13895 /*
13896 * Make a new object, and place it in the
13897 * object chain. Note that no new references
13898 * have appeared -- one just moved from the
13899 * map to the new object.
13900 */
13901
13902 if (vm_map_lock_read_to_write(map)) {
13903 vm_map_lock_read(map);
13904 goto RetryLookup;
13905 }
13906
13907 if (VME_OBJECT(entry)->shadowed == FALSE) {
13908 vm_object_lock(VME_OBJECT(entry));
13909 VME_OBJECT(entry)->shadowed = TRUE;
13910 vm_object_unlock(VME_OBJECT(entry));
13911 }
13912 VME_OBJECT_SHADOW(entry,
13913 (vm_map_size_t) (entry->vme_end -
13914 entry->vme_start));
13915 entry->needs_copy = FALSE;
13916
13917 vm_map_lock_write_to_read(map);
13918 }
13919 if ((fault_type & VM_PROT_WRITE) == 0 && *wired == 0) {
13920 /*
13921 * We're attempting to read a copy-on-write
13922 * page -- don't allow writes.
13923 */
13924
13925 prot &= (~VM_PROT_WRITE);
13926 }
13927 }
13928
13929 /*
13930 * Create an object if necessary.
13931 */
13932 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
13933 if (vm_map_lock_read_to_write(map)) {
13934 vm_map_lock_read(map);
13935 goto RetryLookup;
13936 }
13937
13938 VME_OBJECT_SET(entry,
13939 vm_object_allocate(
13940 (vm_map_size_t)(entry->vme_end -
13941 entry->vme_start)));
13942 VME_OFFSET_SET(entry, 0);
13943 assert(entry->use_pmap);
13944 vm_map_lock_write_to_read(map);
13945 }
13946
13947 /*
13948 * Return the object/offset from this entry. If the entry
13949 * was copy-on-write or empty, it has been fixed up. Also
13950 * return the protection.
13951 */
13952
13953 *offset = (vaddr - entry->vme_start) + VME_OFFSET(entry);
13954 *object = VME_OBJECT(entry);
13955 *out_prot = prot;
13956 KDBG_FILTERED(MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_MAP_LOOKUP_OBJECT), VM_KERNEL_UNSLIDE_OR_PERM(*object), (unsigned long) VME_ALIAS(entry), 0, 0);
13957
13958 if (fault_info) {
13959 fault_info->interruptible = THREAD_UNINT; /* for now... */
13960 /* ... the caller will change "interruptible" if needed */
13961 fault_info->cluster_size = 0;
13962 fault_info->user_tag = VME_ALIAS(entry);
13963 fault_info->pmap_options = 0;
13964 if (entry->iokit_acct ||
13965 (!entry->is_sub_map && !entry->use_pmap)) {
13966 fault_info->pmap_options |= PMAP_OPTIONS_ALT_ACCT;
13967 }
13968 fault_info->behavior = entry->behavior;
13969 fault_info->lo_offset = VME_OFFSET(entry);
13970 fault_info->hi_offset =
13971 (entry->vme_end - entry->vme_start) + VME_OFFSET(entry);
13972 fault_info->no_cache = entry->no_cache;
13973 fault_info->stealth = FALSE;
13974 fault_info->io_sync = FALSE;
13975 if (entry->used_for_jit ||
13976 #if PMAP_CS
13977 pmap_cs_exempt(map->pmap) ||
13978 #endif
13979 entry->vme_resilient_codesign) {
13980 fault_info->cs_bypass = TRUE;
13981 } else {
13982 fault_info->cs_bypass = FALSE;
13983 }
13984 fault_info->pmap_cs_associated = FALSE;
13985 #if CONFIG_PMAP_CS
13986 if (entry->pmap_cs_associated) {
13987 /*
13988 * The pmap layer will validate this page
13989 * before allowing it to be executed from.
13990 */
13991 fault_info->pmap_cs_associated = TRUE;
13992 }
13993 #endif /* CONFIG_PMAP_CS */
13994 fault_info->mark_zf_absent = FALSE;
13995 fault_info->batch_pmap_op = FALSE;
13996 fault_info->resilient_media = entry->vme_resilient_media;
13997 fault_info->no_copy_on_read = entry->vme_no_copy_on_read;
13998 if (entry->translated_allow_execute) {
13999 fault_info->pmap_options |= PMAP_OPTIONS_TRANSLATED_ALLOW_EXECUTE;
14000 }
14001 }
14002
14003 /*
14004 * Lock the object to prevent it from disappearing
14005 */
14006 if (object_lock_type == OBJECT_LOCK_EXCLUSIVE) {
14007 if (contended == NULL) {
14008 vm_object_lock(*object);
14009 } else {
14010 *contended = vm_object_lock_check_contended(*object);
14011 }
14012 } else {
14013 vm_object_lock_shared(*object);
14014 }
14015
14016 /*
14017 * Save the version number
14018 */
14019
14020 out_version->main_timestamp = map->timestamp;
14021
14022 return KERN_SUCCESS;
14023 }
14024
14025
14026 /*
14027 * vm_map_verify:
14028 *
14029 * Verifies that the map in question has not changed
14030 * since the given version. The map has to be locked
14031 * ("shared" mode is fine) before calling this function
14032 * and it will be returned locked too.
14033 */
14034 boolean_t
14035 vm_map_verify(
14036 vm_map_t map,
14037 vm_map_version_t *version) /* REF */
14038 {
14039 boolean_t result;
14040
14041 vm_map_lock_assert_held(map);
14042 result = (map->timestamp == version->main_timestamp);
14043
14044 return result;
14045 }
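/*
 * Illustrative sketch (not part of the original source) of the intended
 * lookup/verify pattern: a caller saves the version filled in by a map
 * lookup, drops the map lock to do blocking work, then re-takes the lock
 * and calls vm_map_verify() to decide whether its cached lookup results
 * are still valid or whether the lookup must be redone.
 *
 *	vm_map_version_t version;	// filled in with map->timestamp at lookup time
 *
 *	vm_map_unlock_read(map);
 *	// ... blocking work: page-in, copying, etc. ...
 *	vm_map_lock_read(map);
 *	if (!vm_map_verify(map, &version)) {
 *		// the map changed underneath us: redo the lookup
 *	}
 */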
14046
14047 /*
14048 * TEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARYTEMPORARY
14049 * Goes away after regular vm_region_recurse function migrates to
14050 * 64 bits
14051 * vm_region_recurse: A form of vm_region which follows the
14052 * submaps in a target map
14053 *
14054 */
14055
14056 kern_return_t
14057 vm_map_region_recurse_64(
14058 vm_map_t map,
14059 vm_map_offset_t *address, /* IN/OUT */
14060 vm_map_size_t *size, /* OUT */
14061 natural_t *nesting_depth, /* IN/OUT */
14062 vm_region_submap_info_64_t submap_info, /* IN/OUT */
14063 mach_msg_type_number_t *count) /* IN/OUT */
14064 {
14065 mach_msg_type_number_t original_count;
14066 vm_region_extended_info_data_t extended;
14067 vm_map_entry_t tmp_entry;
14068 vm_map_offset_t user_address;
14069 unsigned int user_max_depth;
14070
14071 /*
14072 * "curr_entry" is the VM map entry preceding or including the
14073 * address we're looking for.
14074 * "curr_map" is the map or sub-map containing "curr_entry".
14075 * "curr_address" is the equivalent of the top map's "user_address"
14076 * in the current map.
14077 * "curr_offset" is the cumulated offset of "curr_map" in the
14078 * target task's address space.
14079 * "curr_depth" is the depth of "curr_map" in the chain of
14080 * sub-maps.
14081 *
14082 * "curr_max_below" and "curr_max_above" limit the range (around
14083 * "curr_address") we should take into account in the current (sub)map.
14084 * They limit the range to what's visible through the map entries
14085 * we've traversed from the top map to the current map.
14086 *
14087 */
14088 vm_map_entry_t curr_entry;
14089 vm_map_address_t curr_address;
14090 vm_map_offset_t curr_offset;
14091 vm_map_t curr_map;
14092 unsigned int curr_depth;
14093 vm_map_offset_t curr_max_below, curr_max_above;
14094 vm_map_offset_t curr_skip;
14095
14096 /*
14097 * "next_" is the same as "curr_" but for the VM region immediately
14098 * after the address we're looking for. We need to keep track of this
14099 * too because we want to return info about that region if the
14100 * address we're looking for is not mapped.
14101 */
14102 vm_map_entry_t next_entry;
14103 vm_map_offset_t next_offset;
14104 vm_map_offset_t next_address;
14105 vm_map_t next_map;
14106 unsigned int next_depth;
14107 vm_map_offset_t next_max_below, next_max_above;
14108 vm_map_offset_t next_skip;
14109
14110 boolean_t look_for_pages;
14111 vm_region_submap_short_info_64_t short_info;
14112 boolean_t do_region_footprint;
14113 int effective_page_size, effective_page_shift;
14114
14115 if (map == VM_MAP_NULL) {
14116 /* no address space to work on */
14117 return KERN_INVALID_ARGUMENT;
14118 }
14119
14120 effective_page_shift = vm_self_region_page_shift(map);
14121 effective_page_size = (1 << effective_page_shift);
14122
14123 if (*count < VM_REGION_SUBMAP_SHORT_INFO_COUNT_64) {
14124 /*
14125 * "info" structure is not big enough and
14126 * would overflow
14127 */
14128 return KERN_INVALID_ARGUMENT;
14129 }
14130
14131 do_region_footprint = task_self_region_footprint();
14132 original_count = *count;
14133
14134 if (original_count < VM_REGION_SUBMAP_INFO_V0_COUNT_64) {
14135 *count = VM_REGION_SUBMAP_SHORT_INFO_COUNT_64;
14136 look_for_pages = FALSE;
14137 short_info = (vm_region_submap_short_info_64_t) submap_info;
14138 submap_info = NULL;
14139 } else {
14140 look_for_pages = TRUE;
14141 *count = VM_REGION_SUBMAP_INFO_V0_COUNT_64;
14142 short_info = NULL;
14143
14144 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14145 *count = VM_REGION_SUBMAP_INFO_V1_COUNT_64;
14146 }
14147 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14148 *count = VM_REGION_SUBMAP_INFO_V2_COUNT_64;
14149 }
14150 }
14151
14152 user_address = *address;
14153 user_max_depth = *nesting_depth;
14154
14155 if (not_in_kdp) {
14156 vm_map_lock_read(map);
14157 }
14158
14159 recurse_again:
14160 curr_entry = NULL;
14161 curr_map = map;
14162 curr_address = user_address;
14163 curr_offset = 0;
14164 curr_skip = 0;
14165 curr_depth = 0;
14166 curr_max_above = ((vm_map_offset_t) -1) - curr_address;
14167 curr_max_below = curr_address;
14168
14169 next_entry = NULL;
14170 next_map = NULL;
14171 next_address = 0;
14172 next_offset = 0;
14173 next_skip = 0;
14174 next_depth = 0;
14175 next_max_above = (vm_map_offset_t) -1;
14176 next_max_below = (vm_map_offset_t) -1;
14177
14178 for (;;) {
14179 if (vm_map_lookup_entry(curr_map,
14180 curr_address,
14181 &tmp_entry)) {
14182 /* tmp_entry contains the address we're looking for */
14183 curr_entry = tmp_entry;
14184 } else {
14185 vm_map_offset_t skip;
14186 /*
14187 * The address is not mapped. "tmp_entry" is the
14188 * map entry preceding the address. We want the next
14189 * one, if it exists.
14190 */
14191 curr_entry = tmp_entry->vme_next;
14192
14193 if (curr_entry == vm_map_to_entry(curr_map) ||
14194 (curr_entry->vme_start >=
14195 curr_address + curr_max_above)) {
14196 /* no next entry at this level: stop looking */
14197 if (not_in_kdp) {
14198 vm_map_unlock_read(curr_map);
14199 }
14200 curr_entry = NULL;
14201 curr_map = NULL;
14202 curr_skip = 0;
14203 curr_offset = 0;
14204 curr_depth = 0;
14205 curr_max_above = 0;
14206 curr_max_below = 0;
14207 break;
14208 }
14209
14210 /* adjust current address and offset */
14211 skip = curr_entry->vme_start - curr_address;
14212 curr_address = curr_entry->vme_start;
14213 curr_skip += skip;
14214 curr_offset += skip;
14215 curr_max_above -= skip;
14216 curr_max_below = 0;
14217 }
14218
14219 /*
14220 * Is the next entry at this level closer to the address (or
14221 * deeper in the submap chain) than the one we had
14222 * so far?
14223 */
14224 tmp_entry = curr_entry->vme_next;
14225 if (tmp_entry == vm_map_to_entry(curr_map)) {
14226 /* no next entry at this level */
14227 } else if (tmp_entry->vme_start >=
14228 curr_address + curr_max_above) {
14229 /*
14230 * tmp_entry is beyond the scope of what we mapped of
14231 * this submap in the upper level: ignore it.
14232 */
14233 } else if ((next_entry == NULL) ||
14234 (tmp_entry->vme_start + curr_offset <=
14235 next_entry->vme_start + next_offset)) {
14236 /*
14237 * We didn't have a "next_entry" or this one is
14238 * closer to the address we're looking for:
14239 * use this "tmp_entry" as the new "next_entry".
14240 */
14241 if (next_entry != NULL) {
14242 /* unlock the last "next_map" */
14243 if (next_map != curr_map && not_in_kdp) {
14244 vm_map_unlock_read(next_map);
14245 }
14246 }
14247 next_entry = tmp_entry;
14248 next_map = curr_map;
14249 next_depth = curr_depth;
14250 next_address = next_entry->vme_start;
14251 next_skip = curr_skip;
14252 next_skip += (next_address - curr_address);
14253 next_offset = curr_offset;
14254 next_offset += (next_address - curr_address);
14255 next_max_above = MIN(next_max_above, curr_max_above);
14256 next_max_above = MIN(next_max_above,
14257 next_entry->vme_end - next_address);
14258 next_max_below = MIN(next_max_below, curr_max_below);
14259 next_max_below = MIN(next_max_below,
14260 next_address - next_entry->vme_start);
14261 }
14262
14263 /*
14264 * "curr_max_{above,below}" allow us to keep track of the
14265 * portion of the submap that is actually mapped at this level:
14266 * the rest of that submap is irrelevant to us, since it's not
14267 * mapped here.
14268 * The relevant portion of the map starts at
14269 * "VME_OFFSET(curr_entry)" up to the size of "curr_entry".
14270 */
14271 curr_max_above = MIN(curr_max_above,
14272 curr_entry->vme_end - curr_address);
14273 curr_max_below = MIN(curr_max_below,
14274 curr_address - curr_entry->vme_start);
14275
14276 if (!curr_entry->is_sub_map ||
14277 curr_depth >= user_max_depth) {
14278 /*
14279 * We hit a leaf map or we reached the maximum depth
14280 * we could, so stop looking. Keep the current map
14281 * locked.
14282 */
14283 break;
14284 }
14285
14286 /*
14287 * Get down to the next submap level.
14288 */
14289
14290 /*
14291 * Lock the next level and unlock the current level,
14292 * unless we need to keep it locked to access the "next_entry"
14293 * later.
14294 */
14295 if (not_in_kdp) {
14296 vm_map_lock_read(VME_SUBMAP(curr_entry));
14297 }
14298 if (curr_map == next_map) {
14299 /* keep "next_map" locked in case we need it */
14300 } else {
14301 /* release this map */
14302 if (not_in_kdp) {
14303 vm_map_unlock_read(curr_map);
14304 }
14305 }
14306
14307 /*
14308 * Adjust the offset. "curr_entry" maps the submap
14309 * at relative address "curr_entry->vme_start" in the
14310 * curr_map but skips the first "VME_OFFSET(curr_entry)"
14311 * bytes of the submap.
14312 * "curr_offset" always represents the offset of a virtual
14313 * address in the curr_map relative to the absolute address
14314 * space (i.e. the top-level VM map).
14315 */
14316 curr_offset +=
14317 (VME_OFFSET(curr_entry) - curr_entry->vme_start);
14318 curr_address = user_address + curr_offset;
14319 /* switch to the submap */
14320 curr_map = VME_SUBMAP(curr_entry);
14321 curr_depth++;
14322 curr_entry = NULL;
14323 }
14324
14325 // LP64todo: all the current tools are 32bit, obviously never worked for 64b
14326 // so probably should be a real 32b ID vs. ptr.
14327 // Current users just check for equality
14328
14329 if (curr_entry == NULL) {
14330 /* no VM region contains the address... */
14331
14332 if (do_region_footprint && /* we want footprint numbers */
14333 next_entry == NULL && /* & there are no more regions */
14334 /* & we haven't already provided our fake region: */
14335 user_address <= vm_map_last_entry(map)->vme_end) {
14336 ledger_amount_t ledger_resident, ledger_compressed;
14337
14338 /*
14339 * Add a fake memory region to account for
14340 * purgeable and/or ledger-tagged memory that
14341 * counts towards this task's memory footprint,
14342 * i.e. the resident/compressed pages of non-volatile
14343 * objects owned by that task.
14344 */
14345 task_ledgers_footprint(map->pmap->ledger,
14346 &ledger_resident,
14347 &ledger_compressed);
14348 if (ledger_resident + ledger_compressed == 0) {
14349 /* no purgeable memory usage to report */
14350 return KERN_INVALID_ADDRESS;
14351 }
14352 /* fake region to show nonvolatile footprint */
14353 if (look_for_pages) {
14354 submap_info->protection = VM_PROT_DEFAULT;
14355 submap_info->max_protection = VM_PROT_DEFAULT;
14356 submap_info->inheritance = VM_INHERIT_DEFAULT;
14357 submap_info->offset = 0;
14358 submap_info->user_tag = -1;
14359 submap_info->pages_resident = (unsigned int) (ledger_resident / effective_page_size);
14360 submap_info->pages_shared_now_private = 0;
14361 submap_info->pages_swapped_out = (unsigned int) (ledger_compressed / effective_page_size);
14362 submap_info->pages_dirtied = submap_info->pages_resident;
14363 submap_info->ref_count = 1;
14364 submap_info->shadow_depth = 0;
14365 submap_info->external_pager = 0;
14366 submap_info->share_mode = SM_PRIVATE;
14367 submap_info->is_submap = 0;
14368 submap_info->behavior = VM_BEHAVIOR_DEFAULT;
14369 submap_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14370 submap_info->user_wired_count = 0;
14371 submap_info->pages_reusable = 0;
14372 } else {
14373 short_info->user_tag = -1;
14374 short_info->offset = 0;
14375 short_info->protection = VM_PROT_DEFAULT;
14376 short_info->inheritance = VM_INHERIT_DEFAULT;
14377 short_info->max_protection = VM_PROT_DEFAULT;
14378 short_info->behavior = VM_BEHAVIOR_DEFAULT;
14379 short_info->user_wired_count = 0;
14380 short_info->is_submap = 0;
14381 short_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
14382 short_info->external_pager = 0;
14383 short_info->shadow_depth = 0;
14384 short_info->share_mode = SM_PRIVATE;
14385 short_info->ref_count = 1;
14386 }
14387 *nesting_depth = 0;
14388 *size = (vm_map_size_t) (ledger_resident + ledger_compressed);
14389 // *address = user_address;
14390 *address = vm_map_last_entry(map)->vme_end;
14391 return KERN_SUCCESS;
14392 }
14393
14394 if (next_entry == NULL) {
14395 /* ... and no VM region follows it either */
14396 return KERN_INVALID_ADDRESS;
14397 }
14398 /* ... gather info about the next VM region */
14399 curr_entry = next_entry;
14400 curr_map = next_map; /* still locked ... */
14401 curr_address = next_address;
14402 curr_skip = next_skip;
14403 curr_offset = next_offset;
14404 curr_depth = next_depth;
14405 curr_max_above = next_max_above;
14406 curr_max_below = next_max_below;
14407 } else {
14408 /* we won't need "next_entry" after all */
14409 if (next_entry != NULL) {
14410 /* release "next_map" */
14411 if (next_map != curr_map && not_in_kdp) {
14412 vm_map_unlock_read(next_map);
14413 }
14414 }
14415 }
14416 next_entry = NULL;
14417 next_map = NULL;
14418 next_offset = 0;
14419 next_skip = 0;
14420 next_depth = 0;
14421 next_max_below = -1;
14422 next_max_above = -1;
14423
14424 if (curr_entry->is_sub_map &&
14425 curr_depth < user_max_depth) {
14426 /*
14427 * We're not as deep as we could be: we must have
14428 * gone back up after not finding anything mapped
14429 * below the original top-level map entry's range.
14430 * Let's move "curr_address" forward and recurse again.
14431 */
14432 user_address = curr_address;
14433 goto recurse_again;
14434 }
14435
14436 *nesting_depth = curr_depth;
14437 *size = curr_max_above + curr_max_below;
14438 *address = user_address + curr_skip - curr_max_below;
14439
14440 if (look_for_pages) {
14441 submap_info->user_tag = VME_ALIAS(curr_entry);
14442 submap_info->offset = VME_OFFSET(curr_entry);
14443 submap_info->protection = curr_entry->protection;
14444 submap_info->inheritance = curr_entry->inheritance;
14445 submap_info->max_protection = curr_entry->max_protection;
14446 submap_info->behavior = curr_entry->behavior;
14447 submap_info->user_wired_count = curr_entry->user_wired_count;
14448 submap_info->is_submap = curr_entry->is_sub_map;
14449 submap_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14450 } else {
14451 short_info->user_tag = VME_ALIAS(curr_entry);
14452 short_info->offset = VME_OFFSET(curr_entry);
14453 short_info->protection = curr_entry->protection;
14454 short_info->inheritance = curr_entry->inheritance;
14455 short_info->max_protection = curr_entry->max_protection;
14456 short_info->behavior = curr_entry->behavior;
14457 short_info->user_wired_count = curr_entry->user_wired_count;
14458 short_info->is_submap = curr_entry->is_sub_map;
14459 short_info->object_id = VM_OBJECT_ID(VME_OBJECT(curr_entry));
14460 }
14461
14462 extended.pages_resident = 0;
14463 extended.pages_swapped_out = 0;
14464 extended.pages_shared_now_private = 0;
14465 extended.pages_dirtied = 0;
14466 extended.pages_reusable = 0;
14467 extended.external_pager = 0;
14468 extended.shadow_depth = 0;
14469 extended.share_mode = SM_EMPTY;
14470 extended.ref_count = 0;
14471
14472 if (not_in_kdp) {
14473 if (!curr_entry->is_sub_map) {
14474 vm_map_offset_t range_start, range_end;
14475 range_start = MAX((curr_address - curr_max_below),
14476 curr_entry->vme_start);
14477 range_end = MIN((curr_address + curr_max_above),
14478 curr_entry->vme_end);
14479 vm_map_region_walk(curr_map,
14480 range_start,
14481 curr_entry,
14482 (VME_OFFSET(curr_entry) +
14483 (range_start -
14484 curr_entry->vme_start)),
14485 range_end - range_start,
14486 &extended,
14487 look_for_pages, VM_REGION_EXTENDED_INFO_COUNT);
14488 if (extended.external_pager &&
14489 extended.ref_count == 2 &&
14490 extended.share_mode == SM_SHARED) {
14491 extended.share_mode = SM_PRIVATE;
14492 }
14493 } else {
14494 if (curr_entry->use_pmap) {
14495 extended.share_mode = SM_TRUESHARED;
14496 } else {
14497 extended.share_mode = SM_PRIVATE;
14498 }
14499 extended.ref_count = os_ref_get_count(&VME_SUBMAP(curr_entry)->map_refcnt);
14500 }
14501 }
14502
14503 if (look_for_pages) {
14504 submap_info->pages_resident = extended.pages_resident;
14505 submap_info->pages_swapped_out = extended.pages_swapped_out;
14506 submap_info->pages_shared_now_private =
14507 extended.pages_shared_now_private;
14508 submap_info->pages_dirtied = extended.pages_dirtied;
14509 submap_info->external_pager = extended.external_pager;
14510 submap_info->shadow_depth = extended.shadow_depth;
14511 submap_info->share_mode = extended.share_mode;
14512 submap_info->ref_count = extended.ref_count;
14513
14514 if (original_count >= VM_REGION_SUBMAP_INFO_V1_COUNT_64) {
14515 submap_info->pages_reusable = extended.pages_reusable;
14516 }
14517 if (original_count >= VM_REGION_SUBMAP_INFO_V2_COUNT_64) {
14518 submap_info->object_id_full = (vm_object_id_t) (VME_OBJECT(curr_entry) != NULL) ? VM_KERNEL_ADDRPERM(VME_OBJECT(curr_entry)) : 0ULL;
14519 }
14520 } else {
14521 short_info->external_pager = extended.external_pager;
14522 short_info->shadow_depth = extended.shadow_depth;
14523 short_info->share_mode = extended.share_mode;
14524 short_info->ref_count = extended.ref_count;
14525 }
14526
14527 if (not_in_kdp) {
14528 vm_map_unlock_read(curr_map);
14529 }
14530
14531 return KERN_SUCCESS;
14532 }
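/*
 * User-space usage sketch (illustrative only, not compiled here): the MIG
 * routine mach_vm_region_recurse() lands in vm_map_region_recurse_64().
 * A typical caller walks a task's address space like this:
 *
 *	mach_vm_address_t addr = 0;
 *	natural_t depth = 0;
 *	for (;;) {
 *		mach_vm_size_t size = 0;
 *		vm_region_submap_info_data_64_t info;
 *		mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
 *		kern_return_t kr = mach_vm_region_recurse(mach_task_self(),
 *		    &addr, &size, &depth,
 *		    (vm_region_recurse_info_t)&info, &count);
 *		if (kr != KERN_SUCCESS) {
 *			break;		// KERN_INVALID_ADDRESS past the last region
 *		}
 *		if (info.is_submap) {
 *			depth++;	// descend into the submap at the same address
 *			continue;
 *		}
 *		// ... consume "addr", "size", "info" ...
 *		addr += size;
 *	}
 */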
14533
14534 /*
14535 * vm_region:
14536 *
14537 * User call to obtain information about a region in
14538 * a task's address map. Currently, only one flavor is
14539 * supported.
14540 *
14541 * XXX The reserved and behavior fields cannot be filled
14542 * in until the vm merge from the IK is completed, and
14543 * vm_reserve is implemented.
14544 */
14545
14546 kern_return_t
14547 vm_map_region(
14548 vm_map_t map,
14549 vm_map_offset_t *address, /* IN/OUT */
14550 vm_map_size_t *size, /* OUT */
14551 vm_region_flavor_t flavor, /* IN */
14552 vm_region_info_t info, /* OUT */
14553 mach_msg_type_number_t *count, /* IN/OUT */
14554 mach_port_t *object_name) /* OUT */
14555 {
14556 vm_map_entry_t tmp_entry;
14557 vm_map_entry_t entry;
14558 vm_map_offset_t start;
14559
14560 if (map == VM_MAP_NULL) {
14561 return KERN_INVALID_ARGUMENT;
14562 }
14563
14564 switch (flavor) {
14565 case VM_REGION_BASIC_INFO:
14566 /* legacy for old 32-bit objects info */
14567 {
14568 vm_region_basic_info_t basic;
14569
14570 if (*count < VM_REGION_BASIC_INFO_COUNT) {
14571 return KERN_INVALID_ARGUMENT;
14572 }
14573
14574 basic = (vm_region_basic_info_t) info;
14575 *count = VM_REGION_BASIC_INFO_COUNT;
14576
14577 vm_map_lock_read(map);
14578
14579 start = *address;
14580 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14581 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14582 vm_map_unlock_read(map);
14583 return KERN_INVALID_ADDRESS;
14584 }
14585 } else {
14586 entry = tmp_entry;
14587 }
14588
14589 start = entry->vme_start;
14590
14591 basic->offset = (uint32_t)VME_OFFSET(entry);
14592 basic->protection = entry->protection;
14593 basic->inheritance = entry->inheritance;
14594 basic->max_protection = entry->max_protection;
14595 basic->behavior = entry->behavior;
14596 basic->user_wired_count = entry->user_wired_count;
14597 basic->reserved = entry->is_sub_map;
14598 *address = start;
14599 *size = (entry->vme_end - start);
14600
14601 if (object_name) {
14602 *object_name = IP_NULL;
14603 }
14604 if (entry->is_sub_map) {
14605 basic->shared = FALSE;
14606 } else {
14607 basic->shared = entry->is_shared;
14608 }
14609
14610 vm_map_unlock_read(map);
14611 return KERN_SUCCESS;
14612 }
14613
14614 case VM_REGION_BASIC_INFO_64:
14615 {
14616 vm_region_basic_info_64_t basic;
14617
14618 if (*count < VM_REGION_BASIC_INFO_COUNT_64) {
14619 return KERN_INVALID_ARGUMENT;
14620 }
14621
14622 basic = (vm_region_basic_info_64_t) info;
14623 *count = VM_REGION_BASIC_INFO_COUNT_64;
14624
14625 vm_map_lock_read(map);
14626
14627 start = *address;
14628 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14629 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14630 vm_map_unlock_read(map);
14631 return KERN_INVALID_ADDRESS;
14632 }
14633 } else {
14634 entry = tmp_entry;
14635 }
14636
14637 start = entry->vme_start;
14638
14639 basic->offset = VME_OFFSET(entry);
14640 basic->protection = entry->protection;
14641 basic->inheritance = entry->inheritance;
14642 basic->max_protection = entry->max_protection;
14643 basic->behavior = entry->behavior;
14644 basic->user_wired_count = entry->user_wired_count;
14645 basic->reserved = entry->is_sub_map;
14646 *address = start;
14647 *size = (entry->vme_end - start);
14648
14649 if (object_name) {
14650 *object_name = IP_NULL;
14651 }
14652 if (entry->is_sub_map) {
14653 basic->shared = FALSE;
14654 } else {
14655 basic->shared = entry->is_shared;
14656 }
14657
14658 vm_map_unlock_read(map);
14659 return KERN_SUCCESS;
14660 }
14661 case VM_REGION_EXTENDED_INFO:
14662 if (*count < VM_REGION_EXTENDED_INFO_COUNT) {
14663 return KERN_INVALID_ARGUMENT;
14664 }
14665 OS_FALLTHROUGH;
14666 case VM_REGION_EXTENDED_INFO__legacy:
14667 if (*count < VM_REGION_EXTENDED_INFO_COUNT__legacy) {
14668 return KERN_INVALID_ARGUMENT;
14669 }
14670
14671 {
14672 vm_region_extended_info_t extended;
14673 mach_msg_type_number_t original_count;
14674 int effective_page_size, effective_page_shift;
14675
14676 extended = (vm_region_extended_info_t) info;
14677
14678 effective_page_shift = vm_self_region_page_shift(map);
14679 effective_page_size = (1 << effective_page_shift);
14680
14681 vm_map_lock_read(map);
14682
14683 start = *address;
14684 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14685 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14686 vm_map_unlock_read(map);
14687 return KERN_INVALID_ADDRESS;
14688 }
14689 } else {
14690 entry = tmp_entry;
14691 }
14692 start = entry->vme_start;
14693
14694 extended->protection = entry->protection;
14695 extended->user_tag = VME_ALIAS(entry);
14696 extended->pages_resident = 0;
14697 extended->pages_swapped_out = 0;
14698 extended->pages_shared_now_private = 0;
14699 extended->pages_dirtied = 0;
14700 extended->external_pager = 0;
14701 extended->shadow_depth = 0;
14702
14703 original_count = *count;
14704 if (flavor == VM_REGION_EXTENDED_INFO__legacy) {
14705 *count = VM_REGION_EXTENDED_INFO_COUNT__legacy;
14706 } else {
14707 extended->pages_reusable = 0;
14708 *count = VM_REGION_EXTENDED_INFO_COUNT;
14709 }
14710
14711 vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, extended, TRUE, *count);
14712
14713 if (extended->external_pager && extended->ref_count == 2 && extended->share_mode == SM_SHARED) {
14714 extended->share_mode = SM_PRIVATE;
14715 }
14716
14717 if (object_name) {
14718 *object_name = IP_NULL;
14719 }
14720 *address = start;
14721 *size = (entry->vme_end - start);
14722
14723 vm_map_unlock_read(map);
14724 return KERN_SUCCESS;
14725 }
14726 case VM_REGION_TOP_INFO:
14727 {
14728 vm_region_top_info_t top;
14729
14730 if (*count < VM_REGION_TOP_INFO_COUNT) {
14731 return KERN_INVALID_ARGUMENT;
14732 }
14733
14734 top = (vm_region_top_info_t) info;
14735 *count = VM_REGION_TOP_INFO_COUNT;
14736
14737 vm_map_lock_read(map);
14738
14739 start = *address;
14740 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
14741 if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
14742 vm_map_unlock_read(map);
14743 return KERN_INVALID_ADDRESS;
14744 }
14745 } else {
14746 entry = tmp_entry;
14747 }
14748 start = entry->vme_start;
14749
14750 top->private_pages_resident = 0;
14751 top->shared_pages_resident = 0;
14752
14753 vm_map_region_top_walk(entry, top);
14754
14755 if (object_name) {
14756 *object_name = IP_NULL;
14757 }
14758 *address = start;
14759 *size = (entry->vme_end - start);
14760
14761 vm_map_unlock_read(map);
14762 return KERN_SUCCESS;
14763 }
14764 default:
14765 return KERN_INVALID_ARGUMENT;
14766 }
14767 }
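/*
 * User-space usage sketch (illustrative only, not compiled here): the MIG
 * routine mach_vm_region() reaches vm_map_region() above. For example,
 * querying the region containing an arbitrary address:
 *
 *	mach_vm_address_t addr = (mach_vm_address_t)(uintptr_t)some_pointer;	// hypothetical input
 *	mach_vm_size_t size = 0;
 *	vm_region_basic_info_data_64_t info;
 *	mach_msg_type_number_t count = VM_REGION_BASIC_INFO_COUNT_64;
 *	mach_port_t object_name = MACH_PORT_NULL;
 *
 *	kern_return_t kr = mach_vm_region(mach_task_self(), &addr, &size,
 *	    VM_REGION_BASIC_INFO_64, (vm_region_info_t)&info,
 *	    &count, &object_name);
 *	// on success, [addr, addr + size) contains the queried address and
 *	// info.protection / info.max_protection describe the mapping
 */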
14768
14769 #define OBJ_RESIDENT_COUNT(obj, entry_size) \
14770 MIN((entry_size), \
14771 ((obj)->all_reusable ? \
14772 (obj)->wired_page_count : \
14773 (obj)->resident_page_count - (obj)->reusable_page_count))
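/*
 * Worked example (illustrative numbers): for an entry spanning 8 pages whose
 * object has resident_page_count == 10 and reusable_page_count == 4 (with
 * all_reusable == FALSE), the macro reports MIN(8, 10 - 4) == 6 resident
 * pages; if the object were all_reusable, only wired_page_count would count.
 */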
14774
14775 void
14776 vm_map_region_top_walk(
14777 vm_map_entry_t entry,
14778 vm_region_top_info_t top)
14779 {
14780 if (VME_OBJECT(entry) == 0 || entry->is_sub_map) {
14781 top->share_mode = SM_EMPTY;
14782 top->ref_count = 0;
14783 top->obj_id = 0;
14784 return;
14785 }
14786
14787 {
14788 struct vm_object *obj, *tmp_obj;
14789 int ref_count;
14790 uint32_t entry_size;
14791
14792 entry_size = (uint32_t) ((entry->vme_end - entry->vme_start) / PAGE_SIZE_64);
14793
14794 obj = VME_OBJECT(entry);
14795
14796 vm_object_lock(obj);
14797
14798 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14799 ref_count--;
14800 }
14801
14802 assert(obj->reusable_page_count <= obj->resident_page_count);
14803 if (obj->shadow) {
14804 if (ref_count == 1) {
14805 top->private_pages_resident =
14806 OBJ_RESIDENT_COUNT(obj, entry_size);
14807 } else {
14808 top->shared_pages_resident =
14809 OBJ_RESIDENT_COUNT(obj, entry_size);
14810 }
14811 top->ref_count = ref_count;
14812 top->share_mode = SM_COW;
14813
14814 while ((tmp_obj = obj->shadow)) {
14815 vm_object_lock(tmp_obj);
14816 vm_object_unlock(obj);
14817 obj = tmp_obj;
14818
14819 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14820 ref_count--;
14821 }
14822
14823 assert(obj->reusable_page_count <= obj->resident_page_count);
14824 top->shared_pages_resident +=
14825 OBJ_RESIDENT_COUNT(obj, entry_size);
14826 top->ref_count += ref_count - 1;
14827 }
14828 } else {
14829 if (entry->superpage_size) {
14830 top->share_mode = SM_LARGE_PAGE;
14831 top->shared_pages_resident = 0;
14832 top->private_pages_resident = entry_size;
14833 } else if (entry->needs_copy) {
14834 top->share_mode = SM_COW;
14835 top->shared_pages_resident =
14836 OBJ_RESIDENT_COUNT(obj, entry_size);
14837 } else {
14838 if (ref_count == 1 ||
14839 (ref_count == 2 && obj->named)) {
14840 top->share_mode = SM_PRIVATE;
14841 top->private_pages_resident =
14842 OBJ_RESIDENT_COUNT(obj,
14843 entry_size);
14844 } else {
14845 top->share_mode = SM_SHARED;
14846 top->shared_pages_resident =
14847 OBJ_RESIDENT_COUNT(obj,
14848 entry_size);
14849 }
14850 }
14851 top->ref_count = ref_count;
14852 }
14853 /* XXX K64: obj_id will be truncated */
14854 top->obj_id = (unsigned int) (uintptr_t)VM_KERNEL_ADDRPERM(obj);
14855
14856 vm_object_unlock(obj);
14857 }
14858 }
14859
14860 void
14861 vm_map_region_walk(
14862 vm_map_t map,
14863 vm_map_offset_t va,
14864 vm_map_entry_t entry,
14865 vm_object_offset_t offset,
14866 vm_object_size_t range,
14867 vm_region_extended_info_t extended,
14868 boolean_t look_for_pages,
14869 mach_msg_type_number_t count)
14870 {
14871 struct vm_object *obj, *tmp_obj;
14872 vm_map_offset_t last_offset;
14873 int i;
14874 int ref_count;
14875 struct vm_object *shadow_object;
14876 unsigned short shadow_depth;
14877 boolean_t do_region_footprint;
14878 int effective_page_size, effective_page_shift;
14879 vm_map_offset_t effective_page_mask;
14880
14881 do_region_footprint = task_self_region_footprint();
14882
14883 if ((VME_OBJECT(entry) == 0) ||
14884 (entry->is_sub_map) ||
14885 (VME_OBJECT(entry)->phys_contiguous &&
14886 !entry->superpage_size)) {
14887 extended->share_mode = SM_EMPTY;
14888 extended->ref_count = 0;
14889 return;
14890 }
14891
14892 if (entry->superpage_size) {
14893 extended->shadow_depth = 0;
14894 extended->share_mode = SM_LARGE_PAGE;
14895 extended->ref_count = 1;
14896 extended->external_pager = 0;
14897
14898 /* TODO4K: Superpage in 4k mode? */
14899 extended->pages_resident = (unsigned int)(range >> PAGE_SHIFT);
14900 extended->shadow_depth = 0;
14901 return;
14902 }
14903
14904 effective_page_shift = vm_self_region_page_shift(map);
14905 effective_page_size = (1 << effective_page_shift);
14906 effective_page_mask = effective_page_size - 1;
14907
14908 offset = vm_map_trunc_page(offset, effective_page_mask);
14909
14910 obj = VME_OBJECT(entry);
14911
14912 vm_object_lock(obj);
14913
14914 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
14915 ref_count--;
14916 }
14917
14918 if (look_for_pages) {
14919 for (last_offset = offset + range;
14920 offset < last_offset;
14921 offset += effective_page_size, va += effective_page_size) {
14922 if (do_region_footprint) {
14923 int disp;
14924
14925 disp = 0;
14926 if (map->has_corpse_footprint) {
14927 /*
14928 * Query the page info data we saved
14929 * while forking the corpse.
14930 */
14931 vm_map_corpse_footprint_query_page_info(
14932 map,
14933 va,
14934 &disp);
14935 } else {
14936 /*
14937 * Query the pmap.
14938 */
14939 vm_map_footprint_query_page_info(
14940 map,
14941 entry,
14942 va,
14943 &disp);
14944 }
14945 if (disp & VM_PAGE_QUERY_PAGE_PRESENT) {
14946 extended->pages_resident++;
14947 }
14948 if (disp & VM_PAGE_QUERY_PAGE_REUSABLE) {
14949 extended->pages_reusable++;
14950 }
14951 if (disp & VM_PAGE_QUERY_PAGE_DIRTY) {
14952 extended->pages_dirtied++;
14953 }
14954 if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
14955 extended->pages_swapped_out++;
14956 }
14957 continue;
14958 }
14959
14960 vm_map_region_look_for_page(map, va, obj,
14961 vm_object_trunc_page(offset), ref_count,
14962 0, extended, count);
14963 }
14964
14965 if (do_region_footprint) {
14966 goto collect_object_info;
14967 }
14968 } else {
14969 collect_object_info:
14970 shadow_object = obj->shadow;
14971 shadow_depth = 0;
14972
14973 if (!(obj->internal)) {
14974 extended->external_pager = 1;
14975 }
14976
14977 if (shadow_object != VM_OBJECT_NULL) {
14978 vm_object_lock(shadow_object);
14979 for (;
14980 shadow_object != VM_OBJECT_NULL;
14981 shadow_depth++) {
14982 vm_object_t next_shadow;
14983
14984 if (!(shadow_object->internal)) {
14985 extended->external_pager = 1;
14986 }
14987
14988 next_shadow = shadow_object->shadow;
14989 if (next_shadow) {
14990 vm_object_lock(next_shadow);
14991 }
14992 vm_object_unlock(shadow_object);
14993 shadow_object = next_shadow;
14994 }
14995 }
14996 extended->shadow_depth = shadow_depth;
14997 }
14998
14999 if (extended->shadow_depth || entry->needs_copy) {
15000 extended->share_mode = SM_COW;
15001 } else {
15002 if (ref_count == 1) {
15003 extended->share_mode = SM_PRIVATE;
15004 } else {
15005 if (obj->true_share) {
15006 extended->share_mode = SM_TRUESHARED;
15007 } else {
15008 extended->share_mode = SM_SHARED;
15009 }
15010 }
15011 }
15012 extended->ref_count = ref_count - extended->shadow_depth;
15013
15014 for (i = 0; i < extended->shadow_depth; i++) {
15015 if ((tmp_obj = obj->shadow) == 0) {
15016 break;
15017 }
15018 vm_object_lock(tmp_obj);
15019 vm_object_unlock(obj);
15020
15021 if ((ref_count = tmp_obj->ref_count) > 1 && tmp_obj->paging_in_progress) {
15022 ref_count--;
15023 }
15024
15025 extended->ref_count += ref_count;
15026 obj = tmp_obj;
15027 }
15028 vm_object_unlock(obj);
15029
15030 if (extended->share_mode == SM_SHARED) {
15031 vm_map_entry_t cur;
15032 vm_map_entry_t last;
15033 int my_refs;
15034
15035 obj = VME_OBJECT(entry);
15036 last = vm_map_to_entry(map);
15037 my_refs = 0;
15038
15039 if ((ref_count = obj->ref_count) > 1 && obj->paging_in_progress) {
15040 ref_count--;
15041 }
15042 for (cur = vm_map_first_entry(map); cur != last; cur = cur->vme_next) {
15043 my_refs += vm_map_region_count_obj_refs(cur, obj);
15044 }
15045
15046 if (my_refs == ref_count) {
15047 extended->share_mode = SM_PRIVATE_ALIASED;
15048 } else if (my_refs > 1) {
15049 extended->share_mode = SM_SHARED_ALIASED;
15050 }
15051 }
15052 }
15053
15054
15055 /* object is locked on entry and locked on return */
15056
15057
15058 static void
15059 vm_map_region_look_for_page(
15060 __unused vm_map_t map,
15061 __unused vm_map_offset_t va,
15062 vm_object_t object,
15063 vm_object_offset_t offset,
15064 int max_refcnt,
15065 unsigned short depth,
15066 vm_region_extended_info_t extended,
15067 mach_msg_type_number_t count)
15068 {
15069 vm_page_t p;
15070 vm_object_t shadow;
15071 int ref_count;
15072 vm_object_t caller_object;
15073
15074 shadow = object->shadow;
15075 caller_object = object;
15076
15077
15078 while (TRUE) {
15079 if (!(object->internal)) {
15080 extended->external_pager = 1;
15081 }
15082
15083 if ((p = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
15084 if (shadow && (max_refcnt == 1)) {
15085 extended->pages_shared_now_private++;
15086 }
15087
15088 if (!p->vmp_fictitious &&
15089 (p->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(p)))) {
15090 extended->pages_dirtied++;
15091 } else if (count >= VM_REGION_EXTENDED_INFO_COUNT) {
15092 if (p->vmp_reusable || object->all_reusable) {
15093 extended->pages_reusable++;
15094 }
15095 }
15096
15097 extended->pages_resident++;
15098
15099 if (object != caller_object) {
15100 vm_object_unlock(object);
15101 }
15102
15103 return;
15104 }
15105 if (object->internal &&
15106 object->alive &&
15107 !object->terminating &&
15108 object->pager_ready) {
15109 if (VM_COMPRESSOR_PAGER_STATE_GET(object, offset)
15110 == VM_EXTERNAL_STATE_EXISTS) {
15111 /* the pager has that page */
15112 extended->pages_swapped_out++;
15113 if (object != caller_object) {
15114 vm_object_unlock(object);
15115 }
15116 return;
15117 }
15118 }
15119
15120 if (shadow) {
15121 vm_object_lock(shadow);
15122
15123 if ((ref_count = shadow->ref_count) > 1 && shadow->paging_in_progress) {
15124 ref_count--;
15125 }
15126
15127 if (++depth > extended->shadow_depth) {
15128 extended->shadow_depth = depth;
15129 }
15130
15131 if (ref_count > max_refcnt) {
15132 max_refcnt = ref_count;
15133 }
15134
15135 if (object != caller_object) {
15136 vm_object_unlock(object);
15137 }
15138
15139 offset = offset + object->vo_shadow_offset;
15140 object = shadow;
15141 shadow = object->shadow;
15142 continue;
15143 }
15144 if (object != caller_object) {
15145 vm_object_unlock(object);
15146 }
15147 break;
15148 }
15149 }
15150
15151 static int
15152 vm_map_region_count_obj_refs(
15153 vm_map_entry_t entry,
15154 vm_object_t object)
15155 {
15156 int ref_count;
15157 vm_object_t chk_obj;
15158 vm_object_t tmp_obj;
15159
15160 if (VME_OBJECT(entry) == 0) {
15161 return 0;
15162 }
15163
15164 if (entry->is_sub_map) {
15165 return 0;
15166 } else {
15167 ref_count = 0;
15168
15169 chk_obj = VME_OBJECT(entry);
15170 vm_object_lock(chk_obj);
15171
15172 while (chk_obj) {
15173 if (chk_obj == object) {
15174 ref_count++;
15175 }
15176 tmp_obj = chk_obj->shadow;
15177 if (tmp_obj) {
15178 vm_object_lock(tmp_obj);
15179 }
15180 vm_object_unlock(chk_obj);
15181
15182 chk_obj = tmp_obj;
15183 }
15184 }
15185 return ref_count;
15186 }
15187
15188
15189 /*
15190 * Routine: vm_map_simplify
15191 *
15192 * Description:
15193 * Attempt to simplify the map representation in
15194 * the vicinity of the given starting address.
15195 * Note:
15196 * This routine is intended primarily to keep the
15197 * kernel maps more compact -- they generally don't
15198 * benefit from the "expand a map entry" technology
15199 * at allocation time because the adjacent entry
15200 * is often wired down.
15201 */
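/*
 * Illustrative example (hypothetical addresses): two adjacent entries
 *
 *	[0x1000, 0x2000)  object O, offset 0x0
 *	[0x2000, 0x3000)  object O, offset 0x1000
 *
 * with identical protections, inheritance, wiring, aliases, etc. can be
 * coalesced by vm_map_simplify_entry() into a single entry
 *
 *	[0x1000, 0x3000)  object O, offset 0x0
 *
 * which is exactly what the long conjunction of checks below verifies.
 */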
15202 void
15203 vm_map_simplify_entry(
15204 vm_map_t map,
15205 vm_map_entry_t this_entry)
15206 {
15207 vm_map_entry_t prev_entry;
15208
15209 counter(c_vm_map_simplify_entry_called++);
15210
15211 prev_entry = this_entry->vme_prev;
15212
15213 if ((this_entry != vm_map_to_entry(map)) &&
15214 (prev_entry != vm_map_to_entry(map)) &&
15215
15216 (prev_entry->vme_end == this_entry->vme_start) &&
15217
15218 (prev_entry->is_sub_map == this_entry->is_sub_map) &&
15219 (VME_OBJECT(prev_entry) == VME_OBJECT(this_entry)) &&
15220 ((VME_OFFSET(prev_entry) + (prev_entry->vme_end -
15221 prev_entry->vme_start))
15222 == VME_OFFSET(this_entry)) &&
15223
15224 (prev_entry->behavior == this_entry->behavior) &&
15225 (prev_entry->needs_copy == this_entry->needs_copy) &&
15226 (prev_entry->protection == this_entry->protection) &&
15227 (prev_entry->max_protection == this_entry->max_protection) &&
15228 (prev_entry->inheritance == this_entry->inheritance) &&
15229 (prev_entry->use_pmap == this_entry->use_pmap) &&
15230 (VME_ALIAS(prev_entry) == VME_ALIAS(this_entry)) &&
15231 (prev_entry->no_cache == this_entry->no_cache) &&
15232 (prev_entry->permanent == this_entry->permanent) &&
15233 (prev_entry->map_aligned == this_entry->map_aligned) &&
15234 (prev_entry->zero_wired_pages == this_entry->zero_wired_pages) &&
15235 (prev_entry->used_for_jit == this_entry->used_for_jit) &&
15236 (prev_entry->pmap_cs_associated == this_entry->pmap_cs_associated) &&
15237 /* from_reserved_zone: OK if that field doesn't match */
15238 (prev_entry->iokit_acct == this_entry->iokit_acct) &&
15239 (prev_entry->vme_resilient_codesign ==
15240 this_entry->vme_resilient_codesign) &&
15241 (prev_entry->vme_resilient_media ==
15242 this_entry->vme_resilient_media) &&
15243 (prev_entry->vme_no_copy_on_read == this_entry->vme_no_copy_on_read) &&
15244
15245 (prev_entry->wired_count == this_entry->wired_count) &&
15246 (prev_entry->user_wired_count == this_entry->user_wired_count) &&
15247
15248 ((prev_entry->vme_atomic == FALSE) && (this_entry->vme_atomic == FALSE)) &&
15249 (prev_entry->in_transition == FALSE) &&
15250 (this_entry->in_transition == FALSE) &&
15251 (prev_entry->needs_wakeup == FALSE) &&
15252 (this_entry->needs_wakeup == FALSE) &&
15253 (prev_entry->is_shared == this_entry->is_shared) &&
15254 (prev_entry->superpage_size == FALSE) &&
15255 (this_entry->superpage_size == FALSE)
15256 ) {
15257 vm_map_store_entry_unlink(map, prev_entry);
15258 assert(prev_entry->vme_start < this_entry->vme_end);
15259 if (prev_entry->map_aligned) {
15260 assert(VM_MAP_PAGE_ALIGNED(prev_entry->vme_start,
15261 VM_MAP_PAGE_MASK(map)));
15262 }
15263 this_entry->vme_start = prev_entry->vme_start;
15264 VME_OFFSET_SET(this_entry, VME_OFFSET(prev_entry));
15265
15266 if (map->holelistenabled) {
15267 vm_map_store_update_first_free(map, this_entry, TRUE);
15268 }
15269
15270 if (prev_entry->is_sub_map) {
15271 vm_map_deallocate(VME_SUBMAP(prev_entry));
15272 } else {
15273 vm_object_deallocate(VME_OBJECT(prev_entry));
15274 }
15275 vm_map_entry_dispose(map, prev_entry);
15276 SAVE_HINT_MAP_WRITE(map, this_entry);
15277 counter(c_vm_map_simplified++);
15278 }
15279 }
15280
15281 void
15282 vm_map_simplify(
15283 vm_map_t map,
15284 vm_map_offset_t start)
15285 {
15286 vm_map_entry_t this_entry;
15287
15288 vm_map_lock(map);
15289 if (vm_map_lookup_entry(map, start, &this_entry)) {
15290 vm_map_simplify_entry(map, this_entry);
15291 vm_map_simplify_entry(map, this_entry->vme_next);
15292 }
15293 counter(c_vm_map_simplify_called++);
15294 vm_map_unlock(map);
15295 }
15296
15297 static void
15298 vm_map_simplify_range(
15299 vm_map_t map,
15300 vm_map_offset_t start,
15301 vm_map_offset_t end)
15302 {
15303 vm_map_entry_t entry;
15304
15305 /*
15306 * The map should be locked (for "write") by the caller.
15307 */
15308
15309 if (start >= end) {
15310 /* invalid address range */
15311 return;
15312 }
15313
15314 start = vm_map_trunc_page(start,
15315 VM_MAP_PAGE_MASK(map));
15316 end = vm_map_round_page(end,
15317 VM_MAP_PAGE_MASK(map));
15318
15319 if (!vm_map_lookup_entry(map, start, &entry)) {
15320 /* "start" is not mapped and "entry" ends before "start" */
15321 if (entry == vm_map_to_entry(map)) {
15322 /* start with first entry in the map */
15323 entry = vm_map_first_entry(map);
15324 } else {
15325 /* start with next entry */
15326 entry = entry->vme_next;
15327 }
15328 }
15329
15330 while (entry != vm_map_to_entry(map) &&
15331 entry->vme_start <= end) {
15332 /* try and coalesce "entry" with its previous entry */
15333 vm_map_simplify_entry(map, entry);
15334 entry = entry->vme_next;
15335 }
15336 }
15337
15338
15339 /*
15340 * Routine: vm_map_machine_attribute
15341 * Purpose:
15342 * Provide machine-specific attributes to mappings,
15343 * such as cacheability etc. for machines that provide
15344 * them. NUMA architectures and machines with big/strange
15345 * caches will use this.
15346 * Note:
15347 * Responsibilities for locking and checking are handled here,
15348 * everything else in the pmap module. If any non-volatile
15349 * information must be kept, the pmap module should handle
15350 * it itself. [This assumes that attributes do not
15351 * need to be inherited, which seems ok to me]
15352 */
15353 kern_return_t
15354 vm_map_machine_attribute(
15355 vm_map_t map,
15356 vm_map_offset_t start,
15357 vm_map_offset_t end,
15358 vm_machine_attribute_t attribute,
15359 vm_machine_attribute_val_t* value) /* IN/OUT */
15360 {
15361 kern_return_t ret;
15362 vm_map_size_t sync_size;
15363 vm_map_entry_t entry;
15364
15365 if (start < vm_map_min(map) || end > vm_map_max(map)) {
15366 return KERN_INVALID_ADDRESS;
15367 }
15368
15369 /* Figure how much memory we need to flush (in page increments) */
15370 sync_size = end - start;
15371
15372 vm_map_lock(map);
15373
15374 if (attribute != MATTR_CACHE) {
15375 /* If we don't have to find physical addresses, we */
15376 /* don't have to do an explicit traversal here. */
15377 ret = pmap_attribute(map->pmap, start, end - start,
15378 attribute, value);
15379 vm_map_unlock(map);
15380 return ret;
15381 }
15382
15383 ret = KERN_SUCCESS; /* Assume it all worked */
15384
15385 while (sync_size) {
15386 if (vm_map_lookup_entry(map, start, &entry)) {
15387 vm_map_size_t sub_size;
15388 if ((entry->vme_end - start) > sync_size) {
15389 sub_size = sync_size;
15390 sync_size = 0;
15391 } else {
15392 sub_size = entry->vme_end - start;
15393 sync_size -= sub_size;
15394 }
15395 if (entry->is_sub_map) {
15396 vm_map_offset_t sub_start;
15397 vm_map_offset_t sub_end;
15398
15399 sub_start = (start - entry->vme_start)
15400 + VME_OFFSET(entry);
15401 sub_end = sub_start + sub_size;
15402 vm_map_machine_attribute(
15403 VME_SUBMAP(entry),
15404 sub_start,
15405 sub_end,
15406 attribute, value);
15407 } else {
15408 if (VME_OBJECT(entry)) {
15409 vm_page_t m;
15410 vm_object_t object;
15411 vm_object_t base_object;
15412 vm_object_t last_object;
15413 vm_object_offset_t offset;
15414 vm_object_offset_t base_offset;
15415 vm_map_size_t range;
15416 range = sub_size;
15417 offset = (start - entry->vme_start)
15418 + VME_OFFSET(entry);
15419 offset = vm_object_trunc_page(offset);
15420 base_offset = offset;
15421 object = VME_OBJECT(entry);
15422 base_object = object;
15423 last_object = NULL;
15424
15425 vm_object_lock(object);
15426
15427 while (range) {
15428 m = vm_page_lookup(
15429 object, offset);
15430
15431 if (m && !m->vmp_fictitious) {
15432 ret =
15433 pmap_attribute_cache_sync(
15434 VM_PAGE_GET_PHYS_PAGE(m),
15435 PAGE_SIZE,
15436 attribute, value);
15437 } else if (object->shadow) {
15438 offset = offset + object->vo_shadow_offset;
15439 last_object = object;
15440 object = object->shadow;
15441 vm_object_lock(last_object->shadow);
15442 vm_object_unlock(last_object);
15443 continue;
15444 }
15445 if (range < PAGE_SIZE) {
15446 range = 0;
15447 } else {
15448 range -= PAGE_SIZE;
15449 }
15450
15451 if (base_object != object) {
15452 vm_object_unlock(object);
15453 vm_object_lock(base_object);
15454 object = base_object;
15455 }
15456 /* Bump to the next page */
15457 base_offset += PAGE_SIZE;
15458 offset = base_offset;
15459 }
15460 vm_object_unlock(object);
15461 }
15462 }
15463 start += sub_size;
15464 } else {
15465 vm_map_unlock(map);
15466 return KERN_FAILURE;
15467 }
15468 }
15469
15470 vm_map_unlock(map);
15471
15472 return ret;
15473 }
15474
15475 /*
15476 * vm_map_behavior_set:
15477 *
15478 * Sets the paging reference behavior of the specified address
15479 * range in the target map. Paging reference behavior affects
15480 * how pagein operations resulting from faults on the map will be
15481 * clustered.
15482 */
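/*
 * User-space sketch (assumption: the BSD madvise(2) layer translates its
 * advice values to these VM_BEHAVIOR_* constants roughly as shown):
 *
 *	madvise(addr, len, MADV_SEQUENTIAL);	// -> VM_BEHAVIOR_SEQUENTIAL
 *	madvise(addr, len, MADV_WILLNEED);	// -> VM_BEHAVIOR_WILLNEED
 *	madvise(addr, len, MADV_FREE);		// -> VM_BEHAVIOR_FREE
 */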
15483 kern_return_t
15484 vm_map_behavior_set(
15485 vm_map_t map,
15486 vm_map_offset_t start,
15487 vm_map_offset_t end,
15488 vm_behavior_t new_behavior)
15489 {
15490 vm_map_entry_t entry;
15491 vm_map_entry_t temp_entry;
15492
15493 if (start > end ||
15494 start < vm_map_min(map) ||
15495 end > vm_map_max(map)) {
15496 return KERN_NO_SPACE;
15497 }
15498
15499 switch (new_behavior) {
15500 /*
15501 * The behaviors in this first block all set a persistent state on the
15502 * specified memory range. All we have to do here is record the desired
15503 * behavior in the vm_map_entry_t's.
15504 */
15505
15506 case VM_BEHAVIOR_DEFAULT:
15507 case VM_BEHAVIOR_RANDOM:
15508 case VM_BEHAVIOR_SEQUENTIAL:
15509 case VM_BEHAVIOR_RSEQNTL:
15510 case VM_BEHAVIOR_ZERO_WIRED_PAGES:
15511 vm_map_lock(map);
15512
15513 /*
15514 * The entire address range must be valid for the map.
15515 * Note that vm_map_range_check() does a
15516 * vm_map_lookup_entry() internally and returns the
15517 * entry containing the start of the address range if
15518 * the entire range is valid.
15519 */
15520 if (vm_map_range_check(map, start, end, &temp_entry)) {
15521 entry = temp_entry;
15522 vm_map_clip_start(map, entry, start);
15523 } else {
15524 vm_map_unlock(map);
15525 return KERN_INVALID_ADDRESS;
15526 }
15527
15528 while ((entry != vm_map_to_entry(map)) && (entry->vme_start < end)) {
15529 vm_map_clip_end(map, entry, end);
15530 if (entry->is_sub_map) {
15531 assert(!entry->use_pmap);
15532 }
15533
15534 if (new_behavior == VM_BEHAVIOR_ZERO_WIRED_PAGES) {
15535 entry->zero_wired_pages = TRUE;
15536 } else {
15537 entry->behavior = new_behavior;
15538 }
15539 entry = entry->vme_next;
15540 }
15541
15542 vm_map_unlock(map);
15543 break;
15544
15545 /*
15546 * The rest of these are different from the above in that they cause
15547 * an immediate action to take place as opposed to setting a behavior that
15548 * affects future actions.
15549 */
15550
15551 case VM_BEHAVIOR_WILLNEED:
15552 return vm_map_willneed(map, start, end);
15553
15554 case VM_BEHAVIOR_DONTNEED:
15555 return vm_map_msync(map, start, end - start, VM_SYNC_DEACTIVATE | VM_SYNC_CONTIGUOUS);
15556
15557 case VM_BEHAVIOR_FREE:
15558 return vm_map_msync(map, start, end - start, VM_SYNC_KILLPAGES | VM_SYNC_CONTIGUOUS);
15559
15560 case VM_BEHAVIOR_REUSABLE:
15561 return vm_map_reusable_pages(map, start, end);
15562
15563 case VM_BEHAVIOR_REUSE:
15564 return vm_map_reuse_pages(map, start, end);
15565
15566 case VM_BEHAVIOR_CAN_REUSE:
15567 return vm_map_can_reuse(map, start, end);
15568
15569 #if MACH_ASSERT
15570 case VM_BEHAVIOR_PAGEOUT:
15571 return vm_map_pageout(map, start, end);
15572 #endif /* MACH_ASSERT */
15573
15574 default:
15575 return KERN_INVALID_ARGUMENT;
15576 }
15577
15578 return KERN_SUCCESS;
15579 }
15580
15581
15582 /*
15583 * Internals for madvise(MADV_WILLNEED) system call.
15584 *
15585 * The implementation does the following:
15586 * a) read-ahead if the mapping corresponds to a mapped regular file
15587 * b) or, fault in the pages (zero-fill, decompress etc) if it's an anonymous mapping
15588 */
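/*
 * Minimal user-space sketch of what reaches this path (illustrative only):
 *
 *	#include <sys/mman.h>
 *
 *	void *p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p != MAP_FAILED) {
 *		madvise(p, len, MADV_WILLNEED);	// file case: triggers the read-ahead path below
 *	}
 */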
15589
15590
15591 static kern_return_t
15592 vm_map_willneed(
15593 vm_map_t map,
15594 vm_map_offset_t start,
15595 vm_map_offset_t end
15596 )
15597 {
15598 vm_map_entry_t entry;
15599 vm_object_t object;
15600 memory_object_t pager;
15601 struct vm_object_fault_info fault_info = {};
15602 kern_return_t kr;
15603 vm_object_size_t len;
15604 vm_object_offset_t offset;
15605
15606 fault_info.interruptible = THREAD_UNINT; /* ignored value */
15607 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
15608 fault_info.stealth = TRUE;
15609
15610 /*
15611 * The MADV_WILLNEED operation doesn't require any changes to the
15612 * vm_map_entry_t's, so the read lock is sufficient.
15613 */
15614
15615 vm_map_lock_read(map);
15616
15617 /*
15618 * The madvise semantics require that the address range be fully
15619 * allocated with no holes. Otherwise, we're required to return
15620 * an error.
15621 */
15622
15623 if (!vm_map_range_check(map, start, end, &entry)) {
15624 vm_map_unlock_read(map);
15625 return KERN_INVALID_ADDRESS;
15626 }
15627
15628 /*
15629 * Examine each vm_map_entry_t in the range.
15630 */
15631 for (; entry != vm_map_to_entry(map) && start < end;) {
15632 /*
15633 * The first time through, the start address could be anywhere
15634 * within the vm_map_entry we found. So adjust the offset to
15635 * correspond. After that, the offset will always be zero to
15636 * correspond to the beginning of the current vm_map_entry.
15637 */
15638 offset = (start - entry->vme_start) + VME_OFFSET(entry);
15639
15640 /*
15641 * Set the length so we don't go beyond the end of the
15642 * map_entry or beyond the end of the range we were given.
15643 * This range could also span multiple map entries, all of which
15644 * map different files, so make sure we only do the right amount
15645 * of I/O for each object. Note that it's possible for there
15646 * to be multiple map entries all referring to the same object
15647 * but with different page permissions, but it's not worth
15648 * trying to optimize that case.
15649 */
15650 len = MIN(entry->vme_end - start, end - start);
15651
15652 if ((vm_size_t) len != len) {
15653 /* 32-bit overflow */
15654 len = (vm_size_t) (0 - PAGE_SIZE);
15655 }
15656 fault_info.cluster_size = (vm_size_t) len;
15657 fault_info.lo_offset = offset;
15658 fault_info.hi_offset = offset + len;
15659 fault_info.user_tag = VME_ALIAS(entry);
15660 fault_info.pmap_options = 0;
15661 if (entry->iokit_acct ||
15662 (!entry->is_sub_map && !entry->use_pmap)) {
15663 fault_info.pmap_options |= PMAP_OPTIONS_ALT_ACCT;
15664 }
15665
15666 /*
15667 * If the entry is a submap OR there's no read permission
15668 * to this mapping, then just skip it.
15669 */
15670 if ((entry->is_sub_map) || (entry->protection & VM_PROT_READ) == 0) {
15671 entry = entry->vme_next;
15672 start = entry->vme_start;
15673 continue;
15674 }
15675
15676 object = VME_OBJECT(entry);
15677
15678 if (object == NULL ||
15679 (object && object->internal)) {
15680 /*
15681 * Memory range backed by anonymous memory.
15682 */
15683 vm_size_t region_size = 0, effective_page_size = 0;
15684 vm_map_offset_t addr = 0, effective_page_mask = 0;
15685
15686 region_size = len;
15687 addr = start;
15688
15689 effective_page_mask = MIN(vm_map_page_mask(current_map()), PAGE_MASK);
15690 effective_page_size = effective_page_mask + 1;
15691
15692 vm_map_unlock_read(map);
15693
15694 while (region_size) {
15695 vm_pre_fault(
15696 vm_map_trunc_page(addr, effective_page_mask),
15697 VM_PROT_READ | VM_PROT_WRITE);
15698
15699 region_size -= effective_page_size;
15700 addr += effective_page_size;
15701 }
15702 } else {
15703 /*
15704 * Find the file object backing this map entry. If there is
15705 * none, then we simply ignore the "will need" advice for this
15706 * entry and go on to the next one.
15707 */
15708 if ((object = find_vnode_object(entry)) == VM_OBJECT_NULL) {
15709 entry = entry->vme_next;
15710 start = entry->vme_start;
15711 continue;
15712 }
15713
15714 vm_object_paging_begin(object);
15715 pager = object->pager;
15716 vm_object_unlock(object);
15717
15718 /*
15719 * The data_request() could take a long time, so let's
15720 * release the map lock to avoid blocking other threads.
15721 */
15722 vm_map_unlock_read(map);
15723
15724 /*
15725 * Get the data from the object asynchronously.
15726 *
15727 * Note that memory_object_data_request() places limits on the
15728 * amount of I/O it will do. Regardless of the len we
15729 * specified, it won't do more than MAX_UPL_TRANSFER_BYTES and it
15730 * silently truncates the len to that size. This isn't
15731 * necessarily bad since madvise shouldn't really be used to
15732 * page in unlimited amounts of data. Other Unix variants
15733 * limit the willneed case as well. If this turns out to be an
15734 * issue for developers, then we can always adjust the policy
15735 * here and still be backwards compatible since this is all
15736 * just "advice".
15737 */
15738 kr = memory_object_data_request(
15739 pager,
15740 vm_object_trunc_page(offset) + object->paging_offset,
15741 0, /* ignored */
15742 VM_PROT_READ,
15743 (memory_object_fault_info_t)&fault_info);
15744
15745 vm_object_lock(object);
15746 vm_object_paging_end(object);
15747 vm_object_unlock(object);
15748
15749 /*
15750 * If we couldn't do the I/O for some reason, just give up on
15751 * the madvise. We still return success to the user since
15752 * madvise isn't supposed to fail when the advice can't be
15753 * taken.
15754 */
15755
15756 if (kr != KERN_SUCCESS) {
15757 return KERN_SUCCESS;
15758 }
15759 }
15760
15761 start += len;
15762 if (start >= end) {
15763 /* done */
15764 return KERN_SUCCESS;
15765 }
15766
15767 /* look up next entry */
15768 vm_map_lock_read(map);
15769 if (!vm_map_lookup_entry(map, start, &entry)) {
15770 /*
15771 * There's a new hole in the address range.
15772 */
15773 vm_map_unlock_read(map);
15774 return KERN_INVALID_ADDRESS;
15775 }
15776 }
15777
15778 vm_map_unlock_read(map);
15779 return KERN_SUCCESS;
15780 }
15781
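/*
 * User-space sketch (separate program) of the file-backed case handled
 * above: map a regular file read-only and ask for read-ahead. The helper
 * name and the use of the whole file are illustrative choices.
 */
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>

int
willneed_whole_file(const char *path)
{
	struct stat st;
	int fd, rc;
	void *p;

	fd = open(path, O_RDONLY);
	if (fd < 0 || fstat(fd, &st) < 0 || st.st_size == 0) {
		if (fd >= 0) {
			close(fd);
		}
		return -1;
	}
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	close(fd); /* the mapping keeps the file referenced */
	if (p == MAP_FAILED) {
		return -1;
	}
	/*
	 * Asks for asynchronous read-ahead of the mapped range; as noted
	 * above, the kernel may truncate each request to
	 * MAX_UPL_TRANSFER_BYTES, and I/O failures are not reported.
	 */
	rc = madvise(p, (size_t)st.st_size, MADV_WILLNEED);
	munmap(p, (size_t)st.st_size);
	return rc;
}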
15782 static boolean_t
15783 vm_map_entry_is_reusable(
15784 vm_map_entry_t entry)
15785 {
15786 /* Only user map entries */
15787
15788 vm_object_t object;
15789
15790 if (entry->is_sub_map) {
15791 return FALSE;
15792 }
15793
15794 switch (VME_ALIAS(entry)) {
15795 case VM_MEMORY_MALLOC:
15796 case VM_MEMORY_MALLOC_SMALL:
15797 case VM_MEMORY_MALLOC_LARGE:
15798 case VM_MEMORY_REALLOC:
15799 case VM_MEMORY_MALLOC_TINY:
15800 case VM_MEMORY_MALLOC_LARGE_REUSABLE:
15801 case VM_MEMORY_MALLOC_LARGE_REUSED:
15802 /*
15803 * This is a malloc() memory region: check if it's still
15804 * in its original state and can be re-used for more
15805 * malloc() allocations.
15806 */
15807 break;
15808 default:
15809 /*
15810 * Not a malloc() memory region: let the caller decide if
15811 * it's re-usable.
15812 */
15813 return TRUE;
15814 }
15815
15816 if (/*entry->is_shared ||*/
15817 entry->is_sub_map ||
15818 entry->in_transition ||
15819 entry->protection != VM_PROT_DEFAULT ||
15820 entry->max_protection != VM_PROT_ALL ||
15821 entry->inheritance != VM_INHERIT_DEFAULT ||
15822 entry->no_cache ||
15823 entry->permanent ||
15824 entry->superpage_size != FALSE ||
15825 entry->zero_wired_pages ||
15826 entry->wired_count != 0 ||
15827 entry->user_wired_count != 0) {
15828 return FALSE;
15829 }
15830
15831 object = VME_OBJECT(entry);
15832 if (object == VM_OBJECT_NULL) {
15833 return TRUE;
15834 }
15835 if (
15836 #if 0
15837 /*
15838 * Let's proceed even if the VM object is potentially
15839 * shared.
15840 * We check for this later when processing the actual
15841 * VM pages, so the contents will be safe if shared.
15842 *
15843 * But we can still mark this memory region as "reusable" to
15844 * acknowledge that the caller did let us know that the memory
15845 * could be re-used and should not be penalized for holding
15846 * on to it. This allows its "resident size" to not include
15847 * the reusable range.
15848 */
15849 object->ref_count == 1 &&
15850 #endif
15851 object->wired_page_count == 0 &&
15852 object->copy == VM_OBJECT_NULL &&
15853 object->shadow == VM_OBJECT_NULL &&
15854 object->internal &&
15855 object->purgable == VM_PURGABLE_DENY &&
15856 object->copy_strategy != MEMORY_OBJECT_COPY_DELAY &&
15857 !object->true_share &&
15858 object->wimg_bits == VM_WIMG_USE_DEFAULT &&
15859 !object->code_signed) {
15860 return TRUE;
15861 }
15862 return FALSE;
15863 }
15864
15865 static kern_return_t
15866 vm_map_reuse_pages(
15867 vm_map_t map,
15868 vm_map_offset_t start,
15869 vm_map_offset_t end)
15870 {
15871 vm_map_entry_t entry;
15872 vm_object_t object;
15873 vm_object_offset_t start_offset, end_offset;
15874
15875 /*
15876 * The MADV_REUSE operation doesn't require any changes to the
15877 * vm_map_entry_t's, so the read lock is sufficient.
15878 */
15879
15880 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15881 /*
15882 * XXX TODO4K
15883 * need to figure out what reusable means for a
15884 * portion of a native page.
15885 */
15886 return KERN_SUCCESS;
15887 }
15888
15889 vm_map_lock_read(map);
15890 assert(map->pmap != kernel_pmap); /* protect alias access */
15891
15892 /*
15893 * The madvise semantics require that the address range be fully
15894 * allocated with no holes. Otherwise, we're required to return
15895 * an error.
15896 */
15897
15898 if (!vm_map_range_check(map, start, end, &entry)) {
15899 vm_map_unlock_read(map);
15900 vm_page_stats_reusable.reuse_pages_failure++;
15901 return KERN_INVALID_ADDRESS;
15902 }
15903
15904 /*
15905 * Examine each vm_map_entry_t in the range.
15906 */
15907 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
15908 entry = entry->vme_next) {
15909 /*
15910 * Sanity check on the VM map entry.
15911 */
15912 if (!vm_map_entry_is_reusable(entry)) {
15913 vm_map_unlock_read(map);
15914 vm_page_stats_reusable.reuse_pages_failure++;
15915 return KERN_INVALID_ADDRESS;
15916 }
15917
15918 /*
15919 * The first time through, the start address could be anywhere
15920 * within the vm_map_entry we found. So adjust the offset to
15921 * correspond.
15922 */
15923 if (entry->vme_start < start) {
15924 start_offset = start - entry->vme_start;
15925 } else {
15926 start_offset = 0;
15927 }
15928 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
15929 start_offset += VME_OFFSET(entry);
15930 end_offset += VME_OFFSET(entry);
15931
15932 assert(!entry->is_sub_map);
15933 object = VME_OBJECT(entry);
15934 if (object != VM_OBJECT_NULL) {
15935 vm_object_lock(object);
15936 vm_object_reuse_pages(object, start_offset, end_offset,
15937 TRUE);
15938 vm_object_unlock(object);
15939 }
15940
15941 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSABLE) {
15942 /*
15943 * XXX
15944 * We do not hold the VM map exclusively here.
15945 * The "alias" field is not that critical, so it's
15946 * safe to update it here, as long as it is the only
15947 * one that can be modified while holding the VM map
15948 * "shared".
15949 */
15950 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSED);
15951 }
15952 }
15953
15954 vm_map_unlock_read(map);
15955 vm_page_stats_reusable.reuse_pages_success++;
15956 return KERN_SUCCESS;
15957 }
15958
15959
15960 static kern_return_t
15961 vm_map_reusable_pages(
15962 vm_map_t map,
15963 vm_map_offset_t start,
15964 vm_map_offset_t end)
15965 {
15966 vm_map_entry_t entry;
15967 vm_object_t object;
15968 vm_object_offset_t start_offset, end_offset;
15969 vm_map_offset_t pmap_offset;
15970
15971 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
15972 /*
15973 * XXX TODO4K
15974 * need to figure out what reusable means for a portion
15975 * of a native page.
15976 */
15977 return KERN_SUCCESS;
15978 }
15979
15980 /*
15981 * The MADV_REUSABLE operation doesn't require any changes to the
15982 * vm_map_entry_t's, so the read lock is sufficient.
15983 */
15984
15985 vm_map_lock_read(map);
15986 assert(map->pmap != kernel_pmap); /* protect alias access */
15987
15988 /*
15989 * The madvise semantics require that the address range be fully
15990 * allocated with no holes. Otherwise, we're required to return
15991 * an error.
15992 */
15993
15994 if (!vm_map_range_check(map, start, end, &entry)) {
15995 vm_map_unlock_read(map);
15996 vm_page_stats_reusable.reusable_pages_failure++;
15997 return KERN_INVALID_ADDRESS;
15998 }
15999
16000 /*
16001 * Examine each vm_map_entry_t in the range.
16002 */
16003 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16004 entry = entry->vme_next) {
16005 int kill_pages = 0;
16006
16007 /*
16008 * Sanity check on the VM map entry.
16009 */
16010 if (!vm_map_entry_is_reusable(entry)) {
16011 vm_map_unlock_read(map);
16012 vm_page_stats_reusable.reusable_pages_failure++;
16013 return KERN_INVALID_ADDRESS;
16014 }
16015
16016 if (!(entry->protection & VM_PROT_WRITE) && !entry->used_for_jit) {
16017 /* not writable: can't discard contents */
16018 vm_map_unlock_read(map);
16019 vm_page_stats_reusable.reusable_nonwritable++;
16020 vm_page_stats_reusable.reusable_pages_failure++;
16021 return KERN_PROTECTION_FAILURE;
16022 }
16023
16024 /*
16025 * The first time through, the start address could be anywhere
16026 * within the vm_map_entry we found. So adjust the offset to
16027 * correspond.
16028 */
16029 if (entry->vme_start < start) {
16030 start_offset = start - entry->vme_start;
16031 pmap_offset = start;
16032 } else {
16033 start_offset = 0;
16034 pmap_offset = entry->vme_start;
16035 }
16036 end_offset = MIN(end, entry->vme_end) - entry->vme_start;
16037 start_offset += VME_OFFSET(entry);
16038 end_offset += VME_OFFSET(entry);
16039
16040 assert(!entry->is_sub_map);
16041 object = VME_OBJECT(entry);
16042 if (object == VM_OBJECT_NULL) {
16043 continue;
16044 }
16045
16046
16047 vm_object_lock(object);
16048 if (((object->ref_count == 1) ||
16049 (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC &&
16050 object->copy == VM_OBJECT_NULL)) &&
16051 object->shadow == VM_OBJECT_NULL &&
16052 /*
16053 * "iokit_acct" entries are billed for their virtual size
16054 * (rather than for their resident pages only), so they
16055 * wouldn't benefit from making pages reusable, and it
16056 * would be hard to keep track of pages that are both
16057 * "iokit_acct" and "reusable" in the pmap stats and
16058 * ledgers.
16059 */
16060 !(entry->iokit_acct ||
16061 (!entry->is_sub_map && !entry->use_pmap))) {
16062 if (object->ref_count != 1) {
16063 vm_page_stats_reusable.reusable_shared++;
16064 }
16065 kill_pages = 1;
16066 } else {
16067 kill_pages = -1;
16068 }
16069 if (kill_pages != -1) {
16070 vm_object_deactivate_pages(object,
16071 start_offset,
16072 end_offset - start_offset,
16073 kill_pages,
16074 TRUE /*reusable_pages*/,
16075 map->pmap,
16076 pmap_offset);
16077 } else {
16078 vm_page_stats_reusable.reusable_pages_shared++;
16079 }
16080 vm_object_unlock(object);
16081
16082 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE ||
16083 VME_ALIAS(entry) == VM_MEMORY_MALLOC_LARGE_REUSED) {
16084 /*
16085 * XXX
16086 * We do not hold the VM map exclusively here.
16087 * The "alias" field is not that critical, so it's
16088 * safe to update it here, as long as it is the only
16089 * one that can be modified while holding the VM map
16090 * "shared".
16091 */
16092 VME_ALIAS_SET(entry, VM_MEMORY_MALLOC_LARGE_REUSABLE);
16093 }
16094 }
16095
16096 vm_map_unlock_read(map);
16097 vm_page_stats_reusable.reusable_pages_success++;
16098 return KERN_SUCCESS;
16099 }
16100
16101
16102 static kern_return_t
16103 vm_map_can_reuse(
16104 vm_map_t map,
16105 vm_map_offset_t start,
16106 vm_map_offset_t end)
16107 {
16108 vm_map_entry_t entry;
16109
16110 /*
16111 * The MADV_CAN_REUSE operation doesn't require any changes to the
16112 * vm_map_entry_t's, so the read lock is sufficient.
16113 */
16114
16115 vm_map_lock_read(map);
16116 assert(map->pmap != kernel_pmap); /* protect alias access */
16117
16118 /*
16119 * The madvise semantics require that the address range be fully
16120 * allocated with no holes. Otherwise, we're required to return
16121 * an error.
16122 */
16123
16124 if (!vm_map_range_check(map, start, end, &entry)) {
16125 vm_map_unlock_read(map);
16126 vm_page_stats_reusable.can_reuse_failure++;
16127 return KERN_INVALID_ADDRESS;
16128 }
16129
16130 /*
16131 * Examine each vm_map_entry_t in the range.
16132 */
16133 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16134 entry = entry->vme_next) {
16135 /*
16136 * Sanity check on the VM map entry.
16137 */
16138 if (!vm_map_entry_is_reusable(entry)) {
16139 vm_map_unlock_read(map);
16140 vm_page_stats_reusable.can_reuse_failure++;
16141 return KERN_INVALID_ADDRESS;
16142 }
16143 }
16144
16145 vm_map_unlock_read(map);
16146 vm_page_stats_reusable.can_reuse_success++;
16147 return KERN_SUCCESS;
16148 }
16149
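/*
 * User-space sketch of the reusable/reuse cycle implemented by the three
 * routines above, assuming Darwin's MADV_FREE_REUSABLE, MADV_FREE_REUSE
 * and MADV_CAN_REUSE advice values map to VM_BEHAVIOR_REUSABLE,
 * VM_BEHAVIOR_REUSE and VM_BEHAVIOR_CAN_REUSE respectively. This is the
 * pattern a malloc-style allocator would follow for a large region.
 */
#include <sys/mman.h>
#include <string.h>

void
allocator_retire_region(void *region, size_t size)
{
	/*
	 * Contents no longer needed: pages may be discarded and stop
	 * counting against the task's resident footprint.
	 */
	(void)madvise(region, size, MADV_FREE_REUSABLE); /* vm_map_reusable_pages() */
}

void
allocator_recycle_region(void *region, size_t size)
{
	/* about to hand the region out again: restore normal accounting */
	(void)madvise(region, size, MADV_FREE_REUSE);    /* vm_map_reuse_pages() */
	memset(region, 0, size); /* previous contents were discardable */
}

int
allocator_region_is_reusable(void *region, size_t size)
{
	/* fails if some entry does not pass vm_map_entry_is_reusable() */
	return madvise(region, size, MADV_CAN_REUSE) == 0; /* vm_map_can_reuse() */
}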
16150
16151 #if MACH_ASSERT
16152 static kern_return_t
16153 vm_map_pageout(
16154 vm_map_t map,
16155 vm_map_offset_t start,
16156 vm_map_offset_t end)
16157 {
16158 vm_map_entry_t entry;
16159
16160 /*
16161 * The MADV_PAGEOUT operation doesn't require any changes to the
16162 * vm_map_entry_t's, so the read lock is sufficient.
16163 */
16164
16165 vm_map_lock_read(map);
16166
16167 /*
16168 * The madvise semantics require that the address range be fully
16169 * allocated with no holes. Otherwise, we're required to return
16170 * an error.
16171 */
16172
16173 if (!vm_map_range_check(map, start, end, &entry)) {
16174 vm_map_unlock_read(map);
16175 return KERN_INVALID_ADDRESS;
16176 }
16177
16178 /*
16179 * Examine each vm_map_entry_t in the range.
16180 */
16181 for (; entry != vm_map_to_entry(map) && entry->vme_start < end;
16182 entry = entry->vme_next) {
16183 vm_object_t object;
16184
16185 /*
16186 * Sanity check on the VM map entry.
16187 */
16188 if (entry->is_sub_map) {
16189 vm_map_t submap;
16190 vm_map_offset_t submap_start;
16191 vm_map_offset_t submap_end;
16192 vm_map_entry_t submap_entry;
16193
16194 submap = VME_SUBMAP(entry);
16195 submap_start = VME_OFFSET(entry);
16196 submap_end = submap_start + (entry->vme_end -
16197 entry->vme_start);
16198
16199 vm_map_lock_read(submap);
16200
16201 if (!vm_map_range_check(submap,
16202 submap_start,
16203 submap_end,
16204 &submap_entry)) {
16205 vm_map_unlock_read(submap);
16206 vm_map_unlock_read(map);
16207 return KERN_INVALID_ADDRESS;
16208 }
16209
16210 object = VME_OBJECT(submap_entry);
16211 if (submap_entry->is_sub_map ||
16212 object == VM_OBJECT_NULL ||
16213 !object->internal) {
16214 vm_map_unlock_read(submap);
16215 continue;
16216 }
16217
16218 vm_object_pageout(object);
16219
16220 vm_map_unlock_read(submap);
16221 submap = VM_MAP_NULL;
16222 submap_entry = VM_MAP_ENTRY_NULL;
16223 continue;
16224 }
16225
16226 object = VME_OBJECT(entry);
16227 if (entry->is_sub_map ||
16228 object == VM_OBJECT_NULL ||
16229 !object->internal) {
16230 continue;
16231 }
16232
16233 vm_object_pageout(object);
16234 }
16235
16236 vm_map_unlock_read(map);
16237 return KERN_SUCCESS;
16238 }
16239 #endif /* MACH_ASSERT */
16240
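/*
 * User-space sketch for the debug-only path above, assuming Darwin's
 * MADV_PAGEOUT advice value maps to VM_BEHAVIOR_PAGEOUT; on kernels
 * built without MACH_ASSERT the request fails with an argument error.
 */
#include <sys/mman.h>

int
request_pageout(void *region, size_t size)
{
	/* asks the kernel to push the range's internal pages out */
	return madvise(region, size, MADV_PAGEOUT);
}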
16241
16242 /*
16243 * Routine: vm_map_entry_insert
16244 *
16245 * Description: This routine inserts a new vm_map_entry into a locked map.
16246 */
16247 vm_map_entry_t
16248 vm_map_entry_insert(
16249 vm_map_t map,
16250 vm_map_entry_t insp_entry,
16251 vm_map_offset_t start,
16252 vm_map_offset_t end,
16253 vm_object_t object,
16254 vm_object_offset_t offset,
16255 boolean_t needs_copy,
16256 boolean_t is_shared,
16257 boolean_t in_transition,
16258 vm_prot_t cur_protection,
16259 vm_prot_t max_protection,
16260 vm_behavior_t behavior,
16261 vm_inherit_t inheritance,
16262 unsigned short wired_count,
16263 boolean_t no_cache,
16264 boolean_t permanent,
16265 boolean_t no_copy_on_read,
16266 unsigned int superpage_size,
16267 boolean_t clear_map_aligned,
16268 boolean_t is_submap,
16269 boolean_t used_for_jit,
16270 int alias,
16271 boolean_t translated_allow_execute)
16272 {
16273 vm_map_entry_t new_entry;
16274
16275 assert(insp_entry != (vm_map_entry_t)0);
16276 vm_map_lock_assert_exclusive(map);
16277
16278 #if DEVELOPMENT || DEBUG
16279 vm_object_offset_t end_offset = 0;
16280 assertf(!os_add_overflow(end - start, offset, &end_offset), "size 0x%llx, offset 0x%llx caused overflow", (uint64_t)(end - start), offset);
16281 #endif /* DEVELOPMENT || DEBUG */
16282
16283 new_entry = vm_map_entry_create(map, !map->hdr.entries_pageable);
16284
16285 if (VM_MAP_PAGE_SHIFT(map) != PAGE_SHIFT) {
16286 new_entry->map_aligned = TRUE;
16287 } else {
16288 new_entry->map_aligned = FALSE;
16289 }
16290 if (clear_map_aligned &&
16291 (!VM_MAP_PAGE_ALIGNED(start, VM_MAP_PAGE_MASK(map)) ||
16292 !VM_MAP_PAGE_ALIGNED(end, VM_MAP_PAGE_MASK(map)))) {
16293 new_entry->map_aligned = FALSE;
16294 }
16295
16296 new_entry->vme_start = start;
16297 new_entry->vme_end = end;
16298 if (new_entry->map_aligned) {
16299 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_start,
16300 VM_MAP_PAGE_MASK(map)));
16301 assert(VM_MAP_PAGE_ALIGNED(new_entry->vme_end,
16302 VM_MAP_PAGE_MASK(map)));
16303 } else {
16304 assert(page_aligned(new_entry->vme_start));
16305 assert(page_aligned(new_entry->vme_end));
16306 }
16307 assert(new_entry->vme_start < new_entry->vme_end);
16308
16309 VME_OBJECT_SET(new_entry, object);
16310 VME_OFFSET_SET(new_entry, offset);
16311 new_entry->is_shared = is_shared;
16312 new_entry->is_sub_map = is_submap;
16313 new_entry->needs_copy = needs_copy;
16314 new_entry->in_transition = in_transition;
16315 new_entry->needs_wakeup = FALSE;
16316 new_entry->inheritance = inheritance;
16317 new_entry->protection = cur_protection;
16318 new_entry->max_protection = max_protection;
16319 new_entry->behavior = behavior;
16320 new_entry->wired_count = wired_count;
16321 new_entry->user_wired_count = 0;
16322 if (is_submap) {
16323 /*
16324 * submap: "use_pmap" means "nested".
16325 * default: false.
16326 */
16327 new_entry->use_pmap = FALSE;
16328 } else {
16329 /*
16330 * object: "use_pmap" means "use pmap accounting" for footprint.
16331 * default: true.
16332 */
16333 new_entry->use_pmap = TRUE;
16334 }
16335 VME_ALIAS_SET(new_entry, alias);
16336 new_entry->zero_wired_pages = FALSE;
16337 new_entry->no_cache = no_cache;
16338 new_entry->permanent = permanent;
16339 if (superpage_size) {
16340 new_entry->superpage_size = TRUE;
16341 } else {
16342 new_entry->superpage_size = FALSE;
16343 }
16344 if (used_for_jit) {
16345 if (!(map->jit_entry_exists) ||
16346 VM_MAP_POLICY_ALLOW_MULTIPLE_JIT(map)) {
16347 new_entry->used_for_jit = TRUE;
16348 map->jit_entry_exists = TRUE;
16349 }
16350 } else {
16351 new_entry->used_for_jit = FALSE;
16352 }
16353 if (translated_allow_execute) {
16354 new_entry->translated_allow_execute = TRUE;
16355 } else {
16356 new_entry->translated_allow_execute = FALSE;
16357 }
16358 new_entry->pmap_cs_associated = FALSE;
16359 new_entry->iokit_acct = FALSE;
16360 new_entry->vme_resilient_codesign = FALSE;
16361 new_entry->vme_resilient_media = FALSE;
16362 new_entry->vme_atomic = FALSE;
16363 new_entry->vme_no_copy_on_read = no_copy_on_read;
16364
16365 /*
16366 * Insert the new entry into the list.
16367 */
16368
16369 vm_map_store_entry_link(map, insp_entry, new_entry,
16370 VM_MAP_KERNEL_FLAGS_NONE);
16371 map->size += end - start;
16372
16373 /*
16374 * Update the free space hint and the lookup hint.
16375 */
16376
16377 SAVE_HINT_MAP_WRITE(map, new_entry);
16378 return new_entry;
16379 }
16380
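/*
 * Minimal kernel-side sketch (hypothetical helper, not part of xnu) of the
 * calling convention for vm_map_entry_insert(): the caller holds the map
 * lock exclusively, looks up the insertion point, and passes the defaults
 * most callers use. Assumes [start, end) is map-aligned and not already
 * mapped.
 */
static vm_map_entry_t
insert_anonymous_entry_sketch(
	vm_map_t        map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_entry_t insp_entry;

	vm_map_lock_assert_exclusive(map);
	/* on a hole, this returns the entry preceding "start" */
	(void) vm_map_lookup_entry(map, start, &insp_entry);

	return vm_map_entry_insert(map, insp_entry, start, end,
	           VM_OBJECT_NULL, 0,           /* object, offset */
	           FALSE, FALSE, FALSE,         /* needs_copy, is_shared, in_transition */
	           VM_PROT_DEFAULT, VM_PROT_ALL,
	           VM_BEHAVIOR_DEFAULT, VM_INHERIT_DEFAULT,
	           0,                           /* wired_count */
	           FALSE, FALSE, FALSE,         /* no_cache, permanent, no_copy_on_read */
	           0,                           /* superpage_size */
	           FALSE,                       /* clear_map_aligned */
	           FALSE, FALSE,                /* is_submap, used_for_jit */
	           VM_KERN_MEMORY_NONE,         /* alias */
	           FALSE);                      /* translated_allow_execute */
}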
16381 int vm_remap_old_path = 0;
16382 int vm_remap_new_path = 0;
16383 /*
16384 * Routine: vm_map_remap_extract
16385 *
16386 * Description: This routine extracts a list of vm_map entries from a map into "map_header".
16387 */
16388 static kern_return_t
16389 vm_map_remap_extract(
16390 vm_map_t map,
16391 vm_map_offset_t addr,
16392 vm_map_size_t size,
16393 vm_prot_t required_protection,
16394 boolean_t copy,
16395 struct vm_map_header *map_header,
16396 vm_prot_t *cur_protection,
16397 vm_prot_t *max_protection,
16398 /* What, no behavior? */
16399 vm_inherit_t inheritance,
16400 vm_map_kernel_flags_t vmk_flags)
16401 {
16402 kern_return_t result;
16403 vm_map_size_t mapped_size;
16404 vm_map_size_t tmp_size;
16405 vm_map_entry_t src_entry; /* result of last map lookup */
16406 vm_map_entry_t new_entry;
16407 vm_object_offset_t offset;
16408 vm_map_offset_t map_address;
16409 vm_map_offset_t src_start; /* start of entry to map */
16410 vm_map_offset_t src_end; /* end of region to be mapped */
16411 vm_object_t object;
16412 vm_map_version_t version;
16413 boolean_t src_needs_copy;
16414 boolean_t new_entry_needs_copy;
16415 vm_map_entry_t saved_src_entry;
16416 boolean_t src_entry_was_wired;
16417 vm_prot_t max_prot_for_prot_copy;
16418 vm_map_offset_t effective_page_mask;
16419 boolean_t pageable, same_map;
16420
16421 pageable = vmk_flags.vmkf_copy_pageable;
16422 same_map = vmk_flags.vmkf_copy_same_map;
16423
16424 effective_page_mask = MIN(PAGE_MASK, VM_MAP_PAGE_MASK(map));
16425
16426 assert(map != VM_MAP_NULL);
16427 assert(size != 0);
16428 assert(size == vm_map_round_page(size, effective_page_mask));
16429 assert(inheritance == VM_INHERIT_NONE ||
16430 inheritance == VM_INHERIT_COPY ||
16431 inheritance == VM_INHERIT_SHARE);
16432 assert(!(required_protection & ~VM_PROT_ALL));
16433
16434 /*
16435 * Compute start and end of region.
16436 */
16437 src_start = vm_map_trunc_page(addr, effective_page_mask);
16438 src_end = vm_map_round_page(src_start + size, effective_page_mask);
16439
16440 /*
16441 * Initialize map_header.
16442 */
16443 map_header->links.next = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16444 map_header->links.prev = CAST_TO_VM_MAP_ENTRY(&map_header->links);
16445 map_header->nentries = 0;
16446 map_header->entries_pageable = pageable;
16447 // map_header->page_shift = MIN(VM_MAP_PAGE_SHIFT(map), PAGE_SHIFT);
16448 map_header->page_shift = VM_MAP_PAGE_SHIFT(map);
16449 map_header->rb_head_store.rbh_root = (void *)(int)SKIP_RB_TREE;
16450
16451 vm_map_store_init( map_header );
16452
16453 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16454 max_prot_for_prot_copy = *max_protection & VM_PROT_ALL;
16455 } else {
16456 max_prot_for_prot_copy = VM_PROT_NONE;
16457 }
16458 *cur_protection = VM_PROT_ALL;
16459 *max_protection = VM_PROT_ALL;
16460
16461 map_address = 0;
16462 mapped_size = 0;
16463 result = KERN_SUCCESS;
16464
16465 /*
16466 * The specified source virtual space might correspond to
16467 * multiple map entries, need to loop on them.
16468 */
16469 vm_map_lock(map);
16470 if (VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) {
16471 /*
16472 * This address space uses sub-pages so the range might
16473 * not be re-mappable in an address space with larger
16474 * pages. Re-assemble any broken-up VM map entries to
16475 * improve our chances of making it work.
16476 */
16477 vm_map_simplify_range(map, src_start, src_end);
16478 }
16479 while (mapped_size != size) {
16480 vm_map_size_t entry_size;
16481
16482 /*
16483 * Find the beginning of the region.
16484 */
16485 if (!vm_map_lookup_entry(map, src_start, &src_entry)) {
16486 result = KERN_INVALID_ADDRESS;
16487 break;
16488 }
16489
16490 if (src_start < src_entry->vme_start ||
16491 (mapped_size && src_start != src_entry->vme_start)) {
16492 result = KERN_INVALID_ADDRESS;
16493 break;
16494 }
16495
16496 tmp_size = size - mapped_size;
16497 if (src_end > src_entry->vme_end) {
16498 tmp_size -= (src_end - src_entry->vme_end);
16499 }
16500
16501 entry_size = (vm_map_size_t)(src_entry->vme_end -
16502 src_entry->vme_start);
16503
16504 if (src_entry->is_sub_map &&
16505 vmk_flags.vmkf_copy_single_object) {
16506 vm_map_t submap;
16507 vm_map_offset_t submap_start;
16508 vm_map_size_t submap_size;
16509
16510 /*
16511 * No check for "required_protection" on "src_entry"
16512 * because the protections that matter are the ones
16513 * on the submap's VM map entry, which will be checked
16514 * during the call to vm_map_remap_extract() below.
16515 */
16516 submap_size = src_entry->vme_end - src_start;
16517 if (submap_size > size) {
16518 submap_size = size;
16519 }
16520 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16521 submap = VME_SUBMAP(src_entry);
16522 vm_map_reference(submap);
16523 vm_map_unlock(map);
16524 src_entry = NULL;
16525 result = vm_map_remap_extract(submap,
16526 submap_start,
16527 submap_size,
16528 required_protection,
16529 copy,
16530 map_header,
16531 cur_protection,
16532 max_protection,
16533 inheritance,
16534 vmk_flags);
16535 vm_map_deallocate(submap);
16536 return result;
16537 }
16538
16539 if ((src_entry->protection & required_protection)
16540 != required_protection) {
16541 if (vmk_flags.vmkf_copy_single_object &&
16542 mapped_size != 0) {
16543 /*
16544 * Single object extraction.
16545 * We can't extract more with the required
16546 * protection but we've extracted some, so
16547 * stop there and declare success.
16548 * The caller should check the size of
16549 * the copy entry we've extracted.
16550 */
16551 result = KERN_SUCCESS;
16552 } else {
16553 /*
16554 * VM range extraction.
16555 * Required protection is not available
16556 * for this part of the range: fail.
16557 */
16558 result = KERN_PROTECTION_FAILURE;
16559 }
16560 break;
16561 }
16562
16563 if (src_entry->is_sub_map &&
16564 VM_MAP_PAGE_SHIFT(VME_SUBMAP(src_entry)) < PAGE_SHIFT) {
16565 vm_map_t submap;
16566 vm_map_offset_t submap_start;
16567 vm_map_size_t submap_size;
16568 vm_map_copy_t submap_copy;
16569 vm_prot_t submap_curprot, submap_maxprot;
16570
16571 vm_remap_new_path++;
16572
16573 /*
16574 * No check for "required_protection" on "src_entry"
16575 * because the protections that matter are the ones
16576 * on the submap's VM map entry, which will be checked
16577 * during the call to vm_map_copy_extract() below.
16578 */
16579 object = VM_OBJECT_NULL;
16580 submap_copy = VM_MAP_COPY_NULL;
16581
16582 /* find equivalent range in the submap */
16583 submap = VME_SUBMAP(src_entry);
16584 submap_start = VME_OFFSET(src_entry) + src_start - src_entry->vme_start;
16585 submap_size = tmp_size;
16586 /* extra ref to keep submap alive */
16587 vm_map_reference(submap);
16588
16589 DTRACE_VM6(remap_submap_recurse,
16590 vm_map_t, map,
16591 vm_map_offset_t, addr,
16592 vm_map_size_t, size,
16593 boolean_t, copy,
16594 vm_map_offset_t, submap_start,
16595 vm_map_size_t, submap_size);
16596
16597 /*
16598 * The map can be safely unlocked since we
16599 * already hold a reference on the submap.
16600 *
16601 * No timestamp since we don't care if the map
16602 * gets modified while we're down in the submap.
16603 * We'll resume the extraction at src_start + tmp_size
16604 * anyway.
16605 */
16606 vm_map_unlock(map);
16607 src_entry = NULL; /* not valid once map is unlocked */
16608
16609 result = vm_map_copy_extract(submap,
16610 submap_start,
16611 submap_size,
16612 required_protection,
16613 copy,
16614 &submap_copy,
16615 &submap_curprot,
16616 &submap_maxprot,
16617 inheritance,
16618 vmk_flags);
16619
16620 /* release extra ref on submap */
16621 vm_map_deallocate(submap);
16622 submap = VM_MAP_NULL;
16623
16624 if (result != KERN_SUCCESS) {
16625 vm_map_lock(map);
16626 break;
16627 }
16628
16629 /* transfer submap_copy entries to map_header */
16630 while (vm_map_copy_first_entry(submap_copy) !=
16631 vm_map_copy_to_entry(submap_copy)) {
16632 vm_map_entry_t copy_entry;
16633 vm_map_size_t copy_entry_size;
16634
16635 copy_entry = vm_map_copy_first_entry(submap_copy);
16636 assert(!copy_entry->is_sub_map);
16637 vm_map_copy_entry_unlink(submap_copy, copy_entry);
16638 copy_entry_size = copy_entry->vme_end - copy_entry->vme_start;
16639 copy_entry->vme_start = map_address;
16640 copy_entry->vme_end = map_address + copy_entry_size;
16641 map_address += copy_entry_size;
16642 mapped_size += copy_entry_size;
16643 src_start += copy_entry_size;
16644 assert(src_start <= src_end);
16645 _vm_map_store_entry_link(map_header,
16646 map_header->links.prev,
16647 copy_entry);
16648 }
16649 /* done with submap_copy */
16650 vm_map_copy_discard(submap_copy);
16651
16652 *cur_protection &= submap_curprot;
16653 *max_protection &= submap_maxprot;
16654
16655 /* re-acquire the map lock and continue to next entry */
16656 vm_map_lock(map);
16657 continue;
16658 } else if (src_entry->is_sub_map) {
16659 vm_remap_old_path++;
16660 DTRACE_VM4(remap_submap,
16661 vm_map_t, map,
16662 vm_map_offset_t, addr,
16663 vm_map_size_t, size,
16664 boolean_t, copy);
16665
16666 vm_map_reference(VME_SUBMAP(src_entry));
16667 object = VM_OBJECT_NULL;
16668 } else {
16669 object = VME_OBJECT(src_entry);
16670 if (src_entry->iokit_acct) {
16671 /*
16672 * This entry uses "IOKit accounting".
16673 */
16674 } else if (object != VM_OBJECT_NULL &&
16675 (object->purgable != VM_PURGABLE_DENY ||
16676 object->vo_ledger_tag != VM_LEDGER_TAG_NONE)) {
16677 /*
16678 * Purgeable objects have their own accounting:
16679 * no pmap accounting for them.
16680 */
16681 assertf(!src_entry->use_pmap,
16682 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16683 map,
16684 src_entry,
16685 (uint64_t)src_entry->vme_start,
16686 (uint64_t)src_entry->vme_end,
16687 src_entry->protection,
16688 src_entry->max_protection,
16689 VME_ALIAS(src_entry));
16690 } else {
16691 /*
16692 * Not IOKit or purgeable:
16693 * must be accounted by pmap stats.
16694 */
16695 assertf(src_entry->use_pmap,
16696 "map=%p src_entry=%p [0x%llx:0x%llx] 0x%x/0x%x %d",
16697 map,
16698 src_entry,
16699 (uint64_t)src_entry->vme_start,
16700 (uint64_t)src_entry->vme_end,
16701 src_entry->protection,
16702 src_entry->max_protection,
16703 VME_ALIAS(src_entry));
16704 }
16705
16706 if (object == VM_OBJECT_NULL) {
16707 assert(!src_entry->needs_copy);
16708 object = vm_object_allocate(entry_size);
16709 VME_OFFSET_SET(src_entry, 0);
16710 VME_OBJECT_SET(src_entry, object);
16711 assert(src_entry->use_pmap);
16712 } else if (src_entry->wired_count ||
16713 object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
16714 /*
16715 * A wired memory region should not have
16716 * any pending copy-on-write and needs to
16717 * keep pointing at the VM object that
16718 * contains the wired pages.
16719 * If we're sharing this memory (copy=false),
16720 * we'll share this VM object.
16721 * If we're copying this memory (copy=true),
16722 * we'll call vm_object_copy_slowly() below
16723 * and use the new VM object for the remapping.
16724 *
16725 * Or, we are already using an asymmetric
16726 * copy, and therefore we already have
16727 * the right object.
16728 */
16729 assert(!src_entry->needs_copy);
16730 } else if (src_entry->needs_copy || object->shadowed ||
16731 (object->internal && !object->true_share &&
16732 !src_entry->is_shared &&
16733 object->vo_size > entry_size)) {
16734 VME_OBJECT_SHADOW(src_entry, entry_size);
16735 assert(src_entry->use_pmap);
16736
16737 if (!src_entry->needs_copy &&
16738 (src_entry->protection & VM_PROT_WRITE)) {
16739 vm_prot_t prot;
16740
16741 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16742
16743 prot = src_entry->protection & ~VM_PROT_WRITE;
16744
16745 if (override_nx(map,
16746 VME_ALIAS(src_entry))
16747 && prot) {
16748 prot |= VM_PROT_EXECUTE;
16749 }
16750
16751 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16752
16753 if (map->mapped_in_other_pmaps) {
16754 vm_object_pmap_protect(
16755 VME_OBJECT(src_entry),
16756 VME_OFFSET(src_entry),
16757 entry_size,
16758 PMAP_NULL,
16759 PAGE_SIZE,
16760 src_entry->vme_start,
16761 prot);
16762 #if MACH_ASSERT
16763 } else if (__improbable(map->pmap == PMAP_NULL)) {
16764 extern boolean_t vm_tests_in_progress;
16765 assert(vm_tests_in_progress);
16766 /*
16767 * Some VM tests (in vm_tests.c)
16768 * sometimes want to use a VM
16769 * map without a pmap.
16770 * Otherwise, this should never
16771 * happen.
16772 */
16773 #endif /* MACH_ASSERT */
16774 } else {
16775 pmap_protect(vm_map_pmap(map),
16776 src_entry->vme_start,
16777 src_entry->vme_end,
16778 prot);
16779 }
16780 }
16781
16782 object = VME_OBJECT(src_entry);
16783 src_entry->needs_copy = FALSE;
16784 }
16785
16786
16787 vm_object_lock(object);
16788 vm_object_reference_locked(object); /* object ref. for new entry */
16789 assert(!src_entry->needs_copy);
16790 if (object->copy_strategy ==
16791 MEMORY_OBJECT_COPY_SYMMETRIC) {
16792 /*
16793 * If we want to share this object (copy==0),
16794 * it needs to be COPY_DELAY.
16795 * If we want to copy this object (copy==1),
16796 * we can't just set "needs_copy" on our side
16797 * and expect the other side to do the same
16798 * (symmetrically), so we can't let the object
16799 * stay COPY_SYMMETRIC.
16800 * So we always switch from COPY_SYMMETRIC to
16801 * COPY_DELAY.
16802 */
16803 object->copy_strategy =
16804 MEMORY_OBJECT_COPY_DELAY;
16805 }
16806 vm_object_unlock(object);
16807 }
16808
16809 offset = (VME_OFFSET(src_entry) +
16810 (src_start - src_entry->vme_start));
16811
16812 new_entry = _vm_map_entry_create(map_header, !map_header->entries_pageable);
16813 vm_map_entry_copy(map, new_entry, src_entry);
16814 if (new_entry->is_sub_map) {
16815 /* clear address space specifics */
16816 new_entry->use_pmap = FALSE;
16817 } else if (copy) {
16818 /*
16819 * We're dealing with a copy-on-write operation,
16820 * so the resulting mapping should not inherit the
16821 * original mapping's accounting settings.
16822 * "use_pmap" should be reset to its default (TRUE)
16823 * so that the new mapping gets accounted for in
16824 * the task's memory footprint.
16825 */
16826 new_entry->use_pmap = TRUE;
16827 }
16828 /* "iokit_acct" was cleared in vm_map_entry_copy() */
16829 assert(!new_entry->iokit_acct);
16830
16831 new_entry->map_aligned = FALSE;
16832
16833 new_entry->vme_start = map_address;
16834 new_entry->vme_end = map_address + tmp_size;
16835 assert(new_entry->vme_start < new_entry->vme_end);
16836 if (copy && vmk_flags.vmkf_remap_prot_copy) {
16837 /*
16838 * Remapping for vm_map_protect(VM_PROT_COPY)
16839 * to convert a read-only mapping into a
16840 * copy-on-write version of itself but
16841 * with write access:
16842 * keep the original inheritance and add
16843 * VM_PROT_WRITE to the max protection.
16844 */
16845 new_entry->inheritance = src_entry->inheritance;
16846 new_entry->protection &= max_prot_for_prot_copy;
16847 new_entry->max_protection |= VM_PROT_WRITE;
16848 } else {
16849 new_entry->inheritance = inheritance;
16850 }
16851 VME_OFFSET_SET(new_entry, offset);
16852
16853 /*
16854 * The new region has to be copied now if required.
16855 */
16856 RestartCopy:
16857 if (!copy) {
16858 if (src_entry->used_for_jit == TRUE) {
16859 if (same_map) {
16860 #if __APRR_SUPPORTED__
16861 /*
16862 * Disallow re-mapping of any JIT regions on APRR devices.
16863 */
16864 result = KERN_PROTECTION_FAILURE;
16865 break;
16866 #endif /* __APRR_SUPPORTED__*/
16867 } else if (!VM_MAP_POLICY_ALLOW_JIT_SHARING(map)) {
16868 /*
16869 * Cannot allow an entry describing a JIT
16870 * region to be shared across address spaces.
16871 */
16872 result = KERN_INVALID_ARGUMENT;
16873 break;
16874 }
16875 }
16876
16877 src_entry->is_shared = TRUE;
16878 new_entry->is_shared = TRUE;
16879 if (!(new_entry->is_sub_map)) {
16880 new_entry->needs_copy = FALSE;
16881 }
16882 } else if (src_entry->is_sub_map) {
16883 /* make this a COW sub_map if not already */
16884 assert(new_entry->wired_count == 0);
16885 new_entry->needs_copy = TRUE;
16886 object = VM_OBJECT_NULL;
16887 } else if (src_entry->wired_count == 0 &&
16888 !(debug4k_no_cow_copyin && VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT) &&
16889 vm_object_copy_quickly(VME_OBJECT_PTR(new_entry),
16890 VME_OFFSET(new_entry),
16891 (new_entry->vme_end -
16892 new_entry->vme_start),
16893 &src_needs_copy,
16894 &new_entry_needs_copy)) {
16895 new_entry->needs_copy = new_entry_needs_copy;
16896 new_entry->is_shared = FALSE;
16897 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16898
16899 /*
16900 * Handle copy_on_write semantics.
16901 */
16902 if (src_needs_copy && !src_entry->needs_copy) {
16903 vm_prot_t prot;
16904
16905 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, src_entry->protection));
16906
16907 prot = src_entry->protection & ~VM_PROT_WRITE;
16908
16909 if (override_nx(map,
16910 VME_ALIAS(src_entry))
16911 && prot) {
16912 prot |= VM_PROT_EXECUTE;
16913 }
16914
16915 assert(!pmap_has_prot_policy(map->pmap, src_entry->translated_allow_execute, prot));
16916
16917 vm_object_pmap_protect(object,
16918 offset,
16919 entry_size,
16920 ((src_entry->is_shared
16921 || map->mapped_in_other_pmaps) ?
16922 PMAP_NULL : map->pmap),
16923 VM_MAP_PAGE_SIZE(map),
16924 src_entry->vme_start,
16925 prot);
16926
16927 assert(src_entry->wired_count == 0);
16928 src_entry->needs_copy = TRUE;
16929 }
16930 /*
16931 * Throw away the old object reference of the new entry.
16932 */
16933 vm_object_deallocate(object);
16934 } else {
16935 new_entry->is_shared = FALSE;
16936 assertf(new_entry->use_pmap, "map %p new_entry %p\n", map, new_entry);
16937
16938 src_entry_was_wired = (src_entry->wired_count > 0);
16939 saved_src_entry = src_entry;
16940 src_entry = VM_MAP_ENTRY_NULL;
16941
16942 /*
16943 * The map can be safely unlocked since we
16944 * already hold a reference on the object.
16945 *
16946 * Record the timestamp of the map for later
16947 * verification, and unlock the map.
16948 */
16949 version.main_timestamp = map->timestamp;
16950 vm_map_unlock(map); /* Increments timestamp once! */
16951
16952 /*
16953 * Perform the copy.
16954 */
16955 if (src_entry_was_wired > 0 ||
16956 (debug4k_no_cow_copyin &&
16957 VM_MAP_PAGE_SHIFT(map) < PAGE_SHIFT)) {
16958 vm_object_lock(object);
16959 result = vm_object_copy_slowly(
16960 object,
16961 offset,
16962 (new_entry->vme_end -
16963 new_entry->vme_start),
16964 THREAD_UNINT,
16965 VME_OBJECT_PTR(new_entry));
16966
16967 VME_OFFSET_SET(new_entry, offset - vm_object_trunc_page(offset));
16968 new_entry->needs_copy = FALSE;
16969 } else {
16970 vm_object_offset_t new_offset;
16971
16972 new_offset = VME_OFFSET(new_entry);
16973 result = vm_object_copy_strategically(
16974 object,
16975 offset,
16976 (new_entry->vme_end -
16977 new_entry->vme_start),
16978 VME_OBJECT_PTR(new_entry),
16979 &new_offset,
16980 &new_entry_needs_copy);
16981 if (new_offset != VME_OFFSET(new_entry)) {
16982 VME_OFFSET_SET(new_entry, new_offset);
16983 }
16984
16985 new_entry->needs_copy = new_entry_needs_copy;
16986 }
16987
16988 /*
16989 * Throw away the old object reference of the new entry.
16990 */
16991 vm_object_deallocate(object);
16992
16993 if (result != KERN_SUCCESS &&
16994 result != KERN_MEMORY_RESTART_COPY) {
16995 _vm_map_entry_dispose(map_header, new_entry);
16996 vm_map_lock(map);
16997 break;
16998 }
16999
17000 /*
17001 * Verify that the map has not substantially
17002 * changed while the copy was being made.
17003 */
17004
17005 vm_map_lock(map);
17006 if (version.main_timestamp + 1 != map->timestamp) {
17007 /*
17008 * Simple version comparison failed.
17009 *
17010 * Retry the lookup and verify that the
17011 * same object/offset are still present.
17012 */
17013 saved_src_entry = VM_MAP_ENTRY_NULL;
17014 vm_object_deallocate(VME_OBJECT(new_entry));
17015 _vm_map_entry_dispose(map_header, new_entry);
17016 if (result == KERN_MEMORY_RESTART_COPY) {
17017 result = KERN_SUCCESS;
17018 }
17019 continue;
17020 }
17021 /* map hasn't changed: src_entry is still valid */
17022 src_entry = saved_src_entry;
17023 saved_src_entry = VM_MAP_ENTRY_NULL;
17024
17025 if (result == KERN_MEMORY_RESTART_COPY) {
17026 vm_object_reference(object);
17027 goto RestartCopy;
17028 }
17029 }
17030
17031 _vm_map_store_entry_link(map_header,
17032 map_header->links.prev, new_entry);
17033
17034 /* Protections for submap mapping are irrelevant here */
17035 if (!src_entry->is_sub_map) {
17036 *cur_protection &= src_entry->protection;
17037 *max_protection &= src_entry->max_protection;
17038 }
17039
17040 map_address += tmp_size;
17041 mapped_size += tmp_size;
17042 src_start += tmp_size;
17043
17044 if (vmk_flags.vmkf_copy_single_object) {
17045 if (mapped_size != size) {
17046 DEBUG4K_SHARE("map %p addr 0x%llx size 0x%llx clipped copy at mapped_size 0x%llx\n", map, (uint64_t)addr, (uint64_t)size, (uint64_t)mapped_size);
17047 if (src_entry->vme_next != vm_map_to_entry(map) &&
17048 VME_OBJECT(src_entry->vme_next) == VME_OBJECT(src_entry)) {
17049 /* XXX TODO4K */
17050 DEBUG4K_ERROR("could have extended copy to next entry...\n");
17051 }
17052 }
17053 break;
17054 }
17055 } /* end while */
17056
17057 vm_map_unlock(map);
17058 if (result != KERN_SUCCESS) {
17059 /*
17060 * Free all allocated elements.
17061 */
17062 for (src_entry = map_header->links.next;
17063 src_entry != CAST_TO_VM_MAP_ENTRY(&map_header->links);
17064 src_entry = new_entry) {
17065 new_entry = src_entry->vme_next;
17066 _vm_map_store_entry_unlink(map_header, src_entry);
17067 if (src_entry->is_sub_map) {
17068 vm_map_deallocate(VME_SUBMAP(src_entry));
17069 } else {
17070 vm_object_deallocate(VME_OBJECT(src_entry));
17071 }
17072 _vm_map_entry_dispose(map_header, src_entry);
17073 }
17074 }
17075 return result;
17076 }
17077
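/*
 * User-space sketch of the remap entry point whose kernel path relies on
 * this extraction logic: share a range of the caller's own address space
 * at a new, kernel-chosen address (copy == FALSE means the same physical
 * pages are shared). The helper name is illustrative.
 */
#include <mach/mach.h>
#include <mach/mach_vm.h>

kern_return_t
remap_shared_view(mach_vm_address_t src, mach_vm_size_t size,
    mach_vm_address_t *dst)
{
	vm_prot_t cur_prot = VM_PROT_NONE;
	vm_prot_t max_prot = VM_PROT_NONE;

	*dst = 0; /* with VM_FLAGS_ANYWHERE, the kernel picks the address */
	return mach_vm_remap(mach_task_self(), dst, size,
	           0,                   /* alignment mask */
	           VM_FLAGS_ANYWHERE,
	           mach_task_self(), src,
	           FALSE,               /* copy */
	           &cur_prot, &max_prot,
	           VM_INHERIT_DEFAULT);
}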
17078 bool
17079 vm_map_is_exotic(
17080 vm_map_t map)
17081 {
17082 return VM_MAP_IS_EXOTIC(map);
17083 }
17084
17085 bool
17086 vm_map_is_alien(
17087 vm_map_t map)
17088 {
17089 return VM_MAP_IS_ALIEN(map);
17090 }
17091
17092 #if XNU_TARGET_OS_OSX
17093 void
17094 vm_map_mark_alien(
17095 vm_map_t map)
17096 {
17097 vm_map_lock(map);
17098 map->is_alien = true;
17099 vm_map_unlock(map);
17100 }
17101 #endif /* XNU_TARGET_OS_OSX */
17102
17103 void vm_map_copy_to_physcopy(vm_map_copy_t copy_map, vm_map_t target_map);
17104 void
17105 vm_map_copy_to_physcopy(
17106 vm_map_copy_t copy_map,
17107 vm_map_t target_map)
17108 {
17109 vm_map_size_t size;
17110 vm_map_entry_t entry;
17111 vm_map_entry_t new_entry;
17112 vm_object_t new_object;
17113 unsigned int pmap_flags;
17114 pmap_t new_pmap;
17115 vm_map_t new_map;
17116 vm_map_address_t src_start, src_end, src_cur;
17117 vm_map_address_t dst_start, dst_end, dst_cur;
17118 kern_return_t kr;
17119 void *kbuf;
17120
17121 /*
17122 * Perform the equivalent of vm_allocate() and memcpy().
17123 * Replace the mappings in "copy_map" with the newly allocated mapping.
17124 */
17125 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) BEFORE\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17126
17127 assert(copy_map->cpy_hdr.page_shift != VM_MAP_PAGE_MASK(target_map));
17128
17129 /* allocate new VM object */
17130 size = VM_MAP_ROUND_PAGE(copy_map->size, PAGE_MASK);
17131 new_object = vm_object_allocate(size);
17132 assert(new_object);
17133
17134 /* allocate new VM map entry */
17135 new_entry = vm_map_copy_entry_create(copy_map, FALSE);
17136 assert(new_entry);
17137
17138 /* finish initializing new VM map entry */
17139 new_entry->protection = VM_PROT_DEFAULT;
17140 new_entry->max_protection = VM_PROT_DEFAULT;
17141 new_entry->use_pmap = TRUE;
17142
17143 /* make new VM map entry point to new VM object */
17144 new_entry->vme_start = 0;
17145 new_entry->vme_end = size;
17146 VME_OBJECT_SET(new_entry, new_object);
17147 VME_OFFSET_SET(new_entry, 0);
17148
17149 /* create a new pmap to map "copy_map" */
17150 pmap_flags = 0;
17151 assert(copy_map->cpy_hdr.page_shift == FOURK_PAGE_SHIFT);
17152 #if PMAP_CREATE_FORCE_4K_PAGES
17153 pmap_flags |= PMAP_CREATE_FORCE_4K_PAGES;
17154 #endif /* PMAP_CREATE_FORCE_4K_PAGES */
17155 pmap_flags |= PMAP_CREATE_64BIT;
17156 new_pmap = pmap_create_options(NULL, (vm_map_size_t)0, pmap_flags);
17157 assert(new_pmap);
17158
17159 /* create a new pageable VM map to map "copy_map" */
17160 new_map = vm_map_create(new_pmap, 0, MACH_VM_MAX_ADDRESS, TRUE);
17161 assert(new_map);
17162 vm_map_set_page_shift(new_map, copy_map->cpy_hdr.page_shift);
17163
17164 /* map "copy_map" in the new VM map */
17165 src_start = 0;
17166 kr = vm_map_copyout_internal(
17167 new_map,
17168 &src_start,
17169 copy_map,
17170 copy_map->size,
17171 FALSE, /* consume_on_success */
17172 VM_PROT_DEFAULT,
17173 VM_PROT_DEFAULT,
17174 VM_INHERIT_DEFAULT);
17175 assert(kr == KERN_SUCCESS);
17176 src_end = src_start + copy_map->size;
17177
17178 /* map "new_object" in the new VM map */
17179 vm_object_reference(new_object);
17180 dst_start = 0;
17181 kr = vm_map_enter(new_map,
17182 &dst_start,
17183 size,
17184 0, /* mask */
17185 VM_FLAGS_ANYWHERE,
17186 VM_MAP_KERNEL_FLAGS_NONE,
17187 VM_KERN_MEMORY_OSFMK,
17188 new_object,
17189 0, /* offset */
17190 FALSE, /* needs copy */
17191 VM_PROT_DEFAULT,
17192 VM_PROT_DEFAULT,
17193 VM_INHERIT_DEFAULT);
17194 assert(kr == KERN_SUCCESS);
17195 dst_end = dst_start + size;
17196
17197 /* get a kernel buffer */
17198 kbuf = kheap_alloc(KHEAP_TEMP, PAGE_SIZE, Z_WAITOK);
17199 assert(kbuf);
17200
17201 /* physically copy "copy_map" mappings to new VM object */
17202 for (src_cur = src_start, dst_cur = dst_start;
17203 src_cur < src_end;
17204 src_cur += PAGE_SIZE, dst_cur += PAGE_SIZE) {
17205 vm_size_t bytes;
17206
17207 bytes = PAGE_SIZE;
17208 if (src_cur + PAGE_SIZE > src_end) {
17209 /* partial copy for last page */
17210 bytes = src_end - src_cur;
17211 assert(bytes > 0 && bytes < PAGE_SIZE);
17212 /* rest of dst page should be zero-filled */
17213 }
17214 /* get bytes from src mapping */
17215 kr = copyinmap(new_map, src_cur, kbuf, bytes);
17216 if (kr != KERN_SUCCESS) {
17217 DEBUG4K_COPY("copyinmap(%p, 0x%llx, %p, 0x%llx) kr 0x%x\n", new_map, (uint64_t)src_cur, kbuf, (uint64_t)bytes, kr);
17218 }
17219 /* put bytes in dst mapping */
17220 assert(dst_cur < dst_end);
17221 assert(dst_cur + bytes <= dst_end);
17222 kr = copyoutmap(new_map, kbuf, dst_cur, bytes);
17223 if (kr != KERN_SUCCESS) {
17224 DEBUG4K_COPY("copyoutmap(%p, %p, 0x%llx, 0x%llx) kr 0x%x\n", new_map, kbuf, (uint64_t)dst_cur, (uint64_t)bytes, kr);
17225 }
17226 }
17227
17228 /* free kernel buffer */
17229 kheap_free(KHEAP_TEMP, kbuf, PAGE_SIZE);
17230 kbuf = NULL;
17231
17232 /* destroy new map */
17233 vm_map_destroy(new_map, VM_MAP_REMOVE_NO_FLAGS);
17234 new_map = VM_MAP_NULL;
17235
17236 /* dispose of the old map entries in "copy_map" */
17237 while (vm_map_copy_first_entry(copy_map) !=
17238 vm_map_copy_to_entry(copy_map)) {
17239 entry = vm_map_copy_first_entry(copy_map);
17240 vm_map_copy_entry_unlink(copy_map, entry);
17241 if (entry->is_sub_map) {
17242 vm_map_deallocate(VME_SUBMAP(entry));
17243 } else {
17244 vm_object_deallocate(VME_OBJECT(entry));
17245 }
17246 vm_map_copy_entry_dispose(copy_map, entry);
17247 }
17248
17249 /* change "copy_map"'s page_size to match "target_map" */
17250 copy_map->cpy_hdr.page_shift = VM_MAP_PAGE_SHIFT(target_map);
17251 copy_map->offset = 0;
17252 copy_map->size = size;
17253
17254 /* insert new map entry in "copy_map" */
17255 assert(vm_map_copy_last_entry(copy_map) == vm_map_copy_to_entry(copy_map));
17256 vm_map_copy_entry_link(copy_map, vm_map_copy_last_entry(copy_map), new_entry);
17257
17258 DEBUG4K_COPY("copy_map %p (%d %d 0x%llx 0x%llx) AFTER\n", copy_map, copy_map->cpy_hdr.page_shift, copy_map->cpy_hdr.nentries, copy_map->offset, (uint64_t)copy_map->size);
17259 }
17260
17261 void
17262 vm_map_copy_adjust_get_target_copy_map(
17263 vm_map_copy_t copy_map,
17264 vm_map_copy_t *target_copy_map_p);
17265 void
17266 vm_map_copy_adjust_get_target_copy_map(
17267 vm_map_copy_t copy_map,
17268 vm_map_copy_t *target_copy_map_p)
17269 {
17270 vm_map_copy_t target_copy_map;
17271 vm_map_entry_t entry, target_entry;
17272
17273 if (*target_copy_map_p != VM_MAP_COPY_NULL) {
17274 /* the caller already has a "target_copy_map": use it */
17275 return;
17276 }
17277
17278 /* the caller wants us to create a new copy of "copy_map" */
17279 target_copy_map = vm_map_copy_allocate();
17280 target_copy_map->type = copy_map->type;
17281 assert(target_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17282 target_copy_map->offset = copy_map->offset;
17283 target_copy_map->size = copy_map->size;
17284 target_copy_map->cpy_hdr.page_shift = copy_map->cpy_hdr.page_shift;
17285 vm_map_store_init(&target_copy_map->cpy_hdr);
17286 for (entry = vm_map_copy_first_entry(copy_map);
17287 entry != vm_map_copy_to_entry(copy_map);
17288 entry = entry->vme_next) {
17289 target_entry = vm_map_copy_entry_create(target_copy_map, FALSE);
17290 vm_map_entry_copy_full(target_entry, entry);
17291 if (target_entry->is_sub_map) {
17292 vm_map_reference(VME_SUBMAP(target_entry));
17293 } else {
17294 vm_object_reference(VME_OBJECT(target_entry));
17295 }
17296 vm_map_copy_entry_link(
17297 target_copy_map,
17298 vm_map_copy_last_entry(target_copy_map),
17299 target_entry);
17300 }
17301 entry = VM_MAP_ENTRY_NULL;
17302 *target_copy_map_p = target_copy_map;
17303 }
17304
17305 void
17306 vm_map_copy_trim(
17307 vm_map_copy_t copy_map,
17308 int new_page_shift,
17309 vm_map_offset_t trim_start,
17310 vm_map_offset_t trim_end);
17311 void
17312 vm_map_copy_trim(
17313 vm_map_copy_t copy_map,
17314 int new_page_shift,
17315 vm_map_offset_t trim_start,
17316 vm_map_offset_t trim_end)
17317 {
17318 int copy_page_shift;
17319 vm_map_entry_t entry, next_entry;
17320
17321 assert(copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17322 assert(copy_map->cpy_hdr.nentries > 0);
17323
17324 trim_start += vm_map_copy_first_entry(copy_map)->vme_start;
17325 trim_end += vm_map_copy_first_entry(copy_map)->vme_start;
17326
17327 /* use the new page_shift to do the clipping */
17328 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17329 copy_map->cpy_hdr.page_shift = new_page_shift;
17330
17331 for (entry = vm_map_copy_first_entry(copy_map);
17332 entry != vm_map_copy_to_entry(copy_map);
17333 entry = next_entry) {
17334 next_entry = entry->vme_next;
17335 if (entry->vme_end <= trim_start) {
17336 /* entry fully before trim range: skip */
17337 continue;
17338 }
17339 if (entry->vme_start >= trim_end) {
17340 /* entry fully after trim range: done */
17341 break;
17342 }
17343 /* clip entry if needed */
17344 vm_map_copy_clip_start(copy_map, entry, trim_start);
17345 vm_map_copy_clip_end(copy_map, entry, trim_end);
17346 /* dispose of entry */
17347 copy_map->size -= entry->vme_end - entry->vme_start;
17348 vm_map_copy_entry_unlink(copy_map, entry);
17349 if (entry->is_sub_map) {
17350 vm_map_deallocate(VME_SUBMAP(entry));
17351 } else {
17352 vm_object_deallocate(VME_OBJECT(entry));
17353 }
17354 vm_map_copy_entry_dispose(copy_map, entry);
17355 entry = VM_MAP_ENTRY_NULL;
17356 }
17357
17358 /* restore copy_map's original page_shift */
17359 copy_map->cpy_hdr.page_shift = copy_page_shift;
17360 }
17361
17362 /*
17363 * Make any necessary adjustments to "copy_map" to allow it to be
17364 * mapped into "target_map".
17365 * If no changes were necessary, "target_copy_map" points to the
17366 * untouched "copy_map".
17367 * If changes are necessary, changes will be made to "target_copy_map".
17368 * If "target_copy_map" was NULL, we create a new "vm_map_copy_t" and
17369 * copy the original "copy_map" to it before applying the changes.
17370 * The caller should discard "target_copy_map" if it's not the same as
17371 * the original "copy_map".
17372 */
17373 /* TODO4K: also adjust to sub-range in the copy_map -> add start&end? */
17374 kern_return_t
17375 vm_map_copy_adjust_to_target(
17376 vm_map_copy_t src_copy_map,
17377 vm_map_offset_t offset,
17378 vm_map_size_t size,
17379 vm_map_t target_map,
17380 boolean_t copy,
17381 vm_map_copy_t *target_copy_map_p,
17382 vm_map_offset_t *overmap_start_p,
17383 vm_map_offset_t *overmap_end_p,
17384 vm_map_offset_t *trimmed_start_p)
17385 {
17386 vm_map_copy_t copy_map, target_copy_map;
17387 vm_map_size_t target_size;
17388 vm_map_size_t src_copy_map_size;
17389 vm_map_size_t overmap_start, overmap_end;
17390 int misalignments;
17391 vm_map_entry_t entry, target_entry;
17392 vm_map_offset_t addr_adjustment;
17393 vm_map_offset_t new_start, new_end;
17394 int copy_page_mask, target_page_mask;
17395 int copy_page_shift, target_page_shift;
17396 vm_map_offset_t trimmed_end;
17397
17398 /*
17399 * Assert that the vm_map_copy is coming from the right
17400 * zone and hasn't been forged
17401 */
17402 vm_map_copy_require(src_copy_map);
17403 assert(src_copy_map->type == VM_MAP_COPY_ENTRY_LIST);
17404
17405 /*
17406 * Start working with "src_copy_map" but we'll switch
17407 * to "target_copy_map" as soon as we start making adjustments.
17408 */
17409 copy_map = src_copy_map;
17410 src_copy_map_size = src_copy_map->size;
17411
17412 copy_page_shift = VM_MAP_COPY_PAGE_SHIFT(copy_map);
17413 copy_page_mask = VM_MAP_COPY_PAGE_MASK(copy_map);
17414 target_page_shift = VM_MAP_PAGE_SHIFT(target_map);
17415 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17416
17417 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p...\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, *target_copy_map_p);
17418
17419 target_copy_map = *target_copy_map_p;
17420 if (target_copy_map != VM_MAP_COPY_NULL) {
17421 vm_map_copy_require(target_copy_map);
17422 }
17423
17424 if (offset + size > copy_map->size) {
17425 DEBUG4K_ERROR("copy_map %p (%d->%d) copy_map->size 0x%llx offset 0x%llx size 0x%llx KERN_INVALID_ARGUMENT\n", copy_map, copy_page_shift, target_page_shift, (uint64_t)copy_map->size, (uint64_t)offset, (uint64_t)size);
17426 return KERN_INVALID_ARGUMENT;
17427 }
17428
17429 /* trim the end */
17430 trimmed_end = 0;
17431 new_end = VM_MAP_ROUND_PAGE(offset + size, target_page_mask);
17432 if (new_end < copy_map->size) {
17433 trimmed_end = src_copy_map_size - new_end;
17434 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim end from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)new_end, (uint64_t)copy_map->size);
17435 /* get "target_copy_map" if needed and adjust it */
17436 vm_map_copy_adjust_get_target_copy_map(copy_map,
17437 &target_copy_map);
17438 copy_map = target_copy_map;
17439 vm_map_copy_trim(target_copy_map, target_page_shift,
17440 new_end, copy_map->size);
17441 }
17442
17443 /* trim the start */
17444 new_start = VM_MAP_TRUNC_PAGE(offset, target_page_mask);
17445 if (new_start != 0) {
17446 DEBUG4K_ADJUST("copy_map %p (%d->%d) copy %d offset 0x%llx size 0x%llx target_copy_map %p... trim start from 0x%llx to 0x%llx\n", copy_map, copy_page_shift, target_page_shift, copy, (uint64_t)offset, (uint64_t)size, target_copy_map, (uint64_t)0, (uint64_t)new_start);
17447 /* get "target_copy_map" if needed and adjust it */
17448 vm_map_copy_adjust_get_target_copy_map(copy_map,
17449 &target_copy_map);
17450 copy_map = target_copy_map;
17451 vm_map_copy_trim(target_copy_map, target_page_shift,
17452 0, new_start);
17453 }
17454 *trimmed_start_p = new_start;
17455
17456 /* target_size starts with what's left after trimming */
17457 target_size = copy_map->size;
17458 assertf(target_size == src_copy_map_size - *trimmed_start_p - trimmed_end,
17459 "target_size 0x%llx src_copy_map_size 0x%llx trimmed_start 0x%llx trimmed_end 0x%llx\n",
17460 (uint64_t)target_size, (uint64_t)src_copy_map_size,
17461 (uint64_t)*trimmed_start_p, (uint64_t)trimmed_end);
17462
17463 /* check for misalignments but don't adjust yet */
17464 misalignments = 0;
17465 overmap_start = 0;
17466 overmap_end = 0;
17467 if (copy_page_shift < target_page_shift) {
17468 /*
17469 * Remapping from 4K to 16K: check the VM object alignments
17470 * throughout the range.
17471 * If the start and end of the range are mis-aligned, we can
17472 * over-map to re-align, and adjust the "overmap" start/end
17473 * and "target_size" of the range accordingly.
17474 * If there is any mis-alignment within the range:
17475 * if "copy":
17476 * we can do immediate-copy instead of copy-on-write,
17477 * else:
17478 * no way to remap and share; fail.
17479 */
17480 for (entry = vm_map_copy_first_entry(copy_map);
17481 entry != vm_map_copy_to_entry(copy_map);
17482 entry = entry->vme_next) {
17483 vm_object_offset_t object_offset_start, object_offset_end;
17484
17485 object_offset_start = VME_OFFSET(entry);
17486 object_offset_end = object_offset_start;
17487 object_offset_end += entry->vme_end - entry->vme_start;
17488 if (object_offset_start & target_page_mask) {
17489 if (entry == vm_map_copy_first_entry(copy_map) && !copy) {
17490 overmap_start++;
17491 } else {
17492 misalignments++;
17493 }
17494 }
17495 if (object_offset_end & target_page_mask) {
17496 if (entry->vme_next == vm_map_copy_to_entry(copy_map) && !copy) {
17497 overmap_end++;
17498 } else {
17499 misalignments++;
17500 }
17501 }
17502 }
17503 }
17504 entry = VM_MAP_ENTRY_NULL;
17505
17506 /* decide how to deal with misalignments */
17507 assert(overmap_start <= 1);
17508 assert(overmap_end <= 1);
17509 if (!overmap_start && !overmap_end && !misalignments) {
17510 /* copy_map is properly aligned for target_map ... */
17511 if (*trimmed_start_p) {
17512 /* ... but we trimmed it, so still need to adjust */
17513 } else {
17514 /* ... and we didn't trim anything: we're done */
17515 if (target_copy_map == VM_MAP_COPY_NULL) {
17516 target_copy_map = copy_map;
17517 }
17518 *target_copy_map_p = target_copy_map;
17519 *overmap_start_p = 0;
17520 *overmap_end_p = 0;
17521 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17522 return KERN_SUCCESS;
17523 }
17524 } else if (misalignments && !copy) {
17525 /* can't "share" if misaligned */
17526 DEBUG4K_ADJUST("unsupported sharing\n");
17527 #if MACH_ASSERT
17528 if (debug4k_panic_on_misaligned_sharing) {
17529 panic("DEBUG4k %s:%d unsupported sharing\n", __FUNCTION__, __LINE__);
17530 }
17531 #endif /* MACH_ASSERT */
17532 DEBUG4K_ADJUST("copy_map %p (%d) target_map %p (%d) copy %d target_copy_map %p -> KERN_NOT_SUPPORTED\n", copy_map, copy_page_shift, target_map, target_page_shift, copy, *target_copy_map_p);
17533 return KERN_NOT_SUPPORTED;
17534 } else {
17535 /* can't virtual-copy if misaligned (but can physical-copy) */
17536 DEBUG4K_ADJUST("mis-aligned copying\n");
17537 }
17538
17539 /* get a "target_copy_map" if needed and switch to it */
17540 vm_map_copy_adjust_get_target_copy_map(copy_map, &target_copy_map);
17541 copy_map = target_copy_map;
17542
17543 if (misalignments && copy) {
17544 vm_map_size_t target_copy_map_size;
17545
17546 /*
17547 * Can't do copy-on-write with misaligned mappings.
17548 * Replace the mappings with a physical copy of the original
17549 * mappings' contents.
17550 */
17551 target_copy_map_size = target_copy_map->size;
17552 vm_map_copy_to_physcopy(target_copy_map, target_map);
17553 *target_copy_map_p = target_copy_map;
17554 *overmap_start_p = 0;
17555 *overmap_end_p = target_copy_map->size - target_copy_map_size;
17556 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx)-> trimmed 0x%llx overmap start 0x%llx end 0x%llx PHYSCOPY\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17557 return KERN_SUCCESS;
17558 }
17559
17560 /* apply the adjustments */
17561 misalignments = 0;
17562 overmap_start = 0;
17563 overmap_end = 0;
17564 /* remove copy_map->offset, so that everything starts at offset 0 */
17565 addr_adjustment = copy_map->offset;
17566 /* also remove whatever we trimmed from the start */
17567 addr_adjustment += *trimmed_start_p;
17568 for (target_entry = vm_map_copy_first_entry(target_copy_map);
17569 target_entry != vm_map_copy_to_entry(target_copy_map);
17570 target_entry = target_entry->vme_next) {
17571 vm_object_offset_t object_offset_start, object_offset_end;
17572
17573 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx BEFORE\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17574 object_offset_start = VME_OFFSET(target_entry);
17575 if (object_offset_start & target_page_mask) {
17576 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at start\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17577 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17578 /*
17579 * start of 1st entry is mis-aligned:
17580 * re-adjust by over-mapping.
17581 */
17582 overmap_start = object_offset_start - trunc_page_mask_64(object_offset_start, target_page_mask);
17583 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_start 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_start);
17584 VME_OFFSET_SET(target_entry, VME_OFFSET(target_entry) - overmap_start);
17585 } else {
17586 misalignments++;
17587 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17588 assert(copy);
17589 }
17590 }
17591
17592 if (target_entry == vm_map_copy_first_entry(target_copy_map)) {
17593 target_size += overmap_start;
17594 } else {
17595 target_entry->vme_start += overmap_start;
17596 }
17597 target_entry->vme_end += overmap_start;
17598
17599 object_offset_end = VME_OFFSET(target_entry) + target_entry->vme_end - target_entry->vme_start;
17600 if (object_offset_end & target_page_mask) {
17601 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx misaligned at end\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17602 if (target_entry->vme_next == vm_map_copy_to_entry(target_copy_map)) {
17603 /*
17604 * end of last entry is mis-aligned: re-adjust by over-mapping.
17605 */
17606 overmap_end = round_page_mask_64(object_offset_end, target_page_mask) - object_offset_end;
17607 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> overmap_end 0x%llx\n", target_entry, VME_OFFSET(target_entry), copy, (uint64_t)overmap_end);
17608 target_entry->vme_end += overmap_end;
17609 target_size += overmap_end;
17610 } else {
17611 misalignments++;
17612 DEBUG4K_ADJUST("entry %p offset 0x%llx copy %d -> misalignments %d\n", target_entry, VME_OFFSET(target_entry), copy, misalignments);
17613 assert(copy);
17614 }
17615 }
17616 target_entry->vme_start -= addr_adjustment;
17617 target_entry->vme_end -= addr_adjustment;
17618 DEBUG4K_ADJUST("copy %p (%d 0x%llx 0x%llx) entry %p [ 0x%llx 0x%llx ] object %p offset 0x%llx AFTER\n", target_copy_map, VM_MAP_COPY_PAGE_SHIFT(target_copy_map), target_copy_map->offset, (uint64_t)target_copy_map->size, target_entry, (uint64_t)target_entry->vme_start, (uint64_t)target_entry->vme_end, VME_OBJECT(target_entry), VME_OFFSET(target_entry));
17619 }
17620
17621 target_copy_map->size = target_size;
17622 target_copy_map->offset += overmap_start;
17623 target_copy_map->offset -= addr_adjustment;
17624 target_copy_map->cpy_hdr.page_shift = target_page_shift;
17625
17626 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->size, target_page_mask));
17627 // assert(VM_MAP_PAGE_ALIGNED(target_copy_map->offset, FOURK_PAGE_MASK));
17628 assert(overmap_start < VM_MAP_PAGE_SIZE(target_map));
17629 assert(overmap_end < VM_MAP_PAGE_SIZE(target_map));
17630
17631 *target_copy_map_p = target_copy_map;
17632 *overmap_start_p = overmap_start;
17633 *overmap_end_p = overmap_end;
17634
17635 DEBUG4K_ADJUST("copy_map %p (%d offset 0x%llx size 0x%llx) target_map %p (%d) copy %d target_copy_map %p (%d offset 0x%llx size 0x%llx) -> trimmed 0x%llx overmap start 0x%llx end 0x%llx KERN_SUCCESS\n", copy_map, copy_page_shift, (uint64_t)copy_map->offset, (uint64_t)copy_map->size, target_map, target_page_shift, copy, *target_copy_map_p, VM_MAP_COPY_PAGE_SHIFT(*target_copy_map_p), (uint64_t)(*target_copy_map_p)->offset, (uint64_t)(*target_copy_map_p)->size, (uint64_t)*trimmed_start_p, (uint64_t)*overmap_start_p, (uint64_t)*overmap_end_p);
17636 return KERN_SUCCESS;
17637 }
17638
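/*
 * Routine: vm_map_range_physical_size
 *
 * Editorial summary (added; derived from the code below): report in
 * *phys_size how much page-aligned memory the range [start, start+size)
 * of "map" translates to.  For a map that already uses the kernel page
 * size this is simply the page-rounded size of the range; for a
 * smaller-page map (e.g. a 4K task on a 16K kernel) the range is
 * extracted and adjusted to the kernel map's page size first, so the
 * result accounts for any over-mapping at the edges.
 */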
17639 kern_return_t
17640 vm_map_range_physical_size(
17641 vm_map_t map,
17642 vm_map_address_t start,
17643 mach_vm_size_t size,
17644 mach_vm_size_t * phys_size)
17645 {
17646 kern_return_t kr;
17647 vm_map_copy_t copy_map, target_copy_map;
17648 vm_map_offset_t adjusted_start, adjusted_end;
17649 vm_map_size_t adjusted_size;
17650 vm_prot_t cur_prot, max_prot;
17651 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17652 vm_map_kernel_flags_t vmk_flags;
17653
17654 adjusted_start = vm_map_trunc_page(start, VM_MAP_PAGE_MASK(map));
17655 adjusted_end = vm_map_round_page(start + size, VM_MAP_PAGE_MASK(map));
17656 adjusted_size = adjusted_end - adjusted_start;
17657 *phys_size = adjusted_size;
17658 if (VM_MAP_PAGE_SIZE(map) == PAGE_SIZE) {
17659 return KERN_SUCCESS;
17660 }
17661 if (start == 0) {
17662 adjusted_start = vm_map_trunc_page(start, PAGE_MASK);
17663 adjusted_end = vm_map_round_page(start + size, PAGE_MASK);
17664 adjusted_size = adjusted_end - adjusted_start;
17665 *phys_size = adjusted_size;
17666 return KERN_SUCCESS;
17667 }
17668 if (adjusted_size == 0) {
17669 DEBUG4K_SHARE("map %p start 0x%llx size 0x%llx adjusted 0x%llx -> phys_size 0!\n", map, (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_size);
17670 *phys_size = 0;
17671 return KERN_SUCCESS;
17672 }
17673
17674 vmk_flags = VM_MAP_KERNEL_FLAGS_NONE;
17675 vmk_flags.vmkf_copy_pageable = TRUE;
17676 vmk_flags.vmkf_copy_same_map = TRUE;
17677 assert(adjusted_size != 0);
17678 kr = vm_map_copy_extract(map, adjusted_start, adjusted_size,
17679 VM_PROT_NONE, /* required_protection: no check here */
17680 FALSE /* copy */,
17681 &copy_map,
17682 &cur_prot, &max_prot, VM_INHERIT_DEFAULT,
17683 vmk_flags);
17684 if (kr != KERN_SUCCESS) {
17685 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17686 //assert(0);
17687 *phys_size = 0;
17688 return kr;
17689 }
17690 assert(copy_map != VM_MAP_COPY_NULL);
17691 target_copy_map = copy_map;
17692 DEBUG4K_ADJUST("adjusting...\n");
17693 kr = vm_map_copy_adjust_to_target(
17694 copy_map,
17695 start - adjusted_start, /* offset */
17696 size, /* size */
17697 kernel_map,
17698 FALSE, /* copy */
17699 &target_copy_map,
17700 &overmap_start,
17701 &overmap_end,
17702 &trimmed_start);
17703 if (kr == KERN_SUCCESS) {
17704 if (target_copy_map->size != *phys_size) {
17705 DEBUG4K_ADJUST("map %p (%d) start 0x%llx size 0x%llx adjusted_start 0x%llx adjusted_end 0x%llx overmap_start 0x%llx overmap_end 0x%llx trimmed_start 0x%llx phys_size 0x%llx -> 0x%llx\n", map, VM_MAP_PAGE_SHIFT(map), (uint64_t)start, (uint64_t)size, (uint64_t)adjusted_start, (uint64_t)adjusted_end, (uint64_t)overmap_start, (uint64_t)overmap_end, (uint64_t)trimmed_start, (uint64_t)*phys_size, (uint64_t)target_copy_map->size);
17706 }
17707 *phys_size = target_copy_map->size;
17708 } else {
17709 DEBUG4K_ERROR("map %p start 0x%llx 0x%llx size 0x%llx 0x%llx kr 0x%x\n", map, (uint64_t)start, (uint64_t)adjusted_start, size, (uint64_t)adjusted_size, kr);
17710 //assert(0);
17711 *phys_size = 0;
17712 }
17713 vm_map_copy_discard(copy_map);
17714 copy_map = VM_MAP_COPY_NULL;
17715
17716 return kr;
17717 }
17718
17719
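/*
 * Routine: memory_entry_check_for_adjustment
 *
 * Editorial summary (added; derived from the code below): given a
 * named-entry port, check whether the entry's backing copy map would
 * need over-mapping at its start or end to be mapped into "src_map".
 * Only relevant when "src_map" uses a smaller page size than the
 * kernel (e.g. 4K vs 16K); the required adjustments are returned in
 * *overmap_start / *overmap_end.
 */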
17720 kern_return_t
17721 memory_entry_check_for_adjustment(
17722 vm_map_t src_map,
17723 ipc_port_t port,
17724 vm_map_offset_t *overmap_start,
17725 vm_map_offset_t *overmap_end)
17726 {
17727 kern_return_t kr = KERN_SUCCESS;
17728 vm_map_copy_t copy_map = VM_MAP_COPY_NULL, target_copy_map = VM_MAP_COPY_NULL;
17729
17730 assert(port);
17731 assertf(ip_kotype(port) == IKOT_NAMED_ENTRY, "Port Type expected: %d...received:%d\n", IKOT_NAMED_ENTRY, ip_kotype(port));
17732
17733 vm_named_entry_t named_entry;
17734
17735 named_entry = (vm_named_entry_t) port->ip_kobject;
17736 named_entry_lock(named_entry);
17737 copy_map = named_entry->backing.copy;
17738 target_copy_map = copy_map;
17739
17740 if (src_map && VM_MAP_PAGE_SHIFT(src_map) < PAGE_SHIFT) {
17741 vm_map_offset_t trimmed_start;
17742
17743 trimmed_start = 0;
17744 DEBUG4K_ADJUST("adjusting...\n");
17745 kr = vm_map_copy_adjust_to_target(
17746 copy_map,
17747 0, /* offset */
17748 copy_map->size, /* size */
17749 src_map,
17750 FALSE, /* copy */
17751 &target_copy_map,
17752 overmap_start,
17753 overmap_end,
17754 &trimmed_start);
17755 assert(trimmed_start == 0);
17756 }
17757 named_entry_unlock(named_entry);
17758
17759 return kr;
17760 }
17761
17762
17763 /*
17764 * Routine: vm_remap
17765 *
17766 * Map a portion of a task's address space.
17767 * The mapped region must not overlap more than
17768 * one VM memory object. The protections of the
17769 * mapped region remain the same as in the original
17770 * task and are returned as out parameters; inheritance is supplied by the caller.
17771 * Source and target tasks can be identical.
17772 * Other attributes are the same as for vm_map().
17773 */
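/*
 * Illustrative call sketch (editorial addition, not from the original
 * source; the flag and tag values shown are only examples): share a
 * region of "src_map" into "target_map" at an anywhere-allocated
 * address, with the effective protections returned through
 * cur_prot / max_prot:
 *
 *	vm_map_address_t addr = 0;
 *	vm_prot_t cur_prot, max_prot;
 *
 *	kr = vm_map_remap(target_map, &addr, size,
 *	    0, VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
 *	    VM_KERN_MEMORY_NONE, src_map, memory_address,
 *	    FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
 */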
17774 kern_return_t
17775 vm_map_remap(
17776 vm_map_t target_map,
17777 vm_map_address_t *address,
17778 vm_map_size_t size,
17779 vm_map_offset_t mask,
17780 int flags,
17781 vm_map_kernel_flags_t vmk_flags,
17782 vm_tag_t tag,
17783 vm_map_t src_map,
17784 vm_map_offset_t memory_address,
17785 boolean_t copy,
17786 vm_prot_t *cur_protection,
17787 vm_prot_t *max_protection,
17788 vm_inherit_t inheritance)
17789 {
17790 kern_return_t result;
17791 vm_map_entry_t entry;
17792 vm_map_entry_t insp_entry = VM_MAP_ENTRY_NULL;
17793 vm_map_entry_t new_entry;
17794 vm_map_copy_t copy_map;
17795 vm_map_offset_t offset_in_mapping;
17796 vm_map_size_t target_size = 0;
17797 vm_map_size_t src_page_mask, target_page_mask;
17798 vm_map_offset_t overmap_start, overmap_end, trimmed_start;
17799 vm_map_offset_t initial_memory_address;
17800 vm_map_size_t initial_size;
17801
17802 if (target_map == VM_MAP_NULL) {
17803 return KERN_INVALID_ARGUMENT;
17804 }
17805
17806 initial_memory_address = memory_address;
17807 initial_size = size;
17808 src_page_mask = VM_MAP_PAGE_MASK(src_map);
17809 target_page_mask = VM_MAP_PAGE_MASK(target_map);
17810
17811 switch (inheritance) {
17812 case VM_INHERIT_NONE:
17813 case VM_INHERIT_COPY:
17814 case VM_INHERIT_SHARE:
17815 if (size != 0 && src_map != VM_MAP_NULL) {
17816 break;
17817 }
17818 OS_FALLTHROUGH;
17819 default:
17820 return KERN_INVALID_ARGUMENT;
17821 }
17822
17823 if (src_page_mask != target_page_mask) {
17824 if (copy) {
17825 DEBUG4K_COPY("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17826 } else {
17827 DEBUG4K_SHARE("src_map %p pgsz 0x%x addr 0x%llx size 0x%llx copy %d -> target_map %p pgsz 0x%x\n", src_map, VM_MAP_PAGE_SIZE(src_map), (uint64_t)memory_address, (uint64_t)size, copy, target_map, VM_MAP_PAGE_SIZE(target_map));
17828 }
17829 }
17830
17831 /*
17832 * If the user is requesting that we return the address of the
17833 * first byte of the data (rather than the base of the page),
17834 * then we use different rounding semantics: specifically,
17835 * we assume that (memory_address, size) describes a region
17836 * all of whose pages we must cover, rather than a base to be truncated
17837 * down and a size to be added to that base. So we figure out
17838 * the highest page that the requested region includes and make
17839 * sure that the size will cover it.
17840 *
17841 * The key example we're worried about is of the form:
17842 *
17843 * memory_address = 0x1ff0, size = 0x20
17844 *
17845 * With the old semantics, we round down the memory_address to 0x1000
17846 * and round up the size to 0x1000, resulting in our covering *only*
17847 * page 0x1000. With the new semantics, we'd realize that the region covers
17848 * 0x1ff0-0x2010, and compute a size of 0x2000. Thus, we cover both page
17849 * 0x1000 and page 0x2000 in the region we remap.
17850 */
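	/*
	 * Worked numbers for the example above (editorial addition),
	 * assuming 4K source pages (src_page_mask == 0xfff):
	 *	range_start       = trunc_page(0x1ff0)          = 0x1000
	 *	range_end         = round_page(0x1ff0 + 0x20)   = 0x3000
	 *	size              = range_end - range_start     = 0x2000
	 *	offset_in_mapping = 0x1ff0 - 0x1000             = 0xff0
	 */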
17851 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
17852 vm_map_offset_t range_start, range_end;
17853
17854 range_start = vm_map_trunc_page(memory_address, src_page_mask);
17855 range_end = vm_map_round_page(memory_address + size, src_page_mask);
17856 memory_address = range_start;
17857 size = range_end - range_start;
17858 offset_in_mapping = initial_memory_address - memory_address;
17859 } else {
17860 /*
17861 * IMPORTANT:
17862 * This legacy code path is broken: for the range mentioned
17863 * above [ memory_address = 0x1ff0, size = 0x20 ], which spans
17864 * two 4k pages, it yields [ memory_address = 0x1000,
17865 * size = 0x1000 ], which covers only the first 4k page.
17866 * BUT some code unfortunately depends on this bug, so we
17867 * can't fix it without breaking something.
17868 * New code automatically gets opted into the new
17869 * behavior with the new VM_FLAGS_RETURN_DATA_ADDR flag.
17870 */
17871 offset_in_mapping = 0;
17872 memory_address = vm_map_trunc_page(memory_address, src_page_mask);
17873 size = vm_map_round_page(size, src_page_mask);
17874 initial_memory_address = memory_address;
17875 initial_size = size;
17876 }
17877
17878
17879 if (size == 0) {
17880 return KERN_INVALID_ARGUMENT;
17881 }
17882
17883 if (flags & VM_FLAGS_RESILIENT_MEDIA) {
17884 /* must be copy-on-write to be "media resilient" */
17885 if (!copy) {
17886 return KERN_INVALID_ARGUMENT;
17887 }
17888 }
17889
17890 vmk_flags.vmkf_copy_pageable = target_map->hdr.entries_pageable;
17891 vmk_flags.vmkf_copy_same_map = (src_map == target_map);
17892
17893 assert(size != 0);
17894 result = vm_map_copy_extract(src_map,
17895 memory_address,
17896 size,
17897 VM_PROT_NONE, /* required_protection: no check here */
17898 copy, &copy_map,
17899 cur_protection,
17900 max_protection,
17901 inheritance,
17902 vmk_flags);
17903 if (result != KERN_SUCCESS) {
17904 return result;
17905 }
17906 assert(copy_map != VM_MAP_COPY_NULL);
17907
17908 overmap_start = 0;
17909 overmap_end = 0;
17910 trimmed_start = 0;
17911 target_size = size;
17912 if (src_page_mask != target_page_mask) {
17913 vm_map_copy_t target_copy_map;
17914
17915 target_copy_map = copy_map; /* can modify "copy_map" itself */
17916 DEBUG4K_ADJUST("adjusting...\n");
17917 result = vm_map_copy_adjust_to_target(
17918 copy_map,
17919 offset_in_mapping, /* offset */
17920 initial_size,
17921 target_map,
17922 copy,
17923 &target_copy_map,
17924 &overmap_start,
17925 &overmap_end,
17926 &trimmed_start);
17927 if (result != KERN_SUCCESS) {
17928 DEBUG4K_COPY("failed to adjust 0x%x\n", result);
17929 vm_map_copy_discard(copy_map);
17930 return result;
17931 }
17932 if (trimmed_start == 0) {
17933 /* nothing trimmed: no adjustment needed */
17934 } else if (trimmed_start >= offset_in_mapping) {
17935 /* trimmed more than offset_in_mapping: nothing left */
17936 assert(overmap_start == 0);
17937 assert(overmap_end == 0);
17938 offset_in_mapping = 0;
17939 } else {
17940 /* trimmed some of offset_in_mapping: adjust */
17941 assert(overmap_start == 0);
17942 assert(overmap_end == 0);
17943 offset_in_mapping -= trimmed_start;
17944 }
17945 offset_in_mapping += overmap_start;
17946 target_size = target_copy_map->size;
17947 }
17948
17949 /*
17950 * Allocate/check a range of free virtual address
17951 * space for the target
17952 */
17953 *address = vm_map_trunc_page(*address, target_page_mask);
17954 vm_map_lock(target_map);
17955 target_size = vm_map_round_page(target_size, target_page_mask);
17956 result = vm_map_remap_range_allocate(target_map, address,
17957 target_size,
17958 mask, flags, vmk_flags, tag,
17959 &insp_entry);
17960
17961 for (entry = vm_map_copy_first_entry(copy_map);
17962 entry != vm_map_copy_to_entry(copy_map);
17963 entry = new_entry) {
17964 new_entry = entry->vme_next;
17965 vm_map_copy_entry_unlink(copy_map, entry);
17966 if (result == KERN_SUCCESS) {
17967 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17968 /* no codesigning -> read-only access */
17969 entry->max_protection = VM_PROT_READ;
17970 entry->protection = VM_PROT_READ;
17971 entry->vme_resilient_codesign = TRUE;
17972 }
17973 entry->vme_start += *address;
17974 entry->vme_end += *address;
17975 assert(!entry->map_aligned);
17976 if ((flags & VM_FLAGS_RESILIENT_MEDIA) &&
17977 !entry->is_sub_map &&
17978 (VME_OBJECT(entry) == VM_OBJECT_NULL ||
17979 VME_OBJECT(entry)->internal)) {
17980 entry->vme_resilient_media = TRUE;
17981 }
17982 assert(VM_MAP_PAGE_ALIGNED(entry->vme_start, MIN(target_page_mask, PAGE_MASK)));
17983 assert(VM_MAP_PAGE_ALIGNED(entry->vme_end, MIN(target_page_mask, PAGE_MASK)));
17984 assert(VM_MAP_PAGE_ALIGNED(VME_OFFSET(entry), MIN(target_page_mask, PAGE_MASK)));
17985 vm_map_store_entry_link(target_map, insp_entry, entry,
17986 vmk_flags);
17987 insp_entry = entry;
17988 } else {
17989 if (!entry->is_sub_map) {
17990 vm_object_deallocate(VME_OBJECT(entry));
17991 } else {
17992 vm_map_deallocate(VME_SUBMAP(entry));
17993 }
17994 vm_map_copy_entry_dispose(copy_map, entry);
17995 }
17996 }
17997
17998 if (flags & VM_FLAGS_RESILIENT_CODESIGN) {
17999 *cur_protection = VM_PROT_READ;
18000 *max_protection = VM_PROT_READ;
18001 }
18002
18003 if (target_map->disable_vmentry_reuse == TRUE) {
18004 assert(!target_map->is_nested_map);
18005 if (target_map->highest_entry_end < insp_entry->vme_end) {
18006 target_map->highest_entry_end = insp_entry->vme_end;
18007 }
18008 }
18009
18010 if (result == KERN_SUCCESS) {
18011 target_map->size += target_size;
18012 SAVE_HINT_MAP_WRITE(target_map, insp_entry);
18013
18014 #if PMAP_CS
18015 if (*max_protection & VM_PROT_EXECUTE) {
18016 vm_map_address_t region_start = 0, region_size = 0;
18017 struct pmap_cs_code_directory *region_cd = NULL;
18018 vm_map_address_t base = 0;
18019 struct pmap_cs_lookup_results results = {};
18020 vm_map_size_t page_addr = vm_map_trunc_page(memory_address, PAGE_MASK);
18021 vm_map_size_t assoc_size = vm_map_round_page(memory_address + size - page_addr, PAGE_MASK);
18022
18023 pmap_cs_lookup(src_map->pmap, memory_address, &results);
18024 region_size = results.region_size;
18025 region_start = results.region_start;
18026 region_cd = results.region_cd_entry;
18027 base = results.base;
18028
18029 if (region_cd != NULL && (page_addr != region_start || assoc_size != region_size)) {
18030 *cur_protection = VM_PROT_READ;
18031 *max_protection = VM_PROT_READ;
18032 printf("mismatched remap of executable range 0x%llx-0x%llx to 0x%llx, "
18033 "region_start 0x%llx, region_size 0x%llx, cd_entry %sNULL, making non-executable.\n",
18034 page_addr, page_addr + assoc_size, *address,
18035 region_start, region_size,
18036 region_cd != NULL ? "not " : "" // Don't leak kernel slide
18037 );
18038 }
18039 }
18040 #endif
18041 }
18042 vm_map_unlock(target_map);
18043
18044 if (result == KERN_SUCCESS && target_map->wiring_required) {
18045 result = vm_map_wire_kernel(target_map, *address,
18046 *address + size, *cur_protection, VM_KERN_MEMORY_MLOCK,
18047 TRUE);
18048 }
18049
18050 /*
18051 * If requested, return the address of the data pointed to by the
18052 * request, rather than the base of the resulting page.
18053 */
18054 if ((flags & VM_FLAGS_RETURN_DATA_ADDR) != 0) {
18055 *address += offset_in_mapping;
18056 }
18057
18058 if (src_page_mask != target_page_mask) {
18059 DEBUG4K_SHARE("vm_remap(%p 0x%llx 0x%llx copy=%d-> %p 0x%llx 0x%llx result=0x%x\n", src_map, (uint64_t)memory_address, (uint64_t)size, copy, target_map, (uint64_t)*address, (uint64_t)offset_in_mapping, result);
18060 }
18061 vm_map_copy_discard(copy_map);
18062 copy_map = VM_MAP_COPY_NULL;
18063
18064 return result;
18065 }
18066
18067 /*
18068 * Routine: vm_map_remap_range_allocate
18069 *
18070 * Description:
18071 * Allocate a range in the specified virtual address map.
18072 * Returns the address and the map entry just before the allocated
18073 * range.
18074 *
18075 * Map must be locked.
18076 */
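/*
 * Editorial note (added; derived from the code below): with
 * VM_FLAGS_ANYWHERE the routine scans the map (optionally starting at
 * a random address when VM_FLAGS_RANDOM_ADDR is set) for a hole large
 * enough for "size" bytes that satisfies "mask"; otherwise it validates
 * the caller-supplied address, optionally deallocating any existing
 * mappings in that range first when VM_FLAGS_OVERWRITE is set.
 */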
18077
18078 static kern_return_t
18079 vm_map_remap_range_allocate(
18080 vm_map_t map,
18081 vm_map_address_t *address, /* IN/OUT */
18082 vm_map_size_t size,
18083 vm_map_offset_t mask,
18084 int flags,
18085 vm_map_kernel_flags_t vmk_flags,
18086 __unused vm_tag_t tag,
18087 vm_map_entry_t *map_entry) /* OUT */
18088 {
18089 vm_map_entry_t entry;
18090 vm_map_offset_t start;
18091 vm_map_offset_t end;
18092 vm_map_offset_t desired_empty_end;
18093 kern_return_t kr;
18094 vm_map_entry_t hole_entry;
18095
18096 StartAgain:;
18097
18098 start = *address;
18099
18100 if (flags & VM_FLAGS_ANYWHERE) {
18101 if (flags & VM_FLAGS_RANDOM_ADDR) {
18102 /*
18103 * Get a random start address.
18104 */
18105 kr = vm_map_random_address_for_size(map, address, size);
18106 if (kr != KERN_SUCCESS) {
18107 return kr;
18108 }
18109 start = *address;
18110 }
18111
18112 /*
18113 * Calculate the first possible address.
18114 */
18115
18116 if (start < map->min_offset) {
18117 start = map->min_offset;
18118 }
18119 if (start > map->max_offset) {
18120 return KERN_NO_SPACE;
18121 }
18122
18123 /*
18124 * Look for the first possible address;
18125 * if there's already something at this
18126 * address, we have to start after it.
18127 */
18128
18129 if (map->disable_vmentry_reuse == TRUE) {
18130 VM_MAP_HIGHEST_ENTRY(map, entry, start);
18131 } else {
18132 if (map->holelistenabled) {
18133 hole_entry = CAST_TO_VM_MAP_ENTRY(map->holes_list);
18134
18135 if (hole_entry == NULL) {
18136 /*
18137 * No more space in the map?
18138 */
18139 return KERN_NO_SPACE;
18140 } else {
18141 boolean_t found_hole = FALSE;
18142
18143 do {
18144 if (hole_entry->vme_start >= start) {
18145 start = hole_entry->vme_start;
18146 found_hole = TRUE;
18147 break;
18148 }
18149
18150 if (hole_entry->vme_end > start) {
18151 found_hole = TRUE;
18152 break;
18153 }
18154 hole_entry = hole_entry->vme_next;
18155 } while (hole_entry != CAST_TO_VM_MAP_ENTRY(map->holes_list));
18156
18157 if (found_hole == FALSE) {
18158 return KERN_NO_SPACE;
18159 }
18160
18161 entry = hole_entry;
18162 }
18163 } else {
18164 assert(first_free_is_valid(map));
18165 if (start == map->min_offset) {
18166 if ((entry = map->first_free) != vm_map_to_entry(map)) {
18167 start = entry->vme_end;
18168 }
18169 } else {
18170 vm_map_entry_t tmp_entry;
18171 if (vm_map_lookup_entry(map, start, &tmp_entry)) {
18172 start = tmp_entry->vme_end;
18173 }
18174 entry = tmp_entry;
18175 }
18176 }
18177 start = vm_map_round_page(start,
18178 VM_MAP_PAGE_MASK(map));
18179 }
18180
18181 /*
18182 * In any case, the "entry" always precedes
18183 * the proposed new region throughout the
18184 * loop:
18185 */
18186
18187 while (TRUE) {
18188 vm_map_entry_t next;
18189
18190 /*
18191 * Find the end of the proposed new region.
18192 * Be sure we didn't go beyond the end, or
18193 * wrap around the address.
18194 */
18195
18196 end = ((start + mask) & ~mask);
18197 end = vm_map_round_page(end,
18198 VM_MAP_PAGE_MASK(map));
18199 if (end < start) {
18200 return KERN_NO_SPACE;
18201 }
18202 start = end;
18203 end += size;
18204
18205 /* We want an entire page of empty space, but don't increase the allocation size. */
18206 desired_empty_end = vm_map_round_page(end, VM_MAP_PAGE_MASK(map));
18207
18208 if ((desired_empty_end > map->max_offset) || (desired_empty_end < start)) {
18209 if (map->wait_for_space) {
18210 if (size <= (map->max_offset -
18211 map->min_offset)) {
18212 assert_wait((event_t) map, THREAD_INTERRUPTIBLE);
18213 vm_map_unlock(map);
18214 thread_block(THREAD_CONTINUE_NULL);
18215 vm_map_lock(map);
18216 goto StartAgain;
18217 }
18218 }
18219
18220 return KERN_NO_SPACE;
18221 }
18222
18223 next = entry->vme_next;
18224
18225 if (map->holelistenabled) {
18226 if (entry->vme_end >= desired_empty_end) {
18227 break;
18228 }
18229 } else {
18230 /*
18231 * If there are no more entries, we must win.
18232 *
18233 * OR
18234 *
18235 * If there is another entry, it must be
18236 * after the end of the potential new region.
18237 */
18238
18239 if (next == vm_map_to_entry(map)) {
18240 break;
18241 }
18242
18243 if (next->vme_start >= desired_empty_end) {
18244 break;
18245 }
18246 }
18247
18248 /*
18249 * Didn't fit -- move to the next entry.
18250 */
18251
18252 entry = next;
18253
18254 if (map->holelistenabled) {
18255 if (entry == CAST_TO_VM_MAP_ENTRY(map->holes_list)) {
18256 /*
18257 * Wrapped around
18258 */
18259 return KERN_NO_SPACE;
18260 }
18261 start = entry->vme_start;
18262 } else {
18263 start = entry->vme_end;
18264 }
18265 }
18266
18267 if (map->holelistenabled) {
18268 if (vm_map_lookup_entry(map, entry->vme_start, &entry)) {
18269 panic("Found an existing entry (%p) instead of potential hole at address: 0x%llx.\n", entry, (unsigned long long)entry->vme_start);
18270 }
18271 }
18272
18273 *address = start;
18274 } else {
18275 vm_map_entry_t temp_entry;
18276
18277 /*
18278 * Verify that:
18279 * the address doesn't itself violate
18280 * the mask requirement.
18281 */
18282
18283 if ((start & mask) != 0) {
18284 return KERN_NO_SPACE;
18285 }
18286
18287
18288 /*
18289 * ... the address is within bounds
18290 */
18291
18292 end = start + size;
18293
18294 if ((start < map->min_offset) ||
18295 (end > map->max_offset) ||
18296 (start >= end)) {
18297 return KERN_INVALID_ADDRESS;
18298 }
18299
18300 /*
18301 * If we're asked to overwrite whatever was mapped in that
18302 * range, first deallocate that range.
18303 */
18304 if (flags & VM_FLAGS_OVERWRITE) {
18305 vm_map_t zap_map;
18306 int remove_flags = VM_MAP_REMOVE_SAVE_ENTRIES | VM_MAP_REMOVE_NO_MAP_ALIGN;
18307
18308 /*
18309 * We use a "zap_map" to avoid having to unlock
18310 * the "map" in vm_map_delete(), which would compromise
18311 * the atomicity of the "deallocate" and then "remap"
18312 * combination.
18313 */
18314 zap_map = vm_map_create(PMAP_NULL,
18315 start,
18316 end,
18317 map->hdr.entries_pageable);
18318 if (zap_map == VM_MAP_NULL) {
18319 return KERN_RESOURCE_SHORTAGE;
18320 }
18321 vm_map_set_page_shift(zap_map, VM_MAP_PAGE_SHIFT(map));
18322 vm_map_disable_hole_optimization(zap_map);
18323
18324 if (vmk_flags.vmkf_overwrite_immutable) {
18325 remove_flags |= VM_MAP_REMOVE_IMMUTABLE;
18326 }
18327 kr = vm_map_delete(map, start, end,
18328 remove_flags,
18329 zap_map);
18330 if (kr == KERN_SUCCESS) {
18331 vm_map_destroy(zap_map,
18332 VM_MAP_REMOVE_NO_PMAP_CLEANUP);
18333 zap_map = VM_MAP_NULL;
18334 }
18335 }
18336
18337 /*
18338 * ... the starting address isn't allocated
18339 */
18340
18341 if (vm_map_lookup_entry(map, start, &temp_entry)) {
18342 return KERN_NO_SPACE;
18343 }
18344
18345 entry = temp_entry;
18346
18347 /*
18348 * ... the next region doesn't overlap the
18349 * end point.
18350 */
18351
18352 if ((entry->vme_next != vm_map_to_entry(map)) &&
18353 (entry->vme_next->vme_start < end)) {
18354 return KERN_NO_SPACE;
18355 }
18356 }
18357 *map_entry = entry;
18358 return KERN_SUCCESS;
18359 }
18360
18361 /*
18362 * vm_map_switch:
18363 *
18364 * Set the address map for the current thread to the specified map
18365 */
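/*
 * Illustrative usage sketch (editorial addition): callers that need to
 * temporarily assume another map's identity bracket the operation the
 * way vm_map_write_user() below does:
 *
 *	vm_map_reference(map);
 *	oldmap = vm_map_switch(map);
 *	... copyin() / copyout() against "map" ...
 *	vm_map_switch(oldmap);
 *	vm_map_deallocate(map);
 */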
18366
18367 vm_map_t
18368 vm_map_switch(
18369 vm_map_t map)
18370 {
18371 int mycpu;
18372 thread_t thread = current_thread();
18373 vm_map_t oldmap = thread->map;
18374
18375 mp_disable_preemption();
18376 mycpu = cpu_number();
18377
18378 /*
18379 * Deactivate the current map and activate the requested map
18380 */
18381 PMAP_SWITCH_USER(thread, map, mycpu);
18382
18383 mp_enable_preemption();
18384 return oldmap;
18385 }
18386
18387
18388 /*
18389 * Routine: vm_map_write_user
18390 *
18391 * Description:
18392 * Copy out data from a kernel space into space in the
18393 * destination map. The space must already exist in the
18394 * destination map.
18395 * NOTE: This routine should only be called by threads
18396 * which can block on a page fault, i.e. kernel-mode user
18397 * threads.
18398 *
18399 */
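/*
 * Illustrative sketch (editorial addition; "user_map", "user_dst" and
 * "buf" are hypothetical names):
 *
 *	char buf[64];
 *	... fill buf ...
 *	kr = vm_map_write_user(user_map, buf, user_dst, sizeof(buf));
 *
 * Returns KERN_INVALID_ADDRESS if the destination range is not writable
 * in "user_map"; vm_map_read_user() below is the mirror image.
 */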
18400 kern_return_t
18401 vm_map_write_user(
18402 vm_map_t map,
18403 void *src_p,
18404 vm_map_address_t dst_addr,
18405 vm_size_t size)
18406 {
18407 kern_return_t kr = KERN_SUCCESS;
18408
18409 if (current_map() == map) {
18410 if (copyout(src_p, dst_addr, size)) {
18411 kr = KERN_INVALID_ADDRESS;
18412 }
18413 } else {
18414 vm_map_t oldmap;
18415
18416 /* take on the identity of the target map while doing */
18417 /* the transfer */
18418
18419 vm_map_reference(map);
18420 oldmap = vm_map_switch(map);
18421 if (copyout(src_p, dst_addr, size)) {
18422 kr = KERN_INVALID_ADDRESS;
18423 }
18424 vm_map_switch(oldmap);
18425 vm_map_deallocate(map);
18426 }
18427 return kr;
18428 }
18429
18430 /*
18431 * Routine: vm_map_read_user
18432 *
18433 * Description:
18434 * Copy in data from a user space source map into the
18435 * kernel map. The space must already exist in the
18436 * kernel map.
18437 * NOTE: This routine should only be called by threads
18438 * which can block on a page fault, i.e. kernel-mode user
18439 * threads.
18440 *
18441 */
18442 kern_return_t
18443 vm_map_read_user(
18444 vm_map_t map,
18445 vm_map_address_t src_addr,
18446 void *dst_p,
18447 vm_size_t size)
18448 {
18449 kern_return_t kr = KERN_SUCCESS;
18450
18451 if (current_map() == map) {
18452 if (copyin(src_addr, dst_p, size)) {
18453 kr = KERN_INVALID_ADDRESS;
18454 }
18455 } else {
18456 vm_map_t oldmap;
18457
18458 /* take on the identity of the target map while doing */
18459 /* the transfer */
18460
18461 vm_map_reference(map);
18462 oldmap = vm_map_switch(map);
18463 if (copyin(src_addr, dst_p, size)) {
18464 kr = KERN_INVALID_ADDRESS;
18465 }
18466 vm_map_switch(oldmap);
18467 vm_map_deallocate(map);
18468 }
18469 return kr;
18470 }
18471
18472
18473 /*
18474 * vm_map_check_protection:
18475 *
18476 * Assert that the target map allows the specified
18477 * privilege on the entire address region given.
18478 * The entire region must be allocated.
18479 */
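/*
 * Illustrative sketch (editorial addition): verify that an entire
 * range is mapped with at least read/write access before using it:
 *
 *	if (!vm_map_check_protection(map, start, end,
 *	        VM_PROT_READ | VM_PROT_WRITE)) {
 *		return KERN_PROTECTION_FAILURE;
 *	}
 */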
18480 boolean_t
18481 vm_map_check_protection(vm_map_t map, vm_map_offset_t start,
18482 vm_map_offset_t end, vm_prot_t protection)
18483 {
18484 vm_map_entry_t entry;
18485 vm_map_entry_t tmp_entry;
18486
18487 vm_map_lock(map);
18488
18489 if (start < vm_map_min(map) || end > vm_map_max(map) || start > end) {
18490 vm_map_unlock(map);
18491 return FALSE;
18492 }
18493
18494 if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
18495 vm_map_unlock(map);
18496 return FALSE;
18497 }
18498
18499 entry = tmp_entry;
18500
18501 while (start < end) {
18502 if (entry == vm_map_to_entry(map)) {
18503 vm_map_unlock(map);
18504 return FALSE;
18505 }
18506
18507 /*
18508 * No holes allowed!
18509 */
18510
18511 if (start < entry->vme_start) {
18512 vm_map_unlock(map);
18513 return FALSE;
18514 }
18515
18516 /*
18517 * Check protection associated with entry.
18518 */
18519
18520 if ((entry->protection & protection) != protection) {
18521 vm_map_unlock(map);
18522 return FALSE;
18523 }
18524
18525 /* go to next entry */
18526
18527 start = entry->vme_end;
18528 entry = entry->vme_next;
18529 }
18530 vm_map_unlock(map);
18531 return TRUE;
18532 }
18533
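/*
 * Illustrative sketch (editorial addition; "addr" is assumed to fall
 * within a writable mapping of a purgeable object owned by the caller):
 *
 *	int state = VM_PURGABLE_VOLATILE;
 *	kr = vm_map_purgable_control(map, addr,
 *	    VM_PURGABLE_SET_STATE, &state);
 */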
18534 kern_return_t
18535 vm_map_purgable_control(
18536 vm_map_t map,
18537 vm_map_offset_t address,
18538 vm_purgable_t control,
18539 int *state)
18540 {
18541 vm_map_entry_t entry;
18542 vm_object_t object;
18543 kern_return_t kr;
18544 boolean_t was_nonvolatile;
18545
18546 /*
18547 * Vet all the input parameters and current type and state of the
18548 * underlying object. Return with an error if anything is amiss.
18549 */
18550 if (map == VM_MAP_NULL) {
18551 return KERN_INVALID_ARGUMENT;
18552 }
18553
18554 if (control != VM_PURGABLE_SET_STATE &&
18555 control != VM_PURGABLE_GET_STATE &&
18556 control != VM_PURGABLE_PURGE_ALL &&
18557 control != VM_PURGABLE_SET_STATE_FROM_KERNEL) {
18558 return KERN_INVALID_ARGUMENT;
18559 }
18560
18561 if (control == VM_PURGABLE_PURGE_ALL) {
18562 vm_purgeable_object_purge_all();
18563 return KERN_SUCCESS;
18564 }
18565
18566 if ((control == VM_PURGABLE_SET_STATE ||
18567 control == VM_PURGABLE_SET_STATE_FROM_KERNEL) &&
18568 (((*state & ~(VM_PURGABLE_ALL_MASKS)) != 0) ||
18569 ((*state & VM_PURGABLE_STATE_MASK) > VM_PURGABLE_STATE_MASK))) {
18570 return KERN_INVALID_ARGUMENT;
18571 }
18572
18573 vm_map_lock_read(map);
18574
18575 if (!vm_map_lookup_entry(map, address, &entry) || entry->is_sub_map) {
18576 /*
18577 * Must pass a valid non-submap address.
18578 */
18579 vm_map_unlock_read(map);
18580 return KERN_INVALID_ADDRESS;
18581 }
18582
18583 if ((entry->protection & VM_PROT_WRITE) == 0) {
18584 /*
18585 * Can't apply purgable controls to something you can't write.
18586 */
18587 vm_map_unlock_read(map);
18588 return KERN_PROTECTION_FAILURE;
18589 }
18590
18591 object = VME_OBJECT(entry);
18592 if (object == VM_OBJECT_NULL ||
18593 object->purgable == VM_PURGABLE_DENY) {
18594 /*
18595 * Object must already be present and be purgeable.
18596 */
18597 vm_map_unlock_read(map);
18598 return KERN_INVALID_ARGUMENT;
18599 }
18600
18601 vm_object_lock(object);
18602
18603 #if 00
18604 if (VME_OFFSET(entry) != 0 ||
18605 entry->vme_end - entry->vme_start != object->vo_size) {
18606 /*
18607 * Can only apply purgable controls to the whole (existing)
18608 * object at once.
18609 */
18610 vm_map_unlock_read(map);
18611 vm_object_unlock(object);
18612 return KERN_INVALID_ARGUMENT;
18613 }
18614 #endif
18615
18616 assert(!entry->is_sub_map);
18617 assert(!entry->use_pmap); /* purgeable has its own accounting */
18618
18619 vm_map_unlock_read(map);
18620
18621 was_nonvolatile = (object->purgable == VM_PURGABLE_NONVOLATILE);
18622
18623 kr = vm_object_purgable_control(object, control, state);
18624
18625 if (was_nonvolatile &&
18626 object->purgable != VM_PURGABLE_NONVOLATILE &&
18627 map->pmap == kernel_pmap) {
18628 #if DEBUG
18629 object->vo_purgeable_volatilizer = kernel_task;
18630 #endif /* DEBUG */
18631 }
18632
18633 vm_object_unlock(object);
18634
18635 return kr;
18636 }
18637
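/*
 * Routine: vm_map_footprint_query_page_info
 *
 * Editorial summary (added; derived from the code below): compute the
 * footprint "disposition" of the page at "curr_s_offset" within
 * "map_entry", for a task querying its own region footprint.  The map
 * must be locked.  Alternate-accounting cases (owned purgeable or
 * ledger-tagged objects, non-purgeable IOKit mappings, pmap alternate
 * accounting) are handled first; otherwise the disposition is derived
 * from pmap_query_page_info().
 */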
18638 void
18639 vm_map_footprint_query_page_info(
18640 vm_map_t map,
18641 vm_map_entry_t map_entry,
18642 vm_map_offset_t curr_s_offset,
18643 int *disposition_p)
18644 {
18645 int pmap_disp;
18646 vm_object_t object;
18647 int disposition;
18648 int effective_page_size;
18649
18650 vm_map_lock_assert_held(map);
18651 assert(!map->has_corpse_footprint);
18652 assert(curr_s_offset >= map_entry->vme_start);
18653 assert(curr_s_offset < map_entry->vme_end);
18654
18655 object = VME_OBJECT(map_entry);
18656 if (object == VM_OBJECT_NULL) {
18657 *disposition_p = 0;
18658 return;
18659 }
18660
18661 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
18662
18663 pmap_disp = 0;
18664 if (object == VM_OBJECT_NULL) {
18665 /* nothing mapped here: no need to ask */
18666 *disposition_p = 0;
18667 return;
18668 } else if (map_entry->is_sub_map &&
18669 !map_entry->use_pmap) {
18670 /* nested pmap: no footprint */
18671 *disposition_p = 0;
18672 return;
18673 }
18674
18675 /*
18676 * Query the pmap.
18677 */
18678 pmap_query_page_info(map->pmap, curr_s_offset, &pmap_disp);
18679
18680 /*
18681 * Compute this page's disposition.
18682 */
18683 disposition = 0;
18684
18685 /* deal with "alternate accounting" first */
18686 if (!map_entry->is_sub_map &&
18687 object->vo_no_footprint) {
18688 /* does not count in footprint */
18689 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18690 } else if (!map_entry->is_sub_map &&
18691 (object->purgable == VM_PURGABLE_NONVOLATILE ||
18692 (object->purgable == VM_PURGABLE_DENY &&
18693 object->vo_ledger_tag)) &&
18694 VM_OBJECT_OWNER(object) != NULL &&
18695 VM_OBJECT_OWNER(object)->map == map) {
18696 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18697 if ((((curr_s_offset
18698 - map_entry->vme_start
18699 + VME_OFFSET(map_entry))
18700 / effective_page_size) <
18701 (object->resident_page_count +
18702 vm_compressor_pager_get_count(object->pager)))) {
18703 /*
18704 * Non-volatile purgeable object owned
18705 * by this task: report the first
18706 * "#resident + #compressed" pages as
18707 * "resident" (to show that they
18708 * contribute to the footprint) but not
18709 * "dirty" (to avoid double-counting
18710 * with the fake "non-volatile" region
18711 * we'll report at the end of the
18712 * address space to account for all
18713 * (mapped or not) non-volatile memory
18714 * owned by this task).
18715 */
18716 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18717 }
18718 } else if (!map_entry->is_sub_map &&
18719 (object->purgable == VM_PURGABLE_VOLATILE ||
18720 object->purgable == VM_PURGABLE_EMPTY) &&
18721 VM_OBJECT_OWNER(object) != NULL &&
18722 VM_OBJECT_OWNER(object)->map == map) {
18723 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18724 if ((((curr_s_offset
18725 - map_entry->vme_start
18726 + VME_OFFSET(map_entry))
18727 / effective_page_size) <
18728 object->wired_page_count)) {
18729 /*
18730 * Volatile|empty purgeable object owned
18731 * by this task: report the first
18732 * "#wired" pages as "resident" (to
18733 * show that they contribute to the
18734 * footprint) but not "dirty" (to avoid
18735 * double-counting with the fake
18736 * "non-volatile" region we'll report
18737 * at the end of the address space to
18738 * account for all (mapped or not)
18739 * non-volatile memory owned by this
18740 * task).
18741 */
18742 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18743 }
18744 } else if (!map_entry->is_sub_map &&
18745 map_entry->iokit_acct &&
18746 object->internal &&
18747 object->purgable == VM_PURGABLE_DENY) {
18748 /*
18749 * Non-purgeable IOKit memory: phys_footprint
18750 * includes the entire virtual mapping.
18751 */
18752 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18753 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18754 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18755 } else if (pmap_disp & (PMAP_QUERY_PAGE_ALTACCT |
18756 PMAP_QUERY_PAGE_COMPRESSED_ALTACCT)) {
18757 /* alternate accounting */
18758 #if (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG)
18759 if (map->pmap->footprint_was_suspended) {
18760 /*
18761 * The assertion below can fail if dyld
18762 * suspended footprint accounting
18763 * while doing some adjustments to
18764 * this page; the mapping would say
18765 * "use pmap accounting" but the page
18766 * would be marked "alternate
18767 * accounting".
18768 */
18769 } else
18770 #endif /* (__arm__ || __arm64__) && (DEVELOPMENT || DEBUG) */
18771 {
18772 assertf(!map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18773 }
18774 disposition = 0;
18775 } else {
18776 if (pmap_disp & PMAP_QUERY_PAGE_PRESENT) {
18777 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18778 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18779 disposition |= VM_PAGE_QUERY_PAGE_REF;
18780 if (pmap_disp & PMAP_QUERY_PAGE_INTERNAL) {
18781 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18782 } else {
18783 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
18784 }
18785 if (pmap_disp & PMAP_QUERY_PAGE_REUSABLE) {
18786 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
18787 }
18788 } else if (pmap_disp & PMAP_QUERY_PAGE_COMPRESSED) {
18789 assertf(map_entry->use_pmap, "offset 0x%llx map_entry %p", (uint64_t) curr_s_offset, map_entry);
18790 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18791 }
18792 }
18793
18794 *disposition_p = disposition;
18795 }
18796
18797 kern_return_t
18798 vm_map_page_query_internal(
18799 vm_map_t target_map,
18800 vm_map_offset_t offset,
18801 int *disposition,
18802 int *ref_count)
18803 {
18804 kern_return_t kr;
18805 vm_page_info_basic_data_t info;
18806 mach_msg_type_number_t count;
18807
18808 count = VM_PAGE_INFO_BASIC_COUNT;
18809 kr = vm_map_page_info(target_map,
18810 offset,
18811 VM_PAGE_INFO_BASIC,
18812 (vm_page_info_t) &info,
18813 &count);
18814 if (kr == KERN_SUCCESS) {
18815 *disposition = info.disposition;
18816 *ref_count = info.ref_count;
18817 } else {
18818 *disposition = 0;
18819 *ref_count = 0;
18820 }
18821
18822 return kr;
18823 }
18824
18825 kern_return_t
18826 vm_map_page_info(
18827 vm_map_t map,
18828 vm_map_offset_t offset,
18829 vm_page_info_flavor_t flavor,
18830 vm_page_info_t info,
18831 mach_msg_type_number_t *count)
18832 {
18833 return vm_map_page_range_info_internal(map,
18834 offset, /* start of range */
18835 (offset + 1), /* this will get rounded in the call to the page boundary */
18836 (int)-1, /* effective_page_shift: unspecified */
18837 flavor,
18838 info,
18839 count);
18840 }
18841
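/*
 * Routine: vm_map_page_range_info_internal
 *
 * Editorial summary (added; derived from the code below): fill "info"
 * with one vm_page_info record per effective page for the range
 * [start_offset, end_offset) of "map".  Holes and null objects yield
 * zeroed records, submaps are recursed into, and object shadow chains
 * are walked to locate each page.  Self-footprint queries are instead
 * answered via the footprint disposition helpers (corpse data or the
 * live pmap).
 */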
18842 kern_return_t
18843 vm_map_page_range_info_internal(
18844 vm_map_t map,
18845 vm_map_offset_t start_offset,
18846 vm_map_offset_t end_offset,
18847 int effective_page_shift,
18848 vm_page_info_flavor_t flavor,
18849 vm_page_info_t info,
18850 mach_msg_type_number_t *count)
18851 {
18852 vm_map_entry_t map_entry = VM_MAP_ENTRY_NULL;
18853 vm_object_t object = VM_OBJECT_NULL, curr_object = VM_OBJECT_NULL;
18854 vm_page_t m = VM_PAGE_NULL;
18855 kern_return_t retval = KERN_SUCCESS;
18856 int disposition = 0;
18857 int ref_count = 0;
18858 int depth = 0, info_idx = 0;
18859 vm_page_info_basic_t basic_info = 0;
18860 vm_map_offset_t offset_in_page = 0, offset_in_object = 0, curr_offset_in_object = 0;
18861 vm_map_offset_t start = 0, end = 0, curr_s_offset = 0, curr_e_offset = 0;
18862 boolean_t do_region_footprint;
18863 ledger_amount_t ledger_resident, ledger_compressed;
18864 int effective_page_size;
18865 vm_map_offset_t effective_page_mask;
18866
18867 switch (flavor) {
18868 case VM_PAGE_INFO_BASIC:
18869 if (*count != VM_PAGE_INFO_BASIC_COUNT) {
18870 /*
18871 * The "vm_page_info_basic_data" structure was not
18872 * properly padded, so allow the size to be off by
18873 * one to maintain backwards binary compatibility...
18874 */
18875 if (*count != VM_PAGE_INFO_BASIC_COUNT - 1) {
18876 return KERN_INVALID_ARGUMENT;
18877 }
18878 }
18879 break;
18880 default:
18881 return KERN_INVALID_ARGUMENT;
18882 }
18883
18884 if (effective_page_shift == -1) {
18885 effective_page_shift = vm_self_region_page_shift_safely(map);
18886 if (effective_page_shift == -1) {
18887 return KERN_INVALID_ARGUMENT;
18888 }
18889 }
18890 effective_page_size = (1 << effective_page_shift);
18891 effective_page_mask = effective_page_size - 1;
18892
18893 do_region_footprint = task_self_region_footprint();
18894 disposition = 0;
18895 ref_count = 0;
18896 depth = 0;
18897 info_idx = 0; /* Tracks the next index within the info structure to be filled.*/
18898 retval = KERN_SUCCESS;
18899
18900 offset_in_page = start_offset & effective_page_mask;
18901 start = vm_map_trunc_page(start_offset, effective_page_mask);
18902 end = vm_map_round_page(end_offset, effective_page_mask);
18903
18904 if (end < start) {
18905 return KERN_INVALID_ARGUMENT;
18906 }
18907
18908 assert((end - start) <= MAX_PAGE_RANGE_QUERY);
18909
18910 vm_map_lock_read(map);
18911
18912 task_ledgers_footprint(map->pmap->ledger, &ledger_resident, &ledger_compressed);
18913
18914 for (curr_s_offset = start; curr_s_offset < end;) {
18915 /*
18916 * New lookup needs reset of these variables.
18917 */
18918 curr_object = object = VM_OBJECT_NULL;
18919 offset_in_object = 0;
18920 ref_count = 0;
18921 depth = 0;
18922
18923 if (do_region_footprint &&
18924 curr_s_offset >= vm_map_last_entry(map)->vme_end) {
18925 /*
18926 * Request for "footprint" info about a page beyond
18927 * the end of address space: this must be for
18928 * the fake region vm_map_region_recurse_64()
18929 * reported to account for non-volatile purgeable
18930 * memory owned by this task.
18931 */
18932 disposition = 0;
18933
18934 if (curr_s_offset - vm_map_last_entry(map)->vme_end <=
18935 (unsigned) ledger_compressed) {
18936 /*
18937 * We haven't reported all the "non-volatile
18938 * compressed" pages yet, so report this fake
18939 * page as "compressed".
18940 */
18941 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
18942 } else {
18943 /*
18944 * We've reported all the non-volatile
18945 * compressed page but not all the non-volatile
18946 * pages , so report this fake page as
18947 * "resident dirty".
18948 */
18949 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
18950 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
18951 disposition |= VM_PAGE_QUERY_PAGE_REF;
18952 }
18953 switch (flavor) {
18954 case VM_PAGE_INFO_BASIC:
18955 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
18956 basic_info->disposition = disposition;
18957 basic_info->ref_count = 1;
18958 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
18959 basic_info->offset = 0;
18960 basic_info->depth = 0;
18961
18962 info_idx++;
18963 break;
18964 }
18965 curr_s_offset += effective_page_size;
18966 continue;
18967 }
18968
18969 /*
18970 * First, find the map entry covering "curr_s_offset", going down
18971 * submaps if necessary.
18972 */
18973 if (!vm_map_lookup_entry(map, curr_s_offset, &map_entry)) {
18974 /* no entry -> no object -> no page */
18975
18976 if (curr_s_offset < vm_map_min(map)) {
18977 /*
18978 * Illegal address that falls below map min.
18979 */
18980 curr_e_offset = MIN(end, vm_map_min(map));
18981 } else if (curr_s_offset >= vm_map_max(map)) {
18982 /*
18983 * Illegal address that falls on/after map max.
18984 */
18985 curr_e_offset = end;
18986 } else if (map_entry == vm_map_to_entry(map)) {
18987 /*
18988 * Hit a hole.
18989 */
18990 if (map_entry->vme_next == vm_map_to_entry(map)) {
18991 /*
18992 * Empty map.
18993 */
18994 curr_e_offset = MIN(map->max_offset, end);
18995 } else {
18996 /*
18997 * Hole at start of the map.
18998 */
18999 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19000 }
19001 } else {
19002 if (map_entry->vme_next == vm_map_to_entry(map)) {
19003 /*
19004 * Hole at the end of the map.
19005 */
19006 curr_e_offset = MIN(map->max_offset, end);
19007 } else {
19008 curr_e_offset = MIN(map_entry->vme_next->vme_start, end);
19009 }
19010 }
19011
19012 assert(curr_e_offset >= curr_s_offset);
19013
19014 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19015
19016 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19017
19018 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19019
19020 curr_s_offset = curr_e_offset;
19021
19022 info_idx += num_pages;
19023
19024 continue;
19025 }
19026
19027 /* compute offset from this map entry's start */
19028 offset_in_object = curr_s_offset - map_entry->vme_start;
19029
19030 /* compute offset into this map entry's object (or submap) */
19031 offset_in_object += VME_OFFSET(map_entry);
19032
19033 if (map_entry->is_sub_map) {
19034 vm_map_t sub_map = VM_MAP_NULL;
19035 vm_page_info_t submap_info = 0;
19036 vm_map_offset_t submap_s_offset = 0, submap_e_offset = 0, range_len = 0;
19037
19038 range_len = MIN(map_entry->vme_end, end) - curr_s_offset;
19039
19040 submap_s_offset = offset_in_object;
19041 submap_e_offset = submap_s_offset + range_len;
19042
19043 sub_map = VME_SUBMAP(map_entry);
19044
19045 vm_map_reference(sub_map);
19046 vm_map_unlock_read(map);
19047
19048 submap_info = (vm_page_info_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19049
19050 assertf(VM_MAP_PAGE_SHIFT(sub_map) >= VM_MAP_PAGE_SHIFT(map),
19051 "Submap page size (%d) differs from current map (%d)\n", VM_MAP_PAGE_SIZE(sub_map), VM_MAP_PAGE_SIZE(map));
19052
19053 retval = vm_map_page_range_info_internal(sub_map,
19054 submap_s_offset,
19055 submap_e_offset,
19056 effective_page_shift,
19057 VM_PAGE_INFO_BASIC,
19058 (vm_page_info_t) submap_info,
19059 count);
19060
19061 assert(retval == KERN_SUCCESS);
19062
19063 vm_map_lock_read(map);
19064 vm_map_deallocate(sub_map);
19065
19066 /* Move the "info" index by the number of pages we inspected.*/
19067 info_idx += range_len >> effective_page_shift;
19068
19069 /* Move our current offset by the size of the range we inspected.*/
19070 curr_s_offset += range_len;
19071
19072 continue;
19073 }
19074
19075 object = VME_OBJECT(map_entry);
19076
19077 if (object == VM_OBJECT_NULL) {
19078 /*
19079 * We don't have an object here and, hence,
19080 * no pages to inspect. We'll fill up the
19081 * info structure appropriately.
19082 */
19083
19084 curr_e_offset = MIN(map_entry->vme_end, end);
19085
19086 uint64_t num_pages = (curr_e_offset - curr_s_offset) >> effective_page_shift;
19087
19088 void *info_ptr = (void*) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19089
19090 bzero(info_ptr, num_pages * sizeof(struct vm_page_info_basic));
19091
19092 curr_s_offset = curr_e_offset;
19093
19094 info_idx += num_pages;
19095
19096 continue;
19097 }
19098
19099 if (do_region_footprint) {
19100 disposition = 0;
19101 if (map->has_corpse_footprint) {
19102 /*
19103 * Query the page info data we saved
19104 * while forking the corpse.
19105 */
19106 vm_map_corpse_footprint_query_page_info(
19107 map,
19108 curr_s_offset,
19109 &disposition);
19110 } else {
19111 /*
19112 * Query the live pmap for footprint info
19113 * about this page.
19114 */
19115 vm_map_footprint_query_page_info(
19116 map,
19117 map_entry,
19118 curr_s_offset,
19119 &disposition);
19120 }
19121 switch (flavor) {
19122 case VM_PAGE_INFO_BASIC:
19123 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19124 basic_info->disposition = disposition;
19125 basic_info->ref_count = 1;
19126 basic_info->object_id = VM_OBJECT_ID_FAKE(map, task_ledgers.purgeable_nonvolatile);
19127 basic_info->offset = 0;
19128 basic_info->depth = 0;
19129
19130 info_idx++;
19131 break;
19132 }
19133 curr_s_offset += effective_page_size;
19134 continue;
19135 }
19136
19137 vm_object_reference(object);
19138 /*
19139 * Shared mode -- so we can allow other readers
19140 * to grab the lock too.
19141 */
19142 vm_object_lock_shared(object);
19143
19144 curr_e_offset = MIN(map_entry->vme_end, end);
19145
19146 vm_map_unlock_read(map);
19147
19148 map_entry = NULL; /* map is unlocked, the entry is no longer valid. */
19149
19150 curr_object = object;
19151
19152 for (; curr_s_offset < curr_e_offset;) {
19153 if (object == curr_object) {
19154 ref_count = curr_object->ref_count - 1; /* account for our object reference above. */
19155 } else {
19156 ref_count = curr_object->ref_count;
19157 }
19158
19159 curr_offset_in_object = offset_in_object;
19160
19161 for (;;) {
19162 m = vm_page_lookup(curr_object, vm_object_trunc_page(curr_offset_in_object));
19163
19164 if (m != VM_PAGE_NULL) {
19165 disposition |= VM_PAGE_QUERY_PAGE_PRESENT;
19166 break;
19167 } else {
19168 if (curr_object->internal &&
19169 curr_object->alive &&
19170 !curr_object->terminating &&
19171 curr_object->pager_ready) {
19172 if (VM_COMPRESSOR_PAGER_STATE_GET(curr_object, vm_object_trunc_page(curr_offset_in_object))
19173 == VM_EXTERNAL_STATE_EXISTS) {
19174 /* the pager has that page */
19175 disposition |= VM_PAGE_QUERY_PAGE_PAGED_OUT;
19176 break;
19177 }
19178 }
19179
19180 /*
19181 * Go down the VM object shadow chain until we find the page
19182 * we're looking for.
19183 */
19184
19185 if (curr_object->shadow != VM_OBJECT_NULL) {
19186 vm_object_t shadow = VM_OBJECT_NULL;
19187
19188 curr_offset_in_object += curr_object->vo_shadow_offset;
19189 shadow = curr_object->shadow;
19190
19191 vm_object_lock_shared(shadow);
19192 vm_object_unlock(curr_object);
19193
19194 curr_object = shadow;
19195 depth++;
19196 continue;
19197 } else {
19198 break;
19199 }
19200 }
19201 }
19202
19203 /* The ref_count is not strictly accurate: it counts the number of */
19204 /* entities holding a reference on the object; they may not be mapping */
19205 /* the object, or may not be mapping the section holding the */
19206 /* target page, but it is still a ballpark number and, though an */
19207 /* overcount, it picks up the copy-on-write cases. */
19208 
19209 /* We could also get a picture of page sharing from pmap_attributes, */
19210 /* but that would undercount, as only faulted-in mappings would */
19211 /* show up. */
19212
19213 if ((curr_object == object) && curr_object->shadow) {
19214 disposition |= VM_PAGE_QUERY_PAGE_COPIED;
19215 }
19216
19217 if (!curr_object->internal) {
19218 disposition |= VM_PAGE_QUERY_PAGE_EXTERNAL;
19219 }
19220
19221 if (m != VM_PAGE_NULL) {
19222 if (m->vmp_fictitious) {
19223 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
19224 } else {
19225 if (m->vmp_dirty || pmap_is_modified(VM_PAGE_GET_PHYS_PAGE(m))) {
19226 disposition |= VM_PAGE_QUERY_PAGE_DIRTY;
19227 }
19228
19229 if (m->vmp_reference || pmap_is_referenced(VM_PAGE_GET_PHYS_PAGE(m))) {
19230 disposition |= VM_PAGE_QUERY_PAGE_REF;
19231 }
19232
19233 if (m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) {
19234 disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
19235 }
19236
19237 /*
19238 * XXX TODO4K:
19239 * when this routine deals with 4k
19240 * pages, check the appropriate CS bit
19241 * here.
19242 */
19243 if (m->vmp_cs_validated) {
19244 disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
19245 }
19246 if (m->vmp_cs_tainted) {
19247 disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
19248 }
19249 if (m->vmp_cs_nx) {
19250 disposition |= VM_PAGE_QUERY_PAGE_CS_NX;
19251 }
19252 if (m->vmp_reusable || curr_object->all_reusable) {
19253 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
19254 }
19255 }
19256 }
19257
19258 switch (flavor) {
19259 case VM_PAGE_INFO_BASIC:
19260 basic_info = (vm_page_info_basic_t) (((uintptr_t) info) + (info_idx * sizeof(struct vm_page_info_basic)));
19261 basic_info->disposition = disposition;
19262 basic_info->ref_count = ref_count;
19263 basic_info->object_id = (vm_object_id_t) (uintptr_t)
19264 VM_KERNEL_ADDRPERM(curr_object);
19265 basic_info->offset =
19266 (memory_object_offset_t) curr_offset_in_object + offset_in_page;
19267 basic_info->depth = depth;
19268
19269 info_idx++;
19270 break;
19271 }
19272
19273 disposition = 0;
19274 offset_in_page = 0; // This doesn't really make sense for any offset other than the starting offset.
19275
19276 /*
19277 * Move to next offset in the range and in our object.
19278 */
19279 curr_s_offset += effective_page_size;
19280 offset_in_object += effective_page_size;
19281 curr_offset_in_object = offset_in_object;
19282
19283 if (curr_object != object) {
19284 vm_object_unlock(curr_object);
19285
19286 curr_object = object;
19287
19288 vm_object_lock_shared(curr_object);
19289 } else {
19290 vm_object_lock_yield_shared(curr_object);
19291 }
19292 }
19293
19294 vm_object_unlock(curr_object);
19295 vm_object_deallocate(curr_object);
19296
19297 vm_map_lock_read(map);
19298 }
19299
19300 vm_map_unlock_read(map);
19301 return retval;
19302 }
19303
19304 /*
19305 * vm_map_msync
19306 *
19307 * Synchronises the specified memory range with its backing store
19308 * image by either flushing or cleaning the contents to the appropriate
19309 * memory manager, engaging in a memory object synchronize dialog with
19310 * that manager. The client doesn't return until the manager issues an
19311 * m_o_s_completed message. MIG magically converts the user task parameter
19312 * to the task's address map.
19313 *
19314 * interpretation of sync_flags
19315 * VM_SYNC_INVALIDATE - discard pages, only return precious
19316 * pages to manager.
19317 *
19318 * VM_SYNC_INVALIDATE & (VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS)
19319 * - discard pages, write dirty or precious
19320 * pages back to memory manager.
19321 *
19322 * VM_SYNC_SYNCHRONOUS | VM_SYNC_ASYNCHRONOUS
19323 * - write dirty or precious pages back to
19324 * the memory manager.
19325 *
19326 * VM_SYNC_CONTIGUOUS - does everything normally, but if there
19327 * is a hole in the region, and we would
19328 * have returned KERN_SUCCESS, return
19329 * KERN_INVALID_ADDRESS instead.
19330 *
19331 * NOTE
19332 * The memory object attributes have not yet been implemented; this
19333 * function will have to deal with the invalidate attribute.
19334 *
19335 * RETURNS
19336 * KERN_INVALID_TASK Bad task parameter
19337 * KERN_INVALID_ARGUMENT Both sync and async were specified.
19338 * KERN_SUCCESS The usual.
19339 * KERN_INVALID_ADDRESS There was a hole in the region.
19340 */
19341
19342 kern_return_t
19343 vm_map_msync(
19344 vm_map_t map,
19345 vm_map_address_t address,
19346 vm_map_size_t size,
19347 vm_sync_t sync_flags)
19348 {
19349 vm_map_entry_t entry;
19350 vm_map_size_t amount_left;
19351 vm_object_offset_t offset;
19352 vm_object_offset_t start_offset, end_offset;
19353 boolean_t do_sync_req;
19354 boolean_t had_hole = FALSE;
19355 vm_map_offset_t pmap_offset;
19356
19357 if ((sync_flags & VM_SYNC_ASYNCHRONOUS) &&
19358 (sync_flags & VM_SYNC_SYNCHRONOUS)) {
19359 return KERN_INVALID_ARGUMENT;
19360 }
19361
19362 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19363 DEBUG4K_SHARE("map %p address 0x%llx size 0x%llx flags 0x%x\n", map, (uint64_t)address, (uint64_t)size, sync_flags);
19364 }
19365
19366 /*
19367 * align address and size on page boundaries
19368 */
19369 size = (vm_map_round_page(address + size,
19370 VM_MAP_PAGE_MASK(map)) -
19371 vm_map_trunc_page(address,
19372 VM_MAP_PAGE_MASK(map)));
19373 address = vm_map_trunc_page(address,
19374 VM_MAP_PAGE_MASK(map));
19375
19376 if (map == VM_MAP_NULL) {
19377 return KERN_INVALID_TASK;
19378 }
19379
19380 if (size == 0) {
19381 return KERN_SUCCESS;
19382 }
19383
19384 amount_left = size;
19385
19386 while (amount_left > 0) {
19387 vm_object_size_t flush_size;
19388 vm_object_t object;
19389
19390 vm_map_lock(map);
19391 if (!vm_map_lookup_entry(map,
19392 address,
19393 &entry)) {
19394 vm_map_size_t skip;
19395
19396 /*
19397 * hole in the address map.
19398 */
19399 had_hole = TRUE;
19400
19401 if (sync_flags & VM_SYNC_KILLPAGES) {
19402 /*
19403 * For VM_SYNC_KILLPAGES, there should be
19404 * no holes in the range, since we couldn't
19405 * prevent someone else from allocating in
19406 * that hole and we wouldn't want to "kill"
19407 * their pages.
19408 */
19409 vm_map_unlock(map);
19410 break;
19411 }
19412
19413 /*
19414 * Check for empty map.
19415 */
19416 if (entry == vm_map_to_entry(map) &&
19417 entry->vme_next == entry) {
19418 vm_map_unlock(map);
19419 break;
19420 }
19421 /*
19422 * Check that we don't wrap and that
19423 * we have at least one real map entry.
19424 */
19425 if ((map->hdr.nentries == 0) ||
19426 (entry->vme_next->vme_start < address)) {
19427 vm_map_unlock(map);
19428 break;
19429 }
19430 /*
19431 * Move up to the next entry if needed
19432 */
19433 skip = (entry->vme_next->vme_start - address);
19434 if (skip >= amount_left) {
19435 amount_left = 0;
19436 } else {
19437 amount_left -= skip;
19438 }
19439 address = entry->vme_next->vme_start;
19440 vm_map_unlock(map);
19441 continue;
19442 }
19443
19444 offset = address - entry->vme_start;
19445 pmap_offset = address;
19446
19447 /*
19448 * do we have more to flush than is contained in this
19449 * entry?
19450 */
19451 if (amount_left + entry->vme_start + offset > entry->vme_end) {
19452 flush_size = entry->vme_end -
19453 (entry->vme_start + offset);
19454 } else {
19455 flush_size = amount_left;
19456 }
19457 amount_left -= flush_size;
19458 address += flush_size;
19459
19460 if (entry->is_sub_map == TRUE) {
19461 vm_map_t local_map;
19462 vm_map_offset_t local_offset;
19463
19464 local_map = VME_SUBMAP(entry);
19465 local_offset = VME_OFFSET(entry);
19466 vm_map_reference(local_map);
19467 vm_map_unlock(map);
19468 if (vm_map_msync(
19469 local_map,
19470 local_offset,
19471 flush_size,
19472 sync_flags) == KERN_INVALID_ADDRESS) {
19473 had_hole = TRUE;
19474 }
19475 vm_map_deallocate(local_map);
19476 continue;
19477 }
19478 object = VME_OBJECT(entry);
19479
19480 /*
19481 * We can't sync this object if the object has not been
19482 * created yet
19483 */
19484 if (object == VM_OBJECT_NULL) {
19485 vm_map_unlock(map);
19486 continue;
19487 }
19488 offset += VME_OFFSET(entry);
19489
19490 vm_object_lock(object);
19491
19492 if (sync_flags & (VM_SYNC_KILLPAGES | VM_SYNC_DEACTIVATE)) {
19493 int kill_pages = 0;
19494 boolean_t reusable_pages = FALSE;
19495
19496 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19497 /*
19498 * This is a destructive operation and so we
19499 * err on the side of limiting the range of
19500 * the operation.
19501 */
19502 start_offset = vm_object_round_page(offset);
19503 end_offset = vm_object_trunc_page(offset + flush_size);
19504
19505 if (end_offset <= start_offset) {
19506 vm_object_unlock(object);
19507 vm_map_unlock(map);
19508 continue;
19509 }
19510
19511 pmap_offset += start_offset - offset;
19512 } else {
19513 start_offset = offset;
19514 end_offset = offset + flush_size;
19515 }
19516
19517 if (sync_flags & VM_SYNC_KILLPAGES) {
19518 if (((object->ref_count == 1) ||
19519 ((object->copy_strategy !=
19520 MEMORY_OBJECT_COPY_SYMMETRIC) &&
19521 (object->copy == VM_OBJECT_NULL))) &&
19522 (object->shadow == VM_OBJECT_NULL)) {
19523 if (object->ref_count != 1) {
19524 vm_page_stats_reusable.free_shared++;
19525 }
19526 kill_pages = 1;
19527 } else {
19528 kill_pages = -1;
19529 }
19530 }
19531 if (kill_pages != -1) {
19532 vm_object_deactivate_pages(
19533 object,
19534 start_offset,
19535 (vm_object_size_t) (end_offset - start_offset),
19536 kill_pages,
19537 reusable_pages,
19538 map->pmap,
19539 pmap_offset);
19540 }
19541 vm_object_unlock(object);
19542 vm_map_unlock(map);
19543 continue;
19544 }
19545 /*
19546 * We can't sync this object if there isn't a pager.
19547 * Don't bother to sync internal objects, since there can't
19548 * be any "permanent" storage for these objects anyway.
19549 */
19550 if ((object->pager == MEMORY_OBJECT_NULL) ||
19551 (object->internal) || (object->private)) {
19552 vm_object_unlock(object);
19553 vm_map_unlock(map);
19554 continue;
19555 }
19556 /*
19557 * keep reference on the object until syncing is done
19558 */
19559 vm_object_reference_locked(object);
19560 vm_object_unlock(object);
19561
19562 vm_map_unlock(map);
19563
19564 if (VM_MAP_PAGE_MASK(map) < PAGE_MASK) {
19565 start_offset = vm_object_trunc_page(offset);
19566 end_offset = vm_object_round_page(offset + flush_size);
19567 } else {
19568 start_offset = offset;
19569 end_offset = offset + flush_size;
19570 }
19571
19572 do_sync_req = vm_object_sync(object,
19573 start_offset,
19574 (end_offset - start_offset),
19575 sync_flags & VM_SYNC_INVALIDATE,
19576 ((sync_flags & VM_SYNC_SYNCHRONOUS) ||
19577 (sync_flags & VM_SYNC_ASYNCHRONOUS)),
19578 sync_flags & VM_SYNC_SYNCHRONOUS);
19579
19580 if ((sync_flags & VM_SYNC_INVALIDATE) && object->resident_page_count == 0) {
19581 /*
19582 * clear out the clustering and read-ahead hints
19583 */
19584 vm_object_lock(object);
19585
19586 object->pages_created = 0;
19587 object->pages_used = 0;
19588 object->sequential = 0;
19589 object->last_alloc = 0;
19590
19591 vm_object_unlock(object);
19592 }
19593 vm_object_deallocate(object);
19594 } /* while */
19595
19596 /* for proper msync() behaviour */
19597 if (had_hole == TRUE && (sync_flags & VM_SYNC_CONTIGUOUS)) {
19598 return KERN_INVALID_ADDRESS;
19599 }
19600
19601 return KERN_SUCCESS;
19602 } /* vm_map_msync */
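/*
 * A minimal usage sketch (not from the original source): how a kernel
 * caller might drive vm_map_msync() with the flag combinations documented
 * above.  "addr" and "len" are hypothetical values.
 *
 *	kern_return_t kr;
 *	vm_map_t map = current_map();
 *
 *	// Write dirty/precious pages back to the memory manager and wait;
 *	// report any hole in the range as KERN_INVALID_ADDRESS.
 *	kr = vm_map_msync(map, (vm_map_address_t)addr, (vm_map_size_t)len,
 *	    VM_SYNC_SYNCHRONOUS | VM_SYNC_CONTIGUOUS);
 *
 *	// Discard the pages, returning only precious pages to the manager.
 *	if (kr == KERN_SUCCESS) {
 *		kr = vm_map_msync(map, (vm_map_address_t)addr,
 *		    (vm_map_size_t)len, VM_SYNC_INVALIDATE);
 *	}
 */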
19603
19604 kern_return_t
19605 vm_named_entry_from_vm_object(
19606 vm_named_entry_t named_entry,
19607 vm_object_t object,
19608 vm_object_offset_t offset,
19609 vm_object_size_t size,
19610 vm_prot_t prot)
19611 {
19612 vm_map_copy_t copy;
19613 vm_map_entry_t copy_entry;
19614
19615 assert(!named_entry->is_sub_map);
19616 assert(!named_entry->is_copy);
19617 assert(!named_entry->is_object);
19618 assert(!named_entry->internal);
19619 assert(named_entry->backing.copy == VM_MAP_COPY_NULL);
19620
19621 copy = vm_map_copy_allocate();
19622 copy->type = VM_MAP_COPY_ENTRY_LIST;
19623 copy->offset = offset;
19624 copy->size = size;
19625 copy->cpy_hdr.page_shift = PAGE_SHIFT;
19626 vm_map_store_init(&copy->cpy_hdr);
19627
19628 copy_entry = vm_map_copy_entry_create(copy, FALSE);
19629 copy_entry->protection = prot;
19630 copy_entry->max_protection = prot;
19631 copy_entry->use_pmap = TRUE;
19632 copy_entry->vme_start = VM_MAP_TRUNC_PAGE(offset, PAGE_MASK);
19633 copy_entry->vme_end = VM_MAP_ROUND_PAGE(offset + size, PAGE_MASK);
19634 VME_OBJECT_SET(copy_entry, object);
19635 VME_OFFSET_SET(copy_entry, vm_object_trunc_page(offset));
19636 vm_map_copy_entry_link(copy, vm_map_copy_last_entry(copy), copy_entry);
19637
19638 named_entry->backing.copy = copy;
19639 named_entry->is_object = TRUE;
19640 if (object->internal) {
19641 named_entry->internal = TRUE;
19642 }
19643
19644 DEBUG4K_MEMENTRY("named_entry %p copy %p object %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, object, offset, size, prot);
19645
19646 return KERN_SUCCESS;
19647 }
19648
19649 vm_object_t
19650 vm_named_entry_to_vm_object(
19651 vm_named_entry_t named_entry)
19652 {
19653 vm_map_copy_t copy;
19654 vm_map_entry_t copy_entry;
19655 vm_object_t object;
19656
19657 assert(!named_entry->is_sub_map);
19658 assert(!named_entry->is_copy);
19659 assert(named_entry->is_object);
19660 copy = named_entry->backing.copy;
19661 assert(copy != VM_MAP_COPY_NULL);
19662 assert(copy->cpy_hdr.nentries == 1);
19663 copy_entry = vm_map_copy_first_entry(copy);
19664 assert(!copy_entry->is_sub_map);
19665 object = VME_OBJECT(copy_entry);
19666
19667 DEBUG4K_MEMENTRY("%p -> %p -> %p [0x%llx 0x%llx 0x%llx 0x%x/0x%x ] -> %p offset 0x%llx size 0x%llx prot 0x%x\n", named_entry, copy, copy_entry, (uint64_t)copy_entry->vme_start, (uint64_t)copy_entry->vme_end, copy_entry->vme_offset, copy_entry->protection, copy_entry->max_protection, object, named_entry->offset, named_entry->size, named_entry->protection);
19668
19669 return object;
19670 }
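/*
 * A minimal round-trip sketch (not from the original source) for the two
 * helpers above.  "named_entry", "object", "offset", "size" and "prot" are
 * hypothetical, and the usual named-entry setup and locking are omitted.
 *
 *	kern_return_t kr;
 *
 *	// Wrap "object" in a one-entry VM_MAP_COPY_ENTRY_LIST copy.
 *	kr = vm_named_entry_from_vm_object(named_entry, object,
 *	    offset, size, prot);
 *	assert(kr == KERN_SUCCESS);
 *
 *	// Later, recover the same VM object from the named entry.
 *	vm_object_t obj = vm_named_entry_to_vm_object(named_entry);
 *	assert(obj == object);
 */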
19671
19672 /*
19673 * Routine: convert_port_entry_to_map
19674 * Purpose:
19675 * Convert from a port specifying an entry or a task
19676 * to a map. Doesn't consume the port ref; produces a map ref,
19677 * which may be null. Unlike convert_port_to_map, the
19678 * port may be either task-backed or named-entry-backed.
19679 * Conditions:
19680 * Nothing locked.
19681 */
19682
19683
19684 vm_map_t
19685 convert_port_entry_to_map(
19686 ipc_port_t port)
19687 {
19688 vm_map_t map;
19689 vm_named_entry_t named_entry;
19690 uint32_t try_failed_count = 0;
19691
19692 if (IP_VALID(port) && (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19693 while (TRUE) {
19694 ip_lock(port);
19695 if (ip_active(port) && (ip_kotype(port)
19696 == IKOT_NAMED_ENTRY)) {
19697 named_entry =
19698 (vm_named_entry_t) ip_get_kobject(port);
19699 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19700 ip_unlock(port);
19701
19702 try_failed_count++;
19703 mutex_pause(try_failed_count);
19704 continue;
19705 }
19706 named_entry->ref_count++;
19707 lck_mtx_unlock(&(named_entry)->Lock);
19708 ip_unlock(port);
19709 if ((named_entry->is_sub_map) &&
19710 (named_entry->protection
19711 & VM_PROT_WRITE)) {
19712 map = named_entry->backing.map;
19713 if (map->pmap != PMAP_NULL) {
19714 if (map->pmap == kernel_pmap) {
19715 panic("userspace has access "
19716 "to a kernel map %p", map);
19717 }
19718 pmap_require(map->pmap);
19719 }
19720 } else {
19721 mach_destroy_memory_entry(port);
19722 return VM_MAP_NULL;
19723 }
19724 vm_map_reference_swap(map);
19725 mach_destroy_memory_entry(port);
19726 break;
19727 } else {
19728 return VM_MAP_NULL;
19729 }
19730 }
19731 } else {
19732 map = convert_port_to_map(port);
19733 }
19734
19735 return map;
19736 }
19737
19738 /*
19739 * Routine: convert_port_entry_to_object
19740 * Purpose:
19741 * Convert from a port specifying a named entry to an
19742 * object. Doesn't consume the port ref; produces an object ref,
19743 * which may be null.
19744 * Conditions:
19745 * Nothing locked.
19746 */
19747
19748
19749 vm_object_t
19750 convert_port_entry_to_object(
19751 ipc_port_t port)
19752 {
19753 vm_object_t object = VM_OBJECT_NULL;
19754 vm_named_entry_t named_entry;
19755 uint32_t try_failed_count = 0;
19756
19757 if (IP_VALID(port) &&
19758 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19759 try_again:
19760 ip_lock(port);
19761 if (ip_active(port) &&
19762 (ip_kotype(port) == IKOT_NAMED_ENTRY)) {
19763 named_entry = (vm_named_entry_t) ip_get_kobject(port);
19764 if (!(lck_mtx_try_lock(&(named_entry)->Lock))) {
19765 ip_unlock(port);
19766 try_failed_count++;
19767 mutex_pause(try_failed_count);
19768 goto try_again;
19769 }
19770 named_entry->ref_count++;
19771 lck_mtx_unlock(&(named_entry)->Lock);
19772 ip_unlock(port);
19773 if (!(named_entry->is_sub_map) &&
19774 !(named_entry->is_copy) &&
19775 (named_entry->is_object) &&
19776 (named_entry->protection & VM_PROT_WRITE)) {
19777 vm_map_copy_t copy;
19778 vm_map_entry_t copy_entry;
19779
19780 copy = named_entry->backing.copy;
19781 assert(copy->cpy_hdr.nentries == 1);
19782 copy_entry = vm_map_copy_first_entry(copy);
19783 assert(!copy_entry->is_sub_map);
19784 object = VME_OBJECT(copy_entry);
19785 assert(object != VM_OBJECT_NULL);
19786 vm_object_reference(object);
19787 }
19788 mach_destroy_memory_entry(port);
19789 }
19790 }
19791
19792 return object;
19793 }
19794
19795 /*
19796 * Export routines to other components for the things we access locally through
19797 * macros.
19798 */
19799 #undef current_map
19800 vm_map_t
19801 current_map(void)
19802 {
19803 return current_map_fast();
19804 }
19805
19806 /*
19807 * vm_map_reference:
19808 *
19809 * Most code internal to the osfmk will go through a
19810 * macro defining this. This is always here for the
19811 * use of other kernel components.
19812 */
19813 #undef vm_map_reference
19814 void
19815 vm_map_reference(
19816 vm_map_t map)
19817 {
19818 if (map == VM_MAP_NULL) {
19819 return;
19820 }
19821
19822 lck_mtx_lock(&map->s_lock);
19823 #if TASK_SWAPPER
19824 assert(map->res_count > 0);
19825 assert(os_ref_get_count(&map->map_refcnt) >= map->res_count);
19826 map->res_count++;
19827 #endif
19828 os_ref_retain_locked(&map->map_refcnt);
19829 lck_mtx_unlock(&map->s_lock);
19830 }
19831
19832 /*
19833 * vm_map_deallocate:
19834 *
19835 * Removes a reference from the specified map,
19836 * destroying it if no references remain.
19837 * The map should not be locked.
19838 */
19839 void
19840 vm_map_deallocate(
19841 vm_map_t map)
19842 {
19843 unsigned int ref;
19844
19845 if (map == VM_MAP_NULL) {
19846 return;
19847 }
19848
19849 lck_mtx_lock(&map->s_lock);
19850 ref = os_ref_release_locked(&map->map_refcnt);
19851 if (ref > 0) {
19852 vm_map_res_deallocate(map);
19853 lck_mtx_unlock(&map->s_lock);
19854 return;
19855 }
19856 assert(os_ref_get_count(&map->map_refcnt) == 0);
19857 lck_mtx_unlock(&map->s_lock);
19858
19859 #if TASK_SWAPPER
19860 /*
19861 * The map residence count isn't decremented here because
19862 * the vm_map_delete below will traverse the entire map,
19863 * deleting entries, and the residence counts on objects
19864 * and sharing maps will go away then.
19865 */
19866 #endif
19867
19868 vm_map_destroy(map, VM_MAP_REMOVE_NO_FLAGS);
19869 }
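/*
 * A minimal sketch (not from the original source) of the typical
 * vm_map_reference()/vm_map_deallocate() pairing, as used by the submap
 * cases earlier in this file when the parent map's lock must be dropped.
 * "map" and "entry" are hypothetical; "entry" is assumed to be a submap
 * entry of a read-locked "map".
 *
 *	vm_map_t sub_map = VME_SUBMAP(entry);
 *
 *	vm_map_reference(sub_map);	// keep the submap alive...
 *	vm_map_unlock_read(map);	// ...while the parent map is unlocked
 *
 *	// ... operate on sub_map here ...
 *
 *	vm_map_lock_read(map);
 *	vm_map_deallocate(sub_map);	// drop the extra reference
 */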
19870
19871 void
19872 vm_map_inspect_deallocate(
19873 vm_map_inspect_t map)
19874 {
19875 vm_map_deallocate((vm_map_t)map);
19876 }
19877
19878 void
19879 vm_map_read_deallocate(
19880 vm_map_read_t map)
19881 {
19882 vm_map_deallocate((vm_map_t)map);
19883 }
19884
19885
19886 void
19887 vm_map_disable_NX(vm_map_t map)
19888 {
19889 if (map == NULL) {
19890 return;
19891 }
19892 if (map->pmap == NULL) {
19893 return;
19894 }
19895
19896 pmap_disable_NX(map->pmap);
19897 }
19898
19899 void
19900 vm_map_disallow_data_exec(vm_map_t map)
19901 {
19902 if (map == NULL) {
19903 return;
19904 }
19905
19906 map->map_disallow_data_exec = TRUE;
19907 }
19908
19909 /* XXX Consider making these constants (VM_MAX_ADDRESS and MACH_VM_MAX_ADDRESS)
19910 * more descriptive.
19911 */
19912 void
19913 vm_map_set_32bit(vm_map_t map)
19914 {
19915 #if defined(__arm__) || defined(__arm64__)
19916 map->max_offset = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_DEVICE);
19917 #else
19918 map->max_offset = (vm_map_offset_t)VM_MAX_ADDRESS;
19919 #endif
19920 }
19921
19922
19923 void
19924 vm_map_set_64bit(vm_map_t map)
19925 {
19926 #if defined(__arm__) || defined(__arm64__)
19927 map->max_offset = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
19928 #else
19929 map->max_offset = (vm_map_offset_t)MACH_VM_MAX_ADDRESS;
19930 #endif
19931 }
19932
19933 /*
19934 * Expand the maximum size of an existing map to the maximum supported.
19935 */
19936 void
19937 vm_map_set_jumbo(vm_map_t map)
19938 {
19939 #if defined (__arm64__) && !defined(CONFIG_ARROW)
19940 vm_map_set_max_addr(map, ~0);
19941 #else /* arm64 */
19942 (void) map;
19943 #endif
19944 }
19945
19946 /*
19947 * This map has a JIT entitlement
19948 */
19949 void
19950 vm_map_set_jit_entitled(vm_map_t map)
19951 {
19952 #if defined (__arm64__)
19953 pmap_set_jit_entitled(map->pmap);
19954 #else /* arm64 */
19955 (void) map;
19956 #endif
19957 }
19958
19959 /*
19960 * Expand the maximum size of an existing map.
19961 */
19962 void
19963 vm_map_set_max_addr(vm_map_t map, vm_map_offset_t new_max_offset)
19964 {
19965 #if defined(__arm64__)
19966 vm_map_offset_t max_supported_offset = 0;
19967 vm_map_offset_t old_max_offset = map->max_offset;
19968 max_supported_offset = pmap_max_offset(vm_map_is_64bit(map), ARM_PMAP_MAX_OFFSET_JUMBO);
19969
19970 new_max_offset = trunc_page(new_max_offset);
19971
19972 /* The address space cannot be shrunk using this routine. */
19973 if (old_max_offset >= new_max_offset) {
19974 return;
19975 }
19976
19977 if (max_supported_offset < new_max_offset) {
19978 new_max_offset = max_supported_offset;
19979 }
19980
19981 map->max_offset = new_max_offset;
19982
19983 if (map->holes_list->prev->vme_end == old_max_offset) {
19984 /*
19985 * There is already a hole at the end of the map; simply make it bigger.
19986 */
19987 map->holes_list->prev->vme_end = map->max_offset;
19988 } else {
19989 /*
19990 * There is no hole at the end, so we need to create a new hole
19991 * for the new empty space we're creating.
19992 */
19993 struct vm_map_links *new_hole = zalloc(vm_map_holes_zone);
19994 new_hole->start = old_max_offset;
19995 new_hole->end = map->max_offset;
19996 new_hole->prev = map->holes_list->prev;
19997 new_hole->next = (struct vm_map_entry *)map->holes_list;
19998 map->holes_list->prev->links.next = (struct vm_map_entry *)new_hole;
19999 map->holes_list->prev = (struct vm_map_entry *)new_hole;
20000 }
20001 #else
20002 (void)map;
20003 (void)new_max_offset;
20004 #endif
20005 }
20006
20007 vm_map_offset_t
20008 vm_compute_max_offset(boolean_t is64)
20009 {
20010 #if defined(__arm__) || defined(__arm64__)
20011 return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
20012 #else
20013 return is64 ? (vm_map_offset_t)MACH_VM_MAX_ADDRESS : (vm_map_offset_t)VM_MAX_ADDRESS;
20014 #endif
20015 }
20016
20017 void
20018 vm_map_get_max_aslr_slide_section(
20019 vm_map_t map __unused,
20020 int64_t *max_sections,
20021 int64_t *section_size)
20022 {
20023 #if defined(__arm64__)
20024 *max_sections = 3;
20025 *section_size = ARM_TT_TWIG_SIZE;
20026 #else
20027 *max_sections = 1;
20028 *section_size = 0;
20029 #endif
20030 }
20031
20032 uint64_t
20033 vm_map_get_max_aslr_slide_pages(vm_map_t map)
20034 {
20035 #if defined(__arm64__)
20036 /* Limit arm64 slide to 16MB to conserve contiguous VA space in the more
20037 * limited embedded address space; this is also meant to minimize pmap
20038 * memory usage on 16KB page systems.
20039 */
20040 return 1 << (24 - VM_MAP_PAGE_SHIFT(map));
20041 #else
20042 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20043 #endif
20044 }
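/*
 * Worked example (a reading of the code above, not from the original
 * source): with 16KB pages, VM_MAP_PAGE_SHIFT(map) == 14, so this returns
 * 1 << (24 - 14) == 1024 possible slide positions, i.e. 1024 * 16KB == 16MB
 * of slide range.  With 4KB pages (shift 12) it returns 4096 positions,
 * which is likewise 4096 * 4KB == 16MB.
 */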
20045
20046 uint64_t
20047 vm_map_get_max_loader_aslr_slide_pages(vm_map_t map)
20048 {
20049 #if defined(__arm64__)
20050 /* We limit the loader slide to 4MB, in order to ensure at least 8 bits
20051 * of independent entropy on 16KB page systems.
20052 */
20053 return 1 << (22 - VM_MAP_PAGE_SHIFT(map));
20054 #else
20055 return 1 << (vm_map_is_64bit(map) ? 16 : 8);
20056 #endif
20057 }
20058
20059 #ifndef __arm__
20060 boolean_t
20061 vm_map_is_64bit(
20062 vm_map_t map)
20063 {
20064 return map->max_offset > ((vm_map_offset_t)VM_MAX_ADDRESS);
20065 }
20066 #endif
20067
20068 boolean_t
20069 vm_map_has_hard_pagezero(
20070 vm_map_t map,
20071 vm_map_offset_t pagezero_size)
20072 {
20073 /*
20074 * XXX FBDP
20075 * We should lock the VM map (for read) here but we can get away
20076 * with it for now because there can't really be any race condition:
20077 * the VM map's min_offset is changed only when the VM map is created
20078 * and when the zero page is established (when the binary gets loaded),
20079 * and this routine gets called only when the task terminates and the
20080 * VM map is being torn down, and when a new map is created via
20081 * load_machfile()/execve().
20082 */
20083 return map->min_offset >= pagezero_size;
20084 }
20085
20086 /*
20087 * Raise a VM map's maximum offset.
20088 */
20089 kern_return_t
20090 vm_map_raise_max_offset(
20091 vm_map_t map,
20092 vm_map_offset_t new_max_offset)
20093 {
20094 kern_return_t ret;
20095
20096 vm_map_lock(map);
20097 ret = KERN_INVALID_ADDRESS;
20098
20099 if (new_max_offset >= map->max_offset) {
20100 if (!vm_map_is_64bit(map)) {
20101 if (new_max_offset <= (vm_map_offset_t)VM_MAX_ADDRESS) {
20102 map->max_offset = new_max_offset;
20103 ret = KERN_SUCCESS;
20104 }
20105 } else {
20106 if (new_max_offset <= (vm_map_offset_t)MACH_VM_MAX_ADDRESS) {
20107 map->max_offset = new_max_offset;
20108 ret = KERN_SUCCESS;
20109 }
20110 }
20111 }
20112
20113 vm_map_unlock(map);
20114 return ret;
20115 }
20116
20117
20118 /*
20119 * Raise a VM map's minimum offset.
20120 * To strictly enforce "page zero" reservation.
20121 */
20122 kern_return_t
20123 vm_map_raise_min_offset(
20124 vm_map_t map,
20125 vm_map_offset_t new_min_offset)
20126 {
20127 vm_map_entry_t first_entry;
20128
20129 new_min_offset = vm_map_round_page(new_min_offset,
20130 VM_MAP_PAGE_MASK(map));
20131
20132 vm_map_lock(map);
20133
20134 if (new_min_offset < map->min_offset) {
20135 /*
20136 * Can't move min_offset backwards, as that would expose
20137 * a part of the address space that was previously, and for
20138 * possibly good reasons, inaccessible.
20139 */
20140 vm_map_unlock(map);
20141 return KERN_INVALID_ADDRESS;
20142 }
20143 if (new_min_offset >= map->max_offset) {
20144 /* can't go beyond the end of the address space */
20145 vm_map_unlock(map);
20146 return KERN_INVALID_ADDRESS;
20147 }
20148
20149 first_entry = vm_map_first_entry(map);
20150 if (first_entry != vm_map_to_entry(map) &&
20151 first_entry->vme_start < new_min_offset) {
20152 /*
20153 * Some memory was already allocated below the new
20154 * minimum offset. It's too late to change it now...
20155 */
20156 vm_map_unlock(map);
20157 return KERN_NO_SPACE;
20158 }
20159
20160 map->min_offset = new_min_offset;
20161
20162 assert(map->holes_list);
20163 map->holes_list->start = new_min_offset;
20164 assert(new_min_offset < map->holes_list->end);
20165
20166 vm_map_unlock(map);
20167
20168 return KERN_SUCCESS;
20169 }
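/*
 * A minimal sketch (not from the original source): enforcing a hypothetical
 * 4GB hard page-zero region on a freshly created 64-bit map, before
 * anything has been mapped into it.
 *
 *	kern_return_t kr;
 *
 *	kr = vm_map_raise_min_offset(map, (vm_map_offset_t)0x100000000ULL);
 *	if (kr == KERN_NO_SPACE) {
 *		// something is already mapped below the requested minimum
 *	} else if (kr == KERN_INVALID_ADDRESS) {
 *		// the requested minimum is below the current one, or past
 *		// the end of the address space
 *	}
 */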
20170
20171 /*
20172 * Set the limit on the maximum amount of user wired memory allowed for this map.
20173 * This is basically a copy of the MEMLOCK rlimit value maintained by the BSD side of
20174 * the kernel. The limits are checked on the Mach VM side, so we keep a copy here
20175 * to avoid having to reach over to the BSD data structures.
20176 */
20177
20178 void
20179 vm_map_set_user_wire_limit(vm_map_t map,
20180 vm_size_t limit)
20181 {
20182 map->user_wire_limit = limit;
20183 }
20184
20185
20186 void
20187 vm_map_switch_protect(vm_map_t map,
20188 boolean_t val)
20189 {
20190 vm_map_lock(map);
20191 map->switch_protect = val;
20192 vm_map_unlock(map);
20193 }
20194
20195 extern int cs_process_enforcement_enable;
20196 boolean_t
20197 vm_map_cs_enforcement(
20198 vm_map_t map)
20199 {
20200 if (cs_process_enforcement_enable) {
20201 return TRUE;
20202 }
20203 return map->cs_enforcement;
20204 }
20205
20206 void
20207 vm_map_cs_enforcement_set(
20208 vm_map_t map,
20209 boolean_t val)
20210 {
20211 vm_map_lock(map);
20212 map->cs_enforcement = val;
20213 pmap_set_vm_map_cs_enforced(map->pmap, val);
20214 vm_map_unlock(map);
20215 }
20216
20217 /*
20218 * IOKit has mapped a region into this map; adjust the pmap's ledgers appropriately.
20219 * phys_footprint is a composite limit consisting of iokit + physmem, so we need to
20220 * bump both counters.
20221 */
20222 void
20223 vm_map_iokit_mapped_region(vm_map_t map, vm_size_t bytes)
20224 {
20225 pmap_t pmap = vm_map_pmap(map);
20226
20227 ledger_credit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20228 ledger_credit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20229 }
20230
20231 void
20232 vm_map_iokit_unmapped_region(vm_map_t map, vm_size_t bytes)
20233 {
20234 pmap_t pmap = vm_map_pmap(map);
20235
20236 ledger_debit(pmap->ledger, task_ledgers.iokit_mapped, bytes);
20237 ledger_debit(pmap->ledger, task_ledgers.phys_footprint, bytes);
20238 }
20239
20240 /* Add (generate) code signature for memory range */
20241 #if CONFIG_DYNAMIC_CODE_SIGNING
20242 kern_return_t
20243 vm_map_sign(vm_map_t map,
20244 vm_map_offset_t start,
20245 vm_map_offset_t end)
20246 {
20247 vm_map_entry_t entry;
20248 vm_page_t m;
20249 vm_object_t object;
20250
20251 /*
20252 * Vet all the input parameters and current type and state of the
20253 * underlying object. Return with an error if anything is amiss.
20254 */
20255 if (map == VM_MAP_NULL) {
20256 return KERN_INVALID_ARGUMENT;
20257 }
20258
20259 vm_map_lock_read(map);
20260
20261 if (!vm_map_lookup_entry(map, start, &entry) || entry->is_sub_map) {
20262 /*
20263 * Must pass a valid non-submap address.
20264 */
20265 vm_map_unlock_read(map);
20266 return KERN_INVALID_ADDRESS;
20267 }
20268
20269 if ((entry->vme_start > start) || (entry->vme_end < end)) {
20270 /*
20271 * Map entry doesn't cover the requested range. Not handling
20272 * this situation currently.
20273 */
20274 vm_map_unlock_read(map);
20275 return KERN_INVALID_ARGUMENT;
20276 }
20277
20278 object = VME_OBJECT(entry);
20279 if (object == VM_OBJECT_NULL) {
20280 /*
20281 * Object must already be present or we can't sign.
20282 */
20283 vm_map_unlock_read(map);
20284 return KERN_INVALID_ARGUMENT;
20285 }
20286
20287 vm_object_lock(object);
20288 vm_map_unlock_read(map);
20289
20290 while (start < end) {
20291 uint32_t refmod;
20292
20293 m = vm_page_lookup(object,
20294 start - entry->vme_start + VME_OFFSET(entry));
20295 if (m == VM_PAGE_NULL) {
20296 /* should we try to fault a page here? we can probably
20297 * demand it exists and is locked for this request */
20298 vm_object_unlock(object);
20299 return KERN_FAILURE;
20300 }
20301 /* deal with special page status */
20302 if (m->vmp_busy ||
20303 (m->vmp_unusual && (m->vmp_error || m->vmp_restart || m->vmp_private || m->vmp_absent))) {
20304 vm_object_unlock(object);
20305 return KERN_FAILURE;
20306 }
20307
20308 /* Page is OK... now "validate" it */
20309 /* This is the place where we'll call out to create a code
20310 * directory, later */
20311 /* XXX TODO4K: deal with 4k subpages individually? */
20312 m->vmp_cs_validated = VMP_CS_ALL_TRUE;
20313
20314 /* The page is now "clean" for codesigning purposes. That means
20315 * we don't consider it as modified (wpmapped) anymore. But
20316 * we'll disconnect the page so we note any future modification
20317 * attempts. */
20318 m->vmp_wpmapped = FALSE;
20319 refmod = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
20320
20321 /* Pull the dirty status from the pmap, since we cleared the
20322 * wpmapped bit */
20323 if ((refmod & VM_MEM_MODIFIED) && !m->vmp_dirty) {
20324 SET_PAGE_DIRTY(m, FALSE);
20325 }
20326
20327 /* On to the next page */
20328 start += PAGE_SIZE;
20329 }
20330 vm_object_unlock(object);
20331
20332 return KERN_SUCCESS;
20333 }
20334 #endif
20335
20336 kern_return_t
20337 vm_map_partial_reap(vm_map_t map, unsigned int *reclaimed_resident, unsigned int *reclaimed_compressed)
20338 {
20339 vm_map_entry_t entry = VM_MAP_ENTRY_NULL;
20340 vm_map_entry_t next_entry;
20341 kern_return_t kr = KERN_SUCCESS;
20342 vm_map_t zap_map;
20343
20344 vm_map_lock(map);
20345
20346 /*
20347 * We use a "zap_map" to avoid having to unlock
20348 * the "map" in vm_map_delete().
20349 */
20350 zap_map = vm_map_create(PMAP_NULL,
20351 map->min_offset,
20352 map->max_offset,
20353 map->hdr.entries_pageable);
20354
20355 if (zap_map == VM_MAP_NULL) {
20356 return KERN_RESOURCE_SHORTAGE;
20357 }
20358
20359 vm_map_set_page_shift(zap_map,
20360 VM_MAP_PAGE_SHIFT(map));
20361 vm_map_disable_hole_optimization(zap_map);
20362
20363 for (entry = vm_map_first_entry(map);
20364 entry != vm_map_to_entry(map);
20365 entry = next_entry) {
20366 next_entry = entry->vme_next;
20367
20368 if (VME_OBJECT(entry) &&
20369 !entry->is_sub_map &&
20370 (VME_OBJECT(entry)->internal == TRUE) &&
20371 (VME_OBJECT(entry)->ref_count == 1)) {
20372 *reclaimed_resident += VME_OBJECT(entry)->resident_page_count;
20373 *reclaimed_compressed += vm_compressor_pager_get_count(VME_OBJECT(entry)->pager);
20374
20375 (void)vm_map_delete(map,
20376 entry->vme_start,
20377 entry->vme_end,
20378 VM_MAP_REMOVE_SAVE_ENTRIES,
20379 zap_map);
20380 }
20381 }
20382
20383 vm_map_unlock(map);
20384
20385 /*
20386 * Get rid of the "zap_map" and all the map entries that
20387 * it may still contain.
20388 */
20389 if (zap_map != VM_MAP_NULL) {
20390 vm_map_destroy(zap_map, VM_MAP_REMOVE_NO_PMAP_CLEANUP);
20391 zap_map = VM_MAP_NULL;
20392 }
20393
20394 return kr;
20395 }
20396
20397
20398 #if DEVELOPMENT || DEBUG
20399
20400 int
20401 vm_map_disconnect_page_mappings(
20402 vm_map_t map,
20403 boolean_t do_unnest)
20404 {
20405 vm_map_entry_t entry;
20406 int page_count = 0;
20407
20408 if (do_unnest == TRUE) {
20409 #ifndef NO_NESTED_PMAP
20410 vm_map_lock(map);
20411
20412 for (entry = vm_map_first_entry(map);
20413 entry != vm_map_to_entry(map);
20414 entry = entry->vme_next) {
20415 if (entry->is_sub_map && entry->use_pmap) {
20416 /*
20417 * Make sure the range between the start of this entry and
20418 * the end of this entry is no longer nested, so that
20419 * we will only remove mappings from the pmap in use by
20420 * this task.
20421 */
20422 vm_map_clip_unnest(map, entry, entry->vme_start, entry->vme_end);
20423 }
20424 }
20425 vm_map_unlock(map);
20426 #endif
20427 }
20428 vm_map_lock_read(map);
20429
20430 page_count = map->pmap->stats.resident_count;
20431
20432 for (entry = vm_map_first_entry(map);
20433 entry != vm_map_to_entry(map);
20434 entry = entry->vme_next) {
20435 if (!entry->is_sub_map && ((VME_OBJECT(entry) == 0) ||
20436 (VME_OBJECT(entry)->phys_contiguous))) {
20437 continue;
20438 }
20439 if (entry->is_sub_map) {
20440 assert(!entry->use_pmap);
20441 }
20442
20443 pmap_remove_options(map->pmap, entry->vme_start, entry->vme_end, 0);
20444 }
20445 vm_map_unlock_read(map);
20446
20447 return page_count;
20448 }
20449
20450 kern_return_t
20451 vm_map_inject_error(vm_map_t map, vm_map_offset_t vaddr)
20452 {
20453 vm_object_t object = NULL;
20454 vm_object_offset_t offset;
20455 vm_prot_t prot;
20456 boolean_t wired;
20457 vm_map_version_t version;
20458 vm_map_t real_map;
20459 int result = KERN_FAILURE;
20460
20461 vaddr = vm_map_trunc_page(vaddr, PAGE_MASK);
20462 vm_map_lock(map);
20463
20464 result = vm_map_lookup_locked(&map, vaddr, VM_PROT_READ,
20465 OBJECT_LOCK_EXCLUSIVE, &version, &object, &offset, &prot, &wired,
20466 NULL, &real_map, NULL);
20467 if (object == NULL) {
20468 result = KERN_MEMORY_ERROR;
20469 } else if (object->pager) {
20470 result = vm_compressor_pager_inject_error(object->pager,
20471 offset);
20472 } else {
20473 result = KERN_MEMORY_PRESENT;
20474 }
20475
20476 if (object != NULL) {
20477 vm_object_unlock(object);
20478 }
20479
20480 if (real_map != map) {
20481 vm_map_unlock(real_map);
20482 }
20483 vm_map_unlock(map);
20484
20485 return result;
20486 }
20487
20488 #endif
20489
20490
20491 #if CONFIG_FREEZE
20492
20493
20494 extern struct freezer_context freezer_context_global;
20495 AbsoluteTime c_freezer_last_yield_ts = 0;
20496
20497 extern unsigned int memorystatus_freeze_private_shared_pages_ratio;
20498 extern unsigned int memorystatus_freeze_shared_mb_per_process_max;
20499
20500 kern_return_t
20501 vm_map_freeze(
20502 task_t task,
20503 unsigned int *purgeable_count,
20504 unsigned int *wired_count,
20505 unsigned int *clean_count,
20506 unsigned int *dirty_count,
20507 unsigned int dirty_budget,
20508 unsigned int *shared_count,
20509 int *freezer_error_code,
20510 boolean_t eval_only)
20511 {
20512 vm_map_entry_t entry2 = VM_MAP_ENTRY_NULL;
20513 kern_return_t kr = KERN_SUCCESS;
20514 boolean_t evaluation_phase = TRUE;
20515 vm_object_t cur_shared_object = NULL;
20516 int cur_shared_obj_ref_cnt = 0;
20517 unsigned int dirty_private_count = 0, dirty_shared_count = 0, obj_pages_snapshot = 0;
20518
20519 *purgeable_count = *wired_count = *clean_count = *dirty_count = *shared_count = 0;
20520
20521 /*
20522 * We need the exclusive lock here so that we can
20523 * block any page faults or lookups while we are
20524 * in the middle of freezing this vm map.
20525 */
20526 vm_map_t map = task->map;
20527
20528 vm_map_lock(map);
20529
20530 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
20531
20532 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20533 if (vm_compressor_low_on_space()) {
20534 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20535 }
20536
20537 if (vm_swap_low_on_space()) {
20538 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20539 }
20540
20541 kr = KERN_NO_SPACE;
20542 goto done;
20543 }
20544
20545 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE == FALSE) {
20546 /*
20547 * In-memory compressor backing the freezer. No disk.
20548 * So no need to do the evaluation phase.
20549 */
20550 evaluation_phase = FALSE;
20551
20552 if (eval_only == TRUE) {
20553 /*
20554 * We don't support 'eval_only' mode
20555 * in this non-swap config.
20556 */
20557 *freezer_error_code = FREEZER_ERROR_GENERIC;
20558 kr = KERN_INVALID_ARGUMENT;
20559 goto done;
20560 }
20561
20562 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20563 clock_get_uptime(&c_freezer_last_yield_ts);
20564 }
20565 again:
20566
20567 for (entry2 = vm_map_first_entry(map);
20568 entry2 != vm_map_to_entry(map);
20569 entry2 = entry2->vme_next) {
20570 vm_object_t src_object = VME_OBJECT(entry2);
20571
20572 if (src_object &&
20573 !entry2->is_sub_map &&
20574 !src_object->phys_contiguous) {
20575 /* If eligible, scan the entry, moving eligible pages over to our parent object */
20576
20577 if (src_object->internal == TRUE) {
20578 if (VM_CONFIG_FREEZER_SWAP_IS_ACTIVE) {
20579 /*
20580 * We skip purgeable objects during evaluation phase only.
20581 * If we decide to freeze this process, we'll explicitly
20582 * purge these objects before we go around again with
20583 * 'evaluation_phase' set to FALSE.
20584 */
20585
20586 if ((src_object->purgable == VM_PURGABLE_EMPTY) || (src_object->purgable == VM_PURGABLE_VOLATILE)) {
20587 /*
20588 * We want to purge objects that may not belong to this task but are mapped
20589 * in this task alone. Since we already purged this task's purgeable memory
20590 * at the end of a successful evaluation phase, we want to avoid doing no-op calls
20591 * on this task's purgeable objects. Hence the check for only volatile objects.
20592 */
20593 if (evaluation_phase == FALSE &&
20594 (src_object->purgable == VM_PURGABLE_VOLATILE) &&
20595 (src_object->ref_count == 1)) {
20596 vm_object_lock(src_object);
20597 vm_object_purge(src_object, 0);
20598 vm_object_unlock(src_object);
20599 }
20600 continue;
20601 }
20602
20603 /*
20604 * Pages belonging to this object could be swapped to disk.
20605 * Make sure it's not a shared object because we could end
20606 * up just bringing it back in again.
20607 *
20608 * We try to optimize somewhat by checking for objects that are mapped
20609 * more than once within our own map. But we don't do full searches;
20610 * we just look at the entries following our current entry.
20611 */
20612
20613 if (src_object->ref_count > 1) {
20614 if (src_object != cur_shared_object) {
20615 obj_pages_snapshot = (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20616 dirty_shared_count += obj_pages_snapshot;
20617
20618 cur_shared_object = src_object;
20619 cur_shared_obj_ref_cnt = 1;
20620 continue;
20621 } else {
20622 cur_shared_obj_ref_cnt++;
20623 if (src_object->ref_count == cur_shared_obj_ref_cnt) {
20624 /*
20625 * Fall through to below and treat this object as private.
20626 * So deduct its pages from our shared total and add it to the
20627 * private total.
20628 */
20629
20630 dirty_shared_count -= obj_pages_snapshot;
20631 dirty_private_count += obj_pages_snapshot;
20632 } else {
20633 continue;
20634 }
20635 }
20636 }
20637
20638
20639 if (src_object->ref_count == 1) {
20640 dirty_private_count += (src_object->resident_page_count - src_object->wired_page_count) + vm_compressor_pager_get_count(src_object->pager);
20641 }
20642
20643 if (evaluation_phase == TRUE) {
20644 continue;
20645 }
20646 }
20647
20648 uint32_t paged_out_count = vm_object_compressed_freezer_pageout(src_object, dirty_budget);
20649 *wired_count += src_object->wired_page_count;
20650
20651 if (vm_compressor_low_on_space() || vm_swap_low_on_space()) {
20652 if (vm_compressor_low_on_space()) {
20653 *freezer_error_code = FREEZER_ERROR_NO_COMPRESSOR_SPACE;
20654 }
20655
20656 if (vm_swap_low_on_space()) {
20657 *freezer_error_code = FREEZER_ERROR_NO_SWAP_SPACE;
20658 }
20659
20660 kr = KERN_NO_SPACE;
20661 break;
20662 }
20663 if (paged_out_count >= dirty_budget) {
20664 break;
20665 }
20666 dirty_budget -= paged_out_count;
20667 }
20668 }
20669 }
20670
20671 *shared_count = (unsigned int) ((dirty_shared_count * PAGE_SIZE_64) / (1024 * 1024ULL));
20672 if (evaluation_phase) {
20673 unsigned int shared_pages_threshold = (memorystatus_freeze_shared_mb_per_process_max * 1024 * 1024ULL) / PAGE_SIZE_64;
20674
20675 if (dirty_shared_count > shared_pages_threshold) {
20676 *freezer_error_code = FREEZER_ERROR_EXCESS_SHARED_MEMORY;
20677 kr = KERN_FAILURE;
20678 goto done;
20679 }
20680
20681 if (dirty_shared_count &&
20682 ((dirty_private_count / dirty_shared_count) < memorystatus_freeze_private_shared_pages_ratio)) {
20683 *freezer_error_code = FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO;
20684 kr = KERN_FAILURE;
20685 goto done;
20686 }
20687
20688 evaluation_phase = FALSE;
20689 dirty_shared_count = dirty_private_count = 0;
20690
20691 freezer_context_global.freezer_ctx_uncompressed_pages = 0;
20692 clock_get_uptime(&c_freezer_last_yield_ts);
20693
20694 if (eval_only) {
20695 kr = KERN_SUCCESS;
20696 goto done;
20697 }
20698
20699 vm_purgeable_purge_task_owned(task);
20700
20701 goto again;
20702 } else {
20703 kr = KERN_SUCCESS;
20704 }
20705
20706 done:
20707 vm_map_unlock(map);
20708
20709 if ((eval_only == FALSE) && (kr == KERN_SUCCESS)) {
20710 vm_object_compressed_freezer_done();
20711 }
20712 return kr;
20713 }
20714
20715 #endif
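/*
 * Worked example (not from the original source) of the evaluation-phase
 * ratio gate in vm_map_freeze() above, assuming a hypothetical
 * memorystatus_freeze_private_shared_pages_ratio of 2: with 300 dirty
 * private pages and 200 dirty shared pages, 300 / 200 == 1 in integer
 * arithmetic, which is below the ratio, so evaluation fails with
 * FREEZER_ERROR_LOW_PRIVATE_SHARED_RATIO.  With 500 private and 200 shared
 * pages, 500 / 200 == 2, the gate passes, and the second pass (with
 * evaluation_phase == FALSE) goes on to compress the dirty pages.
 */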
20716
20717 /*
20718 * vm_map_entry_should_cow_for_true_share:
20719 *
20720 * Determines if the map entry should be clipped and setup for copy-on-write
20721 * to avoid applying "true_share" to a large VM object when only a subset is
20722 * targeted.
20723 *
20724 * For now, we target only the map entries created for the Objective C
20725 * Garbage Collector, which initially have the following properties:
20726 * - alias == VM_MEMORY_MALLOC
20727 * - wired_count == 0
20728 * - !needs_copy
20729 * and a VM object with:
20730 * - internal
20731 * - copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC
20732 * - !true_share
20733 * - vo_size == ANON_CHUNK_SIZE
20734 *
20735 * Only non-kernel map entries.
20736 */
20737 boolean_t
20738 vm_map_entry_should_cow_for_true_share(
20739 vm_map_entry_t entry)
20740 {
20741 vm_object_t object;
20742
20743 if (entry->is_sub_map) {
20744 /* entry does not point at a VM object */
20745 return FALSE;
20746 }
20747
20748 if (entry->needs_copy) {
20749 /* already set for copy_on_write: done! */
20750 return FALSE;
20751 }
20752
20753 if (VME_ALIAS(entry) != VM_MEMORY_MALLOC &&
20754 VME_ALIAS(entry) != VM_MEMORY_MALLOC_SMALL) {
20755 /* not a malloc heap or Obj-C Garbage Collector heap */
20756 return FALSE;
20757 }
20758
20759 if (entry->wired_count) {
20760 /* wired: can't change the map entry... */
20761 vm_counters.should_cow_but_wired++;
20762 return FALSE;
20763 }
20764
20765 object = VME_OBJECT(entry);
20766
20767 if (object == VM_OBJECT_NULL) {
20768 /* no object yet... */
20769 return FALSE;
20770 }
20771
20772 if (!object->internal) {
20773 /* not an internal object */
20774 return FALSE;
20775 }
20776
20777 if (object->copy_strategy != MEMORY_OBJECT_COPY_SYMMETRIC) {
20778 /* not the default copy strategy */
20779 return FALSE;
20780 }
20781
20782 if (object->true_share) {
20783 /* already true_share: too late to avoid it */
20784 return FALSE;
20785 }
20786
20787 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC &&
20788 object->vo_size != ANON_CHUNK_SIZE) {
20789 /* ... not an object created for the ObjC Garbage Collector */
20790 return FALSE;
20791 }
20792
20793 if (VME_ALIAS(entry) == VM_MEMORY_MALLOC_SMALL &&
20794 object->vo_size != 2048 * 4096) {
20795 /* ... not a "MALLOC_SMALL" heap */
20796 return FALSE;
20797 }
20798
20799 /*
20800 * All the criteria match: we have a large object being targeted for "true_share".
20801 * To limit the adverse side-effects linked with "true_share", tell the caller to
20802 * try to avoid setting up the entire object for "true_share" by clipping the
20803 * targeted range and setting it up for copy-on-write.
20804 */
20805 return TRUE;
20806 }
20807
20808 vm_map_offset_t
20809 vm_map_round_page_mask(
20810 vm_map_offset_t offset,
20811 vm_map_offset_t mask)
20812 {
20813 return VM_MAP_ROUND_PAGE(offset, mask);
20814 }
20815
20816 vm_map_offset_t
20817 vm_map_trunc_page_mask(
20818 vm_map_offset_t offset,
20819 vm_map_offset_t mask)
20820 {
20821 return VM_MAP_TRUNC_PAGE(offset, mask);
20822 }
20823
20824 boolean_t
20825 vm_map_page_aligned(
20826 vm_map_offset_t offset,
20827 vm_map_offset_t mask)
20828 {
20829 return ((offset) & mask) == 0;
20830 }
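/*
 * Worked example (not from the original source), assuming the usual
 * mask-based rounding where "mask" is page_size - 1.  With a 16KB page
 * mask of 0x3fff:
 *
 *	vm_map_trunc_page_mask(0x5432, 0x3fff) == 0x4000
 *	vm_map_round_page_mask(0x5432, 0x3fff) == 0x8000
 *	vm_map_page_aligned(0x4000, 0x3fff)    == TRUE
 *	vm_map_page_aligned(0x5432, 0x3fff)    == FALSE
 */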
20831
20832 int
20833 vm_map_page_shift(
20834 vm_map_t map)
20835 {
20836 return VM_MAP_PAGE_SHIFT(map);
20837 }
20838
20839 int
20840 vm_map_page_size(
20841 vm_map_t map)
20842 {
20843 return VM_MAP_PAGE_SIZE(map);
20844 }
20845
20846 vm_map_offset_t
20847 vm_map_page_mask(
20848 vm_map_t map)
20849 {
20850 return VM_MAP_PAGE_MASK(map);
20851 }
20852
20853 kern_return_t
20854 vm_map_set_page_shift(
20855 vm_map_t map,
20856 int pageshift)
20857 {
20858 if (map->hdr.nentries != 0) {
20859 /* too late to change page size */
20860 return KERN_FAILURE;
20861 }
20862
20863 map->hdr.page_shift = pageshift;
20864
20865 return KERN_SUCCESS;
20866 }
20867
20868 kern_return_t
20869 vm_map_query_volatile(
20870 vm_map_t map,
20871 mach_vm_size_t *volatile_virtual_size_p,
20872 mach_vm_size_t *volatile_resident_size_p,
20873 mach_vm_size_t *volatile_compressed_size_p,
20874 mach_vm_size_t *volatile_pmap_size_p,
20875 mach_vm_size_t *volatile_compressed_pmap_size_p)
20876 {
20877 mach_vm_size_t volatile_virtual_size;
20878 mach_vm_size_t volatile_resident_count;
20879 mach_vm_size_t volatile_compressed_count;
20880 mach_vm_size_t volatile_pmap_count;
20881 mach_vm_size_t volatile_compressed_pmap_count;
20882 mach_vm_size_t resident_count;
20883 vm_map_entry_t entry;
20884 vm_object_t object;
20885
20886 /* map should be locked by caller */
20887
20888 volatile_virtual_size = 0;
20889 volatile_resident_count = 0;
20890 volatile_compressed_count = 0;
20891 volatile_pmap_count = 0;
20892 volatile_compressed_pmap_count = 0;
20893
20894 for (entry = vm_map_first_entry(map);
20895 entry != vm_map_to_entry(map);
20896 entry = entry->vme_next) {
20897 mach_vm_size_t pmap_resident_bytes, pmap_compressed_bytes;
20898
20899 if (entry->is_sub_map) {
20900 continue;
20901 }
20902 if (!(entry->protection & VM_PROT_WRITE)) {
20903 continue;
20904 }
20905 object = VME_OBJECT(entry);
20906 if (object == VM_OBJECT_NULL) {
20907 continue;
20908 }
20909 if (object->purgable != VM_PURGABLE_VOLATILE &&
20910 object->purgable != VM_PURGABLE_EMPTY) {
20911 continue;
20912 }
20913 if (VME_OFFSET(entry)) {
20914 /*
20915 * If the map entry has been split and the object now
20916 * appears several times in the VM map, we don't want
20917 * to count the object's resident_page_count more than
20918 * once. We count it only for the first one, starting
20919 * at offset 0, and ignore the other VM map entries.
20920 */
20921 continue;
20922 }
20923 resident_count = object->resident_page_count;
20924 if ((VME_OFFSET(entry) / PAGE_SIZE) >= resident_count) {
20925 resident_count = 0;
20926 } else {
20927 resident_count -= (VME_OFFSET(entry) / PAGE_SIZE);
20928 }
20929
20930 volatile_virtual_size += entry->vme_end - entry->vme_start;
20931 volatile_resident_count += resident_count;
20932 if (object->pager) {
20933 volatile_compressed_count +=
20934 vm_compressor_pager_get_count(object->pager);
20935 }
20936 pmap_compressed_bytes = 0;
20937 pmap_resident_bytes =
20938 pmap_query_resident(map->pmap,
20939 entry->vme_start,
20940 entry->vme_end,
20941 &pmap_compressed_bytes);
20942 volatile_pmap_count += (pmap_resident_bytes / PAGE_SIZE);
20943 volatile_compressed_pmap_count += (pmap_compressed_bytes
20944 / PAGE_SIZE);
20945 }
20946
20947 /* map is still locked on return */
20948
20949 *volatile_virtual_size_p = volatile_virtual_size;
20950 *volatile_resident_size_p = volatile_resident_count * PAGE_SIZE;
20951 *volatile_compressed_size_p = volatile_compressed_count * PAGE_SIZE;
20952 *volatile_pmap_size_p = volatile_pmap_count * PAGE_SIZE;
20953 *volatile_compressed_pmap_size_p = volatile_compressed_pmap_count * PAGE_SIZE;
20954
20955 return KERN_SUCCESS;
20956 }
20957
20958 void
20959 vm_map_sizes(vm_map_t map,
20960 vm_map_size_t * psize,
20961 vm_map_size_t * pfree,
20962 vm_map_size_t * plargest_free)
20963 {
20964 vm_map_entry_t entry;
20965 vm_map_offset_t prev;
20966 vm_map_size_t free, total_free, largest_free;
20967 boolean_t end;
20968
20969 if (!map) {
20970 *psize = *pfree = *plargest_free = 0;
20971 return;
20972 }
20973 total_free = largest_free = 0;
20974
20975 vm_map_lock_read(map);
20976 if (psize) {
20977 *psize = map->max_offset - map->min_offset;
20978 }
20979
20980 prev = map->min_offset;
20981 for (entry = vm_map_first_entry(map);; entry = entry->vme_next) {
20982 end = (entry == vm_map_to_entry(map));
20983
20984 if (end) {
20985 free = entry->vme_end - prev;
20986 } else {
20987 free = entry->vme_start - prev;
20988 }
20989
20990 total_free += free;
20991 if (free > largest_free) {
20992 largest_free = free;
20993 }
20994
20995 if (end) {
20996 break;
20997 }
20998 prev = entry->vme_end;
20999 }
21000 vm_map_unlock_read(map);
21001 if (pfree) {
21002 *pfree = total_free;
21003 }
21004 if (plargest_free) {
21005 *plargest_free = largest_free;
21006 }
21007 }
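/*
 * Worked example (not from the original source) for vm_map_sizes() above.
 * Consider a hypothetical map with min_offset 0x1000, max_offset 0x10000
 * and two entries covering [0x2000, 0x3000) and [0x8000, 0xc000):
 *
 *	*psize         == 0x10000 - 0x1000 == 0xf000
 *	free gaps      == 0x1000 (before the first entry)
 *	                + 0x5000 (between the entries)
 *	                + 0x4000 (after the last entry)
 *	*pfree         == 0xa000
 *	*plargest_free == 0x5000
 */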
21008
21009 #if VM_SCAN_FOR_SHADOW_CHAIN
21010 int vm_map_shadow_max(vm_map_t map);
21011 int
21012 vm_map_shadow_max(
21013 vm_map_t map)
21014 {
21015 int shadows, shadows_max;
21016 vm_map_entry_t entry;
21017 vm_object_t object, next_object;
21018
21019 if (map == NULL) {
21020 return 0;
21021 }
21022
21023 shadows_max = 0;
21024
21025 vm_map_lock_read(map);
21026
21027 for (entry = vm_map_first_entry(map);
21028 entry != vm_map_to_entry(map);
21029 entry = entry->vme_next) {
21030 if (entry->is_sub_map) {
21031 continue;
21032 }
21033 object = VME_OBJECT(entry);
21034 if (object == NULL) {
21035 continue;
21036 }
21037 vm_object_lock_shared(object);
21038 for (shadows = 0;
21039 object->shadow != NULL;
21040 shadows++, object = next_object) {
21041 next_object = object->shadow;
21042 vm_object_lock_shared(next_object);
21043 vm_object_unlock(object);
21044 }
21045 vm_object_unlock(object);
21046 if (shadows > shadows_max) {
21047 shadows_max = shadows;
21048 }
21049 }
21050
21051 vm_map_unlock_read(map);
21052
21053 return shadows_max;
21054 }
21055 #endif /* VM_SCAN_FOR_SHADOW_CHAIN */
21056
21057 void
21058 vm_commit_pagezero_status(vm_map_t lmap)
21059 {
21060 pmap_advise_pagezero_range(lmap->pmap, lmap->min_offset);
21061 }
21062
21063 #if XNU_TARGET_OS_OSX
21064 void
21065 vm_map_set_high_start(
21066 vm_map_t map,
21067 vm_map_offset_t high_start)
21068 {
21069 map->vmmap_high_start = high_start;
21070 }
21071 #endif /* XNU_TARGET_OS_OSX */
21072
21073 #if PMAP_CS
21074 kern_return_t
21075 vm_map_entry_cs_associate(
21076 vm_map_t map,
21077 vm_map_entry_t entry,
21078 vm_map_kernel_flags_t vmk_flags)
21079 {
21080 vm_object_t cs_object, cs_shadow;
21081 vm_object_offset_t cs_offset;
21082 void *cs_blobs;
21083 struct vnode *cs_vnode;
21084 kern_return_t cs_ret;
21085
21086 if (map->pmap == NULL ||
21087 entry->is_sub_map || /* XXX FBDP: recurse on sub-range? */
21088 pmap_cs_exempt(map->pmap) ||
21089 VME_OBJECT(entry) == VM_OBJECT_NULL ||
21090 !(entry->protection & VM_PROT_EXECUTE)) {
21091 return KERN_SUCCESS;
21092 }
21093
21094 vm_map_lock_assert_exclusive(map);
21095
21096 if (entry->used_for_jit) {
21097 cs_ret = pmap_cs_associate(map->pmap,
21098 PMAP_CS_ASSOCIATE_JIT,
21099 entry->vme_start,
21100 entry->vme_end - entry->vme_start,
21101 0);
21102 goto done;
21103 }
21104
21105 if (vmk_flags.vmkf_remap_prot_copy) {
21106 cs_ret = pmap_cs_associate(map->pmap,
21107 PMAP_CS_ASSOCIATE_COW,
21108 entry->vme_start,
21109 entry->vme_end - entry->vme_start,
21110 0);
21111 goto done;
21112 }
21113
21114 vm_object_lock_shared(VME_OBJECT(entry));
21115 cs_offset = VME_OFFSET(entry);
21116 for (cs_object = VME_OBJECT(entry);
21117 (cs_object != VM_OBJECT_NULL &&
21118 !cs_object->code_signed);
21119 cs_object = cs_shadow) {
21120 cs_shadow = cs_object->shadow;
21121 if (cs_shadow != VM_OBJECT_NULL) {
21122 cs_offset += cs_object->vo_shadow_offset;
21123 vm_object_lock_shared(cs_shadow);
21124 }
21125 vm_object_unlock(cs_object);
21126 }
21127 if (cs_object == VM_OBJECT_NULL) {
21128 return KERN_SUCCESS;
21129 }
21130
21131 cs_offset += cs_object->paging_offset;
21132 cs_vnode = vnode_pager_lookup_vnode(cs_object->pager);
21133 cs_ret = vnode_pager_get_cs_blobs(cs_vnode,
21134 &cs_blobs);
21135 assert(cs_ret == KERN_SUCCESS);
21136 cs_ret = cs_associate_blob_with_mapping(map->pmap,
21137 entry->vme_start,
21138 (entry->vme_end -
21139 entry->vme_start),
21140 cs_offset,
21141 cs_blobs);
21142 vm_object_unlock(cs_object);
21143 cs_object = VM_OBJECT_NULL;
21144
21145 done:
21146 if (cs_ret == KERN_SUCCESS) {
21147 DTRACE_VM2(vm_map_entry_cs_associate_success,
21148 vm_map_offset_t, entry->vme_start,
21149 vm_map_offset_t, entry->vme_end);
21150 if (vm_map_executable_immutable) {
21151 /*
21152 * Prevent this executable
21153 * mapping from being unmapped
21154 * or modified.
21155 */
21156 entry->permanent = TRUE;
21157 }
21158 /*
21159 * pmap says it will check the
21160 * code-signing validity of pages
21161 * faulted in via this mapping, so
21162 * this map entry should be marked so
21163 * that vm_fault() bypasses code-signing
21164 * validation for faults coming through
21165 * this mapping.
21166 */
21167 entry->pmap_cs_associated = TRUE;
21168 } else if (cs_ret == KERN_NOT_SUPPORTED) {
21169 /*
21170 * pmap won't check the code-signing
21171 * validity of pages faulted in via
21172 * this mapping, so VM should keep
21173 * doing it.
21174 */
21175 DTRACE_VM3(vm_map_entry_cs_associate_off,
21176 vm_map_offset_t, entry->vme_start,
21177 vm_map_offset_t, entry->vme_end,
21178 int, cs_ret);
21179 } else {
21180 /*
21181 * A real error: do not allow
21182 * execution in this mapping.
21183 */
21184 DTRACE_VM3(vm_map_entry_cs_associate_failure,
21185 vm_map_offset_t, entry->vme_start,
21186 vm_map_offset_t, entry->vme_end,
21187 int, cs_ret);
21188 entry->protection &= ~VM_PROT_EXECUTE;
21189 entry->max_protection &= ~VM_PROT_EXECUTE;
21190 }
21191
21192 return cs_ret;
21193 }
21194 #endif /* PMAP_CS */
21195
21196 /*
21197 * FORKED CORPSE FOOTPRINT
21198 *
21199 * A forked corpse gets a copy of the original VM map but its pmap is mostly
21200 * empty since it never ran and never got to fault in any pages.
21201 * Collecting footprint info (via "sysctl vm.self_region_footprint") for
21202 * a forked corpse would therefore return very little information.
21203 *
21204 * When forking a corpse, we can pass the VM_MAP_FORK_CORPSE_FOOTPRINT option
21205 * to vm_map_fork() to collect footprint information from the original VM map
21206 * and its pmap, and store it in the forked corpse's VM map. That information
21207 * is stored in place of the VM map's "hole list" since we'll never need to
21208 * look up holes in the corpse's map.
21209 *
21210 * The corpse's footprint info looks like this:
21211 *
21212 * vm_map->vmmap_corpse_footprint points to pageable kernel memory laid out
21213 * as follows:
21214 * +---------------------------------------+
21215 * header-> | cf_size |
21216 * +-------------------+-------------------+
21217 * | cf_last_region | cf_last_zeroes |
21218 * +-------------------+-------------------+
21219 * region1-> | cfr_vaddr |
21220 * +-------------------+-------------------+
21221 * | cfr_num_pages | d0 | d1 | d2 | d3 |
21222 * +---------------------------------------+
21223 * | d4 | d5 | ... |
21224 * +---------------------------------------+
21225 * | ... |
21226 * +-------------------+-------------------+
21227 * | dy | dz | na | na | cfr_vaddr... | <-region2
21228 * +-------------------+-------------------+
21229 * | cfr_vaddr (ctd) | cfr_num_pages |
21230 * +---------------------------------------+
21231 * | d0 | d1 ... |
21232 * +---------------------------------------+
21233 * ...
21234 * +---------------------------------------+
21235 * last region-> | cfr_vaddr |
21236 * +---------------------------------------+
21237 | cfr_num_pages | d0 | d1 | d2 | d3 |
21238 * +---------------------------------------+
21239 * ...
21240 * +---------------------------------------+
21241 * | dx | dy | dz | na | na | na | na | na |
21242 * +---------------------------------------+
21243 *
21244 * where:
21245 * cf_size: total size of the buffer (rounded to page size)
21246 * cf_last_region: offset in the buffer of the last "region" sub-header
21247 * cf_last_zeroes: number of trailing "zero" dispositions at the end
21248 * of the last region
21249 * cfr_vaddr: virtual address of the start of the covered "region"
21250 * cfr_num_pages: number of pages in the covered "region"
21251 * d*: disposition of the page at that virtual address
21252 * Regions in the buffer are word-aligned.
21253 *
21254 * We estimate the size of the buffer based on the number of memory regions
21255 * and the virtual size of the address space. While copying each memory region
21256 * during vm_map_fork(), we also collect the footprint info for that region
21257 * and store it in the buffer, packing it as much as possible (coalescing
21258 * contiguous memory regions to avoid having too many region headers and
21259 * avoiding long streaks of "zero" page dispositions by splitting footprint
21260 * "regions", so the number of regions in the footprint buffer might not match
21261 * the number of memory regions in the address space.
21262 *
21263 * We also have to copy the original task's "nonvolatile" ledgers since that's
21264 * part of the footprint and will need to be reported to any tool asking for
21265 * the footprint information of the forked corpse.
21266 */
21267
21268 uint64_t vm_map_corpse_footprint_count = 0;
21269 uint64_t vm_map_corpse_footprint_size_avg = 0;
21270 uint64_t vm_map_corpse_footprint_size_max = 0;
21271 uint64_t vm_map_corpse_footprint_full = 0;
21272 uint64_t vm_map_corpse_footprint_no_buf = 0;
21273
21274 struct vm_map_corpse_footprint_header {
21275 vm_size_t cf_size; /* allocated buffer size */
21276 uint32_t cf_last_region; /* offset of last region in buffer */
21277 union {
21278 uint32_t cfu_last_zeroes; /* during creation:
21279 * number of "zero" dispositions at
21280 * end of last region */
21281 uint32_t cfu_hint_region; /* during lookup:
21282 * offset of last looked up region */
21283 #define cf_last_zeroes cfu.cfu_last_zeroes
21284 #define cf_hint_region cfu.cfu_hint_region
21285 } cfu;
21286 };
21287 typedef uint8_t cf_disp_t;
21288 struct vm_map_corpse_footprint_region {
21289 vm_map_offset_t cfr_vaddr; /* region start virtual address */
21290 uint32_t cfr_num_pages; /* number of pages in this "region" */
21291 cf_disp_t cfr_disposition[0]; /* disposition of each page */
21292 } __attribute__((packed));
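
/*
 * Illustrative sketch (not part of the original source): given the buffer
 * offset of a footprint "region", compute the buffer offset at which the
 * next region header starts, mirroring the walk done by the collection and
 * lookup code below. The roundup() to sizeof(int) is what "Regions in the
 * buffer are word-aligned" means in the layout comment above.
 */
__unused static uint32_t
vm_map_corpse_footprint_example_next_region_offset(
	struct vm_map_corpse_footprint_header *footprint_header,
	uint32_t region_offset)
{
	struct vm_map_corpse_footprint_region *footprint_region;

	footprint_region = (struct vm_map_corpse_footprint_region *)
	    ((char *)footprint_header + region_offset);
	/* skip the region's header ... */
	region_offset += sizeof(*footprint_region);
	/* ... and its per-page dispositions ... */
	region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
	/* ... then align to the next word boundary */
	region_offset = roundup(region_offset, sizeof(int));
	return region_offset;
}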
21293
21294 static cf_disp_t
21295 vm_page_disposition_to_cf_disp(
21296 int disposition)
21297 {
21298 assert(sizeof(cf_disp_t) == 1);
21299 /* relocate bits that don't fit in a "uint8_t" */
21300 if (disposition & VM_PAGE_QUERY_PAGE_REUSABLE) {
21301 disposition |= VM_PAGE_QUERY_PAGE_FICTITIOUS;
21302 }
21303 /* cast gets rid of extra bits */
21304 return (cf_disp_t) disposition;
21305 }
21306
21307 static int
21308 vm_page_cf_disp_to_disposition(
21309 cf_disp_t cf_disp)
21310 {
21311 int disposition;
21312
21313 assert(sizeof(cf_disp_t) == 1);
21314 disposition = (int) cf_disp;
21315 /* move relocated bits back in place */
21316 if (cf_disp & VM_PAGE_QUERY_PAGE_FICTITIOUS) {
21317 disposition |= VM_PAGE_QUERY_PAGE_REUSABLE;
21318 disposition &= ~VM_PAGE_QUERY_PAGE_FICTITIOUS;
21319 }
21320 return disposition;
21321 }
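
/*
 * Illustrative sketch (not part of the original source): the two helpers
 * above are meant to round-trip for the dispositions recorded in a corpse
 * footprint, e.g. a "present + reusable" page survives the 8-bit packing.
 */
__unused static boolean_t
vm_page_cf_disp_example_roundtrip(void)
{
	int disposition;
	cf_disp_t cf_disp;

	disposition = VM_PAGE_QUERY_PAGE_PRESENT | VM_PAGE_QUERY_PAGE_REUSABLE;
	cf_disp = vm_page_disposition_to_cf_disp(disposition);
	/* the packed value fits in one byte and decodes back to the original */
	return vm_page_cf_disp_to_disposition(cf_disp) == disposition;
}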
21322
21323 /*
21324 * vm_map_corpse_footprint_new_region:
21325 * closes the current footprint "region" and creates a new one
21326 *
21327 * Returns NULL if there's not enough space in the buffer for a new region.
21328 */
21329 static struct vm_map_corpse_footprint_region *
21330 vm_map_corpse_footprint_new_region(
21331 struct vm_map_corpse_footprint_header *footprint_header)
21332 {
21333 uintptr_t footprint_edge;
21334 uint32_t new_region_offset;
21335 struct vm_map_corpse_footprint_region *footprint_region;
21336 struct vm_map_corpse_footprint_region *new_footprint_region;
21337
21338 footprint_edge = ((uintptr_t)footprint_header +
21339 footprint_header->cf_size);
21340 footprint_region = ((struct vm_map_corpse_footprint_region *)
21341 ((char *)footprint_header +
21342 footprint_header->cf_last_region));
21343 assert((uintptr_t)footprint_region + sizeof(*footprint_region) <=
21344 footprint_edge);
21345
21346 /* get rid of trailing zeroes in the last region */
21347 assert(footprint_region->cfr_num_pages >=
21348 footprint_header->cf_last_zeroes);
21349 footprint_region->cfr_num_pages -=
21350 footprint_header->cf_last_zeroes;
21351 footprint_header->cf_last_zeroes = 0;
21352
21353 /* reuse this region if it's now empty */
21354 if (footprint_region->cfr_num_pages == 0) {
21355 return footprint_region;
21356 }
21357
21358 /* compute offset of new region */
21359 new_region_offset = footprint_header->cf_last_region;
21360 new_region_offset += sizeof(*footprint_region);
21361 new_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21362 new_region_offset = roundup(new_region_offset, sizeof(int));
21363
21364 /* check if we're going over the edge */
21365 if (((uintptr_t)footprint_header +
21366 new_region_offset +
21367 sizeof(*footprint_region)) >=
21368 footprint_edge) {
21369 /* over the edge: no new region */
21370 return NULL;
21371 }
21372
21373 /* adjust offset of last region in header */
21374 footprint_header->cf_last_region = new_region_offset;
21375
21376 new_footprint_region = (struct vm_map_corpse_footprint_region *)
21377 ((char *)footprint_header +
21378 footprint_header->cf_last_region);
21379 new_footprint_region->cfr_vaddr = 0;
21380 new_footprint_region->cfr_num_pages = 0;
21381 /* caller needs to initialize new region */
21382
21383 return new_footprint_region;
21384 }
21385
21386 /*
21387 * vm_map_corpse_footprint_collect:
21388 * collects footprint information for "old_entry" in "old_map" and
21389 * stores it in "new_map"'s vmmap_corpse_footprint buffer.
21390 */
21391 kern_return_t
21392 vm_map_corpse_footprint_collect(
21393 vm_map_t old_map,
21394 vm_map_entry_t old_entry,
21395 vm_map_t new_map)
21396 {
21397 vm_map_offset_t va;
21398 kern_return_t kr;
21399 struct vm_map_corpse_footprint_header *footprint_header;
21400 struct vm_map_corpse_footprint_region *footprint_region;
21401 struct vm_map_corpse_footprint_region *new_footprint_region;
21402 cf_disp_t *next_disp_p;
21403 uintptr_t footprint_edge;
21404 uint32_t num_pages_tmp;
21405 int effective_page_size;
21406
21407 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(old_map));
21408
21409 va = old_entry->vme_start;
21410
21411 vm_map_lock_assert_exclusive(old_map);
21412 vm_map_lock_assert_exclusive(new_map);
21413
21414 assert(new_map->has_corpse_footprint);
21415 assert(!old_map->has_corpse_footprint);
21416 if (!new_map->has_corpse_footprint ||
21417 old_map->has_corpse_footprint) {
21418 /*
21419 * This can only transfer footprint info from a
21420 * map with a live pmap to a map with a corpse footprint.
21421 */
21422 return KERN_NOT_SUPPORTED;
21423 }
21424
21425 if (new_map->vmmap_corpse_footprint == NULL) {
21426 vm_offset_t buf;
21427 vm_size_t buf_size;
21428
21429 buf = 0;
21430 buf_size = (sizeof(*footprint_header) +
21431 (old_map->hdr.nentries
21432 *
21433 (sizeof(*footprint_region) +
21434 +3)) /* potential alignment for each region */
21435 +
21436 ((old_map->size / effective_page_size)
21437 *
21438 sizeof(cf_disp_t))); /* disposition for each page */
21439 // printf("FBDP corpse map %p guestimate footprint size 0x%llx\n", new_map, (uint64_t) buf_size);
21440 buf_size = round_page(buf_size);
21441
21442 /* limit buffer to 1 page to validate overflow detection */
21443 // buf_size = PAGE_SIZE;
21444
21445 /* limit size to a somewhat sane amount */
21446 #if XNU_TARGET_OS_OSX
21447 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (8*1024*1024) /* 8MB */
21448 #else /* XNU_TARGET_OS_OSX */
21449 #define VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE (256*1024) /* 256KB */
21450 #endif /* XNU_TARGET_OS_OSX */
21451 if (buf_size > VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE) {
21452 buf_size = VM_MAP_CORPSE_FOOTPRINT_INFO_MAX_SIZE;
21453 }
21454
21455 /*
21456 * Allocate the pageable buffer (with a trailing guard page).
21457 * It will be zero-filled on demand.
21458 */
21459 kr = kernel_memory_allocate(kernel_map,
21460 &buf,
21461 (buf_size
21462 + PAGE_SIZE), /* trailing guard page */
21463 0, /* mask */
21464 KMA_PAGEABLE | KMA_GUARD_LAST,
21465 VM_KERN_MEMORY_DIAG);
21466 if (kr != KERN_SUCCESS) {
21467 vm_map_corpse_footprint_no_buf++;
21468 return kr;
21469 }
21470
21471 /* initialize header and 1st region */
21472 footprint_header = (struct vm_map_corpse_footprint_header *)buf;
21473 new_map->vmmap_corpse_footprint = footprint_header;
21474
21475 footprint_header->cf_size = buf_size;
21476 footprint_header->cf_last_region =
21477 sizeof(*footprint_header);
21478 footprint_header->cf_last_zeroes = 0;
21479
21480 footprint_region = (struct vm_map_corpse_footprint_region *)
21481 ((char *)footprint_header +
21482 footprint_header->cf_last_region);
21483 footprint_region->cfr_vaddr = 0;
21484 footprint_region->cfr_num_pages = 0;
21485 } else {
21486 /* retrieve header and last region */
21487 footprint_header = (struct vm_map_corpse_footprint_header *)
21488 new_map->vmmap_corpse_footprint;
21489 footprint_region = (struct vm_map_corpse_footprint_region *)
21490 ((char *)footprint_header +
21491 footprint_header->cf_last_region);
21492 }
21493 footprint_edge = ((uintptr_t)footprint_header +
21494 footprint_header->cf_size);
21495
21496 if ((footprint_region->cfr_vaddr +
21497 (((vm_map_offset_t)footprint_region->cfr_num_pages) *
21498 effective_page_size))
21499 != old_entry->vme_start) {
21500 uint64_t num_pages_delta, num_pages_delta_size;
21501 uint32_t region_offset_delta_size;
21502
21503 /*
21504 * Not the next contiguous virtual address:
21505 * start a new region or store "zero" dispositions for
21506 * the missing pages?
21507 */
21508 /* size of gap in actual page dispositions */
21509 num_pages_delta = ((old_entry->vme_start -
21510 footprint_region->cfr_vaddr) / effective_page_size)
21511 - footprint_region->cfr_num_pages;
21512 num_pages_delta_size = num_pages_delta * sizeof(cf_disp_t);
21513 /* size of gap as a new footprint region header */
21514 region_offset_delta_size =
21515 (sizeof(*footprint_region) +
21516 roundup(((footprint_region->cfr_num_pages -
21517 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)),
21518 sizeof(int)) -
21519 ((footprint_region->cfr_num_pages -
21520 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)));
21521 // printf("FBDP %s:%d region 0x%x 0x%llx 0x%x vme_start 0x%llx pages_delta 0x%llx region_delta 0x%x\n", __FUNCTION__, __LINE__, footprint_header->cf_last_region, footprint_region->cfr_vaddr, footprint_region->cfr_num_pages, old_entry->vme_start, num_pages_delta, region_offset_delta);
21522 if (region_offset_delta_size < num_pages_delta_size ||
21523 os_add3_overflow(footprint_region->cfr_num_pages,
21524 (uint32_t) num_pages_delta,
21525 1,
21526 &num_pages_tmp)) {
21527 /*
21528 * Storing data for this gap would take more space
21529 * than inserting a new footprint region header:
21530 * let's start a new region and save space. If it's a
21531 * tie, let's avoid using a new region, since that
21532 * would require more region hops to find the right
21533 * range during lookups.
21534 *
21535 * If the current region's cfr_num_pages would overflow
21536 * if we added "zero" page dispositions for the gap,
21537 * no choice but to start a new region.
21538 */
21539 // printf("FBDP %s:%d new region\n", __FUNCTION__, __LINE__);
21540 new_footprint_region =
21541 vm_map_corpse_footprint_new_region(footprint_header);
21542 /* check that we're not going over the edge */
21543 if (new_footprint_region == NULL) {
21544 goto over_the_edge;
21545 }
21546 footprint_region = new_footprint_region;
21547 /* initialize new region as empty */
21548 footprint_region->cfr_vaddr = old_entry->vme_start;
21549 footprint_region->cfr_num_pages = 0;
21550 } else {
21551 /*
21552 * Store "zero" page dispositions for the missing
21553 * pages.
21554 */
21555 // printf("FBDP %s:%d zero gap\n", __FUNCTION__, __LINE__);
21556 for (; num_pages_delta > 0; num_pages_delta--) {
21557 next_disp_p = (cf_disp_t *)
21558 ((uintptr_t) footprint_region +
21559 sizeof(*footprint_region));
21560 next_disp_p += footprint_region->cfr_num_pages;
21561 /* check that we're not going over the edge */
21562 if ((uintptr_t)next_disp_p >= footprint_edge) {
21563 goto over_the_edge;
21564 }
21565 /* store "zero" disposition for this gap page */
21566 footprint_region->cfr_num_pages++;
21567 *next_disp_p = (cf_disp_t) 0;
21568 footprint_header->cf_last_zeroes++;
21569 }
21570 }
21571 }
21572
21573 for (va = old_entry->vme_start;
21574 va < old_entry->vme_end;
21575 va += effective_page_size) {
21576 int disposition;
21577 cf_disp_t cf_disp;
21578
21579 vm_map_footprint_query_page_info(old_map,
21580 old_entry,
21581 va,
21582 &disposition);
21583 cf_disp = vm_page_disposition_to_cf_disp(disposition);
21584
21585 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP collect map %p va 0x%llx disp 0x%x\n", new_map, va, disp);
21586
21587 if (cf_disp == 0 && footprint_region->cfr_num_pages == 0) {
21588 /*
21589 * Ignore "zero" dispositions at start of
21590 * region: just move start of region.
21591 */
21592 footprint_region->cfr_vaddr += effective_page_size;
21593 continue;
21594 }
21595
21596 /* would region's cfr_num_pages overflow? */
21597 if (os_add_overflow(footprint_region->cfr_num_pages, 1,
21598 &num_pages_tmp)) {
21599 /* overflow: create a new region */
21600 new_footprint_region =
21601 vm_map_corpse_footprint_new_region(
21602 footprint_header);
21603 if (new_footprint_region == NULL) {
21604 goto over_the_edge;
21605 }
21606 footprint_region = new_footprint_region;
21607 footprint_region->cfr_vaddr = va;
21608 footprint_region->cfr_num_pages = 0;
21609 }
21610
21611 next_disp_p = (cf_disp_t *) ((uintptr_t) footprint_region +
21612 sizeof(*footprint_region));
21613 next_disp_p += footprint_region->cfr_num_pages;
21614 /* check that we're not going over the edge */
21615 if ((uintptr_t)next_disp_p >= footprint_edge) {
21616 goto over_the_edge;
21617 }
21618 /* store this disposition */
21619 *next_disp_p = cf_disp;
21620 footprint_region->cfr_num_pages++;
21621
21622 if (cf_disp != 0) {
21623 /* non-zero disp: break the current zero streak */
21624 footprint_header->cf_last_zeroes = 0;
21625 /* done */
21626 continue;
21627 }
21628
21629 /* zero disp: add to the current streak of zeroes */
21630 footprint_header->cf_last_zeroes++;
21631 if ((footprint_header->cf_last_zeroes +
21632 roundup(((footprint_region->cfr_num_pages -
21633 footprint_header->cf_last_zeroes) * sizeof(cf_disp_t)) &
21634 (sizeof(int) - 1),
21635 sizeof(int))) <
21636 (sizeof(*footprint_header))) {
21637 /*
21638 * There are not enough trailing "zero" dispositions
21639 * (+ the extra padding we would need for the previous
21640 * region); creating a new region would not save space
21641 * at this point, so let's keep this "zero" disposition
21642 * in this region and reconsider later.
21643 */
21644 continue;
21645 }
21646 /*
21647 * Create a new region to avoid having too many consecutive
21648 * "zero" dispositions.
21649 */
21650 new_footprint_region =
21651 vm_map_corpse_footprint_new_region(footprint_header);
21652 if (new_footprint_region == NULL) {
21653 goto over_the_edge;
21654 }
21655 footprint_region = new_footprint_region;
21656 /* initialize the new region as empty ... */
21657 footprint_region->cfr_num_pages = 0;
21658 /* ... and skip this "zero" disp */
21659 footprint_region->cfr_vaddr = va + effective_page_size;
21660 }
21661
21662 return KERN_SUCCESS;
21663
21664 over_the_edge:
21665 // printf("FBDP map %p footprint was full for va 0x%llx\n", new_map, va);
21666 vm_map_corpse_footprint_full++;
21667 return KERN_RESOURCE_SHORTAGE;
21668 }
21669
21670 /*
21671 * vm_map_corpse_footprint_collect_done:
21672 * completes the footprint collection by getting rid of any remaining
21673 * trailing "zero" dispositions and trimming the unused part of the
21674 * kernel buffer
21675 */
21676 void
21677 vm_map_corpse_footprint_collect_done(
21678 vm_map_t new_map)
21679 {
21680 struct vm_map_corpse_footprint_header *footprint_header;
21681 struct vm_map_corpse_footprint_region *footprint_region;
21682 vm_size_t buf_size, actual_size;
21683 kern_return_t kr;
21684
21685 assert(new_map->has_corpse_footprint);
21686 if (!new_map->has_corpse_footprint ||
21687 new_map->vmmap_corpse_footprint == NULL) {
21688 return;
21689 }
21690
21691 footprint_header = (struct vm_map_corpse_footprint_header *)
21692 new_map->vmmap_corpse_footprint;
21693 buf_size = footprint_header->cf_size;
21694
21695 footprint_region = (struct vm_map_corpse_footprint_region *)
21696 ((char *)footprint_header +
21697 footprint_header->cf_last_region);
21698
21699 /* get rid of trailing zeroes in last region */
21700 assert(footprint_region->cfr_num_pages >= footprint_header->cf_last_zeroes);
21701 footprint_region->cfr_num_pages -= footprint_header->cf_last_zeroes;
21702 footprint_header->cf_last_zeroes = 0;
21703
21704 actual_size = (vm_size_t)(footprint_header->cf_last_region +
21705 sizeof(*footprint_region) +
21706 (footprint_region->cfr_num_pages * sizeof(cf_disp_t)));
21707
21708 // printf("FBDP map %p buf_size 0x%llx actual_size 0x%llx\n", new_map, (uint64_t) buf_size, (uint64_t) actual_size);
21709 vm_map_corpse_footprint_size_avg =
21710 (((vm_map_corpse_footprint_size_avg *
21711 vm_map_corpse_footprint_count) +
21712 actual_size) /
21713 (vm_map_corpse_footprint_count + 1));
21714 vm_map_corpse_footprint_count++;
21715 if (actual_size > vm_map_corpse_footprint_size_max) {
21716 vm_map_corpse_footprint_size_max = actual_size;
21717 }
21718
21719 actual_size = round_page(actual_size);
21720 if (buf_size > actual_size) {
21721 kr = vm_deallocate(kernel_map,
21722 ((vm_address_t)footprint_header +
21723 actual_size +
21724 PAGE_SIZE), /* trailing guard page */
21725 (buf_size - actual_size));
21726 assertf(kr == KERN_SUCCESS,
21727 "trim: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21728 footprint_header,
21729 (uint64_t) buf_size,
21730 (uint64_t) actual_size,
21731 kr);
21732 kr = vm_protect(kernel_map,
21733 ((vm_address_t)footprint_header +
21734 actual_size),
21735 PAGE_SIZE,
21736 FALSE, /* set_maximum */
21737 VM_PROT_NONE);
21738 assertf(kr == KERN_SUCCESS,
21739 "guard: footprint_header %p buf_size 0x%llx actual_size 0x%llx kr=0x%x\n",
21740 footprint_header,
21741 (uint64_t) buf_size,
21742 (uint64_t) actual_size,
21743 kr);
21744 }
21745
21746 footprint_header->cf_size = actual_size;
21747 }
21748
21749 /*
21750 * vm_map_corpse_footprint_query_page_info:
21751 * retrieves the disposition of the page at virtual address "va"
21752 * in the forked corpse's VM map
21753 *
21754 * This is the equivalent of vm_map_footprint_query_page_info() for a forked corpse.
21755 */
21756 kern_return_t
21757 vm_map_corpse_footprint_query_page_info(
21758 vm_map_t map,
21759 vm_map_offset_t va,
21760 int *disposition_p)
21761 {
21762 struct vm_map_corpse_footprint_header *footprint_header;
21763 struct vm_map_corpse_footprint_region *footprint_region;
21764 uint32_t footprint_region_offset;
21765 vm_map_offset_t region_start, region_end;
21766 int disp_idx;
21767 kern_return_t kr;
21768 int effective_page_size;
21769 cf_disp_t cf_disp;
21770
21771 if (!map->has_corpse_footprint) {
21772 *disposition_p = 0;
21773 kr = KERN_INVALID_ARGUMENT;
21774 goto done;
21775 }
21776
21777 footprint_header = map->vmmap_corpse_footprint;
21778 if (footprint_header == NULL) {
21779 *disposition_p = 0;
21780 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21781 kr = KERN_INVALID_ARGUMENT;
21782 goto done;
21783 }
21784
21785 /* start looking at the hint ("cf_hint_region") */
21786 footprint_region_offset = footprint_header->cf_hint_region;
21787
21788 effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
21789
21790 lookup_again:
21791 if (footprint_region_offset < sizeof(*footprint_header)) {
21792 /* hint too low: start from 1st region */
21793 footprint_region_offset = sizeof(*footprint_header);
21794 }
21795 if (footprint_region_offset >= footprint_header->cf_last_region) {
21796 /* hint too high: re-start from 1st region */
21797 footprint_region_offset = sizeof(*footprint_header);
21798 }
21799 footprint_region = (struct vm_map_corpse_footprint_region *)
21800 ((char *)footprint_header + footprint_region_offset);
21801 region_start = footprint_region->cfr_vaddr;
21802 region_end = (region_start +
21803 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21804 effective_page_size));
21805 if (va < region_start &&
21806 footprint_region_offset != sizeof(*footprint_header)) {
21807 /* our range starts before the hint region */
21808
21809 /* reset the hint (in a racy way...) */
21810 footprint_header->cf_hint_region = sizeof(*footprint_header);
21811 /* lookup "va" again from 1st region */
21812 footprint_region_offset = sizeof(*footprint_header);
21813 goto lookup_again;
21814 }
21815
21816 while (va >= region_end) {
21817 if (footprint_region_offset >= footprint_header->cf_last_region) {
21818 break;
21819 }
21820 /* skip the region's header */
21821 footprint_region_offset += sizeof(*footprint_region);
21822 /* skip the region's page dispositions */
21823 footprint_region_offset += (footprint_region->cfr_num_pages * sizeof(cf_disp_t));
21824 /* align to next word boundary */
21825 footprint_region_offset =
21826 roundup(footprint_region_offset,
21827 sizeof(int));
21828 footprint_region = (struct vm_map_corpse_footprint_region *)
21829 ((char *)footprint_header + footprint_region_offset);
21830 region_start = footprint_region->cfr_vaddr;
21831 region_end = (region_start +
21832 ((vm_map_offset_t)(footprint_region->cfr_num_pages) *
21833 effective_page_size));
21834 }
21835 if (va < region_start || va >= region_end) {
21836 /* page not found */
21837 *disposition_p = 0;
21838 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21839 kr = KERN_SUCCESS;
21840 goto done;
21841 }
21842
21843 /* "va" found: set the lookup hint for next lookup (in a racy way...) */
21844 footprint_header->cf_hint_region = footprint_region_offset;
21845
21846 /* get page disposition for "va" in this region */
21847 disp_idx = (int) ((va - footprint_region->cfr_vaddr) / effective_page_size);
21848 cf_disp = footprint_region->cfr_disposition[disp_idx];
21849 *disposition_p = vm_page_cf_disp_to_disposition(cf_disp);
21850 kr = KERN_SUCCESS;
21851 done:
21852 // if (va < SHARED_REGION_BASE_ARM64) printf("FBDP %d query map %p va 0x%llx disp 0x%x\n", __LINE__, map, va, *disposition_p);
21853 /* dtrace -n 'vminfo:::footprint_query_page_info { printf("map 0x%p va 0x%llx disp 0x%x kr 0x%x", arg0, arg1, arg2, arg3); }' */
21854 DTRACE_VM4(footprint_query_page_info,
21855 vm_map_t, map,
21856 vm_map_offset_t, va,
21857 int, *disposition_p,
21858 kern_return_t, kr);
21859
21860 return kr;
21861 }
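
/*
 * Illustrative sketch (not part of the original source): a hypothetical
 * caller could use vm_map_corpse_footprint_query_page_info() to count how
 * many pages of a corpse mapping were resident in the original task, by
 * testing the VM_PAGE_QUERY_PAGE_PRESENT bit of each returned disposition.
 */
__unused static unsigned int
vm_map_corpse_footprint_example_count_present(
	vm_map_t map,
	vm_map_offset_t start,
	vm_map_offset_t end)
{
	vm_map_offset_t va;
	int disposition;
	int effective_page_size;
	unsigned int num_present;

	num_present = 0;
	effective_page_size = MIN(PAGE_SIZE, VM_MAP_PAGE_SIZE(map));
	for (va = start; va < end; va += effective_page_size) {
		if (vm_map_corpse_footprint_query_page_info(map, va,
		    &disposition) != KERN_SUCCESS) {
			continue;
		}
		if (disposition & VM_PAGE_QUERY_PAGE_PRESENT) {
			num_present++;
		}
	}
	return num_present;
}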
21862
21863 void
21864 vm_map_corpse_footprint_destroy(
21865 vm_map_t map)
21866 {
21867 if (map->has_corpse_footprint &&
21868 map->vmmap_corpse_footprint != 0) {
21869 struct vm_map_corpse_footprint_header *footprint_header;
21870 vm_size_t buf_size;
21871 kern_return_t kr;
21872
21873 footprint_header = map->vmmap_corpse_footprint;
21874 buf_size = footprint_header->cf_size;
21875 kr = vm_deallocate(kernel_map,
21876 (vm_offset_t) map->vmmap_corpse_footprint,
21877 ((vm_size_t) buf_size
21878 + PAGE_SIZE)); /* trailing guard page */
21879 assertf(kr == KERN_SUCCESS, "kr=0x%x\n", kr);
21880 map->vmmap_corpse_footprint = 0;
21881 map->has_corpse_footprint = FALSE;
21882 }
21883 }
21884
21885 /*
21886 * vm_map_copy_footprint_ledgers:
21887 * copies any ledger that's relevant to the memory footprint of "old_task"
21888 * into the forked corpse's task ("new_task")
21889 */
21890 void
21891 vm_map_copy_footprint_ledgers(
21892 task_t old_task,
21893 task_t new_task)
21894 {
21895 vm_map_copy_ledger(old_task, new_task, task_ledgers.phys_footprint);
21896 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile);
21897 vm_map_copy_ledger(old_task, new_task, task_ledgers.purgeable_nonvolatile_compressed);
21898 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal);
21899 vm_map_copy_ledger(old_task, new_task, task_ledgers.internal_compressed);
21900 vm_map_copy_ledger(old_task, new_task, task_ledgers.iokit_mapped);
21901 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting);
21902 vm_map_copy_ledger(old_task, new_task, task_ledgers.alternate_accounting_compressed);
21903 vm_map_copy_ledger(old_task, new_task, task_ledgers.page_table);
21904 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint);
21905 vm_map_copy_ledger(old_task, new_task, task_ledgers.tagged_footprint_compressed);
21906 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile);
21907 vm_map_copy_ledger(old_task, new_task, task_ledgers.network_nonvolatile_compressed);
21908 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint);
21909 vm_map_copy_ledger(old_task, new_task, task_ledgers.media_footprint_compressed);
21910 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint);
21911 vm_map_copy_ledger(old_task, new_task, task_ledgers.graphics_footprint_compressed);
21912 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint);
21913 vm_map_copy_ledger(old_task, new_task, task_ledgers.neural_footprint_compressed);
21914 vm_map_copy_ledger(old_task, new_task, task_ledgers.wired_mem);
21915 }
21916
21917 /*
21918 * vm_map_copy_ledger:
21919 * copy a single ledger from "old_task" to "new_task"
21920 */
21921 void
21922 vm_map_copy_ledger(
21923 task_t old_task,
21924 task_t new_task,
21925 int ledger_entry)
21926 {
21927 ledger_amount_t old_balance, new_balance, delta;
21928
21929 assert(new_task->map->has_corpse_footprint);
21930 if (!new_task->map->has_corpse_footprint) {
21931 return;
21932 }
21933
21934 /* turn off sanity checks for the ledger we're about to mess with */
21935 ledger_disable_panic_on_negative(new_task->ledger,
21936 ledger_entry);
21937
21938 /* adjust "new_task" to match "old_task" */
21939 ledger_get_balance(old_task->ledger,
21940 ledger_entry,
21941 &old_balance);
21942 ledger_get_balance(new_task->ledger,
21943 ledger_entry,
21944 &new_balance);
21945 if (new_balance == old_balance) {
21946 /* new == old: done */
21947 } else if (new_balance > old_balance) {
21948 /* new > old ==> new -= new - old */
21949 delta = new_balance - old_balance;
21950 ledger_debit(new_task->ledger,
21951 ledger_entry,
21952 delta);
21953 } else {
21954 /* new < old ==> new += old - new */
21955 delta = old_balance - new_balance;
21956 ledger_credit(new_task->ledger,
21957 ledger_entry,
21958 delta);
21959 }
21960 }
21961
21962 #if MACH_ASSERT
21963
21964 extern int pmap_ledgers_panic;
21965 extern int pmap_ledgers_panic_leeway;
21966
21967 #define LEDGER_DRIFT(__LEDGER) \
21968 int __LEDGER##_over; \
21969 ledger_amount_t __LEDGER##_over_total; \
21970 ledger_amount_t __LEDGER##_over_max; \
21971 int __LEDGER##_under; \
21972 ledger_amount_t __LEDGER##_under_total; \
21973 ledger_amount_t __LEDGER##_under_max
21974
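/*
 * Ledger-drift statistics: vm_map_pmap_check_ledgers() below bumps the
 * "over"/"under" counters for every ledger it finds with a non-zero
 * balance (typically when a pmap is being destroyed).
 */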
21975 struct {
21976 uint64_t num_pmaps_checked;
21977
21978 LEDGER_DRIFT(phys_footprint);
21979 LEDGER_DRIFT(internal);
21980 LEDGER_DRIFT(internal_compressed);
21981 LEDGER_DRIFT(iokit_mapped);
21982 LEDGER_DRIFT(alternate_accounting);
21983 LEDGER_DRIFT(alternate_accounting_compressed);
21984 LEDGER_DRIFT(page_table);
21985 LEDGER_DRIFT(purgeable_volatile);
21986 LEDGER_DRIFT(purgeable_nonvolatile);
21987 LEDGER_DRIFT(purgeable_volatile_compressed);
21988 LEDGER_DRIFT(purgeable_nonvolatile_compressed);
21989 LEDGER_DRIFT(tagged_nofootprint);
21990 LEDGER_DRIFT(tagged_footprint);
21991 LEDGER_DRIFT(tagged_nofootprint_compressed);
21992 LEDGER_DRIFT(tagged_footprint_compressed);
21993 LEDGER_DRIFT(network_volatile);
21994 LEDGER_DRIFT(network_nonvolatile);
21995 LEDGER_DRIFT(network_volatile_compressed);
21996 LEDGER_DRIFT(network_nonvolatile_compressed);
21997 LEDGER_DRIFT(media_nofootprint);
21998 LEDGER_DRIFT(media_footprint);
21999 LEDGER_DRIFT(media_nofootprint_compressed);
22000 LEDGER_DRIFT(media_footprint_compressed);
22001 LEDGER_DRIFT(graphics_nofootprint);
22002 LEDGER_DRIFT(graphics_footprint);
22003 LEDGER_DRIFT(graphics_nofootprint_compressed);
22004 LEDGER_DRIFT(graphics_footprint_compressed);
22005 LEDGER_DRIFT(neural_nofootprint);
22006 LEDGER_DRIFT(neural_footprint);
22007 LEDGER_DRIFT(neural_nofootprint_compressed);
22008 LEDGER_DRIFT(neural_footprint_compressed);
22009 } pmap_ledgers_drift;
22010
22011 void
22012 vm_map_pmap_check_ledgers(
22013 pmap_t pmap,
22014 ledger_t ledger,
22015 int pid,
22016 char *procname)
22017 {
22018 ledger_amount_t bal;
22019 boolean_t do_panic;
22020
22021 do_panic = FALSE;
22022
22023 pmap_ledgers_drift.num_pmaps_checked++;
22024
22025 #define LEDGER_CHECK_BALANCE(__LEDGER) \
22026 MACRO_BEGIN \
22027 int panic_on_negative = TRUE; \
22028 ledger_get_balance(ledger, \
22029 task_ledgers.__LEDGER, \
22030 &bal); \
22031 ledger_get_panic_on_negative(ledger, \
22032 task_ledgers.__LEDGER, \
22033 &panic_on_negative); \
22034 if (bal != 0) { \
22035 if (panic_on_negative || \
22036 (pmap_ledgers_panic && \
22037 pmap_ledgers_panic_leeway > 0 && \
22038 (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) || \
22039 bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) { \
22040 do_panic = TRUE; \
22041 } \
22042 printf("LEDGER BALANCE proc %d (%s) " \
22043 "\"%s\" = %lld\n", \
22044 pid, procname, #__LEDGER, bal); \
22045 if (bal > 0) { \
22046 pmap_ledgers_drift.__LEDGER##_over++; \
22047 pmap_ledgers_drift.__LEDGER##_over_total += bal; \
22048 if (bal > pmap_ledgers_drift.__LEDGER##_over_max) { \
22049 pmap_ledgers_drift.__LEDGER##_over_max = bal; \
22050 } \
22051 } else if (bal < 0) { \
22052 pmap_ledgers_drift.__LEDGER##_under++; \
22053 pmap_ledgers_drift.__LEDGER##_under_total += bal; \
22054 if (bal < pmap_ledgers_drift.__LEDGER##_under_max) { \
22055 pmap_ledgers_drift.__LEDGER##_under_max = bal; \
22056 } \
22057 } \
22058 } \
22059 MACRO_END
22060
22061 LEDGER_CHECK_BALANCE(phys_footprint);
22062 LEDGER_CHECK_BALANCE(internal);
22063 LEDGER_CHECK_BALANCE(internal_compressed);
22064 LEDGER_CHECK_BALANCE(iokit_mapped);
22065 LEDGER_CHECK_BALANCE(alternate_accounting);
22066 LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
22067 LEDGER_CHECK_BALANCE(page_table);
22068 LEDGER_CHECK_BALANCE(purgeable_volatile);
22069 LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
22070 LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
22071 LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
22072 LEDGER_CHECK_BALANCE(tagged_nofootprint);
22073 LEDGER_CHECK_BALANCE(tagged_footprint);
22074 LEDGER_CHECK_BALANCE(tagged_nofootprint_compressed);
22075 LEDGER_CHECK_BALANCE(tagged_footprint_compressed);
22076 LEDGER_CHECK_BALANCE(network_volatile);
22077 LEDGER_CHECK_BALANCE(network_nonvolatile);
22078 LEDGER_CHECK_BALANCE(network_volatile_compressed);
22079 LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);
22080 LEDGER_CHECK_BALANCE(media_nofootprint);
22081 LEDGER_CHECK_BALANCE(media_footprint);
22082 LEDGER_CHECK_BALANCE(media_nofootprint_compressed);
22083 LEDGER_CHECK_BALANCE(media_footprint_compressed);
22084 LEDGER_CHECK_BALANCE(graphics_nofootprint);
22085 LEDGER_CHECK_BALANCE(graphics_footprint);
22086 LEDGER_CHECK_BALANCE(graphics_nofootprint_compressed);
22087 LEDGER_CHECK_BALANCE(graphics_footprint_compressed);
22088 LEDGER_CHECK_BALANCE(neural_nofootprint);
22089 LEDGER_CHECK_BALANCE(neural_footprint);
22090 LEDGER_CHECK_BALANCE(neural_nofootprint_compressed);
22091 LEDGER_CHECK_BALANCE(neural_footprint_compressed);
22092
22093 if (do_panic) {
22094 if (pmap_ledgers_panic) {
22095 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22096 pmap, pid, procname);
22097 } else {
22098 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
22099 pmap, pid, procname);
22100 }
22101 }
22102 }
22103 #endif /* MACH_ASSERT */