osfmk/vm/vm_pageout.c

   1 /*
   2  * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/vm_pageout.c
  60  *      Author: Avadis Tevanian, Jr., Michael Wayne Young
  61  *      Date:   1985
  62  *
  63  *      The proverbial page-out daemon.
  64  */
  65
  66 #include <stdint.h>
  67
  68 #include <debug.h>
  69 #include <mach_pagemap.h>
  70 #include <mach_cluster_stats.h>
  71
  72 #include <mach/mach_types.h>
  73 #include <mach/memory_object.h>
  74 #include <mach/memory_object_default.h>
  75 #include <mach/memory_object_control_server.h>
  76 #include <mach/mach_host_server.h>
  77 #include <mach/upl.h>
  78 #include <mach/vm_map.h>
  79 #include <mach/vm_param.h>
  80 #include <mach/vm_statistics.h>
  81 #include <mach/sdt.h>
  82
  83 #include <kern/kern_types.h>
  84 #include <kern/counters.h>
  85 #include <kern/host_statistics.h>
  86 #include <kern/machine.h>
  87 #include <kern/misc_protos.h>
  88 #include <kern/sched.h>
  89 #include <kern/thread.h>
  90 #include <kern/xpr.h>
  91 #include <kern/kalloc.h>
  92 #include <kern/policy_internal.h>
  93 #include <kern/thread_group.h>
  94
  95 #include <machine/vm_tuning.h>
  96 #include <machine/commpage.h>
  97
  98 #include <vm/pmap.h>
  99 #include <vm/vm_compressor_pager.h>
 100 #include <vm/vm_fault.h>
 101 #include <vm/vm_map.h>
 102 #include <vm/vm_object.h>
 103 #include <vm/vm_page.h>
 104 #include <vm/vm_pageout.h>
 105 #include <vm/vm_protos.h> /* must be last */
 106 #include <vm/memory_object.h>
 107 #include <vm/vm_purgeable_internal.h>
 108 #include <vm/vm_shared_region.h>
 109 #include <vm/vm_compressor.h>
 110
 111 #include <san/kasan.h>
 112
 113 #if CONFIG_PHANTOM_CACHE
 114 #include <vm/vm_phantom_cache.h>
 115 #endif
 116
 117 #if UPL_DEBUG
 118 #include <libkern/OSDebug.h>
 119 #endif
 120
 121 extern int cs_debug;
 122
 123 extern void mbuf_drain(boolean_t);
 124
 125 #if VM_PRESSURE_EVENTS
 126 #if CONFIG_JETSAM
 127 extern unsigned int memorystatus_available_pages;
 128 extern unsigned int memorystatus_available_pages_pressure;
 129 extern unsigned int memorystatus_available_pages_critical;
 130 #else /* CONFIG_JETSAM */
 131 extern uint64_t memorystatus_available_pages;
 132 extern uint64_t memorystatus_available_pages_pressure;
 133 extern uint64_t memorystatus_available_pages_critical;
 134 #endif /* CONFIG_JETSAM */
 135
 136 extern unsigned int memorystatus_frozen_count;
 137 extern unsigned int memorystatus_suspended_count;
 138 extern vm_pressure_level_t memorystatus_vm_pressure_level;
 139
 140 void vm_pressure_response(void);
 141 extern void consider_vm_pressure_events(void);
 142
 143 #define MEMORYSTATUS_SUSPENDED_THRESHOLD  4
 144 #endif /* VM_PRESSURE_EVENTS */
 145
 146
 147 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
 148 #ifdef  CONFIG_EMBEDDED
 149 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
 150 #else
 151 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
 152 #endif
 153 #endif
 154
 155 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
 156 #define VM_PAGEOUT_DEADLOCK_RELIEF 100  /* number of pages to move to break deadlock */
 157 #endif
 158
 159 #ifndef VM_PAGE_LAUNDRY_MAX
 160 #define VM_PAGE_LAUNDRY_MAX     128UL   /* maximum pageouts on a given pageout queue */
 161 #endif  /* VM_PAGEOUT_LAUNDRY_MAX */
 162
 163 #ifndef VM_PAGEOUT_BURST_WAIT
 164 #define VM_PAGEOUT_BURST_WAIT   1       /* milliseconds */
 165 #endif  /* VM_PAGEOUT_BURST_WAIT */
 166
 167 #ifndef VM_PAGEOUT_EMPTY_WAIT
 168 #define VM_PAGEOUT_EMPTY_WAIT   50      /* milliseconds */
 169 #endif  /* VM_PAGEOUT_EMPTY_WAIT */
 170
 171 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
 172 #define VM_PAGEOUT_DEADLOCK_WAIT 100    /* milliseconds */
 173 #endif  /* VM_PAGEOUT_DEADLOCK_WAIT */
 174
 175 #ifndef VM_PAGEOUT_IDLE_WAIT
 176 #define VM_PAGEOUT_IDLE_WAIT    10      /* milliseconds */
 177 #endif  /* VM_PAGEOUT_IDLE_WAIT */
 178
 179 #ifndef VM_PAGEOUT_SWAP_WAIT
 180 #define VM_PAGEOUT_SWAP_WAIT    10      /* milliseconds */
 181 #endif  /* VM_PAGEOUT_SWAP_WAIT */
 182
 183
 184 #ifndef VM_PAGE_SPECULATIVE_TARGET
 185 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
 186 #endif /* VM_PAGE_SPECULATIVE_TARGET */
 187
 188
 189 /*
 190  *      To obtain a reasonable LRU approximation, the inactive queue
 191  *      needs to be large enough to give pages on it a chance to be
 192  *      referenced a second time.  This macro defines the fraction
 193  *      of active+inactive pages that should be inactive.
 194  *      The pageout daemon uses it to update vm_page_inactive_target.
 195  *
 196  *      If vm_page_free_count falls below vm_page_free_target and
 197  *      vm_page_inactive_count is below vm_page_inactive_target,
 198  *      then the pageout daemon starts running.
 199  */
 200
 201 #ifndef VM_PAGE_INACTIVE_TARGET
 202 #define VM_PAGE_INACTIVE_TARGET(avail)  ((avail) * 1 / 2)
 203 #endif  /* VM_PAGE_INACTIVE_TARGET */
 204
 205 /*
 206  *      Once the pageout daemon starts running, it keeps going
 207  *      until vm_page_free_count meets or exceeds vm_page_free_target.
 208  */
 209
 210 #ifndef VM_PAGE_FREE_TARGET
 211 #ifdef  CONFIG_EMBEDDED
 212 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 100)
 213 #else
 214 #define VM_PAGE_FREE_TARGET(free)       (15 + (free) / 80)
 215 #endif
 216 #endif  /* VM_PAGE_FREE_TARGET */
 217
 218
 219 /*
 220  *      The pageout daemon always starts running once vm_page_free_count
 221  *      falls below vm_page_free_min.
 222  */
 223
 224 #ifndef VM_PAGE_FREE_MIN
 225 #ifdef  CONFIG_EMBEDDED
 226 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 200)
 227 #else
 228 #define VM_PAGE_FREE_MIN(free)          (10 + (free) / 100)
 229 #endif
 230 #endif  /* VM_PAGE_FREE_MIN */
 231
 232 #ifdef  CONFIG_EMBEDDED
 233 #define VM_PAGE_FREE_RESERVED_LIMIT     100
 234 #define VM_PAGE_FREE_MIN_LIMIT          1500
 235 #define VM_PAGE_FREE_TARGET_LIMIT       2000
 236 #else
 237 #define VM_PAGE_FREE_RESERVED_LIMIT     1700
 238 #define VM_PAGE_FREE_MIN_LIMIT          3500
 239 #define VM_PAGE_FREE_TARGET_LIMIT       4000
 240 #endif
 241
 242 /*
 243  *      When vm_page_free_count falls below vm_page_free_reserved,
 244  *      only vm-privileged threads can allocate pages.  vm-privilege
 245  *      allows the pageout daemon and default pager (and any other
 246  *      associated threads needed for default pageout) to continue
 247  *      operation by dipping into the reserved pool of pages.
 248  */
 249
 250 #ifndef VM_PAGE_FREE_RESERVED
 251 #define VM_PAGE_FREE_RESERVED(n)        \
 252         ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
 253 #endif  /* VM_PAGE_FREE_RESERVED */
 254
 255 /*
 256  *      When we dequeue pages from the inactive list, they are
 257  *      reactivated (ie, put back on the active queue) if referenced.
 258  *      However, it is possible to starve the free list if other
 259  *      processors are referencing pages faster than we can turn off
 260  *      the referenced bit.  So we limit the number of reactivations
 261  *      we will make per call of vm_pageout_scan().
 262  */
 263 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
 264
 265 #ifndef VM_PAGE_REACTIVATE_LIMIT
 266 #ifdef  CONFIG_EMBEDDED
 267 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
 268 #else
 269 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
 270 #endif
 271 #endif  /* VM_PAGE_REACTIVATE_LIMIT */
 272 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM       1000
 273
 274 extern boolean_t hibernate_cleaning_in_progress;
 275
 276 /*
 277  * Forward declarations for internal routines.
 278  */
 279 struct cq {
 280         struct vm_pageout_queue *q;
 281         void                    *current_chead;
 282         char                    *scratch_buf;
 283         int                     id;
 284 };
 285
 286 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
 287
 288
 289 #if VM_PRESSURE_EVENTS
 290 void vm_pressure_thread(void);
 291
 292 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
 293 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
 294
 295 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
 296 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
 297 #endif
 298
 299 void vm_pageout_garbage_collect(int);
 300 static void vm_pageout_iothread_external(void);
 301 static void vm_pageout_iothread_internal(struct cq *cq);
 302 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
 303
 304 extern void vm_pageout_continue(void);
 305 extern void vm_pageout_scan(void);
 306
 307 void vm_tests(void); /* forward */
 308
 309 #if !CONFIG_EMBEDDED
 310 static boolean_t vm_pageout_waiter  = FALSE;
 311 static boolean_t vm_pageout_running = FALSE;
 312 #endif /* !CONFIG_EMBEDDED */
 313
 314
 315 #if DEVELOPMENT || DEBUG
 316 struct vm_pageout_debug vm_pageout_debug;
 317 #endif
 318 struct vm_pageout_vminfo vm_pageout_vminfo;
 319 struct vm_pageout_state  vm_pageout_state;
 320 struct vm_config         vm_config;
 321
 322 struct  vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
 323 struct  vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
 324
 325 int         vm_upl_wait_for_pages = 0;
 326 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
 327
 328 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
 329
 330 int     vm_debug_events = 0;
 331
 332 #if CONFIG_MEMORYSTATUS
 333 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
 334
 335 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
 336 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
 337
 338 #endif
 339
 340
 341
 342 /*
 343  *      Routine:        vm_pageout_object_terminate
 344  *      Purpose:
 345  *              Destroy the pageout_object, and perform all of the
 346  *              required cleanup actions.
 347  *
 348  *      In/Out conditions:
 349  *              The object must be locked, and will be returned locked.
 350  */
 351 void
 352 vm_pageout_object_terminate(
 353         vm_object_t     object)
 354 {
 355         vm_object_t     shadow_object;
 356
 357         /*
 358          * Deal with the deallocation (last reference) of a pageout object
 359          * (used for cleaning-in-place) by dropping the paging references/
 360          * freeing pages in the original object.
 361          */
 362
 363         assert(object->pageout);
 364         shadow_object = object->shadow;
 365         vm_object_lock(shadow_object);
 366
 367         while (!vm_page_queue_empty(&object->memq)) {
 368                 vm_page_t               p, m;
 369                 vm_object_offset_t      offset;
 370
 371                 p = (vm_page_t) vm_page_queue_first(&object->memq);
 372
 373                 assert(p->vmp_private);
 374                 assert(p->vmp_free_when_done);
 375                 p->vmp_free_when_done = FALSE;
 376                 assert(!p->vmp_cleaning);
 377                 assert(!p->vmp_laundry);
 378
 379                 offset = p->vmp_offset;
 380                 VM_PAGE_FREE(p);
 381                 p = VM_PAGE_NULL;
 382
 383                 m = vm_page_lookup(shadow_object,
 384                         offset + object->vo_shadow_offset);
 385
 386                 if(m == VM_PAGE_NULL)
 387                         continue;
 388
 389                 assert((m->vmp_dirty) || (m->vmp_precious) ||
 390                                 (m->vmp_busy && m->vmp_cleaning));
 391
 392                 /*
 393                  * Handle the trusted pager throttle.
 394                  * Also decrement the burst throttle (if external).
 395                  */
 396                 vm_page_lock_queues();
 397                 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
 398                         vm_pageout_throttle_up(m);
 399
 400                 /*
 401                  * Handle the "target" page(s). These pages are to be freed if
 402                  * successfully cleaned. Target pages are always busy, and are
 403                  * wired exactly once. The initial target pages are not mapped,
 404                  * (so cannot be referenced or modified) but converted target
 405                  * pages may have been modified between the selection as an
 406                  * adjacent page and conversion to a target.
 407                  */
 408                 if (m->vmp_free_when_done) {
 409                         assert(m->vmp_busy);
 410                         assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
 411                         assert(m->vmp_wire_count == 1);
 412                         m->vmp_cleaning = FALSE;
 413                         m->vmp_free_when_done = FALSE;
 414                         /*
 415                          * Revoke all access to the page. Since the object is
 416                          * locked, and the page is busy, this prevents the page
 417                          * from being dirtied after the pmap_disconnect() call
 418                          * returns.
 419                          *
 420                          * Since the page is left "dirty" but "not modifed", we
 421                          * can detect whether the page was redirtied during
 422                          * pageout by checking the modify state.
 423                          */
 424                         if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
 425                                 SET_PAGE_DIRTY(m, FALSE);
 426                         } else {
 427                                 m->vmp_dirty = FALSE;
 428                         }
 429
 430                         if (m->vmp_dirty) {
 431                                 vm_page_unwire(m, TRUE);        /* reactivates */
 432                                 VM_STAT_INCR(reactivations);
 433                                 PAGE_WAKEUP_DONE(m);
 434                         } else {
 435                                 vm_page_free(m);  /* clears busy, etc. */
 436                         }
 437                         vm_page_unlock_queues();
 438                         continue;
 439                 }
 440                 /*
 441                  * Handle the "adjacent" pages. These pages were cleaned in
 442                  * place, and should be left alone.
 443                  * If prep_pin_count is nonzero, then someone is using the
 444                  * page, so make it active.
 445                  */
 446                 if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
 447                         if (m->vmp_reference)
 448                                 vm_page_activate(m);
 449                         else
 450                                 vm_page_deactivate(m);
 451                 }
 452                 if (m->vmp_overwriting) {
 453                         /*
 454                          * the (COPY_OUT_FROM == FALSE) request_page_list case
 455                          */
 456                         if (m->vmp_busy) {
 457                                 /*
 458                                  * We do not re-set m->vmp_dirty !
 459                                  * The page was busy so no extraneous activity
 460                                  * could have occurred. COPY_INTO is a read into the
 461                                  * new pages. CLEAN_IN_PLACE does actually write
 462                                  * out the pages but handling outside of this code
 463                                  * will take care of resetting dirty. We clear the
 464                                  * modify however for the Programmed I/O case.
 465                                  */
 466                                 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
 467
 468                                 m->vmp_busy = FALSE;
 469                                 m->vmp_absent = FALSE;
 470                         } else {
 471                                 /*
 472                                  * alternate (COPY_OUT_FROM == FALSE) request_page_list case
 473                                  * Occurs when the original page was wired
 474                                  * at the time of the list request
 475                                  */
 476                                  assert(VM_PAGE_WIRED(m));
 477                                  vm_page_unwire(m, TRUE);       /* reactivates */
 478                         }
 479                         m->vmp_overwriting = FALSE;
 480                 } else {
 481                         m->vmp_dirty = FALSE;
 482                 }
 483                 m->vmp_cleaning = FALSE;
 484
 485                 /*
 486                  * Wakeup any thread waiting for the page to be un-cleaning.
 487                  */
 488                 PAGE_WAKEUP(m);
 489                 vm_page_unlock_queues();
 490         }
 491         /*
 492          * Account for the paging reference taken in vm_paging_object_allocate.
 493          */
 494         vm_object_activity_end(shadow_object);
 495         vm_object_unlock(shadow_object);
 496
 497         assert(object->ref_count == 0);
 498         assert(object->paging_in_progress == 0);
 499         assert(object->activity_in_progress == 0);
 500         assert(object->resident_page_count == 0);
 501         return;
 502 }
 503
 504 /*
 505  * Routine:     vm_pageclean_setup
 506  *
 507  * Purpose:     setup a page to be cleaned (made non-dirty), but not
 508  *              necessarily flushed from the VM page cache.
 509  *              This is accomplished by cleaning in place.
 510  *
 511  *              The page must not be busy, and new_object
 512  *              must be locked.
 513  *
 514  */
 515 static void
 516 vm_pageclean_setup(
 517         vm_page_t               m,
 518         vm_page_t               new_m,
 519         vm_object_t             new_object,
 520         vm_object_offset_t      new_offset)
 521 {
 522         assert(!m->vmp_busy);
 523 #if 0
 524         assert(!m->vmp_cleaning);
 525 #endif
 526
 527         XPR(XPR_VM_PAGEOUT,
 528             "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
 529                 VM_PAGE_OBJECT(m), m->vmp_offset, m,
 530                 new_m, new_offset);
 531
 532         pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
 533
 534         /*
 535          * Mark original page as cleaning in place.
 536          */
 537         m->vmp_cleaning = TRUE;
 538         SET_PAGE_DIRTY(m, FALSE);
 539         m->vmp_precious = FALSE;
 540
 541         /*
 542          * Convert the fictitious page to a private shadow of
 543          * the real page.
 544          */
 545         assert(new_m->vmp_fictitious);
 546         assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
 547         new_m->vmp_fictitious = FALSE;
 548         new_m->vmp_private = TRUE;
 549         new_m->vmp_free_when_done = TRUE;
 550         VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
 551
 552         vm_page_lockspin_queues();
 553         vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
 554         vm_page_unlock_queues();
 555
 556         vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
 557         assert(!new_m->vmp_wanted);
 558         new_m->vmp_busy = FALSE;
 559 }
 560
 561 /*
 562  *      Routine:        vm_pageout_initialize_page
 563  *      Purpose:
 564  *              Causes the specified page to be initialized in
 565  *              the appropriate memory object. This routine is used to push
 566  *              pages into a copy-object when they are modified in the
 567  *              permanent object.
 568  *
 569  *              The page is moved to a temporary object and paged out.
 570  *
 571  *      In/out conditions:
 572  *              The page in question must not be on any pageout queues.
 573  *              The object to which it belongs must be locked.
 574  *              The page must be busy, but not hold a paging reference.
 575  *
 576  *      Implementation:
 577  *              Move this page to a completely new object.
 578  */
 579 void
 580 vm_pageout_initialize_page(
 581         vm_page_t       m)
 582 {
 583         vm_object_t             object;
 584         vm_object_offset_t      paging_offset;
 585         memory_object_t         pager;
 586
 587         XPR(XPR_VM_PAGEOUT,
 588                 "vm_pageout_initialize_page, page 0x%X\n",
 589                 m, 0, 0, 0, 0);
 590
 591         assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
 592
 593         object = VM_PAGE_OBJECT(m);
 594
 595         assert(m->vmp_busy);
 596         assert(object->internal);
 597
 598         /*
 599          *      Verify that we really want to clean this page
 600          */
 601         assert(!m->vmp_absent);
 602         assert(!m->vmp_error);
 603         assert(m->vmp_dirty);
 604
 605         /*
 606          *      Create a paging reference to let us play with the object.
 607          */
 608         paging_offset = m->vmp_offset + object->paging_offset;
 609
 610         if (m->vmp_absent || m->vmp_error || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
 611                 panic("reservation without pageout?"); /* alan */
 612
 613                 VM_PAGE_FREE(m);
 614                 vm_object_unlock(object);
 615
 616                 return;
 617         }
 618
 619         /*
 620          * If there's no pager, then we can't clean the page.  This should
 621          * never happen since this should be a copy object and therefore not
 622          * an external object, so the pager should always be there.
 623          */
 624
 625         pager = object->pager;
 626
 627         if (pager == MEMORY_OBJECT_NULL) {
 628                 panic("missing pager for copy object");
 629
 630                 VM_PAGE_FREE(m);
 631                 return;
 632         }
 633
 634         /*
 635          * set the page for future call to vm_fault_list_request
 636          */
 637         pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
 638         SET_PAGE_DIRTY(m, FALSE);
 639
 640         /*
 641          * keep the object from collapsing or terminating
 642          */
 643         vm_object_paging_begin(object);
 644         vm_object_unlock(object);
 645
 646         /*
 647          *      Write the data to its pager.
 648          *      Note that the data is passed by naming the new object,
 649          *      not a virtual address; the pager interface has been
 650          *      manipulated to use the "internal memory" data type.
 651          *      [The object reference from its allocation is donated
 652          *      to the eventual recipient.]
 653          */
 654         memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
 655
 656         vm_object_lock(object);
 657         vm_object_paging_end(object);
 658 }
 659
 660
 661 /*
 662  * vm_pageout_cluster:
 663  *
 664  * Given a page, queue it to the appropriate I/O thread,
 665  * which will page it out and attempt to clean adjacent pages
 666  * in the same operation.
 667  *
 668  * The object and queues must be locked. We will take a
 669  * paging reference to prevent deallocation or collapse when we
 670  * release the object lock back at the call site.  The I/O thread
 671  * is responsible for consuming this reference
 672  *
 673  * The page must not be on any pageout queue.
 674  */
 675 #if DEVELOPMENT || DEBUG
 676 vmct_stats_t vmct_stats;
 677
 678 int32_t vmct_active = 0;
 679 uint64_t vm_compressor_epoch_start = 0;
 680 uint64_t vm_compressor_epoch_stop = 0;
 681
 682 typedef enum vmct_state_t {
 683         VMCT_IDLE,
 684         VMCT_AWAKENED,
 685         VMCT_ACTIVE,
 686 } vmct_state_t;
 687 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
 688 #endif
 689
 690
 691 void
 692 vm_pageout_cluster(vm_page_t m)
 693 {
 694         vm_object_t     object = VM_PAGE_OBJECT(m);
 695         struct          vm_pageout_queue *q;
 696
 697
 698         XPR(XPR_VM_PAGEOUT,
 699                 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
 700                 object, m->vmp_offset, m, 0, 0);
 701
 702         VM_PAGE_CHECK(m);
 703         LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 704         vm_object_lock_assert_exclusive(object);
 705
 706         /*
 707          * Only a certain kind of page is appreciated here.
 708          */
 709         assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
 710         assert(!m->vmp_cleaning && !m->vmp_laundry);
 711         assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
 712
 713         /*
 714          * protect the object from collapse or termination
 715          */
 716         vm_object_activity_begin(object);
 717
 718         if (object->internal == TRUE) {
 719                 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
 720
 721                 m->vmp_busy = TRUE;
 722
 723                 q = &vm_pageout_queue_internal;
 724         } else
 725                 q = &vm_pageout_queue_external;
 726
 727         /*
 728          * pgo_laundry count is tied to the laundry bit
 729          */
 730         m->vmp_laundry = TRUE;
 731         q->pgo_laundry++;
 732
 733         m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
 734         vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, vmp_pageq);
 735
 736         if (q->pgo_idle == TRUE) {
 737                 q->pgo_idle = FALSE;
 738                 thread_wakeup((event_t) &q->pgo_pending);
 739         }
 740         VM_PAGE_CHECK(m);
 741 }
 742
 743
 744 /*
 745  * A page is back from laundry or we are stealing it back from
 746  * the laundering state.  See if there are some pages waiting to
 747  * go to laundry and if we can let some of them go now.
 748  *
 749  * Object and page queues must be locked.
 750  */
 751 void
 752 vm_pageout_throttle_up(
 753        vm_page_t       m)
 754 {
 755        struct vm_pageout_queue *q;
 756        vm_object_t      m_object;
 757
 758        m_object = VM_PAGE_OBJECT(m);
 759
 760        assert(m_object != VM_OBJECT_NULL);
 761        assert(m_object != kernel_object);
 762
 763        LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 764        vm_object_lock_assert_exclusive(m_object);
 765
 766        if (m_object->internal == TRUE)
 767                q = &vm_pageout_queue_internal;
 768        else
 769                q = &vm_pageout_queue_external;
 770
 771        if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
 772
 773                vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, vmp_pageq);
 774                m->vmp_q_state = VM_PAGE_NOT_ON_Q;
 775
 776                VM_PAGE_ZERO_PAGEQ_ENTRY(m);
 777
 778                vm_object_activity_end(m_object);
 779
 780                VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
 781        }
 782        if (m->vmp_laundry == TRUE) {
 783
 784                m->vmp_laundry = FALSE;
 785                q->pgo_laundry--;
 786
 787                if (q->pgo_throttled == TRUE) {
 788                        q->pgo_throttled = FALSE;
 789                        thread_wakeup((event_t) &q->pgo_laundry);
 790                }
 791                if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
 792                        q->pgo_draining = FALSE;
 793                        thread_wakeup((event_t) (&q->pgo_laundry+1));
 794                }
 795                VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
 796         }
 797 }
 798
 799
 800 static void
 801 vm_pageout_throttle_up_batch(
 802         struct vm_pageout_queue *q,
 803         int             batch_cnt)
 804 {
 805        LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
 806
 807        VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
 808
 809        q->pgo_laundry -= batch_cnt;
 810
 811        if (q->pgo_throttled == TRUE) {
 812                q->pgo_throttled = FALSE;
 813                thread_wakeup((event_t) &q->pgo_laundry);
 814        }
 815        if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
 816                q->pgo_draining = FALSE;
 817                thread_wakeup((event_t) (&q->pgo_laundry+1));
 818        }
 819 }
 820
 821
 822
 823 /*
 824  * VM memory pressure monitoring.
 825  *
 826  * vm_pageout_scan() keeps track of the number of pages it considers and
 827  * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
 828  *
 829  * compute_memory_pressure() is called every second from compute_averages()
 830  * and moves "vm_pageout_stat_now" forward, to start accumulating the number
 831  * of recalimed pages in a new vm_pageout_stat[] bucket.
 832  *
 833  * mach_vm_pressure_monitor() collects past statistics about memory pressure.
 834  * The caller provides the number of seconds ("nsecs") worth of statistics
 835  * it wants, up to 30 seconds.
 836  * It computes the number of pages reclaimed in the past "nsecs" seconds and
 837  * also returns the number of pages the system still needs to reclaim at this
 838  * moment in time.
 839  */
 840 #if DEVELOPMENT || DEBUG
 841 #define VM_PAGEOUT_STAT_SIZE    (30 * 8) + 1
 842 #else
 843 #define VM_PAGEOUT_STAT_SIZE    (1 * 8) + 1
 844 #endif
 845 struct vm_pageout_stat {
 846         unsigned long vm_page_active_count;
 847         unsigned long vm_page_speculative_count;
 848         unsigned long vm_page_inactive_count;
 849         unsigned long vm_page_anonymous_count;
 850
 851         unsigned long vm_page_free_count;
 852         unsigned long vm_page_wire_count;
 853         unsigned long vm_page_compressor_count;
 854
 855         unsigned long vm_page_pages_compressed;
 856         unsigned long vm_page_pageable_internal_count;
 857         unsigned long vm_page_pageable_external_count;
 858         unsigned long vm_page_xpmapped_external_count;
 859
 860         unsigned int pages_grabbed;
 861         unsigned int pages_freed;
 862
 863         unsigned int pages_compressed;
 864         unsigned int pages_grabbed_by_compressor;
 865         unsigned int failed_compressions;
 866
 867         unsigned int pages_evicted;
 868         unsigned int pages_purged;
 869
 870         unsigned int considered;
 871         unsigned int considered_bq_internal;
 872         unsigned int considered_bq_external;
 873
 874         unsigned int skipped_external;
 875         unsigned int filecache_min_reactivations;
 876
 877         unsigned int freed_speculative;
 878         unsigned int freed_cleaned;
 879         unsigned int freed_internal;
 880         unsigned int freed_external;
 881
 882         unsigned int cleaned_dirty_external;
 883         unsigned int cleaned_dirty_internal;
 884
 885         unsigned int inactive_referenced;
 886         unsigned int inactive_nolock;
 887         unsigned int reactivation_limit_exceeded;
 888         unsigned int forced_inactive_reclaim;
 889
 890         unsigned int throttled_internal_q;
 891         unsigned int throttled_external_q;
 892
 893         unsigned int phantom_ghosts_found;
 894         unsigned int phantom_ghosts_added;
 895 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, };
 896
 897 unsigned int vm_pageout_stat_now = 0;
 898
 899 #define VM_PAGEOUT_STAT_BEFORE(i) \
 900         (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
 901 #define VM_PAGEOUT_STAT_AFTER(i) \
 902         (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
 903
 904 #if VM_PAGE_BUCKETS_CHECK
 905 int vm_page_buckets_check_interval = 80; /* in eighths of a second */
 906 #endif /* VM_PAGE_BUCKETS_CHECK */
 907
 908
 909 void
 910 record_memory_pressure(void);
 911 void
 912 record_memory_pressure(void)
 913 {
 914         unsigned int vm_pageout_next;
 915
 916 #if VM_PAGE_BUCKETS_CHECK
 917         /* check the consistency of VM page buckets at regular interval */
 918         static int counter = 0;
 919         if ((++counter % vm_page_buckets_check_interval) == 0) {
 920                 vm_page_buckets_check();
 921         }
 922 #endif /* VM_PAGE_BUCKETS_CHECK */
 923
 924         vm_pageout_state.vm_memory_pressure =
 925           vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
 926           vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
 927           vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
 928           vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
 929
 930         commpage_set_memory_pressure( (unsigned int)vm_pageout_state.vm_memory_pressure );
 931
 932         /* move "now" forward */
 933         vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
 934
 935         bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
 936
 937         vm_pageout_stat_now = vm_pageout_next;
 938 }
 939
 940
 941 /*
 942  * IMPORTANT
 943  * mach_vm_ctl_page_free_wanted() is called indirectly, via
 944  * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
 945  * it must be safe in the restricted stackshot context. Locks and/or
 946  * blocking are not allowable.
 947  */
 948 unsigned int
 949 mach_vm_ctl_page_free_wanted(void)
 950 {
 951         unsigned int page_free_target, page_free_count, page_free_wanted;
 952
 953         page_free_target = vm_page_free_target;
 954         page_free_count = vm_page_free_count;
 955         if (page_free_target > page_free_count) {
 956                 page_free_wanted = page_free_target - page_free_count;
 957         } else {
 958                 page_free_wanted = 0;
 959         }
 960
 961         return page_free_wanted;
 962 }
 963
 964
 965 /*
 966  * IMPORTANT:
 967  * mach_vm_pressure_monitor() is called when taking a stackshot, with
 968  * wait_for_pressure FALSE, so that code path must remain safe in the
 969  * restricted stackshot context. No blocking or locks are allowable.
 970  * on that code path.
 971  */
 972
 973 kern_return_t
 974 mach_vm_pressure_monitor(
 975         boolean_t       wait_for_pressure,
 976         unsigned int    nsecs_monitored,
 977         unsigned int    *pages_reclaimed_p,
 978         unsigned int    *pages_wanted_p)
 979 {
 980         wait_result_t   wr;
 981         unsigned int    vm_pageout_then, vm_pageout_now;
 982         unsigned int    pages_reclaimed;
 983         unsigned int    units_of_monitor;
 984
 985         units_of_monitor = 8 * nsecs_monitored;
 986         /*
 987          * We don't take the vm_page_queue_lock here because we don't want
 988          * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
 989          * thread when it's trying to reclaim memory.  We don't need fully
 990          * accurate monitoring anyway...
 991          */
 992
 993         if (wait_for_pressure) {
 994                 /* wait until there's memory pressure */
 995                 while (vm_page_free_count >= vm_page_free_target) {
 996                         wr = assert_wait((event_t) &vm_page_free_wanted,
 997                                          THREAD_INTERRUPTIBLE);
 998                         if (wr == THREAD_WAITING) {
 999                                 wr = thread_block(THREAD_CONTINUE_NULL);
1000                         }
1001                         if (wr == THREAD_INTERRUPTED) {
1002                                 return KERN_ABORTED;
1003                         }
1004                         if (wr == THREAD_AWAKENED) {
1005                                 /*
1006                                  * The memory pressure might have already
1007                                  * been relieved but let's not block again
1008                                  * and let's report that there was memory
1009                                  * pressure at some point.
1010                                  */
1011                                 break;
1012                         }
1013                 }
1014         }
1015
1016         /* provide the number of pages the system wants to reclaim */
1017         if (pages_wanted_p != NULL) {
1018                 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1019         }
1020
1021         if (pages_reclaimed_p == NULL) {
1022                 return KERN_SUCCESS;
1023         }
1024
1025         /* provide number of pages reclaimed in the last "nsecs_monitored" */
1026         vm_pageout_now = vm_pageout_stat_now;
1027         pages_reclaimed = 0;
1028         for (vm_pageout_then =
1029                      VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1030              vm_pageout_then != vm_pageout_now &&
1031                      units_of_monitor-- != 0;
1032              vm_pageout_then =
1033                      VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1034                 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1035                 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1036                 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1037                 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1038         }
1039         *pages_reclaimed_p = pages_reclaimed;
1040
1041         return KERN_SUCCESS;
1042 }
1043
1044
1045
1046 #if DEVELOPMENT || DEBUG
1047
1048 static void
1049 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1050
1051 /*
1052  * condition variable used to make sure there is
1053  * only a single sweep going on at a time
1054  */
1055 boolean_t       vm_pageout_disconnect_all_pages_active = FALSE;
1056
1057
1058 void
1059 vm_pageout_disconnect_all_pages()
1060 {
1061         vm_page_lock_queues();
1062
1063         if (vm_pageout_disconnect_all_pages_active == TRUE) {
1064                 vm_page_unlock_queues();
1065                 return;
1066         }
1067         vm_pageout_disconnect_all_pages_active = TRUE;
1068         vm_page_unlock_queues();
1069
1070         vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1071         vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1072         vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1073
1074         vm_pageout_disconnect_all_pages_active = FALSE;
1075 }
1076
1077
1078 void
1079 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1080 {
1081         vm_page_t       m;
1082         vm_object_t     t_object = NULL;
1083         vm_object_t     l_object = NULL;
1084         vm_object_t     m_object = NULL;
1085         int             delayed_unlock = 0;
1086         int             try_failed_count = 0;
1087         int             disconnected_count = 0;
1088         int             paused_count = 0;
1089         int             object_locked_count = 0;
1090
1091         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1092                                   q, qcount, 0, 0, 0);
1093
1094         vm_page_lock_queues();
1095
1096         while (qcount && !vm_page_queue_empty(q)) {
1097
1098                 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1099
1100                 m = (vm_page_t) vm_page_queue_first(q);
1101                 m_object = VM_PAGE_OBJECT(m);
1102
1103                 /*
1104                  * check to see if we currently are working
1105                  * with the same object... if so, we've
1106                  * already got the lock
1107                  */
1108                 if (m_object != l_object) {
1109                         /*
1110                          * the object associated with candidate page is
1111                          * different from the one we were just working
1112                          * with... dump the lock if we still own it
1113                          */
1114                         if (l_object != NULL) {
1115                                 vm_object_unlock(l_object);
1116                                 l_object = NULL;
1117                         }
1118                         if (m_object != t_object)
1119                                 try_failed_count = 0;
1120
1121                         /*
1122                          * Try to lock object; since we've alread got the
1123                          * page queues lock, we can only 'try' for this one.
1124                          * if the 'try' fails, we need to do a mutex_pause
1125                          * to allow the owner of the object lock a chance to
1126                          * run...
1127                          */
1128                         if ( !vm_object_lock_try_scan(m_object)) {
1129
1130                                 if (try_failed_count > 20) {
1131                                         goto reenter_pg_on_q;
1132                                 }
1133                                 vm_page_unlock_queues();
1134                                 mutex_pause(try_failed_count++);
1135                                 vm_page_lock_queues();
1136                                 delayed_unlock = 0;
1137
1138                                 paused_count++;
1139
1140                                 t_object = m_object;
1141                                 continue;
1142                         }
1143                         object_locked_count++;
1144
1145                         l_object = m_object;
1146                 }
1147                 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1148                         /*
1149                          * put it back on the head of its queue
1150                          */
1151                         goto reenter_pg_on_q;
1152                 }
1153                 if (m->vmp_pmapped == TRUE) {
1154
1155                         pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1156
1157                         disconnected_count++;
1158                 }
1159 reenter_pg_on_q:
1160                 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1161                 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1162
1163                 qcount--;
1164                 try_failed_count = 0;
1165
1166                 if (delayed_unlock++ > 128) {
1167
1168                         if (l_object != NULL) {
1169                                 vm_object_unlock(l_object);
1170                                 l_object = NULL;
1171                         }
1172                         lck_mtx_yield(&vm_page_queue_lock);
1173                         delayed_unlock = 0;
1174                 }
1175         }
1176         if (l_object != NULL) {
1177                 vm_object_unlock(l_object);
1178                 l_object = NULL;
1179         }
1180         vm_page_unlock_queues();
1181
1182         KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1183                                   q, disconnected_count, object_locked_count, paused_count, 0);
1184 }
1185
1186 #endif
1187
1188
1189 static void
1190 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1191
1192 /*
1193  * condition variable used to make sure there is
1194  * only a single sweep going on at a time
1195  */
1196 boolean_t       vm_pageout_anonymous_pages_active = FALSE;
1197
1198
1199 void
1200 vm_pageout_anonymous_pages()
1201 {
1202         if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1203
1204                 vm_page_lock_queues();
1205
1206                 if (vm_pageout_anonymous_pages_active == TRUE) {
1207                         vm_page_unlock_queues();
1208                         return;
1209                 }
1210                 vm_pageout_anonymous_pages_active = TRUE;
1211                 vm_page_unlock_queues();
1212
1213                 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214                 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215                 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217                 if (VM_CONFIG_SWAP_IS_PRESENT)
1218                         vm_consider_swapping();
1219
1220                 vm_page_lock_queues();
1221                 vm_pageout_anonymous_pages_active = FALSE;
1222                 vm_page_unlock_queues();
1223         }
1224 }
1225
1226
1227 void
1228 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1229 {
1230         vm_page_t       m;
1231         vm_object_t     t_object = NULL;
1232         vm_object_t     l_object = NULL;
1233         vm_object_t     m_object = NULL;
1234         int             delayed_unlock = 0;
1235         int             try_failed_count = 0;
1236         int             refmod_state;
1237         int             pmap_options;
1238         struct          vm_pageout_queue *iq;
1239         ppnum_t         phys_page;
1240
1241
1242         iq = &vm_pageout_queue_internal;
1243
1244         vm_page_lock_queues();
1245
1246         while (qcount && !vm_page_queue_empty(q)) {
1247
1248                 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1249
1250                 if (VM_PAGE_Q_THROTTLED(iq)) {
1251
1252                         if (l_object != NULL) {
1253                                 vm_object_unlock(l_object);
1254                                 l_object = NULL;
1255                         }
1256                         iq->pgo_draining = TRUE;
1257
1258                         assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1259                         vm_page_unlock_queues();
1260
1261                         thread_block(THREAD_CONTINUE_NULL);
1262
1263                         vm_page_lock_queues();
1264                         delayed_unlock = 0;
1265                         continue;
1266                 }
1267                 m = (vm_page_t) vm_page_queue_first(q);
1268                 m_object = VM_PAGE_OBJECT(m);
1269
1270                 /*
1271                  * check to see if we currently are working
1272                  * with the same object... if so, we've
1273                  * already got the lock
1274                  */
1275                 if (m_object != l_object) {
1276                         if ( !m_object->internal)
1277                                 goto reenter_pg_on_q;
1278
1279                         /*
1280                          * the object associated with candidate page is
1281                          * different from the one we were just working
1282                          * with... dump the lock if we still own it
1283                          */
1284                         if (l_object != NULL) {
1285                                 vm_object_unlock(l_object);
1286                                 l_object = NULL;
1287                         }
1288                         if (m_object != t_object)
1289                                 try_failed_count = 0;
1290
1291                         /*
1292                          * Try to lock object; since we've alread got the
1293                          * page queues lock, we can only 'try' for this one.
1294                          * if the 'try' fails, we need to do a mutex_pause
1295                          * to allow the owner of the object lock a chance to
1296                          * run...
1297                          */
1298                         if ( !vm_object_lock_try_scan(m_object)) {
1299
1300                                 if (try_failed_count > 20) {
1301                                         goto reenter_pg_on_q;
1302                                 }
1303                                 vm_page_unlock_queues();
1304                                 mutex_pause(try_failed_count++);
1305                                 vm_page_lock_queues();
1306                                 delayed_unlock = 0;
1307
1308                                 t_object = m_object;
1309                                 continue;
1310                         }
1311                         l_object = m_object;
1312                 }
1313                 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1314                         /*
1315                          * page is not to be cleaned
1316                          * put it back on the head of its queue
1317                          */
1318                         goto reenter_pg_on_q;
1319                 }
1320                 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1321
1322                 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
1323                         refmod_state = pmap_get_refmod(phys_page);
1324
1325                         if (refmod_state & VM_MEM_REFERENCED)
1326                                 m->vmp_reference = TRUE;
1327                         if (refmod_state & VM_MEM_MODIFIED) {
1328                                 SET_PAGE_DIRTY(m, FALSE);
1329                         }
1330                 }
1331                 if (m->vmp_reference == TRUE) {
1332                         m->vmp_reference = FALSE;
1333                         pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1334                         goto reenter_pg_on_q;
1335                 }
1336                 if (m->vmp_pmapped == TRUE) {
1337                         if (m->vmp_dirty || m->vmp_precious) {
1338                                 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1339                         } else {
1340                                 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1341                         }
1342                         refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1343                         if (refmod_state & VM_MEM_MODIFIED) {
1344                                 SET_PAGE_DIRTY(m, FALSE);
1345                         }
1346                 }
1347
1348                 if ( !m->vmp_dirty && !m->vmp_precious) {
1349                         vm_page_unlock_queues();
1350                         VM_PAGE_FREE(m);
1351                         vm_page_lock_queues();
1352                         delayed_unlock = 0;
1353
1354                         goto next_pg;
1355                 }
1356                 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)  {
1357
1358                         if (!m_object->pager_initialized) {
1359
1360                                 vm_page_unlock_queues();
1361
1362                                 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1363
1364                                 if (!m_object->pager_initialized)
1365                                         vm_object_compressor_pager_create(m_object);
1366
1367                                 vm_page_lock_queues();
1368                                 delayed_unlock = 0;
1369                         }
1370                         if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1371                                 goto reenter_pg_on_q;
1372                         /*
1373                          * vm_object_compressor_pager_create will drop the object lock
1374                          * which means 'm' may no longer be valid to use
1375                          */
1376                         continue;
1377                 }
1378                 /*
1379                  * we've already factored out pages in the laundry which
1380                  * means this page can't be on the pageout queue so it's
1381                  * safe to do the vm_page_queues_remove
1382                  */
1383                 vm_page_queues_remove(m, TRUE);
1384
1385                 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1386
1387                 vm_pageout_cluster(m);
1388
1389                 goto next_pg;
1390
1391 reenter_pg_on_q:
1392                 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1393                 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1394 next_pg:
1395                 qcount--;
1396                 try_failed_count = 0;
1397
1398                 if (delayed_unlock++ > 128) {
1399
1400                         if (l_object != NULL) {
1401                                 vm_object_unlock(l_object);
1402                                 l_object = NULL;
1403                         }
1404                         lck_mtx_yield(&vm_page_queue_lock);
1405                         delayed_unlock = 0;
1406                 }
1407         }
1408         if (l_object != NULL) {
1409                 vm_object_unlock(l_object);
1410                 l_object = NULL;
1411         }
1412         vm_page_unlock_queues();
1413 }
1414
1415
1416
1417 /*
1418  * function in BSD to apply I/O throttle to the pageout thread
1419  */
1420 extern void vm_pageout_io_throttle(void);
1421
1422 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj)                    \
1423         MACRO_BEGIN                                                     \
1424         /*                                                              \
1425          * If a "reusable" page somehow made it back into               \
1426          * the active queue, it's been re-used and is not               \
1427          * quite re-usable.                                             \
1428          * If the VM object was "all_reusable", consider it             \
1429          * as "all re-used" instead of converting it to                 \
1430          * "partially re-used", which could be expensive.               \
1431          */                                                             \
1432         assert(VM_PAGE_OBJECT((m)) == (obj));                           \
1433         if ((m)->vmp_reusable ||                                        \
1434             (obj)->all_reusable) {                                      \
1435                 vm_object_reuse_pages((obj),                            \
1436                                       (m)->vmp_offset,                  \
1437                                       (m)->vmp_offset + PAGE_SIZE_64,   \
1438                                       FALSE);                           \
1439         }                                                               \
1440         MACRO_END
1441
1442
1443 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT         64
1444 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX     1024
1445
1446 #define FCS_IDLE                0
1447 #define FCS_DELAYED             1
1448 #define FCS_DEADLOCK_DETECTED   2
1449
1450 struct flow_control {
1451         int             state;
1452         mach_timespec_t ts;
1453 };
1454
1455
1456 #if CONFIG_BACKGROUND_QUEUE
1457 uint64_t vm_pageout_rejected_bq_internal = 0;
1458 uint64_t vm_pageout_rejected_bq_external = 0;
1459 uint64_t vm_pageout_skipped_bq_internal = 0;
1460 #endif
1461
1462 #define ANONS_GRABBED_LIMIT     2
1463
1464
1465 #if 0
1466 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1467 #endif
1468 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1469
1470 #define VM_PAGEOUT_PB_NO_ACTION                         0
1471 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1472 #define VM_PAGEOUT_PB_THREAD_YIELD                      2
1473
1474
1475 #if 0
1476 static void
1477 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1478 {
1479         if (*local_freeq) {
1480                 vm_page_unlock_queues();
1481
1482                 VM_DEBUG_CONSTANT_EVENT(
1483                         vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1484                         vm_page_free_count, 0, 0, 1);
1485
1486                 vm_page_free_list(*local_freeq, TRUE);
1487
1488                 VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1489                                vm_page_free_count, *local_freed, 0, 1);
1490
1491                 *local_freeq = NULL;
1492                 *local_freed = 0;
1493
1494                 vm_page_lock_queues();
1495         } else {
1496                 lck_mtx_yield(&vm_page_queue_lock);
1497         }
1498         *delayed_unlock = 1;
1499 }
1500 #endif
1501
1502
1503 static void
1504 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1505                             vm_page_t *local_freeq, int *local_freed, int action)
1506 {
1507         vm_page_unlock_queues();
1508
1509         if (*object != NULL) {
1510                 vm_object_unlock(*object);
1511                 *object = NULL;
1512         }
1513         if (*local_freeq) {
1514
1515                 vm_page_free_list(*local_freeq, TRUE);
1516
1517                 *local_freeq = NULL;
1518                 *local_freed = 0;
1519         }
1520         *delayed_unlock = 1;
1521
1522         switch (action) {
1523
1524         case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1525                 vm_consider_waking_compactor_swapper();
1526                 break;
1527         case VM_PAGEOUT_PB_THREAD_YIELD:
1528                 thread_yield_internal(1);
1529                 break;
1530         case VM_PAGEOUT_PB_NO_ACTION:
1531         default:
1532                 break;
1533         }
1534         vm_page_lock_queues();
1535 }
1536
1537
1538 static struct vm_pageout_vminfo last;
1539
1540 uint64_t last_vm_page_pages_grabbed = 0;
1541
1542 extern  uint32_t c_segment_pages_compressed;
1543
1544 extern uint64_t shared_region_pager_reclaimed;
1545 extern struct memory_object_pager_ops shared_region_pager_ops;
1546
1547 void update_vm_info(void)
1548 {
1549         uint64_t tmp;
1550
1551         vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1552         vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1553         vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1554         vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1555
1556         vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1557         vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1558         vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1559
1560         vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1561         vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1562         vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1563         vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1564
1565
1566         tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1567         vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1568         last.vm_pageout_considered_page = tmp;
1569
1570         tmp = vm_pageout_vminfo.vm_pageout_compressions;
1571         vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
1572         last.vm_pageout_compressions = tmp;
1573
1574         tmp = vm_pageout_vminfo.vm_compressor_failed;
1575         vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1576         last.vm_compressor_failed = tmp;
1577
1578         tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1579         vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
1580         last.vm_compressor_pages_grabbed = tmp;
1581
1582         tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1583         vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1584         last.vm_phantom_cache_found_ghost = tmp;
1585
1586         tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1587         vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1588         last.vm_phantom_cache_added_ghost = tmp;
1589
1590         tmp = get_pages_grabbed_count();
1591         vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
1592         last_vm_page_pages_grabbed = tmp;
1593
1594         tmp = vm_pageout_vminfo.vm_page_pages_freed;
1595         vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1596         last.vm_page_pages_freed = tmp;
1597
1598
1599         if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1600
1601                 tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1602                 vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1603                 last.vm_pageout_pages_evicted = tmp;
1604
1605                 tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1606                 vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1607                 last.vm_pageout_pages_purged = tmp;
1608
1609                 tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1610                 vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1611                 last.vm_pageout_freed_speculative = tmp;
1612
1613                 tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1614                 vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1615                 last.vm_pageout_freed_external = tmp;
1616
1617                 tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1618                 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1619                 last.vm_pageout_inactive_referenced = tmp;
1620
1621                 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1622                 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1623                 last.vm_pageout_scan_inactive_throttled_external = tmp;
1624
1625                 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1626                 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1627                 last.vm_pageout_inactive_dirty_external = tmp;
1628
1629                 tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1630                 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1631                 last.vm_pageout_freed_cleaned = tmp;
1632
1633                 tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1634                 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1635                 last.vm_pageout_inactive_nolock = tmp;
1636
1637                 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1638                 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1639                 last.vm_pageout_scan_inactive_throttled_internal = tmp;
1640
1641                 tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1642                 vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1643                 last.vm_pageout_skipped_external = tmp;
1644
1645                 tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1646                 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1647                 last.vm_pageout_reactivation_limit_exceeded = tmp;
1648
1649                 tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1650                 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1651                 last.vm_pageout_inactive_force_reclaim = tmp;
1652
1653                 tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1654                 vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1655                 last.vm_pageout_freed_internal = tmp;
1656
1657                 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1658                 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1659                 last.vm_pageout_considered_bq_internal = tmp;
1660
1661                 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1662                 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1663                 last.vm_pageout_considered_bq_external = tmp;
1664
1665                 tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1666                 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1667                 last.vm_pageout_filecache_min_reactivated = tmp;
1668
1669                 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1670                 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1671                 last.vm_pageout_inactive_dirty_internal = tmp;
1672         }
1673
1674         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1675                               vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1676                               vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1677                               vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1678                               vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
1679                               0);
1680
1681         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1682                               vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1683                               vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1684                               vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
1685                               0,
1686                               0);
1687
1688         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1689                               vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1690                               vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1691                               vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1692                               vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
1693                               0);
1694
1695         if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1696             vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1697             vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1698
1699                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1700                                       vm_pageout_stats[vm_pageout_stat_now].considered,
1701                                       vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1702                                       vm_pageout_stats[vm_pageout_stat_now].freed_external,
1703                                       vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
1704                                       0);
1705
1706                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1707                                       vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1708                                       vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1709                                       vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1710                                       vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
1711                                       0);
1712
1713                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1714                                       vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1715                                       vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1716                                       vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1717                                       vm_pageout_stats[vm_pageout_stat_now].skipped_external,
1718                                       0);
1719
1720                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1721                                       vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1722                                       vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1723                                       vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1724                                       vm_pageout_stats[vm_pageout_stat_now].freed_internal,
1725                                       0);
1726
1727                 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE,
1728                                       vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1729                                       vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1730                                       vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1731                                       vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
1732                                       0);
1733
1734         }
1735         KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE,
1736                               vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1737                               vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1738                               vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1739                               vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
1740                               0);
1741
1742         record_memory_pressure();
1743 }
1744
1745
1746 void
1747 vm_page_balance_inactive(int max_to_move)
1748 {
1749         vm_page_t m;
1750
1751         LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1752
1753         vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1754                                                           vm_page_inactive_count +
1755                                                           vm_page_speculative_count);
1756
1757         while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
1758
1759                 VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
1760
1761                 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
1762
1763                 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
1764                 assert(!m->vmp_laundry);
1765                 assert(VM_PAGE_OBJECT(m) != kernel_object);
1766                 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1767
1768                 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1769
1770                 /*
1771                  * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1772                  *
1773                  * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1774                  * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1775                  * new reference happens. If no futher references happen on the page after that remote TLB flushes
1776                  * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1777                  * by pageout_scan, which is just fine since the last reference would have happened quite far
1778                  * in the past (TLB caches don't hang around for very long), and of course could just as easily
1779                  * have happened before we moved the page
1780                  */
1781                 if (m->vmp_pmapped == TRUE)
1782                         pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1783
1784                 /*
1785                  * The page might be absent or busy,
1786                  * but vm_page_deactivate can handle that.
1787                  * FALSE indicates that we don't want a H/W clear reference
1788                  */
1789                 vm_page_deactivate_internal(m, FALSE);
1790         }
1791 }
1792
1793
1794 /*
1795  *      vm_pageout_scan does the dirty work for the pageout daemon.
1796  *      It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1797  *      held and vm_page_free_wanted == 0.
1798  */
1799 void
1800 vm_pageout_scan(void)
1801 {
1802         unsigned int loop_count = 0;
1803         unsigned int inactive_burst_count = 0;
1804         unsigned int reactivated_this_call;
1805         unsigned int reactivate_limit;
1806         vm_page_t   local_freeq = NULL;
1807         int         local_freed = 0;
1808         int         delayed_unlock;
1809         int         delayed_unlock_limit = 0;
1810         int         refmod_state = 0;
1811         int     vm_pageout_deadlock_target = 0;
1812         struct  vm_pageout_queue *iq;
1813         struct  vm_pageout_queue *eq;
1814         struct  vm_speculative_age_q *sq;
1815         struct  flow_control    flow_control = { 0, { 0, 0 } };
1816         boolean_t inactive_throttled = FALSE;
1817         mach_timespec_t ts;
1818         unsigned        int msecs = 0;
1819         vm_object_t     object = NULL;
1820         uint32_t        inactive_reclaim_run;
1821         boolean_t       exceeded_burst_throttle;
1822         boolean_t       grab_anonymous = FALSE;
1823         boolean_t       force_anonymous = FALSE;
1824         boolean_t       force_speculative_aging = FALSE;
1825         int             anons_grabbed = 0;
1826         int             page_prev_q_state = 0;
1827 #if CONFIG_BACKGROUND_QUEUE
1828         boolean_t       page_from_bg_q = FALSE;
1829 #endif
1830         int             cache_evict_throttle = 0;
1831         uint32_t        vm_pageout_inactive_external_forced_reactivate_limit = 0;
1832         uint32_t        inactive_external_count;
1833         int             force_purge = 0;
1834         int             divisor;
1835 #define DELAY_SPECULATIVE_AGE   1000
1836         int             delay_speculative_age = 0;
1837         vm_object_t     m_object = VM_OBJECT_NULL;
1838
1839 #if VM_PRESSURE_EVENTS
1840         vm_pressure_level_t pressure_level;
1841 #endif /* VM_PRESSURE_EVENTS */
1842
1843         VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1844                                 vm_pageout_vminfo.vm_pageout_freed_speculative,
1845                                 vm_pageout_state.vm_pageout_inactive_clean,
1846                                 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
1847                                 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
1848
1849         flow_control.state = FCS_IDLE;
1850         iq = &vm_pageout_queue_internal;
1851         eq = &vm_pageout_queue_external;
1852         sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1853
1854
1855         XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1856
1857         /* Ask the pmap layer to return any pages it no longer needs. */
1858         uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
1859
1860         vm_page_lock_queues();
1861
1862         vm_page_wire_count -= pmap_wired_pages_freed;
1863
1864         delayed_unlock = 1;
1865
1866         /*
1867          *      Calculate the max number of referenced pages on the inactive
1868          *      queue that we will reactivate.
1869          */
1870         reactivated_this_call = 0;
1871         reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1872                                                     vm_page_inactive_count);
1873         inactive_reclaim_run = 0;
1874
1875         vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1876
1877         /*
1878          *      We must limit the rate at which we send pages to the pagers
1879          *      so that we don't tie up too many pages in the I/O queues.
1880          *      We implement a throttling mechanism using the laundry count
1881          *      to limit the number of pages outstanding to the default
1882          *      and external pagers.  We can bypass the throttles and look
1883          *      for clean pages if the pageout queues don't drain in a timely
1884          *      fashion since this may indicate that the pageout paths are
1885          *      stalled waiting for memory, which only we can provide.
1886          */
1887
1888 Restart:
1889
1890         assert(object == NULL);
1891         assert(delayed_unlock != 0);
1892
1893         vm_page_anonymous_min = vm_page_inactive_target / 20;
1894
1895         if (vm_pageout_state.vm_page_speculative_percentage > 50)
1896                 vm_pageout_state.vm_page_speculative_percentage = 50;
1897         else if (vm_pageout_state.vm_page_speculative_percentage <= 0)
1898                 vm_pageout_state.vm_page_speculative_percentage = 1;
1899
1900         vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1901                                                                                  vm_page_inactive_count);
1902
1903         for (;;) {
1904                 vm_page_t m;
1905
1906                 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1907
1908                 if (vm_upl_wait_for_pages < 0)
1909                         vm_upl_wait_for_pages = 0;
1910
1911                 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1912
1913                 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1914                         delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1915
1916 #if CONFIG_SECLUDED_MEMORY
1917                 /*
1918                  * Deal with secluded_q overflow.
1919                  */
1920                 if (vm_page_secluded_count > vm_page_secluded_target) {
1921                         vm_page_t secluded_page;
1922
1923                         /*
1924                          * SECLUDED_AGING_BEFORE_ACTIVE:
1925                          * Excess secluded pages go to the active queue and
1926                          * will later go to the inactive queue.
1927                          */
1928                         assert((vm_page_secluded_count_free +
1929                                 vm_page_secluded_count_inuse) ==
1930                                 vm_page_secluded_count);
1931                         secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1932                         assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1933
1934                         vm_page_queues_remove(secluded_page, FALSE);
1935                         assert(!secluded_page->vmp_fictitious);
1936                         assert(!VM_PAGE_WIRED(secluded_page));
1937
1938                         if (secluded_page->vmp_object == 0) {
1939                                 /* transfer to free queue */
1940                                 assert(secluded_page->vmp_busy);
1941                                 secluded_page->vmp_snext = local_freeq;
1942                                 local_freeq = secluded_page;
1943                                 local_freed++;
1944                         } else {
1945                                 /* transfer to head of active queue */
1946                                 vm_page_enqueue_active(secluded_page, FALSE);
1947                                 secluded_page = VM_PAGE_NULL;
1948                         }
1949                 }
1950 #endif /* CONFIG_SECLUDED_MEMORY */
1951
1952                 assert(delayed_unlock);
1953
1954                 /*
1955                  * maintain our balance
1956                  */
1957                 vm_page_balance_inactive(1);
1958
1959
1960                 /**********************************************************************
1961                  * above this point we're playing with the active and secluded queues
1962                  * below this point we're playing with the throttling mechanisms
1963                  * and the inactive queue
1964                  **********************************************************************/
1965
1966                 if (vm_page_free_count + local_freed >= vm_page_free_target)
1967                 {
1968                         vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1969
1970                         vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
1971                                                     VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
1972                         /*
1973                          * make sure the pageout I/O threads are running
1974                          * throttled in case there are still requests
1975                          * in the laundry... since we have met our targets
1976                          * we don't need the laundry to be cleaned in a timely
1977                          * fashion... so let's avoid interfering with foreground
1978                          * activity
1979                          */
1980                         vm_pageout_adjust_eq_iothrottle(eq, TRUE);
1981
1982                         lck_mtx_lock(&vm_page_queue_free_lock);
1983
1984                         if ((vm_page_free_count >= vm_page_free_target) &&
1985                             (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1986                                 /*
1987                                  * done - we have met our target *and*
1988                                  * there is no one waiting for a page.
1989                                  */
1990 return_from_scan:
1991                                 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
1992
1993                                 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
1994                                                         vm_pageout_state.vm_pageout_inactive,
1995                                                         vm_pageout_state.vm_pageout_inactive_used, 0, 0);
1996                                 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
1997                                                         vm_pageout_vminfo.vm_pageout_freed_speculative,
1998                                                         vm_pageout_state.vm_pageout_inactive_clean,
1999                                                         vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
2000                                                         vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
2001
2002                                 return;
2003                         }
2004                         lck_mtx_unlock(&vm_page_queue_free_lock);
2005                 }
2006
2007                 /*
2008                  * Before anything, we check if we have any ripe volatile
2009                  * objects around. If so, try to purge the first object.
2010                  * If the purge fails, fall through to reclaim a page instead.
2011                  * If the purge succeeds, go back to the top and reevalute
2012                  * the new memory situation.
2013                  */
2014
2015                 assert (available_for_purge>=0);
2016                 force_purge = 0; /* no force-purging */
2017
2018 #if VM_PRESSURE_EVENTS
2019                 pressure_level = memorystatus_vm_pressure_level;
2020
2021                 if (pressure_level > kVMPressureNormal) {
2022
2023                         if (pressure_level >= kVMPressureCritical) {
2024                                 force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2025                         } else if (pressure_level >= kVMPressureUrgent) {
2026                                 force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2027                         } else if (pressure_level >= kVMPressureWarning) {
2028                                 force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2029                         }
2030                 }
2031 #endif /* VM_PRESSURE_EVENTS */
2032
2033                 if (available_for_purge || force_purge) {
2034
2035                         if (object != NULL) {
2036                                 vm_object_unlock(object);
2037                                 object = NULL;
2038                         }
2039
2040                         memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2041
2042                         VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2043                         if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2044                                 VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2045                                 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2046                                 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2047                                 continue;
2048                         }
2049                         VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2050                         memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2051                 }
2052
2053                 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2054                         /*
2055                          * try to pull pages from the aging bins...
2056                          * see vm_page.h for an explanation of how
2057                          * this mechanism works
2058                          */
2059                         struct vm_speculative_age_q     *aq;
2060                         boolean_t       can_steal = FALSE;
2061                         int num_scanned_queues;
2062
2063                         aq = &vm_page_queue_speculative[speculative_steal_index];
2064
2065                         num_scanned_queues = 0;
2066                         while (vm_page_queue_empty(&aq->age_q) &&
2067                                num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2068
2069                                 speculative_steal_index++;
2070
2071                                 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2072                                         speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2073
2074                                 aq = &vm_page_queue_speculative[speculative_steal_index];
2075                         }
2076
2077                         if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2078                                 /*
2079                                  * XXX We've scanned all the speculative
2080                                  * queues but still haven't found one
2081                                  * that is not empty, even though
2082                                  * vm_page_speculative_count is not 0.
2083                                  */
2084                                 if (!vm_page_queue_empty(&sq->age_q))
2085                                         continue;
2086 #if DEVELOPMENT || DEBUG
2087                                 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2088 #endif
2089                                 /* readjust... */
2090                                 vm_page_speculative_count = 0;
2091                                 /* ... and continue */
2092                                 continue;
2093                         }
2094
2095                         if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE)
2096                                 can_steal = TRUE;
2097                         else {
2098                                 if (!delay_speculative_age) {
2099                                         mach_timespec_t ts_fully_aged;
2100
2101                                         ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
2102                                         ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
2103                                                 * 1000 * NSEC_PER_USEC;
2104
2105                                         ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2106
2107                                         clock_sec_t sec;
2108                                         clock_nsec_t nsec;
2109                                         clock_get_system_nanotime(&sec, &nsec);
2110                                         ts.tv_sec = (unsigned int) sec;
2111                                         ts.tv_nsec = nsec;
2112
2113                                         if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2114                                                 can_steal = TRUE;
2115                                         else
2116                                                 delay_speculative_age++;
2117                                 } else {
2118                                         delay_speculative_age++;
2119                                         if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2120                                                 delay_speculative_age = 0;
2121                                 }
2122                         }
2123                         if (can_steal == TRUE)
2124                                 vm_page_speculate_ageit(aq);
2125                 }
2126                 force_speculative_aging = FALSE;
2127
2128                 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2129
2130                         int     pages_evicted;
2131
2132                         if (object != NULL) {
2133                                 vm_object_unlock(object);
2134                                 object = NULL;
2135                         }
2136                         KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
2137
2138                         pages_evicted = vm_object_cache_evict(100, 10);
2139
2140                         KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
2141
2142                         if (pages_evicted) {
2143
2144                                 vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2145
2146                                 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2147                                                vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2148                                 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2149
2150                                 /*
2151                                  * we just freed up to 100 pages,
2152                                  * so go back to the top of the main loop
2153                                  * and re-evaulate the memory situation
2154                                  */
2155                                 continue;
2156                         } else
2157                                 cache_evict_throttle = 1000;
2158                 }
2159                 if  (cache_evict_throttle)
2160                         cache_evict_throttle--;
2161
2162                 divisor = vm_pageout_state.vm_page_filecache_min_divisor;
2163
2164 #if CONFIG_JETSAM
2165                 /*
2166                  * don't let the filecache_min fall below 15% of available memory
2167                  * on systems with an active compressor that isn't nearing its
2168                  * limits w/r to accepting new data
2169                  *
2170                  * on systems w/o the compressor/swapper, the filecache is always
2171                  * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2172                  * since most (if not all) of the anonymous pages are in the
2173                  * throttled queue (which isn't counted as available) which
2174                  * effectively disables this filter
2175                  */
2176                 if (vm_compressor_low_on_space() || divisor == 0)
2177                         vm_pageout_state.vm_page_filecache_min = 0;
2178                 else
2179                         vm_pageout_state.vm_page_filecache_min =
2180                           ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2181 #else
2182                 if (vm_compressor_out_of_space() || divisor == 0)
2183                         vm_pageout_state.vm_page_filecache_min = 0;
2184                 else {
2185                         /*
2186                          * don't let the filecache_min fall below the specified critical level
2187                          */
2188                         vm_pageout_state.vm_page_filecache_min =
2189                           ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2190                 }
2191 #endif
2192                 if (vm_page_free_count < (vm_page_free_reserved / 4))
2193                         vm_pageout_state.vm_page_filecache_min = 0;
2194
2195                 exceeded_burst_throttle = FALSE;
2196                 /*
2197                  * Sometimes we have to pause:
2198                  *      1) No inactive pages - nothing to do.
2199                  *      2) Loop control - no acceptable pages found on the inactive queue
2200                  *         within the last vm_pageout_burst_inactive_throttle iterations
2201                  *      3) Flow control - default pageout queue is full
2202                  */
2203                 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2204                     vm_page_queue_empty(&vm_page_queue_anonymous) &&
2205                     vm_page_queue_empty(&vm_page_queue_cleaned) &&
2206                     vm_page_queue_empty(&sq->age_q)) {
2207                         VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
2208                         msecs = vm_pageout_state.vm_pageout_empty_wait;
2209                         goto vm_pageout_scan_delay;
2210
2211                 } else if (inactive_burst_count >=
2212                            MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2213                                (vm_page_inactive_count +
2214                                 vm_page_speculative_count))) {
2215                         VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
2216                         msecs = vm_pageout_state.vm_pageout_burst_wait;
2217
2218                         exceeded_burst_throttle = TRUE;
2219                         goto vm_pageout_scan_delay;
2220
2221                 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2222                                   VM_DYNAMIC_PAGING_ENABLED()) {
2223                         clock_sec_t sec;
2224                         clock_nsec_t nsec;
2225
2226                         switch (flow_control.state) {
2227
2228                         case FCS_IDLE:
2229                                 if ((vm_page_free_count + local_freed) < vm_page_free_target &&
2230                                     vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2231                                         /*
2232                                          * since the compressor is running independently of vm_pageout_scan
2233                                          * let's not wait for it just yet... as long as we have a healthy supply
2234                                          * of filecache pages to work with, let's keep stealing those.
2235                                          */
2236                                         inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2237
2238                                         if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2239                                             (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2240                                                 anons_grabbed = ANONS_GRABBED_LIMIT;
2241                                                 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
2242                                                 goto consider_inactive;
2243                                         }
2244                                 }
2245 reset_deadlock_timer:
2246                                 ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2247                                 ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2248                                 clock_get_system_nanotime(&sec, &nsec);
2249                                 flow_control.ts.tv_sec = (unsigned int) sec;
2250                                 flow_control.ts.tv_nsec = nsec;
2251                                 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2252
2253                                 flow_control.state = FCS_DELAYED;
2254                                 msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2255
2256                                 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2257                                 break;
2258
2259                         case FCS_DELAYED:
2260                                 clock_get_system_nanotime(&sec, &nsec);
2261                                 ts.tv_sec = (unsigned int) sec;
2262                                 ts.tv_nsec = nsec;
2263
2264                                 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2265                                         /*
2266                                          * the pageout thread for the default pager is potentially
2267                                          * deadlocked since the
2268                                          * default pager queue has been throttled for more than the
2269                                          * allowable time... we need to move some clean pages or dirty
2270                                          * pages belonging to the external pagers if they aren't throttled
2271                                          * vm_page_free_wanted represents the number of threads currently
2272                                          * blocked waiting for pages... we'll move one page for each of
2273                                          * these plus a fixed amount to break the logjam... once we're done
2274                                          * moving this number of pages, we'll re-enter the FSC_DELAYED state
2275                                          * with a new timeout target since we have no way of knowing
2276                                          * whether we've broken the deadlock except through observation
2277                                          * of the queue associated with the default pager... we need to
2278                                          * stop moving pages and allow the system to run to see what
2279                                          * state it settles into.
2280                                          */
2281                                         vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2282                                                                      vm_page_free_wanted + vm_page_free_wanted_privileged;
2283                                         VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
2284                                         flow_control.state = FCS_DEADLOCK_DETECTED;
2285                                         thread_wakeup((event_t) &vm_pageout_garbage_collect);
2286                                         goto consider_inactive;
2287                                 }
2288                                 /*
2289                                  * just resniff instead of trying
2290                                  * to compute a new delay time... we're going to be
2291                                  * awakened immediately upon a laundry completion,
2292                                  * so we won't wait any longer than necessary
2293                                  */
2294                                 msecs = vm_pageout_state.vm_pageout_idle_wait;
2295                                 break;
2296
2297                         case FCS_DEADLOCK_DETECTED:
2298                                 if (vm_pageout_deadlock_target)
2299                                         goto consider_inactive;
2300                                 goto reset_deadlock_timer;
2301
2302                         }
2303 vm_pageout_scan_delay:
2304                         vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2305
2306                         vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2307                                                     VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2308
2309                         if (vm_page_free_count >= vm_page_free_target) {
2310                                 /*
2311                                  * we're here because
2312                                  *  1) someone else freed up some pages while we had
2313                                  *     the queues unlocked above
2314                                  * and we've hit one of the 3 conditions that
2315                                  * cause us to pause the pageout scan thread
2316                                  *
2317                                  * since we already have enough free pages,
2318                                  * let's avoid stalling and return normally
2319                                  *
2320                                  * before we return, make sure the pageout I/O threads
2321                                  * are running throttled in case there are still requests
2322                                  * in the laundry... since we have enough free pages
2323                                  * we don't need the laundry to be cleaned in a timely
2324                                  * fashion... so let's avoid interfering with foreground
2325                                  * activity
2326                                  *
2327                                  * we don't want to hold vm_page_queue_free_lock when
2328                                  * calling vm_pageout_adjust_eq_iothrottle (since it
2329                                  * may cause other locks to be taken), we do the intitial
2330                                  * check outside of the lock.  Once we take the lock,
2331                                  * we recheck the condition since it may have changed.
2332                                  * if it has, no problem, we will make the threads
2333                                  * non-throttled before actually blocking
2334                                  */
2335                                 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2336                         }
2337                         lck_mtx_lock(&vm_page_queue_free_lock);
2338
2339                         if (vm_page_free_count >= vm_page_free_target &&
2340                             (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2341                                 goto return_from_scan;
2342                         }
2343                         lck_mtx_unlock(&vm_page_queue_free_lock);
2344
2345                         if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2346                                 /*
2347                                  * we're most likely about to block due to one of
2348                                  * the 3 conditions that cause vm_pageout_scan to
2349                                  * not be able to make forward progress w/r
2350                                  * to providing new pages to the free queue,
2351                                  * so unthrottle the I/O threads in case we
2352                                  * have laundry to be cleaned... it needs
2353                                  * to be completed ASAP.
2354                                  *
2355                                  * even if we don't block, we want the io threads
2356                                  * running unthrottled since the sum of free +
2357                                  * clean pages is still under our free target
2358                                  */
2359                                 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2360                         }
2361                         if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2362                                 /*
2363                                  * if we get here we're below our free target and
2364                                  * we're stalling due to a full laundry queue or
2365                                  * we don't have any inactive pages other then
2366                                  * those in the clean queue...
2367                                  * however, we have pages on the clean queue that
2368                                  * can be moved to the free queue, so let's not
2369                                  * stall the pageout scan
2370                                  */
2371                                 flow_control.state = FCS_IDLE;
2372                                 goto consider_inactive;
2373                         }
2374                         if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2375                                 flow_control.state = FCS_IDLE;
2376                                 goto consider_inactive;
2377                         }
2378
2379                         VM_CHECK_MEMORYSTATUS;
2380
2381                         if (flow_control.state != FCS_IDLE)
2382                                 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
2383
2384                         iq->pgo_throttled = TRUE;
2385                         assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2386
2387                         counter(c_vm_pageout_scan_block++);
2388
2389                         vm_page_unlock_queues();
2390
2391                         assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2392
2393                         VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2394                                        iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2395                         memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2396
2397                         thread_block(THREAD_CONTINUE_NULL);
2398
2399                         VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2400                                        iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2401                         memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2402
2403                         vm_page_lock_queues();
2404
2405                         iq->pgo_throttled = FALSE;
2406
2407                         if (loop_count >= vm_page_inactive_count)
2408                                 loop_count = 0;
2409                         inactive_burst_count = 0;
2410
2411                         goto Restart;
2412                         /*NOTREACHED*/
2413                 }
2414
2415
2416                 flow_control.state = FCS_IDLE;
2417 consider_inactive:
2418                 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2419                                                                             vm_pageout_inactive_external_forced_reactivate_limit);
2420                 loop_count++;
2421                 inactive_burst_count++;
2422                 vm_pageout_state.vm_pageout_inactive++;
2423
2424                 /*
2425                  * Choose a victim.
2426                  */
2427                 while (1) {
2428
2429 #if CONFIG_BACKGROUND_QUEUE
2430                         page_from_bg_q = FALSE;
2431 #endif /* CONFIG_BACKGROUND_QUEUE */
2432
2433                         m = NULL;
2434                         m_object = VM_OBJECT_NULL;
2435
2436                         if (VM_DYNAMIC_PAGING_ENABLED()) {
2437                                 assert(vm_page_throttled_count == 0);
2438                                 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2439                         }
2440
2441                         /*
2442                          * Try for a clean-queue inactive page.
2443                          * These are pages that vm_pageout_scan tried to steal earlier, but
2444                          * were dirty and had to be cleaned.  Pick them up now that they are clean.
2445                          */
2446                         if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2447                                 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2448
2449                                 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2450
2451                                 break;
2452                         }
2453
2454                         /*
2455                          * The next most eligible pages are ones we paged in speculatively,
2456                          * but which have not yet been touched and have been aged out.
2457                          */
2458                         if (!vm_page_queue_empty(&sq->age_q)) {
2459                                 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2460
2461                                 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2462
2463                                 if (!m->vmp_dirty || force_anonymous == FALSE)
2464                                         break;
2465                                 else
2466                                         m = NULL;
2467                         }
2468
2469 #if CONFIG_BACKGROUND_QUEUE
2470                         if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2471                                 vm_object_t     bg_m_object = NULL;
2472
2473                                 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2474
2475                                 bg_m_object = VM_PAGE_OBJECT(m);
2476
2477                                 if (!VM_PAGE_PAGEABLE(m)) {
2478                                         /*
2479                                          * This page is on the background queue
2480                                          * but not on a pageable queue.  This is
2481                                          * likely a transient state and whoever
2482                                          * took it out of its pageable queue
2483                                          * will likely put it back on a pageable
2484                                          * queue soon but we can't deal with it
2485                                          * at this point, so let's ignore this
2486                                          * page.
2487                                          */
2488                                 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2489
2490                                         if (bg_m_object->internal &&
2491                                             (VM_PAGE_Q_THROTTLED(iq) ||
2492                                              vm_compressor_out_of_space() == TRUE ||
2493                                              vm_page_free_count < (vm_page_free_reserved / 4))) {
2494
2495                                                 vm_pageout_skipped_bq_internal++;
2496                                         } else {
2497                                                 page_from_bg_q = TRUE;
2498
2499                                                 if (bg_m_object->internal)
2500                                                         vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2501                                                 else
2502                                                         vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2503                                                 break;
2504                                         }
2505                                 }
2506                         }
2507 #endif
2508                         inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2509
2510                         if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2511                             (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2512                                 grab_anonymous = TRUE;
2513                                 anons_grabbed = 0;
2514
2515                                 vm_pageout_vminfo.vm_pageout_skipped_external++;
2516                                 goto want_anonymous;
2517                         }
2518                         grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2519
2520 #if CONFIG_JETSAM
2521                         /* If the file-backed pool has accumulated
2522                          * significantly more pages than the jetsam
2523                          * threshold, prefer to reclaim those
2524                          * inline to minimise compute overhead of reclaiming
2525                          * anonymous pages.
2526                          * This calculation does not account for the CPU local
2527                          * external page queues, as those are expected to be
2528                          * much smaller relative to the global pools.
2529                          */
2530                         if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2531                                 if (vm_page_pageable_external_count >
2532                                     vm_pageout_state.vm_page_filecache_min) {
2533                                         if ((vm_page_pageable_external_count *
2534                                                 vm_pageout_memorystatus_fb_factor_dr) >
2535                                             (memorystatus_available_pages_critical *
2536                                             vm_pageout_memorystatus_fb_factor_nr)) {
2537                                                 grab_anonymous = FALSE;
2538
2539                                                 VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2540                                         }
2541                                 }
2542                                 if (grab_anonymous) {
2543                                         VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2544                                 }
2545                         }
2546 #endif /* CONFIG_JETSAM */
2547
2548 want_anonymous:
2549                         if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2550
2551                                 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2552                                         m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2553
2554                                         assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2555                                         anons_grabbed = 0;
2556
2557                                         if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2558
2559                                               if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2560                                                     if ((++reactivated_this_call % 100)) {
2561                                                           vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2562                                                           goto must_activate_page;
2563                                                     }
2564                                                     /*
2565                                                      * steal 1% of the file backed pages even if
2566                                                      * we are under the limit that has been set
2567                                                      * for a healthy filecache
2568                                                      */
2569                                               }
2570                                         }
2571                                         break;
2572                                 }
2573                         }
2574                         if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2575                                 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2576
2577                                 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2578                                 anons_grabbed++;
2579
2580                                 break;
2581                         }
2582
2583                         /*
2584                          * if we've gotten here, we have no victim page.
2585                          * check to see if we've not finished balancing the queues
2586                          * or we have a page on the aged speculative queue that we
2587                          * skipped due to force_anonymous == TRUE.. or we have
2588                          * speculative  pages that we can prematurely age... if
2589                          * one of these cases we'll keep going, else panic
2590                          */
2591                         force_anonymous = FALSE;
2592                         VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
2593
2594                         if (!vm_page_queue_empty(&sq->age_q))
2595                                 goto done_with_inactivepage;
2596
2597                         if (vm_page_speculative_count) {
2598                                 force_speculative_aging = TRUE;
2599                                 goto done_with_inactivepage;
2600                         }
2601                         panic("vm_pageout: no victim");
2602
2603                         /* NOTREACHED */
2604                 }
2605                 assert(VM_PAGE_PAGEABLE(m));
2606                 m_object = VM_PAGE_OBJECT(m);
2607                 force_anonymous = FALSE;
2608
2609                 page_prev_q_state = m->vmp_q_state;
2610                 /*
2611                  * we just found this page on one of our queues...
2612                  * it can't also be on the pageout queue, so safe
2613                  * to call vm_page_queues_remove
2614                  */
2615                 vm_page_queues_remove(m, TRUE);
2616
2617                 assert(!m->vmp_laundry);
2618                 assert(!m->vmp_private);
2619                 assert(!m->vmp_fictitious);
2620                 assert(m_object != kernel_object);
2621                 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2622
2623                 vm_pageout_vminfo.vm_pageout_considered_page++;
2624
2625                 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2626
2627                 /*
2628                  * check to see if we currently are working
2629                  * with the same object... if so, we've
2630                  * already got the lock
2631                  */
2632                 if (m_object != object) {
2633                         /*
2634                          * the object associated with candidate page is
2635                          * different from the one we were just working
2636                          * with... dump the lock if we still own it
2637                          */
2638                         if (object != NULL) {
2639                                 vm_object_unlock(object);
2640                                 object = NULL;
2641                         }
2642                         /*
2643                          * Try to lock object; since we've alread got the
2644                          * page queues lock, we can only 'try' for this one.
2645                          * if the 'try' fails, we need to do a mutex_pause
2646                          * to allow the owner of the object lock a chance to
2647                          * run... otherwise, we're likely to trip over this
2648                          * object in the same state as we work our way through
2649                          * the queue... clumps of pages associated with the same
2650                          * object are fairly typical on the inactive and active queues
2651                          */
2652                         if (!vm_object_lock_try_scan(m_object)) {
2653                                 vm_page_t m_want = NULL;
2654
2655                                 vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2656
2657                                 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2658                                         VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
2659
2660                                 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2661
2662                                 m->vmp_reference = FALSE;
2663
2664                                 if ( !m_object->object_is_shared_cache) {
2665                                         /*
2666                                          * don't apply this optimization if this is the shared cache
2667                                          * object, it's too easy to get rid of very hot and important
2668                                          * pages...
2669                                          * m->vmp_object must be stable since we hold the page queues lock...
2670                                          * we can update the scan_collisions field sans the object lock
2671                                          * since it is a separate field and this is the only spot that does
2672                                          * a read-modify-write operation and it is never executed concurrently...
2673                                          * we can asynchronously set this field to 0 when creating a UPL, so it
2674                                          * is possible for the value to be a bit non-determistic, but that's ok
2675                                          * since it's only used as a hint
2676                                          */
2677                                         m_object->scan_collisions = 1;
2678                                 }
2679                                 if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2680                                         m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2681                                 else if ( !vm_page_queue_empty(&sq->age_q))
2682                                         m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2683                                 else if ( (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT ||
2684                                            vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2685                                           !vm_page_queue_empty(&vm_page_queue_inactive))
2686                                         m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2687                                 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2688                                         m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2689
2690                                 /*
2691                                  * this is the next object we're going to be interested in
2692                                  * try to make sure its available after the mutex_pause
2693                                  * returns control
2694                                  */
2695                                 if (m_want)
2696                                         vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2697
2698                                 goto requeue_page;
2699                         }
2700                         object = m_object;
2701                         vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2702                 }
2703                 assert(m_object == object);
2704                 assert(VM_PAGE_OBJECT(m) == m_object);
2705
2706                 if (m->vmp_busy) {
2707                         /*
2708                          *      Somebody is already playing with this page.
2709                          *      Put it back on the appropriate queue
2710                          *
2711                          */
2712                         VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
2713
2714                         if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2715                                 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
2716 requeue_page:
2717                         if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2718                                 vm_page_enqueue_inactive(m, FALSE);
2719                         else
2720                                 vm_page_activate(m);
2721 #if CONFIG_BACKGROUND_QUEUE
2722 #if DEVELOPMENT || DEBUG
2723                         if (page_from_bg_q == TRUE) {
2724                                 if (m_object->internal)
2725                                         vm_pageout_rejected_bq_internal++;
2726                                 else
2727                                         vm_pageout_rejected_bq_external++;
2728                         }
2729 #endif
2730 #endif
2731                         goto done_with_inactivepage;
2732                 }
2733
2734                 /*
2735                  *   if (m->vmp_cleaning && !m->vmp_free_when_done)
2736                  *      If already cleaning this page in place
2737                  *      just leave if off the paging queues.
2738                  *      We can leave the page mapped, and upl_commit_range
2739                  *      will put it on the clean queue.
2740                  *
2741                  *   if (m->vmp_free_when_done && !m->vmp_cleaning)
2742                  *      an msync INVALIDATE is in progress...
2743                  *      this page has been marked for destruction
2744                  *      after it has been cleaned,
2745                  *      but not yet gathered into a UPL
2746                  *      where 'cleaning' will be set...
2747                  *      just leave it off the paging queues
2748                  *
2749                  *   if (m->vmp_free_when_done && m->vmp_clenaing)
2750                  *      an msync INVALIDATE is in progress
2751                  *      and the UPL has already gathered this page...
2752                  *      just leave it off the paging queues
2753                  */
2754                 if (m->vmp_free_when_done || m->vmp_cleaning) {
2755                         goto done_with_inactivepage;
2756                 }
2757
2758
2759                 /*
2760                  *      If it's absent, in error or the object is no longer alive,
2761                  *      we can reclaim the page... in the no longer alive case,
2762                  *      there are 2 states the page can be in that preclude us
2763                  *      from reclaiming it - busy or cleaning - that we've already
2764                  *      dealt with
2765                  */
2766                 if (m->vmp_absent || m->vmp_error || !object->alive) {
2767
2768                         if (m->vmp_absent)
2769                                 VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
2770                         else if (!object->alive)
2771                                 VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
2772                         else
2773                                 VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
2774 reclaim_page:
2775                         if (vm_pageout_deadlock_target) {
2776                                 VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
2777                                 vm_pageout_deadlock_target--;
2778                         }
2779
2780                         DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2781
2782                         if (object->internal) {
2783                                 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2784                         } else {
2785                                 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2786                         }
2787                         assert(!m->vmp_cleaning);
2788                         assert(!m->vmp_laundry);
2789
2790                         if (!object->internal &&
2791                             object->pager != NULL &&
2792                             object->pager->mo_pager_ops == &shared_region_pager_ops) {
2793                                 shared_region_pager_reclaimed++;
2794                         }
2795
2796                         m->vmp_busy = TRUE;
2797
2798                         /*
2799                          * remove page from object here since we're already
2800                          * behind the object lock... defer the rest of the work
2801                          * we'd normally do in vm_page_free_prepare_object
2802                          * until 'vm_page_free_list' is called
2803                          */
2804                         if (m->vmp_tabled)
2805                                 vm_page_remove(m, TRUE);
2806
2807                         assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
2808                         m->vmp_snext = local_freeq;
2809                         local_freeq = m;
2810                         local_freed++;
2811
2812                         if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2813                                 vm_pageout_vminfo.vm_pageout_freed_speculative++;
2814                         else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2815                                 vm_pageout_vminfo.vm_pageout_freed_cleaned++;
2816                         else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
2817                                 vm_pageout_vminfo.vm_pageout_freed_internal++;
2818                         else
2819                                 vm_pageout_vminfo.vm_pageout_freed_external++;
2820
2821                         inactive_burst_count = 0;
2822                         goto done_with_inactivepage;
2823                 }
2824                 if (object->copy == VM_OBJECT_NULL) {
2825                         /*
2826                          * No one else can have any interest in this page.
2827                          * If this is an empty purgable object, the page can be
2828                          * reclaimed even if dirty.
2829                          * If the page belongs to a volatile purgable object, we
2830                          * reactivate it if the compressor isn't active.
2831                          */
2832                         if (object->purgable == VM_PURGABLE_EMPTY) {
2833                                 if (m->vmp_pmapped == TRUE) {
2834                                         /* unmap the page */
2835                                         refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2836                                         if (refmod_state & VM_MEM_MODIFIED) {
2837                                                 SET_PAGE_DIRTY(m, FALSE);
2838                                         }
2839                                 }
2840                                 if (m->vmp_dirty || m->vmp_precious) {
2841                                         /* we saved the cost of cleaning this page ! */
2842                                         vm_page_purged_count++;
2843                                 }
2844                                 goto reclaim_page;
2845                         }
2846
2847                         if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2848                                 /*
2849                                  * With the VM compressor, the cost of
2850                                  * reclaiming a page is much lower (no I/O),
2851                                  * so if we find a "volatile" page, it's better
2852                                  * to let it get compressed rather than letting
2853                                  * it occupy a full page until it gets purged.
2854                                  * So no need to check for "volatile" here.
2855                                  */
2856                         } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2857                                 /*
2858                                  * Avoid cleaning a "volatile" page which might
2859                                  * be purged soon.
2860                                  */
2861
2862                                 /* if it's wired, we can't put it on our queue */
2863                                 assert(!VM_PAGE_WIRED(m));
2864
2865                                 /* just stick it back on! */
2866                                 reactivated_this_call++;
2867
2868                                 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2869                                         VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
2870
2871                                 goto reactivate_page;
2872                         }
2873                 }
2874                 /*
2875                  *      If it's being used, reactivate.
2876                  *      (Fictitious pages are either busy or absent.)
2877                  *      First, update the reference and dirty bits
2878                  *      to make sure the page is unreferenced.
2879                  */
2880                 refmod_state = -1;
2881
2882                 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
2883                         refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2884
2885                         if (refmod_state & VM_MEM_REFERENCED)
2886                                 m->vmp_reference = TRUE;
2887                         if (refmod_state & VM_MEM_MODIFIED) {
2888                                 SET_PAGE_DIRTY(m, FALSE);
2889                         }
2890                 }
2891
2892                 if (m->vmp_reference || m->vmp_dirty) {
2893                         /* deal with a rogue "reusable" page */
2894                         VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
2895                 }
2896                 divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
2897
2898                 if (divisor == 0)
2899                         vm_pageout_state.vm_page_xpmapped_min = 0;
2900                 else
2901                         vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / divisor;
2902
2903                 if (!m->vmp_no_cache &&
2904 #if CONFIG_BACKGROUND_QUEUE
2905                     page_from_bg_q == FALSE &&
2906 #endif
2907                     (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
2908                                       (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
2909                         /*
2910                          * The page we pulled off the inactive list has
2911                          * been referenced.  It is possible for other
2912                          * processors to be touching pages faster than we
2913                          * can clear the referenced bit and traverse the
2914                          * inactive queue, so we limit the number of
2915                          * reactivations.
2916                          */
2917                         if (++reactivated_this_call >= reactivate_limit) {
2918                                 vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
2919                         } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2920                                 vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
2921                         } else {
2922                                 uint32_t isinuse;
2923
2924                                 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2925                                         VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
2926
2927                                 vm_pageout_vminfo.vm_pageout_inactive_referenced++;
2928 reactivate_page:
2929                                 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2930                                      vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2931                                         /*
2932                                          * no explict mappings of this object exist
2933                                          * and it's not open via the filesystem
2934                                          */
2935                                         vm_page_deactivate(m);
2936                                         VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
2937                                 } else {
2938 must_activate_page:
2939                                         /*
2940                                          * The page was/is being used, so put back on active list.
2941                                          */
2942                                         vm_page_activate(m);
2943                                         VM_STAT_INCR(reactivations);
2944                                         inactive_burst_count = 0;
2945                                 }
2946 #if CONFIG_BACKGROUND_QUEUE
2947 #if DEVELOPMENT || DEBUG
2948                                 if (page_from_bg_q == TRUE) {
2949                                         if (m_object->internal)
2950                                                 vm_pageout_rejected_bq_internal++;
2951                                         else
2952                                                 vm_pageout_rejected_bq_external++;
2953                                 }
2954 #endif
2955 #endif
2956                                 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2957                                         VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
2958                                 vm_pageout_state.vm_pageout_inactive_used++;
2959
2960                                 goto done_with_inactivepage;
2961                         }
2962                         /*
2963                          * Make sure we call pmap_get_refmod() if it
2964                          * wasn't already called just above, to update
2965                          * the dirty bit.
2966                          */
2967                         if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
2968                                 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2969                                 if (refmod_state & VM_MEM_MODIFIED) {
2970                                         SET_PAGE_DIRTY(m, FALSE);
2971                                 }
2972                         }
2973                 }
2974
2975                 XPR(XPR_VM_PAGEOUT,
2976                 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2977                 object, m->vmp_offset, m, 0,0);
2978
2979                 /*
2980                  * we've got a candidate page to steal...
2981                  *
2982                  * m->vmp_dirty is up to date courtesy of the
2983                  * preceding check for m->vmp_reference... if
2984                  * we get here, then m->vmp_reference had to be
2985                  * FALSE (or possibly "reactivate_limit" was
2986                  * exceeded), but in either case we called
2987                  * pmap_get_refmod() and updated both
2988                  * m->vmp_reference and m->vmp_dirty
2989                  *
2990                  * if it's dirty or precious we need to
2991                  * see if the target queue is throtttled
2992                  * it if is, we need to skip over it by moving it back
2993                  * to the end of the inactive queue
2994                  */
2995
2996                 inactive_throttled = FALSE;
2997
2998                 if (m->vmp_dirty || m->vmp_precious) {
2999                         if (object->internal) {
3000                                 if (VM_PAGE_Q_THROTTLED(iq))
3001                                         inactive_throttled = TRUE;
3002                         } else if (VM_PAGE_Q_THROTTLED(eq)) {
3003                                 inactive_throttled = TRUE;
3004                         }
3005                 }
3006 throttle_inactive:
3007                 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3008                     object->internal && m->vmp_dirty &&
3009                     (object->purgable == VM_PURGABLE_DENY ||
3010                      object->purgable == VM_PURGABLE_NONVOLATILE ||
3011                      object->purgable == VM_PURGABLE_VOLATILE)) {
3012                         vm_page_check_pageable_safe(m);
3013                         assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3014                         vm_page_queue_enter(&vm_page_queue_throttled, m,
3015                                             vm_page_t, vmp_pageq);
3016                         m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3017                         vm_page_throttled_count++;
3018
3019                         VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3020
3021                         inactive_burst_count = 0;
3022                         goto done_with_inactivepage;
3023                 }
3024                 if (inactive_throttled == TRUE) {
3025
3026                         if (object->internal == FALSE) {
3027                                 /*
3028                                  * we need to break up the following potential deadlock case...
3029                                  *  a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3030                                  *  b) The thread doing the writing is waiting for pages while holding the truncate lock
3031                                  *  c) Most of the pages in the inactive queue belong to this file.
3032                                  *
3033                                  * we are potentially in this deadlock because...
3034                                  *  a) the external pageout queue is throttled
3035                                  *  b) we're done with the active queue and moved on to the inactive queue
3036                                  *  c) we've got a dirty external page
3037                                  *
3038                                  * since we don't know the reason for the external pageout queue being throttled we
3039                                  * must suspect that we are deadlocked, so move the current page onto the active queue
3040                                  * in an effort to cause a page from the active queue to 'age' to the inactive queue
3041                                  *
3042                                  * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3043                                  * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3044                                  * pool the next time we select a victim page... if we can make enough new free pages,
3045                                  * the deadlock will break, the external pageout queue will empty and it will no longer
3046                                  * be throttled
3047                                  *
3048                                  * if we have jetsam configured, keep a count of the pages reactivated this way so
3049                                  * that we can try to find clean pages in the active/inactive queues before
3050                                  * deciding to jetsam a process
3051                                  */
3052                                 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
3053
3054                                 vm_page_check_pageable_safe(m);
3055                                 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3056                                 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, vmp_pageq);
3057                                 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
3058                                 vm_page_active_count++;
3059                                 vm_page_pageable_external_count++;
3060
3061                                 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3062
3063 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3064                                 vm_pageout_inactive_external_forced_reactivate_limit--;
3065
3066                                 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3067                                         vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3068                                         /*
3069                                          * Possible deadlock scenario so request jetsam action
3070                                          */
3071                                         assert(object);
3072                                         vm_object_unlock(object);
3073                                         object = VM_OBJECT_NULL;
3074                                         vm_page_unlock_queues();
3075
3076                                         VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3077                                                vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3078
3079                                         /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3080                                         if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3081                                                 VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
3082                                         }
3083
3084                                         VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3085                                                         vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3086
3087                                         vm_page_lock_queues();
3088                                         delayed_unlock = 1;
3089                                 }
3090 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3091                                 force_anonymous = TRUE;
3092 #endif
3093                                 inactive_burst_count = 0;
3094                                 goto done_with_inactivepage;
3095                         } else {
3096                                 goto must_activate_page;
3097                         }
3098                 }
3099
3100                 /*
3101                  * we've got a page that we can steal...
3102                  * eliminate all mappings and make sure
3103                  * we have the up-to-date modified state
3104                  *
3105                  * if we need to do a pmap_disconnect then we
3106                  * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3107                  * provides the true state atomically... the
3108                  * page was still mapped up to the pmap_disconnect
3109                  * and may have been dirtied at the last microsecond
3110                  *
3111                  * Note that if 'pmapped' is FALSE then the page is not
3112                  * and has not been in any map, so there is no point calling
3113                  * pmap_disconnect().  m->vmp_dirty could have been set in anticipation
3114                  * of likely usage of the page.
3115                  */
3116                 if (m->vmp_pmapped == TRUE) {
3117                         int pmap_options;
3118
3119                         /*
3120                          * Don't count this page as going into the compressor
3121                          * if any of these are true:
3122                          * 1) compressed pager isn't enabled
3123                          * 2) Freezer enabled device with compressed pager
3124                          *    backend (exclusive use) i.e. most of the VM system
3125                          *    (including vm_pageout_scan) has no knowledge of
3126                          *    the compressor
3127                          * 3) This page belongs to a file and hence will not be
3128                          *    sent into the compressor
3129                          */
3130                         if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3131                             object->internal == FALSE) {
3132                                 pmap_options = 0;
3133                         } else if (m->vmp_dirty || m->vmp_precious) {
3134                                 /*
3135                                  * VM knows that this page is dirty (or
3136                                  * precious) and needs to be compressed
3137                                  * rather than freed.
3138                                  * Tell the pmap layer to count this page
3139                                  * as "compressed".
3140                                  */
3141                                 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3142                         } else {
3143                                 /*
3144                                  * VM does not know if the page needs to
3145                                  * be preserved but the pmap layer might tell
3146                                  * us if any mapping has "modified" it.
3147                                  * Let's the pmap layer to count this page
3148                                  * as compressed if and only if it has been
3149                                  * modified.
3150                                  */
3151                                 pmap_options =
3152                                         PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3153                         }
3154                         refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3155                                                                pmap_options,
3156                                                                NULL);
3157                         if (refmod_state & VM_MEM_MODIFIED) {
3158                                 SET_PAGE_DIRTY(m, FALSE);
3159                         }
3160                 }
3161
3162                 /*
3163                  * reset our count of pages that have been reclaimed
3164                  * since the last page was 'stolen'
3165                  */
3166                 inactive_reclaim_run = 0;
3167
3168                 /*
3169                  *      If it's clean and not precious, we can free the page.
3170                  */
3171                 if (!m->vmp_dirty && !m->vmp_precious) {
3172
3173                         vm_pageout_state.vm_pageout_inactive_clean++;
3174
3175                         /*
3176                          * OK, at this point we have found a page we are going to free.
3177                          */
3178 #if CONFIG_PHANTOM_CACHE
3179                         if (!object->internal)
3180                                 vm_phantom_cache_add_ghost(m);
3181 #endif
3182                         goto reclaim_page;
3183                 }
3184
3185                 /*
3186                  * The page may have been dirtied since the last check
3187                  * for a throttled target queue (which may have been skipped
3188                  * if the page was clean then).  With the dirty page
3189                  * disconnected here, we can make one final check.
3190                  */
3191                 if (object->internal) {
3192                         if (VM_PAGE_Q_THROTTLED(iq))
3193                                 inactive_throttled = TRUE;
3194                 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3195                         inactive_throttled = TRUE;
3196                 }
3197
3198                 if (inactive_throttled == TRUE)
3199                         goto throttle_inactive;
3200
3201 #if VM_PRESSURE_EVENTS
3202 #if CONFIG_JETSAM
3203
3204                 /*
3205                  * If Jetsam is enabled, then the sending
3206                  * of memory pressure notifications is handled
3207                  * from the same thread that takes care of high-water
3208                  * and other jetsams i.e. the memorystatus_thread.
3209                  */
3210
3211 #else /* CONFIG_JETSAM */
3212
3213                 vm_pressure_response();
3214
3215 #endif /* CONFIG_JETSAM */
3216 #endif /* VM_PRESSURE_EVENTS */
3217
3218                 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3219                         VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3220
3221                 if (object->internal)
3222                         vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3223                 else
3224                         vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3225
3226                 /*
3227                  * internal pages will go to the compressor...
3228                  * external pages will go to the appropriate pager to be cleaned
3229                  * and upon completion will end up on 'vm_page_queue_cleaned' which
3230                  * is a preferred queue to steal from
3231                  */
3232                 vm_pageout_cluster(m);
3233                 inactive_burst_count = 0;
3234
3235 done_with_inactivepage:
3236
3237                 if (delayed_unlock++ > delayed_unlock_limit) {
3238                         int freed = local_freed;
3239
3240                         vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3241                                                     VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3242                         if (freed == 0)
3243                                 lck_mtx_yield(&vm_page_queue_lock);
3244                 } else if (vm_pageout_scan_wants_object) {
3245                         vm_page_unlock_queues();
3246                         mutex_pause(0);
3247                         vm_page_lock_queues();
3248                 }
3249                 /*
3250                  * back to top of pageout scan loop
3251                  */
3252         }
3253 }
3254
3255
3256 void
3257 vm_page_free_reserve(
3258         int pages)
3259 {
3260         int             free_after_reserve;
3261
3262         if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3263
3264                 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3265                         vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3266                 else
3267                         vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3268
3269         } else {
3270                 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3271                         vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3272                 else
3273                         vm_page_free_reserved += pages;
3274         }
3275         free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3276
3277         vm_page_free_min = vm_page_free_reserved +
3278                 VM_PAGE_FREE_MIN(free_after_reserve);
3279
3280         if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3281                 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3282
3283         vm_page_free_target = vm_page_free_reserved +
3284                 VM_PAGE_FREE_TARGET(free_after_reserve);
3285
3286         if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3287                 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3288
3289         if (vm_page_free_target < vm_page_free_min + 5)
3290                 vm_page_free_target = vm_page_free_min + 5;
3291
3292         vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3293 }
3294
3295 /*
3296  *      vm_pageout is the high level pageout daemon.
3297  */
3298
3299 void
3300 vm_pageout_continue(void)
3301 {
3302         DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3303         VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);
3304
3305 #if !CONFIG_EMBEDDED
3306         lck_mtx_lock(&vm_page_queue_free_lock);
3307         vm_pageout_running = TRUE;
3308         lck_mtx_unlock(&vm_page_queue_free_lock);
3309 #endif /* CONFIG_EMBEDDED */
3310
3311         vm_pageout_scan();
3312         /*
3313          * we hold both the vm_page_queue_free_lock
3314          * and the vm_page_queues_lock at this point
3315          */
3316         assert(vm_page_free_wanted == 0);
3317         assert(vm_page_free_wanted_privileged == 0);
3318         assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3319
3320 #if !CONFIG_EMBEDDED
3321         vm_pageout_running = FALSE;
3322         if (vm_pageout_waiter) {
3323                 vm_pageout_waiter = FALSE;
3324                 thread_wakeup((event_t)&vm_pageout_waiter);
3325         }
3326 #endif /* !CONFIG_EMBEDDED */
3327
3328         lck_mtx_unlock(&vm_page_queue_free_lock);
3329         vm_page_unlock_queues();
3330
3331         counter(c_vm_pageout_block++);
3332         thread_block((thread_continue_t)vm_pageout_continue);
3333         /*NOTREACHED*/
3334 }
3335
3336 #if !CONFIG_EMBEDDED
3337 kern_return_t
3338 vm_pageout_wait(uint64_t deadline)
3339 {
3340         kern_return_t kr;
3341
3342         lck_mtx_lock(&vm_page_queue_free_lock);
3343         for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3344                 vm_pageout_waiter = TRUE;
3345                 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3346                                 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3347                                 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3348                         kr = KERN_OPERATION_TIMED_OUT;
3349                 }
3350         }
3351         lck_mtx_unlock(&vm_page_queue_free_lock);
3352
3353         return (kr);
3354 }
3355 #endif /* !CONFIG_EMBEDDED */
3356
3357
3358 static void
3359 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3360 {
3361         vm_page_t       m = NULL;
3362         vm_object_t     object;
3363         vm_object_offset_t offset;
3364         memory_object_t pager;
3365
3366         /* On systems with a compressor, the external IO thread clears its
3367          * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3368          * creation)
3369          */
3370         if (vm_pageout_state.vm_pageout_internal_iothread != THREAD_NULL)
3371                 current_thread()->options &= ~TH_OPT_VMPRIV;
3372
3373         vm_page_lockspin_queues();
3374
3375         while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3376
3377                    q->pgo_busy = TRUE;
3378                    vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3379
3380                    assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3381                    VM_PAGE_CHECK(m);
3382                    /*
3383                     * grab a snapshot of the object and offset this
3384                     * page is tabled in so that we can relookup this
3385                     * page after we've taken the object lock - these
3386                     * fields are stable while we hold the page queues lock
3387                     * but as soon as we drop it, there is nothing to keep
3388                     * this page in this object... we hold an activity_in_progress
3389                     * on this object which will keep it from terminating
3390                     */
3391                    object = VM_PAGE_OBJECT(m);
3392                    offset = m->vmp_offset;
3393
3394                    m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3395                    VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3396
3397                    vm_page_unlock_queues();
3398
3399                    vm_object_lock(object);
3400
3401                    m = vm_page_lookup(object, offset);
3402
3403                    if (m == NULL ||
3404                        m->vmp_busy || m->vmp_cleaning || !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
3405                            /*
3406                             * it's either the same page that someone else has
3407                             * started cleaning (or it's finished cleaning or
3408                             * been put back on the pageout queue), or
3409                             * the page has been freed or we have found a
3410                             * new page at this offset... in all of these cases
3411                             * we merely need to release the activity_in_progress
3412                             * we took when we put the page on the pageout queue
3413                             */
3414                            vm_object_activity_end(object);
3415                            vm_object_unlock(object);
3416
3417                            vm_page_lockspin_queues();
3418                            continue;
3419                    }
3420                    pager = object->pager;
3421
3422                    if (pager == MEMORY_OBJECT_NULL) {
3423                            /*
3424                             * This pager has been destroyed by either
3425                             * memory_object_destroy or vm_object_destroy, and
3426                             * so there is nowhere for the page to go.
3427                             */
3428                            if (m->vmp_free_when_done) {
3429                                    /*
3430                                     * Just free the page... VM_PAGE_FREE takes
3431                                     * care of cleaning up all the state...
3432                                     * including doing the vm_pageout_throttle_up
3433                                     */
3434                                    VM_PAGE_FREE(m);
3435                            } else {
3436                                    vm_page_lockspin_queues();
3437
3438                                    vm_pageout_throttle_up(m);
3439                                    vm_page_activate(m);
3440
3441                                    vm_page_unlock_queues();
3442
3443                                    /*
3444                                     *   And we are done with it.
3445                                     */
3446                            }
3447                            vm_object_activity_end(object);
3448                            vm_object_unlock(object);
3449
3450                            vm_page_lockspin_queues();
3451                            continue;
3452                    }
3453 #if 0
3454                    /*
3455                     * we don't hold the page queue lock
3456                     * so this check isn't safe to make
3457                     */
3458                    VM_PAGE_CHECK(m);
3459 #endif
3460                    /*
3461                     * give back the activity_in_progress reference we
3462                     * took when we queued up this page and replace it
3463                     * it with a paging_in_progress reference that will
3464                     * also hold the paging offset from changing and
3465                     * prevent the object from terminating
3466                     */
3467                    vm_object_activity_end(object);
3468                    vm_object_paging_begin(object);
3469                    vm_object_unlock(object);
3470
3471                    /*
3472                     * Send the data to the pager.
3473                     * any pageout clustering happens there
3474                     */
3475                    memory_object_data_return(pager,
3476                                              m->vmp_offset + object->paging_offset,
3477                                              PAGE_SIZE,
3478                                              NULL,
3479                                              NULL,
3480                                              FALSE,
3481                                              FALSE,
3482                                              0);
3483
3484                    vm_object_lock(object);
3485                    vm_object_paging_end(object);
3486                    vm_object_unlock(object);
3487
3488                    vm_pageout_io_throttle();
3489
3490                    vm_page_lockspin_queues();
3491         }
3492         q->pgo_busy = FALSE;
3493         q->pgo_idle = TRUE;
3494
3495         assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3496         vm_page_unlock_queues();
3497
3498         thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3499         /*NOTREACHED*/
3500 }
3501
3502
3503 #define         MAX_FREE_BATCH          32
3504 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3505                                      * this thread.
3506                                      */
3507
3508
3509 void
3510 vm_pageout_iothread_internal_continue(struct cq *);
3511 void
3512 vm_pageout_iothread_internal_continue(struct cq *cq)
3513 {
3514         struct vm_pageout_queue *q;
3515         vm_page_t       m = NULL;
3516         boolean_t       pgo_draining;
3517         vm_page_t   local_q;
3518         int         local_cnt;
3519         vm_page_t   local_freeq = NULL;
3520         int         local_freed = 0;
3521         int         local_batch_size;
3522 #if DEVELOPMENT || DEBUG
3523         int       ncomps = 0;
3524         boolean_t marked_active = FALSE;
3525 #endif
3526         KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3527
3528         q = cq->q;
3529         local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
3530
3531 #if RECORD_THE_COMPRESSED_DATA
3532         if (q->pgo_laundry)
3533                 c_compressed_record_init();
3534 #endif
3535         while (TRUE) {
3536                 int     pages_left_on_q = 0;
3537
3538                 local_cnt = 0;
3539                 local_q = NULL;
3540
3541                 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3542
3543                 vm_page_lock_queues();
3544 #if DEVELOPMENT || DEBUG
3545                 if (marked_active == FALSE) {
3546                         vmct_active++;
3547                         vmct_state[cq->id] = VMCT_ACTIVE;
3548                         marked_active = TRUE;
3549                         if (vmct_active == 1) {
3550                                 vm_compressor_epoch_start = mach_absolute_time();
3551                         }
3552                 }
3553 #endif
3554                 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3555
3556                 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3557
3558                 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3559
3560                         vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3561                         assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3562                         VM_PAGE_CHECK(m);
3563
3564                         m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3565                         VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3566                         m->vmp_laundry = FALSE;
3567
3568                         m->vmp_snext = local_q;
3569                         local_q = m;
3570                         local_cnt++;
3571                 }
3572                 if (local_q == NULL)
3573                         break;
3574
3575                 q->pgo_busy = TRUE;
3576
3577                 if ((pgo_draining = q->pgo_draining) == FALSE) {
3578                         vm_pageout_throttle_up_batch(q, local_cnt);
3579                         pages_left_on_q = q->pgo_laundry;
3580                 } else
3581                         pages_left_on_q = q->pgo_laundry - local_cnt;
3582
3583                 vm_page_unlock_queues();
3584
3585 #if !RECORD_THE_COMPRESSED_DATA
3586                 if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
3587                         thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3588                 }
3589 #endif
3590                 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3591
3592                 while (local_q) {
3593
3594                         KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3595
3596                         m = local_q;
3597                         local_q = m->vmp_snext;
3598                         m->vmp_snext = NULL;
3599
3600                         if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m) == KERN_SUCCESS) {
3601 #if DEVELOPMENT || DEBUG
3602                                 ncomps++;
3603 #endif
3604                                 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_END, local_cnt, 0, 0, 0, 0);
3605
3606                                 m->vmp_snext = local_freeq;
3607                                 local_freeq = m;
3608                                 local_freed++;
3609
3610                                 if (local_freed >= MAX_FREE_BATCH) {
3611
3612                                         OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3613
3614                                         vm_page_free_list(local_freeq, TRUE);
3615
3616                                         local_freeq = NULL;
3617                                         local_freed = 0;
3618                                 }
3619                         }
3620 #if !CONFIG_JETSAM
3621                         while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3622                                 kern_return_t   wait_result;
3623                                 int             need_wakeup = 0;
3624
3625                                 if (local_freeq) {
3626                                         OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3627
3628                                         vm_page_free_list(local_freeq, TRUE);
3629                                         local_freeq = NULL;
3630                                         local_freed = 0;
3631
3632                                         continue;
3633                                 }
3634                                 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3635
3636                                 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3637
3638                                         if (vm_page_free_wanted_privileged++ == 0)
3639                                                 need_wakeup = 1;
3640                                         wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3641
3642                                         lck_mtx_unlock(&vm_page_queue_free_lock);
3643
3644                                         if (need_wakeup)
3645                                                 thread_wakeup((event_t)&vm_page_free_wanted);
3646
3647                                         if (wait_result == THREAD_WAITING)
3648
3649                                                 thread_block(THREAD_CONTINUE_NULL);
3650                                 } else
3651                                         lck_mtx_unlock(&vm_page_queue_free_lock);
3652                         }
3653 #endif
3654                 }
3655                 if (local_freeq) {
3656                         OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3657
3658                         vm_page_free_list(local_freeq, TRUE);
3659                         local_freeq = NULL;
3660                         local_freed = 0;
3661                 }
3662                 if (pgo_draining == TRUE) {
3663                         vm_page_lockspin_queues();
3664                         vm_pageout_throttle_up_batch(q, local_cnt);
3665                         vm_page_unlock_queues();
3666                 }
3667         }
3668         KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3669
3670         /*
3671          * queue lock is held and our q is empty
3672          */
3673         q->pgo_busy = FALSE;
3674         q->pgo_idle = TRUE;
3675
3676         assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3677 #if DEVELOPMENT || DEBUG
3678         if (marked_active == TRUE) {
3679                 vmct_active--;
3680                 vmct_state[cq->id] = VMCT_IDLE;
3681
3682                 if (vmct_active == 0) {
3683                         vm_compressor_epoch_stop = mach_absolute_time();
3684                         assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
3685                             "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
3686                             vm_compressor_epoch_start, vm_compressor_epoch_stop);
3687                         /* This interval includes intervals where one or more
3688                          * compressor threads were pre-empted
3689                          */
3690                         vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
3691                 }
3692         }
3693 #endif
3694         vm_page_unlock_queues();
3695 #if DEVELOPMENT || DEBUG
3696         if (__improbable(vm_compressor_time_thread)) {
3697                 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3698                 vmct_stats.vmct_pages[cq->id] += ncomps;
3699                 vmct_stats.vmct_iterations[cq->id]++;
3700                 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3701                         vmct_stats.vmct_maxpages[cq->id] = ncomps;
3702                 }
3703                 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3704                         vmct_stats.vmct_minpages[cq->id] = ncomps;
3705                 }
3706         }
3707 #endif
3708
3709         KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3710
3711         thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3712         /*NOTREACHED*/
3713 }
3714
3715
3716 kern_return_t
3717 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
3718 {
3719         vm_object_t     object;
3720         memory_object_t pager;
3721         int             compressed_count_delta;
3722         kern_return_t   retval;
3723
3724         object = VM_PAGE_OBJECT(m);
3725
3726         assert(!m->vmp_free_when_done);
3727         assert(!m->vmp_laundry);
3728
3729         pager = object->pager;
3730
3731         if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL)  {
3732
3733                 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3734
3735                 vm_object_lock(object);
3736
3737                 /*
3738                  * If there is no memory object for the page, create
3739                  * one and hand it to the compression pager.
3740                  */
3741
3742                 if (!object->pager_initialized)
3743                         vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3744                 if (!object->pager_initialized)
3745                         vm_object_compressor_pager_create(object);
3746
3747                 pager = object->pager;
3748
3749                 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3750                         /*
3751                          * Still no pager for the object,
3752                          * or the pager has been destroyed.
3753                          * Reactivate the page.
3754                          *
3755                          * Should only happen if there is no
3756                          * compression pager
3757                          */
3758                         PAGE_WAKEUP_DONE(m);
3759
3760                         vm_page_lockspin_queues();
3761                         vm_page_activate(m);
3762                         VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
3763                         vm_page_unlock_queues();
3764
3765                         /*
3766                          *      And we are done with it.
3767                          */
3768                         vm_object_activity_end(object);
3769                         vm_object_unlock(object);
3770
3771                         return KERN_FAILURE;
3772                 }
3773                 vm_object_unlock(object);
3774
3775                 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3776         }
3777         assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3778         assert(object->activity_in_progress > 0);
3779
3780         retval = vm_compressor_pager_put(
3781                 pager,
3782                 m->vmp_offset + object->paging_offset,
3783                 VM_PAGE_GET_PHYS_PAGE(m),
3784                 current_chead,
3785                 scratch_buf,
3786                 &compressed_count_delta);
3787
3788         vm_object_lock(object);
3789
3790         assert(object->activity_in_progress > 0);
3791         assert(VM_PAGE_OBJECT(m) == object);
3792         assert( !VM_PAGE_WIRED(m));
3793
3794         vm_compressor_pager_count(pager,
3795                                   compressed_count_delta,
3796                                   FALSE, /* shared_lock */
3797                                   object);
3798
3799         if (retval == KERN_SUCCESS) {
3800                 /*
3801                  * If the object is purgeable, its owner's
3802                  * purgeable ledgers will be updated in
3803                  * vm_page_remove() but the page still
3804                  * contributes to the owner's memory footprint,
3805                  * so account for it as such.
3806                  */
3807                 if ((object->purgable != VM_PURGABLE_DENY ||
3808                      object->vo_ledger_tag) &&
3809                     object->vo_owner != NULL) {
3810                         /* one more compressed purgeable/tagged page */
3811                         vm_object_owner_compressed_update(object,
3812                                                           +1);
3813                 }
3814                 VM_STAT_INCR(compressions);
3815
3816                 if (m->vmp_tabled)
3817                         vm_page_remove(m, TRUE);
3818
3819         } else {
3820                 PAGE_WAKEUP_DONE(m);
3821
3822                 vm_page_lockspin_queues();
3823
3824                 vm_page_activate(m);
3825                 vm_pageout_vminfo.vm_compressor_failed++;
3826
3827                 vm_page_unlock_queues();
3828         }
3829         vm_object_activity_end(object);
3830         vm_object_unlock(object);
3831
3832         return retval;
3833 }
3834
3835
3836 static void
3837 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3838 {
3839         uint32_t        policy;
3840
3841         if (hibernate_cleaning_in_progress == TRUE)
3842                 req_lowpriority = FALSE;
3843
3844         if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3845
3846                 vm_page_unlock_queues();
3847
3848                 if (req_lowpriority == TRUE) {
3849                         policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3850                         DTRACE_VM(laundrythrottle);
3851                 } else {
3852                         policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3853                         DTRACE_VM(laundryunthrottle);
3854                 }
3855                 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
3856                                                 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3857
3858                 eq->pgo_lowpriority = req_lowpriority;
3859
3860                 vm_page_lock_queues();
3861         }
3862 }
3863
3864
3865 static void
3866 vm_pageout_iothread_external(void)
3867 {
3868         thread_t        self = current_thread();
3869
3870         self->options |= TH_OPT_VMPRIV;
3871
3872         DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3873
3874         proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
3875                                TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3876
3877         vm_page_lock_queues();
3878
3879         vm_pageout_queue_external.pgo_tid = self->thread_id;
3880         vm_pageout_queue_external.pgo_lowpriority = TRUE;
3881         vm_pageout_queue_external.pgo_inited = TRUE;
3882
3883         vm_page_unlock_queues();
3884
3885         vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3886
3887         /*NOTREACHED*/
3888 }
3889
3890
3891 static void
3892 vm_pageout_iothread_internal(struct cq *cq)
3893 {
3894         thread_t        self = current_thread();
3895
3896         self->options |= TH_OPT_VMPRIV;
3897
3898         vm_page_lock_queues();
3899
3900         vm_pageout_queue_internal.pgo_tid = self->thread_id;
3901         vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3902         vm_pageout_queue_internal.pgo_inited = TRUE;
3903
3904         vm_page_unlock_queues();
3905
3906         if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
3907                 thread_vm_bind_group_add();
3908
3909
3910         thread_set_thread_name(current_thread(), "VM_compressor");
3911 #if DEVELOPMENT || DEBUG
3912         vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
3913 #endif
3914         vm_pageout_iothread_internal_continue(cq);
3915
3916         /*NOTREACHED*/
3917 }
3918
3919 kern_return_t
3920 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3921 {
3922         if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3923                 return KERN_SUCCESS;
3924         } else {
3925                 return KERN_FAILURE; /* Already set */
3926         }
3927 }
3928
3929 extern boolean_t        memorystatus_manual_testing_on;
3930 extern unsigned int     memorystatus_level;
3931
3932
3933 #if VM_PRESSURE_EVENTS
3934
3935 boolean_t vm_pressure_events_enabled = FALSE;
3936
3937 void
3938 vm_pressure_response(void)
3939 {
3940
3941         vm_pressure_level_t     old_level = kVMPressureNormal;
3942         int                     new_level = -1;
3943         unsigned int            total_pages;
3944         uint64_t                available_memory = 0;
3945
3946         if (vm_pressure_events_enabled == FALSE)
3947                 return;
3948
3949 #if CONFIG_EMBEDDED
3950
3951         available_memory = (uint64_t) memorystatus_available_pages;
3952
3953 #else /* CONFIG_EMBEDDED */
3954
3955         available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3956         memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3957
3958 #endif /* CONFIG_EMBEDDED */
3959
3960         total_pages = (unsigned int) atop_64(max_mem);
3961 #if CONFIG_SECLUDED_MEMORY
3962         total_pages -= vm_page_secluded_count;
3963 #endif /* CONFIG_SECLUDED_MEMORY */
3964         memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
3965
3966         if (memorystatus_manual_testing_on) {
3967                 return;
3968         }
3969
3970         old_level = memorystatus_vm_pressure_level;
3971
3972         switch (memorystatus_vm_pressure_level) {
3973
3974                 case kVMPressureNormal:
3975                 {
3976                         if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3977                                 new_level = kVMPressureCritical;
3978                         }  else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3979                                 new_level = kVMPressureWarning;
3980                         }
3981                         break;
3982                 }
3983
3984                 case kVMPressureWarning:
3985                 case kVMPressureUrgent:
3986                 {
3987                         if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3988                                 new_level = kVMPressureNormal;
3989                         }  else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3990                                 new_level = kVMPressureCritical;
3991                         }
3992                         break;
3993                 }
3994
3995                 case kVMPressureCritical:
3996                 {
3997                         if (VM_PRESSURE_WARNING_TO_NORMAL()) {
3998                                 new_level = kVMPressureNormal;
3999                         }  else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4000                                 new_level = kVMPressureWarning;
4001                         }
4002                         break;
4003                 }
4004
4005                 default:
4006                         return;
4007         }
4008
4009         if (new_level != -1) {
4010                 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4011
4012                 if (new_level != old_level) {
4013                         VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4014                                         new_level, old_level, 0, 0);
4015                 }
4016
4017                 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level)) {
4018                         if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4019                                 thread_wakeup(&vm_pressure_thread);
4020                         }
4021
4022                         if (old_level != memorystatus_vm_pressure_level) {
4023                                 thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4024                         }
4025                 }
4026         }
4027
4028 }
4029 #endif /* VM_PRESSURE_EVENTS */
4030
4031 kern_return_t
4032 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4033
4034 #if CONFIG_EMBEDDED
4035
4036         return KERN_FAILURE;
4037
4038 #elif !VM_PRESSURE_EVENTS
4039
4040         return KERN_FAILURE;
4041
4042 #else /* VM_PRESSURE_EVENTS */
4043
4044         kern_return_t   kr = KERN_SUCCESS;
4045
4046         if (pressure_level != NULL) {
4047
4048                 vm_pressure_level_t     old_level = memorystatus_vm_pressure_level;
4049
4050                 if (wait_for_pressure == TRUE) {
4051                         wait_result_t           wr = 0;
4052
4053                         while (old_level == *pressure_level) {
4054                                 wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4055                                                  THREAD_INTERRUPTIBLE);
4056                                 if (wr == THREAD_WAITING) {
4057                                         wr = thread_block(THREAD_CONTINUE_NULL);
4058                                 }
4059                                 if (wr == THREAD_INTERRUPTED) {
4060                                         return KERN_ABORTED;
4061                                 }
4062                                 if (wr == THREAD_AWAKENED) {
4063
4064                                         old_level = memorystatus_vm_pressure_level;
4065
4066                                         if (old_level != *pressure_level) {
4067                                                 break;
4068                                         }
4069                                 }
4070                         }
4071                 }
4072
4073                 *pressure_level = old_level;
4074                 kr = KERN_SUCCESS;
4075         } else {
4076                 kr = KERN_INVALID_ARGUMENT;
4077         }
4078
4079         return kr;
4080 #endif /* VM_PRESSURE_EVENTS */
4081 }
4082
4083 #if VM_PRESSURE_EVENTS
4084 void
4085 vm_pressure_thread(void) {
4086         static boolean_t thread_initialized = FALSE;
4087
4088         if (thread_initialized == TRUE) {
4089                 vm_pageout_state.vm_pressure_thread_running = TRUE;
4090                 consider_vm_pressure_events();
4091                 vm_pageout_state.vm_pressure_thread_running = FALSE;
4092         }
4093
4094         thread_set_thread_name(current_thread(), "VM_pressure");
4095         thread_initialized = TRUE;
4096         assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4097         thread_block((thread_continue_t)vm_pressure_thread);
4098 }
4099 #endif /* VM_PRESSURE_EVENTS */
4100
4101
4102 /*
4103  * called once per-second via "compute_averages"
4104  */
4105 void
4106 compute_pageout_gc_throttle(__unused void *arg)
4107 {
4108         if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4109
4110                 vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4111
4112                 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4113         }
4114 }
4115
4116 /*
4117  * vm_pageout_garbage_collect can also be called when the zone allocator needs
4118  * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4119  * jetsams. We need to check if the zone map size is above its jetsam limit to
4120  * decide if this was indeed the case.
4121  *
4122  * We need to do this on a different thread because of the following reasons:
4123  *
4124  * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4125  * itself causing the system to hang. We perform synchronous jetsams if we're
4126  * leaking in the VM map entries zone, so the leaking process could be doing a
4127  * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4128  * jetsam itself. We also need the vm_map lock on the process termination path,
4129  * which would now lead the dying process to deadlock against itself.
4130  *
4131  * 2. The jetsam path might need to allocate zone memory itself. We could try
4132  * using the non-blocking variant of zalloc for this path, but we can still
4133  * end up trying to do a kernel_memory_allocate when the zone_map is almost
4134  * full.
4135  */
4136
4137 extern boolean_t is_zone_map_nearing_exhaustion(void);
4138
4139 void
4140 vm_pageout_garbage_collect(int collect)
4141 {
4142         if (collect) {
4143                 if (is_zone_map_nearing_exhaustion()) {
4144                         /*
4145                          * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4146                          *
4147                          * Bail out after calling zone_gc (which triggers the
4148                          * zone-map-exhaustion jetsams). If we fall through, the subsequent
4149                          * operations that clear out a bunch of caches might allocate zone
4150                          * memory themselves (for eg. vm_map operations would need VM map
4151                          * entries). Since the zone map is almost full at this point, we
4152                          * could end up with a panic. We just need to quickly jetsam a
4153                          * process and exit here.
4154                          *
4155                          * It could so happen that we were woken up to relieve memory
4156                          * pressure and the zone map also happened to be near its limit at
4157                          * the time, in which case we'll skip out early. But that should be
4158                          * ok; if memory pressure persists, the thread will simply be woken
4159                          * up again.
4160                          */
4161                         consider_zone_gc(TRUE);
4162
4163                 } else {
4164                         /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4165                         boolean_t buf_large_zfree = FALSE;
4166                         boolean_t first_try = TRUE;
4167
4168                         stack_collect();
4169
4170                         consider_machine_collect();
4171                         mbuf_drain(FALSE);
4172
4173                         do {
4174                                 if (consider_buffer_cache_collect != NULL) {
4175                                         buf_large_zfree = (*consider_buffer_cache_collect)(0);
4176                                 }
4177                                 if (first_try == TRUE || buf_large_zfree == TRUE) {
4178                                         /*
4179                                          * consider_zone_gc should be last, because the other operations
4180                                          * might return memory to zones.
4181                                          */
4182                                         consider_zone_gc(FALSE);
4183                                 }
4184                                 first_try = FALSE;
4185
4186                         } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4187
4188                         consider_machine_adjust();
4189                 }
4190         }
4191
4192         assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4193
4194         thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4195         /*NOTREACHED*/
4196 }
4197
4198
4199 #if VM_PAGE_BUCKETS_CHECK
4200 #if VM_PAGE_FAKE_BUCKETS
4201 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4202 #endif /* VM_PAGE_FAKE_BUCKETS */
4203 #endif /* VM_PAGE_BUCKETS_CHECK */
4204
4205
4206
4207 void
4208 vm_set_restrictions()
4209 {
4210         host_basic_info_data_t hinfo;
4211         mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4212
4213 #define BSD_HOST 1
4214         host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4215
4216         assert(hinfo.max_cpus > 0);
4217
4218         if (hinfo.max_cpus <= 3) {
4219                 /*
4220                  * on systems with a limited number of CPUS, bind the
4221                  * 4 major threads that can free memory and that tend to use
4222                  * a fair bit of CPU under pressured conditions to a single processor.
4223                  * This insures that these threads don't hog all of the available CPUs
4224                  * (important for camera launch), while allowing them to run independently
4225                  * w/r to locks... the 4 threads are
4226                  * vm_pageout_scan,  vm_pageout_iothread_internal (compressor),
4227                  * vm_compressor_swap_trigger_thread (minor and major compactions),
4228                  * memorystatus_thread (jetsams).
4229                  *
4230                  * the first time the thread is run, it is responsible for checking the
4231                  * state of vm_restricted_to_single_processor, and if TRUE it calls
4232                  * thread_bind_master...  someday this should be replaced with a group
4233                  * scheduling mechanism and KPI.
4234                  */
4235                 vm_pageout_state.vm_restricted_to_single_processor = TRUE;
4236         } else
4237                 vm_pageout_state.vm_restricted_to_single_processor = FALSE;
4238 }
4239
4240 void
4241 vm_pageout(void)
4242 {
4243         thread_t        self = current_thread();
4244         thread_t        thread;
4245         kern_return_t   result;
4246         spl_t           s;
4247
4248         /*
4249          * Set thread privileges.
4250          */
4251         s = splsched();
4252
4253         thread_lock(self);
4254         self->options |= TH_OPT_VMPRIV;
4255         sched_set_thread_base_priority(self, BASEPRI_VM);
4256         thread_unlock(self);
4257
4258         if (!self->reserved_stack)
4259                 self->reserved_stack = self->kernel_stack;
4260
4261         if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
4262                 thread_vm_bind_group_add();
4263
4264         splx(s);
4265
4266         thread_set_thread_name(current_thread(), "VM_pageout_scan");
4267
4268         /*
4269          *      Initialize some paging parameters.
4270          */
4271
4272         vm_pageout_state.vm_pressure_thread_running = FALSE;
4273         vm_pageout_state.vm_pressure_changed = FALSE;
4274         vm_pageout_state.memorystatus_purge_on_warning = 2;
4275         vm_pageout_state.memorystatus_purge_on_urgent = 5;
4276         vm_pageout_state.memorystatus_purge_on_critical = 8;
4277         vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
4278         vm_pageout_state.vm_page_speculative_percentage = 5;
4279         vm_pageout_state.vm_page_speculative_target = 0;
4280
4281         vm_pageout_state.vm_pageout_external_iothread = THREAD_NULL;
4282         vm_pageout_state.vm_pageout_internal_iothread = THREAD_NULL;
4283
4284         vm_pageout_state.vm_pageout_swap_wait = 0;
4285         vm_pageout_state.vm_pageout_idle_wait = 0;
4286         vm_pageout_state.vm_pageout_empty_wait = 0;
4287         vm_pageout_state.vm_pageout_burst_wait = 0;
4288         vm_pageout_state.vm_pageout_deadlock_wait = 0;
4289         vm_pageout_state.vm_pageout_deadlock_relief = 0;
4290         vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;
4291
4292         vm_pageout_state.vm_pageout_inactive = 0;
4293         vm_pageout_state.vm_pageout_inactive_used = 0;
4294         vm_pageout_state.vm_pageout_inactive_clean = 0;
4295
4296         vm_pageout_state.vm_memory_pressure = 0;
4297         vm_pageout_state.vm_page_filecache_min = 0;
4298 #if CONFIG_JETSAM
4299         vm_pageout_state.vm_page_filecache_min_divisor = 70;
4300         vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
4301 #else
4302         vm_pageout_state.vm_page_filecache_min_divisor = 27;
4303         vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
4304 #endif
4305         vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
4306
4307         vm_pageout_state.vm_pageout_considered_page_last = 0;
4308
4309         if (vm_pageout_state.vm_pageout_swap_wait == 0)
4310                 vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4311
4312         if (vm_pageout_state.vm_pageout_idle_wait == 0)
4313                 vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4314
4315         if (vm_pageout_state.vm_pageout_burst_wait == 0)
4316                 vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4317
4318         if (vm_pageout_state.vm_pageout_empty_wait == 0)
4319                 vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4320
4321         if (vm_pageout_state.vm_pageout_deadlock_wait == 0)
4322                 vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4323
4324         if (vm_pageout_state.vm_pageout_deadlock_relief == 0)
4325                 vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4326
4327         if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0)
4328                 vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4329         /*
4330          * even if we've already called vm_page_free_reserve
4331          * call it again here to insure that the targets are
4332          * accurately calculated (it uses vm_page_free_count_init)
4333          * calling it with an arg of 0 will not change the reserve
4334          * but will re-calculate free_min and free_target
4335          */
4336         if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4337                 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4338         } else
4339                 vm_page_free_reserve(0);
4340
4341
4342         vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4343         vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4344         vm_pageout_queue_external.pgo_laundry = 0;
4345         vm_pageout_queue_external.pgo_idle = FALSE;
4346         vm_pageout_queue_external.pgo_busy = FALSE;
4347         vm_pageout_queue_external.pgo_throttled = FALSE;
4348         vm_pageout_queue_external.pgo_draining = FALSE;
4349         vm_pageout_queue_external.pgo_lowpriority = FALSE;
4350         vm_pageout_queue_external.pgo_tid = -1;
4351         vm_pageout_queue_external.pgo_inited = FALSE;
4352
4353         vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4354         vm_pageout_queue_internal.pgo_maxlaundry = 0;
4355         vm_pageout_queue_internal.pgo_laundry = 0;
4356         vm_pageout_queue_internal.pgo_idle = FALSE;
4357         vm_pageout_queue_internal.pgo_busy = FALSE;
4358         vm_pageout_queue_internal.pgo_throttled = FALSE;
4359         vm_pageout_queue_internal.pgo_draining = FALSE;
4360         vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4361         vm_pageout_queue_internal.pgo_tid = -1;
4362         vm_pageout_queue_internal.pgo_inited = FALSE;
4363
4364         /* internal pageout thread started when default pager registered first time */
4365         /* external pageout and garbage collection threads started here */
4366
4367         result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4368                                               BASEPRI_VM,
4369                                               &vm_pageout_state.vm_pageout_external_iothread);
4370         if (result != KERN_SUCCESS)
4371                 panic("vm_pageout_iothread_external: create failed");
4372
4373         thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
4374
4375         result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4376                                               BASEPRI_DEFAULT,
4377                                               &thread);
4378         if (result != KERN_SUCCESS)
4379                 panic("vm_pageout_garbage_collect: create failed");
4380
4381         thread_deallocate(thread);
4382
4383 #if VM_PRESSURE_EVENTS
4384         result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4385                                                 BASEPRI_DEFAULT,
4386                                                 &thread);
4387
4388         if (result != KERN_SUCCESS)
4389                 panic("vm_pressure_thread: create failed");
4390
4391         thread_deallocate(thread);
4392 #endif
4393
4394         vm_object_reaper_init();
4395
4396
4397         bzero(&vm_config, sizeof(vm_config));
4398
4399         switch(vm_compressor_mode) {
4400
4401         case VM_PAGER_DEFAULT:
4402                 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4403
4404         case VM_PAGER_COMPRESSOR_WITH_SWAP:
4405                 vm_config.compressor_is_present = TRUE;
4406                 vm_config.swap_is_present = TRUE;
4407                 vm_config.compressor_is_active = TRUE;
4408                 vm_config.swap_is_active = TRUE;
4409                 break;
4410
4411         case VM_PAGER_COMPRESSOR_NO_SWAP:
4412                 vm_config.compressor_is_present = TRUE;
4413                 vm_config.swap_is_present = TRUE;
4414                 vm_config.compressor_is_active = TRUE;
4415                 break;
4416
4417         case VM_PAGER_FREEZER_DEFAULT:
4418                 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4419
4420         case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4421                 vm_config.compressor_is_present = TRUE;
4422                 vm_config.swap_is_present = TRUE;
4423                 break;
4424
4425         case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4426                 vm_config.compressor_is_present = TRUE;
4427                 vm_config.swap_is_present = TRUE;
4428                 vm_config.compressor_is_active = TRUE;
4429                 vm_config.freezer_swap_is_active = TRUE;
4430                 break;
4431
4432         case VM_PAGER_NOT_CONFIGURED:
4433                 break;
4434
4435         default:
4436                 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4437                 break;
4438         }
4439         if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4440                 vm_compressor_pager_init();
4441
4442 #if VM_PRESSURE_EVENTS
4443         vm_pressure_events_enabled = TRUE;
4444 #endif /* VM_PRESSURE_EVENTS */
4445
4446 #if CONFIG_PHANTOM_CACHE
4447         vm_phantom_cache_init();
4448 #endif
4449 #if VM_PAGE_BUCKETS_CHECK
4450 #if VM_PAGE_FAKE_BUCKETS
4451         printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4452                (uint64_t) vm_page_fake_buckets_start,
4453                (uint64_t) vm_page_fake_buckets_end);
4454         pmap_protect(kernel_pmap,
4455                      vm_page_fake_buckets_start,
4456                      vm_page_fake_buckets_end,
4457                      VM_PROT_READ);
4458 //      *(char *) vm_page_fake_buckets_start = 'x';     /* panic! */
4459 #endif /* VM_PAGE_FAKE_BUCKETS */
4460 #endif /* VM_PAGE_BUCKETS_CHECK */
4461
4462 #if VM_OBJECT_TRACKING
4463         vm_object_tracking_init();
4464 #endif /* VM_OBJECT_TRACKING */
4465
4466         vm_tests();
4467
4468         vm_pageout_continue();
4469
4470         /*
4471          * Unreached code!
4472          *
4473          * The vm_pageout_continue() call above never returns, so the code below is never
4474          * executed.  We take advantage of this to declare several DTrace VM related probe
4475          * points that our kernel doesn't have an analog for.  These are probe points that
4476          * exist in Solaris and are in the DTrace documentation, so people may have written
4477          * scripts that use them.  Declaring the probe points here means their scripts will
4478          * compile and execute which we want for portability of the scripts, but since this
4479          * section of code is never reached, the probe points will simply never fire.  Yes,
4480          * this is basically a hack.  The problem is the DTrace probe points were chosen with
4481          * Solaris specific VM events in mind, not portability to different VM implementations.
4482          */
4483
4484         DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4485         DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4486         DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4487         DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4488         DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4489         DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4490         DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4491         /*NOTREACHED*/
4492 }
4493
4494
4495
4496 kern_return_t
4497 vm_pageout_internal_start(void)
4498 {
4499         kern_return_t   result;
4500         int             i;
4501         host_basic_info_data_t hinfo;
4502
4503         assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4504
4505         mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4506 #define BSD_HOST 1
4507         host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4508
4509         assert(hinfo.max_cpus > 0);
4510
4511 #if CONFIG_EMBEDDED
4512         vm_pageout_state.vm_compressor_thread_count = 1;
4513 #else
4514         if (hinfo.max_cpus > 4)
4515                 vm_pageout_state.vm_compressor_thread_count = 2;
4516         else
4517                 vm_pageout_state.vm_compressor_thread_count = 1;
4518 #endif
4519         PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
4520                            sizeof(vm_pageout_state.vm_compressor_thread_count));
4521
4522         if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus)
4523                 vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
4524         if (vm_pageout_state.vm_compressor_thread_count <= 0)
4525                 vm_pageout_state.vm_compressor_thread_count = 1;
4526         else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4527                 vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4528
4529         vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4530
4531         PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4532
4533         for (i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4534                 ciq[i].id = i;
4535                 ciq[i].q = &vm_pageout_queue_internal;
4536                 ciq[i].current_chead = NULL;
4537                 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4538
4539                 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
4540                                                       BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
4541
4542                 if (result == KERN_SUCCESS)
4543                         thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
4544                 else
4545                         break;
4546         }
4547         return result;
4548 }
4549
4550 #if CONFIG_IOSCHED
4551 /*
4552  * To support I/O Expedite for compressed files we mark the upls with special flags.
4553  * The way decmpfs works is that we create a big upl which marks all the pages needed to
4554  * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4555  * then issues smaller I/Os for compressed I/Os, deflates them and puts the data into the pages
4556  * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4557  * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4558  * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4559  * by the req upl lock (the reverse link doesnt need synch. since we never inspect this link
4560  * unless the real I/O upl is being destroyed).
4561  */
4562
4563
4564 static void
4565 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4566 {
4567         assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4568
4569         upl_lock(src_upl);
4570         if (src_upl->decmp_io_upl) {
4571                 /*
4572                  * If there is already an alive real I/O UPL, ignore this new UPL.
4573                  * This case should rarely happen and even if it does, it just means
4574                  * that we might issue a spurious expedite which the driver is expected
4575                  * to handle.
4576                  */
4577                 upl_unlock(src_upl);
4578                 return;
4579         }
4580         src_upl->decmp_io_upl = (void *)upl;
4581         src_upl->ref_count++;
4582
4583         upl->flags |= UPL_DECMP_REAL_IO;
4584         upl->decmp_io_upl = (void *)src_upl;
4585         upl_unlock(src_upl);
4586 }
4587 #endif /* CONFIG_IOSCHED */
4588
4589 #if UPL_DEBUG
4590 int     upl_debug_enabled = 1;
4591 #else
4592 int     upl_debug_enabled = 0;
4593 #endif
4594
4595 static upl_t
4596 upl_create(int type, int flags, upl_size_t size)
4597 {
4598         upl_t   upl;
4599         vm_size_t       page_field_size = 0;
4600         int     upl_flags = 0;
4601         vm_size_t       upl_size  = sizeof(struct upl);
4602
4603         size = round_page_32(size);
4604
4605         if (type & UPL_CREATE_LITE) {
4606                 page_field_size = (atop(size) + 7) >> 3;
4607                 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4608
4609                 upl_flags |= UPL_LITE;
4610         }
4611         if (type & UPL_CREATE_INTERNAL) {
4612                 upl_size += sizeof(struct upl_page_info) * atop(size);
4613
4614                 upl_flags |= UPL_INTERNAL;
4615         }
4616         upl = (upl_t)kalloc(upl_size + page_field_size);
4617
4618         if (page_field_size)
4619                 bzero((char *)upl + upl_size, page_field_size);
4620
4621         upl->flags = upl_flags | flags;
4622         upl->kaddr = (vm_offset_t)0;
4623         upl->size = 0;
4624         upl->map_object = NULL;
4625         upl->ref_count = 1;
4626         upl->ext_ref_count = 0;
4627         upl->highest_page = 0;
4628         upl_lock_init(upl);
4629         upl->vector_upl = NULL;
4630         upl->associated_upl = NULL;
4631         upl->upl_iodone = NULL;
4632 #if CONFIG_IOSCHED
4633         if (type & UPL_CREATE_IO_TRACKING) {
4634                 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4635         }
4636
4637         upl->upl_reprio_info = 0;
4638         upl->decmp_io_upl = 0;
4639         if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4640                 /* Only support expedite on internal UPLs */
4641                 thread_t        curthread = current_thread();
4642                 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4643                 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4644                 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4645                 if (curthread->decmp_upl != NULL)
4646                         upl_set_decmp_info(upl, curthread->decmp_upl);
4647         }
4648 #endif
4649 #if CONFIG_IOSCHED || UPL_DEBUG
4650         if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4651                 upl->upl_creator = current_thread();
4652                 upl->uplq.next = 0;
4653                 upl->uplq.prev = 0;
4654                 upl->flags |= UPL_TRACKED_BY_OBJECT;
4655         }
4656 #endif
4657
4658 #if UPL_DEBUG
4659         upl->ubc_alias1 = 0;
4660         upl->ubc_alias2 = 0;
4661
4662         upl->upl_state = 0;
4663         upl->upl_commit_index = 0;
4664         bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4665
4666         (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4667 #endif /* UPL_DEBUG */
4668
4669         return(upl);
4670 }
4671
4672 static void
4673 upl_destroy(upl_t upl)
4674 {
4675         int     page_field_size;  /* bit field in word size buf */
4676         int     size;
4677
4678         if (upl->ext_ref_count) {
4679                 panic("upl(%p) ext_ref_count", upl);
4680         }
4681
4682 #if CONFIG_IOSCHED
4683         if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4684                 upl_t src_upl;
4685                 src_upl = upl->decmp_io_upl;
4686                 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4687                 upl_lock(src_upl);
4688                 src_upl->decmp_io_upl = NULL;
4689                 upl_unlock(src_upl);
4690                 upl_deallocate(src_upl);
4691         }
4692 #endif /* CONFIG_IOSCHED */
4693
4694 #if CONFIG_IOSCHED || UPL_DEBUG
4695         if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4696                 vm_object_t     object;
4697
4698                 if (upl->flags & UPL_SHADOWED) {
4699                         object = upl->map_object->shadow;
4700                 } else {
4701                         object = upl->map_object;
4702                 }
4703
4704                 vm_object_lock(object);
4705                 queue_remove(&object->uplq, upl, upl_t, uplq);
4706                 vm_object_activity_end(object);
4707                 vm_object_collapse(object, 0, TRUE);
4708                 vm_object_unlock(object);
4709         }
4710 #endif
4711         /*
4712          * drop a reference on the map_object whether or
4713          * not a pageout object is inserted
4714          */
4715         if (upl->flags & UPL_SHADOWED)
4716                 vm_object_deallocate(upl->map_object);
4717
4718         if (upl->flags & UPL_DEVICE_MEMORY)
4719                 size = PAGE_SIZE;
4720         else
4721                 size = upl->size;
4722         page_field_size = 0;
4723
4724         if (upl->flags & UPL_LITE) {
4725                 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4726                 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4727         }
4728         upl_lock_destroy(upl);
4729         upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4730
4731 #if CONFIG_IOSCHED
4732         if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4733                 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4734 #endif
4735
4736         if (upl->flags & UPL_INTERNAL) {
4737                 kfree(upl,
4738                       sizeof(struct upl) +
4739                       (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4740                       + page_field_size);
4741         } else {
4742                 kfree(upl, sizeof(struct upl) + page_field_size);
4743         }
4744 }
4745
4746 void
4747 upl_deallocate(upl_t upl)
4748 {
4749         upl_lock(upl);
4750
4751         if (--upl->ref_count == 0) {
4752                 if(vector_upl_is_valid(upl))
4753                         vector_upl_deallocate(upl);
4754                 upl_unlock(upl);
4755
4756                 if (upl->upl_iodone)
4757                         upl_callout_iodone(upl);
4758
4759                 upl_destroy(upl);
4760         } else
4761                 upl_unlock(upl);
4762 }
4763
4764 #if CONFIG_IOSCHED
4765 void
4766 upl_mark_decmp(upl_t upl)
4767 {
4768         if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4769                 upl->flags |= UPL_DECMP_REQ;
4770                 upl->upl_creator->decmp_upl = (void *)upl;
4771         }
4772 }
4773
4774 void
4775 upl_unmark_decmp(upl_t upl)
4776 {
4777         if(upl && (upl->flags & UPL_DECMP_REQ)) {
4778                 upl->upl_creator->decmp_upl = NULL;
4779         }
4780 }
4781
4782 #endif /* CONFIG_IOSCHED */
4783
4784 #define VM_PAGE_Q_BACKING_UP(q)         \
4785         ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4786
4787 boolean_t must_throttle_writes(void);
4788
4789 boolean_t
4790 must_throttle_writes()
4791 {
4792         if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4793             vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4794                 return (TRUE);
4795
4796         return (FALSE);
4797 }
4798
4799
4800 /*
4801  *      Routine:        vm_object_upl_request
4802  *      Purpose:
4803  *              Cause the population of a portion of a vm_object.
4804  *              Depending on the nature of the request, the pages
4805  *              returned may be contain valid data or be uninitialized.
4806  *              A page list structure, listing the physical pages
4807  *              will be returned upon request.
4808  *              This function is called by the file system or any other
4809  *              supplier of backing store to a pager.
4810  *              IMPORTANT NOTE: The caller must still respect the relationship
4811  *              between the vm_object and its backing memory object.  The
4812  *              caller MUST NOT substitute changes in the backing file
4813  *              without first doing a memory_object_lock_request on the
4814  *              target range unless it is know that the pages are not
4815  *              shared with another entity at the pager level.
4816  *              Copy_in_to:
4817  *                      if a page list structure is present
4818  *                      return the mapped physical pages, where a
4819  *                      page is not present, return a non-initialized
4820  *                      one.  If the no_sync bit is turned on, don't
4821  *                      call the pager unlock to synchronize with other
4822  *                      possible copies of the page. Leave pages busy
4823  *                      in the original object, if a page list structure
4824  *                      was specified.  When a commit of the page list
4825  *                      pages is done, the dirty bit will be set for each one.
4826  *              Copy_out_from:
4827  *                      If a page list structure is present, return
4828  *                      all mapped pages.  Where a page does not exist
4829  *                      map a zero filled one. Leave pages busy in
4830  *                      the original object.  If a page list structure
4831  *                      is not specified, this call is a no-op.
4832  *
4833  *              Note:  access of default pager objects has a rather interesting
4834  *              twist.  The caller of this routine, presumably the file system
4835  *              page cache handling code, will never actually make a request
4836  *              against a default pager backed object.  Only the default
4837  *              pager will make requests on backing store related vm_objects
4838  *              In this way the default pager can maintain the relationship
4839  *              between backing store files (abstract memory objects) and
4840  *              the vm_objects (cache objects), they support.
4841  *
4842  */
4843
4844 __private_extern__ kern_return_t
4845 vm_object_upl_request(
4846         vm_object_t             object,
4847         vm_object_offset_t      offset,
4848         upl_size_t              size,
4849         upl_t                   *upl_ptr,
4850         upl_page_info_array_t   user_page_list,
4851         unsigned int            *page_list_count,
4852         upl_control_flags_t     cntrl_flags,
4853         vm_tag_t                tag)
4854 {
4855         vm_page_t               dst_page = VM_PAGE_NULL;
4856         vm_object_offset_t      dst_offset;
4857         upl_size_t              xfer_size;
4858         unsigned int            size_in_pages;
4859         boolean_t               dirty;
4860         boolean_t               hw_dirty;
4861         upl_t                   upl = NULL;
4862         unsigned int            entry;
4863         vm_page_t               alias_page = NULL;
4864         int                     refmod_state = 0;
4865         wpl_array_t             lite_list = NULL;
4866         vm_object_t             last_copy_object;
4867         struct  vm_page_delayed_work    dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4868         struct  vm_page_delayed_work    *dwp;
4869         int                     dw_count;
4870         int                     dw_limit;
4871         int                     io_tracking_flag = 0;
4872         int                     grab_options;
4873         int                     page_grab_count = 0;
4874         ppnum_t                 phys_page;
4875         pmap_flush_context      pmap_flush_context_storage;
4876         boolean_t               pmap_flushes_delayed = FALSE;
4877
4878         if (cntrl_flags & ~UPL_VALID_FLAGS) {
4879                 /*
4880                  * For forward compatibility's sake,
4881                  * reject any unknown flag.
4882                  */
4883                 return KERN_INVALID_VALUE;
4884         }
4885         if ( (!object->internal) && (object->paging_offset != 0) )
4886                 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4887         if (object->phys_contiguous)
4888                 panic("vm_object_upl_request: contiguous object specified\n");
4889
4890         VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
4891
4892         if (size > MAX_UPL_SIZE_BYTES)
4893                 size = MAX_UPL_SIZE_BYTES;
4894
4895         if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4896                 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4897
4898 #if CONFIG_IOSCHED || UPL_DEBUG
4899         if (object->io_tracking || upl_debug_enabled)
4900                 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
4901 #endif
4902 #if CONFIG_IOSCHED
4903         if (object->io_tracking)
4904                 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
4905 #endif
4906
4907         if (cntrl_flags & UPL_SET_INTERNAL) {
4908                 if (cntrl_flags & UPL_SET_LITE) {
4909
4910                         upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4911
4912                         user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4913                         lite_list = (wpl_array_t)
4914                                         (((uintptr_t)user_page_list) +
4915                                         ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4916                         if (size == 0) {
4917                                 user_page_list = NULL;
4918                                 lite_list = NULL;
4919                         }
4920                 } else {
4921                         upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
4922
4923                         user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4924                         if (size == 0) {
4925                                 user_page_list = NULL;
4926                         }
4927                 }
4928         } else {
4929                 if (cntrl_flags & UPL_SET_LITE) {
4930
4931                         upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4932
4933                         lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4934                         if (size == 0) {
4935                                 lite_list = NULL;
4936                         }
4937                 } else {
4938                         upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
4939                 }
4940         }
4941         *upl_ptr = upl;
4942
4943         if (user_page_list)
4944                 user_page_list[0].device = FALSE;
4945
4946         if (cntrl_flags & UPL_SET_LITE) {
4947                 upl->map_object = object;
4948         } else {
4949                 upl->map_object = vm_object_allocate(size);
4950                 /*
4951                  * No neeed to lock the new object: nobody else knows
4952                  * about it yet, so it's all ours so far.
4953                  */
4954                 upl->map_object->shadow = object;
4955                 upl->map_object->pageout = TRUE;
4956                 upl->map_object->can_persist = FALSE;
4957                 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4958                 upl->map_object->vo_shadow_offset = offset;
4959                 upl->map_object->wimg_bits = object->wimg_bits;
4960
4961                 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4962
4963                 upl->flags |= UPL_SHADOWED;
4964         }
4965         if (cntrl_flags & UPL_FOR_PAGEOUT)
4966                 upl->flags |= UPL_PAGEOUT;
4967
4968         vm_object_lock(object);
4969         vm_object_activity_begin(object);
4970
4971         grab_options = 0;
4972 #if CONFIG_SECLUDED_MEMORY
4973         if (object->can_grab_secluded) {
4974                 grab_options |= VM_PAGE_GRAB_SECLUDED;
4975         }
4976 #endif /* CONFIG_SECLUDED_MEMORY */
4977
4978         /*
4979          * we can lock in the paging_offset once paging_in_progress is set
4980          */
4981         upl->size = size;
4982         upl->offset = offset + object->paging_offset;
4983
4984 #if CONFIG_IOSCHED || UPL_DEBUG
4985         if (object->io_tracking || upl_debug_enabled) {
4986                 vm_object_activity_begin(object);
4987                 queue_enter(&object->uplq, upl, upl_t, uplq);
4988         }
4989 #endif
4990         if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
4991                 /*
4992                  * Honor copy-on-write obligations
4993                  *
4994                  * The caller is gathering these pages and
4995                  * might modify their contents.  We need to
4996                  * make sure that the copy object has its own
4997                  * private copies of these pages before we let
4998                  * the caller modify them.
4999                  */
5000                 vm_object_update(object,
5001                                  offset,
5002                                  size,
5003                                  NULL,
5004                                  NULL,
5005                                  FALSE, /* should_return */
5006                                  MEMORY_OBJECT_COPY_SYNC,
5007                                  VM_PROT_NO_CHANGE);
5008
5009                 VM_PAGEOUT_DEBUG(upl_cow, 1);
5010                 VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5011         }
5012         /*
5013          * remember which copy object we synchronized with
5014          */
5015         last_copy_object = object->copy;
5016         entry = 0;
5017
5018         xfer_size = size;
5019         dst_offset = offset;
5020         size_in_pages = size / PAGE_SIZE;
5021
5022         dwp = &dw_array[0];
5023         dw_count = 0;
5024         dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5025
5026         if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5027             object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5028                 object->scan_collisions = 0;
5029
5030         if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5031                 boolean_t       isSSD = FALSE;
5032
5033 #if CONFIG_EMBEDDED
5034                 isSSD = TRUE;
5035 #else
5036                 vnode_pager_get_isSSD(object->pager, &isSSD);
5037 #endif
5038                 vm_object_unlock(object);
5039
5040                 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5041
5042                 if (isSSD == TRUE)
5043                         delay(1000 * size_in_pages);
5044                 else
5045                         delay(5000 * size_in_pages);
5046                 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5047
5048                 vm_object_lock(object);
5049         }
5050
5051         while (xfer_size) {
5052
5053                 dwp->dw_mask = 0;
5054
5055                 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5056                         vm_object_unlock(object);
5057                         VM_PAGE_GRAB_FICTITIOUS(alias_page);
5058                         vm_object_lock(object);
5059                 }
5060                 if (cntrl_flags & UPL_COPYOUT_FROM) {
5061                         upl->flags |= UPL_PAGE_SYNC_DONE;
5062
5063                         if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5064                                 dst_page->vmp_fictitious ||
5065                                 dst_page->vmp_absent ||
5066                                 dst_page->vmp_error ||
5067                                 dst_page->vmp_cleaning ||
5068                                 (VM_PAGE_WIRED(dst_page))) {
5069
5070                                 if (user_page_list)
5071                                         user_page_list[entry].phys_addr = 0;
5072
5073                                 goto try_next_page;
5074                         }
5075                         phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5076
5077                         /*
5078                          * grab this up front...
5079                          * a high percentange of the time we're going to
5080                          * need the hardware modification state a bit later
5081                          * anyway... so we can eliminate an extra call into
5082                          * the pmap layer by grabbing it here and recording it
5083                          */
5084                         if (dst_page->vmp_pmapped)
5085                                 refmod_state = pmap_get_refmod(phys_page);
5086                         else
5087                                 refmod_state = 0;
5088
5089                         if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5090                                 /*
5091                                  * page is on inactive list and referenced...
5092                                  * reactivate it now... this gets it out of the
5093                                  * way of vm_pageout_scan which would have to
5094                                  * reactivate it upon tripping over it
5095                                  */
5096                                 dwp->dw_mask |= DW_vm_page_activate;
5097                         }
5098                         if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5099                                 /*
5100                                  * we're only asking for DIRTY pages to be returned
5101                                  */
5102                                 if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5103                                         /*
5104                                          * if we were the page stolen by vm_pageout_scan to be
5105                                          * cleaned (as opposed to a buddy being clustered in
5106                                          * or this request is not being driven by a PAGEOUT cluster
5107                                          * then we only need to check for the page being dirty or
5108                                          * precious to decide whether to return it
5109                                          */
5110                                         if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED))
5111                                                 goto check_busy;
5112                                         goto dont_return;
5113                                 }
5114                                 /*
5115                                  * this is a request for a PAGEOUT cluster and this page
5116                                  * is merely along for the ride as a 'buddy'... not only
5117                                  * does it have to be dirty to be returned, but it also
5118                                  * can't have been referenced recently...
5119                                  */
5120                                 if ( (hibernate_cleaning_in_progress == TRUE ||
5121                                       (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
5122                                        (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5123                                      ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious) ) {
5124                                         goto check_busy;
5125                                 }
5126 dont_return:
5127                                 /*
5128                                  * if we reach here, we're not to return
5129                                  * the page... go on to the next one
5130                                  */
5131                                 if (dst_page->vmp_laundry == TRUE) {
5132                                         /*
5133                                          * if we get here, the page is not 'cleaning' (filtered out above).
5134                                          * since it has been referenced, remove it from the laundry
5135                                          * so we don't pay the cost of an I/O to clean a page
5136                                          * we're just going to take back
5137                                          */
5138                                         vm_page_lockspin_queues();
5139
5140                                         vm_pageout_steal_laundry(dst_page, TRUE);
5141                                         vm_page_activate(dst_page);
5142
5143                                         vm_page_unlock_queues();
5144                                 }
5145                                 if (user_page_list)
5146                                         user_page_list[entry].phys_addr = 0;
5147
5148                                 goto try_next_page;
5149                         }
5150 check_busy:
5151                         if (dst_page->vmp_busy) {
5152                                 if (cntrl_flags & UPL_NOBLOCK) {
5153                                         if (user_page_list)
5154                                                 user_page_list[entry].phys_addr = 0;
5155                                         dwp->dw_mask = 0;
5156
5157                                         goto try_next_page;
5158                                 }
5159                                 /*
5160                                  * someone else is playing with the
5161                                  * page.  We will have to wait.
5162                                  */
5163                                 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5164
5165                                 continue;
5166                         }
5167                         if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5168
5169                                 vm_page_lockspin_queues();
5170
5171                                 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5172                                         /*
5173                                          * we've buddied up a page for a clustered pageout
5174                                          * that has already been moved to the pageout
5175                                          * queue by pageout_scan... we need to remove
5176                                          * it from the queue and drop the laundry count
5177                                          * on that queue
5178                                          */
5179                                         vm_pageout_throttle_up(dst_page);
5180                                 }
5181                                 vm_page_unlock_queues();
5182                         }
5183                         hw_dirty = refmod_state & VM_MEM_MODIFIED;
5184                         dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5185
5186                         if (phys_page > upl->highest_page)
5187                                 upl->highest_page = phys_page;
5188
5189                         assert (!pmap_is_noencrypt(phys_page));
5190
5191                         if (cntrl_flags & UPL_SET_LITE) {
5192                                 unsigned int    pg_num;
5193
5194                                 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5195                                 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5196                                 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5197
5198                                 if (hw_dirty) {
5199                                         if (pmap_flushes_delayed == FALSE) {
5200                                                 pmap_flush_context_init(&pmap_flush_context_storage);
5201                                                 pmap_flushes_delayed = TRUE;
5202                                         }
5203                                         pmap_clear_refmod_options(phys_page,
5204                                                                   VM_MEM_MODIFIED,
5205                                                                   PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
5206                                                                   &pmap_flush_context_storage);
5207                                 }
5208
5209                                 /*
5210                                  * Mark original page as cleaning
5211                                  * in place.
5212                                  */
5213                                 dst_page->vmp_cleaning = TRUE;
5214                                 dst_page->vmp_precious = FALSE;
5215                         } else {
5216                                 /*
5217                                  * use pageclean setup, it is more
5218                                  * convenient even for the pageout
5219                                  * cases here
5220                                  */
5221                                 vm_object_lock(upl->map_object);
5222                                 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5223                                 vm_object_unlock(upl->map_object);
5224
5225                                 alias_page->vmp_absent = FALSE;
5226                                 alias_page = NULL;
5227                         }
5228                         if (dirty) {
5229                                 SET_PAGE_DIRTY(dst_page, FALSE);
5230                         } else {
5231                                 dst_page->vmp_dirty = FALSE;
5232                         }
5233
5234                         if (!dirty)
5235                                 dst_page->vmp_precious = TRUE;
5236
5237                         if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5238                                 if ( !VM_PAGE_WIRED(dst_page))
5239                                         dst_page->vmp_free_when_done = TRUE;
5240                         }
5241                 } else {
5242                         if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5243                                 /*
5244                                  * Honor copy-on-write obligations
5245                                  *
5246                                  * The copy object has changed since we
5247                                  * last synchronized for copy-on-write.
5248                                  * Another copy object might have been
5249                                  * inserted while we released the object's
5250                                  * lock.  Since someone could have seen the
5251                                  * original contents of the remaining pages
5252                                  * through that new object, we have to
5253                                  * synchronize with it again for the remaining
5254                                  * pages only.  The previous pages are "busy"
5255                                  * so they can not be seen through the new
5256                                  * mapping.  The new mapping will see our
5257                                  * upcoming changes for those previous pages,
5258                                  * but that's OK since they couldn't see what
5259                                  * was there before.  It's just a race anyway
5260                                  * and there's no guarantee of consistency or
5261                                  * atomicity.  We just don't want new mappings
5262                                  * to see both the *before* and *after* pages.
5263                                  */
5264                                 if (object->copy != VM_OBJECT_NULL) {
5265                                         vm_object_update(
5266                                                 object,
5267                                                 dst_offset,/* current offset */
5268                                                 xfer_size, /* remaining size */
5269                                                 NULL,
5270                                                 NULL,
5271                                                 FALSE,     /* should_return */
5272                                                 MEMORY_OBJECT_COPY_SYNC,
5273                                                 VM_PROT_NO_CHANGE);
5274
5275                                         VM_PAGEOUT_DEBUG(upl_cow_again, 1);
5276                                         VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
5277                                 }
5278                                 /*
5279                                  * remember the copy object we synced with
5280                                  */
5281                                 last_copy_object = object->copy;
5282                         }
5283                         dst_page = vm_page_lookup(object, dst_offset);
5284
5285                         if (dst_page != VM_PAGE_NULL) {
5286
5287                                 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5288                                         /*
5289                                          * skip over pages already present in the cache
5290                                          */
5291                                         if (user_page_list)
5292                                                 user_page_list[entry].phys_addr = 0;
5293
5294                                         goto try_next_page;
5295                                 }
5296                                 if (dst_page->vmp_fictitious) {
5297                                         panic("need corner case for fictitious page");
5298                                 }
5299
5300                                 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
5301                                         /*
5302                                          * someone else is playing with the
5303                                          * page.  We will have to wait.
5304                                          */
5305                                         PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5306
5307                                         continue;
5308                                 }
5309                                 if (dst_page->vmp_laundry)
5310                                         vm_pageout_steal_laundry(dst_page, FALSE);
5311                         } else {
5312                                 if (object->private) {
5313                                         /*
5314                                          * This is a nasty wrinkle for users
5315                                          * of upl who encounter device or
5316                                          * private memory however, it is
5317                                          * unavoidable, only a fault can
5318                                          * resolve the actual backing
5319                                          * physical page by asking the
5320                                          * backing device.
5321                                          */
5322                                         if (user_page_list)
5323                                                 user_page_list[entry].phys_addr = 0;
5324
5325                                         goto try_next_page;
5326                                 }
5327                                 if (object->scan_collisions) {
5328                                         /*
5329                                          * the pageout_scan thread is trying to steal
5330                                          * pages from this object, but has run into our
5331                                          * lock... grab 2 pages from the head of the object...
5332                                          * the first is freed on behalf of pageout_scan, the
5333                                          * 2nd is for our own use... we use vm_object_page_grab
5334                                          * in both cases to avoid taking pages from the free
5335                                          * list since we are under memory pressure and our
5336                                          * lock on this object is getting in the way of
5337                                          * relieving it
5338                                          */
5339                                         dst_page = vm_object_page_grab(object);
5340
5341                                         if (dst_page != VM_PAGE_NULL)
5342                                                 vm_page_release(dst_page,
5343                                                                 FALSE);
5344
5345                                         dst_page = vm_object_page_grab(object);
5346                                 }
5347                                 if (dst_page == VM_PAGE_NULL) {
5348                                         /*
5349                                          * need to allocate a page
5350                                          */
5351                                         dst_page = vm_page_grab_options(grab_options);
5352                                         if (dst_page != VM_PAGE_NULL)
5353                                                 page_grab_count++;
5354                                 }
5355                                 if (dst_page == VM_PAGE_NULL) {
5356                                         if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5357                                                /*
5358                                                 * we don't want to stall waiting for pages to come onto the free list
5359                                                 * while we're already holding absent pages in this UPL
5360                                                 * the caller will deal with the empty slots
5361                                                 */
5362                                                 if (user_page_list)
5363                                                         user_page_list[entry].phys_addr = 0;
5364
5365                                                 goto try_next_page;
5366                                         }
5367                                         /*
5368                                          * no pages available... wait
5369                                          * then try again for the same
5370                                          * offset...
5371                                          */
5372                                         vm_object_unlock(object);
5373
5374                                         OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5375
5376                                         VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5377
5378                                         VM_PAGE_WAIT();
5379                                         OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5380
5381                                         VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5382
5383                                         vm_object_lock(object);
5384
5385                                         continue;
5386                                 }
5387                                 vm_page_insert(dst_page, object, dst_offset);
5388
5389                                 dst_page->vmp_absent = TRUE;
5390                                 dst_page->vmp_busy = FALSE;
5391
5392                                 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5393                                         /*
5394                                          * if UPL_RET_ONLY_ABSENT was specified,
5395                                          * than we're definitely setting up a
5396                                          * upl for a clustered read/pagein
5397                                          * operation... mark the pages as clustered
5398                                          * so upl_commit_range can put them on the
5399                                          * speculative list
5400                                          */
5401                                         dst_page->vmp_clustered = TRUE;
5402
5403                                         if ( !(cntrl_flags & UPL_FILE_IO))
5404                                                 VM_STAT_INCR(pageins);
5405                                 }
5406                         }
5407                         phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5408
5409                         dst_page->vmp_overwriting = TRUE;
5410
5411                         if (dst_page->vmp_pmapped) {
5412                                 if ( !(cntrl_flags & UPL_FILE_IO))
5413                                         /*
5414                                          * eliminate all mappings from the
5415                                          * original object and its prodigy
5416                                          */
5417                                         refmod_state = pmap_disconnect(phys_page);
5418                                 else
5419                                         refmod_state = pmap_get_refmod(phys_page);
5420                         } else
5421                                 refmod_state = 0;
5422
5423                         hw_dirty = refmod_state & VM_MEM_MODIFIED;
5424                         dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5425
5426                         if (cntrl_flags & UPL_SET_LITE) {
5427                                 unsigned int    pg_num;
5428
5429                                 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5430                                 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5431                                 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5432
5433                                 if (hw_dirty)
5434                                         pmap_clear_modify(phys_page);
5435
5436                                 /*
5437                                  * Mark original page as cleaning
5438                                  * in place.
5439                                  */
5440                                 dst_page->vmp_cleaning = TRUE;
5441                                 dst_page->vmp_precious = FALSE;
5442                         } else {
5443                                 /*
5444                                  * use pageclean setup, it is more
5445                                  * convenient even for the pageout
5446                                  * cases here
5447                                  */
5448                                 vm_object_lock(upl->map_object);
5449                                 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5450                                 vm_object_unlock(upl->map_object);
5451
5452                                 alias_page->vmp_absent = FALSE;
5453                                 alias_page = NULL;
5454                         }
5455
5456                         if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5457                                 upl->flags &= ~UPL_CLEAR_DIRTY;
5458                                 upl->flags |= UPL_SET_DIRTY;
5459                                 dirty = TRUE;
5460                                 upl->flags |= UPL_SET_DIRTY;
5461                         } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5462                                 /*
5463                                  * clean in place for read implies
5464                                  * that a write will be done on all
5465                                  * the pages that are dirty before
5466                                  * a upl commit is done.  The caller
5467                                  * is obligated to preserve the
5468                                  * contents of all pages marked dirty
5469                                  */
5470                                 upl->flags |= UPL_CLEAR_DIRTY;
5471                         }
5472                         dst_page->vmp_dirty = dirty;
5473
5474                         if (!dirty)
5475                                 dst_page->vmp_precious = TRUE;
5476
5477                         if ( !VM_PAGE_WIRED(dst_page)) {
5478                                 /*
5479                                  * deny access to the target page while
5480                                  * it is being worked on
5481                                  */
5482                                 dst_page->vmp_busy = TRUE;
5483                         } else
5484                                 dwp->dw_mask |= DW_vm_page_wire;
5485
5486                         /*
5487                          * We might be about to satisfy a fault which has been
5488                          * requested. So no need for the "restart" bit.
5489                          */
5490                         dst_page->vmp_restart = FALSE;
5491                         if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5492                                 /*
5493                                  * expect the page to be used
5494                                  */
5495                                 dwp->dw_mask |= DW_set_reference;
5496                         }
5497                         if (cntrl_flags & UPL_PRECIOUS) {
5498                                 if (object->internal) {
5499                                         SET_PAGE_DIRTY(dst_page, FALSE);
5500                                         dst_page->vmp_precious = FALSE;
5501                                 } else {
5502                                         dst_page->vmp_precious = TRUE;
5503                                 }
5504                         } else {
5505                                 dst_page->vmp_precious = FALSE;
5506                         }
5507                 }
5508                 if (dst_page->vmp_busy)
5509                         upl->flags |= UPL_HAS_BUSY;
5510
5511                 if (phys_page > upl->highest_page)
5512                         upl->highest_page = phys_page;
5513                 assert (!pmap_is_noencrypt(phys_page));
5514                 if (user_page_list) {
5515                         user_page_list[entry].phys_addr = phys_page;
5516                         user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
5517                         user_page_list[entry].absent    = dst_page->vmp_absent;
5518                         user_page_list[entry].dirty     = dst_page->vmp_dirty;
5519                         user_page_list[entry].precious  = dst_page->vmp_precious;
5520                         user_page_list[entry].device    = FALSE;
5521                         user_page_list[entry].needed    = FALSE;
5522                         if (dst_page->vmp_clustered == TRUE)
5523                                 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5524                         else
5525                                 user_page_list[entry].speculative = FALSE;
5526                         user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
5527                         user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
5528                         user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
5529                         user_page_list[entry].mark      = FALSE;
5530                 }
5531                 /*
5532                  * if UPL_RET_ONLY_ABSENT is set, then
5533                  * we are working with a fresh page and we've
5534                  * just set the clustered flag on it to
5535                  * indicate that it was drug in as part of a
5536                  * speculative cluster... so leave it alone
5537                  */
5538                 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5539                         /*
5540                          * someone is explicitly grabbing this page...
5541                          * update clustered and speculative state
5542                          *
5543                          */
5544                         if (dst_page->vmp_clustered)
5545                                 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5546                 }
5547 try_next_page:
5548                 if (dwp->dw_mask) {
5549                         if (dwp->dw_mask & DW_vm_page_activate)
5550                                 VM_STAT_INCR(reactivations);
5551
5552                         VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5553
5554                         if (dw_count >= dw_limit) {
5555                                 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5556
5557                                 dwp = &dw_array[0];
5558                                 dw_count = 0;
5559                         }
5560                 }
5561                 entry++;
5562                 dst_offset += PAGE_SIZE_64;
5563                 xfer_size -= PAGE_SIZE;
5564         }
5565         if (dw_count)
5566                 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5567
5568         if (alias_page != NULL) {
5569                 VM_PAGE_FREE(alias_page);
5570         }
5571         if (pmap_flushes_delayed == TRUE)
5572                 pmap_flush(&pmap_flush_context_storage);
5573
5574         if (page_list_count != NULL) {
5575                 if (upl->flags & UPL_INTERNAL)
5576                         *page_list_count = 0;
5577                 else if (*page_list_count > entry)
5578                         *page_list_count = entry;
5579         }
5580 #if UPL_DEBUG
5581         upl->upl_state = 1;
5582 #endif
5583         vm_object_unlock(object);
5584
5585         VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
5586
5587         return KERN_SUCCESS;
5588 }
5589
5590 /*
5591  *      Routine:        vm_object_super_upl_request
5592  *      Purpose:
5593  *              Cause the population of a portion of a vm_object
5594  *              in much the same way as memory_object_upl_request.
5595  *              Depending on the nature of the request, the pages
5596  *              returned may be contain valid data or be uninitialized.
5597  *              However, the region may be expanded up to the super
5598  *              cluster size provided.
5599  */
5600
5601 __private_extern__ kern_return_t
5602 vm_object_super_upl_request(
5603         vm_object_t object,
5604         vm_object_offset_t      offset,
5605         upl_size_t              size,
5606         upl_size_t              super_cluster,
5607         upl_t                   *upl,
5608         upl_page_info_t         *user_page_list,
5609         unsigned int            *page_list_count,
5610         upl_control_flags_t     cntrl_flags,
5611         vm_tag_t                tag)
5612 {
5613         if (object->paging_offset > offset  || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5614                 return KERN_FAILURE;
5615
5616         assert(object->paging_in_progress);
5617         offset = offset - object->paging_offset;
5618
5619         if (super_cluster > size) {
5620
5621                 vm_object_offset_t      base_offset;
5622                 upl_size_t              super_size;
5623                 vm_object_size_t        super_size_64;
5624
5625                 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5626                 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5627                 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5628                 super_size = (upl_size_t) super_size_64;
5629                 assert(super_size == super_size_64);
5630
5631                 if (offset > (base_offset + super_size)) {
5632                         panic("vm_object_super_upl_request: Missed target pageout"
5633                               " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5634                               offset, base_offset, super_size, super_cluster,
5635                               size, object->paging_offset);
5636                 }
5637                 /*
5638                  * apparently there is a case where the vm requests a
5639                  * page to be written out who's offset is beyond the
5640                  * object size
5641                  */
5642                 if ((offset + size) > (base_offset + super_size)) {
5643                         super_size_64 = (offset + size) - base_offset;
5644                         super_size = (upl_size_t) super_size_64;
5645                         assert(super_size == super_size_64);
5646                 }
5647
5648                 offset = base_offset;
5649                 size = super_size;
5650         }
5651         return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5652 }
5653
5654 #if CONFIG_EMBEDDED
5655 int cs_executable_create_upl = 0;
5656 extern int proc_selfpid(void);
5657 extern char *proc_name_address(void *p);
5658 #endif /* CONFIG_EMBEDDED */
5659
5660 kern_return_t
5661 vm_map_create_upl(
5662         vm_map_t                map,
5663         vm_map_address_t        offset,
5664         upl_size_t              *upl_size,
5665         upl_t                   *upl,
5666         upl_page_info_array_t   page_list,
5667         unsigned int            *count,
5668         upl_control_flags_t     *flags,
5669         vm_tag_t                tag)
5670 {
5671         vm_map_entry_t          entry;
5672         upl_control_flags_t     caller_flags;
5673         int                     force_data_sync;
5674         int                     sync_cow_data;
5675         vm_object_t             local_object;
5676         vm_map_offset_t         local_offset;
5677         vm_map_offset_t         local_start;
5678         kern_return_t           ret;
5679
5680         assert(page_aligned(offset));
5681
5682         caller_flags = *flags;
5683
5684         if (caller_flags & ~UPL_VALID_FLAGS) {
5685                 /*
5686                  * For forward compatibility's sake,
5687                  * reject any unknown flag.
5688                  */
5689                 return KERN_INVALID_VALUE;
5690         }
5691         force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5692         sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5693
5694         if (upl == NULL)
5695                 return KERN_INVALID_ARGUMENT;
5696
5697 REDISCOVER_ENTRY:
5698         vm_map_lock_read(map);
5699
5700         if (!vm_map_lookup_entry(map, offset, &entry)) {
5701                 vm_map_unlock_read(map);
5702                 return KERN_FAILURE;
5703         }
5704
5705         if ((entry->vme_end - offset) < *upl_size) {
5706                 *upl_size = (upl_size_t) (entry->vme_end - offset);
5707                 assert(*upl_size == entry->vme_end - offset);
5708         }
5709
5710         if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5711                 *flags = 0;
5712
5713                 if (!entry->is_sub_map &&
5714                     VME_OBJECT(entry) != VM_OBJECT_NULL) {
5715                         if (VME_OBJECT(entry)->private)
5716                                 *flags = UPL_DEV_MEMORY;
5717
5718                         if (VME_OBJECT(entry)->phys_contiguous)
5719                                 *flags |= UPL_PHYS_CONTIG;
5720                 }
5721                 vm_map_unlock_read(map);
5722                 return KERN_SUCCESS;
5723         }
5724
5725         if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5726             !VME_OBJECT(entry)->phys_contiguous) {
5727                 if (*upl_size > MAX_UPL_SIZE_BYTES)
5728                         *upl_size = MAX_UPL_SIZE_BYTES;
5729         }
5730
5731         /*
5732          *      Create an object if necessary.
5733          */
5734         if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5735
5736                 if (vm_map_lock_read_to_write(map))
5737                         goto REDISCOVER_ENTRY;
5738
5739                 VME_OBJECT_SET(entry,
5740                                vm_object_allocate((vm_size_t)
5741                                                   (entry->vme_end -
5742                                                    entry->vme_start)));
5743                 VME_OFFSET_SET(entry, 0);
5744                 assert(entry->use_pmap);
5745
5746                 vm_map_lock_write_to_read(map);
5747         }
5748
5749         if (!(caller_flags & UPL_COPYOUT_FROM) &&
5750             !entry->is_sub_map &&
5751             !(entry->protection & VM_PROT_WRITE)) {
5752                 vm_map_unlock_read(map);
5753                 return KERN_PROTECTION_FAILURE;
5754         }
5755
5756 #if CONFIG_EMBEDDED
5757         if (map->pmap != kernel_pmap &&
5758             (caller_flags & UPL_COPYOUT_FROM) &&
5759             (entry->protection & VM_PROT_EXECUTE) &&
5760             !(entry->protection & VM_PROT_WRITE)) {
5761                 vm_offset_t     kaddr;
5762                 vm_size_t       ksize;
5763
5764                 /*
5765                  * We're about to create a read-only UPL backed by
5766                  * memory from an executable mapping.
5767                  * Wiring the pages would result in the pages being copied
5768                  * (due to the "MAP_PRIVATE" mapping) and no longer
5769                  * code-signed, so no longer eligible for execution.
5770                  * Instead, let's copy the data into a kernel buffer and
5771                  * create the UPL from this kernel buffer.
5772                  * The kernel buffer is then freed, leaving the UPL holding
5773                  * the last reference on the VM object, so the memory will
5774                  * be released when the UPL is committed.
5775                  */
5776
5777                 vm_map_unlock_read(map);
5778                 /* allocate kernel buffer */
5779                 ksize = round_page(*upl_size);
5780                 kaddr = 0;
5781                 ret = kmem_alloc_pageable(kernel_map,
5782                                           &kaddr,
5783                                           ksize,
5784                                           tag);
5785                 if (ret == KERN_SUCCESS) {
5786                         /* copyin the user data */
5787                         assert(page_aligned(offset));
5788                         ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5789                 }
5790                 if (ret == KERN_SUCCESS) {
5791                         if (ksize > *upl_size) {
5792                                 /* zero out the extra space in kernel buffer */
5793                                 memset((void *)(kaddr + *upl_size),
5794                                        0,
5795                                        ksize - *upl_size);
5796                         }
5797                         /* create the UPL from the kernel buffer */
5798                         ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5799                                                 upl, page_list, count, flags, tag);
5800                 }
5801                 if (kaddr != 0) {
5802                         /* free the kernel buffer */
5803                         kmem_free(kernel_map, kaddr, ksize);
5804                         kaddr = 0;
5805                         ksize = 0;
5806                 }
5807 #if DEVELOPMENT || DEBUG
5808                 DTRACE_VM4(create_upl_from_executable,
5809                            vm_map_t, map,
5810                            vm_map_address_t, offset,
5811                            upl_size_t, *upl_size,
5812                            kern_return_t, ret);
5813 #endif /* DEVELOPMENT || DEBUG */
5814                 return ret;
5815         }
5816 #endif /* CONFIG_EMBEDDED */
5817
5818         local_object = VME_OBJECT(entry);
5819         assert(local_object != VM_OBJECT_NULL);
5820
5821         if (!entry->is_sub_map &&
5822             !entry->needs_copy &&
5823             *upl_size != 0 &&
5824             local_object->vo_size > *upl_size && /* partial UPL */
5825             entry->wired_count == 0 && /* No COW for entries that are wired */
5826             (map->pmap != kernel_pmap) && /* alias checks */
5827             (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5828              ||
5829              (/* case 2 */
5830               local_object->internal &&
5831               (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5832               local_object->ref_count > 1))) {
5833                 vm_prot_t       prot;
5834
5835                 /*
5836                  * Case 1:
5837                  * Set up the targeted range for copy-on-write to avoid
5838                  * applying true_share/copy_delay to the entire object.
5839                  *
5840                  * Case 2:
5841                  * This map entry covers only part of an internal
5842                  * object.  There could be other map entries covering
5843                  * other areas of this object and some of these map
5844                  * entries could be marked as "needs_copy", which
5845                  * assumes that the object is COPY_SYMMETRIC.
5846                  * To avoid marking this object as COPY_DELAY and
5847                  * "true_share", let's shadow it and mark the new
5848                  * (smaller) object as "true_share" and COPY_DELAY.
5849                  */
5850
5851                 if (vm_map_lock_read_to_write(map)) {
5852                         goto REDISCOVER_ENTRY;
5853                 }
5854                 vm_map_lock_assert_exclusive(map);
5855                 assert(VME_OBJECT(entry) == local_object);
5856
5857                 vm_map_clip_start(map,
5858                                   entry,
5859                                   vm_map_trunc_page(offset,
5860                                                     VM_MAP_PAGE_MASK(map)));
5861                 vm_map_clip_end(map,
5862                                 entry,
5863                                 vm_map_round_page(offset + *upl_size,
5864                                                   VM_MAP_PAGE_MASK(map)));
5865                 if ((entry->vme_end - offset) < *upl_size) {
5866                         *upl_size = (upl_size_t) (entry->vme_end - offset);
5867                         assert(*upl_size == entry->vme_end - offset);
5868                 }
5869
5870                 prot = entry->protection & ~VM_PROT_WRITE;
5871                 if (override_nx(map, VME_ALIAS(entry)) && prot)
5872                         prot |= VM_PROT_EXECUTE;
5873                 vm_object_pmap_protect(local_object,
5874                                        VME_OFFSET(entry),
5875                                        entry->vme_end - entry->vme_start,
5876                                        ((entry->is_shared ||
5877                                          map->mapped_in_other_pmaps)
5878                                         ? PMAP_NULL
5879                                         : map->pmap),
5880                                        entry->vme_start,
5881                                        prot);
5882
5883                 assert(entry->wired_count == 0);
5884
5885                 /*
5886                  * Lock the VM object and re-check its status: if it's mapped
5887                  * in another address space, we could still be racing with
5888                  * another thread holding that other VM map exclusively.
5889                  */
5890                 vm_object_lock(local_object);
5891                 if (local_object->true_share) {
5892                         /* object is already in proper state: no COW needed */
5893                         assert(local_object->copy_strategy !=
5894                                MEMORY_OBJECT_COPY_SYMMETRIC);
5895                 } else {
5896                         /* not true_share: ask for copy-on-write below */
5897                         assert(local_object->copy_strategy ==
5898                                MEMORY_OBJECT_COPY_SYMMETRIC);
5899                         entry->needs_copy = TRUE;
5900                 }
5901                 vm_object_unlock(local_object);
5902
5903                 vm_map_lock_write_to_read(map);
5904         }
5905
5906         if (entry->needs_copy)  {
5907                 /*
5908                  * Honor copy-on-write for COPY_SYMMETRIC
5909                  * strategy.
5910                  */
5911                 vm_map_t                local_map;
5912                 vm_object_t             object;
5913                 vm_object_offset_t      new_offset;
5914                 vm_prot_t               prot;
5915                 boolean_t               wired;
5916                 vm_map_version_t        version;
5917                 vm_map_t                real_map;
5918                 vm_prot_t               fault_type;
5919
5920                 local_map = map;
5921
5922                 if (caller_flags & UPL_COPYOUT_FROM) {
5923                         fault_type = VM_PROT_READ | VM_PROT_COPY;
5924                         vm_counters.create_upl_extra_cow++;
5925                         vm_counters.create_upl_extra_cow_pages +=
5926                                 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
5927                 } else {
5928                         fault_type = VM_PROT_WRITE;
5929                 }
5930                 if (vm_map_lookup_locked(&local_map,
5931                                          offset, fault_type,
5932                                          OBJECT_LOCK_EXCLUSIVE,
5933                                          &version, &object,
5934                                          &new_offset, &prot, &wired,
5935                                          NULL,
5936                                          &real_map) != KERN_SUCCESS) {
5937                         if (fault_type == VM_PROT_WRITE) {
5938                                 vm_counters.create_upl_lookup_failure_write++;
5939                         } else {
5940                                 vm_counters.create_upl_lookup_failure_copy++;
5941                         }
5942                         vm_map_unlock_read(local_map);
5943                         return KERN_FAILURE;
5944                 }
5945                 if (real_map != map)
5946                         vm_map_unlock(real_map);
5947                 vm_map_unlock_read(local_map);
5948
5949                 vm_object_unlock(object);
5950
5951                 goto REDISCOVER_ENTRY;
5952         }
5953
5954         if (entry->is_sub_map) {
5955                 vm_map_t        submap;
5956
5957                 submap = VME_SUBMAP(entry);
5958                 local_start = entry->vme_start;
5959                 local_offset = VME_OFFSET(entry);
5960
5961                 vm_map_reference(submap);
5962                 vm_map_unlock_read(map);
5963
5964                 ret = vm_map_create_upl(submap,
5965                                         local_offset + (offset - local_start),
5966                                         upl_size, upl, page_list, count, flags, tag);
5967                 vm_map_deallocate(submap);
5968
5969                 return ret;
5970         }
5971
5972         if (sync_cow_data &&
5973             (VME_OBJECT(entry)->shadow ||
5974              VME_OBJECT(entry)->copy)) {
5975                 local_object = VME_OBJECT(entry);
5976                 local_start = entry->vme_start;
5977                 local_offset = VME_OFFSET(entry);
5978
5979                 vm_object_reference(local_object);
5980                 vm_map_unlock_read(map);
5981
5982                 if (local_object->shadow && local_object->copy) {
5983                         vm_object_lock_request(local_object->shadow,
5984                                                ((vm_object_offset_t)
5985                                                 ((offset - local_start) +
5986                                                  local_offset) +
5987                                                 local_object->vo_shadow_offset),
5988                                                *upl_size, FALSE,
5989                                                MEMORY_OBJECT_DATA_SYNC,
5990                                                VM_PROT_NO_CHANGE);
5991                 }
5992                 sync_cow_data = FALSE;
5993                 vm_object_deallocate(local_object);
5994
5995                 goto REDISCOVER_ENTRY;
5996         }
5997         if (force_data_sync) {
5998                 local_object = VME_OBJECT(entry);
5999                 local_start = entry->vme_start;
6000                 local_offset = VME_OFFSET(entry);
6001
6002                 vm_object_reference(local_object);
6003                 vm_map_unlock_read(map);
6004
6005                 vm_object_lock_request(local_object,
6006                                        ((vm_object_offset_t)
6007                                         ((offset - local_start) +
6008                                          local_offset)),
6009                                        (vm_object_size_t)*upl_size,
6010                                        FALSE,
6011                                        MEMORY_OBJECT_DATA_SYNC,
6012                                        VM_PROT_NO_CHANGE);
6013
6014                 force_data_sync = FALSE;
6015                 vm_object_deallocate(local_object);
6016
6017                 goto REDISCOVER_ENTRY;
6018         }
6019         if (VME_OBJECT(entry)->private)
6020                 *flags = UPL_DEV_MEMORY;
6021         else
6022                 *flags = 0;
6023
6024         if (VME_OBJECT(entry)->phys_contiguous)
6025                 *flags |= UPL_PHYS_CONTIG;
6026
6027         local_object = VME_OBJECT(entry);
6028         local_offset = VME_OFFSET(entry);
6029         local_start = entry->vme_start;
6030
6031 #if CONFIG_EMBEDDED
6032         /*
6033          * Wiring will copy the pages to the shadow object.
6034          * The shadow object will not be code-signed so
6035          * attempting to execute code from these copied pages
6036          * would trigger a code-signing violation.
6037          */
6038         if (entry->protection & VM_PROT_EXECUTE) {
6039 #if MACH_ASSERT
6040                 printf("pid %d[%s] create_upl out of executable range from "
6041                        "0x%llx to 0x%llx: side effects may include "
6042                        "code-signing violations later on\n",
6043                        proc_selfpid(),
6044                        (current_task()->bsd_info
6045                         ? proc_name_address(current_task()->bsd_info)
6046                         : "?"),
6047                        (uint64_t) entry->vme_start,
6048                        (uint64_t) entry->vme_end);
6049 #endif /* MACH_ASSERT */
6050                 DTRACE_VM2(cs_executable_create_upl,
6051                            uint64_t, (uint64_t)entry->vme_start,
6052                            uint64_t, (uint64_t)entry->vme_end);
6053                 cs_executable_create_upl++;
6054         }
6055 #endif /* CONFIG_EMBEDDED */
6056
6057         vm_object_lock(local_object);
6058
6059         /*
6060          * Ensure that this object is "true_share" and "copy_delay" now,
6061          * while we're still holding the VM map lock.  After we unlock the map,
6062          * anything could happen to that mapping, including some copy-on-write
6063          * activity.  We need to make sure that the IOPL will point at the
6064          * same memory as the mapping.
6065          */
6066         if (local_object->true_share) {
6067                 assert(local_object->copy_strategy !=
6068                        MEMORY_OBJECT_COPY_SYMMETRIC);
6069         } else if (local_object != kernel_object &&
6070                    local_object != compressor_object &&
6071                    !local_object->phys_contiguous) {
6072 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6073                 if (!local_object->true_share &&
6074                     vm_object_tracking_inited) {
6075                         void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6076                         int num = 0;
6077                         num = OSBacktrace(bt,
6078                                           VM_OBJECT_TRACKING_BTDEPTH);
6079                         btlog_add_entry(vm_object_tracking_btlog,
6080                                         local_object,
6081                                         VM_OBJECT_TRACKING_OP_TRUESHARE,
6082                                         bt,
6083                                         num);
6084                 }
6085 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6086                 local_object->true_share = TRUE;
6087                 if (local_object->copy_strategy ==
6088                     MEMORY_OBJECT_COPY_SYMMETRIC) {
6089                         local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6090                 }
6091         }
6092
6093         vm_object_reference_locked(local_object);
6094         vm_object_unlock(local_object);
6095
6096         vm_map_unlock_read(map);
6097
6098         ret = vm_object_iopl_request(local_object,
6099                                      ((vm_object_offset_t)
6100                                       ((offset - local_start) + local_offset)),
6101                                      *upl_size,
6102                                      upl,
6103                                      page_list,
6104                                      count,
6105                                      caller_flags,
6106                                      tag);
6107         vm_object_deallocate(local_object);
6108
6109         return ret;
6110 }
6111
6112 /*
6113  * Internal routine to enter a UPL into a VM map.
6114  *
6115  * JMM - This should just be doable through the standard
6116  * vm_map_enter() API.
6117  */
6118 kern_return_t
6119 vm_map_enter_upl(
6120         vm_map_t                map,
6121         upl_t                   upl,
6122         vm_map_offset_t         *dst_addr)
6123 {
6124         vm_map_size_t           size;
6125         vm_object_offset_t      offset;
6126         vm_map_offset_t         addr;
6127         vm_page_t               m;
6128         kern_return_t           kr;
6129         int                     isVectorUPL = 0, curr_upl=0;
6130         upl_t                   vector_upl = NULL;
6131         vm_offset_t             vector_upl_dst_addr = 0;
6132         vm_map_t                vector_upl_submap = NULL;
6133         upl_offset_t            subupl_offset = 0;
6134         upl_size_t              subupl_size = 0;
6135
6136         if (upl == UPL_NULL)
6137                 return KERN_INVALID_ARGUMENT;
6138
6139         if((isVectorUPL = vector_upl_is_valid(upl))) {
6140                 int mapped=0,valid_upls=0;
6141                 vector_upl = upl;
6142
6143                 upl_lock(vector_upl);
6144                 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6145                         upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
6146                         if(upl == NULL)
6147                                 continue;
6148                         valid_upls++;
6149                         if (UPL_PAGE_LIST_MAPPED & upl->flags)
6150                                 mapped++;
6151                 }
6152
6153                 if(mapped) {
6154                         if(mapped != valid_upls)
6155                                 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6156                         else {
6157                                 upl_unlock(vector_upl);
6158                                 return KERN_FAILURE;
6159                         }
6160                 }
6161
6162                 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6163                                     VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6164                                     &vector_upl_submap);
6165                 if( kr != KERN_SUCCESS )
6166                         panic("Vector UPL submap allocation failed\n");
6167                 map = vector_upl_submap;
6168                 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6169                 curr_upl=0;
6170         }
6171         else
6172                 upl_lock(upl);
6173
6174 process_upl_to_enter:
6175         if(isVectorUPL){
6176                 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6177                         *dst_addr = vector_upl_dst_addr;
6178                         upl_unlock(vector_upl);
6179                         return KERN_SUCCESS;
6180                 }
6181                 upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6182                 if(upl == NULL)
6183                         goto process_upl_to_enter;
6184
6185                 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6186                 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6187         } else {
6188                 /*
6189                  * check to see if already mapped
6190                  */
6191                 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6192                         upl_unlock(upl);
6193                         return KERN_FAILURE;
6194                 }
6195         }
6196         if ((!(upl->flags & UPL_SHADOWED)) &&
6197             ((upl->flags & UPL_HAS_BUSY) ||
6198              !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6199
6200                 vm_object_t             object;
6201                 vm_page_t               alias_page;
6202                 vm_object_offset_t      new_offset;
6203                 unsigned int            pg_num;
6204                 wpl_array_t             lite_list;
6205
6206                 if (upl->flags & UPL_INTERNAL) {
6207                         lite_list = (wpl_array_t)
6208                                 ((((uintptr_t)upl) + sizeof(struct upl))
6209                                  + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6210                 } else {
6211                         lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6212                 }
6213                 object = upl->map_object;
6214                 upl->map_object = vm_object_allocate(upl->size);
6215
6216                 vm_object_lock(upl->map_object);
6217
6218                 upl->map_object->shadow = object;
6219                 upl->map_object->pageout = TRUE;
6220                 upl->map_object->can_persist = FALSE;
6221                 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6222                 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6223                 upl->map_object->wimg_bits = object->wimg_bits;
6224                 offset = upl->map_object->vo_shadow_offset;
6225                 new_offset = 0;
6226                 size = upl->size;
6227
6228                 upl->flags |= UPL_SHADOWED;
6229
6230                 while (size) {
6231                         pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6232                         assert(pg_num == new_offset / PAGE_SIZE);
6233
6234                         if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6235
6236                                 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6237
6238                                 vm_object_lock(object);
6239
6240                                 m = vm_page_lookup(object, offset);
6241                                 if (m == VM_PAGE_NULL) {
6242                                         panic("vm_upl_map: page missing\n");
6243                                 }
6244
6245                                 /*
6246                                  * Convert the fictitious page to a private
6247                                  * shadow of the real page.
6248                                  */
6249                                 assert(alias_page->vmp_fictitious);
6250                                 alias_page->vmp_fictitious = FALSE;
6251                                 alias_page->vmp_private = TRUE;
6252                                 alias_page->vmp_free_when_done = TRUE;
6253                                 /*
6254                                  * since m is a page in the upl it must
6255                                  * already be wired or BUSY, so it's
6256                                  * safe to assign the underlying physical
6257                                  * page to the alias
6258                                  */
6259                                 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6260
6261                                 vm_object_unlock(object);
6262
6263                                 vm_page_lockspin_queues();
6264                                 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6265                                 vm_page_unlock_queues();
6266
6267                                 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6268
6269                                 assert(!alias_page->vmp_wanted);
6270                                 alias_page->vmp_busy = FALSE;
6271                                 alias_page->vmp_absent = FALSE;
6272                         }
6273                         size -= PAGE_SIZE;
6274                         offset += PAGE_SIZE_64;
6275                         new_offset += PAGE_SIZE_64;
6276                 }
6277                 vm_object_unlock(upl->map_object);
6278         }
6279         if (upl->flags & UPL_SHADOWED)
6280                 offset = 0;
6281         else
6282                 offset = upl->offset - upl->map_object->paging_offset;
6283
6284         size = upl->size;
6285
6286         vm_object_reference(upl->map_object);
6287
6288         if(!isVectorUPL) {
6289                 *dst_addr = 0;
6290                 /*
6291                 * NEED A UPL_MAP ALIAS
6292                 */
6293                 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6294                                   VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6295                                   upl->map_object, offset, FALSE,
6296                                   VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6297
6298                 if (kr != KERN_SUCCESS) {
6299                         vm_object_deallocate(upl->map_object);
6300                         upl_unlock(upl);
6301                         return(kr);
6302                 }
6303         }
6304         else {
6305                 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6306                                   VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6307                                   upl->map_object, offset, FALSE,
6308                                   VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6309                 if(kr)
6310                         panic("vm_map_enter failed for a Vector UPL\n");
6311         }
6312         vm_object_lock(upl->map_object);
6313
6314         for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6315                 m = vm_page_lookup(upl->map_object, offset);
6316
6317                 if (m) {
6318                         m->vmp_pmapped = TRUE;
6319
6320                         /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6321                          * but only in kernel space. If this was on a user map,
6322                          * we'd have to set the wpmapped bit. */
6323                         /* m->vmp_wpmapped = TRUE; */
6324                         assert(map->pmap == kernel_pmap);
6325
6326                         PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6327
6328                         assert(kr == KERN_SUCCESS);
6329 #if KASAN
6330                         kasan_notify_address(addr, PAGE_SIZE_64);
6331 #endif
6332                 }
6333                 offset += PAGE_SIZE_64;
6334         }
6335         vm_object_unlock(upl->map_object);
6336
6337         /*
6338          * hold a reference for the mapping
6339          */
6340         upl->ref_count++;
6341         upl->flags |= UPL_PAGE_LIST_MAPPED;
6342         upl->kaddr = (vm_offset_t) *dst_addr;
6343         assert(upl->kaddr == *dst_addr);
6344
6345         if(isVectorUPL)
6346                 goto process_upl_to_enter;
6347
6348         upl_unlock(upl);
6349
6350         return KERN_SUCCESS;
6351 }
6352
6353 /*
6354  * Internal routine to remove a UPL mapping from a VM map.
6355  *
6356  * XXX - This should just be doable through a standard
6357  * vm_map_remove() operation.  Otherwise, implicit clean-up
6358  * of the target map won't be able to correctly remove
6359  * these (and release the reference on the UPL).  Having
6360  * to do this means we can't map these into user-space
6361  * maps yet.
6362  */
6363 kern_return_t
6364 vm_map_remove_upl(
6365         vm_map_t        map,
6366         upl_t           upl)
6367 {
6368         vm_address_t    addr;
6369         upl_size_t      size;
6370         int             isVectorUPL = 0, curr_upl = 0;
6371         upl_t           vector_upl = NULL;
6372
6373         if (upl == UPL_NULL)
6374                 return KERN_INVALID_ARGUMENT;
6375
6376         if((isVectorUPL = vector_upl_is_valid(upl))) {
6377                 int     unmapped=0, valid_upls=0;
6378                 vector_upl = upl;
6379                 upl_lock(vector_upl);
6380                 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6381                         upl =  vector_upl_subupl_byindex(vector_upl, curr_upl );
6382                         if(upl == NULL)
6383                                 continue;
6384                         valid_upls++;
6385                         if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6386                                 unmapped++;
6387                 }
6388
6389                 if(unmapped) {
6390                         if(unmapped != valid_upls)
6391                                 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6392                         else {
6393                                 upl_unlock(vector_upl);
6394                                 return KERN_FAILURE;
6395                         }
6396                 }
6397                 curr_upl=0;
6398         }
6399         else
6400                 upl_lock(upl);
6401
6402 process_upl_to_remove:
6403         if(isVectorUPL) {
6404                 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6405                         vm_map_t v_upl_submap;
6406                         vm_offset_t v_upl_submap_dst_addr;
6407                         vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6408
6409                         vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
6410                         vm_map_deallocate(v_upl_submap);
6411                         upl_unlock(vector_upl);
6412                         return KERN_SUCCESS;
6413                 }
6414
6415                 upl =  vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6416                 if(upl == NULL)
6417                         goto process_upl_to_remove;
6418         }
6419
6420         if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6421                 addr = upl->kaddr;
6422                 size = upl->size;
6423
6424                 assert(upl->ref_count > 1);
6425                 upl->ref_count--;               /* removing mapping ref */
6426
6427                 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6428                 upl->kaddr = (vm_offset_t) 0;
6429
6430                 if(!isVectorUPL) {
6431                         upl_unlock(upl);
6432
6433                         vm_map_remove(
6434                                 map,
6435                                 vm_map_trunc_page(addr,
6436                                                   VM_MAP_PAGE_MASK(map)),
6437                                 vm_map_round_page(addr + size,
6438                                                   VM_MAP_PAGE_MASK(map)),
6439                                 VM_MAP_REMOVE_NO_FLAGS);
6440                         return KERN_SUCCESS;
6441                 }
6442                 else {
6443                         /*
6444                         * If it's a Vectored UPL, we'll be removing the entire
6445                         * submap anyways, so no need to remove individual UPL
6446                         * element mappings from within the submap
6447                         */
6448                         goto process_upl_to_remove;
6449                 }
6450         }
6451         upl_unlock(upl);
6452
6453         return KERN_FAILURE;
6454 }
6455
6456
6457 kern_return_t
6458 upl_commit_range(
6459         upl_t                   upl,
6460         upl_offset_t            offset,
6461         upl_size_t              size,
6462         int                     flags,
6463         upl_page_info_t         *page_list,
6464         mach_msg_type_number_t  count,
6465         boolean_t               *empty)
6466 {
6467         upl_size_t              xfer_size, subupl_size = size;
6468         vm_object_t             shadow_object;
6469         vm_object_t             object;
6470         vm_object_t             m_object;
6471         vm_object_offset_t      target_offset;
6472         upl_offset_t            subupl_offset = offset;
6473         int                     entry;
6474         wpl_array_t             lite_list;
6475         int                     occupied;
6476         int                     clear_refmod = 0;
6477         int                     pgpgout_count = 0;
6478         struct  vm_page_delayed_work    dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6479         struct  vm_page_delayed_work    *dwp;
6480         int                     dw_count;
6481         int                     dw_limit;
6482         int                     isVectorUPL = 0;
6483         upl_t                   vector_upl = NULL;
6484         boolean_t               should_be_throttled = FALSE;
6485
6486         vm_page_t               nxt_page = VM_PAGE_NULL;
6487         int                     fast_path_possible = 0;
6488         int                     fast_path_full_commit = 0;
6489         int                     throttle_page = 0;
6490         int                     unwired_count = 0;
6491         int                     local_queue_count = 0;
6492         vm_page_t               first_local, last_local;
6493
6494         *empty = FALSE;
6495
6496         if (upl == UPL_NULL)
6497                 return KERN_INVALID_ARGUMENT;
6498
6499         if (count == 0)
6500                 page_list = NULL;
6501
6502         if((isVectorUPL = vector_upl_is_valid(upl))) {
6503                 vector_upl = upl;
6504                 upl_lock(vector_upl);
6505         }
6506         else
6507                 upl_lock(upl);
6508
6509 process_upl_to_commit:
6510
6511         if(isVectorUPL) {
6512                 size = subupl_size;
6513                 offset = subupl_offset;
6514                 if(size == 0) {
6515                         upl_unlock(vector_upl);
6516                         return KERN_SUCCESS;
6517                 }
6518                 upl =  vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6519                 if(upl == NULL) {
6520                         upl_unlock(vector_upl);
6521                         return KERN_FAILURE;
6522                 }
6523                 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6524                 subupl_size -= size;
6525                 subupl_offset += size;
6526         }
6527
6528 #if UPL_DEBUG
6529         if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6530                 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6531
6532                 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6533                 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6534
6535                 upl->upl_commit_index++;
6536         }
6537 #endif
6538         if (upl->flags & UPL_DEVICE_MEMORY)
6539                 xfer_size = 0;
6540         else if ((offset + size) <= upl->size)
6541                 xfer_size = size;
6542         else {
6543                 if(!isVectorUPL)
6544                         upl_unlock(upl);
6545                 else {
6546                         upl_unlock(vector_upl);
6547                 }
6548                 return KERN_FAILURE;
6549         }
6550         if (upl->flags & UPL_SET_DIRTY)
6551                 flags |= UPL_COMMIT_SET_DIRTY;
6552         if (upl->flags & UPL_CLEAR_DIRTY)
6553                 flags |= UPL_COMMIT_CLEAR_DIRTY;
6554
6555         if (upl->flags & UPL_INTERNAL)
6556                 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6557                                            + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6558         else
6559                 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6560
6561         object = upl->map_object;
6562
6563         if (upl->flags & UPL_SHADOWED) {
6564                 vm_object_lock(object);
6565                 shadow_object = object->shadow;
6566         } else {
6567                 shadow_object = object;
6568         }
6569         entry = offset/PAGE_SIZE;
6570         target_offset = (vm_object_offset_t)offset;
6571
6572         assert(!(target_offset & PAGE_MASK));
6573         assert(!(xfer_size & PAGE_MASK));
6574
6575         if (upl->flags & UPL_KERNEL_OBJECT)
6576                 vm_object_lock_shared(shadow_object);
6577         else
6578                 vm_object_lock(shadow_object);
6579
6580         VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6581
6582         if (upl->flags & UPL_ACCESS_BLOCKED) {
6583                 assert(shadow_object->blocked_access);
6584                 shadow_object->blocked_access = FALSE;
6585                 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6586         }
6587
6588         if (shadow_object->code_signed) {
6589                 /*
6590                  * CODE SIGNING:
6591                  * If the object is code-signed, do not let this UPL tell
6592                  * us if the pages are valid or not.  Let the pages be
6593                  * validated by VM the normal way (when they get mapped or
6594                  * copied).
6595                  */
6596                 flags &= ~UPL_COMMIT_CS_VALIDATED;
6597         }
6598         if (! page_list) {
6599                 /*
6600                  * No page list to get the code-signing info from !?
6601                  */
6602                 flags &= ~UPL_COMMIT_CS_VALIDATED;
6603         }
6604         if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6605                 should_be_throttled = TRUE;
6606
6607         dwp = &dw_array[0];
6608         dw_count = 0;
6609         dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6610
6611         if ((upl->flags & UPL_IO_WIRE) &&
6612             !(flags & UPL_COMMIT_FREE_ABSENT) &&
6613             !isVectorUPL &&
6614             shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6615             shadow_object->purgable != VM_PURGABLE_EMPTY) {
6616
6617                 if (!vm_page_queue_empty(&shadow_object->memq)) {
6618
6619                         if (size == shadow_object->vo_size) {
6620                                 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6621                                 fast_path_full_commit = 1;
6622                         }
6623                         fast_path_possible = 1;
6624
6625                         if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6626                             (shadow_object->purgable == VM_PURGABLE_DENY ||
6627                              shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6628                              shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6629                                 throttle_page = 1;
6630                         }
6631                 }
6632         }
6633         first_local = VM_PAGE_NULL;
6634         last_local = VM_PAGE_NULL;
6635
6636         while (xfer_size) {
6637                 vm_page_t       t, m;
6638
6639                 dwp->dw_mask = 0;
6640                 clear_refmod = 0;
6641
6642                 m = VM_PAGE_NULL;
6643
6644                 if (upl->flags & UPL_LITE) {
6645                         unsigned int    pg_num;
6646
6647                         if (nxt_page != VM_PAGE_NULL) {
6648                                 m = nxt_page;
6649                                 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
6650                                 target_offset = m->vmp_offset;
6651                         }
6652                         pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6653                         assert(pg_num == target_offset/PAGE_SIZE);
6654
6655                         if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6656                                 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6657
6658                                 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6659                                         m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6660                         } else
6661                                 m = NULL;
6662                 }
6663                 if (upl->flags & UPL_SHADOWED) {
6664                         if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6665
6666                                 t->vmp_free_when_done = FALSE;
6667
6668                                 VM_PAGE_FREE(t);
6669
6670                                 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6671                                         m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6672                         }
6673                 }
6674                 if (m == VM_PAGE_NULL)
6675                         goto commit_next_page;
6676
6677                 m_object = VM_PAGE_OBJECT(m);
6678
6679                 if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6680                         assert(m->vmp_busy);
6681
6682                         dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6683                         goto commit_next_page;
6684                 }
6685
6686                 if (flags & UPL_COMMIT_CS_VALIDATED) {
6687                         /*
6688                          * CODE SIGNING:
6689                          * Set the code signing bits according to
6690                          * what the UPL says they should be.
6691                          */
6692                         m->vmp_cs_validated = page_list[entry].cs_validated;
6693                         m->vmp_cs_tainted = page_list[entry].cs_tainted;
6694                         m->vmp_cs_nx = page_list[entry].cs_nx;
6695                 }
6696                 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6697                         m->vmp_written_by_kernel = TRUE;
6698
6699                 if (upl->flags & UPL_IO_WIRE) {
6700
6701                         if (page_list)
6702                                 page_list[entry].phys_addr = 0;
6703
6704                         if (flags & UPL_COMMIT_SET_DIRTY) {
6705                                 SET_PAGE_DIRTY(m, FALSE);
6706                         } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6707                                 m->vmp_dirty = FALSE;
6708
6709                                 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6710                                     m->vmp_cs_validated && !m->vmp_cs_tainted) {
6711                                         /*
6712                                          * CODE SIGNING:
6713                                          * This page is no longer dirty
6714                                          * but could have been modified,
6715                                          * so it will need to be
6716                                          * re-validated.
6717                                          */
6718                                         m->vmp_cs_validated = FALSE;
6719
6720                                         VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6721
6722                                         pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6723                                 }
6724                                 clear_refmod |= VM_MEM_MODIFIED;
6725                         }
6726                         if (upl->flags & UPL_ACCESS_BLOCKED) {
6727                                 /*
6728                                  * We blocked access to the pages in this UPL.
6729                                  * Clear the "busy" bit and wake up any waiter
6730                                  * for this page.
6731                                  */
6732                                 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6733                         }
6734                         if (fast_path_possible) {
6735                                 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6736                                 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6737                                 if (m->vmp_absent) {
6738                                         assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
6739                                         assert(m->vmp_wire_count == 0);
6740                                         assert(m->vmp_busy);
6741
6742                                         m->vmp_absent = FALSE;
6743                                         dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6744                                 } else {
6745                                         if (m->vmp_wire_count == 0)
6746                                                 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6747                                         assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
6748
6749                                         /*
6750                                          * XXX FBDP need to update some other
6751                                          * counters here (purgeable_wired_count)
6752                                          * (ledgers), ...
6753                                          */
6754                                         assert(m->vmp_wire_count > 0);
6755                                         m->vmp_wire_count--;
6756
6757                                         if (m->vmp_wire_count == 0) {
6758                                                 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
6759                                                 unwired_count++;
6760                                         }
6761                                 }
6762                                 if (m->vmp_wire_count == 0) {
6763                                         assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6764
6765                                         if (last_local == VM_PAGE_NULL) {
6766                                                 assert(first_local == VM_PAGE_NULL);
6767
6768                                                 last_local = m;
6769                                                 first_local = m;
6770                                         } else {
6771                                                 assert(first_local != VM_PAGE_NULL);
6772
6773                                                 m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6774                                                 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6775                                                 first_local = m;
6776                                         }
6777                                         local_queue_count++;
6778
6779                                         if (throttle_page) {
6780                                                 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
6781                                         } else {
6782                                                 if (flags & UPL_COMMIT_INACTIVATE) {
6783                                                         if (shadow_object->internal)
6784                                                                 m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6785                                                         else
6786                                                                 m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6787                                                 } else
6788                                                         m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
6789                                         }
6790                                 }
6791                         } else {
6792                                 if (flags & UPL_COMMIT_INACTIVATE) {
6793                                         dwp->dw_mask |= DW_vm_page_deactivate_internal;
6794                                         clear_refmod |= VM_MEM_REFERENCED;
6795                                 }
6796                                 if (m->vmp_absent) {
6797                                         if (flags & UPL_COMMIT_FREE_ABSENT)
6798                                                 dwp->dw_mask |= DW_vm_page_free;
6799                                         else {
6800                                                 m->vmp_absent = FALSE;
6801                                                 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6802
6803                                                 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6804                                                         dwp->dw_mask |= DW_vm_page_activate;
6805                                         }
6806                                 } else
6807                                         dwp->dw_mask |= DW_vm_page_unwire;
6808                         }
6809                         goto commit_next_page;
6810                 }
6811                 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6812
6813                 if (page_list)
6814                         page_list[entry].phys_addr = 0;
6815
6816                 /*
6817                  * make sure to clear the hardware
6818                  * modify or reference bits before
6819                  * releasing the BUSY bit on this page
6820                  * otherwise we risk losing a legitimate
6821                  * change of state
6822                  */
6823                 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6824                         m->vmp_dirty = FALSE;
6825
6826                         clear_refmod |= VM_MEM_MODIFIED;
6827                 }
6828                 if (m->vmp_laundry)
6829                         dwp->dw_mask |= DW_vm_pageout_throttle_up;
6830
6831                 if (VM_PAGE_WIRED(m))
6832                         m->vmp_free_when_done = FALSE;
6833
6834                 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6835                     m->vmp_cs_validated && !m->vmp_cs_tainted) {
6836                         /*
6837                          * CODE SIGNING:
6838                          * This page is no longer dirty
6839                          * but could have been modified,
6840                          * so it will need to be
6841                          * re-validated.
6842                          */
6843                         m->vmp_cs_validated = FALSE;
6844
6845                         VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6846
6847                         pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6848                 }
6849                 if (m->vmp_overwriting) {
6850                         /*
6851                          * the (COPY_OUT_FROM == FALSE) request_page_list case
6852                          */
6853                         if (m->vmp_busy) {
6854 #if CONFIG_PHANTOM_CACHE
6855                                 if (m->vmp_absent && !m_object->internal)
6856                                         dwp->dw_mask |= DW_vm_phantom_cache_update;
6857 #endif
6858                                 m->vmp_absent = FALSE;
6859
6860                                 dwp->dw_mask |= DW_clear_busy;
6861                         } else {
6862                                 /*
6863                                  * alternate (COPY_OUT_FROM == FALSE) page_list case
6864                                  * Occurs when the original page was wired
6865                                  * at the time of the list request
6866                                  */
6867                                 assert(VM_PAGE_WIRED(m));
6868
6869                                 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
6870                         }
6871                         m->vmp_overwriting = FALSE;
6872                 }
6873                 m->vmp_cleaning = FALSE;
6874
6875                 if (m->vmp_free_when_done) {
6876                         /*
6877                          * With the clean queue enabled, UPL_PAGEOUT should
6878                          * no longer set the pageout bit. It's pages now go
6879                          * to the clean queue.
6880                          */
6881                         assert(!(flags & UPL_PAGEOUT));
6882                         assert(!m_object->internal);
6883
6884                         m->vmp_free_when_done = FALSE;
6885
6886                         if ((flags & UPL_COMMIT_SET_DIRTY) ||
6887                             (m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
6888                                 /*
6889                                  * page was re-dirtied after we started
6890                                  * the pageout... reactivate it since
6891                                  * we don't know whether the on-disk
6892                                  * copy matches what is now in memory
6893                                  */
6894                                 SET_PAGE_DIRTY(m, FALSE);
6895
6896                                 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
6897
6898                                 if (upl->flags & UPL_PAGEOUT) {
6899                                         VM_STAT_INCR(reactivations);
6900                                         DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
6901                                 }
6902                         } else {
6903                                 /*
6904                                  * page has been successfully cleaned
6905                                  * go ahead and free it for other use
6906                                  */
6907                                 if (m_object->internal) {
6908                                         DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
6909                                 } else {
6910                                         DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
6911                                 }
6912                                 m->vmp_dirty = FALSE;
6913                                 m->vmp_busy = TRUE;
6914
6915                                 dwp->dw_mask |= DW_vm_page_free;
6916                         }
6917                         goto commit_next_page;
6918                 }
6919                 /*
6920                  * It is a part of the semantic of COPYOUT_FROM
6921                  * UPLs that a commit implies cache sync
6922                  * between the vm page and the backing store
6923                  * this can be used to strip the precious bit
6924                  * as well as clean
6925                  */
6926                 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6927                         m->vmp_precious = FALSE;
6928
6929                 if (flags & UPL_COMMIT_SET_DIRTY) {
6930                         SET_PAGE_DIRTY(m, FALSE);
6931                 } else {
6932                         m->vmp_dirty = FALSE;
6933                 }
6934
6935                 /* with the clean queue on, move *all* cleaned pages to the clean queue */
6936                 if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
6937                         pgpgout_count++;
6938
6939                         VM_STAT_INCR(pageouts);
6940                         DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
6941
6942                         dwp->dw_mask |= DW_enqueue_cleaned;
6943                 } else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) {
6944                         /*
6945                          * page coming back in from being 'frozen'...
6946                          * it was dirty before it was frozen, so keep it so
6947                          * the vm_page_activate will notice that it really belongs
6948                          * on the throttle queue and put it there
6949                          */
6950                         SET_PAGE_DIRTY(m, FALSE);
6951                         dwp->dw_mask |= DW_vm_page_activate;
6952
6953                 } else {
6954                         if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
6955                                 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6956                                 clear_refmod |= VM_MEM_REFERENCED;
6957                         } else if ( !VM_PAGE_PAGEABLE(m)) {
6958
6959                                 if (m->vmp_clustered || (flags & UPL_COMMIT_SPECULATE))
6960                                         dwp->dw_mask |= DW_vm_page_speculate;
6961                                 else if (m->vmp_reference)
6962                                         dwp->dw_mask |= DW_vm_page_activate;
6963                                 else {
6964                                         dwp->dw_mask |= DW_vm_page_deactivate_internal;
6965                                         clear_refmod |= VM_MEM_REFERENCED;
6966                                 }
6967                         }
6968                 }
6969                 if (upl->flags & UPL_ACCESS_BLOCKED) {
6970                         /*
6971                          * We blocked access to the pages in this URL.
6972                          * Clear the "busy" bit on this page before we
6973                          * wake up any waiter.
6974                          */
6975                         dwp->dw_mask |= DW_clear_busy;
6976                 }
6977                 /*
6978                  * Wakeup any thread waiting for the page to be un-cleaning.
6979                  */
6980                 dwp->dw_mask |= DW_PAGE_WAKEUP;
6981
6982 commit_next_page:
6983                 if (clear_refmod)
6984                         pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
6985
6986                 target_offset += PAGE_SIZE_64;
6987                 xfer_size -= PAGE_SIZE;
6988                 entry++;
6989
6990                 if (dwp->dw_mask) {
6991                         if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
6992                                 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
6993
6994                                 if (dw_count >= dw_limit) {
6995                                         vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
6996
6997                                         dwp = &dw_array[0];
6998                                         dw_count = 0;
6999                                 }
7000                         } else {
7001                                 if (dwp->dw_mask & DW_clear_busy)
7002                                         m->vmp_busy = FALSE;
7003
7004                                 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7005                                         PAGE_WAKEUP(m);
7006                         }
7007                 }
7008         }
7009         if (dw_count)
7010                 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7011
7012         if (fast_path_possible) {
7013
7014                 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7015                 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7016
7017                 if (local_queue_count || unwired_count) {
7018
7019                         if (local_queue_count) {
7020                                 vm_page_t       first_target;
7021                                 vm_page_queue_head_t    *target_queue;
7022
7023                                 if (throttle_page)
7024                                         target_queue = &vm_page_queue_throttled;
7025                                 else {
7026                                         if (flags & UPL_COMMIT_INACTIVATE) {
7027                                                 if (shadow_object->internal)
7028                                                         target_queue = &vm_page_queue_anonymous;
7029                                                 else
7030                                                         target_queue = &vm_page_queue_inactive;
7031                                         } else
7032                                                 target_queue = &vm_page_queue_active;
7033                                 }
7034                                 /*
7035                                  * Transfer the entire local queue to a regular LRU page queues.
7036                                  */
7037                                 vm_page_lockspin_queues();
7038
7039                                 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7040
7041                                 if (vm_page_queue_empty(target_queue))
7042                                         target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7043                                 else
7044                                         first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7045
7046                                 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7047                                 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7048                                 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
7049
7050                                 /*
7051                                  * Adjust the global page counts.
7052                                  */
7053                                 if (throttle_page) {
7054                                         vm_page_throttled_count += local_queue_count;
7055                                 } else {
7056                                         if (flags & UPL_COMMIT_INACTIVATE) {
7057                                                 if (shadow_object->internal)
7058                                                         vm_page_anonymous_count += local_queue_count;
7059                                                 vm_page_inactive_count += local_queue_count;
7060
7061                                                 token_new_pagecount += local_queue_count;
7062                                         } else
7063                                                 vm_page_active_count += local_queue_count;
7064
7065                                         if (shadow_object->internal)
7066                                                 vm_page_pageable_internal_count += local_queue_count;
7067                                         else
7068                                                 vm_page_pageable_external_count += local_queue_count;
7069                                 }
7070                         } else {
7071                                 vm_page_lockspin_queues();
7072                         }
7073                         if (unwired_count) {
7074                                 vm_page_wire_count -= unwired_count;
7075                                 VM_CHECK_MEMORYSTATUS;
7076                         }
7077                         vm_page_unlock_queues();
7078
7079                         VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7080                 }
7081         }
7082         occupied = 1;
7083
7084         if (upl->flags & UPL_DEVICE_MEMORY)  {
7085                 occupied = 0;
7086         } else if (upl->flags & UPL_LITE) {
7087                 int     pg_num;
7088                 int     i;
7089
7090                 occupied = 0;
7091
7092                 if (!fast_path_full_commit) {
7093                         pg_num = upl->size/PAGE_SIZE;
7094                         pg_num = (pg_num + 31) >> 5;
7095
7096                         for (i = 0; i < pg_num; i++) {
7097                                 if (lite_list[i] != 0) {
7098                                         occupied = 1;
7099                                         break;
7100                                 }
7101                         }
7102                 }
7103         } else {
7104                 if (vm_page_queue_empty(&upl->map_object->memq))
7105                         occupied = 0;
7106         }
7107         if (occupied == 0) {
7108                 /*
7109                  * If this UPL element belongs to a Vector UPL and is
7110                  * empty, then this is the right function to deallocate
7111                  * it. So go ahead set the *empty variable. The flag
7112                  * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7113                  * should be considered relevant for the Vector UPL and not
7114                  * the internal UPLs.
7115                  */
7116                 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7117                         *empty = TRUE;
7118
7119                 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7120                         /*
7121                          * this is not a paging object
7122                          * so we need to drop the paging reference
7123                          * that was taken when we created the UPL
7124                          * against this object
7125                          */
7126                         vm_object_activity_end(shadow_object);
7127                         vm_object_collapse(shadow_object, 0, TRUE);
7128                 } else {
7129                          /*
7130                           * we dontated the paging reference to
7131                           * the map object... vm_pageout_object_terminate
7132                           * will drop this reference
7133                           */
7134                 }
7135         }
7136         VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7137         vm_object_unlock(shadow_object);
7138         if (object != shadow_object)
7139                 vm_object_unlock(object);
7140
7141         if(!isVectorUPL)
7142                 upl_unlock(upl);
7143         else {
7144                 /*
7145                  * If we completed our operations on an UPL that is
7146                  * part of a Vectored UPL and if empty is TRUE, then
7147                  * we should go ahead and deallocate this UPL element.
7148                  * Then we check if this was the last of the UPL elements
7149                  * within that Vectored UPL. If so, set empty to TRUE
7150                  * so that in ubc_upl_commit_range or ubc_upl_commit, we
7151                  * can go ahead and deallocate the Vector UPL too.
7152                  */
7153                 if(*empty==TRUE) {
7154                         *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7155                         upl_deallocate(upl);
7156                 }
7157                 goto process_upl_to_commit;
7158         }
7159         if (pgpgout_count) {
7160                 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7161         }
7162
7163         return KERN_SUCCESS;
7164 }
7165
7166 kern_return_t
7167 upl_abort_range(
7168         upl_t                   upl,
7169         upl_offset_t            offset,
7170         upl_size_t              size,
7171         int                     error,
7172         boolean_t               *empty)
7173 {
7174         upl_page_info_t         *user_page_list = NULL;
7175         upl_size_t              xfer_size, subupl_size = size;
7176         vm_object_t             shadow_object;
7177         vm_object_t             object;
7178         vm_object_offset_t      target_offset;
7179         upl_offset_t            subupl_offset = offset;
7180         int                     entry;
7181         wpl_array_t             lite_list;
7182         int                     occupied;
7183         struct  vm_page_delayed_work    dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7184         struct  vm_page_delayed_work    *dwp;
7185         int                     dw_count;
7186         int                     dw_limit;
7187         int                     isVectorUPL = 0;
7188         upl_t                   vector_upl = NULL;
7189
7190         *empty = FALSE;
7191
7192         if (upl == UPL_NULL)
7193                 return KERN_INVALID_ARGUMENT;
7194
7195         if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7196                 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7197
7198         if((isVectorUPL = vector_upl_is_valid(upl))) {
7199                 vector_upl = upl;
7200                 upl_lock(vector_upl);
7201         }
7202         else
7203                 upl_lock(upl);
7204
7205 process_upl_to_abort:
7206         if(isVectorUPL) {
7207                 size = subupl_size;
7208                 offset = subupl_offset;
7209                 if(size == 0) {
7210                         upl_unlock(vector_upl);
7211                         return KERN_SUCCESS;
7212                 }
7213                 upl =  vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7214                 if(upl == NULL) {
7215                         upl_unlock(vector_upl);
7216                         return KERN_FAILURE;
7217                 }
7218                 subupl_size -= size;
7219                 subupl_offset += size;
7220         }
7221
7222         *empty = FALSE;
7223
7224 #if UPL_DEBUG
7225         if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7226                 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7227
7228                 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7229                 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7230                 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7231
7232                 upl->upl_commit_index++;
7233         }
7234 #endif
7235         if (upl->flags & UPL_DEVICE_MEMORY)
7236                 xfer_size = 0;
7237         else if ((offset + size) <= upl->size)
7238                 xfer_size = size;
7239         else {
7240                 if(!isVectorUPL)
7241                         upl_unlock(upl);
7242                 else {
7243                         upl_unlock(vector_upl);
7244                 }
7245
7246                 return KERN_FAILURE;
7247         }
7248         if (upl->flags & UPL_INTERNAL) {
7249                 lite_list = (wpl_array_t)
7250                         ((((uintptr_t)upl) + sizeof(struct upl))
7251                         + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7252
7253                 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7254         } else {
7255                 lite_list = (wpl_array_t)
7256                         (((uintptr_t)upl) + sizeof(struct upl));
7257         }
7258         object = upl->map_object;
7259
7260         if (upl->flags & UPL_SHADOWED) {
7261                 vm_object_lock(object);
7262                 shadow_object = object->shadow;
7263         } else
7264                 shadow_object = object;
7265
7266         entry = offset/PAGE_SIZE;
7267         target_offset = (vm_object_offset_t)offset;
7268
7269         assert(!(target_offset & PAGE_MASK));
7270         assert(!(xfer_size & PAGE_MASK));
7271
7272         if (upl->flags & UPL_KERNEL_OBJECT)
7273                 vm_object_lock_shared(shadow_object);
7274         else
7275                 vm_object_lock(shadow_object);
7276
7277         if (upl->flags & UPL_ACCESS_BLOCKED) {
7278                 assert(shadow_object->blocked_access);
7279                 shadow_object->blocked_access = FALSE;
7280                 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7281         }
7282
7283         dwp = &dw_array[0];
7284         dw_count = 0;
7285         dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
7286
7287         if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7288                 panic("upl_abort_range: kernel_object being DUMPED");
7289
7290         while (xfer_size) {
7291                 vm_page_t       t, m;
7292                 unsigned int    pg_num;
7293                 boolean_t       needed;
7294
7295                 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7296                 assert(pg_num == target_offset/PAGE_SIZE);
7297
7298                 needed = FALSE;
7299
7300                 if (user_page_list)
7301                         needed = user_page_list[pg_num].needed;
7302
7303                 dwp->dw_mask = 0;
7304                 m = VM_PAGE_NULL;
7305
7306                 if (upl->flags & UPL_LITE) {
7307
7308                         if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7309                                 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7310
7311                                 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7312                                         m = vm_page_lookup(shadow_object, target_offset +
7313                                                            (upl->offset - shadow_object->paging_offset));
7314                         }
7315                 }
7316                 if (upl->flags & UPL_SHADOWED) {
7317                         if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7318                                 t->vmp_free_when_done = FALSE;
7319
7320                                 VM_PAGE_FREE(t);
7321
7322                                 if (m == VM_PAGE_NULL)
7323                                         m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7324                         }
7325                 }
7326                 if ((upl->flags & UPL_KERNEL_OBJECT))
7327                         goto abort_next_page;
7328
7329                 if (m != VM_PAGE_NULL) {
7330
7331                         assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7332
7333                         if (m->vmp_absent) {
7334                                 boolean_t must_free = TRUE;
7335
7336                                 /*
7337                                  * COPYOUT = FALSE case
7338                                  * check for error conditions which must
7339                                  * be passed back to the pages customer
7340                                  */
7341                                 if (error & UPL_ABORT_RESTART) {
7342                                         m->vmp_restart = TRUE;
7343                                         m->vmp_absent = FALSE;
7344                                         m->vmp_unusual = TRUE;
7345                                         must_free = FALSE;
7346                                 } else if (error & UPL_ABORT_UNAVAILABLE) {
7347                                         m->vmp_restart = FALSE;
7348                                         m->vmp_unusual = TRUE;
7349                                         must_free = FALSE;
7350                                 } else if (error & UPL_ABORT_ERROR) {
7351                                         m->vmp_restart = FALSE;
7352                                         m->vmp_absent = FALSE;
7353                                         m->vmp_error = TRUE;
7354                                         m->vmp_unusual = TRUE;
7355                                         must_free = FALSE;
7356                                 }
7357                                 if (m->vmp_clustered && needed == FALSE) {
7358                                         /*
7359                                          * This page was a part of a speculative
7360                                          * read-ahead initiated by the kernel
7361                                          * itself.  No one is expecting this
7362                                          * page and no one will clean up its
7363                                          * error state if it ever becomes valid
7364                                          * in the future.
7365                                          * We have to free it here.
7366                                          */
7367                                         must_free = TRUE;
7368                                 }
7369                                 m->vmp_cleaning = FALSE;
7370
7371                                 if (m->vmp_overwriting && !m->vmp_busy) {
7372                                         /*
7373                                          * this shouldn't happen since
7374                                          * this is an 'absent' page, but
7375                                          * it doesn't hurt to check for
7376                                          * the 'alternate' method of
7377                                          * stabilizing the page...
7378                                          * we will mark 'busy' to be cleared
7379                                          * in the following code which will
7380                                          * take care of the primary stabilzation
7381                                          * method (i.e. setting 'busy' to TRUE)
7382                                          */
7383                                         dwp->dw_mask |= DW_vm_page_unwire;
7384                                 }
7385                                 m->vmp_overwriting = FALSE;
7386
7387                                 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7388
7389                                 if (must_free == TRUE)
7390                                         dwp->dw_mask |= DW_vm_page_free;
7391                                 else
7392                                         dwp->dw_mask |= DW_vm_page_activate;
7393                         } else {
7394                                 /*
7395                                  * Handle the trusted pager throttle.
7396                                  */
7397                                 if (m->vmp_laundry)
7398                                         dwp->dw_mask |= DW_vm_pageout_throttle_up;
7399
7400                                 if (upl->flags & UPL_ACCESS_BLOCKED) {
7401                                         /*
7402                                          * We blocked access to the pages in this UPL.
7403                                          * Clear the "busy" bit and wake up any waiter
7404                                          * for this page.
7405                                          */
7406                                         dwp->dw_mask |= DW_clear_busy;
7407                                 }
7408                                 if (m->vmp_overwriting) {
7409                                         if (m->vmp_busy)
7410                                                 dwp->dw_mask |= DW_clear_busy;
7411                                         else {
7412                                                 /*
7413                                                  * deal with the 'alternate' method
7414                                                  * of stabilizing the page...
7415                                                  * we will either free the page
7416                                                  * or mark 'busy' to be cleared
7417                                                  * in the following code which will
7418                                                  * take care of the primary stabilzation
7419                                                  * method (i.e. setting 'busy' to TRUE)
7420                                                  */
7421                                                 dwp->dw_mask |= DW_vm_page_unwire;
7422                                         }
7423                                         m->vmp_overwriting = FALSE;
7424                                 }
7425                                 m->vmp_free_when_done = FALSE;
7426                                 m->vmp_cleaning = FALSE;
7427
7428                                 if (error & UPL_ABORT_DUMP_PAGES) {
7429                                         pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7430
7431                                         dwp->dw_mask |= DW_vm_page_free;
7432                                 } else {
7433                                         if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7434                                                 if (error & UPL_ABORT_REFERENCE) {
7435                                                         /*
7436                                                          * we've been told to explictly
7437                                                          * reference this page... for
7438                                                          * file I/O, this is done by
7439                                                          * implementing an LRU on the inactive q
7440                                                          */
7441                                                         dwp->dw_mask |= DW_vm_page_lru;
7442
7443                                                 } else if ( !VM_PAGE_PAGEABLE(m))
7444                                                         dwp->dw_mask |= DW_vm_page_deactivate_internal;
7445                                         }
7446                                         dwp->dw_mask |= DW_PAGE_WAKEUP;
7447                                 }
7448                         }
7449                 }
7450 abort_next_page:
7451                 target_offset += PAGE_SIZE_64;
7452                 xfer_size -= PAGE_SIZE;
7453                 entry++;
7454
7455                 if (dwp->dw_mask) {
7456                         if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7457                                 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7458
7459                                 if (dw_count >= dw_limit) {
7460                                         vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7461
7462                                         dwp = &dw_array[0];
7463                                         dw_count = 0;
7464                                 }
7465                         } else {
7466                                 if (dwp->dw_mask & DW_clear_busy)
7467                                         m->vmp_busy = FALSE;
7468
7469                                 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7470                                         PAGE_WAKEUP(m);
7471                         }
7472                 }
7473         }
7474         if (dw_count)
7475                 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7476
7477         occupied = 1;
7478
7479         if (upl->flags & UPL_DEVICE_MEMORY)  {
7480                 occupied = 0;
7481         } else if (upl->flags & UPL_LITE) {
7482                 int     pg_num;
7483                 int     i;
7484
7485                 pg_num = upl->size/PAGE_SIZE;
7486                 pg_num = (pg_num + 31) >> 5;
7487                 occupied = 0;
7488
7489                 for (i = 0; i < pg_num; i++) {
7490                         if (lite_list[i] != 0) {
7491                                 occupied = 1;
7492                                 break;
7493                         }
7494                 }
7495         } else {
7496                 if (vm_page_queue_empty(&upl->map_object->memq))
7497                         occupied = 0;
7498         }
7499         if (occupied == 0) {
7500                 /*
7501                  * If this UPL element belongs to a Vector UPL and is
7502                  * empty, then this is the right function to deallocate
7503                  * it. So go ahead set the *empty variable. The flag
7504                  * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7505                  * should be considered relevant for the Vector UPL and
7506                  * not the internal UPLs.
7507                  */
7508                 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7509                         *empty = TRUE;
7510
7511                 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7512                         /*
7513                          * this is not a paging object
7514                          * so we need to drop the paging reference
7515                          * that was taken when we created the UPL
7516                          * against this object
7517                          */
7518                         vm_object_activity_end(shadow_object);
7519                         vm_object_collapse(shadow_object, 0, TRUE);
7520                 } else {
7521                          /*
7522                           * we dontated the paging reference to
7523                           * the map object... vm_pageout_object_terminate
7524                           * will drop this reference
7525                           */
7526                 }
7527         }
7528         vm_object_unlock(shadow_object);
7529         if (object != shadow_object)
7530                 vm_object_unlock(object);
7531
7532         if(!isVectorUPL)
7533                 upl_unlock(upl);
7534         else {
7535                 /*
7536                 * If we completed our operations on an UPL that is
7537                 * part of a Vectored UPL and if empty is TRUE, then
7538                 * we should go ahead and deallocate this UPL element.
7539                 * Then we check if this was the last of the UPL elements
7540                 * within that Vectored UPL. If so, set empty to TRUE
7541                 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7542                 * can go ahead and deallocate the Vector UPL too.
7543                 */
7544                 if(*empty == TRUE) {
7545                         *empty = vector_upl_set_subupl(vector_upl, upl,0);
7546                         upl_deallocate(upl);
7547                 }
7548                 goto process_upl_to_abort;
7549         }
7550
7551         return KERN_SUCCESS;
7552 }
7553
7554
7555 kern_return_t
7556 upl_abort(
7557         upl_t   upl,
7558         int     error)
7559 {
7560         boolean_t       empty;
7561
7562         if (upl == UPL_NULL)
7563                 return KERN_INVALID_ARGUMENT;
7564
7565         return upl_abort_range(upl, 0, upl->size, error, &empty);
7566 }
7567
7568
7569 /* an option on commit should be wire */
7570 kern_return_t
7571 upl_commit(
7572         upl_t                   upl,
7573         upl_page_info_t         *page_list,
7574         mach_msg_type_number_t  count)
7575 {
7576         boolean_t       empty;
7577
7578         if (upl == UPL_NULL)
7579                 return KERN_INVALID_ARGUMENT;
7580
7581         return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7582 }
7583
7584
7585 void
7586 iopl_valid_data(
7587         upl_t    upl,
7588         vm_tag_t tag)
7589 {
7590         vm_object_t     object;
7591         vm_offset_t     offset;
7592         vm_page_t       m, nxt_page = VM_PAGE_NULL;
7593         upl_size_t      size;
7594         int             wired_count = 0;
7595
7596         if (upl == NULL)
7597                 panic("iopl_valid_data: NULL upl");
7598         if (vector_upl_is_valid(upl))
7599                 panic("iopl_valid_data: vector upl");
7600         if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7601                 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7602
7603         object = upl->map_object;
7604
7605         if (object == kernel_object || object == compressor_object)
7606                 panic("iopl_valid_data: object == kernel or compressor");
7607
7608         if (object->purgable == VM_PURGABLE_VOLATILE ||
7609             object->purgable == VM_PURGABLE_EMPTY)
7610                 panic("iopl_valid_data: object %p purgable %d",
7611                       object, object->purgable);
7612
7613         size = upl->size;
7614
7615         vm_object_lock(object);
7616         VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7617
7618         if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7619                 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7620         else
7621                 offset = 0 + upl->offset - object->paging_offset;
7622
7623         while (size) {
7624
7625                 if (nxt_page != VM_PAGE_NULL) {
7626                         m = nxt_page;
7627                         nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7628                 } else {
7629                         m = vm_page_lookup(object, offset);
7630                         offset += PAGE_SIZE;
7631
7632                         if (m == VM_PAGE_NULL)
7633                                 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7634                 }
7635                 if (m->vmp_busy) {
7636                         if (!m->vmp_absent)
7637                                 panic("iopl_valid_data: busy page w/o absent");
7638
7639                         if (m->vmp_pageq.next || m->vmp_pageq.prev)
7640                                 panic("iopl_valid_data: busy+absent page on page queue");
7641                         if (m->vmp_reusable) {
7642                                 panic("iopl_valid_data: %p is reusable", m);
7643                         }
7644
7645                         m->vmp_absent = FALSE;
7646                         m->vmp_dirty = TRUE;
7647                         assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7648                         assert(m->vmp_wire_count == 0);
7649                         m->vmp_wire_count++;
7650                         assert(m->vmp_wire_count);
7651                         if (m->vmp_wire_count == 1) {
7652                                 m->vmp_q_state = VM_PAGE_IS_WIRED;
7653                                 wired_count++;
7654                         } else {
7655                                 panic("iopl_valid_data: %p already wired\n", m);
7656                         }
7657
7658                         PAGE_WAKEUP_DONE(m);
7659                 }
7660                 size -= PAGE_SIZE;
7661         }
7662         if (wired_count) {
7663
7664                 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7665                 assert(object->resident_page_count >= object->wired_page_count);
7666
7667                 /* no need to adjust purgeable accounting for this object: */
7668                 assert(object->purgable != VM_PURGABLE_VOLATILE);
7669                 assert(object->purgable != VM_PURGABLE_EMPTY);
7670
7671                 vm_page_lockspin_queues();
7672                 vm_page_wire_count += wired_count;
7673                 vm_page_unlock_queues();
7674         }
7675         VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7676         vm_object_unlock(object);
7677 }
7678
7679
7680 void
7681 vm_object_set_pmap_cache_attr(
7682                 vm_object_t             object,
7683                 upl_page_info_array_t   user_page_list,
7684                 unsigned int            num_pages,
7685                 boolean_t               batch_pmap_op)
7686 {
7687         unsigned int    cache_attr = 0;
7688
7689         cache_attr = object->wimg_bits & VM_WIMG_MASK;
7690         assert(user_page_list);
7691         if (cache_attr != VM_WIMG_USE_DEFAULT) {
7692                 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7693         }
7694 }
7695
7696
7697 boolean_t       vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7698 kern_return_t   vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int, int*);
7699
7700
7701
7702 boolean_t
7703 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7704                             wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7705 {
7706         vm_page_t       dst_page;
7707         unsigned int    entry;
7708         int             page_count;
7709         int             delayed_unlock = 0;
7710         boolean_t       retval = TRUE;
7711         ppnum_t         phys_page;
7712
7713         vm_object_lock_assert_exclusive(object);
7714         assert(object->purgable != VM_PURGABLE_VOLATILE);
7715         assert(object->purgable != VM_PURGABLE_EMPTY);
7716         assert(object->pager == NULL);
7717         assert(object->copy == NULL);
7718         assert(object->shadow == NULL);
7719
7720         page_count = object->resident_page_count;
7721         dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7722
7723         vm_page_lock_queues();
7724
7725         while (page_count--) {
7726
7727                 if (dst_page->vmp_busy ||
7728                     dst_page->vmp_fictitious ||
7729                     dst_page->vmp_absent ||
7730                     dst_page->vmp_error ||
7731                     dst_page->vmp_cleaning ||
7732                     dst_page->vmp_restart ||
7733                     dst_page->vmp_laundry) {
7734                         retval = FALSE;
7735                         goto done;
7736                 }
7737                 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7738                         retval = FALSE;
7739                         goto done;
7740                 }
7741                 dst_page->vmp_reference = TRUE;
7742
7743                 vm_page_wire(dst_page, tag, FALSE);
7744
7745                 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7746                         SET_PAGE_DIRTY(dst_page, FALSE);
7747                 }
7748                 entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7749                 assert(entry >= 0 && entry < object->resident_page_count);
7750                 lite_list[entry>>5] |= 1 << (entry & 31);
7751
7752                 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7753
7754                 if (phys_page > upl->highest_page)
7755                         upl->highest_page = phys_page;
7756
7757                 if (user_page_list) {
7758                         user_page_list[entry].phys_addr = phys_page;
7759                         user_page_list[entry].absent    = dst_page->vmp_absent;
7760                         user_page_list[entry].dirty     = dst_page->vmp_dirty;
7761                         user_page_list[entry].free_when_done   = dst_page->vmp_free_when_done;
7762                         user_page_list[entry].precious  = dst_page->vmp_precious;
7763                         user_page_list[entry].device    = FALSE;
7764                         user_page_list[entry].speculative = FALSE;
7765                         user_page_list[entry].cs_validated = FALSE;
7766                         user_page_list[entry].cs_tainted = FALSE;
7767                         user_page_list[entry].cs_nx     = FALSE;
7768                         user_page_list[entry].needed    = FALSE;
7769                         user_page_list[entry].mark      = FALSE;
7770                 }
7771                 if (delayed_unlock++ > 256) {
7772                         delayed_unlock = 0;
7773                         lck_mtx_yield(&vm_page_queue_lock);
7774
7775                         VM_CHECK_MEMORYSTATUS;
7776                 }
7777                 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7778         }
7779 done:
7780         vm_page_unlock_queues();
7781
7782         VM_CHECK_MEMORYSTATUS;
7783
7784         return (retval);
7785 }
7786
7787
7788 kern_return_t
7789 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7790                              wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset,
7791                              int page_count, int* page_grab_count)
7792 {
7793         vm_page_t       dst_page;
7794         boolean_t       no_zero_fill = FALSE;
7795         int             interruptible;
7796         int             pages_wired = 0;
7797         int             pages_inserted = 0;
7798         int             entry = 0;
7799         uint64_t        delayed_ledger_update = 0;
7800         kern_return_t   ret = KERN_SUCCESS;
7801         int             grab_options;
7802         ppnum_t         phys_page;
7803
7804         vm_object_lock_assert_exclusive(object);
7805         assert(object->purgable != VM_PURGABLE_VOLATILE);
7806         assert(object->purgable != VM_PURGABLE_EMPTY);
7807         assert(object->pager == NULL);
7808         assert(object->copy == NULL);
7809         assert(object->shadow == NULL);
7810
7811         if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7812                 interruptible = THREAD_ABORTSAFE;
7813         else
7814                 interruptible = THREAD_UNINT;
7815
7816         if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7817                 no_zero_fill = TRUE;
7818
7819         grab_options = 0;
7820 #if CONFIG_SECLUDED_MEMORY
7821         if (object->can_grab_secluded) {
7822                 grab_options |= VM_PAGE_GRAB_SECLUDED;
7823         }
7824 #endif /* CONFIG_SECLUDED_MEMORY */
7825
7826         while (page_count--) {
7827
7828                 while ((dst_page = vm_page_grab_options(grab_options))
7829                        == VM_PAGE_NULL) {
7830
7831                         OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7832
7833                         VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7834
7835                         if (vm_page_wait(interruptible) == FALSE) {
7836                                 /*
7837                                  * interrupted case
7838                                  */
7839                                 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7840
7841                                 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7842
7843                                 ret = MACH_SEND_INTERRUPTED;
7844                                 goto done;
7845                         }
7846                         OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7847
7848                         VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7849                 }
7850                 if (no_zero_fill == FALSE)
7851                         vm_page_zero_fill(dst_page);
7852                 else
7853                         dst_page->vmp_absent = TRUE;
7854
7855                 dst_page->vmp_reference = TRUE;
7856
7857                 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7858                         SET_PAGE_DIRTY(dst_page, FALSE);
7859                 }
7860                 if (dst_page->vmp_absent == FALSE) {
7861                         assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7862                         assert(dst_page->vmp_wire_count == 0);
7863                         dst_page->vmp_wire_count++;
7864                         dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7865                         assert(dst_page->vmp_wire_count);
7866                         pages_wired++;
7867                         PAGE_WAKEUP_DONE(dst_page);
7868                 }
7869                 pages_inserted++;
7870
7871                 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7872
7873                 lite_list[entry>>5] |= 1 << (entry & 31);
7874
7875                 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7876
7877                 if (phys_page > upl->highest_page)
7878                         upl->highest_page = phys_page;
7879
7880                 if (user_page_list) {
7881                         user_page_list[entry].phys_addr = phys_page;
7882                         user_page_list[entry].absent    = dst_page->vmp_absent;
7883                         user_page_list[entry].dirty     = dst_page->vmp_dirty;
7884                         user_page_list[entry].free_when_done    = FALSE;
7885                         user_page_list[entry].precious  = FALSE;
7886                         user_page_list[entry].device    = FALSE;
7887                         user_page_list[entry].speculative = FALSE;
7888                         user_page_list[entry].cs_validated = FALSE;
7889                         user_page_list[entry].cs_tainted = FALSE;
7890                         user_page_list[entry].cs_nx     = FALSE;
7891                         user_page_list[entry].needed    = FALSE;
7892                         user_page_list[entry].mark      = FALSE;
7893                 }
7894                 entry++;
7895                 *dst_offset += PAGE_SIZE_64;
7896         }
7897 done:
7898         if (pages_wired) {
7899                 vm_page_lockspin_queues();
7900                 vm_page_wire_count += pages_wired;
7901                 vm_page_unlock_queues();
7902         }
7903         if (pages_inserted) {
7904                 if (object->internal) {
7905                         OSAddAtomic(pages_inserted, &vm_page_internal_count);
7906                 } else {
7907                         OSAddAtomic(pages_inserted, &vm_page_external_count);
7908                 }
7909         }
7910         if (delayed_ledger_update) {
7911                 task_t          owner;
7912                 int             ledger_idx_volatile;
7913                 int             ledger_idx_nonvolatile;
7914                 int             ledger_idx_volatile_compressed;
7915                 int             ledger_idx_nonvolatile_compressed;
7916                 boolean_t       do_footprint;
7917
7918                 owner = VM_OBJECT_OWNER(object);
7919                 assert(owner);
7920
7921                 vm_object_ledger_tag_ledgers(object,
7922                                              &ledger_idx_volatile,
7923                                              &ledger_idx_nonvolatile,
7924                                              &ledger_idx_volatile_compressed,
7925                                              &ledger_idx_nonvolatile_compressed,
7926                                              &do_footprint);
7927
7928                 /* more non-volatile bytes */
7929                 ledger_credit(owner->ledger,
7930                               ledger_idx_nonvolatile,
7931                               delayed_ledger_update);
7932                 if (do_footprint) {
7933                         /* more footprint */
7934                         ledger_credit(owner->ledger,
7935                                       task_ledgers.phys_footprint,
7936                                       delayed_ledger_update);
7937                 }
7938         }
7939
7940         assert(page_grab_count);
7941         *page_grab_count = pages_inserted;
7942
7943         return (ret);
7944 }
7945
7946
7947
7948 kern_return_t
7949 vm_object_iopl_request(
7950         vm_object_t             object,
7951         vm_object_offset_t      offset,
7952         upl_size_t              size,
7953         upl_t                   *upl_ptr,
7954         upl_page_info_array_t   user_page_list,
7955         unsigned int            *page_list_count,
7956         upl_control_flags_t     cntrl_flags,
7957         vm_tag_t                tag)
7958 {
7959         vm_page_t               dst_page;
7960         vm_object_offset_t      dst_offset;
7961         upl_size_t              xfer_size;
7962         upl_t                   upl = NULL;
7963         unsigned int            entry;
7964         wpl_array_t             lite_list = NULL;
7965         int                     no_zero_fill = FALSE;
7966         unsigned int            size_in_pages;
7967         int                     page_grab_count = 0;
7968         u_int32_t               psize;
7969         kern_return_t           ret;
7970         vm_prot_t               prot;
7971         struct vm_object_fault_info fault_info = {};
7972         struct  vm_page_delayed_work    dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7973         struct  vm_page_delayed_work    *dwp;
7974         int                     dw_count;
7975         int                     dw_limit;
7976         int                     dw_index;
7977         boolean_t               caller_lookup;
7978         int                     io_tracking_flag = 0;
7979         int                     interruptible;
7980         ppnum_t                 phys_page;
7981
7982         boolean_t               set_cache_attr_needed = FALSE;
7983         boolean_t               free_wired_pages = FALSE;
7984         boolean_t               fast_path_empty_req = FALSE;
7985         boolean_t               fast_path_full_req = FALSE;
7986
7987         if (cntrl_flags & ~UPL_VALID_FLAGS) {
7988                 /*
7989                  * For forward compatibility's sake,
7990                  * reject any unknown flag.
7991                  */
7992                 return KERN_INVALID_VALUE;
7993         }
7994         if (vm_lopage_needed == FALSE)
7995                 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
7996
7997         if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
7998                 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
7999                         return KERN_INVALID_VALUE;
8000
8001                 if (object->phys_contiguous) {
8002                         if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8003                                 return KERN_INVALID_ADDRESS;
8004
8005                         if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8006                                 return KERN_INVALID_ADDRESS;
8007                 }
8008         }
8009         if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8010                 no_zero_fill = TRUE;
8011
8012         if (cntrl_flags & UPL_COPYOUT_FROM)
8013                 prot = VM_PROT_READ;
8014         else
8015                 prot = VM_PROT_READ | VM_PROT_WRITE;
8016
8017         if ((!object->internal) && (object->paging_offset != 0))
8018                 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8019
8020         VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8021
8022 #if CONFIG_IOSCHED || UPL_DEBUG
8023         if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8024                 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8025 #endif
8026
8027 #if CONFIG_IOSCHED
8028         if (object->io_tracking) {
8029                 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8030                 if (object != kernel_object)
8031                         io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8032         }
8033 #endif
8034
8035         if (object->phys_contiguous)
8036                 psize = PAGE_SIZE;
8037         else
8038                 psize = size;
8039
8040         if (cntrl_flags & UPL_SET_INTERNAL) {
8041                 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8042
8043                 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8044                 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8045                                            ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8046                 if (size == 0) {
8047                         user_page_list = NULL;
8048                         lite_list = NULL;
8049                 }
8050         } else {
8051                 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8052
8053                 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8054                 if (size == 0) {
8055                         lite_list = NULL;
8056                 }
8057         }
8058         if (user_page_list)
8059                 user_page_list[0].device = FALSE;
8060         *upl_ptr = upl;
8061
8062         if (cntrl_flags & UPL_NOZEROFILLIO) {
8063                 DTRACE_VM4(upl_nozerofillio,
8064                            vm_object_t, object,
8065                            vm_object_offset_t, offset,
8066                            upl_size_t, size,
8067                            upl_t, upl);
8068         }
8069
8070         upl->map_object = object;
8071         upl->size = size;
8072
8073         size_in_pages = size / PAGE_SIZE;
8074
8075         if (object == kernel_object &&
8076             !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8077                 upl->flags |= UPL_KERNEL_OBJECT;
8078 #if UPL_DEBUG
8079                 vm_object_lock(object);
8080 #else
8081                 vm_object_lock_shared(object);
8082 #endif
8083         } else {
8084                 vm_object_lock(object);
8085                 vm_object_activity_begin(object);
8086         }
8087         /*
8088          * paging in progress also protects the paging_offset
8089          */
8090         upl->offset = offset + object->paging_offset;
8091
8092         if (cntrl_flags & UPL_BLOCK_ACCESS) {
8093                 /*
8094                  * The user requested that access to the pages in this UPL
8095                  * be blocked until the UPL is commited or aborted.
8096                  */
8097                 upl->flags |= UPL_ACCESS_BLOCKED;
8098         }
8099
8100 #if CONFIG_IOSCHED || UPL_DEBUG
8101         if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8102                 vm_object_activity_begin(object);
8103                 queue_enter(&object->uplq, upl, upl_t, uplq);
8104         }
8105 #endif
8106
8107         if (object->phys_contiguous) {
8108
8109                 if (upl->flags & UPL_ACCESS_BLOCKED) {
8110                         assert(!object->blocked_access);
8111                         object->blocked_access = TRUE;
8112                 }
8113
8114                 vm_object_unlock(object);
8115
8116                 /*
8117                  * don't need any shadow mappings for this one
8118                  * since it is already I/O memory
8119                  */
8120                 upl->flags |= UPL_DEVICE_MEMORY;
8121
8122                 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8123
8124                 if (user_page_list) {
8125                         user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8126                         user_page_list[0].device = TRUE;
8127                 }
8128                 if (page_list_count != NULL) {
8129                         if (upl->flags & UPL_INTERNAL)
8130                                 *page_list_count = 0;
8131                         else
8132                                 *page_list_count = 1;
8133                 }
8134
8135                 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8136                 return KERN_SUCCESS;
8137         }
8138         if (object != kernel_object && object != compressor_object) {
8139                 /*
8140                  * Protect user space from future COW operations
8141                  */
8142 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8143                 if (!object->true_share &&
8144                     vm_object_tracking_inited) {
8145                         void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8146                         int num = 0;
8147
8148                         num = OSBacktrace(bt,
8149                                           VM_OBJECT_TRACKING_BTDEPTH);
8150                         btlog_add_entry(vm_object_tracking_btlog,
8151                                         object,
8152                                         VM_OBJECT_TRACKING_OP_TRUESHARE,
8153                                         bt,
8154                                         num);
8155                 }
8156 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8157
8158                 vm_object_lock_assert_exclusive(object);
8159                 object->true_share = TRUE;
8160
8161                 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8162                         object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8163         }
8164
8165         if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8166             object->copy != VM_OBJECT_NULL) {
8167                 /*
8168                  * Honor copy-on-write obligations
8169                  *
8170                  * The caller is gathering these pages and
8171                  * might modify their contents.  We need to
8172                  * make sure that the copy object has its own
8173                  * private copies of these pages before we let
8174                  * the caller modify them.
8175                  *
8176                  * NOTE: someone else could map the original object
8177                  * after we've done this copy-on-write here, and they
8178                  * could then see an inconsistent picture of the memory
8179                  * while it's being modified via the UPL.  To prevent this,
8180                  * we would have to block access to these pages until the
8181                  * UPL is released.  We could use the UPL_BLOCK_ACCESS
8182                  * code path for that...
8183                  */
8184                 vm_object_update(object,
8185                                  offset,
8186                                  size,
8187                                  NULL,
8188                                  NULL,
8189                                  FALSE, /* should_return */
8190                                  MEMORY_OBJECT_COPY_SYNC,
8191                                  VM_PROT_NO_CHANGE);
8192                 VM_PAGEOUT_DEBUG(iopl_cow, 1);
8193                 VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8194         }
8195         if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8196             object->purgable != VM_PURGABLE_VOLATILE &&
8197             object->purgable != VM_PURGABLE_EMPTY &&
8198             object->copy == NULL &&
8199             size == object->vo_size &&
8200             offset == 0 &&
8201             object->shadow == NULL &&
8202             object->pager == NULL)
8203         {
8204                 if (object->resident_page_count == size_in_pages)
8205                 {
8206                         assert(object != compressor_object);
8207                         assert(object != kernel_object);
8208                         fast_path_full_req = TRUE;
8209                 }
8210                 else if (object->resident_page_count == 0)
8211                 {
8212                         assert(object != compressor_object);
8213                         assert(object != kernel_object);
8214                         fast_path_empty_req = TRUE;
8215                         set_cache_attr_needed = TRUE;
8216                 }
8217         }
8218
8219         if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8220                 interruptible = THREAD_ABORTSAFE;
8221         else
8222                 interruptible = THREAD_UNINT;
8223
8224         entry = 0;
8225
8226         xfer_size = size;
8227         dst_offset = offset;
8228         dw_count = 0;
8229
8230         if (fast_path_full_req) {
8231
8232                 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8233                         goto finish;
8234                 /*
8235                  * we couldn't complete the processing of this request on the fast path
8236                  * so fall through to the slow path and finish up
8237                  */
8238
8239         } else if (fast_path_empty_req) {
8240
8241                 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8242                         ret = KERN_MEMORY_ERROR;
8243                         goto return_err;
8244                 }
8245                 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8246
8247                 if (ret) {
8248                         free_wired_pages = TRUE;
8249                         goto return_err;
8250                 }
8251                 goto finish;
8252         }
8253
8254         fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8255         fault_info.lo_offset = offset;
8256         fault_info.hi_offset = offset + xfer_size;
8257         fault_info.mark_zf_absent = TRUE;
8258         fault_info.interruptible = interruptible;
8259         fault_info.batch_pmap_op = TRUE;
8260
8261         dwp = &dw_array[0];
8262         dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8263
8264         while (xfer_size) {
8265                 vm_fault_return_t       result;
8266
8267                 dwp->dw_mask = 0;
8268
8269                 if (fast_path_full_req) {
8270                         /*
8271                          * if we get here, it means that we ran into a page
8272                          * state we couldn't handle in the fast path and
8273                          * bailed out to the slow path... since the order
8274                          * we look at pages is different between the 2 paths,
8275                          * the following check is needed to determine whether
8276                          * this page was already processed in the fast path
8277                          */
8278                         if (lite_list[entry>>5] & (1 << (entry & 31)))
8279                                 goto skip_page;
8280                 }
8281                 dst_page = vm_page_lookup(object, dst_offset);
8282
8283                 if (dst_page == VM_PAGE_NULL ||
8284                     dst_page->vmp_busy ||
8285                     dst_page->vmp_error ||
8286                     dst_page->vmp_restart ||
8287                     dst_page->vmp_absent ||
8288                     dst_page->vmp_fictitious) {
8289
8290                    if (object == kernel_object)
8291                            panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8292                    if (object == compressor_object)
8293                            panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8294
8295                    if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8296                            ret = KERN_MEMORY_ERROR;
8297                            goto return_err;
8298                    }
8299                    set_cache_attr_needed = TRUE;
8300
8301                    /*
8302                     * We just looked up the page and the result remains valid
8303                     * until the object lock is release, so send it to
8304                     * vm_fault_page() (as "dst_page"), to avoid having to
8305                     * look it up again there.
8306                     */
8307                    caller_lookup = TRUE;
8308
8309                    do {
8310                         vm_page_t       top_page;
8311                         kern_return_t   error_code;
8312
8313                         fault_info.cluster_size = xfer_size;
8314
8315                         vm_object_paging_begin(object);
8316
8317                         result = vm_fault_page(object, dst_offset,
8318                                                prot | VM_PROT_WRITE, FALSE,
8319                                                caller_lookup,
8320                                                &prot, &dst_page, &top_page,
8321                                                (int *)0,
8322                                                &error_code, no_zero_fill,
8323                                                FALSE, &fault_info);
8324
8325                         /* our lookup is no longer valid at this point */
8326                         caller_lookup = FALSE;
8327
8328                         switch (result) {
8329
8330                         case VM_FAULT_SUCCESS:
8331                                 page_grab_count++;
8332
8333                                 if ( !dst_page->vmp_absent) {
8334                                         PAGE_WAKEUP_DONE(dst_page);
8335                                 } else {
8336                                         /*
8337                                          * we only get back an absent page if we
8338                                          * requested that it not be zero-filled
8339                                          * because we are about to fill it via I/O
8340                                          *
8341                                          * absent pages should be left BUSY
8342                                          * to prevent them from being faulted
8343                                          * into an address space before we've
8344                                          * had a chance to complete the I/O on
8345                                          * them since they may contain info that
8346                                          * shouldn't be seen by the faulting task
8347                                          */
8348                                 }
8349                                 /*
8350                                  *      Release paging references and
8351                                  *      top-level placeholder page, if any.
8352                                  */
8353                                 if (top_page != VM_PAGE_NULL) {
8354                                         vm_object_t local_object;
8355
8356                                         local_object = VM_PAGE_OBJECT(top_page);
8357
8358                                         /*
8359                                          * comparing 2 packed pointers
8360                                          */
8361                                         if (top_page->vmp_object != dst_page->vmp_object) {
8362                                                 vm_object_lock(local_object);
8363                                                 VM_PAGE_FREE(top_page);
8364                                                 vm_object_paging_end(local_object);
8365                                                 vm_object_unlock(local_object);
8366                                         } else {
8367                                                 VM_PAGE_FREE(top_page);
8368                                                 vm_object_paging_end(local_object);
8369                                         }
8370                                 }
8371                                 vm_object_paging_end(object);
8372                                 break;
8373
8374                         case VM_FAULT_RETRY:
8375                                 vm_object_lock(object);
8376                                 break;
8377
8378                         case VM_FAULT_MEMORY_SHORTAGE:
8379                                 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8380
8381                                 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8382
8383                                 if (vm_page_wait(interruptible)) {
8384                                         OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8385
8386                                         VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8387                                         vm_object_lock(object);
8388
8389                                         break;
8390                                 }
8391                                 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8392
8393                                 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8394
8395                                 /* fall thru */
8396
8397                         case VM_FAULT_INTERRUPTED:
8398                                 error_code = MACH_SEND_INTERRUPTED;
8399                         case VM_FAULT_MEMORY_ERROR:
8400                         memory_error:
8401                                 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8402
8403                                 vm_object_lock(object);
8404                                 goto return_err;
8405
8406                         case VM_FAULT_SUCCESS_NO_VM_PAGE:
8407                                 /* success but no page: fail */
8408                                 vm_object_paging_end(object);
8409                                 vm_object_unlock(object);
8410                                 goto memory_error;
8411
8412                         default:
8413                                 panic("vm_object_iopl_request: unexpected error"
8414                                       " 0x%x from vm_fault_page()\n", result);
8415                         }
8416                    } while (result != VM_FAULT_SUCCESS);
8417
8418                 }
8419                 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8420
8421                 if (upl->flags & UPL_KERNEL_OBJECT)
8422                         goto record_phys_addr;
8423
8424                 if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8425                         dst_page->vmp_busy = TRUE;
8426                         goto record_phys_addr;
8427                 }
8428
8429                 if (dst_page->vmp_cleaning) {
8430                         /*
8431                          * Someone else is cleaning this page in place.
8432                          * In theory, we should be able to  proceed and use this
8433                          * page but they'll probably end up clearing the "busy"
8434                          * bit on it in upl_commit_range() but they didn't set
8435                          * it, so they would clear our "busy" bit and open
8436                          * us to race conditions.
8437                          * We'd better wait for the cleaning to complete and
8438                          * then try again.
8439                          */
8440                         VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8441                         PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8442                         continue;
8443                 }
8444                 if (dst_page->vmp_laundry)
8445                         vm_pageout_steal_laundry(dst_page, FALSE);
8446
8447                 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8448                      phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8449                         vm_page_t       low_page;
8450                         int             refmod;
8451
8452                         /*
8453                          * support devices that can't DMA above 32 bits
8454                          * by substituting pages from a pool of low address
8455                          * memory for any pages we find above the 4G mark
8456                          * can't substitute if the page is already wired because
8457                          * we don't know whether that physical address has been
8458                          * handed out to some other 64 bit capable DMA device to use
8459                          */
8460                         if (VM_PAGE_WIRED(dst_page)) {
8461                                 ret = KERN_PROTECTION_FAILURE;
8462                                 goto return_err;
8463                         }
8464                         low_page = vm_page_grablo();
8465
8466                         if (low_page == VM_PAGE_NULL) {
8467                                 ret = KERN_RESOURCE_SHORTAGE;
8468                                 goto return_err;
8469                         }
8470                         /*
8471                          * from here until the vm_page_replace completes
8472                          * we musn't drop the object lock... we don't
8473                          * want anyone refaulting this page in and using
8474                          * it after we disconnect it... we want the fault
8475                          * to find the new page being substituted.
8476                          */
8477                         if (dst_page->vmp_pmapped)
8478                                 refmod = pmap_disconnect(phys_page);
8479                         else
8480                                 refmod = 0;
8481
8482                         if (!dst_page->vmp_absent)
8483                                 vm_page_copy(dst_page, low_page);
8484
8485                         low_page->vmp_reference = dst_page->vmp_reference;
8486                         low_page->vmp_dirty     = dst_page->vmp_dirty;
8487                         low_page->vmp_absent    = dst_page->vmp_absent;
8488
8489                         if (refmod & VM_MEM_REFERENCED)
8490                                 low_page->vmp_reference = TRUE;
8491                         if (refmod & VM_MEM_MODIFIED) {
8492                                 SET_PAGE_DIRTY(low_page, FALSE);
8493                         }
8494
8495                         vm_page_replace(low_page, object, dst_offset);
8496
8497                         dst_page = low_page;
8498                         /*
8499                          * vm_page_grablo returned the page marked
8500                          * BUSY... we don't need a PAGE_WAKEUP_DONE
8501                          * here, because we've never dropped the object lock
8502                          */
8503                         if ( !dst_page->vmp_absent)
8504                                 dst_page->vmp_busy = FALSE;
8505
8506                         phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8507                 }
8508                 if ( !dst_page->vmp_busy)
8509                         dwp->dw_mask |= DW_vm_page_wire;
8510
8511                 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8512                         /*
8513                          * Mark the page "busy" to block any future page fault
8514                          * on this page in addition to wiring it.
8515                          * We'll also remove the mapping
8516                          * of all these pages before leaving this routine.
8517                          */
8518                         assert(!dst_page->vmp_fictitious);
8519                         dst_page->vmp_busy = TRUE;
8520                 }
8521                 /*
8522                  * expect the page to be used
8523                  * page queues lock must be held to set 'reference'
8524                  */
8525                 dwp->dw_mask |= DW_set_reference;
8526
8527                 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8528                         SET_PAGE_DIRTY(dst_page, TRUE);
8529                 }
8530                 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8531                         pmap_sync_page_attributes_phys(phys_page);
8532                         dst_page->vmp_written_by_kernel = FALSE;
8533                 }
8534
8535 record_phys_addr:
8536                 if (dst_page->vmp_busy)
8537                         upl->flags |= UPL_HAS_BUSY;
8538
8539                 lite_list[entry>>5] |= 1 << (entry & 31);
8540
8541                 if (phys_page > upl->highest_page)
8542                         upl->highest_page = phys_page;
8543
8544                 if (user_page_list) {
8545                         user_page_list[entry].phys_addr = phys_page;
8546                         user_page_list[entry].free_when_done    = dst_page->vmp_free_when_done;
8547                         user_page_list[entry].absent    = dst_page->vmp_absent;
8548                         user_page_list[entry].dirty     = dst_page->vmp_dirty;
8549                         user_page_list[entry].precious  = dst_page->vmp_precious;
8550                         user_page_list[entry].device    = FALSE;
8551                         user_page_list[entry].needed    = FALSE;
8552                         if (dst_page->vmp_clustered == TRUE)
8553                                 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8554                         else
8555                                 user_page_list[entry].speculative = FALSE;
8556                         user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8557                         user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8558                         user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8559                         user_page_list[entry].mark      = FALSE;
8560                 }
8561                 if (object != kernel_object && object != compressor_object) {
8562                         /*
8563                          * someone is explicitly grabbing this page...
8564                          * update clustered and speculative state
8565                          *
8566                          */
8567                         if (dst_page->vmp_clustered)
8568                                 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8569                 }
8570 skip_page:
8571                 entry++;
8572                 dst_offset += PAGE_SIZE_64;
8573                 xfer_size -= PAGE_SIZE;
8574
8575                 if (dwp->dw_mask) {
8576                         VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8577
8578                         if (dw_count >= dw_limit) {
8579                                 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8580
8581                                 dwp = &dw_array[0];
8582                                 dw_count = 0;
8583                         }
8584                 }
8585         }
8586         assert(entry == size_in_pages);
8587
8588         if (dw_count)
8589                 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8590 finish:
8591         if (user_page_list && set_cache_attr_needed == TRUE)
8592                 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8593
8594         if (page_list_count != NULL) {
8595                 if (upl->flags & UPL_INTERNAL)
8596                         *page_list_count = 0;
8597                 else if (*page_list_count > size_in_pages)
8598                         *page_list_count = size_in_pages;
8599         }
8600         vm_object_unlock(object);
8601
8602         if (cntrl_flags & UPL_BLOCK_ACCESS) {
8603                 /*
8604                  * We've marked all the pages "busy" so that future
8605                  * page faults will block.
8606                  * Now remove the mapping for these pages, so that they
8607                  * can't be accessed without causing a page fault.
8608                  */
8609                 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8610                                        PMAP_NULL, 0, VM_PROT_NONE);
8611                 assert(!object->blocked_access);
8612                 object->blocked_access = TRUE;
8613         }
8614
8615         VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8616         return KERN_SUCCESS;
8617
8618 return_err:
8619         dw_index = 0;
8620
8621         for (; offset < dst_offset; offset += PAGE_SIZE) {
8622                 boolean_t need_unwire;
8623
8624                 dst_page = vm_page_lookup(object, offset);
8625
8626                 if (dst_page == VM_PAGE_NULL)
8627                         panic("vm_object_iopl_request: Wired page missing. \n");
8628
8629                 /*
8630                  * if we've already processed this page in an earlier
8631                  * dw_do_work, we need to undo the wiring... we will
8632                  * leave the dirty and reference bits on if they
8633                  * were set, since we don't have a good way of knowing
8634                  * what the previous state was and we won't get here
8635                  * under any normal circumstances...  we will always
8636                  * clear BUSY and wakeup any waiters via vm_page_free
8637                  * or PAGE_WAKEUP_DONE
8638                  */
8639                 need_unwire = TRUE;
8640
8641                 if (dw_count) {
8642                         if (dw_array[dw_index].dw_m == dst_page) {
8643                                 /*
8644                                  * still in the deferred work list
8645                                  * which means we haven't yet called
8646                                  * vm_page_wire on this page
8647                                  */
8648                                 need_unwire = FALSE;
8649
8650                                 dw_index++;
8651                                 dw_count--;
8652                         }
8653                 }
8654                 vm_page_lock_queues();
8655
8656                 if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8657                         vm_page_free(dst_page);
8658
8659                         need_unwire = FALSE;
8660                 } else {
8661                         if (need_unwire == TRUE)
8662                                 vm_page_unwire(dst_page, TRUE);
8663
8664                         PAGE_WAKEUP_DONE(dst_page);
8665                 }
8666                 vm_page_unlock_queues();
8667
8668                 if (need_unwire == TRUE)
8669                         VM_STAT_INCR(reactivations);
8670         }
8671 #if UPL_DEBUG
8672         upl->upl_state = 2;
8673 #endif
8674         if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8675                 vm_object_activity_end(object);
8676                 vm_object_collapse(object, 0, TRUE);
8677         }
8678         vm_object_unlock(object);
8679         upl_destroy(upl);
8680
8681         VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8682         return ret;
8683 }
8684
8685 kern_return_t
8686 upl_transpose(
8687         upl_t           upl1,
8688         upl_t           upl2)
8689 {
8690         kern_return_t           retval;
8691         boolean_t               upls_locked;
8692         vm_object_t             object1, object2;
8693
8694         if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2  || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR)  || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8695                 return KERN_INVALID_ARGUMENT;
8696         }
8697
8698         upls_locked = FALSE;
8699
8700         /*
8701          * Since we need to lock both UPLs at the same time,
8702          * avoid deadlocks by always taking locks in the same order.
8703          */
8704         if (upl1 < upl2) {
8705                 upl_lock(upl1);
8706                 upl_lock(upl2);
8707         } else {
8708                 upl_lock(upl2);
8709                 upl_lock(upl1);
8710         }
8711         upls_locked = TRUE;     /* the UPLs will need to be unlocked */
8712
8713         object1 = upl1->map_object;
8714         object2 = upl2->map_object;
8715
8716         if (upl1->offset != 0 || upl2->offset != 0 ||
8717             upl1->size != upl2->size) {
8718                 /*
8719                  * We deal only with full objects, not subsets.
8720                  * That's because we exchange the entire backing store info
8721                  * for the objects: pager, resident pages, etc...  We can't do
8722                  * only part of it.
8723                  */
8724                 retval = KERN_INVALID_VALUE;
8725                 goto done;
8726         }
8727
8728         /*
8729          * Tranpose the VM objects' backing store.
8730          */
8731         retval = vm_object_transpose(object1, object2,
8732                                      (vm_object_size_t) upl1->size);
8733
8734         if (retval == KERN_SUCCESS) {
8735                 /*
8736                  * Make each UPL point to the correct VM object, i.e. the
8737                  * object holding the pages that the UPL refers to...
8738                  */
8739 #if CONFIG_IOSCHED || UPL_DEBUG
8740                 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8741                         vm_object_lock(object1);
8742                         vm_object_lock(object2);
8743                 }
8744                 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8745                         queue_remove(&object1->uplq, upl1, upl_t, uplq);
8746                 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8747                         queue_remove(&object2->uplq, upl2, upl_t, uplq);
8748 #endif
8749                 upl1->map_object = object2;
8750                 upl2->map_object = object1;
8751
8752 #if CONFIG_IOSCHED || UPL_DEBUG
8753                 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8754                         queue_enter(&object2->uplq, upl1, upl_t, uplq);
8755                 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8756                         queue_enter(&object1->uplq, upl2, upl_t, uplq);
8757                 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8758                         vm_object_unlock(object2);
8759                         vm_object_unlock(object1);
8760                 }
8761 #endif
8762         }
8763
8764 done:
8765         /*
8766          * Cleanup.
8767          */
8768         if (upls_locked) {
8769                 upl_unlock(upl1);
8770                 upl_unlock(upl2);
8771                 upls_locked = FALSE;
8772         }
8773
8774         return retval;
8775 }
8776
8777 void
8778 upl_range_needed(
8779         upl_t           upl,
8780         int             index,
8781         int             count)
8782 {
8783         upl_page_info_t *user_page_list;
8784         int             size_in_pages;
8785
8786         if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8787                 return;
8788
8789         size_in_pages = upl->size / PAGE_SIZE;
8790
8791         user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8792
8793         while (count-- && index < size_in_pages)
8794                 user_page_list[index++].needed = TRUE;
8795 }
8796
8797
8798 /*
8799  * Reserve of virtual addresses in the kernel address space.
8800  * We need to map the physical pages in the kernel, so that we
8801  * can call the code-signing or slide routines with a kernel
8802  * virtual address.  We keep this pool of pre-allocated kernel
8803  * virtual addresses so that we don't have to scan the kernel's
8804  * virtaul address space each time we need to work with
8805  * a physical page.
8806  */
8807 decl_simple_lock_data(,vm_paging_lock)
8808 #define VM_PAGING_NUM_PAGES     64
8809 vm_map_offset_t vm_paging_base_address = 0;
8810 boolean_t       vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8811 int             vm_paging_max_index = 0;
8812 int             vm_paging_page_waiter = 0;
8813 int             vm_paging_page_waiter_total = 0;
8814
8815 unsigned long   vm_paging_no_kernel_page = 0;
8816 unsigned long   vm_paging_objects_mapped = 0;
8817 unsigned long   vm_paging_pages_mapped = 0;
8818 unsigned long   vm_paging_objects_mapped_slow = 0;
8819 unsigned long   vm_paging_pages_mapped_slow = 0;
8820
8821 void
8822 vm_paging_map_init(void)
8823 {
8824         kern_return_t   kr;
8825         vm_map_offset_t page_map_offset;
8826         vm_map_entry_t  map_entry;
8827
8828         assert(vm_paging_base_address == 0);
8829
8830         /*
8831          * Initialize our pool of pre-allocated kernel
8832          * virtual addresses.
8833          */
8834         page_map_offset = 0;
8835         kr = vm_map_find_space(kernel_map,
8836                                &page_map_offset,
8837                                VM_PAGING_NUM_PAGES * PAGE_SIZE,
8838                                0,
8839                                0,
8840                                VM_MAP_KERNEL_FLAGS_NONE,
8841                                VM_KERN_MEMORY_NONE,
8842                                &map_entry);
8843         if (kr != KERN_SUCCESS) {
8844                 panic("vm_paging_map_init: kernel_map full\n");
8845         }
8846         VME_OBJECT_SET(map_entry, kernel_object);
8847         VME_OFFSET_SET(map_entry, page_map_offset);
8848         map_entry->protection = VM_PROT_NONE;
8849         map_entry->max_protection = VM_PROT_NONE;
8850         map_entry->permanent = TRUE;
8851         vm_object_reference(kernel_object);
8852         vm_map_unlock(kernel_map);
8853
8854         assert(vm_paging_base_address == 0);
8855         vm_paging_base_address = page_map_offset;
8856 }
8857
8858 /*
8859  * vm_paging_map_object:
8860  *      Maps part of a VM object's pages in the kernel
8861  *      virtual address space, using the pre-allocated
8862  *      kernel virtual addresses, if possible.
8863  * Context:
8864  *      The VM object is locked.  This lock will get
8865  *      dropped and re-acquired though, so the caller
8866  *      must make sure the VM object is kept alive
8867  *      (by holding a VM map that has a reference
8868  *      on it, for example, or taking an extra reference).
8869  *      The page should also be kept busy to prevent
8870  *      it from being reclaimed.
8871  */
8872 kern_return_t
8873 vm_paging_map_object(
8874         vm_page_t               page,
8875         vm_object_t             object,
8876         vm_object_offset_t      offset,
8877         vm_prot_t               protection,
8878         boolean_t               can_unlock_object,
8879         vm_map_size_t           *size,          /* IN/OUT */
8880         vm_map_offset_t         *address,       /* OUT */
8881         boolean_t               *need_unmap)    /* OUT */
8882 {
8883         kern_return_t           kr;
8884         vm_map_offset_t         page_map_offset;
8885         vm_map_size_t           map_size;
8886         vm_object_offset_t      object_offset;
8887         int                     i;
8888
8889         if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8890                 /* use permanent 1-to-1 kernel mapping of physical memory ? */
8891 #if __x86_64__
8892                 *address = (vm_map_offset_t)
8893                         PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
8894                                      PAGE_SHIFT);
8895                 *need_unmap = FALSE;
8896                 return KERN_SUCCESS;
8897 #elif __arm__ || __arm64__
8898                 *address = (vm_map_offset_t)
8899                         phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
8900                 *need_unmap = FALSE;
8901                 return KERN_SUCCESS;
8902 #else
8903 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8904 #endif
8905
8906                 assert(page->vmp_busy);
8907                 /*
8908                  * Use one of the pre-allocated kernel virtual addresses
8909                  * and just enter the VM page in the kernel address space
8910                  * at that virtual address.
8911                  */
8912                 simple_lock(&vm_paging_lock);
8913
8914                 /*
8915                  * Try and find an available kernel virtual address
8916                  * from our pre-allocated pool.
8917                  */
8918                 page_map_offset = 0;
8919                 for (;;) {
8920                         for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
8921                                 if (vm_paging_page_inuse[i] == FALSE) {
8922                                         page_map_offset =
8923                                                 vm_paging_base_address +
8924                                                 (i * PAGE_SIZE);
8925                                         break;
8926                                 }
8927                         }
8928                         if (page_map_offset != 0) {
8929                                 /* found a space to map our page ! */
8930                                 break;
8931                         }
8932
8933                         if (can_unlock_object) {
8934                                 /*
8935                                  * If we can afford to unlock the VM object,
8936                                  * let's take the slow path now...
8937                                  */
8938                                 break;
8939                         }
8940                         /*
8941                          * We can't afford to unlock the VM object, so
8942                          * let's wait for a space to become available...
8943                          */
8944                         vm_paging_page_waiter_total++;
8945                         vm_paging_page_waiter++;
8946                         kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8947                         if (kr == THREAD_WAITING) {
8948                                 simple_unlock(&vm_paging_lock);
8949                                 kr = thread_block(THREAD_CONTINUE_NULL);
8950                                 simple_lock(&vm_paging_lock);
8951                         }
8952                         vm_paging_page_waiter--;
8953                         /* ... and try again */
8954                 }
8955
8956                 if (page_map_offset != 0) {
8957                         /*
8958                          * We found a kernel virtual address;
8959                          * map the physical page to that virtual address.
8960                          */
8961                         if (i > vm_paging_max_index) {
8962                                 vm_paging_max_index = i;
8963                         }
8964                         vm_paging_page_inuse[i] = TRUE;
8965                         simple_unlock(&vm_paging_lock);
8966
8967                         page->vmp_pmapped = TRUE;
8968
8969                         /*
8970                          * Keep the VM object locked over the PMAP_ENTER
8971                          * and the actual use of the page by the kernel,
8972                          * or this pmap mapping might get undone by a
8973                          * vm_object_pmap_protect() call...
8974                          */
8975                         PMAP_ENTER(kernel_pmap,
8976                                    page_map_offset,
8977                                    page,
8978                                    protection,
8979                                    VM_PROT_NONE,
8980                                    0,
8981                                    TRUE,
8982                                    kr);
8983                         assert(kr == KERN_SUCCESS);
8984                         vm_paging_objects_mapped++;
8985                         vm_paging_pages_mapped++;
8986                         *address = page_map_offset;
8987                         *need_unmap = TRUE;
8988
8989 #if KASAN
8990                         kasan_notify_address(page_map_offset, PAGE_SIZE);
8991 #endif
8992
8993                         /* all done and mapped, ready to use ! */
8994                         return KERN_SUCCESS;
8995                 }
8996
8997                 /*
8998                  * We ran out of pre-allocated kernel virtual
8999                  * addresses.  Just map the page in the kernel
9000                  * the slow and regular way.
9001                  */
9002                 vm_paging_no_kernel_page++;
9003                 simple_unlock(&vm_paging_lock);
9004         }
9005
9006         if (! can_unlock_object) {
9007                 *address = 0;
9008                 *size = 0;
9009                 *need_unmap = FALSE;
9010                 return KERN_NOT_SUPPORTED;
9011         }
9012
9013         object_offset = vm_object_trunc_page(offset);
9014         map_size = vm_map_round_page(*size,
9015                                      VM_MAP_PAGE_MASK(kernel_map));
9016
9017         /*
9018          * Try and map the required range of the object
9019          * in the kernel_map
9020          */
9021
9022         vm_object_reference_locked(object);     /* for the map entry */
9023         vm_object_unlock(object);
9024
9025         kr = vm_map_enter(kernel_map,
9026                           address,
9027                           map_size,
9028                           0,
9029                           VM_FLAGS_ANYWHERE,
9030                           VM_MAP_KERNEL_FLAGS_NONE,
9031                           VM_KERN_MEMORY_NONE,
9032                           object,
9033                           object_offset,
9034                           FALSE,
9035                           protection,
9036                           VM_PROT_ALL,
9037                           VM_INHERIT_NONE);
9038         if (kr != KERN_SUCCESS) {
9039                 *address = 0;
9040                 *size = 0;
9041                 *need_unmap = FALSE;
9042                 vm_object_deallocate(object);   /* for the map entry */
9043                 vm_object_lock(object);
9044                 return kr;
9045         }
9046
9047         *size = map_size;
9048
9049         /*
9050          * Enter the mapped pages in the page table now.
9051          */
9052         vm_object_lock(object);
9053         /*
9054          * VM object must be kept locked from before PMAP_ENTER()
9055          * until after the kernel is done accessing the page(s).
9056          * Otherwise, the pmap mappings in the kernel could be
9057          * undone by a call to vm_object_pmap_protect().
9058          */
9059
9060         for (page_map_offset = 0;
9061              map_size != 0;
9062              map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9063
9064                 page = vm_page_lookup(object, offset + page_map_offset);
9065                 if (page == VM_PAGE_NULL) {
9066                         printf("vm_paging_map_object: no page !?");
9067                         vm_object_unlock(object);
9068                         kr = vm_map_remove(kernel_map, *address, *size,
9069                                            VM_MAP_REMOVE_NO_FLAGS);
9070                         assert(kr == KERN_SUCCESS);
9071                         *address = 0;
9072                         *size = 0;
9073                         *need_unmap = FALSE;
9074                         vm_object_lock(object);
9075                         return KERN_MEMORY_ERROR;
9076                 }
9077                 page->vmp_pmapped = TRUE;
9078
9079                 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9080                 PMAP_ENTER(kernel_pmap,
9081                            *address + page_map_offset,
9082                            page,
9083                            protection,
9084                            VM_PROT_NONE,
9085                            0,
9086                            TRUE,
9087                            kr);
9088                 assert(kr == KERN_SUCCESS);
9089 #if KASAN
9090                 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9091 #endif
9092         }
9093
9094         vm_paging_objects_mapped_slow++;
9095         vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9096
9097         *need_unmap = TRUE;
9098
9099         return KERN_SUCCESS;
9100 }
9101
9102 /*
9103  * vm_paging_unmap_object:
9104  *      Unmaps part of a VM object's pages from the kernel
9105  *      virtual address space.
9106  * Context:
9107  *      The VM object is locked.  This lock will get
9108  *      dropped and re-acquired though.
9109  */
9110 void
9111 vm_paging_unmap_object(
9112         vm_object_t     object,
9113         vm_map_offset_t start,
9114         vm_map_offset_t end)
9115 {
9116         kern_return_t   kr;
9117         int             i;
9118
9119         if ((vm_paging_base_address == 0) ||
9120             (start < vm_paging_base_address) ||
9121             (end > (vm_paging_base_address
9122                      + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9123                 /*
9124                  * We didn't use our pre-allocated pool of
9125                  * kernel virtual address.  Deallocate the
9126                  * virtual memory.
9127                  */
9128                 if (object != VM_OBJECT_NULL) {
9129                         vm_object_unlock(object);
9130                 }
9131                 kr = vm_map_remove(kernel_map, start, end,
9132                                    VM_MAP_REMOVE_NO_FLAGS);
9133                 if (object != VM_OBJECT_NULL) {
9134                         vm_object_lock(object);
9135                 }
9136                 assert(kr == KERN_SUCCESS);
9137         } else {
9138                 /*
9139                  * We used a kernel virtual address from our
9140                  * pre-allocated pool.  Put it back in the pool
9141                  * for next time.
9142                  */
9143                 assert(end - start == PAGE_SIZE);
9144                 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9145                 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9146
9147                 /* undo the pmap mapping */
9148                 pmap_remove(kernel_pmap, start, end);
9149
9150                 simple_lock(&vm_paging_lock);
9151                 vm_paging_page_inuse[i] = FALSE;
9152                 if (vm_paging_page_waiter) {
9153                         thread_wakeup(&vm_paging_page_waiter);
9154                 }
9155                 simple_unlock(&vm_paging_lock);
9156         }
9157 }
9158
9159
9160 /*
9161  * page->vmp_object must be locked
9162  */
9163 void
9164 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9165 {
9166         if (!queues_locked) {
9167                 vm_page_lockspin_queues();
9168         }
9169
9170         page->vmp_free_when_done = FALSE;
9171         /*
9172          * need to drop the laundry count...
9173          * we may also need to remove it
9174          * from the I/O paging queue...
9175          * vm_pageout_throttle_up handles both cases
9176          *
9177          * the laundry and pageout_queue flags are cleared...
9178          */
9179         vm_pageout_throttle_up(page);
9180
9181         if (!queues_locked) {
9182                 vm_page_unlock_queues();
9183         }
9184 }
9185
9186 upl_t
9187 vector_upl_create(vm_offset_t upl_offset)
9188 {
9189         int     vector_upl_size  = sizeof(struct _vector_upl);
9190         int i=0;
9191         upl_t   upl;
9192         vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9193
9194         upl = upl_create(0,UPL_VECTOR,0);
9195         upl->vector_upl = vector_upl;
9196         upl->offset = upl_offset;
9197         vector_upl->size = 0;
9198         vector_upl->offset = upl_offset;
9199         vector_upl->invalid_upls=0;
9200         vector_upl->num_upls=0;
9201         vector_upl->pagelist = NULL;
9202
9203         for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9204                 vector_upl->upl_iostates[i].size = 0;
9205                 vector_upl->upl_iostates[i].offset = 0;
9206
9207         }
9208         return upl;
9209 }
9210
9211 void
9212 vector_upl_deallocate(upl_t upl)
9213 {
9214         if(upl) {
9215                 vector_upl_t vector_upl = upl->vector_upl;
9216                 if(vector_upl) {
9217                         if(vector_upl->invalid_upls != vector_upl->num_upls)
9218                                 panic("Deallocating non-empty Vectored UPL\n");
9219                         kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9220                         vector_upl->invalid_upls=0;
9221                         vector_upl->num_upls = 0;
9222                         vector_upl->pagelist = NULL;
9223                         vector_upl->size = 0;
9224                         vector_upl->offset = 0;
9225                         kfree(vector_upl, sizeof(struct _vector_upl));
9226                         vector_upl = (vector_upl_t)0xfeedfeed;
9227                 }
9228                 else
9229                         panic("vector_upl_deallocate was passed a non-vectored upl\n");
9230         }
9231         else
9232                 panic("vector_upl_deallocate was passed a NULL upl\n");
9233 }
9234
9235 boolean_t
9236 vector_upl_is_valid(upl_t upl)
9237 {
9238         if(upl &&  ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9239                 vector_upl_t vector_upl = upl->vector_upl;
9240                 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9241                         return FALSE;
9242                 else
9243                         return TRUE;
9244         }
9245         return FALSE;
9246 }
9247
9248 boolean_t
9249 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9250 {
9251         if(vector_upl_is_valid(upl)) {
9252                 vector_upl_t vector_upl = upl->vector_upl;
9253
9254                 if(vector_upl) {
9255                         if(subupl) {
9256                                 if(io_size) {
9257                                         if(io_size < PAGE_SIZE)
9258                                                 io_size = PAGE_SIZE;
9259                                         subupl->vector_upl = (void*)vector_upl;
9260                                         vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9261                                         vector_upl->size += io_size;
9262                                         upl->size += io_size;
9263                                 }
9264                                 else {
9265                                         uint32_t i=0,invalid_upls=0;
9266                                         for(i = 0; i < vector_upl->num_upls; i++) {
9267                                                 if(vector_upl->upl_elems[i] == subupl)
9268                                                         break;
9269                                         }
9270                                         if(i == vector_upl->num_upls)
9271                                                 panic("Trying to remove sub-upl when none exists");
9272
9273                                         vector_upl->upl_elems[i] = NULL;
9274                                         invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9275                                         if(invalid_upls == vector_upl->num_upls)
9276                                                 return TRUE;
9277                                         else
9278                                                 return FALSE;
9279                                 }
9280                         }
9281                         else
9282                                 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9283                 }
9284                 else
9285                         panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9286         }
9287         else
9288                 panic("vector_upl_set_subupl was passed a NULL upl\n");
9289
9290         return FALSE;
9291 }
9292
9293 void
9294 vector_upl_set_pagelist(upl_t upl)
9295 {
9296         if(vector_upl_is_valid(upl)) {
9297                 uint32_t i=0;
9298                 vector_upl_t vector_upl = upl->vector_upl;
9299
9300                 if(vector_upl) {
9301                         vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9302
9303                         vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9304
9305                         for(i=0; i < vector_upl->num_upls; i++) {
9306                                 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9307                                 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9308                                 pagelist_size += cur_upl_pagelist_size;
9309                                 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9310                                         upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9311                         }
9312                         assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9313                 }
9314                 else
9315                         panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9316         }
9317         else
9318                 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9319
9320 }
9321
9322 upl_t
9323 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9324 {
9325         if(vector_upl_is_valid(upl)) {
9326                 vector_upl_t vector_upl = upl->vector_upl;
9327                 if(vector_upl) {
9328                         if(index < vector_upl->num_upls)
9329                                 return vector_upl->upl_elems[index];
9330                 }
9331                 else
9332                         panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9333         }
9334         return NULL;
9335 }
9336
9337 upl_t
9338 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9339 {
9340         if(vector_upl_is_valid(upl)) {
9341                 uint32_t i=0;
9342                 vector_upl_t vector_upl = upl->vector_upl;
9343
9344                 if(vector_upl) {
9345                         upl_t subupl = NULL;
9346                         vector_upl_iostates_t subupl_state;
9347
9348                         for(i=0; i < vector_upl->num_upls; i++) {
9349                                 subupl = vector_upl->upl_elems[i];
9350                                 subupl_state = vector_upl->upl_iostates[i];
9351                                 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9352                                         /* We could have been passed an offset/size pair that belongs
9353                                          * to an UPL element that has already been committed/aborted.
9354                                          * If so, return NULL.
9355                                          */
9356                                         if(subupl == NULL)
9357                                                 return NULL;
9358                                         if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9359                                                 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9360                                                 if(*upl_size > subupl_state.size)
9361                                                         *upl_size = subupl_state.size;
9362                                         }
9363                                         if(*upl_offset >= subupl_state.offset)
9364                                                 *upl_offset -= subupl_state.offset;
9365                                         else if(i)
9366                                                 panic("Vector UPL offset miscalculation\n");
9367                                         return subupl;
9368                                 }
9369                         }
9370                 }
9371                 else
9372                         panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9373         }
9374         return NULL;
9375 }
9376
9377 void
9378 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9379 {
9380         *v_upl_submap = NULL;
9381
9382         if(vector_upl_is_valid(upl)) {
9383                 vector_upl_t vector_upl = upl->vector_upl;
9384                 if(vector_upl) {
9385                         *v_upl_submap = vector_upl->submap;
9386                         *submap_dst_addr = vector_upl->submap_dst_addr;
9387                 }
9388                 else
9389                         panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9390         }
9391         else
9392                 panic("vector_upl_get_submap was passed a null UPL\n");
9393 }
9394
9395 void
9396 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9397 {
9398         if(vector_upl_is_valid(upl)) {
9399                 vector_upl_t vector_upl = upl->vector_upl;
9400                 if(vector_upl) {
9401                         vector_upl->submap = submap;
9402                         vector_upl->submap_dst_addr = submap_dst_addr;
9403                 }
9404                 else
9405                         panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9406         }
9407         else
9408                 panic("vector_upl_get_submap was passed a NULL UPL\n");
9409 }
9410
9411 void
9412 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9413 {
9414         if(vector_upl_is_valid(upl)) {
9415                 uint32_t i = 0;
9416                 vector_upl_t vector_upl = upl->vector_upl;
9417
9418                 if(vector_upl) {
9419                         for(i = 0; i < vector_upl->num_upls; i++) {
9420                                 if(vector_upl->upl_elems[i] == subupl)
9421                                         break;
9422                         }
9423
9424                         if(i == vector_upl->num_upls)
9425                                 panic("setting sub-upl iostate when none exists");
9426
9427                         vector_upl->upl_iostates[i].offset = offset;
9428                         if(size < PAGE_SIZE)
9429                                 size = PAGE_SIZE;
9430                         vector_upl->upl_iostates[i].size = size;
9431                 }
9432                 else
9433                         panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9434         }
9435         else
9436                 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9437 }
9438
9439 void
9440 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9441 {
9442         if(vector_upl_is_valid(upl)) {
9443                 uint32_t i = 0;
9444                 vector_upl_t vector_upl = upl->vector_upl;
9445
9446                 if(vector_upl) {
9447                         for(i = 0; i < vector_upl->num_upls; i++) {
9448                                 if(vector_upl->upl_elems[i] == subupl)
9449                                         break;
9450                         }
9451
9452                         if(i == vector_upl->num_upls)
9453                                 panic("getting sub-upl iostate when none exists");
9454
9455                         *offset = vector_upl->upl_iostates[i].offset;
9456                         *size = vector_upl->upl_iostates[i].size;
9457                 }
9458                 else
9459                         panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9460         }
9461         else
9462                 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9463 }
9464
9465 void
9466 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9467 {
9468         if(vector_upl_is_valid(upl)) {
9469                 vector_upl_t vector_upl = upl->vector_upl;
9470                 if(vector_upl) {
9471                         if(index < vector_upl->num_upls) {
9472                                 *offset = vector_upl->upl_iostates[index].offset;
9473                                 *size = vector_upl->upl_iostates[index].size;
9474                         }
9475                         else
9476                                 *offset = *size = 0;
9477                 }
9478                 else
9479                         panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9480         }
9481         else
9482                 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9483 }
9484
9485 upl_page_info_t *
9486 upl_get_internal_vectorupl_pagelist(upl_t upl)
9487 {
9488         return ((vector_upl_t)(upl->vector_upl))->pagelist;
9489 }
9490
9491 void *
9492 upl_get_internal_vectorupl(upl_t upl)
9493 {
9494         return upl->vector_upl;
9495 }
9496
9497 vm_size_t
9498 upl_get_internal_pagelist_offset(void)
9499 {
9500         return sizeof(struct upl);
9501 }
9502
9503 void
9504 upl_clear_dirty(
9505         upl_t           upl,
9506         boolean_t       value)
9507 {
9508         if (value) {
9509                 upl->flags |= UPL_CLEAR_DIRTY;
9510         } else {
9511                 upl->flags &= ~UPL_CLEAR_DIRTY;
9512         }
9513 }
9514
9515 void
9516 upl_set_referenced(
9517         upl_t           upl,
9518         boolean_t       value)
9519 {
9520         upl_lock(upl);
9521         if (value) {
9522                 upl->ext_ref_count++;
9523         } else {
9524                 if (!upl->ext_ref_count) {
9525                         panic("upl_set_referenced not %p\n", upl);
9526                 }
9527                 upl->ext_ref_count--;
9528         }
9529         upl_unlock(upl);
9530 }
9531
9532 #if CONFIG_IOSCHED
9533 void
9534 upl_set_blkno(
9535         upl_t           upl,
9536         vm_offset_t     upl_offset,
9537         int             io_size,
9538         int64_t         blkno)
9539 {
9540                 int i,j;
9541                 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9542                         return;
9543
9544                 assert(upl->upl_reprio_info != 0);
9545                 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9546                         UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9547                 }
9548 }
9549 #endif
9550
9551 void inline memoryshot(unsigned int event, unsigned int control)
9552 {
9553         if (vm_debug_events) {
9554                 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9555                                         vm_page_active_count, vm_page_inactive_count,
9556                                         vm_page_free_count, vm_page_speculative_count,
9557                                         vm_page_throttled_count);
9558         } else {
9559                 (void) event;
9560                 (void) control;
9561         }
9562
9563 }
9564
9565 #ifdef MACH_BSD
9566
9567 boolean_t  upl_device_page(upl_page_info_t *upl)
9568 {
9569         return(UPL_DEVICE_PAGE(upl));
9570 }
9571 boolean_t  upl_page_present(upl_page_info_t *upl, int index)
9572 {
9573         return(UPL_PAGE_PRESENT(upl, index));
9574 }
9575 boolean_t  upl_speculative_page(upl_page_info_t *upl, int index)
9576 {
9577         return(UPL_SPECULATIVE_PAGE(upl, index));
9578 }
9579 boolean_t  upl_dirty_page(upl_page_info_t *upl, int index)
9580 {
9581         return(UPL_DIRTY_PAGE(upl, index));
9582 }
9583 boolean_t  upl_valid_page(upl_page_info_t *upl, int index)
9584 {
9585         return(UPL_VALID_PAGE(upl, index));
9586 }
9587 ppnum_t  upl_phys_page(upl_page_info_t *upl, int index)
9588 {
9589         return(UPL_PHYS_PAGE(upl, index));
9590 }
9591
9592 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9593 {
9594         upl[index].mark = v;
9595 }
9596
9597 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9598 {
9599         return upl[index].mark;
9600 }
9601
9602 void
9603 vm_countdirtypages(void)
9604 {
9605         vm_page_t m;
9606         int dpages;
9607         int pgopages;
9608         int precpages;
9609
9610
9611         dpages=0;
9612         pgopages=0;
9613         precpages=0;
9614
9615         vm_page_lock_queues();
9616         m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9617         do {
9618                 if (m ==(vm_page_t )0) break;
9619
9620                 if(m->vmp_dirty) dpages++;
9621                 if(m->vmp_free_when_done) pgopages++;
9622                 if(m->vmp_precious) precpages++;
9623
9624                 assert(VM_PAGE_OBJECT(m) != kernel_object);
9625                 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9626                 if (m ==(vm_page_t )0) break;
9627
9628         } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9629         vm_page_unlock_queues();
9630
9631         vm_page_lock_queues();
9632         m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9633         do {
9634                 if (m ==(vm_page_t )0) break;
9635
9636                 dpages++;
9637                 assert(m->vmp_dirty);
9638                 assert(!m->vmp_free_when_done);
9639                 assert(VM_PAGE_OBJECT(m) != kernel_object);
9640                 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9641                 if (m ==(vm_page_t )0) break;
9642
9643         } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9644         vm_page_unlock_queues();
9645
9646         vm_page_lock_queues();
9647         m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9648         do {
9649                 if (m ==(vm_page_t )0) break;
9650
9651                 if(m->vmp_dirty) dpages++;
9652                 if(m->vmp_free_when_done) pgopages++;
9653                 if(m->vmp_precious) precpages++;
9654
9655                 assert(VM_PAGE_OBJECT(m) != kernel_object);
9656                 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9657                 if (m ==(vm_page_t )0) break;
9658
9659         } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9660         vm_page_unlock_queues();
9661
9662         printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9663
9664         dpages=0;
9665         pgopages=0;
9666         precpages=0;
9667
9668         vm_page_lock_queues();
9669         m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9670
9671         do {
9672                 if(m == (vm_page_t )0) break;
9673                 if(m->vmp_dirty) dpages++;
9674                 if(m->vmp_free_when_done) pgopages++;
9675                 if(m->vmp_precious) precpages++;
9676
9677                 assert(VM_PAGE_OBJECT(m) != kernel_object);
9678                 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9679                 if(m == (vm_page_t )0) break;
9680
9681         } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9682         vm_page_unlock_queues();
9683
9684         printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9685
9686 }
9687 #endif /* MACH_BSD */
9688
9689
9690 #if CONFIG_IOSCHED
9691 int upl_get_cached_tier(upl_t  upl)
9692 {
9693        assert(upl);
9694        if (upl->flags & UPL_TRACKED_BY_OBJECT)
9695                return (upl->upl_priority);
9696        return (-1);
9697 }
9698 #endif /* CONFIG_IOSCHED */
9699
9700
9701 void upl_callout_iodone(upl_t upl)
9702 {
9703         struct upl_io_completion *upl_ctx = upl->upl_iodone;
9704
9705         if (upl_ctx) {
9706                 void    (*iodone_func)(void *, int) = upl_ctx->io_done;
9707
9708                 assert(upl_ctx->io_done);
9709
9710                 (*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9711         }
9712 }
9713
9714 void upl_set_iodone(upl_t upl, void *upl_iodone)
9715 {
9716         upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9717 }
9718
9719 void upl_set_iodone_error(upl_t upl, int error)
9720 {
9721         struct upl_io_completion *upl_ctx = upl->upl_iodone;
9722
9723         if (upl_ctx)
9724                 upl_ctx->io_error = error;
9725 }
9726
9727
9728 ppnum_t upl_get_highest_page(
9729                              upl_t                      upl)
9730 {
9731         return upl->highest_page;
9732 }
9733
9734 upl_size_t upl_get_size(
9735                              upl_t                      upl)
9736 {
9737         return upl->size;
9738 }
9739
9740 upl_t upl_associated_upl(upl_t upl)
9741 {
9742         return upl->associated_upl;
9743 }
9744
9745 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9746 {
9747         upl->associated_upl = associated_upl;
9748 }
9749
9750 struct vnode * upl_lookup_vnode(upl_t upl)
9751 {
9752         if (!upl->map_object->internal)
9753                 return vnode_pager_lookup_vnode(upl->map_object->pager);
9754         else
9755                 return NULL;
9756 }
9757
9758 #if UPL_DEBUG
9759 kern_return_t  upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9760 {
9761         upl->ubc_alias1 = alias1;
9762         upl->ubc_alias2 = alias2;
9763         return KERN_SUCCESS;
9764 }
9765 int  upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9766 {
9767         if(al)
9768                 *al = upl->ubc_alias1;
9769         if(al2)
9770                 *al2 = upl->ubc_alias2;
9771         return KERN_SUCCESS;
9772 }
9773 #endif /* UPL_DEBUG */
9774
9775 #if VM_PRESSURE_EVENTS
9776 /*
9777  * Upward trajectory.
9778  */
9779 extern boolean_t vm_compressor_low_on_space(void);
9780
9781 boolean_t
9782 VM_PRESSURE_NORMAL_TO_WARNING(void)     {
9783
9784         if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9785
9786                 /* Available pages below our threshold */
9787                 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9788                         /* No frozen processes to kill */
9789                         if (memorystatus_frozen_count == 0) {
9790                                 /* Not enough suspended processes available. */
9791                                 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9792                                         return TRUE;
9793                                 }
9794                         }
9795                 }
9796                 return FALSE;
9797
9798         } else {
9799                 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
9800         }
9801 }
9802
9803 boolean_t
9804 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9805
9806         if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9807
9808                 /* Available pages below our threshold */
9809                 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9810                         return TRUE;
9811                 }
9812                 return FALSE;
9813         } else {
9814                 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9815         }
9816 }
9817
9818 /*
9819  * Downward trajectory.
9820  */
9821 boolean_t
9822 VM_PRESSURE_WARNING_TO_NORMAL(void) {
9823
9824         if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9825
9826                 /* Available pages above our threshold */
9827                 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
9828                 if (memorystatus_available_pages > target_threshold) {
9829                         return TRUE;
9830                 }
9831                 return FALSE;
9832         } else {
9833                 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
9834         }
9835 }
9836
9837 boolean_t
9838 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9839
9840         if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9841
9842                 /* Available pages above our threshold */
9843                 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
9844                 if (memorystatus_available_pages > target_threshold) {
9845                         return TRUE;
9846                 }
9847                 return FALSE;
9848         } else {
9849                 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9850         }
9851 }
9852 #endif /* VM_PRESSURE_EVENTS */
9853
9854
9855
9856 #define VM_TEST_COLLAPSE_COMPRESSOR             0
9857 #define VM_TEST_WIRE_AND_EXTRACT                0
9858 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC        0
9859 #if __arm64__
9860 #define VM_TEST_KERNEL_OBJECT_FAULT             0
9861 #endif /* __arm64__ */
9862 #define VM_TEST_DEVICE_PAGER_TRANSPOSE          (DEVELOPMENT || DEBUG)
9863
9864 #if VM_TEST_COLLAPSE_COMPRESSOR
9865 extern boolean_t vm_object_collapse_compressor_allowed;
9866 #include <IOKit/IOLib.h>
9867 static void
9868 vm_test_collapse_compressor(void)
9869 {
9870         vm_object_size_t        backing_size, top_size;
9871         vm_object_t             backing_object, top_object;
9872         vm_map_offset_t         backing_offset, top_offset;
9873         unsigned char           *backing_address, *top_address;
9874         kern_return_t           kr;
9875
9876         printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
9877
9878         /* create backing object */
9879         backing_size = 15 * PAGE_SIZE;
9880         backing_object = vm_object_allocate(backing_size);
9881         assert(backing_object != VM_OBJECT_NULL);
9882         printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
9883                 backing_object);
9884         /* map backing object */
9885         backing_offset = 0;
9886         kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
9887                           VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9888                           backing_object, 0, FALSE,
9889                           VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9890         assert(kr == KERN_SUCCESS);
9891         backing_address = (unsigned char *) backing_offset;
9892         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9893                "mapped backing object %p at 0x%llx\n",
9894                backing_object, (uint64_t) backing_offset);
9895         /* populate with pages to be compressed in backing object */
9896         backing_address[0x1*PAGE_SIZE] = 0xB1;
9897         backing_address[0x4*PAGE_SIZE] = 0xB4;
9898         backing_address[0x7*PAGE_SIZE] = 0xB7;
9899         backing_address[0xa*PAGE_SIZE] = 0xBA;
9900         backing_address[0xd*PAGE_SIZE] = 0xBD;
9901         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9902                "populated pages to be compressed in "
9903                "backing_object %p\n", backing_object);
9904         /* compress backing object */
9905         vm_object_pageout(backing_object);
9906         printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
9907                backing_object);
9908         /* wait for all the pages to be gone */
9909         while (*(volatile int *)&backing_object->resident_page_count != 0)
9910                 IODelay(10);
9911         printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
9912                backing_object);
9913         /* populate with pages to be resident in backing object */
9914         backing_address[0x0*PAGE_SIZE] = 0xB0;
9915         backing_address[0x3*PAGE_SIZE] = 0xB3;
9916         backing_address[0x6*PAGE_SIZE] = 0xB6;
9917         backing_address[0x9*PAGE_SIZE] = 0xB9;
9918         backing_address[0xc*PAGE_SIZE] = 0xBC;
9919         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9920                "populated pages to be resident in "
9921                "backing_object %p\n", backing_object);
9922         /* leave the other pages absent */
9923         /* mess with the paging_offset of the backing_object */
9924         assert(backing_object->paging_offset == 0);
9925         backing_object->paging_offset = 0x3000;
9926
9927         /* create top object */
9928         top_size = 9 * PAGE_SIZE;
9929         top_object = vm_object_allocate(top_size);
9930         assert(top_object != VM_OBJECT_NULL);
9931         printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
9932                 top_object);
9933         /* map top object */
9934         top_offset = 0;
9935         kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
9936                           VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9937                           top_object, 0, FALSE,
9938                           VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9939         assert(kr == KERN_SUCCESS);
9940         top_address = (unsigned char *) top_offset;
9941         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9942                "mapped top object %p at 0x%llx\n",
9943                top_object, (uint64_t) top_offset);
9944         /* populate with pages to be compressed in top object */
9945         top_address[0x3*PAGE_SIZE] = 0xA3;
9946         top_address[0x4*PAGE_SIZE] = 0xA4;
9947         top_address[0x5*PAGE_SIZE] = 0xA5;
9948         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9949                "populated pages to be compressed in "
9950                "top_object %p\n", top_object);
9951         /* compress top object */
9952         vm_object_pageout(top_object);
9953         printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
9954                top_object);
9955         /* wait for all the pages to be gone */
9956         while (top_object->resident_page_count != 0)
9957                 IODelay(10);
9958         printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
9959                top_object);
9960         /* populate with pages to be resident in top object */
9961         top_address[0x0*PAGE_SIZE] = 0xA0;
9962         top_address[0x1*PAGE_SIZE] = 0xA1;
9963         top_address[0x2*PAGE_SIZE] = 0xA2;
9964         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9965                "populated pages to be resident in "
9966                "top_object %p\n", top_object);
9967         /* leave the other pages absent */
9968
9969         /* link the 2 objects */
9970         vm_object_reference(backing_object);
9971         top_object->shadow = backing_object;
9972         top_object->vo_shadow_offset = 0x3000;
9973         printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
9974                top_object, backing_object);
9975
9976         /* unmap backing object */
9977         vm_map_remove(kernel_map,
9978                       backing_offset,
9979                       backing_offset + backing_size,
9980                       VM_MAP_REMOVE_NO_FLAGS);
9981         printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9982                "unmapped backing_object %p [0x%llx:0x%llx]\n",
9983                backing_object,
9984                (uint64_t) backing_offset,
9985                (uint64_t) (backing_offset + backing_size));
9986
9987         /* collapse */
9988         printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
9989         vm_object_lock(top_object);
9990         vm_object_collapse(top_object, 0, FALSE);
9991         vm_object_unlock(top_object);
9992         printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
9993
9994         /* did it work? */
9995         if (top_object->shadow != VM_OBJECT_NULL) {
9996                 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
9997                 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
9998                 if (vm_object_collapse_compressor_allowed) {
9999                         panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10000                 }
10001         } else {
10002                 /* check the contents of the mapping */
10003                 unsigned char expect[9] =
10004                         { 0xA0, 0xA1, 0xA2,     /* resident in top */
10005                           0xA3, 0xA4, 0xA5,     /* compressed in top */
10006                           0xB9, /* resident in backing + shadow_offset */
10007                           0xBD, /* compressed in backing + shadow_offset + paging_offset */
10008                           0x00 };               /* absent in both */
10009                 unsigned char actual[9];
10010                 unsigned int i, errors;
10011
10012                 errors = 0;
10013                 for (i = 0; i < sizeof (actual); i++) {
10014                         actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10015                         if (actual[i] != expect[i]) {
10016                                 errors++;
10017                         }
10018                 }
10019                 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10020                        "actual [%x %x %x %x %x %x %x %x %x] "
10021                        "expect [%x %x %x %x %x %x %x %x %x] "
10022                        "%d errors\n",
10023                        actual[0], actual[1], actual[2], actual[3],
10024                        actual[4], actual[5], actual[6], actual[7],
10025                        actual[8],
10026                        expect[0], expect[1], expect[2], expect[3],
10027                        expect[4], expect[5], expect[6], expect[7],
10028                        expect[8],
10029                        errors);
10030                 if (errors) {
10031                         panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10032                 } else {
10033                         printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10034                 }
10035         }
10036 }
10037 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10038 #define vm_test_collapse_compressor()
10039 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10040
10041 #if VM_TEST_WIRE_AND_EXTRACT
10042 extern ledger_template_t        task_ledger_template;
10043 #include <mach/mach_vm.h>
10044 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10045                                     vm_offset_t offset);
10046 static void
10047 vm_test_wire_and_extract(void)
10048 {
10049         ledger_t                ledger;
10050         vm_map_t                user_map, wire_map;
10051         mach_vm_address_t       user_addr, wire_addr;
10052         mach_vm_size_t          user_size, wire_size;
10053         mach_vm_offset_t        cur_offset;
10054         vm_prot_t               cur_prot, max_prot;
10055         ppnum_t                 user_ppnum, wire_ppnum;
10056         kern_return_t           kr;
10057
10058         ledger = ledger_instantiate(task_ledger_template,
10059                                     LEDGER_CREATE_ACTIVE_ENTRIES);
10060         user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10061                                  0x100000000ULL,
10062                                  0x200000000ULL,
10063                                  TRUE);
10064         wire_map = vm_map_create(NULL,
10065                                  0x100000000ULL,
10066                                  0x200000000ULL,
10067                                  TRUE);
10068         user_addr = 0;
10069         user_size = 0x10000;
10070         kr = mach_vm_allocate(user_map,
10071                               &user_addr,
10072                               user_size,
10073                               VM_FLAGS_ANYWHERE);
10074         assert(kr == KERN_SUCCESS);
10075         wire_addr = 0;
10076         wire_size = user_size;
10077         kr = mach_vm_remap(wire_map,
10078                            &wire_addr,
10079                            wire_size,
10080                            0,
10081                            VM_FLAGS_ANYWHERE,
10082                            user_map,
10083                            user_addr,
10084                            FALSE,
10085                            &cur_prot,
10086                            &max_prot,
10087                            VM_INHERIT_NONE);
10088         assert(kr == KERN_SUCCESS);
10089         for (cur_offset = 0;
10090              cur_offset < wire_size;
10091              cur_offset += PAGE_SIZE) {
10092                 kr = vm_map_wire_and_extract(wire_map,
10093                                              wire_addr + cur_offset,
10094                                              VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10095                                              TRUE,
10096                                              &wire_ppnum);
10097                 assert(kr == KERN_SUCCESS);
10098                 user_ppnum = vm_map_get_phys_page(user_map,
10099                                                   user_addr + cur_offset);
10100                 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10101                        "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10102                        kr,
10103                        user_map, user_addr + cur_offset, user_ppnum,
10104                        wire_map, wire_addr + cur_offset, wire_ppnum);
10105                 if (kr != KERN_SUCCESS ||
10106                     wire_ppnum == 0 ||
10107                     wire_ppnum != user_ppnum) {
10108                         panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10109                 }
10110         }
10111         cur_offset -= PAGE_SIZE;
10112         kr = vm_map_wire_and_extract(wire_map,
10113                                      wire_addr + cur_offset,
10114                                      VM_PROT_DEFAULT,
10115                                      TRUE,
10116                                      &wire_ppnum);
10117         assert(kr == KERN_SUCCESS);
10118         printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10119                "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10120                kr,
10121                user_map, user_addr + cur_offset, user_ppnum,
10122                wire_map, wire_addr + cur_offset, wire_ppnum);
10123         if (kr != KERN_SUCCESS ||
10124             wire_ppnum == 0 ||
10125             wire_ppnum != user_ppnum) {
10126                 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10127         }
10128
10129         printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10130 }
10131 #else /* VM_TEST_WIRE_AND_EXTRACT */
10132 #define vm_test_wire_and_extract()
10133 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10134
10135 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10136 static void
10137 vm_test_page_wire_overflow_panic(void)
10138 {
10139         vm_object_t object;
10140         vm_page_t page;
10141
10142         printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10143
10144         object = vm_object_allocate(PAGE_SIZE);
10145         vm_object_lock(object);
10146         page = vm_page_alloc(object, 0x0);
10147         vm_page_lock_queues();
10148         do {
10149                 vm_page_wire(page, 1, FALSE);
10150         } while (page->wire_count != 0);
10151         vm_page_unlock_queues();
10152         vm_object_unlock(object);
10153         panic("FBDP(%p,%p): wire_count overflow not detected\n",
10154               object, page);
10155 }
10156 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10157 #define vm_test_page_wire_overflow_panic()
10158 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10159
10160 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10161 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10162 static void
10163 vm_test_kernel_object_fault(void)
10164 {
10165         kern_return_t kr;
10166         vm_offset_t stack;
10167         uintptr_t frameb[2];
10168         int ret;
10169
10170         kr = kernel_memory_allocate(kernel_map, &stack,
10171                                     kernel_stack_size + (2*PAGE_SIZE),
10172                                     0,
10173                                     (KMA_KSTACK | KMA_KOBJECT |
10174                                      KMA_GUARD_FIRST | KMA_GUARD_LAST),
10175                                     VM_KERN_MEMORY_STACK);
10176         if (kr != KERN_SUCCESS) {
10177                 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10178         }
10179         ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10180         if (ret != 0) {
10181                 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10182         } else {
10183                 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10184         }
10185         vm_map_remove(kernel_map,
10186                       stack,
10187                       stack + kernel_stack_size + (2*PAGE_SIZE),
10188                       VM_MAP_REMOVE_KUNWIRE);
10189         stack = 0;
10190 }
10191 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10192 #define vm_test_kernel_object_fault()
10193 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10194
10195 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10196 static void
10197 vm_test_device_pager_transpose(void)
10198 {
10199         memory_object_t device_pager;
10200         vm_object_t     anon_object, device_object;
10201         vm_size_t       size;
10202         vm_map_offset_t anon_mapping, device_mapping;
10203         kern_return_t   kr;
10204
10205         size = 3 * PAGE_SIZE;
10206         anon_object = vm_object_allocate(size);
10207         assert(anon_object != VM_OBJECT_NULL);
10208         device_pager = device_pager_setup(NULL, 0, size, 0);
10209         assert(device_pager != NULL);
10210         device_object = memory_object_to_vm_object(device_pager);
10211         assert(device_object != VM_OBJECT_NULL);
10212         anon_mapping = 0;
10213         kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10214                           VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10215                           anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10216                           VM_INHERIT_DEFAULT);
10217         assert(kr == KERN_SUCCESS);
10218         device_mapping = 0;
10219         kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10220                                      VM_FLAGS_ANYWHERE,
10221                                      VM_MAP_KERNEL_FLAGS_NONE,
10222                                      VM_KERN_MEMORY_NONE,
10223                                      (void *)device_pager, 0, FALSE,
10224                                      VM_PROT_DEFAULT, VM_PROT_ALL,
10225                                      VM_INHERIT_DEFAULT);
10226         assert(kr == KERN_SUCCESS);
10227         memory_object_deallocate(device_pager);
10228
10229         vm_object_lock(anon_object);
10230         vm_object_activity_begin(anon_object);
10231         anon_object->blocked_access = TRUE;
10232         vm_object_unlock(anon_object);
10233         vm_object_lock(device_object);
10234         vm_object_activity_begin(device_object);
10235         device_object->blocked_access = TRUE;
10236         vm_object_unlock(device_object);
10237
10238         assert(anon_object->ref_count == 1);
10239         assert(!anon_object->named);
10240         assert(device_object->ref_count == 2);
10241         assert(device_object->named);
10242
10243         kr = vm_object_transpose(device_object, anon_object, size);
10244         assert(kr == KERN_SUCCESS);
10245
10246         vm_object_lock(anon_object);
10247         vm_object_activity_end(anon_object);
10248         anon_object->blocked_access = FALSE;
10249         vm_object_unlock(anon_object);
10250         vm_object_lock(device_object);
10251         vm_object_activity_end(device_object);
10252         device_object->blocked_access = FALSE;
10253         vm_object_unlock(device_object);
10254
10255         assert(anon_object->ref_count == 2);
10256         assert(anon_object->named);
10257         kr = vm_deallocate(kernel_map, anon_mapping, size);
10258         assert(kr == KERN_SUCCESS);
10259         assert(device_object->ref_count == 1);
10260         assert(!device_object->named);
10261         kr = vm_deallocate(kernel_map, device_mapping, size);
10262         assert(kr == KERN_SUCCESS);
10263
10264         printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10265 }
10266 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10267 #define vm_test_device_pager_transpose()
10268 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10269
10270 void
10271 vm_tests(void)
10272 {
10273         vm_test_collapse_compressor();
10274         vm_test_wire_and_extract();
10275         vm_test_page_wire_overflow_panic();
10276         vm_test_kernel_object_fault();
10277         vm_test_device_pager_transpose();
10278 }