1 /*
2 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * @OSF_COPYRIGHT@
30 */
31 /*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56 /*
57 */
58 /*
59 * File: vm/vm_pageout.c
60 * Author: Avadis Tevanian, Jr., Michael Wayne Young
61 * Date: 1985
62 *
63 * The proverbial page-out daemon.
64 */
65
66 #include <stdint.h>
67
68 #include <debug.h>
69 #include <mach_pagemap.h>
70 #include <mach_cluster_stats.h>
71
72 #include <mach/mach_types.h>
73 #include <mach/memory_object.h>
74 #include <mach/memory_object_default.h>
75 #include <mach/memory_object_control_server.h>
76 #include <mach/mach_host_server.h>
77 #include <mach/upl.h>
78 #include <mach/vm_map.h>
79 #include <mach/vm_param.h>
80 #include <mach/vm_statistics.h>
81 #include <mach/sdt.h>
82
83 #include <kern/kern_types.h>
84 #include <kern/counters.h>
85 #include <kern/host_statistics.h>
86 #include <kern/machine.h>
87 #include <kern/misc_protos.h>
88 #include <kern/sched.h>
89 #include <kern/thread.h>
90 #include <kern/xpr.h>
91 #include <kern/kalloc.h>
92 #include <kern/policy_internal.h>
93 #include <kern/thread_group.h>
94
95 #include <machine/vm_tuning.h>
96 #include <machine/commpage.h>
97
98 #include <vm/pmap.h>
99 #include <vm/vm_compressor_pager.h>
100 #include <vm/vm_fault.h>
101 #include <vm/vm_map.h>
102 #include <vm/vm_object.h>
103 #include <vm/vm_page.h>
104 #include <vm/vm_pageout.h>
105 #include <vm/vm_protos.h> /* must be last */
106 #include <vm/memory_object.h>
107 #include <vm/vm_purgeable_internal.h>
108 #include <vm/vm_shared_region.h>
109 #include <vm/vm_compressor.h>
110
111 #include <san/kasan.h>
112
113 #if CONFIG_PHANTOM_CACHE
114 #include <vm/vm_phantom_cache.h>
115 #endif
116
117 #if UPL_DEBUG
118 #include <libkern/OSDebug.h>
119 #endif
120
121 extern int cs_debug;
122
123 extern void mbuf_drain(boolean_t);
124
125 #if VM_PRESSURE_EVENTS
126 #if CONFIG_JETSAM
127 extern unsigned int memorystatus_available_pages;
128 extern unsigned int memorystatus_available_pages_pressure;
129 extern unsigned int memorystatus_available_pages_critical;
130 #else /* CONFIG_JETSAM */
131 extern uint64_t memorystatus_available_pages;
132 extern uint64_t memorystatus_available_pages_pressure;
133 extern uint64_t memorystatus_available_pages_critical;
134 #endif /* CONFIG_JETSAM */
135
136 extern unsigned int memorystatus_frozen_count;
137 extern unsigned int memorystatus_suspended_count;
138 extern vm_pressure_level_t memorystatus_vm_pressure_level;
139
140 void vm_pressure_response(void);
141 extern void consider_vm_pressure_events(void);
142
143 #define MEMORYSTATUS_SUSPENDED_THRESHOLD 4
144 #endif /* VM_PRESSURE_EVENTS */
145
146
147 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
148 #ifdef CONFIG_EMBEDDED
149 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 1024
150 #else
151 #define VM_PAGEOUT_BURST_INACTIVE_THROTTLE 4096
152 #endif
153 #endif
154
155 #ifndef VM_PAGEOUT_DEADLOCK_RELIEF
156 #define VM_PAGEOUT_DEADLOCK_RELIEF 100 /* number of pages to move to break deadlock */
157 #endif
158
159 #ifndef VM_PAGE_LAUNDRY_MAX
160 #define VM_PAGE_LAUNDRY_MAX 128UL /* maximum pageouts on a given pageout queue */
161 #endif	/* VM_PAGE_LAUNDRY_MAX */
162
163 #ifndef VM_PAGEOUT_BURST_WAIT
164 #define VM_PAGEOUT_BURST_WAIT 1 /* milliseconds */
165 #endif /* VM_PAGEOUT_BURST_WAIT */
166
167 #ifndef VM_PAGEOUT_EMPTY_WAIT
168 #define VM_PAGEOUT_EMPTY_WAIT 50 /* milliseconds */
169 #endif /* VM_PAGEOUT_EMPTY_WAIT */
170
171 #ifndef VM_PAGEOUT_DEADLOCK_WAIT
172 #define VM_PAGEOUT_DEADLOCK_WAIT 100 /* milliseconds */
173 #endif /* VM_PAGEOUT_DEADLOCK_WAIT */
174
175 #ifndef VM_PAGEOUT_IDLE_WAIT
176 #define VM_PAGEOUT_IDLE_WAIT 10 /* milliseconds */
177 #endif /* VM_PAGEOUT_IDLE_WAIT */
178
179 #ifndef VM_PAGEOUT_SWAP_WAIT
180 #define VM_PAGEOUT_SWAP_WAIT 10 /* milliseconds */
181 #endif /* VM_PAGEOUT_SWAP_WAIT */
182
183
184 #ifndef VM_PAGE_SPECULATIVE_TARGET
185 #define VM_PAGE_SPECULATIVE_TARGET(total) ((total) * 1 / (100 / vm_pageout_state.vm_page_speculative_percentage))
186 #endif /* VM_PAGE_SPECULATIVE_TARGET */
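/*
 * Worked example (illustrative only, assuming a
 * vm_page_speculative_percentage of 5): for a total of 100,000 pages,
 * VM_PAGE_SPECULATIVE_TARGET(100000) = 100000 * 1 / (100 / 5)
 *                                    = 100000 / 20
 *                                    = 5,000 pages.
 * Note that the divisor (100 / percentage) uses integer division, so a
 * percentage of 3 gives a divisor of 33 and an effective fraction of
 * roughly 3.03% rather than exactly 3%.
 */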
187
188
189 /*
190 * To obtain a reasonable LRU approximation, the inactive queue
191 * needs to be large enough to give pages on it a chance to be
192 * referenced a second time. This macro defines the fraction
193 * of active+inactive pages that should be inactive.
194 * The pageout daemon uses it to update vm_page_inactive_target.
195 *
196 * If vm_page_free_count falls below vm_page_free_target and
197 * vm_page_inactive_count is below vm_page_inactive_target,
198 * then the pageout daemon starts running.
199 */
200
201 #ifndef VM_PAGE_INACTIVE_TARGET
202 #define VM_PAGE_INACTIVE_TARGET(avail) ((avail) * 1 / 2)
203 #endif /* VM_PAGE_INACTIVE_TARGET */
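/*
 * Worked example (illustrative only): with 200,000 pageable pages
 * available, VM_PAGE_INACTIVE_TARGET(200000) = 200000 / 2 = 100,000
 * pages, i.e. about half of the active+inactive pages should sit on
 * the inactive queue so they get a chance at a second reference.
 */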
204
205 /*
206 * Once the pageout daemon starts running, it keeps going
207 * until vm_page_free_count meets or exceeds vm_page_free_target.
208 */
209
210 #ifndef VM_PAGE_FREE_TARGET
211 #ifdef CONFIG_EMBEDDED
212 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 100)
213 #else
214 #define VM_PAGE_FREE_TARGET(free) (15 + (free) / 80)
215 #endif
216 #endif /* VM_PAGE_FREE_TARGET */
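/*
 * Worked example (illustrative only): on a non-embedded configuration
 * with an argument of 1,000,000 pages,
 * VM_PAGE_FREE_TARGET(1000000) = 15 + 1000000 / 80 = 12,515 pages;
 * the embedded variant divides by 100 instead, giving 10,015 pages.
 */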
217
218
219 /*
220 * The pageout daemon always starts running once vm_page_free_count
221 * falls below vm_page_free_min.
222 */
223
224 #ifndef VM_PAGE_FREE_MIN
225 #ifdef CONFIG_EMBEDDED
226 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 200)
227 #else
228 #define VM_PAGE_FREE_MIN(free) (10 + (free) / 100)
229 #endif
230 #endif /* VM_PAGE_FREE_MIN */
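/*
 * Worked example (illustrative only): with the same 1,000,000 page
 * argument, VM_PAGE_FREE_MIN(1000000) = 10 + 1000000 / 100 = 10,010
 * pages on a non-embedded configuration, or 10 + 1000000 / 200 = 5,010
 * pages on embedded.
 */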
231
232 #ifdef CONFIG_EMBEDDED
233 #define VM_PAGE_FREE_RESERVED_LIMIT 100
234 #define VM_PAGE_FREE_MIN_LIMIT 1500
235 #define VM_PAGE_FREE_TARGET_LIMIT 2000
236 #else
237 #define VM_PAGE_FREE_RESERVED_LIMIT 1700
238 #define VM_PAGE_FREE_MIN_LIMIT 3500
239 #define VM_PAGE_FREE_TARGET_LIMIT 4000
240 #endif
241
242 /*
243 * When vm_page_free_count falls below vm_page_free_reserved,
244 * only vm-privileged threads can allocate pages. vm-privilege
245 * allows the pageout daemon and default pager (and any other
246 * associated threads needed for default pageout) to continue
247 * operation by dipping into the reserved pool of pages.
248 */
249
250 #ifndef VM_PAGE_FREE_RESERVED
251 #define VM_PAGE_FREE_RESERVED(n) \
252 ((unsigned) (6 * VM_PAGE_LAUNDRY_MAX) + (n))
253 #endif /* VM_PAGE_FREE_RESERVED */
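/*
 * Worked example (illustrative only): with VM_PAGE_LAUNDRY_MAX of 128,
 * the base reservation is 6 * 128 = 768 pages; if the macro were
 * invoked with a hypothetical n = 4 (the value of n is an assumption
 * here, purely for illustration),
 * VM_PAGE_FREE_RESERVED(4) = 768 + 4 = 772 pages.
 */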
254
255 /*
256 * When we dequeue pages from the inactive list, they are
257  * reactivated (i.e., put back on the active queue) if referenced.
258 * However, it is possible to starve the free list if other
259 * processors are referencing pages faster than we can turn off
260 * the referenced bit. So we limit the number of reactivations
261 * we will make per call of vm_pageout_scan().
262 */
263 #define VM_PAGE_REACTIVATE_LIMIT_MAX 20000
264
265 #ifndef VM_PAGE_REACTIVATE_LIMIT
266 #ifdef CONFIG_EMBEDDED
267 #define VM_PAGE_REACTIVATE_LIMIT(avail) (VM_PAGE_INACTIVE_TARGET(avail) / 2)
268 #else
269 #define VM_PAGE_REACTIVATE_LIMIT(avail) (MAX((avail) * 1 / 20,VM_PAGE_REACTIVATE_LIMIT_MAX))
270 #endif
271 #endif /* VM_PAGE_REACTIVATE_LIMIT */
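/*
 * Worked example (illustrative only) of the non-embedded variant, as
 * written: with 1,000,000 available pages, (avail) / 20 = 50,000, so
 * VM_PAGE_REACTIVATE_LIMIT(1000000) = MAX(50000, 20000) = 50,000
 * reactivations per vm_pageout_scan() call; with 100,000 available
 * pages, (avail) / 20 = 5,000 and the MAX() keeps the limit at the
 * 20,000 floor.
 */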
272 #define VM_PAGEOUT_INACTIVE_FORCE_RECLAIM 1000
273
274 extern boolean_t hibernate_cleaning_in_progress;
275
276 /*
277 * Forward declarations for internal routines.
278 */
279 struct cq {
280 struct vm_pageout_queue *q;
281 void *current_chead;
282 char *scratch_buf;
283 int id;
284 };
285
286 struct cq ciq[MAX_COMPRESSOR_THREAD_COUNT];
287
288
289 #if VM_PRESSURE_EVENTS
290 void vm_pressure_thread(void);
291
292 boolean_t VM_PRESSURE_NORMAL_TO_WARNING(void);
293 boolean_t VM_PRESSURE_WARNING_TO_CRITICAL(void);
294
295 boolean_t VM_PRESSURE_WARNING_TO_NORMAL(void);
296 boolean_t VM_PRESSURE_CRITICAL_TO_WARNING(void);
297 #endif
298
299 void vm_pageout_garbage_collect(int);
300 static void vm_pageout_iothread_external(void);
301 static void vm_pageout_iothread_internal(struct cq *cq);
302 static void vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *, boolean_t);
303
304 extern void vm_pageout_continue(void);
305 extern void vm_pageout_scan(void);
306
307 void vm_tests(void); /* forward */
308
309 #if !CONFIG_EMBEDDED
310 static boolean_t vm_pageout_waiter = FALSE;
311 static boolean_t vm_pageout_running = FALSE;
312 #endif /* !CONFIG_EMBEDDED */
313
314
315 #if DEVELOPMENT || DEBUG
316 struct vm_pageout_debug vm_pageout_debug;
317 #endif
318 struct vm_pageout_vminfo vm_pageout_vminfo;
319 struct vm_pageout_state vm_pageout_state;
320 struct vm_config vm_config;
321
322 struct vm_pageout_queue vm_pageout_queue_internal __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
323 struct vm_pageout_queue vm_pageout_queue_external __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));
324
325 int vm_upl_wait_for_pages = 0;
326 vm_object_t vm_pageout_scan_wants_object = VM_OBJECT_NULL;
327
328 boolean_t (* volatile consider_buffer_cache_collect)(int) = NULL;
329
330 int vm_debug_events = 0;
331
332 #if CONFIG_MEMORYSTATUS
333 extern boolean_t memorystatus_kill_on_VM_page_shortage(boolean_t async);
334
335 uint32_t vm_pageout_memorystatus_fb_factor_nr = 5;
336 uint32_t vm_pageout_memorystatus_fb_factor_dr = 2;
337
338 #endif
339
340
341
342 /*
343 * Routine: vm_pageout_object_terminate
344 * Purpose:
345 * Destroy the pageout_object, and perform all of the
346 * required cleanup actions.
347 *
348 * In/Out conditions:
349 * The object must be locked, and will be returned locked.
350 */
351 void
352 vm_pageout_object_terminate(
353 vm_object_t object)
354 {
355 vm_object_t shadow_object;
356
357 /*
358 * Deal with the deallocation (last reference) of a pageout object
359 * (used for cleaning-in-place) by dropping the paging references/
360 * freeing pages in the original object.
361 */
362
363 assert(object->pageout);
364 shadow_object = object->shadow;
365 vm_object_lock(shadow_object);
366
367 while (!vm_page_queue_empty(&object->memq)) {
368 vm_page_t p, m;
369 vm_object_offset_t offset;
370
371 p = (vm_page_t) vm_page_queue_first(&object->memq);
372
373 assert(p->vmp_private);
374 assert(p->vmp_free_when_done);
375 p->vmp_free_when_done = FALSE;
376 assert(!p->vmp_cleaning);
377 assert(!p->vmp_laundry);
378
379 offset = p->vmp_offset;
380 VM_PAGE_FREE(p);
381 p = VM_PAGE_NULL;
382
383 m = vm_page_lookup(shadow_object,
384 offset + object->vo_shadow_offset);
385
386 if(m == VM_PAGE_NULL)
387 continue;
388
389 assert((m->vmp_dirty) || (m->vmp_precious) ||
390 (m->vmp_busy && m->vmp_cleaning));
391
392 /*
393 * Handle the trusted pager throttle.
394 * Also decrement the burst throttle (if external).
395 */
396 vm_page_lock_queues();
397 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q)
398 vm_pageout_throttle_up(m);
399
400 /*
401 * Handle the "target" page(s). These pages are to be freed if
402 * successfully cleaned. Target pages are always busy, and are
403 * wired exactly once. The initial target pages are not mapped,
404 * (so cannot be referenced or modified) but converted target
405 * pages may have been modified between the selection as an
406 * adjacent page and conversion to a target.
407 */
408 if (m->vmp_free_when_done) {
409 assert(m->vmp_busy);
410 assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
411 assert(m->vmp_wire_count == 1);
412 m->vmp_cleaning = FALSE;
413 m->vmp_free_when_done = FALSE;
414 /*
415 * Revoke all access to the page. Since the object is
416 * locked, and the page is busy, this prevents the page
417 * from being dirtied after the pmap_disconnect() call
418 * returns.
419 *
420 			 * Since the page is left "dirty" but "not modified", we
421 * can detect whether the page was redirtied during
422 * pageout by checking the modify state.
423 */
424 if (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED) {
425 SET_PAGE_DIRTY(m, FALSE);
426 } else {
427 m->vmp_dirty = FALSE;
428 }
429
430 if (m->vmp_dirty) {
431 vm_page_unwire(m, TRUE); /* reactivates */
432 VM_STAT_INCR(reactivations);
433 PAGE_WAKEUP_DONE(m);
434 } else {
435 vm_page_free(m); /* clears busy, etc. */
436 }
437 vm_page_unlock_queues();
438 continue;
439 }
440 /*
441 * Handle the "adjacent" pages. These pages were cleaned in
442 * place, and should be left alone.
443 * If prep_pin_count is nonzero, then someone is using the
444 * page, so make it active.
445 */
446 if ((m->vmp_q_state == VM_PAGE_NOT_ON_Q) && !m->vmp_private) {
447 if (m->vmp_reference)
448 vm_page_activate(m);
449 else
450 vm_page_deactivate(m);
451 }
452 if (m->vmp_overwriting) {
453 /*
454 * the (COPY_OUT_FROM == FALSE) request_page_list case
455 */
456 if (m->vmp_busy) {
457 /*
458 * We do not re-set m->vmp_dirty !
459 * The page was busy so no extraneous activity
460 * could have occurred. COPY_INTO is a read into the
461 * new pages. CLEAN_IN_PLACE does actually write
462 * out the pages but handling outside of this code
463 * will take care of resetting dirty. We clear the
464 * modify however for the Programmed I/O case.
465 */
466 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
467
468 m->vmp_busy = FALSE;
469 m->vmp_absent = FALSE;
470 } else {
471 /*
472 * alternate (COPY_OUT_FROM == FALSE) request_page_list case
473 * Occurs when the original page was wired
474 * at the time of the list request
475 */
476 assert(VM_PAGE_WIRED(m));
477 vm_page_unwire(m, TRUE); /* reactivates */
478 }
479 m->vmp_overwriting = FALSE;
480 } else {
481 m->vmp_dirty = FALSE;
482 }
483 m->vmp_cleaning = FALSE;
484
485 /*
486 		 * Wake up any thread waiting for the page to leave the "cleaning" state.
487 */
488 PAGE_WAKEUP(m);
489 vm_page_unlock_queues();
490 }
491 /*
492 * Account for the paging reference taken in vm_paging_object_allocate.
493 */
494 vm_object_activity_end(shadow_object);
495 vm_object_unlock(shadow_object);
496
497 assert(object->ref_count == 0);
498 assert(object->paging_in_progress == 0);
499 assert(object->activity_in_progress == 0);
500 assert(object->resident_page_count == 0);
501 return;
502 }
503
504 /*
505 * Routine: vm_pageclean_setup
506 *
507 * Purpose: setup a page to be cleaned (made non-dirty), but not
508 * necessarily flushed from the VM page cache.
509 * This is accomplished by cleaning in place.
510 *
511 * The page must not be busy, and new_object
512 * must be locked.
513 *
514 */
515 static void
516 vm_pageclean_setup(
517 vm_page_t m,
518 vm_page_t new_m,
519 vm_object_t new_object,
520 vm_object_offset_t new_offset)
521 {
522 assert(!m->vmp_busy);
523 #if 0
524 assert(!m->vmp_cleaning);
525 #endif
526
527 XPR(XPR_VM_PAGEOUT,
528 "vm_pageclean_setup, obj 0x%X off 0x%X page 0x%X new 0x%X new_off 0x%X\n",
529 VM_PAGE_OBJECT(m), m->vmp_offset, m,
530 new_m, new_offset);
531
532 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
533
534 /*
535 * Mark original page as cleaning in place.
536 */
537 m->vmp_cleaning = TRUE;
538 SET_PAGE_DIRTY(m, FALSE);
539 m->vmp_precious = FALSE;
540
541 /*
542 * Convert the fictitious page to a private shadow of
543 * the real page.
544 */
545 assert(new_m->vmp_fictitious);
546 assert(VM_PAGE_GET_PHYS_PAGE(new_m) == vm_page_fictitious_addr);
547 new_m->vmp_fictitious = FALSE;
548 new_m->vmp_private = TRUE;
549 new_m->vmp_free_when_done = TRUE;
550 VM_PAGE_SET_PHYS_PAGE(new_m, VM_PAGE_GET_PHYS_PAGE(m));
551
552 vm_page_lockspin_queues();
553 vm_page_wire(new_m, VM_KERN_MEMORY_NONE, TRUE);
554 vm_page_unlock_queues();
555
556 vm_page_insert_wired(new_m, new_object, new_offset, VM_KERN_MEMORY_NONE);
557 assert(!new_m->vmp_wanted);
558 new_m->vmp_busy = FALSE;
559 }
560
561 /*
562 * Routine: vm_pageout_initialize_page
563 * Purpose:
564 * Causes the specified page to be initialized in
565 * the appropriate memory object. This routine is used to push
566 * pages into a copy-object when they are modified in the
567 * permanent object.
568 *
569 * The page is moved to a temporary object and paged out.
570 *
571 * In/out conditions:
572 * The page in question must not be on any pageout queues.
573 * The object to which it belongs must be locked.
574 * The page must be busy, but not hold a paging reference.
575 *
576 * Implementation:
577 * Move this page to a completely new object.
578 */
579 void
580 vm_pageout_initialize_page(
581 vm_page_t m)
582 {
583 vm_object_t object;
584 vm_object_offset_t paging_offset;
585 memory_object_t pager;
586
587 XPR(XPR_VM_PAGEOUT,
588 "vm_pageout_initialize_page, page 0x%X\n",
589 m, 0, 0, 0, 0);
590
591 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
592
593 object = VM_PAGE_OBJECT(m);
594
595 assert(m->vmp_busy);
596 assert(object->internal);
597
598 /*
599 * Verify that we really want to clean this page
600 */
601 assert(!m->vmp_absent);
602 assert(!m->vmp_error);
603 assert(m->vmp_dirty);
604
605 /*
606 * Create a paging reference to let us play with the object.
607 */
608 paging_offset = m->vmp_offset + object->paging_offset;
609
610 if (m->vmp_absent || m->vmp_error || m->vmp_restart || (!m->vmp_dirty && !m->vmp_precious)) {
611 panic("reservation without pageout?"); /* alan */
612
613 VM_PAGE_FREE(m);
614 vm_object_unlock(object);
615
616 return;
617 }
618
619 /*
620 * If there's no pager, then we can't clean the page. This should
621 * never happen since this should be a copy object and therefore not
622 * an external object, so the pager should always be there.
623 */
624
625 pager = object->pager;
626
627 if (pager == MEMORY_OBJECT_NULL) {
628 panic("missing pager for copy object");
629
630 VM_PAGE_FREE(m);
631 return;
632 }
633
634 /*
635 * set the page for future call to vm_fault_list_request
636 */
637 pmap_clear_modify(VM_PAGE_GET_PHYS_PAGE(m));
638 SET_PAGE_DIRTY(m, FALSE);
639
640 /*
641 * keep the object from collapsing or terminating
642 */
643 vm_object_paging_begin(object);
644 vm_object_unlock(object);
645
646 /*
647 * Write the data to its pager.
648 * Note that the data is passed by naming the new object,
649 * not a virtual address; the pager interface has been
650 * manipulated to use the "internal memory" data type.
651 * [The object reference from its allocation is donated
652 * to the eventual recipient.]
653 */
654 memory_object_data_initialize(pager, paging_offset, PAGE_SIZE);
655
656 vm_object_lock(object);
657 vm_object_paging_end(object);
658 }
659
660
661 /*
662 * vm_pageout_cluster:
663 *
664 * Given a page, queue it to the appropriate I/O thread,
665 * which will page it out and attempt to clean adjacent pages
666 * in the same operation.
667 *
668 * The object and queues must be locked. We will take a
669 * paging reference to prevent deallocation or collapse when we
670 * release the object lock back at the call site. The I/O thread
671  * is responsible for consuming this reference.
672 *
673 * The page must not be on any pageout queue.
674 */
675 #if DEVELOPMENT || DEBUG
676 vmct_stats_t vmct_stats;
677
678 int32_t vmct_active = 0;
679 uint64_t vm_compressor_epoch_start = 0;
680 uint64_t vm_compressor_epoch_stop = 0;
681
682 typedef enum vmct_state_t {
683 VMCT_IDLE,
684 VMCT_AWAKENED,
685 VMCT_ACTIVE,
686 } vmct_state_t;
687 vmct_state_t vmct_state[MAX_COMPRESSOR_THREAD_COUNT];
688 #endif
689
690
691 void
692 vm_pageout_cluster(vm_page_t m)
693 {
694 vm_object_t object = VM_PAGE_OBJECT(m);
695 struct vm_pageout_queue *q;
696
697
698 XPR(XPR_VM_PAGEOUT,
699 "vm_pageout_cluster, object 0x%X offset 0x%X page 0x%X\n",
700 object, m->vmp_offset, m, 0, 0);
701
702 VM_PAGE_CHECK(m);
703 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
704 vm_object_lock_assert_exclusive(object);
705
706 /*
707 * Only a certain kind of page is appreciated here.
708 */
709 assert((m->vmp_dirty || m->vmp_precious) && (!VM_PAGE_WIRED(m)));
710 assert(!m->vmp_cleaning && !m->vmp_laundry);
711 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
712
713 /*
714 * protect the object from collapse or termination
715 */
716 vm_object_activity_begin(object);
717
718 if (object->internal == TRUE) {
719 assert(VM_CONFIG_COMPRESSOR_IS_PRESENT);
720
721 m->vmp_busy = TRUE;
722
723 q = &vm_pageout_queue_internal;
724 } else
725 q = &vm_pageout_queue_external;
726
727 /*
728 * pgo_laundry count is tied to the laundry bit
729 */
730 m->vmp_laundry = TRUE;
731 q->pgo_laundry++;
732
733 m->vmp_q_state = VM_PAGE_ON_PAGEOUT_Q;
734 vm_page_queue_enter(&q->pgo_pending, m, vm_page_t, vmp_pageq);
735
736 if (q->pgo_idle == TRUE) {
737 q->pgo_idle = FALSE;
738 thread_wakeup((event_t) &q->pgo_pending);
739 }
740 VM_PAGE_CHECK(m);
741 }
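
#if 0
/*
 * Minimal sketch (not part of the original source) of the locking
 * contract described in the block comment above vm_pageout_cluster():
 * a hypothetical caller takes the object lock exclusively and the page
 * queues lock, checks that the page is eligible per the asserts in
 * vm_pageout_cluster(), and lets the I/O thread consume the paging
 * reference taken inside vm_pageout_cluster().
 */
static void
example_hand_page_to_pageout_thread(vm_page_t m)
{
	vm_object_t	object = VM_PAGE_OBJECT(m);

	vm_object_lock(object);			/* exclusive object lock */
	vm_page_lock_queues();			/* vm_page_queue_lock */

	if ((m->vmp_dirty || m->vmp_precious) &&
	    !VM_PAGE_WIRED(m) &&
	    !m->vmp_cleaning && !m->vmp_laundry &&
	    m->vmp_q_state == VM_PAGE_NOT_ON_Q) {
		/* takes a paging reference on 'object' for the I/O thread */
		vm_pageout_cluster(m);
	}
	vm_page_unlock_queues();
	vm_object_unlock(object);
}
#endif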
742
743
744 /*
745 * A page is back from laundry or we are stealing it back from
746 * the laundering state. See if there are some pages waiting to
747 * go to laundry and if we can let some of them go now.
748 *
749 * Object and page queues must be locked.
750 */
751 void
752 vm_pageout_throttle_up(
753 vm_page_t m)
754 {
755 struct vm_pageout_queue *q;
756 vm_object_t m_object;
757
758 m_object = VM_PAGE_OBJECT(m);
759
760 assert(m_object != VM_OBJECT_NULL);
761 assert(m_object != kernel_object);
762
763 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
764 vm_object_lock_assert_exclusive(m_object);
765
766 if (m_object->internal == TRUE)
767 q = &vm_pageout_queue_internal;
768 else
769 q = &vm_pageout_queue_external;
770
771 if (m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
772
773 vm_page_queue_remove(&q->pgo_pending, m, vm_page_t, vmp_pageq);
774 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
775
776 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
777
778 vm_object_activity_end(m_object);
779
780 VM_PAGEOUT_DEBUG(vm_page_steal_pageout_page, 1);
781 }
782 if (m->vmp_laundry == TRUE) {
783
784 m->vmp_laundry = FALSE;
785 q->pgo_laundry--;
786
787 if (q->pgo_throttled == TRUE) {
788 q->pgo_throttled = FALSE;
789 thread_wakeup((event_t) &q->pgo_laundry);
790 }
791 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
792 q->pgo_draining = FALSE;
793 thread_wakeup((event_t) (&q->pgo_laundry+1));
794 }
795 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, 1);
796 }
797 }
798
799
800 static void
801 vm_pageout_throttle_up_batch(
802 struct vm_pageout_queue *q,
803 int batch_cnt)
804 {
805 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
806
807 VM_PAGEOUT_DEBUG(vm_pageout_throttle_up_count, batch_cnt);
808
809 q->pgo_laundry -= batch_cnt;
810
811 if (q->pgo_throttled == TRUE) {
812 q->pgo_throttled = FALSE;
813 thread_wakeup((event_t) &q->pgo_laundry);
814 }
815 if (q->pgo_draining == TRUE && q->pgo_laundry == 0) {
816 q->pgo_draining = FALSE;
817 thread_wakeup((event_t) (&q->pgo_laundry+1));
818 }
819 }
820
821
822
823 /*
824 * VM memory pressure monitoring.
825 *
826 * vm_pageout_scan() keeps track of the number of pages it considers and
827 * reclaims, in the currently active vm_pageout_stat[vm_pageout_stat_now].
828 *
829 * compute_memory_pressure() is called every second from compute_averages()
830 * and moves "vm_pageout_stat_now" forward, to start accumulating the number
831  * of reclaimed pages in a new vm_pageout_stat[] bucket.
832 *
833 * mach_vm_pressure_monitor() collects past statistics about memory pressure.
834 * The caller provides the number of seconds ("nsecs") worth of statistics
835 * it wants, up to 30 seconds.
836 * It computes the number of pages reclaimed in the past "nsecs" seconds and
837 * also returns the number of pages the system still needs to reclaim at this
838 * moment in time.
839 */
840 #if DEVELOPMENT || DEBUG
841 #define VM_PAGEOUT_STAT_SIZE (30 * 8) + 1
842 #else
843 #define VM_PAGEOUT_STAT_SIZE (1 * 8) + 1
844 #endif
845 struct vm_pageout_stat {
846 unsigned long vm_page_active_count;
847 unsigned long vm_page_speculative_count;
848 unsigned long vm_page_inactive_count;
849 unsigned long vm_page_anonymous_count;
850
851 unsigned long vm_page_free_count;
852 unsigned long vm_page_wire_count;
853 unsigned long vm_page_compressor_count;
854
855 unsigned long vm_page_pages_compressed;
856 unsigned long vm_page_pageable_internal_count;
857 unsigned long vm_page_pageable_external_count;
858 unsigned long vm_page_xpmapped_external_count;
859
860 unsigned int pages_grabbed;
861 unsigned int pages_freed;
862
863 unsigned int pages_compressed;
864 unsigned int pages_grabbed_by_compressor;
865 unsigned int failed_compressions;
866
867 unsigned int pages_evicted;
868 unsigned int pages_purged;
869
870 unsigned int considered;
871 unsigned int considered_bq_internal;
872 unsigned int considered_bq_external;
873
874 unsigned int skipped_external;
875 unsigned int filecache_min_reactivations;
876
877 unsigned int freed_speculative;
878 unsigned int freed_cleaned;
879 unsigned int freed_internal;
880 unsigned int freed_external;
881
882 unsigned int cleaned_dirty_external;
883 unsigned int cleaned_dirty_internal;
884
885 unsigned int inactive_referenced;
886 unsigned int inactive_nolock;
887 unsigned int reactivation_limit_exceeded;
888 unsigned int forced_inactive_reclaim;
889
890 unsigned int throttled_internal_q;
891 unsigned int throttled_external_q;
892
893 unsigned int phantom_ghosts_found;
894 unsigned int phantom_ghosts_added;
895 } vm_pageout_stats[VM_PAGEOUT_STAT_SIZE] = {{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, };
896
897 unsigned int vm_pageout_stat_now = 0;
898
899 #define VM_PAGEOUT_STAT_BEFORE(i) \
900 (((i) == 0) ? VM_PAGEOUT_STAT_SIZE - 1 : (i) - 1)
901 #define VM_PAGEOUT_STAT_AFTER(i) \
902 (((i) == VM_PAGEOUT_STAT_SIZE - 1) ? 0 : (i) + 1)
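
/*
 * Illustrative example (not part of the original source): with the
 * non-DEVELOPMENT VM_PAGEOUT_STAT_SIZE of (1 * 8) + 1 = 9 slots, the
 * vm_pageout_stats[] array behaves as a circular ring indexed by
 * vm_pageout_stat_now:
 *
 *	VM_PAGEOUT_STAT_BEFORE(0) == 8		(wraps to the last slot)
 *	VM_PAGEOUT_STAT_BEFORE(5) == 4
 *	VM_PAGEOUT_STAT_AFTER(8)  == 0		(wraps to the first slot)
 *
 * record_memory_pressure() advances "now" with VM_PAGEOUT_STAT_AFTER()
 * and zeroes the new slot, while mach_vm_pressure_monitor() walks
 * backwards through history with VM_PAGEOUT_STAT_BEFORE().
 */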
903
904 #if VM_PAGE_BUCKETS_CHECK
905 int vm_page_buckets_check_interval = 80; /* in eighths of a second */
906 #endif /* VM_PAGE_BUCKETS_CHECK */
907
908
909 void
910 record_memory_pressure(void);
911 void
912 record_memory_pressure(void)
913 {
914 unsigned int vm_pageout_next;
915
916 #if VM_PAGE_BUCKETS_CHECK
917 /* check the consistency of VM page buckets at regular interval */
918 static int counter = 0;
919 if ((++counter % vm_page_buckets_check_interval) == 0) {
920 vm_page_buckets_check();
921 }
922 #endif /* VM_PAGE_BUCKETS_CHECK */
923
924 vm_pageout_state.vm_memory_pressure =
925 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_speculative +
926 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_cleaned +
927 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_internal +
928 vm_pageout_stats[VM_PAGEOUT_STAT_BEFORE(vm_pageout_stat_now)].freed_external;
929
930 commpage_set_memory_pressure( (unsigned int)vm_pageout_state.vm_memory_pressure );
931
932 /* move "now" forward */
933 vm_pageout_next = VM_PAGEOUT_STAT_AFTER(vm_pageout_stat_now);
934
935 bzero(&vm_pageout_stats[vm_pageout_next], sizeof(struct vm_pageout_stat));
936
937 vm_pageout_stat_now = vm_pageout_next;
938 }
939
940
941 /*
942 * IMPORTANT
943 * mach_vm_ctl_page_free_wanted() is called indirectly, via
944 * mach_vm_pressure_monitor(), when taking a stackshot. Therefore,
945 * it must be safe in the restricted stackshot context. Locks and/or
946 * blocking are not allowable.
947 */
948 unsigned int
949 mach_vm_ctl_page_free_wanted(void)
950 {
951 unsigned int page_free_target, page_free_count, page_free_wanted;
952
953 page_free_target = vm_page_free_target;
954 page_free_count = vm_page_free_count;
955 if (page_free_target > page_free_count) {
956 page_free_wanted = page_free_target - page_free_count;
957 } else {
958 page_free_wanted = 0;
959 }
960
961 return page_free_wanted;
962 }
963
964
965 /*
966 * IMPORTANT:
967 * mach_vm_pressure_monitor() is called when taking a stackshot, with
968 * wait_for_pressure FALSE, so that code path must remain safe in the
969  * restricted stackshot context. No blocking or locks are allowable
970  * on that code path.
971 */
972
973 kern_return_t
974 mach_vm_pressure_monitor(
975 boolean_t wait_for_pressure,
976 unsigned int nsecs_monitored,
977 unsigned int *pages_reclaimed_p,
978 unsigned int *pages_wanted_p)
979 {
980 wait_result_t wr;
981 unsigned int vm_pageout_then, vm_pageout_now;
982 unsigned int pages_reclaimed;
983 unsigned int units_of_monitor;
984
985 units_of_monitor = 8 * nsecs_monitored;
986 /*
987 * We don't take the vm_page_queue_lock here because we don't want
988 * vm_pressure_monitor() to get in the way of the vm_pageout_scan()
989 * thread when it's trying to reclaim memory. We don't need fully
990 * accurate monitoring anyway...
991 */
992
993 if (wait_for_pressure) {
994 /* wait until there's memory pressure */
995 while (vm_page_free_count >= vm_page_free_target) {
996 wr = assert_wait((event_t) &vm_page_free_wanted,
997 THREAD_INTERRUPTIBLE);
998 if (wr == THREAD_WAITING) {
999 wr = thread_block(THREAD_CONTINUE_NULL);
1000 }
1001 if (wr == THREAD_INTERRUPTED) {
1002 return KERN_ABORTED;
1003 }
1004 if (wr == THREAD_AWAKENED) {
1005 /*
1006 * The memory pressure might have already
1007 * been relieved but let's not block again
1008 * and let's report that there was memory
1009 * pressure at some point.
1010 */
1011 break;
1012 }
1013 }
1014 }
1015
1016 /* provide the number of pages the system wants to reclaim */
1017 if (pages_wanted_p != NULL) {
1018 *pages_wanted_p = mach_vm_ctl_page_free_wanted();
1019 }
1020
1021 if (pages_reclaimed_p == NULL) {
1022 return KERN_SUCCESS;
1023 }
1024
1025 /* provide number of pages reclaimed in the last "nsecs_monitored" */
1026 vm_pageout_now = vm_pageout_stat_now;
1027 pages_reclaimed = 0;
1028 for (vm_pageout_then =
1029 VM_PAGEOUT_STAT_BEFORE(vm_pageout_now);
1030 vm_pageout_then != vm_pageout_now &&
1031 units_of_monitor-- != 0;
1032 vm_pageout_then =
1033 VM_PAGEOUT_STAT_BEFORE(vm_pageout_then)) {
1034 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_speculative;
1035 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_cleaned;
1036 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_internal;
1037 pages_reclaimed += vm_pageout_stats[vm_pageout_then].freed_external;
1038 }
1039 *pages_reclaimed_p = pages_reclaimed;
1040
1041 return KERN_SUCCESS;
1042 }
1043
1044
1045
1046 #if DEVELOPMENT || DEBUG
1047
1048 static void
1049 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *, int);
1050
1051 /*
1052 * condition variable used to make sure there is
1053 * only a single sweep going on at a time
1054 */
1055 boolean_t vm_pageout_disconnect_all_pages_active = FALSE;
1056
1057
1058 void
1059 vm_pageout_disconnect_all_pages()
1060 {
1061 vm_page_lock_queues();
1062
1063 if (vm_pageout_disconnect_all_pages_active == TRUE) {
1064 vm_page_unlock_queues();
1065 return;
1066 }
1067 vm_pageout_disconnect_all_pages_active = TRUE;
1068 vm_page_unlock_queues();
1069
1070 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1071 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1072 vm_pageout_disconnect_all_pages_in_queue(&vm_page_queue_active, vm_page_active_count);
1073
1074 vm_pageout_disconnect_all_pages_active = FALSE;
1075 }
1076
1077
1078 void
1079 vm_pageout_disconnect_all_pages_in_queue(vm_page_queue_head_t *q, int qcount)
1080 {
1081 vm_page_t m;
1082 vm_object_t t_object = NULL;
1083 vm_object_t l_object = NULL;
1084 vm_object_t m_object = NULL;
1085 int delayed_unlock = 0;
1086 int try_failed_count = 0;
1087 int disconnected_count = 0;
1088 int paused_count = 0;
1089 int object_locked_count = 0;
1090
1091 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_START,
1092 q, qcount, 0, 0, 0);
1093
1094 vm_page_lock_queues();
1095
1096 while (qcount && !vm_page_queue_empty(q)) {
1097
1098 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1099
1100 m = (vm_page_t) vm_page_queue_first(q);
1101 m_object = VM_PAGE_OBJECT(m);
1102
1103 /*
1104 * check to see if we currently are working
1105 * with the same object... if so, we've
1106 * already got the lock
1107 */
1108 if (m_object != l_object) {
1109 /*
1110 * the object associated with candidate page is
1111 * different from the one we were just working
1112 * with... dump the lock if we still own it
1113 */
1114 if (l_object != NULL) {
1115 vm_object_unlock(l_object);
1116 l_object = NULL;
1117 }
1118 if (m_object != t_object)
1119 try_failed_count = 0;
1120
1121 /*
1122 			 * Try to lock the object; since we've already got the
1123 			 * page queues lock, we can only 'try' for this one.
1124 			 * If the 'try' fails, we need to do a mutex_pause
1125 * to allow the owner of the object lock a chance to
1126 * run...
1127 */
1128 if ( !vm_object_lock_try_scan(m_object)) {
1129
1130 if (try_failed_count > 20) {
1131 goto reenter_pg_on_q;
1132 }
1133 vm_page_unlock_queues();
1134 mutex_pause(try_failed_count++);
1135 vm_page_lock_queues();
1136 delayed_unlock = 0;
1137
1138 paused_count++;
1139
1140 t_object = m_object;
1141 continue;
1142 }
1143 object_locked_count++;
1144
1145 l_object = m_object;
1146 }
1147 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1148 /*
1149 * put it back on the head of its queue
1150 */
1151 goto reenter_pg_on_q;
1152 }
1153 if (m->vmp_pmapped == TRUE) {
1154
1155 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
1156
1157 disconnected_count++;
1158 }
1159 reenter_pg_on_q:
1160 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1161 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1162
1163 qcount--;
1164 try_failed_count = 0;
1165
1166 if (delayed_unlock++ > 128) {
1167
1168 if (l_object != NULL) {
1169 vm_object_unlock(l_object);
1170 l_object = NULL;
1171 }
1172 lck_mtx_yield(&vm_page_queue_lock);
1173 delayed_unlock = 0;
1174 }
1175 }
1176 if (l_object != NULL) {
1177 vm_object_unlock(l_object);
1178 l_object = NULL;
1179 }
1180 vm_page_unlock_queues();
1181
1182 KERNEL_DEBUG_CONSTANT_IST(KDEBUG_TRACE, (MACHDBG_CODE(DBG_MACH_WORKINGSET, VM_DISCONNECT_ALL_PAGE_MAPPINGS)) | DBG_FUNC_END,
1183 q, disconnected_count, object_locked_count, paused_count, 0);
1184 }
1185
1186 #endif
1187
1188
1189 static void
1190 vm_pageout_page_queue(vm_page_queue_head_t *, int);
1191
1192 /*
1193 * condition variable used to make sure there is
1194 * only a single sweep going on at a time
1195 */
1196 boolean_t vm_pageout_anonymous_pages_active = FALSE;
1197
1198
1199 void
1200 vm_pageout_anonymous_pages()
1201 {
1202 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
1203
1204 vm_page_lock_queues();
1205
1206 if (vm_pageout_anonymous_pages_active == TRUE) {
1207 vm_page_unlock_queues();
1208 return;
1209 }
1210 vm_pageout_anonymous_pages_active = TRUE;
1211 vm_page_unlock_queues();
1212
1213 vm_pageout_page_queue(&vm_page_queue_throttled, vm_page_throttled_count);
1214 vm_pageout_page_queue(&vm_page_queue_anonymous, vm_page_anonymous_count);
1215 vm_pageout_page_queue(&vm_page_queue_active, vm_page_active_count);
1216
1217 if (VM_CONFIG_SWAP_IS_PRESENT)
1218 vm_consider_swapping();
1219
1220 vm_page_lock_queues();
1221 vm_pageout_anonymous_pages_active = FALSE;
1222 vm_page_unlock_queues();
1223 }
1224 }
1225
1226
1227 void
1228 vm_pageout_page_queue(vm_page_queue_head_t *q, int qcount)
1229 {
1230 vm_page_t m;
1231 vm_object_t t_object = NULL;
1232 vm_object_t l_object = NULL;
1233 vm_object_t m_object = NULL;
1234 int delayed_unlock = 0;
1235 int try_failed_count = 0;
1236 int refmod_state;
1237 int pmap_options;
1238 struct vm_pageout_queue *iq;
1239 ppnum_t phys_page;
1240
1241
1242 iq = &vm_pageout_queue_internal;
1243
1244 vm_page_lock_queues();
1245
1246 while (qcount && !vm_page_queue_empty(q)) {
1247
1248 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1249
1250 if (VM_PAGE_Q_THROTTLED(iq)) {
1251
1252 if (l_object != NULL) {
1253 vm_object_unlock(l_object);
1254 l_object = NULL;
1255 }
1256 iq->pgo_draining = TRUE;
1257
1258 assert_wait((event_t) (&iq->pgo_laundry + 1), THREAD_INTERRUPTIBLE);
1259 vm_page_unlock_queues();
1260
1261 thread_block(THREAD_CONTINUE_NULL);
1262
1263 vm_page_lock_queues();
1264 delayed_unlock = 0;
1265 continue;
1266 }
1267 m = (vm_page_t) vm_page_queue_first(q);
1268 m_object = VM_PAGE_OBJECT(m);
1269
1270 /*
1271 * check to see if we currently are working
1272 * with the same object... if so, we've
1273 * already got the lock
1274 */
1275 if (m_object != l_object) {
1276 if ( !m_object->internal)
1277 goto reenter_pg_on_q;
1278
1279 /*
1280 * the object associated with candidate page is
1281 * different from the one we were just working
1282 * with... dump the lock if we still own it
1283 */
1284 if (l_object != NULL) {
1285 vm_object_unlock(l_object);
1286 l_object = NULL;
1287 }
1288 if (m_object != t_object)
1289 try_failed_count = 0;
1290
1291 /*
1292 			 * Try to lock the object; since we've already got the
1293 			 * page queues lock, we can only 'try' for this one.
1294 			 * If the 'try' fails, we need to do a mutex_pause
1295 * to allow the owner of the object lock a chance to
1296 * run...
1297 */
1298 if ( !vm_object_lock_try_scan(m_object)) {
1299
1300 if (try_failed_count > 20) {
1301 goto reenter_pg_on_q;
1302 }
1303 vm_page_unlock_queues();
1304 mutex_pause(try_failed_count++);
1305 vm_page_lock_queues();
1306 delayed_unlock = 0;
1307
1308 t_object = m_object;
1309 continue;
1310 }
1311 l_object = m_object;
1312 }
1313 if ( !m_object->alive || m->vmp_cleaning || m->vmp_laundry || m->vmp_busy || m->vmp_absent || m->vmp_error || m->vmp_free_when_done) {
1314 /*
1315 * page is not to be cleaned
1316 * put it back on the head of its queue
1317 */
1318 goto reenter_pg_on_q;
1319 }
1320 phys_page = VM_PAGE_GET_PHYS_PAGE(m);
1321
1322 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
1323 refmod_state = pmap_get_refmod(phys_page);
1324
1325 if (refmod_state & VM_MEM_REFERENCED)
1326 m->vmp_reference = TRUE;
1327 if (refmod_state & VM_MEM_MODIFIED) {
1328 SET_PAGE_DIRTY(m, FALSE);
1329 }
1330 }
1331 if (m->vmp_reference == TRUE) {
1332 m->vmp_reference = FALSE;
1333 pmap_clear_refmod_options(phys_page, VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1334 goto reenter_pg_on_q;
1335 }
1336 if (m->vmp_pmapped == TRUE) {
1337 if (m->vmp_dirty || m->vmp_precious) {
1338 pmap_options = PMAP_OPTIONS_COMPRESSOR;
1339 } else {
1340 pmap_options = PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1341 }
1342 refmod_state = pmap_disconnect_options(phys_page, pmap_options, NULL);
1343 if (refmod_state & VM_MEM_MODIFIED) {
1344 SET_PAGE_DIRTY(m, FALSE);
1345 }
1346 }
1347
1348 if ( !m->vmp_dirty && !m->vmp_precious) {
1349 vm_page_unlock_queues();
1350 VM_PAGE_FREE(m);
1351 vm_page_lock_queues();
1352 delayed_unlock = 0;
1353
1354 goto next_pg;
1355 }
1356 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL) {
1357
1358 if (!m_object->pager_initialized) {
1359
1360 vm_page_unlock_queues();
1361
1362 vm_object_collapse(m_object, (vm_object_offset_t) 0, TRUE);
1363
1364 if (!m_object->pager_initialized)
1365 vm_object_compressor_pager_create(m_object);
1366
1367 vm_page_lock_queues();
1368 delayed_unlock = 0;
1369 }
1370 if (!m_object->pager_initialized || m_object->pager == MEMORY_OBJECT_NULL)
1371 goto reenter_pg_on_q;
1372 /*
1373 * vm_object_compressor_pager_create will drop the object lock
1374 * which means 'm' may no longer be valid to use
1375 */
1376 continue;
1377 }
1378 /*
1379 * we've already factored out pages in the laundry which
1380 * means this page can't be on the pageout queue so it's
1381 * safe to do the vm_page_queues_remove
1382 */
1383 vm_page_queues_remove(m, TRUE);
1384
1385 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1386
1387 vm_pageout_cluster(m);
1388
1389 goto next_pg;
1390
1391 reenter_pg_on_q:
1392 vm_page_queue_remove(q, m, vm_page_t, vmp_pageq);
1393 vm_page_queue_enter(q, m, vm_page_t, vmp_pageq);
1394 next_pg:
1395 qcount--;
1396 try_failed_count = 0;
1397
1398 if (delayed_unlock++ > 128) {
1399
1400 if (l_object != NULL) {
1401 vm_object_unlock(l_object);
1402 l_object = NULL;
1403 }
1404 lck_mtx_yield(&vm_page_queue_lock);
1405 delayed_unlock = 0;
1406 }
1407 }
1408 if (l_object != NULL) {
1409 vm_object_unlock(l_object);
1410 l_object = NULL;
1411 }
1412 vm_page_unlock_queues();
1413 }
1414
1415
1416
1417 /*
1418 * function in BSD to apply I/O throttle to the pageout thread
1419 */
1420 extern void vm_pageout_io_throttle(void);
1421
1422 #define VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, obj) \
1423 MACRO_BEGIN \
1424 /* \
1425 * If a "reusable" page somehow made it back into \
1426 * the active queue, it's been re-used and is not \
1427 * quite re-usable. \
1428 * If the VM object was "all_reusable", consider it \
1429 * as "all re-used" instead of converting it to \
1430 * "partially re-used", which could be expensive. \
1431 */ \
1432 assert(VM_PAGE_OBJECT((m)) == (obj)); \
1433 if ((m)->vmp_reusable || \
1434 (obj)->all_reusable) { \
1435 vm_object_reuse_pages((obj), \
1436 (m)->vmp_offset, \
1437 (m)->vmp_offset + PAGE_SIZE_64, \
1438 FALSE); \
1439 } \
1440 MACRO_END
1441
1442
1443 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT 64
1444 #define VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX 1024
1445
1446 #define FCS_IDLE 0
1447 #define FCS_DELAYED 1
1448 #define FCS_DEADLOCK_DETECTED 2
1449
1450 struct flow_control {
1451 int state;
1452 mach_timespec_t ts;
1453 };
1454
1455
1456 #if CONFIG_BACKGROUND_QUEUE
1457 uint64_t vm_pageout_rejected_bq_internal = 0;
1458 uint64_t vm_pageout_rejected_bq_external = 0;
1459 uint64_t vm_pageout_skipped_bq_internal = 0;
1460 #endif
1461
1462 #define ANONS_GRABBED_LIMIT 2
1463
1464
1465 #if 0
1466 static void vm_pageout_delayed_unlock(int *, int *, vm_page_t *);
1467 #endif
1468 static void vm_pageout_prepare_to_block(vm_object_t *, int *, vm_page_t *, int *, int);
1469
1470 #define VM_PAGEOUT_PB_NO_ACTION 0
1471 #define VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER 1
1472 #define VM_PAGEOUT_PB_THREAD_YIELD 2
1473
1474
1475 #if 0
1476 static void
1477 vm_pageout_delayed_unlock(int *delayed_unlock, int *local_freed, vm_page_t *local_freeq)
1478 {
1479 if (*local_freeq) {
1480 vm_page_unlock_queues();
1481
1482 VM_DEBUG_CONSTANT_EVENT(
1483 vm_pageout_freelist, VM_PAGEOUT_FREELIST, DBG_FUNC_START,
1484 vm_page_free_count, 0, 0, 1);
1485
1486 vm_page_free_list(*local_freeq, TRUE);
1487
1488 VM_DEBUG_CONSTANT_EVENT(vm_pageout_freelist,VM_PAGEOUT_FREELIST, DBG_FUNC_END,
1489 vm_page_free_count, *local_freed, 0, 1);
1490
1491 *local_freeq = NULL;
1492 *local_freed = 0;
1493
1494 vm_page_lock_queues();
1495 } else {
1496 lck_mtx_yield(&vm_page_queue_lock);
1497 }
1498 *delayed_unlock = 1;
1499 }
1500 #endif
1501
1502
1503 static void
1504 vm_pageout_prepare_to_block(vm_object_t *object, int *delayed_unlock,
1505 vm_page_t *local_freeq, int *local_freed, int action)
1506 {
1507 vm_page_unlock_queues();
1508
1509 if (*object != NULL) {
1510 vm_object_unlock(*object);
1511 *object = NULL;
1512 }
1513 if (*local_freeq) {
1514
1515 vm_page_free_list(*local_freeq, TRUE);
1516
1517 *local_freeq = NULL;
1518 *local_freed = 0;
1519 }
1520 *delayed_unlock = 1;
1521
1522 switch (action) {
1523
1524 case VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER:
1525 vm_consider_waking_compactor_swapper();
1526 break;
1527 case VM_PAGEOUT_PB_THREAD_YIELD:
1528 thread_yield_internal(1);
1529 break;
1530 case VM_PAGEOUT_PB_NO_ACTION:
1531 default:
1532 break;
1533 }
1534 vm_page_lock_queues();
1535 }
1536
1537
1538 static struct vm_pageout_vminfo last;
1539
1540 uint64_t last_vm_page_pages_grabbed = 0;
1541
1542 extern uint32_t c_segment_pages_compressed;
1543
1544 extern uint64_t shared_region_pager_reclaimed;
1545 extern struct memory_object_pager_ops shared_region_pager_ops;
1546
1547 void update_vm_info(void)
1548 {
1549 uint64_t tmp;
1550
1551 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count = vm_page_active_count;
1552 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count = vm_page_speculative_count;
1553 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count = vm_page_inactive_count;
1554 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count = vm_page_anonymous_count;
1555
1556 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count = vm_page_free_count;
1557 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count = vm_page_wire_count;
1558 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count = VM_PAGE_COMPRESSOR_COUNT;
1559
1560 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed = c_segment_pages_compressed;
1561 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count = vm_page_pageable_internal_count;
1562 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count = vm_page_pageable_external_count;
1563 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count = vm_page_xpmapped_external_count;
1564
1565
1566 tmp = vm_pageout_vminfo.vm_pageout_considered_page;
1567 vm_pageout_stats[vm_pageout_stat_now].considered = (unsigned int)(tmp - last.vm_pageout_considered_page);
1568 last.vm_pageout_considered_page = tmp;
1569
1570 tmp = vm_pageout_vminfo.vm_pageout_compressions;
1571 vm_pageout_stats[vm_pageout_stat_now].pages_compressed = (unsigned int)(tmp - last.vm_pageout_compressions);
1572 last.vm_pageout_compressions = tmp;
1573
1574 tmp = vm_pageout_vminfo.vm_compressor_failed;
1575 vm_pageout_stats[vm_pageout_stat_now].failed_compressions = (unsigned int)(tmp - last.vm_compressor_failed);
1576 last.vm_compressor_failed = tmp;
1577
1578 tmp = vm_pageout_vminfo.vm_compressor_pages_grabbed;
1579 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor = (unsigned int)(tmp - last.vm_compressor_pages_grabbed);
1580 last.vm_compressor_pages_grabbed = tmp;
1581
1582 tmp = vm_pageout_vminfo.vm_phantom_cache_found_ghost;
1583 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found = (unsigned int)(tmp - last.vm_phantom_cache_found_ghost);
1584 last.vm_phantom_cache_found_ghost = tmp;
1585
1586 tmp = vm_pageout_vminfo.vm_phantom_cache_added_ghost;
1587 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added = (unsigned int)(tmp - last.vm_phantom_cache_added_ghost);
1588 last.vm_phantom_cache_added_ghost = tmp;
1589
1590 tmp = get_pages_grabbed_count();
1591 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed = (unsigned int)(tmp - last_vm_page_pages_grabbed);
1592 last_vm_page_pages_grabbed = tmp;
1593
1594 tmp = vm_pageout_vminfo.vm_page_pages_freed;
1595 vm_pageout_stats[vm_pageout_stat_now].pages_freed = (unsigned int)(tmp - last.vm_page_pages_freed);
1596 last.vm_page_pages_freed = tmp;
1597
1598
1599 if (vm_pageout_stats[vm_pageout_stat_now].considered) {
1600
1601 tmp = vm_pageout_vminfo.vm_pageout_pages_evicted;
1602 vm_pageout_stats[vm_pageout_stat_now].pages_evicted = (unsigned int)(tmp - last.vm_pageout_pages_evicted);
1603 last.vm_pageout_pages_evicted = tmp;
1604
1605 tmp = vm_pageout_vminfo.vm_pageout_pages_purged;
1606 vm_pageout_stats[vm_pageout_stat_now].pages_purged = (unsigned int)(tmp - last.vm_pageout_pages_purged);
1607 last.vm_pageout_pages_purged = tmp;
1608
1609 tmp = vm_pageout_vminfo.vm_pageout_freed_speculative;
1610 vm_pageout_stats[vm_pageout_stat_now].freed_speculative = (unsigned int)(tmp - last.vm_pageout_freed_speculative);
1611 last.vm_pageout_freed_speculative = tmp;
1612
1613 tmp = vm_pageout_vminfo.vm_pageout_freed_external;
1614 vm_pageout_stats[vm_pageout_stat_now].freed_external = (unsigned int)(tmp - last.vm_pageout_freed_external);
1615 last.vm_pageout_freed_external = tmp;
1616
1617 tmp = vm_pageout_vminfo.vm_pageout_inactive_referenced;
1618 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced = (unsigned int)(tmp - last.vm_pageout_inactive_referenced);
1619 last.vm_pageout_inactive_referenced = tmp;
1620
1621 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external;
1622 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_external);
1623 last.vm_pageout_scan_inactive_throttled_external = tmp;
1624
1625 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_external;
1626 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_external);
1627 last.vm_pageout_inactive_dirty_external = tmp;
1628
1629 tmp = vm_pageout_vminfo.vm_pageout_freed_cleaned;
1630 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned = (unsigned int)(tmp - last.vm_pageout_freed_cleaned);
1631 last.vm_pageout_freed_cleaned = tmp;
1632
1633 tmp = vm_pageout_vminfo.vm_pageout_inactive_nolock;
1634 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock = (unsigned int)(tmp - last.vm_pageout_inactive_nolock);
1635 last.vm_pageout_inactive_nolock = tmp;
1636
1637 tmp = vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal;
1638 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q = (unsigned int)(tmp - last.vm_pageout_scan_inactive_throttled_internal);
1639 last.vm_pageout_scan_inactive_throttled_internal = tmp;
1640
1641 tmp = vm_pageout_vminfo.vm_pageout_skipped_external;
1642 vm_pageout_stats[vm_pageout_stat_now].skipped_external = (unsigned int)(tmp - last.vm_pageout_skipped_external);
1643 last.vm_pageout_skipped_external = tmp;
1644
1645 tmp = vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded;
1646 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded = (unsigned int)(tmp - last.vm_pageout_reactivation_limit_exceeded);
1647 last.vm_pageout_reactivation_limit_exceeded = tmp;
1648
1649 tmp = vm_pageout_vminfo.vm_pageout_inactive_force_reclaim;
1650 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim = (unsigned int)(tmp - last.vm_pageout_inactive_force_reclaim);
1651 last.vm_pageout_inactive_force_reclaim = tmp;
1652
1653 tmp = vm_pageout_vminfo.vm_pageout_freed_internal;
1654 vm_pageout_stats[vm_pageout_stat_now].freed_internal = (unsigned int)(tmp - last.vm_pageout_freed_internal);
1655 last.vm_pageout_freed_internal = tmp;
1656
1657 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_internal;
1658 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal = (unsigned int)(tmp - last.vm_pageout_considered_bq_internal);
1659 last.vm_pageout_considered_bq_internal = tmp;
1660
1661 tmp = vm_pageout_vminfo.vm_pageout_considered_bq_external;
1662 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external = (unsigned int)(tmp - last.vm_pageout_considered_bq_external);
1663 last.vm_pageout_considered_bq_external = tmp;
1664
1665 tmp = vm_pageout_vminfo.vm_pageout_filecache_min_reactivated;
1666 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations = (unsigned int)(tmp - last.vm_pageout_filecache_min_reactivated);
1667 last.vm_pageout_filecache_min_reactivated = tmp;
1668
1669 tmp = vm_pageout_vminfo.vm_pageout_inactive_dirty_internal;
1670 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal = (unsigned int)(tmp - last.vm_pageout_inactive_dirty_internal);
1671 last.vm_pageout_inactive_dirty_internal = tmp;
1672 }
1673
1674 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO1)) | DBG_FUNC_NONE,
1675 vm_pageout_stats[vm_pageout_stat_now].vm_page_active_count,
1676 vm_pageout_stats[vm_pageout_stat_now].vm_page_speculative_count,
1677 vm_pageout_stats[vm_pageout_stat_now].vm_page_inactive_count,
1678 vm_pageout_stats[vm_pageout_stat_now].vm_page_anonymous_count,
1679 0);
1680
1681 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO2)) | DBG_FUNC_NONE,
1682 vm_pageout_stats[vm_pageout_stat_now].vm_page_free_count,
1683 vm_pageout_stats[vm_pageout_stat_now].vm_page_wire_count,
1684 vm_pageout_stats[vm_pageout_stat_now].vm_page_compressor_count,
1685 0,
1686 0);
1687
1688 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO3)) | DBG_FUNC_NONE,
1689 vm_pageout_stats[vm_pageout_stat_now].vm_page_pages_compressed,
1690 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_internal_count,
1691 vm_pageout_stats[vm_pageout_stat_now].vm_page_pageable_external_count,
1692 vm_pageout_stats[vm_pageout_stat_now].vm_page_xpmapped_external_count,
1693 0);
1694
1695 if (vm_pageout_stats[vm_pageout_stat_now].considered ||
1696 vm_pageout_stats[vm_pageout_stat_now].pages_compressed ||
1697 vm_pageout_stats[vm_pageout_stat_now].failed_compressions) {
1698
1699 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO4)) | DBG_FUNC_NONE,
1700 vm_pageout_stats[vm_pageout_stat_now].considered,
1701 vm_pageout_stats[vm_pageout_stat_now].freed_speculative,
1702 vm_pageout_stats[vm_pageout_stat_now].freed_external,
1703 vm_pageout_stats[vm_pageout_stat_now].inactive_referenced,
1704 0);
1705
1706 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO5)) | DBG_FUNC_NONE,
1707 vm_pageout_stats[vm_pageout_stat_now].throttled_external_q,
1708 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_external,
1709 vm_pageout_stats[vm_pageout_stat_now].freed_cleaned,
1710 vm_pageout_stats[vm_pageout_stat_now].inactive_nolock,
1711 0);
1712
1713 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO6)) | DBG_FUNC_NONE,
1714 vm_pageout_stats[vm_pageout_stat_now].throttled_internal_q,
1715 vm_pageout_stats[vm_pageout_stat_now].pages_compressed,
1716 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed_by_compressor,
1717 vm_pageout_stats[vm_pageout_stat_now].skipped_external,
1718 0);
1719
1720 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO7)) | DBG_FUNC_NONE,
1721 vm_pageout_stats[vm_pageout_stat_now].reactivation_limit_exceeded,
1722 vm_pageout_stats[vm_pageout_stat_now].forced_inactive_reclaim,
1723 vm_pageout_stats[vm_pageout_stat_now].failed_compressions,
1724 vm_pageout_stats[vm_pageout_stat_now].freed_internal,
1725 0);
1726
1727 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO8)) | DBG_FUNC_NONE,
1728 vm_pageout_stats[vm_pageout_stat_now].considered_bq_internal,
1729 vm_pageout_stats[vm_pageout_stat_now].considered_bq_external,
1730 vm_pageout_stats[vm_pageout_stat_now].filecache_min_reactivations,
1731 vm_pageout_stats[vm_pageout_stat_now].cleaned_dirty_internal,
1732 0);
1733
1734 }
1735 KERNEL_DEBUG_CONSTANT((MACHDBG_CODE(DBG_MACH_VM, VM_INFO9)) | DBG_FUNC_NONE,
1736 vm_pageout_stats[vm_pageout_stat_now].pages_grabbed,
1737 vm_pageout_stats[vm_pageout_stat_now].pages_freed,
1738 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_found,
1739 vm_pageout_stats[vm_pageout_stat_now].phantom_ghosts_added,
1740 0);
1741
1742 record_memory_pressure();
1743 }
1744
1745 extern boolean_t hibernation_vmqueues_inspection;
1746
1747 void
1748 vm_page_balance_inactive(int max_to_move)
1749 {
1750 vm_page_t m;
1751
1752 LCK_MTX_ASSERT(&vm_page_queue_lock, LCK_MTX_ASSERT_OWNED);
1753
1754 if (hibernation_vmqueues_inspection == TRUE) {
1755 /*
1756 * It is likely that the hibernation code path is
1757 * dealing with these very queues as we are about
1758 * to move pages around in/from them and completely
1759 * change the linkage of the pages.
1760 *
1761 * And so we skip the rebalancing of these queues.
1762 */
1763 return;
1764 }
1765 vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
1766 vm_page_inactive_count +
1767 vm_page_speculative_count);
1768
1769 while (max_to_move-- && (vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) {
1770
1771 VM_PAGEOUT_DEBUG(vm_pageout_balanced, 1);
1772
1773 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
1774
1775 assert(m->vmp_q_state == VM_PAGE_ON_ACTIVE_Q);
1776 assert(!m->vmp_laundry);
1777 assert(VM_PAGE_OBJECT(m) != kernel_object);
1778 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
1779
1780 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
1781
1782 /*
1783 * by not passing in a pmap_flush_context we will forgo any TLB flushing, local or otherwise...
1784 *
1785 * a TLB flush isn't really needed here since at worst we'll miss the reference bit being
1786 * updated in the PTE if a remote processor still has this mapping cached in its TLB when the
1787 * new reference happens. If no further references happen on the page after that remote TLB is flushed
1788 * we'll see a clean, non-referenced page when it eventually gets pulled out of the inactive queue
1789 * by pageout_scan, which is just fine since the last reference would have happened quite far
1790 * in the past (TLB caches don't hang around for very long), and of course could just as easily
1791 * have happened before we moved the page
1792 */
1793 if (m->vmp_pmapped == TRUE)
1794 pmap_clear_refmod_options(VM_PAGE_GET_PHYS_PAGE(m), VM_MEM_REFERENCED, PMAP_OPTIONS_NOFLUSH, (void *)NULL);
1795
1796 /*
1797 * The page might be absent or busy,
1798 * but vm_page_deactivate can handle that.
1799 * FALSE indicates that we don't want a H/W clear reference
1800 */
1801 vm_page_deactivate_internal(m, FALSE);
1802 }
1803 }
1804
1805
1806 /*
1807 * vm_pageout_scan does the dirty work for the pageout daemon.
1808 * It returns with both vm_page_queue_free_lock and vm_page_queue_lock
1809 * held and vm_page_free_wanted == 0.
1810 */
1811 void
1812 vm_pageout_scan(void)
1813 {
1814 unsigned int loop_count = 0;
1815 unsigned int inactive_burst_count = 0;
1816 unsigned int reactivated_this_call;
1817 unsigned int reactivate_limit;
1818 vm_page_t local_freeq = NULL;
1819 int local_freed = 0;
1820 int delayed_unlock;
1821 int delayed_unlock_limit = 0;
1822 int refmod_state = 0;
1823 int vm_pageout_deadlock_target = 0;
1824 struct vm_pageout_queue *iq;
1825 struct vm_pageout_queue *eq;
1826 struct vm_speculative_age_q *sq;
1827 struct flow_control flow_control = { 0, { 0, 0 } };
1828 boolean_t inactive_throttled = FALSE;
1829 mach_timespec_t ts;
1830 unsigned int msecs = 0;
1831 vm_object_t object = NULL;
1832 uint32_t inactive_reclaim_run;
1833 boolean_t exceeded_burst_throttle;
1834 boolean_t grab_anonymous = FALSE;
1835 boolean_t force_anonymous = FALSE;
1836 boolean_t force_speculative_aging = FALSE;
1837 int anons_grabbed = 0;
1838 int page_prev_q_state = 0;
1839 #if CONFIG_BACKGROUND_QUEUE
1840 boolean_t page_from_bg_q = FALSE;
1841 #endif
1842 int cache_evict_throttle = 0;
1843 uint32_t vm_pageout_inactive_external_forced_reactivate_limit = 0;
1844 uint32_t inactive_external_count;
1845 int force_purge = 0;
1846 int divisor;
1847 #define DELAY_SPECULATIVE_AGE 1000
1848 int delay_speculative_age = 0;
1849 vm_object_t m_object = VM_OBJECT_NULL;
1850
1851 #if VM_PRESSURE_EVENTS
1852 vm_pressure_level_t pressure_level;
1853 #endif /* VM_PRESSURE_EVENTS */
1854
1855 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_START,
1856 vm_pageout_vminfo.vm_pageout_freed_speculative,
1857 vm_pageout_state.vm_pageout_inactive_clean,
1858 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
1859 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
1860
1861 flow_control.state = FCS_IDLE;
1862 iq = &vm_pageout_queue_internal;
1863 eq = &vm_pageout_queue_external;
1864 sq = &vm_page_queue_speculative[VM_PAGE_SPECULATIVE_AGED_Q];
1865
1866
1867 XPR(XPR_VM_PAGEOUT, "vm_pageout_scan\n", 0, 0, 0, 0, 0);
1868
1869 /* Ask the pmap layer to return any pages it no longer needs. */
1870 uint64_t pmap_wired_pages_freed = pmap_release_pages_fast();
1871
1872 vm_page_lock_queues();
1873
1874 vm_page_wire_count -= pmap_wired_pages_freed;
1875
1876 delayed_unlock = 1;
1877
1878 /*
1879 * Calculate the max number of referenced pages on the inactive
1880 * queue that we will reactivate.
1881 */
1882 reactivated_this_call = 0;
1883 reactivate_limit = VM_PAGE_REACTIVATE_LIMIT(vm_page_active_count +
1884 vm_page_inactive_count);
1885 inactive_reclaim_run = 0;
1886
1887 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
1888
1889 /*
1890 * We must limit the rate at which we send pages to the pagers
1891 * so that we don't tie up too many pages in the I/O queues.
1892 * We implement a throttling mechanism using the laundry count
1893 * to limit the number of pages outstanding to the default
1894 * and external pagers. We can bypass the throttles and look
1895 * for clean pages if the pageout queues don't drain in a timely
1896 * fashion since this may indicate that the pageout paths are
1897 * stalled waiting for memory, which only we can provide.
1898 */
1899
1900 Restart:
1901
1902 assert(object == NULL);
1903 assert(delayed_unlock != 0);
1904
1905 vm_page_anonymous_min = vm_page_inactive_target / 20;
1906
1907 if (vm_pageout_state.vm_page_speculative_percentage > 50)
1908 vm_pageout_state.vm_page_speculative_percentage = 50;
1909 else if (vm_pageout_state.vm_page_speculative_percentage <= 0)
1910 vm_pageout_state.vm_page_speculative_percentage = 1;
1911
1912 vm_pageout_state.vm_page_speculative_target = VM_PAGE_SPECULATIVE_TARGET(vm_page_active_count +
1913 vm_page_inactive_count);
1914
1915 for (;;) {
1916 vm_page_t m;
1917
1918 DTRACE_VM2(rev, int, 1, (uint64_t *), NULL);
1919
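/*
 * delayed_unlock_limit bounds how many pages we process per hold of
 * the page queues lock; it grows with the number of threads waiting
 * on UPL pages and is capped at VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX
 */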
1920 if (vm_upl_wait_for_pages < 0)
1921 vm_upl_wait_for_pages = 0;
1922
1923 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT + vm_upl_wait_for_pages;
1924
1925 if (delayed_unlock_limit > VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX)
1926 delayed_unlock_limit = VM_PAGEOUT_DELAYED_UNLOCK_LIMIT_MAX;
1927
1928 #if CONFIG_SECLUDED_MEMORY
1929 /*
1930 * Deal with secluded_q overflow.
1931 */
1932 if (vm_page_secluded_count > vm_page_secluded_target) {
1933 vm_page_t secluded_page;
1934
1935 /*
1936 * SECLUDED_AGING_BEFORE_ACTIVE:
1937 * Excess secluded pages go to the active queue and
1938 * will later go to the inactive queue.
1939 */
1940 assert((vm_page_secluded_count_free +
1941 vm_page_secluded_count_inuse) ==
1942 vm_page_secluded_count);
1943 secluded_page = (vm_page_t)vm_page_queue_first(&vm_page_queue_secluded);
1944 assert(secluded_page->vmp_q_state == VM_PAGE_ON_SECLUDED_Q);
1945
1946 vm_page_queues_remove(secluded_page, FALSE);
1947 assert(!secluded_page->vmp_fictitious);
1948 assert(!VM_PAGE_WIRED(secluded_page));
1949
1950 if (secluded_page->vmp_object == 0) {
1951 /* transfer to free queue */
1952 assert(secluded_page->vmp_busy);
1953 secluded_page->vmp_snext = local_freeq;
1954 local_freeq = secluded_page;
1955 local_freed++;
1956 } else {
1957 /* transfer to head of active queue */
1958 vm_page_enqueue_active(secluded_page, FALSE);
1959 secluded_page = VM_PAGE_NULL;
1960 }
1961 }
1962 #endif /* CONFIG_SECLUDED_MEMORY */
1963
1964 assert(delayed_unlock);
1965
1966 /*
1967 * maintain our balance
1968 */
1969 vm_page_balance_inactive(1);
1970
1971
1972 /**********************************************************************
1973 * above this point we're playing with the active and secluded queues
1974 * below this point we're playing with the throttling mechanisms
1975 * and the inactive queue
1976 **********************************************************************/
1977
1978 if (vm_page_free_count + local_freed >= vm_page_free_target)
1979 {
1980 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
1981
1982 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
1983 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
1984 /*
1985 * make sure the pageout I/O threads are running
1986 * throttled in case there are still requests
1987 * in the laundry... since we have met our targets
1988 * we don't need the laundry to be cleaned in a timely
1989 * fashion... so let's avoid interfering with foreground
1990 * activity
1991 */
1992 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
1993
1994 lck_mtx_lock(&vm_page_queue_free_lock);
1995
1996 if ((vm_page_free_count >= vm_page_free_target) &&
1997 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
1998 /*
1999 * done - we have met our target *and*
2000 * there is no one waiting for a page.
2001 */
2002 return_from_scan:
2003 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2004
2005 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_NONE,
2006 vm_pageout_state.vm_pageout_inactive,
2007 vm_pageout_state.vm_pageout_inactive_used, 0, 0);
2008 VM_DEBUG_CONSTANT_EVENT(vm_pageout_scan, VM_PAGEOUT_SCAN, DBG_FUNC_END,
2009 vm_pageout_vminfo.vm_pageout_freed_speculative,
2010 vm_pageout_state.vm_pageout_inactive_clean,
2011 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal,
2012 vm_pageout_vminfo.vm_pageout_inactive_dirty_external);
2013
2014 return;
2015 }
2016 lck_mtx_unlock(&vm_page_queue_free_lock);
2017 }
2018
2019 /*
2020 * Before anything, we check if we have any ripe volatile
2021 * objects around. If so, try to purge the first object.
2022 * If the purge fails, fall through to reclaim a page instead.
2023 * If the purge succeeds, go back to the top and re-evaluate
2024 * the new memory situation.
2025 */
2026
2027 assert(available_for_purge >= 0);
2028 force_purge = 0; /* no force-purging */
2029
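/*
 * under memory pressure we allow volatile objects to be force-purged:
 * how aggressively we do so scales with the reported pressure level
 * (warning, urgent, critical) via the corresponding
 * memorystatus_purge_on_* setting
 */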
2030 #if VM_PRESSURE_EVENTS
2031 pressure_level = memorystatus_vm_pressure_level;
2032
2033 if (pressure_level > kVMPressureNormal) {
2034
2035 if (pressure_level >= kVMPressureCritical) {
2036 force_purge = vm_pageout_state.memorystatus_purge_on_critical;
2037 } else if (pressure_level >= kVMPressureUrgent) {
2038 force_purge = vm_pageout_state.memorystatus_purge_on_urgent;
2039 } else if (pressure_level >= kVMPressureWarning) {
2040 force_purge = vm_pageout_state.memorystatus_purge_on_warning;
2041 }
2042 }
2043 #endif /* VM_PRESSURE_EVENTS */
2044
2045 if (available_for_purge || force_purge) {
2046
2047 if (object != NULL) {
2048 vm_object_unlock(object);
2049 object = NULL;
2050 }
2051
2052 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_START);
2053
2054 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_START, vm_page_free_count, 0, 0, 0);
2055 if (vm_purgeable_object_purge_one(force_purge, C_DONT_BLOCK)) {
2056 VM_PAGEOUT_DEBUG(vm_pageout_purged_objects, 1);
2057 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, vm_page_free_count, 0, 0, 0);
2058 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2059 continue;
2060 }
2061 VM_DEBUG_EVENT(vm_pageout_purgeone, VM_PAGEOUT_PURGEONE, DBG_FUNC_END, 0, 0, 0, -1);
2062 memoryshot(VM_PAGEOUT_PURGEONE, DBG_FUNC_END);
2063 }
2064
2065 if (vm_page_queue_empty(&sq->age_q) && vm_page_speculative_count) {
2066 /*
2067 * try to pull pages from the aging bins...
2068 * see vm_page.h for an explanation of how
2069 * this mechanism works
2070 */
2071 struct vm_speculative_age_q *aq;
2072 boolean_t can_steal = FALSE;
2073 int num_scanned_queues;
2074
2075 aq = &vm_page_queue_speculative[speculative_steal_index];
2076
2077 num_scanned_queues = 0;
2078 while (vm_page_queue_empty(&aq->age_q) &&
2079 num_scanned_queues++ != VM_PAGE_MAX_SPECULATIVE_AGE_Q) {
2080
2081 speculative_steal_index++;
2082
2083 if (speculative_steal_index > VM_PAGE_MAX_SPECULATIVE_AGE_Q)
2084 speculative_steal_index = VM_PAGE_MIN_SPECULATIVE_AGE_Q;
2085
2086 aq = &vm_page_queue_speculative[speculative_steal_index];
2087 }
2088
2089 if (num_scanned_queues == VM_PAGE_MAX_SPECULATIVE_AGE_Q + 1) {
2090 /*
2091 * XXX We've scanned all the speculative
2092 * queues but still haven't found one
2093 * that is not empty, even though
2094 * vm_page_speculative_count is not 0.
2095 */
2096 if (!vm_page_queue_empty(&sq->age_q))
2097 continue;
2098 #if DEVELOPMENT || DEBUG
2099 panic("vm_pageout_scan: vm_page_speculative_count=%d but queues are empty", vm_page_speculative_count);
2100 #endif
2101 /* readjust... */
2102 vm_page_speculative_count = 0;
2103 /* ... and continue */
2104 continue;
2105 }
2106
2107 if (vm_page_speculative_count > vm_pageout_state.vm_page_speculative_target || force_speculative_aging == TRUE)
2108 can_steal = TRUE;
2109 else {
2110 if (!delay_speculative_age) {
2111 mach_timespec_t ts_fully_aged;
2112
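/*
 * the oldest speculative bin is considered fully aged once
 * (number of bins * vm_page_speculative_q_age_ms) has elapsed
 * since it was stamped... only then can we steal from it while
 * still under the speculative target
 */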
2113 ts_fully_aged.tv_sec = (VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) / 1000;
2114 ts_fully_aged.tv_nsec = ((VM_PAGE_MAX_SPECULATIVE_AGE_Q * vm_pageout_state.vm_page_speculative_q_age_ms) % 1000)
2115 * 1000 * NSEC_PER_USEC;
2116
2117 ADD_MACH_TIMESPEC(&ts_fully_aged, &aq->age_ts);
2118
2119 clock_sec_t sec;
2120 clock_nsec_t nsec;
2121 clock_get_system_nanotime(&sec, &nsec);
2122 ts.tv_sec = (unsigned int) sec;
2123 ts.tv_nsec = nsec;
2124
2125 if (CMP_MACH_TIMESPEC(&ts, &ts_fully_aged) >= 0)
2126 can_steal = TRUE;
2127 else
2128 delay_speculative_age++;
2129 } else {
2130 delay_speculative_age++;
2131 if (delay_speculative_age == DELAY_SPECULATIVE_AGE)
2132 delay_speculative_age = 0;
2133 }
2134 }
2135 if (can_steal == TRUE)
2136 vm_page_speculate_ageit(aq);
2137 }
2138 force_speculative_aging = FALSE;
2139
2140 if (vm_page_queue_empty(&sq->age_q) && cache_evict_throttle == 0) {
2141
2142 int pages_evicted;
2143
2144 if (object != NULL) {
2145 vm_object_unlock(object);
2146 object = NULL;
2147 }
2148 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_START, 0, 0, 0, 0, 0);
2149
2150 pages_evicted = vm_object_cache_evict(100, 10);
2151
2152 KERNEL_DEBUG_CONSTANT(0x13001ec | DBG_FUNC_END, pages_evicted, 0, 0, 0, 0);
2153
2154 if (pages_evicted) {
2155
2156 vm_pageout_vminfo.vm_pageout_pages_evicted += pages_evicted;
2157
2158 VM_DEBUG_EVENT(vm_pageout_cache_evict, VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE,
2159 vm_page_free_count, pages_evicted, vm_pageout_vminfo.vm_pageout_pages_evicted, 0);
2160 memoryshot(VM_PAGEOUT_CACHE_EVICT, DBG_FUNC_NONE);
2161
2162 /*
2163 * we just freed up to 100 pages,
2164 * so go back to the top of the main loop
2165 * and re-evaluate the memory situation
2166 */
2167 continue;
2168 } else
2169 cache_evict_throttle = 1000;
2170 }
2171 if (cache_evict_throttle)
2172 cache_evict_throttle--;
2173
2174 divisor = vm_pageout_state.vm_page_filecache_min_divisor;
2175
2176 #if CONFIG_JETSAM
2177 /*
2178 * don't let the filecache_min fall below 15% of available memory
2179 * on systems with an active compressor that isn't nearing its
2180 * limits w/r to accepting new data
2181 *
2182 * on systems w/o the compressor/swapper, the filecache is always
2183 * a very large percentage of the AVAILABLE_NON_COMPRESSED_MEMORY
2184 * since most (if not all) of the anonymous pages are in the
2185 * throttled queue (which isn't counted as available) which
2186 * effectively disables this filter
2187 */
2188 if (vm_compressor_low_on_space() || divisor == 0)
2189 vm_pageout_state.vm_page_filecache_min = 0;
2190 else
2191 vm_pageout_state.vm_page_filecache_min =
2192 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2193 #else
2194 if (vm_compressor_out_of_space() || divisor == 0)
2195 vm_pageout_state.vm_page_filecache_min = 0;
2196 else {
2197 /*
2198 * don't let the filecache_min fall below the specified critical level
2199 */
2200 vm_pageout_state.vm_page_filecache_min =
2201 ((AVAILABLE_NON_COMPRESSED_MEMORY) * 10) / divisor;
2202 }
2203 #endif
2204 if (vm_page_free_count < (vm_page_free_reserved / 4))
2205 vm_pageout_state.vm_page_filecache_min = 0;
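/*
 * note: vm_page_filecache_min works out to (10 / divisor) of
 * AVAILABLE_NON_COMPRESSED_MEMORY... e.g. a divisor in the mid 60s
 * would yield the ~15% floor mentioned above (the exact divisor is
 * a tunable in vm_pageout_state)... once free memory drops below a
 * quarter of the reserved pool we stop protecting the filecache
 * entirely
 */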
2206
2207 exceeded_burst_throttle = FALSE;
2208 /*
2209 * Sometimes we have to pause:
2210 * 1) No inactive pages - nothing to do.
2211 * 2) Loop control - no acceptable pages found on the inactive queue
2212 * within the last vm_pageout_burst_inactive_throttle iterations
2213 * 3) Flow control - default pageout queue is full
2214 */
2215 if (vm_page_queue_empty(&vm_page_queue_inactive) &&
2216 vm_page_queue_empty(&vm_page_queue_anonymous) &&
2217 vm_page_queue_empty(&vm_page_queue_cleaned) &&
2218 vm_page_queue_empty(&sq->age_q)) {
2219 VM_PAGEOUT_DEBUG(vm_pageout_scan_empty_throttle, 1);
2220 msecs = vm_pageout_state.vm_pageout_empty_wait;
2221 goto vm_pageout_scan_delay;
2222
2223 } else if (inactive_burst_count >=
2224 MIN(vm_pageout_state.vm_pageout_burst_inactive_throttle,
2225 (vm_page_inactive_count +
2226 vm_page_speculative_count))) {
2227 VM_PAGEOUT_DEBUG(vm_pageout_scan_burst_throttle, 1);
2228 msecs = vm_pageout_state.vm_pageout_burst_wait;
2229
2230 exceeded_burst_throttle = TRUE;
2231 goto vm_pageout_scan_delay;
2232
2233 } else if (VM_PAGE_Q_THROTTLED(iq) &&
2234 VM_DYNAMIC_PAGING_ENABLED()) {
2235 clock_sec_t sec;
2236 clock_nsec_t nsec;
2237
2238 switch (flow_control.state) {
2239
2240 case FCS_IDLE:
2241 if ((vm_page_free_count + local_freed) < vm_page_free_target &&
2242 vm_pageout_state.vm_restricted_to_single_processor == FALSE) {
2243 /*
2244 * since the compressor is running independently of vm_pageout_scan
2245 * let's not wait for it just yet... as long as we have a healthy supply
2246 * of filecache pages to work with, let's keep stealing those.
2247 */
2248 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2249
2250 if (vm_page_pageable_external_count > vm_pageout_state.vm_page_filecache_min &&
2251 (inactive_external_count >= VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2252 anons_grabbed = ANONS_GRABBED_LIMIT;
2253 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle_deferred, 1);
2254 goto consider_inactive;
2255 }
2256 }
2257 reset_deadlock_timer:
2258 ts.tv_sec = vm_pageout_state.vm_pageout_deadlock_wait / 1000;
2259 ts.tv_nsec = (vm_pageout_state.vm_pageout_deadlock_wait % 1000) * 1000 * NSEC_PER_USEC;
2260 clock_get_system_nanotime(&sec, &nsec);
2261 flow_control.ts.tv_sec = (unsigned int) sec;
2262 flow_control.ts.tv_nsec = nsec;
2263 ADD_MACH_TIMESPEC(&flow_control.ts, &ts);
2264
2265 flow_control.state = FCS_DELAYED;
2266 msecs = vm_pageout_state.vm_pageout_deadlock_wait;
2267
2268 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_internal++;
2269 break;
2270
2271 case FCS_DELAYED:
2272 clock_get_system_nanotime(&sec, &nsec);
2273 ts.tv_sec = (unsigned int) sec;
2274 ts.tv_nsec = nsec;
2275
2276 if (CMP_MACH_TIMESPEC(&ts, &flow_control.ts) >= 0) {
2277 /*
2278 * the pageout thread for the default pager is potentially
2279 * deadlocked since the
2280 * default pager queue has been throttled for more than the
2281 * allowable time... we need to move some clean pages or dirty
2282 * pages belonging to the external pagers if they aren't throttled
2283 * vm_page_free_wanted represents the number of threads currently
2284 * blocked waiting for pages... we'll move one page for each of
2285 * these plus a fixed amount to break the logjam... once we're done
2286 * moving this number of pages, we'll re-enter the FCS_DELAYED state
2287 * with a new timeout target since we have no way of knowing
2288 * whether we've broken the deadlock except through observation
2289 * of the queue associated with the default pager... we need to
2290 * stop moving pages and allow the system to run to see what
2291 * state it settles into.
2292 */
2293 vm_pageout_deadlock_target = vm_pageout_state.vm_pageout_deadlock_relief +
2294 vm_page_free_wanted + vm_page_free_wanted_privileged;
2295 VM_PAGEOUT_DEBUG(vm_pageout_scan_deadlock_detected, 1);
2296 flow_control.state = FCS_DEADLOCK_DETECTED;
2297 thread_wakeup((event_t) &vm_pageout_garbage_collect);
2298 goto consider_inactive;
2299 }
2300 /*
2301 * just resniff instead of trying
2302 * to compute a new delay time... we're going to be
2303 * awakened immediately upon a laundry completion,
2304 * so we won't wait any longer than necessary
2305 */
2306 msecs = vm_pageout_state.vm_pageout_idle_wait;
2307 break;
2308
2309 case FCS_DEADLOCK_DETECTED:
2310 if (vm_pageout_deadlock_target)
2311 goto consider_inactive;
2312 goto reset_deadlock_timer;
2313
2314 }
2315 vm_pageout_scan_delay:
2316 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2317
2318 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
2319 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
2320
2321 if (vm_page_free_count >= vm_page_free_target) {
2322 /*
2323 * we're here because
2324 * 1) someone else freed up some pages while we had
2325 * the queues unlocked above
2326 * and we've hit one of the 3 conditions that
2327 * cause us to pause the pageout scan thread
2328 *
2329 * since we already have enough free pages,
2330 * let's avoid stalling and return normally
2331 *
2332 * before we return, make sure the pageout I/O threads
2333 * are running throttled in case there are still requests
2334 * in the laundry... since we have enough free pages
2335 * we don't need the laundry to be cleaned in a timely
2336 * fashion... so let's avoid interfering with foreground
2337 * activity
2338 *
2339 * we don't want to hold vm_page_queue_free_lock when
2340 * calling vm_pageout_adjust_eq_iothrottle (since it
2341 * may cause other locks to be taken), we do the initial
2342 * check outside of the lock. Once we take the lock,
2343 * we recheck the condition since it may have changed.
2344 * if it has, no problem, we will make the threads
2345 * non-throttled before actually blocking
2346 */
2347 vm_pageout_adjust_eq_iothrottle(eq, TRUE);
2348 }
2349 lck_mtx_lock(&vm_page_queue_free_lock);
2350
2351 if (vm_page_free_count >= vm_page_free_target &&
2352 (vm_page_free_wanted == 0) && (vm_page_free_wanted_privileged == 0)) {
2353 goto return_from_scan;
2354 }
2355 lck_mtx_unlock(&vm_page_queue_free_lock);
2356
2357 if ((vm_page_free_count + vm_page_cleaned_count) < vm_page_free_target) {
2358 /*
2359 * we're most likely about to block due to one of
2360 * the 3 conditions that cause vm_pageout_scan to
2361 * not be able to make forward progress w/r
2362 * to providing new pages to the free queue,
2363 * so unthrottle the I/O threads in case we
2364 * have laundry to be cleaned... it needs
2365 * to be completed ASAP.
2366 *
2367 * even if we don't block, we want the io threads
2368 * running unthrottled since the sum of free +
2369 * clean pages is still under our free target
2370 */
2371 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
2372 }
2373 if (vm_page_cleaned_count > 0 && exceeded_burst_throttle == FALSE) {
2374 /*
2375 * if we get here we're below our free target and
2376 * we're stalling due to a full laundry queue or
2377 * we don't have any inactive pages other than
2378 * those in the clean queue...
2379 * however, we have pages on the clean queue that
2380 * can be moved to the free queue, so let's not
2381 * stall the pageout scan
2382 */
2383 flow_control.state = FCS_IDLE;
2384 goto consider_inactive;
2385 }
2386 if (flow_control.state == FCS_DELAYED && !VM_PAGE_Q_THROTTLED(iq)) {
2387 flow_control.state = FCS_IDLE;
2388 goto consider_inactive;
2389 }
2390
2391 VM_CHECK_MEMORYSTATUS;
2392
2393 if (flow_control.state != FCS_IDLE)
2394 VM_PAGEOUT_DEBUG(vm_pageout_scan_throttle, 1);
2395
2396 iq->pgo_throttled = TRUE;
2397 assert_wait_timeout((event_t) &iq->pgo_laundry, THREAD_INTERRUPTIBLE, msecs, 1000*NSEC_PER_USEC);
2398
2399 counter(c_vm_pageout_scan_block++);
2400
2401 vm_page_unlock_queues();
2402
2403 assert(vm_pageout_scan_wants_object == VM_OBJECT_NULL);
2404
2405 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START,
2406 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2407 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_START);
2408
2409 thread_block(THREAD_CONTINUE_NULL);
2410
2411 VM_DEBUG_EVENT(vm_pageout_thread_block, VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END,
2412 iq->pgo_laundry, iq->pgo_maxlaundry, msecs, 0);
2413 memoryshot(VM_PAGEOUT_THREAD_BLOCK, DBG_FUNC_END);
2414
2415 vm_page_lock_queues();
2416
2417 iq->pgo_throttled = FALSE;
2418
2419 if (loop_count >= vm_page_inactive_count)
2420 loop_count = 0;
2421 inactive_burst_count = 0;
2422
2423 goto Restart;
2424 /*NOTREACHED*/
2425 }
2426
2427
2428 flow_control.state = FCS_IDLE;
2429 consider_inactive:
2430 vm_pageout_inactive_external_forced_reactivate_limit = MIN((vm_page_active_count + vm_page_inactive_count),
2431 vm_pageout_inactive_external_forced_reactivate_limit);
2432 loop_count++;
2433 inactive_burst_count++;
2434 vm_pageout_state.vm_pageout_inactive++;
2435
2436 /*
2437 * Choose a victim.
2438 */
2439 while (1) {
2440
2441 #if CONFIG_BACKGROUND_QUEUE
2442 page_from_bg_q = FALSE;
2443 #endif /* CONFIG_BACKGROUND_QUEUE */
2444
2445 m = NULL;
2446 m_object = VM_OBJECT_NULL;
2447
2448 if (VM_DYNAMIC_PAGING_ENABLED()) {
2449 assert(vm_page_throttled_count == 0);
2450 assert(vm_page_queue_empty(&vm_page_queue_throttled));
2451 }
2452
2453 /*
2454 * Try for a clean-queue inactive page.
2455 * These are pages that vm_pageout_scan tried to steal earlier, but
2456 * were dirty and had to be cleaned. Pick them up now that they are clean.
2457 */
2458 if (!vm_page_queue_empty(&vm_page_queue_cleaned)) {
2459 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2460
2461 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q);
2462
2463 break;
2464 }
2465
2466 /*
2467 * The next most eligible pages are ones we paged in speculatively,
2468 * but which have not yet been touched and have been aged out.
2469 */
2470 if (!vm_page_queue_empty(&sq->age_q)) {
2471 m = (vm_page_t) vm_page_queue_first(&sq->age_q);
2472
2473 assert(m->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q);
2474
2475 if (!m->vmp_dirty || force_anonymous == FALSE)
2476 break;
2477 else
2478 m = NULL;
2479 }
2480
2481 #if CONFIG_BACKGROUND_QUEUE
2482 if (vm_page_background_mode != VM_PAGE_BG_DISABLED && (vm_page_background_count > vm_page_background_target)) {
2483 vm_object_t bg_m_object = NULL;
2484
2485 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_background);
2486
2487 bg_m_object = VM_PAGE_OBJECT(m);
2488
2489 if (!VM_PAGE_PAGEABLE(m)) {
2490 /*
2491 * This page is on the background queue
2492 * but not on a pageable queue. This is
2493 * likely a transient state and whoever
2494 * took it out of its pageable queue
2495 * will likely put it back on a pageable
2496 * queue soon but we can't deal with it
2497 * at this point, so let's ignore this
2498 * page.
2499 */
2500 } else if (force_anonymous == FALSE || bg_m_object->internal) {
2501
2502 if (bg_m_object->internal &&
2503 (VM_PAGE_Q_THROTTLED(iq) ||
2504 vm_compressor_out_of_space() == TRUE ||
2505 vm_page_free_count < (vm_page_free_reserved / 4))) {
2506
2507 vm_pageout_skipped_bq_internal++;
2508 } else {
2509 page_from_bg_q = TRUE;
2510
2511 if (bg_m_object->internal)
2512 vm_pageout_vminfo.vm_pageout_considered_bq_internal++;
2513 else
2514 vm_pageout_vminfo.vm_pageout_considered_bq_external++;
2515 break;
2516 }
2517 }
2518 }
2519 #endif
2520 inactive_external_count = vm_page_inactive_count - vm_page_anonymous_count;
2521
2522 if ((vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min || force_anonymous == TRUE) ||
2523 (inactive_external_count < VM_PAGE_INACTIVE_TARGET(vm_page_pageable_external_count))) {
2524 grab_anonymous = TRUE;
2525 anons_grabbed = 0;
2526
2527 vm_pageout_vminfo.vm_pageout_skipped_external++;
2528 goto want_anonymous;
2529 }
2530 grab_anonymous = (vm_page_anonymous_count > vm_page_anonymous_min);
2531
2532 #if CONFIG_JETSAM
2533 /* If the file-backed pool has accumulated
2534 * significantly more pages than the jetsam
2535 * threshold, prefer to reclaim those
2536 * inline to minimise compute overhead of reclaiming
2537 * anonymous pages.
2538 * This calculation does not account for the CPU local
2539 * external page queues, as those are expected to be
2540 * much smaller relative to the global pools.
2541 */
2542 if (grab_anonymous == TRUE && !VM_PAGE_Q_THROTTLED(eq)) {
2543 if (vm_page_pageable_external_count >
2544 vm_pageout_state.vm_page_filecache_min) {
2545 if ((vm_page_pageable_external_count *
2546 vm_pageout_memorystatus_fb_factor_dr) >
2547 (memorystatus_available_pages_critical *
2548 vm_pageout_memorystatus_fb_factor_nr)) {
2549 grab_anonymous = FALSE;
2550
2551 VM_PAGEOUT_DEBUG(vm_grab_anon_overrides, 1);
2552 }
2553 }
2554 if (grab_anonymous) {
2555 VM_PAGEOUT_DEBUG(vm_grab_anon_nops, 1);
2556 }
2557 }
2558 #endif /* CONFIG_JETSAM */
2559
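/*
 * victim selection below alternates between the anonymous and
 * file-backed inactive queues... when grabbing anonymous pages,
 * at most ANONS_GRABBED_LIMIT of them are taken in a row before
 * a file-backed page is considered and the counter is reset
 */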
2560 want_anonymous:
2561 if (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT || vm_page_queue_empty(&vm_page_queue_anonymous)) {
2562
2563 if ( !vm_page_queue_empty(&vm_page_queue_inactive) ) {
2564 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2565
2566 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_EXTERNAL_Q);
2567 anons_grabbed = 0;
2568
2569 if (vm_page_pageable_external_count < vm_pageout_state.vm_page_filecache_min) {
2570
2571 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2572 if ((++reactivated_this_call % 100)) {
2573 vm_pageout_vminfo.vm_pageout_filecache_min_reactivated++;
2574 goto must_activate_page;
2575 }
2576 /*
2577 * steal 1% of the file backed pages even if
2578 * we are under the limit that has been set
2579 * for a healthy filecache
2580 */
2581 }
2582 }
2583 break;
2584 }
2585 }
2586 if ( !vm_page_queue_empty(&vm_page_queue_anonymous) ) {
2587 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2588
2589 assert(m->vmp_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q);
2590 anons_grabbed++;
2591
2592 break;
2593 }
2594
2595 /*
2596 * if we've gotten here, we have no victim page.
2597 * check to see if we've not finished balancing the queues
2598 * or we have a page on the aged speculative queue that we
2599 * skipped due to force_anonymous == TRUE.. or we have
2600 * speculative pages that we can prematurely age... if
2601 * one of these cases we'll keep going, else panic
2602 */
2603 force_anonymous = FALSE;
2604 VM_PAGEOUT_DEBUG(vm_pageout_no_victim, 1);
2605
2606 if (!vm_page_queue_empty(&sq->age_q))
2607 goto done_with_inactivepage;
2608
2609 if (vm_page_speculative_count) {
2610 force_speculative_aging = TRUE;
2611 goto done_with_inactivepage;
2612 }
2613 panic("vm_pageout: no victim");
2614
2615 /* NOTREACHED */
2616 }
2617 assert(VM_PAGE_PAGEABLE(m));
2618 m_object = VM_PAGE_OBJECT(m);
2619 force_anonymous = FALSE;
2620
2621 page_prev_q_state = m->vmp_q_state;
2622 /*
2623 * we just found this page on one of our queues...
2624 * it can't also be on the pageout queue, so safe
2625 * to call vm_page_queues_remove
2626 */
2627 vm_page_queues_remove(m, TRUE);
2628
2629 assert(!m->vmp_laundry);
2630 assert(!m->vmp_private);
2631 assert(!m->vmp_fictitious);
2632 assert(m_object != kernel_object);
2633 assert(VM_PAGE_GET_PHYS_PAGE(m) != vm_page_guard_addr);
2634
2635 vm_pageout_vminfo.vm_pageout_considered_page++;
2636
2637 DTRACE_VM2(scan, int, 1, (uint64_t *), NULL);
2638
2639 /*
2640 * check to see if we currently are working
2641 * with the same object... if so, we've
2642 * already got the lock
2643 */
2644 if (m_object != object) {
2645 /*
2646 * the object associated with candidate page is
2647 * different from the one we were just working
2648 * with... dump the lock if we still own it
2649 */
2650 if (object != NULL) {
2651 vm_object_unlock(object);
2652 object = NULL;
2653 }
2654 /*
2655 * Try to lock object; since we've already got the
2656 * page queues lock, we can only 'try' for this one.
2657 * if the 'try' fails, we need to do a mutex_pause
2658 * to allow the owner of the object lock a chance to
2659 * run... otherwise, we're likely to trip over this
2660 * object in the same state as we work our way through
2661 * the queue... clumps of pages associated with the same
2662 * object are fairly typical on the inactive and active queues
2663 */
2664 if (!vm_object_lock_try_scan(m_object)) {
2665 vm_page_t m_want = NULL;
2666
2667 vm_pageout_vminfo.vm_pageout_inactive_nolock++;
2668
2669 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2670 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_nolock, 1);
2671
2672 pmap_clear_reference(VM_PAGE_GET_PHYS_PAGE(m));
2673
2674 m->vmp_reference = FALSE;
2675
2676 if ( !m_object->object_is_shared_cache) {
2677 /*
2678 * don't apply this optimization if this is the shared cache
2679 * object, it's too easy to get rid of very hot and important
2680 * pages...
2681 * m->vmp_object must be stable since we hold the page queues lock...
2682 * we can update the scan_collisions field sans the object lock
2683 * since it is a separate field and this is the only spot that does
2684 * a read-modify-write operation and it is never executed concurrently...
2685 * we can asynchronously set this field to 0 when creating a UPL, so it
2686 * is possible for the value to be a bit non-deterministic, but that's ok
2687 * since it's only used as a hint
2688 */
2689 m_object->scan_collisions = 1;
2690 }
2691 if ( !vm_page_queue_empty(&vm_page_queue_cleaned))
2692 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_cleaned);
2693 else if ( !vm_page_queue_empty(&sq->age_q))
2694 m_want = (vm_page_t) vm_page_queue_first(&sq->age_q);
2695 else if ( (grab_anonymous == FALSE || anons_grabbed >= ANONS_GRABBED_LIMIT ||
2696 vm_page_queue_empty(&vm_page_queue_anonymous)) &&
2697 !vm_page_queue_empty(&vm_page_queue_inactive))
2698 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
2699 else if ( !vm_page_queue_empty(&vm_page_queue_anonymous))
2700 m_want = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
2701
2702 /*
2703 * this is the next object we're going to be interested in
2704 * try to make sure it's available after the mutex_pause
2705 * returns control
2706 */
2707 if (m_want)
2708 vm_pageout_scan_wants_object = VM_PAGE_OBJECT(m_want);
2709
2710 goto requeue_page;
2711 }
2712 object = m_object;
2713 vm_pageout_scan_wants_object = VM_OBJECT_NULL;
2714 }
2715 assert(m_object == object);
2716 assert(VM_PAGE_OBJECT(m) == m_object);
2717
2718 if (m->vmp_busy) {
2719 /*
2720 * Somebody is already playing with this page.
2721 * Put it back on the appropriate queue
2722 *
2723 */
2724 VM_PAGEOUT_DEBUG(vm_pageout_inactive_busy, 1);
2725
2726 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2727 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_busy, 1);
2728 requeue_page:
2729 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2730 vm_page_enqueue_inactive(m, FALSE);
2731 else
2732 vm_page_activate(m);
2733 #if CONFIG_BACKGROUND_QUEUE
2734 #if DEVELOPMENT || DEBUG
2735 if (page_from_bg_q == TRUE) {
2736 if (m_object->internal)
2737 vm_pageout_rejected_bq_internal++;
2738 else
2739 vm_pageout_rejected_bq_external++;
2740 }
2741 #endif
2742 #endif
2743 goto done_with_inactivepage;
2744 }
2745
2746 /*
2747 * if (m->vmp_cleaning && !m->vmp_free_when_done)
2748 * If already cleaning this page in place
2749 * just leave it off the paging queues.
2750 * We can leave the page mapped, and upl_commit_range
2751 * will put it on the clean queue.
2752 *
2753 * if (m->vmp_free_when_done && !m->vmp_cleaning)
2754 * an msync INVALIDATE is in progress...
2755 * this page has been marked for destruction
2756 * after it has been cleaned,
2757 * but not yet gathered into a UPL
2758 * where 'cleaning' will be set...
2759 * just leave it off the paging queues
2760 *
2761 * if (m->vmp_free_when_done && m->vmp_cleaning)
2762 * an msync INVALIDATE is in progress
2763 * and the UPL has already gathered this page...
2764 * just leave it off the paging queues
2765 */
2766 if (m->vmp_free_when_done || m->vmp_cleaning) {
2767 goto done_with_inactivepage;
2768 }
2769
2770
2771 /*
2772 * If it's absent, in error or the object is no longer alive,
2773 * we can reclaim the page... in the no longer alive case,
2774 * there are 2 states the page can be in that preclude us
2775 * from reclaiming it - busy or cleaning - that we've already
2776 * dealt with
2777 */
2778 if (m->vmp_absent || m->vmp_error || !object->alive) {
2779
2780 if (m->vmp_absent)
2781 VM_PAGEOUT_DEBUG(vm_pageout_inactive_absent, 1);
2782 else if (!object->alive)
2783 VM_PAGEOUT_DEBUG(vm_pageout_inactive_notalive, 1);
2784 else
2785 VM_PAGEOUT_DEBUG(vm_pageout_inactive_error, 1);
2786 reclaim_page:
2787 if (vm_pageout_deadlock_target) {
2788 VM_PAGEOUT_DEBUG(vm_pageout_scan_inactive_throttle_success, 1);
2789 vm_pageout_deadlock_target--;
2790 }
2791
2792 DTRACE_VM2(dfree, int, 1, (uint64_t *), NULL);
2793
2794 if (object->internal) {
2795 DTRACE_VM2(anonfree, int, 1, (uint64_t *), NULL);
2796 } else {
2797 DTRACE_VM2(fsfree, int, 1, (uint64_t *), NULL);
2798 }
2799 assert(!m->vmp_cleaning);
2800 assert(!m->vmp_laundry);
2801
2802 if (!object->internal &&
2803 object->pager != NULL &&
2804 object->pager->mo_pager_ops == &shared_region_pager_ops) {
2805 shared_region_pager_reclaimed++;
2806 }
2807
2808 m->vmp_busy = TRUE;
2809
2810 /*
2811 * remove page from object here since we're already
2812 * behind the object lock... defer the rest of the work
2813 * we'd normally do in vm_page_free_prepare_object
2814 * until 'vm_page_free_list' is called
2815 */
2816 if (m->vmp_tabled)
2817 vm_page_remove(m, TRUE);
2818
2819 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
2820 m->vmp_snext = local_freeq;
2821 local_freeq = m;
2822 local_freed++;
2823
2824 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
2825 vm_pageout_vminfo.vm_pageout_freed_speculative++;
2826 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2827 vm_pageout_vminfo.vm_pageout_freed_cleaned++;
2828 else if (page_prev_q_state == VM_PAGE_ON_INACTIVE_INTERNAL_Q)
2829 vm_pageout_vminfo.vm_pageout_freed_internal++;
2830 else
2831 vm_pageout_vminfo.vm_pageout_freed_external++;
2832
2833 inactive_burst_count = 0;
2834 goto done_with_inactivepage;
2835 }
2836 if (object->copy == VM_OBJECT_NULL) {
2837 /*
2838 * No one else can have any interest in this page.
2839 * If this is an empty purgable object, the page can be
2840 * reclaimed even if dirty.
2841 * If the page belongs to a volatile purgable object, we
2842 * reactivate it if the compressor isn't active.
2843 */
2844 if (object->purgable == VM_PURGABLE_EMPTY) {
2845 if (m->vmp_pmapped == TRUE) {
2846 /* unmap the page */
2847 refmod_state = pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
2848 if (refmod_state & VM_MEM_MODIFIED) {
2849 SET_PAGE_DIRTY(m, FALSE);
2850 }
2851 }
2852 if (m->vmp_dirty || m->vmp_precious) {
2853 /* we saved the cost of cleaning this page ! */
2854 vm_page_purged_count++;
2855 }
2856 goto reclaim_page;
2857 }
2858
2859 if (VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
2860 /*
2861 * With the VM compressor, the cost of
2862 * reclaiming a page is much lower (no I/O),
2863 * so if we find a "volatile" page, it's better
2864 * to let it get compressed rather than letting
2865 * it occupy a full page until it gets purged.
2866 * So no need to check for "volatile" here.
2867 */
2868 } else if (object->purgable == VM_PURGABLE_VOLATILE) {
2869 /*
2870 * Avoid cleaning a "volatile" page which might
2871 * be purged soon.
2872 */
2873
2874 /* if it's wired, we can't put it on our queue */
2875 assert(!VM_PAGE_WIRED(m));
2876
2877 /* just stick it back on! */
2878 reactivated_this_call++;
2879
2880 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2881 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_volatile_reactivated, 1);
2882
2883 goto reactivate_page;
2884 }
2885 }
2886 /*
2887 * If it's being used, reactivate.
2888 * (Fictitious pages are either busy or absent.)
2889 * First, update the reference and dirty bits
2890 * to make sure the page is unreferenced.
2891 */
2892 refmod_state = -1;
2893
2894 if (m->vmp_reference == FALSE && m->vmp_pmapped == TRUE) {
2895 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2896
2897 if (refmod_state & VM_MEM_REFERENCED)
2898 m->vmp_reference = TRUE;
2899 if (refmod_state & VM_MEM_MODIFIED) {
2900 SET_PAGE_DIRTY(m, FALSE);
2901 }
2902 }
2903
2904 if (m->vmp_reference || m->vmp_dirty) {
2905 /* deal with a rogue "reusable" page */
2906 VM_PAGEOUT_SCAN_HANDLE_REUSABLE_PAGE(m, m_object);
2907 }
2908 divisor = vm_pageout_state.vm_page_xpmapped_min_divisor;
2909
2910 if (divisor == 0)
2911 vm_pageout_state.vm_page_xpmapped_min = 0;
2912 else
2913 vm_pageout_state.vm_page_xpmapped_min = (vm_page_external_count * 10) / divisor;
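/*
 * vm_page_xpmapped_min works out to (10 / divisor) of the
 * file-backed (external) pool... a divisor of 0 disables the
 * xpmapped reactivation check below
 */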
2914
2915 if (!m->vmp_no_cache &&
2916 #if CONFIG_BACKGROUND_QUEUE
2917 page_from_bg_q == FALSE &&
2918 #endif
2919 (m->vmp_reference || (m->vmp_xpmapped && !object->internal &&
2920 (vm_page_xpmapped_external_count < vm_pageout_state.vm_page_xpmapped_min)))) {
2921 /*
2922 * The page we pulled off the inactive list has
2923 * been referenced. It is possible for other
2924 * processors to be touching pages faster than we
2925 * can clear the referenced bit and traverse the
2926 * inactive queue, so we limit the number of
2927 * reactivations.
2928 */
2929 if (++reactivated_this_call >= reactivate_limit) {
2930 vm_pageout_vminfo.vm_pageout_reactivation_limit_exceeded++;
2931 } else if (++inactive_reclaim_run >= VM_PAGEOUT_INACTIVE_FORCE_RECLAIM) {
2932 vm_pageout_vminfo.vm_pageout_inactive_force_reclaim++;
2933 } else {
2934 uint32_t isinuse;
2935
2936 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2937 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reference_reactivated, 1);
2938
2939 vm_pageout_vminfo.vm_pageout_inactive_referenced++;
2940 reactivate_page:
2941 if ( !object->internal && object->pager != MEMORY_OBJECT_NULL &&
2942 vnode_pager_get_isinuse(object->pager, &isinuse) == KERN_SUCCESS && !isinuse) {
2943 /*
2944 * no explicit mappings of this object exist
2945 * and it's not open via the filesystem
2946 */
2947 vm_page_deactivate(m);
2948 VM_PAGEOUT_DEBUG(vm_pageout_inactive_deactivated, 1);
2949 } else {
2950 must_activate_page:
2951 /*
2952 * The page was/is being used, so put back on active list.
2953 */
2954 vm_page_activate(m);
2955 VM_STAT_INCR(reactivations);
2956 inactive_burst_count = 0;
2957 }
2958 #if CONFIG_BACKGROUND_QUEUE
2959 #if DEVELOPMENT || DEBUG
2960 if (page_from_bg_q == TRUE) {
2961 if (m_object->internal)
2962 vm_pageout_rejected_bq_internal++;
2963 else
2964 vm_pageout_rejected_bq_external++;
2965 }
2966 #endif
2967 #endif
2968 if (page_prev_q_state == VM_PAGE_ON_INACTIVE_CLEANED_Q)
2969 VM_PAGEOUT_DEBUG(vm_pageout_cleaned_reactivated, 1);
2970 vm_pageout_state.vm_pageout_inactive_used++;
2971
2972 goto done_with_inactivepage;
2973 }
2974 /*
2975 * Make sure we call pmap_get_refmod() if it
2976 * wasn't already called just above, to update
2977 * the dirty bit.
2978 */
2979 if ((refmod_state == -1) && !m->vmp_dirty && m->vmp_pmapped) {
2980 refmod_state = pmap_get_refmod(VM_PAGE_GET_PHYS_PAGE(m));
2981 if (refmod_state & VM_MEM_MODIFIED) {
2982 SET_PAGE_DIRTY(m, FALSE);
2983 }
2984 }
2985 }
2986
2987 XPR(XPR_VM_PAGEOUT,
2988 "vm_pageout_scan, replace object 0x%X offset 0x%X page 0x%X\n",
2989 object, m->vmp_offset, m, 0,0);
2990
2991 /*
2992 * we've got a candidate page to steal...
2993 *
2994 * m->vmp_dirty is up to date courtesy of the
2995 * preceding check for m->vmp_reference... if
2996 * we get here, then m->vmp_reference had to be
2997 * FALSE (or possibly "reactivate_limit" was
2998 * exceeded), but in either case we called
2999 * pmap_get_refmod() and updated both
3000 * m->vmp_reference and m->vmp_dirty
3001 *
3002 * if it's dirty or precious we need to
3003 * see if the target queue is throttled
3004 * if it is, we need to skip over it by moving it back
3005 * to the end of the inactive queue
3006 */
3007
3008 inactive_throttled = FALSE;
3009
3010 if (m->vmp_dirty || m->vmp_precious) {
3011 if (object->internal) {
3012 if (VM_PAGE_Q_THROTTLED(iq))
3013 inactive_throttled = TRUE;
3014 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3015 inactive_throttled = TRUE;
3016 }
3017 }
3018 throttle_inactive:
3019 if (!VM_DYNAMIC_PAGING_ENABLED() &&
3020 object->internal && m->vmp_dirty &&
3021 (object->purgable == VM_PURGABLE_DENY ||
3022 object->purgable == VM_PURGABLE_NONVOLATILE ||
3023 object->purgable == VM_PURGABLE_VOLATILE)) {
3024 vm_page_check_pageable_safe(m);
3025 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3026 vm_page_queue_enter(&vm_page_queue_throttled, m,
3027 vm_page_t, vmp_pageq);
3028 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
3029 vm_page_throttled_count++;
3030
3031 VM_PAGEOUT_DEBUG(vm_pageout_scan_reclaimed_throttled, 1);
3032
3033 inactive_burst_count = 0;
3034 goto done_with_inactivepage;
3035 }
3036 if (inactive_throttled == TRUE) {
3037
3038 if (object->internal == FALSE) {
3039 /*
3040 * we need to break up the following potential deadlock case...
3041 * a) The external pageout thread is stuck on the truncate lock for a file that is being extended i.e. written.
3042 * b) The thread doing the writing is waiting for pages while holding the truncate lock
3043 * c) Most of the pages in the inactive queue belong to this file.
3044 *
3045 * we are potentially in this deadlock because...
3046 * a) the external pageout queue is throttled
3047 * b) we're done with the active queue and moved on to the inactive queue
3048 * c) we've got a dirty external page
3049 *
3050 * since we don't know the reason for the external pageout queue being throttled we
3051 * must suspect that we are deadlocked, so move the current page onto the active queue
3052 * in an effort to cause a page from the active queue to 'age' to the inactive queue
3053 *
3054 * if we don't have jetsam configured (i.e. we have a dynamic pager), set
3055 * 'force_anonymous' to TRUE to cause us to grab a page from the cleaned/anonymous
3056 * pool the next time we select a victim page... if we can make enough new free pages,
3057 * the deadlock will break, the external pageout queue will empty and it will no longer
3058 * be throttled
3059 *
3060 * if we have jetsam configured, keep a count of the pages reactivated this way so
3061 * that we can try to find clean pages in the active/inactive queues before
3062 * deciding to jetsam a process
3063 */
3064 vm_pageout_vminfo.vm_pageout_scan_inactive_throttled_external++;
3065
3066 vm_page_check_pageable_safe(m);
3067 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
3068 vm_page_queue_enter(&vm_page_queue_active, m, vm_page_t, vmp_pageq);
3069 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
3070 vm_page_active_count++;
3071 vm_page_pageable_external_count++;
3072
3073 vm_pageout_adjust_eq_iothrottle(eq, FALSE);
3074
3075 #if CONFIG_MEMORYSTATUS && CONFIG_JETSAM
3076 vm_pageout_inactive_external_forced_reactivate_limit--;
3077
3078 if (vm_pageout_inactive_external_forced_reactivate_limit <= 0) {
3079 vm_pageout_inactive_external_forced_reactivate_limit = vm_page_active_count + vm_page_inactive_count;
3080 /*
3081 * Possible deadlock scenario so request jetsam action
3082 */
3083 assert(object);
3084 vm_object_unlock(object);
3085 object = VM_OBJECT_NULL;
3086 vm_page_unlock_queues();
3087
3088 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_START,
3089 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3090
3091 /* Kill first suitable process. If this call returned FALSE, we might have simply purged a process instead. */
3092 if (memorystatus_kill_on_VM_page_shortage(FALSE) == TRUE) {
3093 VM_PAGEOUT_DEBUG(vm_pageout_inactive_external_forced_jetsam_count, 1);
3094 }
3095
3096 VM_DEBUG_CONSTANT_EVENT(vm_pageout_jetsam, VM_PAGEOUT_JETSAM, DBG_FUNC_END,
3097 vm_page_active_count, vm_page_inactive_count, vm_page_free_count, vm_page_free_count);
3098
3099 vm_page_lock_queues();
3100 delayed_unlock = 1;
3101 }
3102 #else /* CONFIG_MEMORYSTATUS && CONFIG_JETSAM */
3103 force_anonymous = TRUE;
3104 #endif
3105 inactive_burst_count = 0;
3106 goto done_with_inactivepage;
3107 } else {
3108 goto must_activate_page;
3109 }
3110 }
3111
3112 /*
3113 * we've got a page that we can steal...
3114 * eliminate all mappings and make sure
3115 * we have the up-to-date modified state
3116 *
3117 * if we need to do a pmap_disconnect then we
3118 * need to re-evaluate m->vmp_dirty since the pmap_disconnect
3119 * provides the true state atomically... the
3120 * page was still mapped up to the pmap_disconnect
3121 * and may have been dirtied at the last microsecond
3122 *
3123 * Note that if 'pmapped' is FALSE then the page is not
3124 * and has not been in any map, so there is no point calling
3125 * pmap_disconnect(). m->vmp_dirty could have been set in anticipation
3126 * of likely usage of the page.
3127 */
3128 if (m->vmp_pmapped == TRUE) {
3129 int pmap_options;
3130
3131 /*
3132 * Don't count this page as going into the compressor
3133 * if any of these are true:
3134 * 1) compressed pager isn't enabled
3135 * 2) Freezer enabled device with compressed pager
3136 * backend (exclusive use) i.e. most of the VM system
3137 * (including vm_pageout_scan) has no knowledge of
3138 * the compressor
3139 * 3) This page belongs to a file and hence will not be
3140 * sent into the compressor
3141 */
3142 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE ||
3143 object->internal == FALSE) {
3144 pmap_options = 0;
3145 } else if (m->vmp_dirty || m->vmp_precious) {
3146 /*
3147 * VM knows that this page is dirty (or
3148 * precious) and needs to be compressed
3149 * rather than freed.
3150 * Tell the pmap layer to count this page
3151 * as "compressed".
3152 */
3153 pmap_options = PMAP_OPTIONS_COMPRESSOR;
3154 } else {
3155 /*
3156 * VM does not know if the page needs to
3157 * be preserved but the pmap layer might tell
3158 * us if any mapping has "modified" it.
3159 * Let the pmap layer count this page
3160 * as compressed if and only if it has been
3161 * modified.
3162 */
3163 pmap_options =
3164 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
3165 }
3166 refmod_state = pmap_disconnect_options(VM_PAGE_GET_PHYS_PAGE(m),
3167 pmap_options,
3168 NULL);
3169 if (refmod_state & VM_MEM_MODIFIED) {
3170 SET_PAGE_DIRTY(m, FALSE);
3171 }
3172 }
3173
3174 /*
3175 * reset our count of pages that have been reclaimed
3176 * since the last page was 'stolen'
3177 */
3178 inactive_reclaim_run = 0;
3179
3180 /*
3181 * If it's clean and not precious, we can free the page.
3182 */
3183 if (!m->vmp_dirty && !m->vmp_precious) {
3184
3185 vm_pageout_state.vm_pageout_inactive_clean++;
3186
3187 /*
3188 * OK, at this point we have found a page we are going to free.
3189 */
3190 #if CONFIG_PHANTOM_CACHE
3191 if (!object->internal)
3192 vm_phantom_cache_add_ghost(m);
3193 #endif
3194 goto reclaim_page;
3195 }
3196
3197 /*
3198 * The page may have been dirtied since the last check
3199 * for a throttled target queue (which may have been skipped
3200 * if the page was clean then). With the dirty page
3201 * disconnected here, we can make one final check.
3202 */
3203 if (object->internal) {
3204 if (VM_PAGE_Q_THROTTLED(iq))
3205 inactive_throttled = TRUE;
3206 } else if (VM_PAGE_Q_THROTTLED(eq)) {
3207 inactive_throttled = TRUE;
3208 }
3209
3210 if (inactive_throttled == TRUE)
3211 goto throttle_inactive;
3212
3213 #if VM_PRESSURE_EVENTS
3214 #if CONFIG_JETSAM
3215
3216 /*
3217 * If Jetsam is enabled, then the sending
3218 * of memory pressure notifications is handled
3219 * from the same thread that takes care of high-water
3220 * and other jetsams i.e. the memorystatus_thread.
3221 */
3222
3223 #else /* CONFIG_JETSAM */
3224
3225 vm_pressure_response();
3226
3227 #endif /* CONFIG_JETSAM */
3228 #endif /* VM_PRESSURE_EVENTS */
3229
3230 if (page_prev_q_state == VM_PAGE_ON_SPECULATIVE_Q)
3231 VM_PAGEOUT_DEBUG(vm_pageout_speculative_dirty, 1);
3232
3233 if (object->internal)
3234 vm_pageout_vminfo.vm_pageout_inactive_dirty_internal++;
3235 else
3236 vm_pageout_vminfo.vm_pageout_inactive_dirty_external++;
3237
3238 /*
3239 * internal pages will go to the compressor...
3240 * external pages will go to the appropriate pager to be cleaned
3241 * and upon completion will end up on 'vm_page_queue_cleaned' which
3242 * is a preferred queue to steal from
3243 */
3244 vm_pageout_cluster(m);
3245 inactive_burst_count = 0;
3246
3247 done_with_inactivepage:
3248
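/*
 * once delayed_unlock_limit pages have been processed without
 * dropping the page queues lock, flush the locally batched free
 * pages and let other lock waiters run... otherwise, if
 * vm_pageout_scan_wants_object is set (we recently failed to take
 * an object lock), briefly drop the queues lock and pause to give
 * that lock's owner a chance to release it
 */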
3249 if (delayed_unlock++ > delayed_unlock_limit) {
3250 int freed = local_freed;
3251
3252 vm_pageout_prepare_to_block(&object, &delayed_unlock, &local_freeq, &local_freed,
3253 VM_PAGEOUT_PB_CONSIDER_WAKING_COMPACTOR_SWAPPER);
3254 if (freed == 0)
3255 lck_mtx_yield(&vm_page_queue_lock);
3256 } else if (vm_pageout_scan_wants_object) {
3257 vm_page_unlock_queues();
3258 mutex_pause(0);
3259 vm_page_lock_queues();
3260 }
3261 /*
3262 * back to top of pageout scan loop
3263 */
3264 }
3265 }
3266
3267
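/*
 * vm_page_free_reserve:
 *
 * Grow the reserved free-page pool by 'pages' (plus a compressor
 * cushion when a compressor is configured), subject to the
 * VM_PAGE_FREE_RESERVED_LIMIT cap, then recompute the derived
 * watermarks: vm_page_free_min, vm_page_free_target (kept at least
 * 5 pages above the minimum) and vm_page_throttle_limit (roughly
 * half of the free target).
 */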
3268 void
3269 vm_page_free_reserve(
3270 int pages)
3271 {
3272 int free_after_reserve;
3273
3274 if (VM_CONFIG_COMPRESSOR_IS_PRESENT) {
3275
3276 if ((vm_page_free_reserved + pages + COMPRESSOR_FREE_RESERVED_LIMIT) >= (VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT))
3277 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT + COMPRESSOR_FREE_RESERVED_LIMIT;
3278 else
3279 vm_page_free_reserved += (pages + COMPRESSOR_FREE_RESERVED_LIMIT);
3280
3281 } else {
3282 if ((vm_page_free_reserved + pages) >= VM_PAGE_FREE_RESERVED_LIMIT)
3283 vm_page_free_reserved = VM_PAGE_FREE_RESERVED_LIMIT;
3284 else
3285 vm_page_free_reserved += pages;
3286 }
3287 free_after_reserve = vm_pageout_state.vm_page_free_count_init - vm_page_free_reserved;
3288
3289 vm_page_free_min = vm_page_free_reserved +
3290 VM_PAGE_FREE_MIN(free_after_reserve);
3291
3292 if (vm_page_free_min > VM_PAGE_FREE_MIN_LIMIT)
3293 vm_page_free_min = VM_PAGE_FREE_MIN_LIMIT;
3294
3295 vm_page_free_target = vm_page_free_reserved +
3296 VM_PAGE_FREE_TARGET(free_after_reserve);
3297
3298 if (vm_page_free_target > VM_PAGE_FREE_TARGET_LIMIT)
3299 vm_page_free_target = VM_PAGE_FREE_TARGET_LIMIT;
3300
3301 if (vm_page_free_target < vm_page_free_min + 5)
3302 vm_page_free_target = vm_page_free_min + 5;
3303
3304 vm_page_throttle_limit = vm_page_free_target - (vm_page_free_target / 2);
3305 }
3306
3307 /*
3308 * vm_pageout is the high level pageout daemon.
3309 */
3310
3311 void
3312 vm_pageout_continue(void)
3313 {
3314 DTRACE_VM2(pgrrun, int, 1, (uint64_t *), NULL);
3315 VM_PAGEOUT_DEBUG(vm_pageout_scan_event_counter, 1);
3316
3317 #if !CONFIG_EMBEDDED
3318 lck_mtx_lock(&vm_page_queue_free_lock);
3319 vm_pageout_running = TRUE;
3320 lck_mtx_unlock(&vm_page_queue_free_lock);
3321 #endif /* CONFIG_EMBEDDED */
3322
3323 vm_pageout_scan();
3324 /*
3325 * we hold both the vm_page_queue_free_lock
3326 * and the vm_page_queues_lock at this point
3327 */
3328 assert(vm_page_free_wanted == 0);
3329 assert(vm_page_free_wanted_privileged == 0);
3330 assert_wait((event_t) &vm_page_free_wanted, THREAD_UNINT);
3331
3332 #if !CONFIG_EMBEDDED
3333 vm_pageout_running = FALSE;
3334 if (vm_pageout_waiter) {
3335 vm_pageout_waiter = FALSE;
3336 thread_wakeup((event_t)&vm_pageout_waiter);
3337 }
3338 #endif /* !CONFIG_EMBEDDED */
3339
3340 lck_mtx_unlock(&vm_page_queue_free_lock);
3341 vm_page_unlock_queues();
3342
3343 counter(c_vm_pageout_block++);
3344 thread_block((thread_continue_t)vm_pageout_continue);
3345 /*NOTREACHED*/
3346 }
3347
3348 #if !CONFIG_EMBEDDED
3349 kern_return_t
3350 vm_pageout_wait(uint64_t deadline)
3351 {
3352 kern_return_t kr;
3353
3354 lck_mtx_lock(&vm_page_queue_free_lock);
3355 for (kr = KERN_SUCCESS; vm_pageout_running && (KERN_SUCCESS == kr); ) {
3356 vm_pageout_waiter = TRUE;
3357 if (THREAD_AWAKENED != lck_mtx_sleep_deadline(
3358 &vm_page_queue_free_lock, LCK_SLEEP_DEFAULT,
3359 (event_t) &vm_pageout_waiter, THREAD_UNINT, deadline)) {
3360 kr = KERN_OPERATION_TIMED_OUT;
3361 }
3362 }
3363 lck_mtx_unlock(&vm_page_queue_free_lock);
3364
3365 return (kr);
3366 }
3367 #endif /* !CONFIG_EMBEDDED */
3368
3369
3370 static void
3371 vm_pageout_iothread_external_continue(struct vm_pageout_queue *q)
3372 {
3373 vm_page_t m = NULL;
3374 vm_object_t object;
3375 vm_object_offset_t offset;
3376 memory_object_t pager;
3377
3378 /* On systems with a compressor, the external IO thread clears its
3379 * VM privileged bit to accommodate large allocations (e.g. bulk UPL
3380 * creation)
3381 */
3382 if (vm_pageout_state.vm_pageout_internal_iothread != THREAD_NULL)
3383 current_thread()->options &= ~TH_OPT_VMPRIV;
3384
3385 vm_page_lockspin_queues();
3386
3387 while ( !vm_page_queue_empty(&q->pgo_pending) ) {
3388
3389 q->pgo_busy = TRUE;
3390 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3391
3392 assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3393 VM_PAGE_CHECK(m);
3394 /*
3395 * grab a snapshot of the object and offset this
3396 * page is tabled in so that we can relookup this
3397 * page after we've taken the object lock - these
3398 * fields are stable while we hold the page queues lock
3399 * but as soon as we drop it, there is nothing to keep
3400 * this page in this object... we hold an activity_in_progress
3401 * on this object which will keep it from terminating
3402 */
3403 object = VM_PAGE_OBJECT(m);
3404 offset = m->vmp_offset;
3405
3406 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3407 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3408
3409 vm_page_unlock_queues();
3410
3411 vm_object_lock(object);
3412
3413 m = vm_page_lookup(object, offset);
3414
3415 if (m == NULL ||
3416 m->vmp_busy || m->vmp_cleaning || !m->vmp_laundry || (m->vmp_q_state != VM_PAGE_NOT_ON_Q)) {
3417 /*
3418 * it's either the same page that someone else has
3419 * started cleaning (or it's finished cleaning or
3420 * been put back on the pageout queue), or
3421 * the page has been freed or we have found a
3422 * new page at this offset... in all of these cases
3423 * we merely need to release the activity_in_progress
3424 * we took when we put the page on the pageout queue
3425 */
3426 vm_object_activity_end(object);
3427 vm_object_unlock(object);
3428
3429 vm_page_lockspin_queues();
3430 continue;
3431 }
3432 pager = object->pager;
3433
3434 if (pager == MEMORY_OBJECT_NULL) {
3435 /*
3436 * This pager has been destroyed by either
3437 * memory_object_destroy or vm_object_destroy, and
3438 * so there is nowhere for the page to go.
3439 */
3440 if (m->vmp_free_when_done) {
3441 /*
3442 * Just free the page... VM_PAGE_FREE takes
3443 * care of cleaning up all the state...
3444 * including doing the vm_pageout_throttle_up
3445 */
3446 VM_PAGE_FREE(m);
3447 } else {
3448 vm_page_lockspin_queues();
3449
3450 vm_pageout_throttle_up(m);
3451 vm_page_activate(m);
3452
3453 vm_page_unlock_queues();
3454
3455 /*
3456 * And we are done with it.
3457 */
3458 }
3459 vm_object_activity_end(object);
3460 vm_object_unlock(object);
3461
3462 vm_page_lockspin_queues();
3463 continue;
3464 }
3465 #if 0
3466 /*
3467 * we don't hold the page queue lock
3468 * so this check isn't safe to make
3469 */
3470 VM_PAGE_CHECK(m);
3471 #endif
3472 /*
3473 * give back the activity_in_progress reference we
3474 * took when we queued up this page and replace it
3475 * with a paging_in_progress reference that will
3476 * also keep the paging offset from changing and
3477 * prevent the object from terminating
3478 */
3479 vm_object_activity_end(object);
3480 vm_object_paging_begin(object);
3481 vm_object_unlock(object);
3482
3483 /*
3484 * Send the data to the pager.
3485 * any pageout clustering happens there
3486 */
3487 memory_object_data_return(pager,
3488 m->vmp_offset + object->paging_offset,
3489 PAGE_SIZE,
3490 NULL,
3491 NULL,
3492 FALSE,
3493 FALSE,
3494 0);
3495
3496 vm_object_lock(object);
3497 vm_object_paging_end(object);
3498 vm_object_unlock(object);
3499
3500 vm_pageout_io_throttle();
3501
3502 vm_page_lockspin_queues();
3503 }
3504 q->pgo_busy = FALSE;
3505 q->pgo_idle = TRUE;
3506
3507 assert_wait((event_t) &q->pgo_pending, THREAD_UNINT);
3508 vm_page_unlock_queues();
3509
3510 thread_block_parameter((thread_continue_t)vm_pageout_iothread_external_continue, (void *) q);
3511 /*NOTREACHED*/
3512 }
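/*
 * Illustrative sketch, not compiled as part of this file: the
 * "snapshot, drop one lock, take the other, re-look-up and re-validate"
 * pattern used by the loop above, written against a hypothetical cache with
 * its own lock. All types and helpers are invented for illustration.
 */
#if 0
#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct sketch_entry {
	unsigned long key;
	bool          busy;
};

struct sketch_cache {
	pthread_mutex_t     lock;
	struct sketch_entry entries[64];
	size_t              nentries;
};

/* must be called with cache->lock held */
static struct sketch_entry *
sketch_lookup(struct sketch_cache *c, unsigned long key)
{
	for (size_t i = 0; i < c->nentries; i++) {
		if (c->entries[i].key == key)
			return &c->entries[i];
	}
	return NULL;
}

static bool
sketch_process(struct sketch_cache *cache, pthread_mutex_t *queue_lock,
    struct sketch_entry *e)
{
	/* snapshot the key while the queue lock still pins the entry */
	unsigned long key = e->key;

	pthread_mutex_unlock(queue_lock);
	pthread_mutex_lock(&cache->lock);

	/* re-look-up: the entry may have been freed or replaced meanwhile */
	e = sketch_lookup(cache, key);
	if (e == NULL || e->busy) {
		/* someone else got to it first; give up on this entry */
		pthread_mutex_unlock(&cache->lock);
		pthread_mutex_lock(queue_lock);
		return false;
	}

	/* ... safe to operate on the entry under cache->lock ... */

	pthread_mutex_unlock(&cache->lock);
	pthread_mutex_lock(queue_lock);
	return true;
}
#endif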
3513
3514
3515 #define MAX_FREE_BATCH 32
3516 uint32_t vm_compressor_time_thread; /* Set via sysctl to record time accrued by
3517 * this thread.
3518 */
3519
3520
3521 void
3522 vm_pageout_iothread_internal_continue(struct cq *);
3523 void
3524 vm_pageout_iothread_internal_continue(struct cq *cq)
3525 {
3526 struct vm_pageout_queue *q;
3527 vm_page_t m = NULL;
3528 boolean_t pgo_draining;
3529 vm_page_t local_q;
3530 int local_cnt;
3531 vm_page_t local_freeq = NULL;
3532 int local_freed = 0;
3533 int local_batch_size;
3534 #if DEVELOPMENT || DEBUG
3535 int ncomps = 0;
3536 boolean_t marked_active = FALSE;
3537 #endif
3538 KERNEL_DEBUG(0xe040000c | DBG_FUNC_END, 0, 0, 0, 0, 0);
3539
3540 q = cq->q;
3541 local_batch_size = q->pgo_maxlaundry / (vm_pageout_state.vm_compressor_thread_count * 2);
3542
3543 #if RECORD_THE_COMPRESSED_DATA
3544 if (q->pgo_laundry)
3545 c_compressed_record_init();
3546 #endif
3547 while (TRUE) {
3548 int pages_left_on_q = 0;
3549
3550 local_cnt = 0;
3551 local_q = NULL;
3552
3553 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_START, 0, 0, 0, 0, 0);
3554
3555 vm_page_lock_queues();
3556 #if DEVELOPMENT || DEBUG
3557 if (marked_active == FALSE) {
3558 vmct_active++;
3559 vmct_state[cq->id] = VMCT_ACTIVE;
3560 marked_active = TRUE;
3561 if (vmct_active == 1) {
3562 vm_compressor_epoch_start = mach_absolute_time();
3563 }
3564 }
3565 #endif
3566 KERNEL_DEBUG(0xe0400014 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3567
3568 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_START, q->pgo_laundry, 0, 0, 0, 0);
3569
3570 while ( !vm_page_queue_empty(&q->pgo_pending) && local_cnt < local_batch_size) {
3571
3572 vm_page_queue_remove_first(&q->pgo_pending, m, vm_page_t, vmp_pageq);
3573 assert(m->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q);
3574 VM_PAGE_CHECK(m);
3575
3576 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
3577 VM_PAGE_ZERO_PAGEQ_ENTRY(m);
3578 m->vmp_laundry = FALSE;
3579
3580 m->vmp_snext = local_q;
3581 local_q = m;
3582 local_cnt++;
3583 }
3584 if (local_q == NULL)
3585 break;
3586
3587 q->pgo_busy = TRUE;
3588
3589 if ((pgo_draining = q->pgo_draining) == FALSE) {
3590 vm_pageout_throttle_up_batch(q, local_cnt);
3591 pages_left_on_q = q->pgo_laundry;
3592 } else
3593 pages_left_on_q = q->pgo_laundry - local_cnt;
3594
3595 vm_page_unlock_queues();
3596
3597 #if !RECORD_THE_COMPRESSED_DATA
3598 if (pages_left_on_q >= local_batch_size && cq->id < (vm_pageout_state.vm_compressor_thread_count - 1)) {
3599 thread_wakeup((event_t) ((uintptr_t)&q->pgo_pending + cq->id + 1));
3600 }
3601 #endif
3602 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, q->pgo_laundry, 0, 0, 0, 0);
3603
3604 while (local_q) {
3605
3606 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_START, local_cnt, 0, 0, 0, 0);
3607
3608 m = local_q;
3609 local_q = m->vmp_snext;
3610 m->vmp_snext = NULL;
3611
3612 if (vm_pageout_compress_page(&cq->current_chead, cq->scratch_buf, m) == KERN_SUCCESS) {
3613 #if DEVELOPMENT || DEBUG
3614 ncomps++;
3615 #endif
3616 KERNEL_DEBUG(0xe0400024 | DBG_FUNC_END, local_cnt, 0, 0, 0, 0);
3617
3618 m->vmp_snext = local_freeq;
3619 local_freeq = m;
3620 local_freed++;
3621
3622 if (local_freed >= MAX_FREE_BATCH) {
3623
3624 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3625
3626 vm_page_free_list(local_freeq, TRUE);
3627
3628 local_freeq = NULL;
3629 local_freed = 0;
3630 }
3631 }
3632 #if !CONFIG_JETSAM
3633 while (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3634 kern_return_t wait_result;
3635 int need_wakeup = 0;
3636
3637 if (local_freeq) {
3638 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3639
3640 vm_page_free_list(local_freeq, TRUE);
3641 local_freeq = NULL;
3642 local_freed = 0;
3643
3644 continue;
3645 }
3646 lck_mtx_lock_spin(&vm_page_queue_free_lock);
3647
3648 if (vm_page_free_count < COMPRESSOR_FREE_RESERVED_LIMIT) {
3649
3650 if (vm_page_free_wanted_privileged++ == 0)
3651 need_wakeup = 1;
3652 wait_result = assert_wait((event_t)&vm_page_free_wanted_privileged, THREAD_UNINT);
3653
3654 lck_mtx_unlock(&vm_page_queue_free_lock);
3655
3656 if (need_wakeup)
3657 thread_wakeup((event_t)&vm_page_free_wanted);
3658
3659 if (wait_result == THREAD_WAITING)
3660 thread_block(THREAD_CONTINUE_NULL);
3661
3662 } else
3663 lck_mtx_unlock(&vm_page_queue_free_lock);
3664 }
3665 #endif
3666 }
3667 if (local_freeq) {
3668 OSAddAtomic64(local_freed, &vm_pageout_vminfo.vm_pageout_compressions);
3669
3670 vm_page_free_list(local_freeq, TRUE);
3671 local_freeq = NULL;
3672 local_freed = 0;
3673 }
3674 if (pgo_draining == TRUE) {
3675 vm_page_lockspin_queues();
3676 vm_pageout_throttle_up_batch(q, local_cnt);
3677 vm_page_unlock_queues();
3678 }
3679 }
3680 KERNEL_DEBUG(0xe040000c | DBG_FUNC_START, 0, 0, 0, 0, 0);
3681
3682 /*
3683 * queue lock is held and our q is empty
3684 */
3685 q->pgo_busy = FALSE;
3686 q->pgo_idle = TRUE;
3687
3688 assert_wait((event_t) ((uintptr_t)&q->pgo_pending + cq->id), THREAD_UNINT);
3689 #if DEVELOPMENT || DEBUG
3690 if (marked_active == TRUE) {
3691 vmct_active--;
3692 vmct_state[cq->id] = VMCT_IDLE;
3693
3694 if (vmct_active == 0) {
3695 vm_compressor_epoch_stop = mach_absolute_time();
3696 assertf(vm_compressor_epoch_stop >= vm_compressor_epoch_start,
3697 "Compressor epoch non-monotonic: 0x%llx -> 0x%llx",
3698 vm_compressor_epoch_start, vm_compressor_epoch_stop);
3699 /* This interval includes intervals where one or more
3700 * compressor threads were pre-empted
3701 */
3702 vmct_stats.vmct_cthreads_total += vm_compressor_epoch_stop - vm_compressor_epoch_start;
3703 }
3704 }
3705 #endif
3706 vm_page_unlock_queues();
3707 #if DEVELOPMENT || DEBUG
3708 if (__improbable(vm_compressor_time_thread)) {
3709 vmct_stats.vmct_runtimes[cq->id] = thread_get_runtime_self();
3710 vmct_stats.vmct_pages[cq->id] += ncomps;
3711 vmct_stats.vmct_iterations[cq->id]++;
3712 if (ncomps > vmct_stats.vmct_maxpages[cq->id]) {
3713 vmct_stats.vmct_maxpages[cq->id] = ncomps;
3714 }
3715 if (ncomps < vmct_stats.vmct_minpages[cq->id]) {
3716 vmct_stats.vmct_minpages[cq->id] = ncomps;
3717 }
3718 }
3719 #endif
3720
3721 KERNEL_DEBUG(0xe0400018 | DBG_FUNC_END, 0, 0, 0, 0, 0);
3722
3723 thread_block_parameter((thread_continue_t)vm_pageout_iothread_internal_continue, (void *) cq);
3724 /*NOTREACHED*/
3725 }
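/*
 * Illustrative sketch, not compiled as part of this file: the batching
 * pattern used by the compressor thread above - detach a bounded batch of
 * work under the queue lock, drop the lock, process locally, and release
 * results in bounded chunks. All names are hypothetical.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

#define SKETCH_BATCH       16   /* detach at most this many per pass */
#define SKETCH_FREE_BATCH  32   /* flush completed items in chunks   */

struct sketch_item {
	struct sketch_item *next;
};

struct sketch_queue {
	pthread_mutex_t     lock;
	struct sketch_item *head;
};

void sketch_compress(struct sketch_item *);     /* hypothetical */
void sketch_free_list(struct sketch_item *);    /* hypothetical */

static void
sketch_drain(struct sketch_queue *q)
{
	struct sketch_item *local = NULL, *freeq = NULL;
	int freed = 0;

	/* 1. detach a bounded batch while holding the queue lock */
	pthread_mutex_lock(&q->lock);
	for (int n = 0; n < SKETCH_BATCH && q->head != NULL; n++) {
		struct sketch_item *it = q->head;
		q->head  = it->next;
		it->next = local;
		local    = it;
	}
	pthread_mutex_unlock(&q->lock);

	/* 2. process the batch without holding the queue lock */
	while (local != NULL) {
		struct sketch_item *it = local;
		local = it->next;

		sketch_compress(it);

		it->next = freeq;
		freeq    = it;
		if (++freed >= SKETCH_FREE_BATCH) {
			sketch_free_list(freeq);    /* bounded flush */
			freeq = NULL;
			freed = 0;
		}
	}
	if (freeq != NULL)
		sketch_free_list(freeq);
}
#endif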
3726
3727
3728 kern_return_t
3729 vm_pageout_compress_page(void **current_chead, char *scratch_buf, vm_page_t m)
3730 {
3731 vm_object_t object;
3732 memory_object_t pager;
3733 int compressed_count_delta;
3734 kern_return_t retval;
3735
3736 object = VM_PAGE_OBJECT(m);
3737
3738 assert(!m->vmp_free_when_done);
3739 assert(!m->vmp_laundry);
3740
3741 pager = object->pager;
3742
3743 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3744
3745 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_START, object, pager, 0, 0, 0);
3746
3747 vm_object_lock(object);
3748
3749 /*
3750 * If there is no memory object for the page, create
3751 * one and hand it to the compression pager.
3752 */
3753
3754 if (!object->pager_initialized)
3755 vm_object_collapse(object, (vm_object_offset_t) 0, TRUE);
3756 if (!object->pager_initialized)
3757 vm_object_compressor_pager_create(object);
3758
3759 pager = object->pager;
3760
3761 if (!object->pager_initialized || pager == MEMORY_OBJECT_NULL) {
3762 /*
3763 * Still no pager for the object,
3764 * or the pager has been destroyed.
3765 * Reactivate the page.
3766 *
3767 * Should only happen if there is no
3768 * compression pager
3769 */
3770 PAGE_WAKEUP_DONE(m);
3771
3772 vm_page_lockspin_queues();
3773 vm_page_activate(m);
3774 VM_PAGEOUT_DEBUG(vm_pageout_dirty_no_pager, 1);
3775 vm_page_unlock_queues();
3776
3777 /*
3778 * And we are done with it.
3779 */
3780 vm_object_activity_end(object);
3781 vm_object_unlock(object);
3782
3783 return KERN_FAILURE;
3784 }
3785 vm_object_unlock(object);
3786
3787 KERNEL_DEBUG(0xe0400010 | DBG_FUNC_END, object, pager, 0, 0, 0);
3788 }
3789 assert(object->pager_initialized && pager != MEMORY_OBJECT_NULL);
3790 assert(object->activity_in_progress > 0);
3791
3792 retval = vm_compressor_pager_put(
3793 pager,
3794 m->vmp_offset + object->paging_offset,
3795 VM_PAGE_GET_PHYS_PAGE(m),
3796 current_chead,
3797 scratch_buf,
3798 &compressed_count_delta);
3799
3800 vm_object_lock(object);
3801
3802 assert(object->activity_in_progress > 0);
3803 assert(VM_PAGE_OBJECT(m) == object);
3804 assert( !VM_PAGE_WIRED(m));
3805
3806 vm_compressor_pager_count(pager,
3807 compressed_count_delta,
3808 FALSE, /* shared_lock */
3809 object);
3810
3811 if (retval == KERN_SUCCESS) {
3812 /*
3813 * If the object is purgeable, its owner's
3814 * purgeable ledgers will be updated in
3815 * vm_page_remove() but the page still
3816 * contributes to the owner's memory footprint,
3817 * so account for it as such.
3818 */
3819 if ((object->purgable != VM_PURGABLE_DENY ||
3820 object->vo_ledger_tag) &&
3821 object->vo_owner != NULL) {
3822 /* one more compressed purgeable/tagged page */
3823 vm_object_owner_compressed_update(object,
3824 +1);
3825 }
3826 VM_STAT_INCR(compressions);
3827
3828 if (m->vmp_tabled)
3829 vm_page_remove(m, TRUE);
3830
3831 } else {
3832 PAGE_WAKEUP_DONE(m);
3833
3834 vm_page_lockspin_queues();
3835
3836 vm_page_activate(m);
3837 vm_pageout_vminfo.vm_compressor_failed++;
3838
3839 vm_page_unlock_queues();
3840 }
3841 vm_object_activity_end(object);
3842 vm_object_unlock(object);
3843
3844 return retval;
3845 }
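/*
 * Illustrative sketch, not compiled as part of this file: the
 * success/failure shape of vm_pageout_compress_page() above, with
 * hypothetical helpers standing in for the pager and page-queue operations.
 */
#if 0
#include <stdbool.h>

struct sketch_page;

bool sketch_compressor_put(struct sketch_page *);    /* hypothetical: true on success */
void sketch_account_compression(void);               /* hypothetical */
void sketch_remove_from_object(struct sketch_page *);/* hypothetical */
void sketch_reactivate(struct sketch_page *);        /* hypothetical */

static bool
sketch_compress_page(struct sketch_page *p)
{
	if (sketch_compressor_put(p)) {
		/* page now lives in the compressor: account for it and unhook it */
		sketch_account_compression();
		sketch_remove_from_object(p);
		return true;
	}
	/* compression failed: put the page back on the active queue */
	sketch_reactivate(p);
	return false;
}
#endif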
3846
3847
3848 static void
3849 vm_pageout_adjust_eq_iothrottle(struct vm_pageout_queue *eq, boolean_t req_lowpriority)
3850 {
3851 uint32_t policy;
3852
3853 if (hibernate_cleaning_in_progress == TRUE)
3854 req_lowpriority = FALSE;
3855
3856 if (eq->pgo_inited == TRUE && eq->pgo_lowpriority != req_lowpriority) {
3857
3858 vm_page_unlock_queues();
3859
3860 if (req_lowpriority == TRUE) {
3861 policy = THROTTLE_LEVEL_PAGEOUT_THROTTLED;
3862 DTRACE_VM(laundrythrottle);
3863 } else {
3864 policy = THROTTLE_LEVEL_PAGEOUT_UNTHROTTLED;
3865 DTRACE_VM(laundryunthrottle);
3866 }
3867 proc_set_thread_policy_with_tid(kernel_task, eq->pgo_tid,
3868 TASK_POLICY_EXTERNAL, TASK_POLICY_IO, policy);
3869
3870 eq->pgo_lowpriority = req_lowpriority;
3871
3872 vm_page_lock_queues();
3873 }
3874 }
3875
3876
3877 static void
3878 vm_pageout_iothread_external(void)
3879 {
3880 thread_t self = current_thread();
3881
3882 self->options |= TH_OPT_VMPRIV;
3883
3884 DTRACE_VM2(laundrythrottle, int, 1, (uint64_t *), NULL);
3885
3886 proc_set_thread_policy(self, TASK_POLICY_EXTERNAL,
3887 TASK_POLICY_IO, THROTTLE_LEVEL_PAGEOUT_THROTTLED);
3888
3889 vm_page_lock_queues();
3890
3891 vm_pageout_queue_external.pgo_tid = self->thread_id;
3892 vm_pageout_queue_external.pgo_lowpriority = TRUE;
3893 vm_pageout_queue_external.pgo_inited = TRUE;
3894
3895 vm_page_unlock_queues();
3896
3897 vm_pageout_iothread_external_continue(&vm_pageout_queue_external);
3898
3899 /*NOTREACHED*/
3900 }
3901
3902
3903 static void
3904 vm_pageout_iothread_internal(struct cq *cq)
3905 {
3906 thread_t self = current_thread();
3907
3908 self->options |= TH_OPT_VMPRIV;
3909
3910 vm_page_lock_queues();
3911
3912 vm_pageout_queue_internal.pgo_tid = self->thread_id;
3913 vm_pageout_queue_internal.pgo_lowpriority = TRUE;
3914 vm_pageout_queue_internal.pgo_inited = TRUE;
3915
3916 vm_page_unlock_queues();
3917
3918 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
3919 thread_vm_bind_group_add();
3920
3921
3922 thread_set_thread_name(current_thread(), "VM_compressor");
3923 #if DEVELOPMENT || DEBUG
3924 vmct_stats.vmct_minpages[cq->id] = INT32_MAX;
3925 #endif
3926 vm_pageout_iothread_internal_continue(cq);
3927
3928 /*NOTREACHED*/
3929 }
3930
3931 kern_return_t
3932 vm_set_buffer_cleanup_callout(boolean_t (*func)(int))
3933 {
3934 if (OSCompareAndSwapPtr(NULL, func, (void * volatile *) &consider_buffer_cache_collect)) {
3935 return KERN_SUCCESS;
3936 } else {
3937 return KERN_FAILURE; /* Already set */
3938 }
3939 }
3940
3941 extern boolean_t memorystatus_manual_testing_on;
3942 extern unsigned int memorystatus_level;
3943
3944
3945 #if VM_PRESSURE_EVENTS
3946
3947 boolean_t vm_pressure_events_enabled = FALSE;
3948
3949 void
3950 vm_pressure_response(void)
3951 {
3952
3953 vm_pressure_level_t old_level = kVMPressureNormal;
3954 int new_level = -1;
3955 unsigned int total_pages;
3956 uint64_t available_memory = 0;
3957
3958 if (vm_pressure_events_enabled == FALSE)
3959 return;
3960
3961 #if CONFIG_EMBEDDED
3962
3963 available_memory = (uint64_t) memorystatus_available_pages;
3964
3965 #else /* CONFIG_EMBEDDED */
3966
3967 available_memory = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3968 memorystatus_available_pages = (uint64_t) AVAILABLE_NON_COMPRESSED_MEMORY;
3969
3970 #endif /* CONFIG_EMBEDDED */
3971
3972 total_pages = (unsigned int) atop_64(max_mem);
3973 #if CONFIG_SECLUDED_MEMORY
3974 total_pages -= vm_page_secluded_count;
3975 #endif /* CONFIG_SECLUDED_MEMORY */
3976 memorystatus_level = (unsigned int) ((available_memory * 100) / total_pages);
3977
3978 if (memorystatus_manual_testing_on) {
3979 return;
3980 }
3981
3982 old_level = memorystatus_vm_pressure_level;
3983
3984 switch (memorystatus_vm_pressure_level) {
3985
3986 case kVMPressureNormal:
3987 {
3988 if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
3989 new_level = kVMPressureCritical;
3990 } else if (VM_PRESSURE_NORMAL_TO_WARNING()) {
3991 new_level = kVMPressureWarning;
3992 }
3993 break;
3994 }
3995
3996 case kVMPressureWarning:
3997 case kVMPressureUrgent:
3998 {
3999 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4000 new_level = kVMPressureNormal;
4001 } else if (VM_PRESSURE_WARNING_TO_CRITICAL()) {
4002 new_level = kVMPressureCritical;
4003 }
4004 break;
4005 }
4006
4007 case kVMPressureCritical:
4008 {
4009 if (VM_PRESSURE_WARNING_TO_NORMAL()) {
4010 new_level = kVMPressureNormal;
4011 } else if (VM_PRESSURE_CRITICAL_TO_WARNING()) {
4012 new_level = kVMPressureWarning;
4013 }
4014 break;
4015 }
4016
4017 default:
4018 return;
4019 }
4020
4021 if (new_level != -1) {
4022 memorystatus_vm_pressure_level = (vm_pressure_level_t) new_level;
4023
4024 if (new_level != old_level) {
4025 VM_DEBUG_CONSTANT_EVENT(vm_pressure_level_change, VM_PRESSURE_LEVEL_CHANGE, DBG_FUNC_NONE,
4026 new_level, old_level, 0, 0);
4027 }
4028
4029 if ((memorystatus_vm_pressure_level != kVMPressureNormal) || (old_level != memorystatus_vm_pressure_level)) {
4030 if (vm_pageout_state.vm_pressure_thread_running == FALSE) {
4031 thread_wakeup(&vm_pressure_thread);
4032 }
4033
4034 if (old_level != memorystatus_vm_pressure_level) {
4035 thread_wakeup(&vm_pageout_state.vm_pressure_changed);
4036 }
4037 }
4038 }
4039
4040 }
4041 #endif /* VM_PRESSURE_EVENTS */
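/*
 * Illustrative sketch, not compiled as part of this file: the
 * level-transition logic of vm_pressure_response() above as a standalone
 * state machine. The predicate functions stand in for the VM_PRESSURE_*()
 * macros and are hypothetical; kVMPressureUrgent, which the code above
 * treats like kVMPressureWarning, is omitted.
 */
#if 0
typedef enum {
	SKETCH_PRESSURE_NORMAL,
	SKETCH_PRESSURE_WARNING,
	SKETCH_PRESSURE_CRITICAL
} sketch_pressure_t;

int sketch_normal_to_warning(void);      /* hypothetical predicates */
int sketch_warning_to_critical(void);
int sketch_warning_to_normal(void);
int sketch_critical_to_warning(void);

static sketch_pressure_t
sketch_next_level(sketch_pressure_t cur)
{
	switch (cur) {
	case SKETCH_PRESSURE_NORMAL:
		if (sketch_warning_to_critical())
			return SKETCH_PRESSURE_CRITICAL;  /* normal can jump straight to critical */
		if (sketch_normal_to_warning())
			return SKETCH_PRESSURE_WARNING;
		break;
	case SKETCH_PRESSURE_WARNING:
		if (sketch_warning_to_normal())
			return SKETCH_PRESSURE_NORMAL;
		if (sketch_warning_to_critical())
			return SKETCH_PRESSURE_CRITICAL;
		break;
	case SKETCH_PRESSURE_CRITICAL:
		if (sketch_warning_to_normal())
			return SKETCH_PRESSURE_NORMAL;
		if (sketch_critical_to_warning())
			return SKETCH_PRESSURE_WARNING;
		break;
	}
	return cur;    /* no transition */
}
#endif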
4042
4043 kern_return_t
4044 mach_vm_pressure_level_monitor(__unused boolean_t wait_for_pressure, __unused unsigned int *pressure_level) {
4045
4046 #if CONFIG_EMBEDDED
4047
4048 return KERN_FAILURE;
4049
4050 #elif !VM_PRESSURE_EVENTS
4051
4052 return KERN_FAILURE;
4053
4054 #else /* VM_PRESSURE_EVENTS */
4055
4056 kern_return_t kr = KERN_SUCCESS;
4057
4058 if (pressure_level != NULL) {
4059
4060 vm_pressure_level_t old_level = memorystatus_vm_pressure_level;
4061
4062 if (wait_for_pressure == TRUE) {
4063 wait_result_t wr = 0;
4064
4065 while (old_level == *pressure_level) {
4066 wr = assert_wait((event_t) &vm_pageout_state.vm_pressure_changed,
4067 THREAD_INTERRUPTIBLE);
4068 if (wr == THREAD_WAITING) {
4069 wr = thread_block(THREAD_CONTINUE_NULL);
4070 }
4071 if (wr == THREAD_INTERRUPTED) {
4072 return KERN_ABORTED;
4073 }
4074 if (wr == THREAD_AWAKENED) {
4075
4076 old_level = memorystatus_vm_pressure_level;
4077
4078 if (old_level != *pressure_level) {
4079 break;
4080 }
4081 }
4082 }
4083 }
4084
4085 *pressure_level = old_level;
4086 kr = KERN_SUCCESS;
4087 } else {
4088 kr = KERN_INVALID_ARGUMENT;
4089 }
4090
4091 return kr;
4092 #endif /* VM_PRESSURE_EVENTS */
4093 }
4094
4095 #if VM_PRESSURE_EVENTS
4096 void
4097 vm_pressure_thread(void) {
4098 static boolean_t thread_initialized = FALSE;
4099
4100 if (thread_initialized == TRUE) {
4101 vm_pageout_state.vm_pressure_thread_running = TRUE;
4102 consider_vm_pressure_events();
4103 vm_pageout_state.vm_pressure_thread_running = FALSE;
4104 }
4105
4106 thread_set_thread_name(current_thread(), "VM_pressure");
4107 thread_initialized = TRUE;
4108 assert_wait((event_t) &vm_pressure_thread, THREAD_UNINT);
4109 thread_block((thread_continue_t)vm_pressure_thread);
4110 }
4111 #endif /* VM_PRESSURE_EVENTS */
4112
4113
4114 /*
4115 * called once per second via "compute_averages"
4116 */
4117 void
4118 compute_pageout_gc_throttle(__unused void *arg)
4119 {
4120 if (vm_pageout_vminfo.vm_pageout_considered_page != vm_pageout_state.vm_pageout_considered_page_last) {
4121
4122 vm_pageout_state.vm_pageout_considered_page_last = vm_pageout_vminfo.vm_pageout_considered_page;
4123
4124 thread_wakeup((event_t) &vm_pageout_garbage_collect);
4125 }
4126 }
4127
4128 /*
4129 * vm_pageout_garbage_collect can also be called when the zone allocator needs
4130 * to call zone_gc on a different thread in order to trigger zone-map-exhaustion
4131 * jetsams. We need to check if the zone map size is above its jetsam limit to
4132 * decide if this was indeed the case.
4133 *
4134 * We need to do this on a different thread because of the following reasons:
4135 *
4136 * 1. In the case of synchronous jetsams, the leaking process can try to jetsam
4137 * itself causing the system to hang. We perform synchronous jetsams if we're
4138 * leaking in the VM map entries zone, so the leaking process could be doing a
4139 * zalloc for a VM map entry while holding its vm_map lock, when it decides to
4140 * jetsam itself. We also need the vm_map lock on the process termination path,
4141 * which would now lead the dying process to deadlock against itself.
4142 *
4143 * 2. The jetsam path might need to allocate zone memory itself. We could try
4144 * using the non-blocking variant of zalloc for this path, but we can still
4145 * end up trying to do a kernel_memory_allocate when the zone_map is almost
4146 * full.
4147 */
4148
4149 extern boolean_t is_zone_map_nearing_exhaustion(void);
4150
4151 void
4152 vm_pageout_garbage_collect(int collect)
4153 {
4154 if (collect) {
4155 if (is_zone_map_nearing_exhaustion()) {
4156 /*
4157 * Woken up by the zone allocator for zone-map-exhaustion jetsams.
4158 *
4159 * Bail out after calling zone_gc (which triggers the
4160 * zone-map-exhaustion jetsams). If we fall through, the subsequent
4161 * operations that clear out a bunch of caches might allocate zone
4162 * memory themselves (e.g. vm_map operations would need VM map
4163 * entries). Since the zone map is almost full at this point, we
4164 * could end up with a panic. We just need to quickly jetsam a
4165 * process and exit here.
4166 *
4167 * It could so happen that we were woken up to relieve memory
4168 * pressure and the zone map also happened to be near its limit at
4169 * the time, in which case we'll skip out early. But that should be
4170 * ok; if memory pressure persists, the thread will simply be woken
4171 * up again.
4172 */
4173 consider_zone_gc(TRUE);
4174
4175 } else {
4176 /* Woken up by vm_pageout_scan or compute_pageout_gc_throttle. */
4177 boolean_t buf_large_zfree = FALSE;
4178 boolean_t first_try = TRUE;
4179
4180 stack_collect();
4181
4182 consider_machine_collect();
4183 mbuf_drain(FALSE);
4184
4185 do {
4186 if (consider_buffer_cache_collect != NULL) {
4187 buf_large_zfree = (*consider_buffer_cache_collect)(0);
4188 }
4189 if (first_try == TRUE || buf_large_zfree == TRUE) {
4190 /*
4191 * consider_zone_gc should be last, because the other operations
4192 * might return memory to zones.
4193 */
4194 consider_zone_gc(FALSE);
4195 }
4196 first_try = FALSE;
4197
4198 } while (buf_large_zfree == TRUE && vm_page_free_count < vm_page_free_target);
4199
4200 consider_machine_adjust();
4201 }
4202 }
4203
4204 assert_wait((event_t) &vm_pageout_garbage_collect, THREAD_UNINT);
4205
4206 thread_block_parameter((thread_continue_t) vm_pageout_garbage_collect, (void *)1);
4207 /*NOTREACHED*/
4208 }
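/*
 * Illustrative sketch, not compiled as part of this file: the retry shape of
 * the cache-collection loop above - keep collecting while the buffer-cache
 * pass is still yielding memory and the free-page goal has not been met.
 * The helpers are hypothetical.
 */
#if 0
#include <stdbool.h>

bool sketch_buffer_cache_collect(void);  /* hypothetical: true if it freed large zone elements */
void sketch_zone_gc(void);               /* hypothetical */
unsigned int sketch_free_pages(void);    /* hypothetical */

static void
sketch_collect(unsigned int free_target)
{
	bool freed_large = false;
	bool first_try   = true;

	do {
		freed_large = sketch_buffer_cache_collect();

		/* zone GC last, since the other passes may return memory to zones */
		if (first_try || freed_large)
			sketch_zone_gc();

		first_try = false;
	} while (freed_large && sketch_free_pages() < free_target);
}
#endif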
4209
4210
4211 #if VM_PAGE_BUCKETS_CHECK
4212 #if VM_PAGE_FAKE_BUCKETS
4213 extern vm_map_offset_t vm_page_fake_buckets_start, vm_page_fake_buckets_end;
4214 #endif /* VM_PAGE_FAKE_BUCKETS */
4215 #endif /* VM_PAGE_BUCKETS_CHECK */
4216
4217
4218
4219 void
4220 vm_set_restrictions()
4221 {
4222 host_basic_info_data_t hinfo;
4223 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4224
4225 #define BSD_HOST 1
4226 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4227
4228 assert(hinfo.max_cpus > 0);
4229
4230 if (hinfo.max_cpus <= 3) {
4231 /*
4232 * on systems with a limited number of CPUS, bind the
4233 * 4 major threads that can free memory and that tend to use
4234 * a fair bit of CPU under pressured conditions to a single processor.
4235 * This insures that these threads don't hog all of the available CPUs
4236 * (important for camera launch), while allowing them to run independently
4237 * w/r to locks... the 4 threads are
4238 * vm_pageout_scan, vm_pageout_iothread_internal (compressor),
4239 * vm_compressor_swap_trigger_thread (minor and major compactions),
4240 * memorystatus_thread (jetsams).
4241 *
4242 * the first time the thread is run, it is responsible for checking the
4243 * state of vm_restricted_to_single_processor, and if TRUE it calls
4244 * thread_bind_master... someday this should be replaced with a group
4245 * scheduling mechanism and KPI.
4246 */
4247 vm_pageout_state.vm_restricted_to_single_processor = TRUE;
4248 } else
4249 vm_pageout_state.vm_restricted_to_single_processor = FALSE;
4250 }
4251
4252 void
4253 vm_pageout(void)
4254 {
4255 thread_t self = current_thread();
4256 thread_t thread;
4257 kern_return_t result;
4258 spl_t s;
4259
4260 /*
4261 * Set thread privileges.
4262 */
4263 s = splsched();
4264
4265 thread_lock(self);
4266 self->options |= TH_OPT_VMPRIV;
4267 sched_set_thread_base_priority(self, BASEPRI_VM);
4268 thread_unlock(self);
4269
4270 if (!self->reserved_stack)
4271 self->reserved_stack = self->kernel_stack;
4272
4273 if (vm_pageout_state.vm_restricted_to_single_processor == TRUE)
4274 thread_vm_bind_group_add();
4275
4276 splx(s);
4277
4278 thread_set_thread_name(current_thread(), "VM_pageout_scan");
4279
4280 /*
4281 * Initialize some paging parameters.
4282 */
4283
4284 vm_pageout_state.vm_pressure_thread_running = FALSE;
4285 vm_pageout_state.vm_pressure_changed = FALSE;
4286 vm_pageout_state.memorystatus_purge_on_warning = 2;
4287 vm_pageout_state.memorystatus_purge_on_urgent = 5;
4288 vm_pageout_state.memorystatus_purge_on_critical = 8;
4289 vm_pageout_state.vm_page_speculative_q_age_ms = VM_PAGE_SPECULATIVE_Q_AGE_MS;
4290 vm_pageout_state.vm_page_speculative_percentage = 5;
4291 vm_pageout_state.vm_page_speculative_target = 0;
4292
4293 vm_pageout_state.vm_pageout_external_iothread = THREAD_NULL;
4294 vm_pageout_state.vm_pageout_internal_iothread = THREAD_NULL;
4295
4296 vm_pageout_state.vm_pageout_swap_wait = 0;
4297 vm_pageout_state.vm_pageout_idle_wait = 0;
4298 vm_pageout_state.vm_pageout_empty_wait = 0;
4299 vm_pageout_state.vm_pageout_burst_wait = 0;
4300 vm_pageout_state.vm_pageout_deadlock_wait = 0;
4301 vm_pageout_state.vm_pageout_deadlock_relief = 0;
4302 vm_pageout_state.vm_pageout_burst_inactive_throttle = 0;
4303
4304 vm_pageout_state.vm_pageout_inactive = 0;
4305 vm_pageout_state.vm_pageout_inactive_used = 0;
4306 vm_pageout_state.vm_pageout_inactive_clean = 0;
4307
4308 vm_pageout_state.vm_memory_pressure = 0;
4309 vm_pageout_state.vm_page_filecache_min = 0;
4310 #if CONFIG_JETSAM
4311 vm_pageout_state.vm_page_filecache_min_divisor = 70;
4312 vm_pageout_state.vm_page_xpmapped_min_divisor = 40;
4313 #else
4314 vm_pageout_state.vm_page_filecache_min_divisor = 27;
4315 vm_pageout_state.vm_page_xpmapped_min_divisor = 36;
4316 #endif
4317 vm_pageout_state.vm_page_free_count_init = vm_page_free_count;
4318
4319 vm_pageout_state.vm_pageout_considered_page_last = 0;
4320
4321 if (vm_pageout_state.vm_pageout_swap_wait == 0)
4322 vm_pageout_state.vm_pageout_swap_wait = VM_PAGEOUT_SWAP_WAIT;
4323
4324 if (vm_pageout_state.vm_pageout_idle_wait == 0)
4325 vm_pageout_state.vm_pageout_idle_wait = VM_PAGEOUT_IDLE_WAIT;
4326
4327 if (vm_pageout_state.vm_pageout_burst_wait == 0)
4328 vm_pageout_state.vm_pageout_burst_wait = VM_PAGEOUT_BURST_WAIT;
4329
4330 if (vm_pageout_state.vm_pageout_empty_wait == 0)
4331 vm_pageout_state.vm_pageout_empty_wait = VM_PAGEOUT_EMPTY_WAIT;
4332
4333 if (vm_pageout_state.vm_pageout_deadlock_wait == 0)
4334 vm_pageout_state.vm_pageout_deadlock_wait = VM_PAGEOUT_DEADLOCK_WAIT;
4335
4336 if (vm_pageout_state.vm_pageout_deadlock_relief == 0)
4337 vm_pageout_state.vm_pageout_deadlock_relief = VM_PAGEOUT_DEADLOCK_RELIEF;
4338
4339 if (vm_pageout_state.vm_pageout_burst_inactive_throttle == 0)
4340 vm_pageout_state.vm_pageout_burst_inactive_throttle = VM_PAGEOUT_BURST_INACTIVE_THROTTLE;
4341 /*
4342 * even if we've already called vm_page_free_reserve,
4343 * call it again here to ensure that the targets are
4344 * accurately calculated (it uses vm_page_free_count_init);
4345 * calling it with an arg of 0 will not change the reserve,
4346 * but will re-calculate free_min and free_target
4347 */
4348 if (vm_page_free_reserved < VM_PAGE_FREE_RESERVED(processor_count)) {
4349 vm_page_free_reserve((VM_PAGE_FREE_RESERVED(processor_count)) - vm_page_free_reserved);
4350 } else
4351 vm_page_free_reserve(0);
4352
4353
4354 vm_page_queue_init(&vm_pageout_queue_external.pgo_pending);
4355 vm_pageout_queue_external.pgo_maxlaundry = VM_PAGE_LAUNDRY_MAX;
4356 vm_pageout_queue_external.pgo_laundry = 0;
4357 vm_pageout_queue_external.pgo_idle = FALSE;
4358 vm_pageout_queue_external.pgo_busy = FALSE;
4359 vm_pageout_queue_external.pgo_throttled = FALSE;
4360 vm_pageout_queue_external.pgo_draining = FALSE;
4361 vm_pageout_queue_external.pgo_lowpriority = FALSE;
4362 vm_pageout_queue_external.pgo_tid = -1;
4363 vm_pageout_queue_external.pgo_inited = FALSE;
4364
4365 vm_page_queue_init(&vm_pageout_queue_internal.pgo_pending);
4366 vm_pageout_queue_internal.pgo_maxlaundry = 0;
4367 vm_pageout_queue_internal.pgo_laundry = 0;
4368 vm_pageout_queue_internal.pgo_idle = FALSE;
4369 vm_pageout_queue_internal.pgo_busy = FALSE;
4370 vm_pageout_queue_internal.pgo_throttled = FALSE;
4371 vm_pageout_queue_internal.pgo_draining = FALSE;
4372 vm_pageout_queue_internal.pgo_lowpriority = FALSE;
4373 vm_pageout_queue_internal.pgo_tid = -1;
4374 vm_pageout_queue_internal.pgo_inited = FALSE;
4375
4376 /* internal pageout thread is started the first time the default pager is registered */
4377 /* external pageout and garbage collection threads started here */
4378
4379 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_external, NULL,
4380 BASEPRI_VM,
4381 &vm_pageout_state.vm_pageout_external_iothread);
4382 if (result != KERN_SUCCESS)
4383 panic("vm_pageout_iothread_external: create failed");
4384
4385 thread_deallocate(vm_pageout_state.vm_pageout_external_iothread);
4386
4387 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_garbage_collect, NULL,
4388 BASEPRI_DEFAULT,
4389 &thread);
4390 if (result != KERN_SUCCESS)
4391 panic("vm_pageout_garbage_collect: create failed");
4392
4393 thread_deallocate(thread);
4394
4395 #if VM_PRESSURE_EVENTS
4396 result = kernel_thread_start_priority((thread_continue_t)vm_pressure_thread, NULL,
4397 BASEPRI_DEFAULT,
4398 &thread);
4399
4400 if (result != KERN_SUCCESS)
4401 panic("vm_pressure_thread: create failed");
4402
4403 thread_deallocate(thread);
4404 #endif
4405
4406 vm_object_reaper_init();
4407
4408
4409 bzero(&vm_config, sizeof(vm_config));
4410
4411 switch(vm_compressor_mode) {
4412
4413 case VM_PAGER_DEFAULT:
4414 printf("mapping deprecated VM_PAGER_DEFAULT to VM_PAGER_COMPRESSOR_WITH_SWAP\n");
4415
4416 case VM_PAGER_COMPRESSOR_WITH_SWAP:
4417 vm_config.compressor_is_present = TRUE;
4418 vm_config.swap_is_present = TRUE;
4419 vm_config.compressor_is_active = TRUE;
4420 vm_config.swap_is_active = TRUE;
4421 break;
4422
4423 case VM_PAGER_COMPRESSOR_NO_SWAP:
4424 vm_config.compressor_is_present = TRUE;
4425 vm_config.swap_is_present = TRUE;
4426 vm_config.compressor_is_active = TRUE;
4427 break;
4428
4429 case VM_PAGER_FREEZER_DEFAULT:
4430 printf("mapping deprecated VM_PAGER_FREEZER_DEFAULT to VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP\n");
4431
4432 case VM_PAGER_FREEZER_COMPRESSOR_NO_SWAP:
4433 vm_config.compressor_is_present = TRUE;
4434 vm_config.swap_is_present = TRUE;
4435 break;
4436
4437 case VM_PAGER_COMPRESSOR_NO_SWAP_PLUS_FREEZER_COMPRESSOR_WITH_SWAP:
4438 vm_config.compressor_is_present = TRUE;
4439 vm_config.swap_is_present = TRUE;
4440 vm_config.compressor_is_active = TRUE;
4441 vm_config.freezer_swap_is_active = TRUE;
4442 break;
4443
4444 case VM_PAGER_NOT_CONFIGURED:
4445 break;
4446
4447 default:
4448 printf("unknown compressor mode - %x\n", vm_compressor_mode);
4449 break;
4450 }
4451 if (VM_CONFIG_COMPRESSOR_IS_PRESENT)
4452 vm_compressor_pager_init();
4453
4454 #if VM_PRESSURE_EVENTS
4455 vm_pressure_events_enabled = TRUE;
4456 #endif /* VM_PRESSURE_EVENTS */
4457
4458 #if CONFIG_PHANTOM_CACHE
4459 vm_phantom_cache_init();
4460 #endif
4461 #if VM_PAGE_BUCKETS_CHECK
4462 #if VM_PAGE_FAKE_BUCKETS
4463 printf("**** DEBUG: protecting fake buckets [0x%llx:0x%llx]\n",
4464 (uint64_t) vm_page_fake_buckets_start,
4465 (uint64_t) vm_page_fake_buckets_end);
4466 pmap_protect(kernel_pmap,
4467 vm_page_fake_buckets_start,
4468 vm_page_fake_buckets_end,
4469 VM_PROT_READ);
4470 // *(char *) vm_page_fake_buckets_start = 'x'; /* panic! */
4471 #endif /* VM_PAGE_FAKE_BUCKETS */
4472 #endif /* VM_PAGE_BUCKETS_CHECK */
4473
4474 #if VM_OBJECT_TRACKING
4475 vm_object_tracking_init();
4476 #endif /* VM_OBJECT_TRACKING */
4477
4478 vm_tests();
4479
4480 vm_pageout_continue();
4481
4482 /*
4483 * Unreached code!
4484 *
4485 * The vm_pageout_continue() call above never returns, so the code below is never
4486 * executed. We take advantage of this to declare several DTrace VM related probe
4487 * points that our kernel doesn't have an analog for. These are probe points that
4488 * exist in Solaris and are in the DTrace documentation, so people may have written
4489 * scripts that use them. Declaring the probe points here means their scripts will
4490 * compile and execute which we want for portability of the scripts, but since this
4491 * section of code is never reached, the probe points will simply never fire. Yes,
4492 * this is basically a hack. The problem is the DTrace probe points were chosen with
4493 * Solaris specific VM events in mind, not portability to different VM implementations.
4494 */
4495
4496 DTRACE_VM2(execfree, int, 1, (uint64_t *), NULL);
4497 DTRACE_VM2(execpgin, int, 1, (uint64_t *), NULL);
4498 DTRACE_VM2(execpgout, int, 1, (uint64_t *), NULL);
4499 DTRACE_VM2(pgswapin, int, 1, (uint64_t *), NULL);
4500 DTRACE_VM2(pgswapout, int, 1, (uint64_t *), NULL);
4501 DTRACE_VM2(swapin, int, 1, (uint64_t *), NULL);
4502 DTRACE_VM2(swapout, int, 1, (uint64_t *), NULL);
4503 /*NOTREACHED*/
4504 }
4505
4506
4507
4508 kern_return_t
4509 vm_pageout_internal_start(void)
4510 {
4511 kern_return_t result;
4512 int i;
4513 host_basic_info_data_t hinfo;
4514
4515 assert (VM_CONFIG_COMPRESSOR_IS_PRESENT);
4516
4517 mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;
4518 #define BSD_HOST 1
4519 host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
4520
4521 assert(hinfo.max_cpus > 0);
4522
4523 #if CONFIG_EMBEDDED
4524 vm_pageout_state.vm_compressor_thread_count = 1;
4525 #else
4526 if (hinfo.max_cpus > 4)
4527 vm_pageout_state.vm_compressor_thread_count = 2;
4528 else
4529 vm_pageout_state.vm_compressor_thread_count = 1;
4530 #endif
4531 PE_parse_boot_argn("vmcomp_threads", &vm_pageout_state.vm_compressor_thread_count,
4532 sizeof(vm_pageout_state.vm_compressor_thread_count));
4533
4534 if (vm_pageout_state.vm_compressor_thread_count >= hinfo.max_cpus)
4535 vm_pageout_state.vm_compressor_thread_count = hinfo.max_cpus - 1;
4536 if (vm_pageout_state.vm_compressor_thread_count <= 0)
4537 vm_pageout_state.vm_compressor_thread_count = 1;
4538 else if (vm_pageout_state.vm_compressor_thread_count > MAX_COMPRESSOR_THREAD_COUNT)
4539 vm_pageout_state.vm_compressor_thread_count = MAX_COMPRESSOR_THREAD_COUNT;
4540
4541 vm_pageout_queue_internal.pgo_maxlaundry = (vm_pageout_state.vm_compressor_thread_count * 4) * VM_PAGE_LAUNDRY_MAX;
4542
4543 PE_parse_boot_argn("vmpgoi_maxlaundry", &vm_pageout_queue_internal.pgo_maxlaundry, sizeof(vm_pageout_queue_internal.pgo_maxlaundry));
4544
4545 for (i = 0; i < vm_pageout_state.vm_compressor_thread_count; i++) {
4546 ciq[i].id = i;
4547 ciq[i].q = &vm_pageout_queue_internal;
4548 ciq[i].current_chead = NULL;
4549 ciq[i].scratch_buf = kalloc(COMPRESSOR_SCRATCH_BUF_SIZE);
4550
4551 result = kernel_thread_start_priority((thread_continue_t)vm_pageout_iothread_internal, (void *)&ciq[i],
4552 BASEPRI_VM, &vm_pageout_state.vm_pageout_internal_iothread);
4553
4554 if (result == KERN_SUCCESS)
4555 thread_deallocate(vm_pageout_state.vm_pageout_internal_iothread);
4556 else
4557 break;
4558 }
4559 return result;
4560 }
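/*
 * Illustrative sketch, not compiled as part of this file: the clamping
 * applied above when sizing the compressor thread pool. The "vmcomp_threads"
 * boot-arg override is shown as a plain parameter, and
 * SKETCH_MAX_COMPRESSOR_THREADS is a hypothetical stand-in for
 * MAX_COMPRESSOR_THREAD_COUNT.
 */
#if 0
#define SKETCH_MAX_COMPRESSOR_THREADS 8   /* stand-in for MAX_COMPRESSOR_THREAD_COUNT */

static int
sketch_compressor_thread_count(int max_cpus, int embedded, int boot_arg /* 0 = unset */)
{
	int count = embedded ? 1 : (max_cpus > 4 ? 2 : 1);

	if (boot_arg > 0)
		count = boot_arg;                 /* boot-arg override */

	if (count >= max_cpus)                    /* always leave a CPU for other work */
		count = max_cpus - 1;
	if (count <= 0)
		count = 1;
	else if (count > SKETCH_MAX_COMPRESSOR_THREADS)
		count = SKETCH_MAX_COMPRESSOR_THREADS;

	return count;
}
#endif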
4561
4562 #if CONFIG_IOSCHED
4563 /*
4564 * To support I/O Expedite for compressed files we mark the upls with special flags.
4565 * The way decmpfs works is that we create a big upl which marks all the pages needed to
4566 * represent the compressed file as busy. We tag this upl with the flag UPL_DECMP_REQ. Decmpfs
4567 * then issues smaller I/Os to read the compressed data, decompresses it and puts the data into the pages
4568 * being held in the big original UPL. We mark each of these smaller UPLs with the flag
4569 * UPL_DECMP_REAL_IO. Any outstanding real I/O UPL is tracked by the big req upl using the
4570 * decmp_io_upl field (in the upl structure). This link is protected in the forward direction
4571 * by the req upl lock (the reverse link doesn't need synchronization since we never inspect this link
4572 * unless the real I/O upl is being destroyed).
4573 */
4574
4575
4576 static void
4577 upl_set_decmp_info(upl_t upl, upl_t src_upl)
4578 {
4579 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4580
4581 upl_lock(src_upl);
4582 if (src_upl->decmp_io_upl) {
4583 /*
4584 * If there is already an alive real I/O UPL, ignore this new UPL.
4585 * This case should rarely happen and even if it does, it just means
4586 * that we might issue a spurious expedite which the driver is expected
4587 * to handle.
4588 */
4589 upl_unlock(src_upl);
4590 return;
4591 }
4592 src_upl->decmp_io_upl = (void *)upl;
4593 src_upl->ref_count++;
4594
4595 upl->flags |= UPL_DECMP_REAL_IO;
4596 upl->decmp_io_upl = (void *)src_upl;
4597 upl_unlock(src_upl);
4598 }
4599 #endif /* CONFIG_IOSCHED */
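/*
 * Illustrative sketch, not compiled as part of this file: the cross-linking
 * done by upl_set_decmp_info() above, reduced to two generic objects
 * protected by the request object's lock. Types and fields are invented for
 * illustration.
 */
#if 0
#include <pthread.h>
#include <stddef.h>

struct sketch_io;

struct sketch_req {
	pthread_mutex_t    lock;
	int                ref_count;
	struct sketch_io  *io;      /* at most one live real-I/O object */
};

struct sketch_io {
	struct sketch_req *req;     /* back pointer, only read at teardown */
};

static void
sketch_link_io(struct sketch_req *req, struct sketch_io *io)
{
	pthread_mutex_lock(&req->lock);
	if (req->io != NULL) {
		/* an earlier real-I/O object is still alive; ignore this one */
		pthread_mutex_unlock(&req->lock);
		return;
	}
	req->io = io;
	req->ref_count++;           /* the link holds a reference on the request */
	io->req = req;
	pthread_mutex_unlock(&req->lock);
}
#endif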
4600
4601 #if UPL_DEBUG
4602 int upl_debug_enabled = 1;
4603 #else
4604 int upl_debug_enabled = 0;
4605 #endif
4606
4607 static upl_t
4608 upl_create(int type, int flags, upl_size_t size)
4609 {
4610 upl_t upl;
4611 vm_size_t page_field_size = 0;
4612 int upl_flags = 0;
4613 vm_size_t upl_size = sizeof(struct upl);
4614
4615 size = round_page_32(size);
4616
4617 if (type & UPL_CREATE_LITE) {
4618 page_field_size = (atop(size) + 7) >> 3;
4619 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4620
4621 upl_flags |= UPL_LITE;
4622 }
4623 if (type & UPL_CREATE_INTERNAL) {
4624 upl_size += sizeof(struct upl_page_info) * atop(size);
4625
4626 upl_flags |= UPL_INTERNAL;
4627 }
4628 upl = (upl_t)kalloc(upl_size + page_field_size);
4629
4630 if (page_field_size)
4631 bzero((char *)upl + upl_size, page_field_size);
4632
4633 upl->flags = upl_flags | flags;
4634 upl->kaddr = (vm_offset_t)0;
4635 upl->size = 0;
4636 upl->map_object = NULL;
4637 upl->ref_count = 1;
4638 upl->ext_ref_count = 0;
4639 upl->highest_page = 0;
4640 upl_lock_init(upl);
4641 upl->vector_upl = NULL;
4642 upl->associated_upl = NULL;
4643 upl->upl_iodone = NULL;
4644 #if CONFIG_IOSCHED
4645 if (type & UPL_CREATE_IO_TRACKING) {
4646 upl->upl_priority = proc_get_effective_thread_policy(current_thread(), TASK_POLICY_IO);
4647 }
4648
4649 upl->upl_reprio_info = 0;
4650 upl->decmp_io_upl = 0;
4651 if ((type & UPL_CREATE_INTERNAL) && (type & UPL_CREATE_EXPEDITE_SUP)) {
4652 /* Only support expedite on internal UPLs */
4653 thread_t curthread = current_thread();
4654 upl->upl_reprio_info = (uint64_t *)kalloc(sizeof(uint64_t) * atop(size));
4655 bzero(upl->upl_reprio_info, (sizeof(uint64_t) * atop(size)));
4656 upl->flags |= UPL_EXPEDITE_SUPPORTED;
4657 if (curthread->decmp_upl != NULL)
4658 upl_set_decmp_info(upl, curthread->decmp_upl);
4659 }
4660 #endif
4661 #if CONFIG_IOSCHED || UPL_DEBUG
4662 if ((type & UPL_CREATE_IO_TRACKING) || upl_debug_enabled) {
4663 upl->upl_creator = current_thread();
4664 upl->uplq.next = 0;
4665 upl->uplq.prev = 0;
4666 upl->flags |= UPL_TRACKED_BY_OBJECT;
4667 }
4668 #endif
4669
4670 #if UPL_DEBUG
4671 upl->ubc_alias1 = 0;
4672 upl->ubc_alias2 = 0;
4673
4674 upl->upl_state = 0;
4675 upl->upl_commit_index = 0;
4676 bzero(&upl->upl_commit_records[0], sizeof(upl->upl_commit_records));
4677
4678 (void) OSBacktrace(&upl->upl_create_retaddr[0], UPL_DEBUG_STACK_FRAMES);
4679 #endif /* UPL_DEBUG */
4680
4681 return(upl);
4682 }
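/*
 * Illustrative sketch, not compiled as part of this file: the
 * allocation-size arithmetic used by upl_create() above, with 4 KiB pages
 * assumed for the worked numbers in the comments and the structure sizes
 * left symbolic.
 */
#if 0
#include <stddef.h>

#define SKETCH_PAGE_SIZE 4096u

static size_t
sketch_upl_alloc_size(size_t upl_struct_size, size_t page_info_size,
    unsigned int bytes, int lite, int internal)
{
	unsigned int pages = (bytes + SKETCH_PAGE_SIZE - 1) / SKETCH_PAGE_SIZE;
	size_t alloc  = upl_struct_size;
	size_t bitmap = 0;

	if (lite) {
		/* one bit per page, rounded up to whole bytes ... */
		bitmap = (pages + 7) >> 3;
		/* ... then to a 4-byte boundary: e.g. 256 pages -> 32 bytes */
		bitmap = (bitmap + 3) & ~(size_t)3;
	}
	if (internal) {
		/* one upl_page_info entry per page follows the upl itself */
		alloc += page_info_size * pages;
	}
	return alloc + bitmap;
}
#endif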
4683
4684 static void
4685 upl_destroy(upl_t upl)
4686 {
4687 int page_field_size; /* bit field in word size buf */
4688 int size;
4689
4690 if (upl->ext_ref_count) {
4691 panic("upl(%p) ext_ref_count", upl);
4692 }
4693
4694 #if CONFIG_IOSCHED
4695 if ((upl->flags & UPL_DECMP_REAL_IO) && upl->decmp_io_upl) {
4696 upl_t src_upl;
4697 src_upl = upl->decmp_io_upl;
4698 assert((src_upl->flags & UPL_DECMP_REQ) != 0);
4699 upl_lock(src_upl);
4700 src_upl->decmp_io_upl = NULL;
4701 upl_unlock(src_upl);
4702 upl_deallocate(src_upl);
4703 }
4704 #endif /* CONFIG_IOSCHED */
4705
4706 #if CONFIG_IOSCHED || UPL_DEBUG
4707 if ((upl->flags & UPL_TRACKED_BY_OBJECT) && !(upl->flags & UPL_VECTOR)) {
4708 vm_object_t object;
4709
4710 if (upl->flags & UPL_SHADOWED) {
4711 object = upl->map_object->shadow;
4712 } else {
4713 object = upl->map_object;
4714 }
4715
4716 vm_object_lock(object);
4717 queue_remove(&object->uplq, upl, upl_t, uplq);
4718 vm_object_activity_end(object);
4719 vm_object_collapse(object, 0, TRUE);
4720 vm_object_unlock(object);
4721 }
4722 #endif
4723 /*
4724 * drop a reference on the map_object whether or
4725 * not a pageout object is inserted
4726 */
4727 if (upl->flags & UPL_SHADOWED)
4728 vm_object_deallocate(upl->map_object);
4729
4730 if (upl->flags & UPL_DEVICE_MEMORY)
4731 size = PAGE_SIZE;
4732 else
4733 size = upl->size;
4734 page_field_size = 0;
4735
4736 if (upl->flags & UPL_LITE) {
4737 page_field_size = ((size/PAGE_SIZE) + 7) >> 3;
4738 page_field_size = (page_field_size + 3) & 0xFFFFFFFC;
4739 }
4740 upl_lock_destroy(upl);
4741 upl->vector_upl = (vector_upl_t) 0xfeedbeef;
4742
4743 #if CONFIG_IOSCHED
4744 if (upl->flags & UPL_EXPEDITE_SUPPORTED)
4745 kfree(upl->upl_reprio_info, sizeof(uint64_t) * (size/PAGE_SIZE));
4746 #endif
4747
4748 if (upl->flags & UPL_INTERNAL) {
4749 kfree(upl,
4750 sizeof(struct upl) +
4751 (sizeof(struct upl_page_info) * (size/PAGE_SIZE))
4752 + page_field_size);
4753 } else {
4754 kfree(upl, sizeof(struct upl) + page_field_size);
4755 }
4756 }
4757
4758 void
4759 upl_deallocate(upl_t upl)
4760 {
4761 upl_lock(upl);
4762
4763 if (--upl->ref_count == 0) {
4764 if(vector_upl_is_valid(upl))
4765 vector_upl_deallocate(upl);
4766 upl_unlock(upl);
4767
4768 if (upl->upl_iodone)
4769 upl_callout_iodone(upl);
4770
4771 upl_destroy(upl);
4772 } else
4773 upl_unlock(upl);
4774 }
4775
4776 #if CONFIG_IOSCHED
4777 void
4778 upl_mark_decmp(upl_t upl)
4779 {
4780 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
4781 upl->flags |= UPL_DECMP_REQ;
4782 upl->upl_creator->decmp_upl = (void *)upl;
4783 }
4784 }
4785
4786 void
4787 upl_unmark_decmp(upl_t upl)
4788 {
4789 if(upl && (upl->flags & UPL_DECMP_REQ)) {
4790 upl->upl_creator->decmp_upl = NULL;
4791 }
4792 }
4793
4794 #endif /* CONFIG_IOSCHED */
4795
4796 #define VM_PAGE_Q_BACKING_UP(q) \
4797 ((q)->pgo_laundry >= (((q)->pgo_maxlaundry * 8) / 10))
4798
4799 boolean_t must_throttle_writes(void);
4800
4801 boolean_t
4802 must_throttle_writes()
4803 {
4804 if (VM_PAGE_Q_BACKING_UP(&vm_pageout_queue_external) &&
4805 vm_page_pageable_external_count > (AVAILABLE_NON_COMPRESSED_MEMORY * 6) / 10)
4806 return (TRUE);
4807
4808 return (FALSE);
4809 }
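/*
 * Illustrative sketch, not compiled as part of this file: the two thresholds
 * checked by must_throttle_writes() above, with the 8/10 and 6/10 ratios
 * written out and the inputs shown as plain parameters.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

static bool
sketch_must_throttle_writes(uint32_t ext_laundry, uint32_t ext_maxlaundry,
    uint32_t pageable_external, uint64_t available_noncompressed)
{
	/* the external pageout queue is backing up: at or above 80% of its limit ... */
	bool queue_backed_up = ext_laundry >= (ext_maxlaundry * 8) / 10;

	/* ... and file-backed pages dominate: more than 60% of what is still pageable */
	bool external_heavy  = pageable_external > (available_noncompressed * 6) / 10;

	return queue_backed_up && external_heavy;
}
#endif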
4810
4811
4812 /*
4813 * Routine: vm_object_upl_request
4814 * Purpose:
4815 * Cause the population of a portion of a vm_object.
4816 * Depending on the nature of the request, the pages
4817 * returned may contain valid data or be uninitialized.
4818 * A page list structure, listing the physical pages,
4819 * will be returned upon request.
4820 * This function is called by the file system or any other
4821 * supplier of backing store to a pager.
4822 * IMPORTANT NOTE: The caller must still respect the relationship
4823 * between the vm_object and its backing memory object. The
4824 * caller MUST NOT substitute changes in the backing file
4825 * without first doing a memory_object_lock_request on the
4826 * target range unless it is known that the pages are not
4827 * shared with another entity at the pager level.
4828 * Copy_in_to:
4829 * if a page list structure is present,
4830 * return the mapped physical pages; where a
4831 * page is not present, return a non-initialized
4832 * one. If the no_sync bit is turned on, don't
4833 * call the pager unlock to synchronize with other
4834 * possible copies of the page. Leave pages busy
4835 * in the original object, if a page list structure
4836 * was specified. When a commit of the page list
4837 * pages is done, the dirty bit will be set for each one.
4838 * Copy_out_from:
4839 * If a page list structure is present, return
4840 * all mapped pages. Where a page does not exist
4841 * map a zero filled one. Leave pages busy in
4842 * the original object. If a page list structure
4843 * is not specified, this call is a no-op.
4844 *
4845 * Note: access of default pager objects has a rather interesting
4846 * twist. The caller of this routine, presumably the file system
4847 * page cache handling code, will never actually make a request
4848 * against a default pager backed object. Only the default
4849 * pager will make requests on backing store related vm_objects.
4850 * In this way the default pager can maintain the relationship
4851 * between backing store files (abstract memory objects) and
4852 * the vm_objects (cache objects) they support.
4853 *
4854 */
4855
4856 __private_extern__ kern_return_t
4857 vm_object_upl_request(
4858 vm_object_t object,
4859 vm_object_offset_t offset,
4860 upl_size_t size,
4861 upl_t *upl_ptr,
4862 upl_page_info_array_t user_page_list,
4863 unsigned int *page_list_count,
4864 upl_control_flags_t cntrl_flags,
4865 vm_tag_t tag)
4866 {
4867 vm_page_t dst_page = VM_PAGE_NULL;
4868 vm_object_offset_t dst_offset;
4869 upl_size_t xfer_size;
4870 unsigned int size_in_pages;
4871 boolean_t dirty;
4872 boolean_t hw_dirty;
4873 upl_t upl = NULL;
4874 unsigned int entry;
4875 vm_page_t alias_page = NULL;
4876 int refmod_state = 0;
4877 wpl_array_t lite_list = NULL;
4878 vm_object_t last_copy_object;
4879 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
4880 struct vm_page_delayed_work *dwp;
4881 int dw_count;
4882 int dw_limit;
4883 int io_tracking_flag = 0;
4884 int grab_options;
4885 int page_grab_count = 0;
4886 ppnum_t phys_page;
4887 pmap_flush_context pmap_flush_context_storage;
4888 boolean_t pmap_flushes_delayed = FALSE;
4889
4890 if (cntrl_flags & ~UPL_VALID_FLAGS) {
4891 /*
4892 * For forward compatibility's sake,
4893 * reject any unknown flag.
4894 */
4895 return KERN_INVALID_VALUE;
4896 }
4897 if ( (!object->internal) && (object->paging_offset != 0) )
4898 panic("vm_object_upl_request: external object with non-zero paging offset\n");
4899 if (object->phys_contiguous)
4900 panic("vm_object_upl_request: contiguous object specified\n");
4901
4902 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, 0, 0);
4903
4904 if (size > MAX_UPL_SIZE_BYTES)
4905 size = MAX_UPL_SIZE_BYTES;
4906
4907 if ( (cntrl_flags & UPL_SET_INTERNAL) && page_list_count != NULL)
4908 *page_list_count = MAX_UPL_SIZE_BYTES >> PAGE_SHIFT;
4909
4910 #if CONFIG_IOSCHED || UPL_DEBUG
4911 if (object->io_tracking || upl_debug_enabled)
4912 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
4913 #endif
4914 #if CONFIG_IOSCHED
4915 if (object->io_tracking)
4916 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
4917 #endif
4918
4919 if (cntrl_flags & UPL_SET_INTERNAL) {
4920 if (cntrl_flags & UPL_SET_LITE) {
4921
4922 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4923
4924 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4925 lite_list = (wpl_array_t)
4926 (((uintptr_t)user_page_list) +
4927 ((size/PAGE_SIZE) * sizeof(upl_page_info_t)));
4928 if (size == 0) {
4929 user_page_list = NULL;
4930 lite_list = NULL;
4931 }
4932 } else {
4933 upl = upl_create(UPL_CREATE_INTERNAL | io_tracking_flag, 0, size);
4934
4935 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
4936 if (size == 0) {
4937 user_page_list = NULL;
4938 }
4939 }
4940 } else {
4941 if (cntrl_flags & UPL_SET_LITE) {
4942
4943 upl = upl_create(UPL_CREATE_EXTERNAL | UPL_CREATE_LITE | io_tracking_flag, 0, size);
4944
4945 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
4946 if (size == 0) {
4947 lite_list = NULL;
4948 }
4949 } else {
4950 upl = upl_create(UPL_CREATE_EXTERNAL | io_tracking_flag, 0, size);
4951 }
4952 }
4953 *upl_ptr = upl;
4954
4955 if (user_page_list)
4956 user_page_list[0].device = FALSE;
4957
4958 if (cntrl_flags & UPL_SET_LITE) {
4959 upl->map_object = object;
4960 } else {
4961 upl->map_object = vm_object_allocate(size);
4962 /*
4963 * No need to lock the new object: nobody else knows
4964 * about it yet, so it's all ours so far.
4965 */
4966 upl->map_object->shadow = object;
4967 upl->map_object->pageout = TRUE;
4968 upl->map_object->can_persist = FALSE;
4969 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4970 upl->map_object->vo_shadow_offset = offset;
4971 upl->map_object->wimg_bits = object->wimg_bits;
4972
4973 VM_PAGE_GRAB_FICTITIOUS(alias_page);
4974
4975 upl->flags |= UPL_SHADOWED;
4976 }
4977 if (cntrl_flags & UPL_FOR_PAGEOUT)
4978 upl->flags |= UPL_PAGEOUT;
4979
4980 vm_object_lock(object);
4981 vm_object_activity_begin(object);
4982
4983 grab_options = 0;
4984 #if CONFIG_SECLUDED_MEMORY
4985 if (object->can_grab_secluded) {
4986 grab_options |= VM_PAGE_GRAB_SECLUDED;
4987 }
4988 #endif /* CONFIG_SECLUDED_MEMORY */
4989
4990 /*
4991 * we can lock in the paging_offset once paging_in_progress is set
4992 */
4993 upl->size = size;
4994 upl->offset = offset + object->paging_offset;
4995
4996 #if CONFIG_IOSCHED || UPL_DEBUG
4997 if (object->io_tracking || upl_debug_enabled) {
4998 vm_object_activity_begin(object);
4999 queue_enter(&object->uplq, upl, upl_t, uplq);
5000 }
5001 #endif
5002 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != VM_OBJECT_NULL) {
5003 /*
5004 * Honor copy-on-write obligations
5005 *
5006 * The caller is gathering these pages and
5007 * might modify their contents. We need to
5008 * make sure that the copy object has its own
5009 * private copies of these pages before we let
5010 * the caller modify them.
5011 */
5012 vm_object_update(object,
5013 offset,
5014 size,
5015 NULL,
5016 NULL,
5017 FALSE, /* should_return */
5018 MEMORY_OBJECT_COPY_SYNC,
5019 VM_PROT_NO_CHANGE);
5020
5021 VM_PAGEOUT_DEBUG(upl_cow, 1);
5022 VM_PAGEOUT_DEBUG(upl_cow_pages, (size >> PAGE_SHIFT));
5023 }
5024 /*
5025 * remember which copy object we synchronized with
5026 */
5027 last_copy_object = object->copy;
5028 entry = 0;
5029
5030 xfer_size = size;
5031 dst_offset = offset;
5032 size_in_pages = size / PAGE_SIZE;
5033
5034 dwp = &dw_array[0];
5035 dw_count = 0;
5036 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
5037
5038 if (vm_page_free_count > (vm_page_free_target + size_in_pages) ||
5039 object->resident_page_count < ((MAX_UPL_SIZE_BYTES * 2) >> PAGE_SHIFT))
5040 object->scan_collisions = 0;
5041
5042 if ((cntrl_flags & UPL_WILL_MODIFY) && must_throttle_writes() == TRUE) {
5043 boolean_t isSSD = FALSE;
5044
5045 #if CONFIG_EMBEDDED
5046 isSSD = TRUE;
5047 #else
5048 vnode_pager_get_isSSD(object->pager, &isSSD);
5049 #endif
5050 vm_object_unlock(object);
5051
5052 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5053
5054 if (isSSD == TRUE)
5055 delay(1000 * size_in_pages);
5056 else
5057 delay(5000 * size_in_pages);
5058 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5059
5060 vm_object_lock(object);
5061 }
5062
5063 while (xfer_size) {
5064
5065 dwp->dw_mask = 0;
5066
5067 if ((alias_page == NULL) && !(cntrl_flags & UPL_SET_LITE)) {
5068 vm_object_unlock(object);
5069 VM_PAGE_GRAB_FICTITIOUS(alias_page);
5070 vm_object_lock(object);
5071 }
5072 if (cntrl_flags & UPL_COPYOUT_FROM) {
5073 upl->flags |= UPL_PAGE_SYNC_DONE;
5074
5075 if ( ((dst_page = vm_page_lookup(object, dst_offset)) == VM_PAGE_NULL) ||
5076 dst_page->vmp_fictitious ||
5077 dst_page->vmp_absent ||
5078 dst_page->vmp_error ||
5079 dst_page->vmp_cleaning ||
5080 (VM_PAGE_WIRED(dst_page))) {
5081
5082 if (user_page_list)
5083 user_page_list[entry].phys_addr = 0;
5084
5085 goto try_next_page;
5086 }
5087 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5088
5089 /*
5090 * grab this up front...
5091 * a high percentage of the time we're going to
5092 * need the hardware modification state a bit later
5093 * anyway... so we can eliminate an extra call into
5094 * the pmap layer by grabbing it here and recording it
5095 */
5096 if (dst_page->vmp_pmapped)
5097 refmod_state = pmap_get_refmod(phys_page);
5098 else
5099 refmod_state = 0;
5100
5101 if ( (refmod_state & VM_MEM_REFERENCED) && VM_PAGE_INACTIVE(dst_page)) {
5102 /*
5103 * page is on inactive list and referenced...
5104 * reactivate it now... this gets it out of the
5105 * way of vm_pageout_scan which would have to
5106 * reactivate it upon tripping over it
5107 */
5108 dwp->dw_mask |= DW_vm_page_activate;
5109 }
5110 if (cntrl_flags & UPL_RET_ONLY_DIRTY) {
5111 /*
5112 * we're only asking for DIRTY pages to be returned
5113 */
5114 if (dst_page->vmp_laundry || !(cntrl_flags & UPL_FOR_PAGEOUT)) {
5115 /*
5116 * if this is the page stolen by vm_pageout_scan to be
5117 * cleaned (as opposed to a buddy being clustered in),
5118 * or this request is not being driven by a PAGEOUT cluster,
5119 * then we only need to check for the page being dirty or
5120 * precious to decide whether to return it
5121 */
5122 if (dst_page->vmp_dirty || dst_page->vmp_precious || (refmod_state & VM_MEM_MODIFIED))
5123 goto check_busy;
5124 goto dont_return;
5125 }
5126 /*
5127 * this is a request for a PAGEOUT cluster and this page
5128 * is merely along for the ride as a 'buddy'... not only
5129 * does it have to be dirty to be returned, but it also
5130 * can't have been referenced recently...
5131 */
5132 if ( (hibernate_cleaning_in_progress == TRUE ||
5133 (!((refmod_state & VM_MEM_REFERENCED) || dst_page->vmp_reference) ||
5134 (dst_page->vmp_q_state == VM_PAGE_ON_THROTTLED_Q))) &&
5135 ((refmod_state & VM_MEM_MODIFIED) || dst_page->vmp_dirty || dst_page->vmp_precious) ) {
5136 goto check_busy;
5137 }
5138 dont_return:
5139 /*
5140 * if we reach here, we're not to return
5141 * the page... go on to the next one
5142 */
5143 if (dst_page->vmp_laundry == TRUE) {
5144 /*
5145 * if we get here, the page is not 'cleaning' (filtered out above).
5146 * since it has been referenced, remove it from the laundry
5147 * so we don't pay the cost of an I/O to clean a page
5148 * we're just going to take back
5149 */
5150 vm_page_lockspin_queues();
5151
5152 vm_pageout_steal_laundry(dst_page, TRUE);
5153 vm_page_activate(dst_page);
5154
5155 vm_page_unlock_queues();
5156 }
5157 if (user_page_list)
5158 user_page_list[entry].phys_addr = 0;
5159
5160 goto try_next_page;
5161 }
5162 check_busy:
5163 if (dst_page->vmp_busy) {
5164 if (cntrl_flags & UPL_NOBLOCK) {
5165 if (user_page_list)
5166 user_page_list[entry].phys_addr = 0;
5167 dwp->dw_mask = 0;
5168
5169 goto try_next_page;
5170 }
5171 /*
5172 * someone else is playing with the
5173 * page. We will have to wait.
5174 */
5175 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5176
5177 continue;
5178 }
5179 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5180
5181 vm_page_lockspin_queues();
5182
5183 if (dst_page->vmp_q_state == VM_PAGE_ON_PAGEOUT_Q) {
5184 /*
5185 * we've buddied up a page for a clustered pageout
5186 * that has already been moved to the pageout
5187 * queue by pageout_scan... we need to remove
5188 * it from the queue and drop the laundry count
5189 * on that queue
5190 */
5191 vm_pageout_throttle_up(dst_page);
5192 }
5193 vm_page_unlock_queues();
5194 }
5195 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5196 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5197
5198 if (phys_page > upl->highest_page)
5199 upl->highest_page = phys_page;
5200
5201 assert (!pmap_is_noencrypt(phys_page));
5202
5203 if (cntrl_flags & UPL_SET_LITE) {
5204 unsigned int pg_num;
5205
5206 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5207 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5208 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5209
5210 if (hw_dirty) {
5211 if (pmap_flushes_delayed == FALSE) {
5212 pmap_flush_context_init(&pmap_flush_context_storage);
5213 pmap_flushes_delayed = TRUE;
5214 }
5215 pmap_clear_refmod_options(phys_page,
5216 VM_MEM_MODIFIED,
5217 PMAP_OPTIONS_NOFLUSH | PMAP_OPTIONS_CLEAR_WRITE,
5218 &pmap_flush_context_storage);
5219 }
5220
5221 /*
5222 * Mark original page as cleaning
5223 * in place.
5224 */
5225 dst_page->vmp_cleaning = TRUE;
5226 dst_page->vmp_precious = FALSE;
5227 } else {
5228 /*
5229 * use pageclean setup, it is more
5230 * convenient even for the pageout
5231 * cases here
5232 */
5233 vm_object_lock(upl->map_object);
5234 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5235 vm_object_unlock(upl->map_object);
5236
5237 alias_page->vmp_absent = FALSE;
5238 alias_page = NULL;
5239 }
5240 if (dirty) {
5241 SET_PAGE_DIRTY(dst_page, FALSE);
5242 } else {
5243 dst_page->vmp_dirty = FALSE;
5244 }
5245
5246 if (!dirty)
5247 dst_page->vmp_precious = TRUE;
5248
5249 if ( !(cntrl_flags & UPL_CLEAN_IN_PLACE) ) {
5250 if ( !VM_PAGE_WIRED(dst_page))
5251 dst_page->vmp_free_when_done = TRUE;
5252 }
5253 } else {
5254 if ((cntrl_flags & UPL_WILL_MODIFY) && object->copy != last_copy_object) {
5255 /*
5256 * Honor copy-on-write obligations
5257 *
5258 * The copy object has changed since we
5259 * last synchronized for copy-on-write.
5260 * Another copy object might have been
5261 * inserted while we released the object's
5262 * lock. Since someone could have seen the
5263 * original contents of the remaining pages
5264 * through that new object, we have to
5265 * synchronize with it again for the remaining
5266 * pages only. The previous pages are "busy"
5267 * so they can not be seen through the new
5268 * mapping. The new mapping will see our
5269 * upcoming changes for those previous pages,
5270 * but that's OK since they couldn't see what
5271 * was there before. It's just a race anyway
5272 * and there's no guarantee of consistency or
5273 * atomicity. We just don't want new mappings
5274 * to see both the *before* and *after* pages.
5275 */
5276 if (object->copy != VM_OBJECT_NULL) {
5277 vm_object_update(
5278 object,
5279 dst_offset,/* current offset */
5280 xfer_size, /* remaining size */
5281 NULL,
5282 NULL,
5283 FALSE, /* should_return */
5284 MEMORY_OBJECT_COPY_SYNC,
5285 VM_PROT_NO_CHANGE);
5286
5287 VM_PAGEOUT_DEBUG(upl_cow_again, 1);
5288 VM_PAGEOUT_DEBUG(upl_cow_again_pages, (xfer_size >> PAGE_SHIFT));
5289 }
5290 /*
5291 * remember the copy object we synced with
5292 */
5293 last_copy_object = object->copy;
5294 }
5295 dst_page = vm_page_lookup(object, dst_offset);
5296
5297 if (dst_page != VM_PAGE_NULL) {
5298
5299 if ((cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5300 /*
5301 * skip over pages already present in the cache
5302 */
5303 if (user_page_list)
5304 user_page_list[entry].phys_addr = 0;
5305
5306 goto try_next_page;
5307 }
5308 if (dst_page->vmp_fictitious) {
5309 panic("need corner case for fictitious page");
5310 }
5311
5312 if (dst_page->vmp_busy || dst_page->vmp_cleaning) {
5313 /*
5314 * someone else is playing with the
5315 * page. We will have to wait.
5316 */
5317 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
5318
5319 continue;
5320 }
5321 if (dst_page->vmp_laundry)
5322 vm_pageout_steal_laundry(dst_page, FALSE);
5323 } else {
5324 if (object->private) {
5325 /*
5326 * This is a nasty wrinkle for users
5327 * of upl who encounter device or
5328 * private memory; however, it is
5329 * unavoidable: only a fault can
5330 * resolve the actual backing
5331 * physical page by asking the
5332 * backing device.
5333 */
5334 if (user_page_list)
5335 user_page_list[entry].phys_addr = 0;
5336
5337 goto try_next_page;
5338 }
5339 if (object->scan_collisions) {
5340 /*
5341 * the pageout_scan thread is trying to steal
5342 * pages from this object, but has run into our
5343 * lock... grab 2 pages from the head of the object...
5344 * the first is freed on behalf of pageout_scan, the
5345 * 2nd is for our own use... we use vm_object_page_grab
5346 * in both cases to avoid taking pages from the free
5347 * list since we are under memory pressure and our
5348 * lock on this object is getting in the way of
5349 * relieving it
5350 */
5351 dst_page = vm_object_page_grab(object);
5352
5353 if (dst_page != VM_PAGE_NULL)
5354 vm_page_release(dst_page,
5355 FALSE);
5356
5357 dst_page = vm_object_page_grab(object);
5358 }
5359 if (dst_page == VM_PAGE_NULL) {
5360 /*
5361 * need to allocate a page
5362 */
5363 dst_page = vm_page_grab_options(grab_options);
5364 if (dst_page != VM_PAGE_NULL)
5365 page_grab_count++;
5366 }
5367 if (dst_page == VM_PAGE_NULL) {
5368 if ( (cntrl_flags & (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) == (UPL_RET_ONLY_ABSENT | UPL_NOBLOCK)) {
5369 /*
5370 * we don't want to stall waiting for pages to come onto the free list
5371 * while we're already holding absent pages in this UPL;
5372 * the caller will deal with the empty slots
5373 */
5374 if (user_page_list)
5375 user_page_list[entry].phys_addr = 0;
5376
5377 goto try_next_page;
5378 }
5379 /*
5380 * no pages available... wait
5381 * then try again for the same
5382 * offset...
5383 */
5384 vm_object_unlock(object);
5385
5386 OSAddAtomic(size_in_pages, &vm_upl_wait_for_pages);
5387
5388 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
5389
5390 VM_PAGE_WAIT();
5391 OSAddAtomic(-size_in_pages, &vm_upl_wait_for_pages);
5392
5393 VM_DEBUG_EVENT(vm_upl_page_wait, VM_UPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
5394
5395 vm_object_lock(object);
5396
5397 continue;
5398 }
5399 vm_page_insert(dst_page, object, dst_offset);
5400
5401 dst_page->vmp_absent = TRUE;
5402 dst_page->vmp_busy = FALSE;
5403
5404 if (cntrl_flags & UPL_RET_ONLY_ABSENT) {
5405 /*
5406 * if UPL_RET_ONLY_ABSENT was specified,
5407 * then we're definitely setting up a
5408 * upl for a clustered read/pagein
5409 * operation... mark the pages as clustered
5410 * so upl_commit_range can put them on the
5411 * speculative list
5412 */
5413 dst_page->vmp_clustered = TRUE;
5414
5415 if ( !(cntrl_flags & UPL_FILE_IO))
5416 VM_STAT_INCR(pageins);
5417 }
5418 }
5419 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
5420
5421 dst_page->vmp_overwriting = TRUE;
5422
5423 if (dst_page->vmp_pmapped) {
5424 if ( !(cntrl_flags & UPL_FILE_IO))
5425 /*
5426 * eliminate all mappings from the
5427 * original object and its progeny
5428 */
5429 refmod_state = pmap_disconnect(phys_page);
5430 else
5431 refmod_state = pmap_get_refmod(phys_page);
5432 } else
5433 refmod_state = 0;
5434
5435 hw_dirty = refmod_state & VM_MEM_MODIFIED;
5436 dirty = hw_dirty ? TRUE : dst_page->vmp_dirty;
5437
5438 if (cntrl_flags & UPL_SET_LITE) {
5439 unsigned int pg_num;
5440
5441 pg_num = (unsigned int) ((dst_offset-offset)/PAGE_SIZE);
5442 assert(pg_num == (dst_offset-offset)/PAGE_SIZE);
5443 lite_list[pg_num>>5] |= 1 << (pg_num & 31);
5444
5445 if (hw_dirty)
5446 pmap_clear_modify(phys_page);
5447
5448 /*
5449 * Mark original page as cleaning
5450 * in place.
5451 */
5452 dst_page->vmp_cleaning = TRUE;
5453 dst_page->vmp_precious = FALSE;
5454 } else {
5455 /*
5456 * use pageclean setup, it is more
5457 * convenient even for the pageout
5458 * cases here
5459 */
5460 vm_object_lock(upl->map_object);
5461 vm_pageclean_setup(dst_page, alias_page, upl->map_object, size - xfer_size);
5462 vm_object_unlock(upl->map_object);
5463
5464 alias_page->vmp_absent = FALSE;
5465 alias_page = NULL;
5466 }
5467
5468 if (cntrl_flags & UPL_REQUEST_SET_DIRTY) {
5469 upl->flags &= ~UPL_CLEAR_DIRTY;
5470 upl->flags |= UPL_SET_DIRTY;
5471 dirty = TRUE;
5473 } else if (cntrl_flags & UPL_CLEAN_IN_PLACE) {
5474 /*
5475 * clean in place for read implies
5476 * that a write will be done on all
5477 * the pages that are dirty before
5478 * a upl commit is done. The caller
5479 * is obligated to preserve the
5480 * contents of all pages marked dirty
5481 */
5482 upl->flags |= UPL_CLEAR_DIRTY;
5483 }
5484 dst_page->vmp_dirty = dirty;
5485
5486 if (!dirty)
5487 dst_page->vmp_precious = TRUE;
5488
5489 if ( !VM_PAGE_WIRED(dst_page)) {
5490 /*
5491 * deny access to the target page while
5492 * it is being worked on
5493 */
5494 dst_page->vmp_busy = TRUE;
5495 } else
5496 dwp->dw_mask |= DW_vm_page_wire;
5497
5498 /*
5499 * We might be about to satisfy a fault which has been
5500 * requested. So no need for the "restart" bit.
5501 */
5502 dst_page->vmp_restart = FALSE;
5503 if (!dst_page->vmp_absent && !(cntrl_flags & UPL_WILL_MODIFY)) {
5504 /*
5505 * expect the page to be used
5506 */
5507 dwp->dw_mask |= DW_set_reference;
5508 }
5509 if (cntrl_flags & UPL_PRECIOUS) {
5510 if (object->internal) {
5511 SET_PAGE_DIRTY(dst_page, FALSE);
5512 dst_page->vmp_precious = FALSE;
5513 } else {
5514 dst_page->vmp_precious = TRUE;
5515 }
5516 } else {
5517 dst_page->vmp_precious = FALSE;
5518 }
5519 }
5520 if (dst_page->vmp_busy)
5521 upl->flags |= UPL_HAS_BUSY;
5522
5523 if (phys_page > upl->highest_page)
5524 upl->highest_page = phys_page;
5525 assert (!pmap_is_noencrypt(phys_page));
5526 if (user_page_list) {
5527 user_page_list[entry].phys_addr = phys_page;
5528 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
5529 user_page_list[entry].absent = dst_page->vmp_absent;
5530 user_page_list[entry].dirty = dst_page->vmp_dirty;
5531 user_page_list[entry].precious = dst_page->vmp_precious;
5532 user_page_list[entry].device = FALSE;
5533 user_page_list[entry].needed = FALSE;
5534 if (dst_page->vmp_clustered == TRUE)
5535 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
5536 else
5537 user_page_list[entry].speculative = FALSE;
5538 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
5539 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
5540 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
5541 user_page_list[entry].mark = FALSE;
5542 }
5543 /*
5544 * if UPL_RET_ONLY_ABSENT is set, then
5545 * we are working with a fresh page and we've
5546 * just set the clustered flag on it to
5547 * indicate that it was dragged in as part of a
5548 * speculative cluster... so leave it alone
5549 */
5550 if ( !(cntrl_flags & UPL_RET_ONLY_ABSENT)) {
5551 /*
5552 * someone is explicitly grabbing this page...
5553 * update clustered and speculative state
5554 *
5555 */
5556 if (dst_page->vmp_clustered)
5557 VM_PAGE_CONSUME_CLUSTERED(dst_page);
5558 }
5559 try_next_page:
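/*
 * Delayed work requests (activate, wire, set-reference, ...) are
 * accumulated in dw_array and handed to vm_page_do_delayed_work()
 * in batches of up to dw_limit entries, so the queue manipulation
 * is applied one batch at a time rather than page by page.
 */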
5560 if (dwp->dw_mask) {
5561 if (dwp->dw_mask & DW_vm_page_activate)
5562 VM_STAT_INCR(reactivations);
5563
5564 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
5565
5566 if (dw_count >= dw_limit) {
5567 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5568
5569 dwp = &dw_array[0];
5570 dw_count = 0;
5571 }
5572 }
5573 entry++;
5574 dst_offset += PAGE_SIZE_64;
5575 xfer_size -= PAGE_SIZE;
5576 }
5577 if (dw_count)
5578 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
5579
5580 if (alias_page != NULL) {
5581 VM_PAGE_FREE(alias_page);
5582 }
5583 if (pmap_flushes_delayed == TRUE)
5584 pmap_flush(&pmap_flush_context_storage);
5585
5586 if (page_list_count != NULL) {
5587 if (upl->flags & UPL_INTERNAL)
5588 *page_list_count = 0;
5589 else if (*page_list_count > entry)
5590 *page_list_count = entry;
5591 }
5592 #if UPL_DEBUG
5593 upl->upl_state = 1;
5594 #endif
5595 vm_object_unlock(object);
5596
5597 VM_DEBUG_CONSTANT_EVENT(vm_object_upl_request, VM_UPL_REQUEST, DBG_FUNC_END, page_grab_count, 0, 0, 0);
5598
5599 return KERN_SUCCESS;
5600 }
5601
5602 /*
5603 * Routine: vm_object_super_upl_request
5604 * Purpose:
5605 * Cause the population of a portion of a vm_object
5606 * in much the same way as memory_object_upl_request.
5607 * Depending on the nature of the request, the pages
5608 * returned may contain valid data or be uninitialized.
5609 * However, the region may be expanded up to the super
5610 * cluster size provided.
5611 */
5612
5613 __private_extern__ kern_return_t
5614 vm_object_super_upl_request(
5615 vm_object_t object,
5616 vm_object_offset_t offset,
5617 upl_size_t size,
5618 upl_size_t super_cluster,
5619 upl_t *upl,
5620 upl_page_info_t *user_page_list,
5621 unsigned int *page_list_count,
5622 upl_control_flags_t cntrl_flags,
5623 vm_tag_t tag)
5624 {
5625 if (object->paging_offset > offset || ((cntrl_flags & UPL_VECTOR)==UPL_VECTOR))
5626 return KERN_FAILURE;
5627
5628 assert(object->paging_in_progress);
5629 offset = offset - object->paging_offset;
5630
5631 if (super_cluster > size) {
5632
5633 vm_object_offset_t base_offset;
5634 upl_size_t super_size;
5635 vm_object_size_t super_size_64;
5636
5637 base_offset = (offset & ~((vm_object_offset_t) super_cluster - 1));
5638 super_size = (offset + size) > (base_offset + super_cluster) ? super_cluster<<1 : super_cluster;
5639 super_size_64 = ((base_offset + super_size) > object->vo_size) ? (object->vo_size - base_offset) : super_size;
5640 super_size = (upl_size_t) super_size_64;
5641 assert(super_size == super_size_64);
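/*
 * For example, with super_cluster == 0x10000 (64KB): a request
 * for offset 0x1f000, size 0x3000 rounds base_offset down to
 * 0x10000; since 0x1f000 + 0x3000 == 0x22000 spills past
 * base_offset + super_cluster == 0x20000, super_size is doubled
 * to 0x20000 (128KB) and then clamped to the object's size
 * (super_size_64) above.
 */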
5642
5643 if (offset > (base_offset + super_size)) {
5644 panic("vm_object_super_upl_request: Missed target pageout"
5645 " %#llx,%#llx, %#x, %#x, %#x, %#llx\n",
5646 offset, base_offset, super_size, super_cluster,
5647 size, object->paging_offset);
5648 }
5649 /*
5650 * apparently there is a case where the vm requests a
5651 * page to be written out whose offset is beyond the
5652 * object size
5653 */
5654 if ((offset + size) > (base_offset + super_size)) {
5655 super_size_64 = (offset + size) - base_offset;
5656 super_size = (upl_size_t) super_size_64;
5657 assert(super_size == super_size_64);
5658 }
5659
5660 offset = base_offset;
5661 size = super_size;
5662 }
5663 return vm_object_upl_request(object, offset, size, upl, user_page_list, page_list_count, cntrl_flags, tag);
5664 }
5665
5666 #if CONFIG_EMBEDDED
5667 int cs_executable_create_upl = 0;
5668 extern int proc_selfpid(void);
5669 extern char *proc_name_address(void *p);
5670 #endif /* CONFIG_EMBEDDED */
5671
5672 kern_return_t
5673 vm_map_create_upl(
5674 vm_map_t map,
5675 vm_map_address_t offset,
5676 upl_size_t *upl_size,
5677 upl_t *upl,
5678 upl_page_info_array_t page_list,
5679 unsigned int *count,
5680 upl_control_flags_t *flags,
5681 vm_tag_t tag)
5682 {
5683 vm_map_entry_t entry;
5684 upl_control_flags_t caller_flags;
5685 int force_data_sync;
5686 int sync_cow_data;
5687 vm_object_t local_object;
5688 vm_map_offset_t local_offset;
5689 vm_map_offset_t local_start;
5690 kern_return_t ret;
5691
5692 assert(page_aligned(offset));
5693
5694 caller_flags = *flags;
5695
5696 if (caller_flags & ~UPL_VALID_FLAGS) {
5697 /*
5698 * For forward compatibility's sake,
5699 * reject any unknown flag.
5700 */
5701 return KERN_INVALID_VALUE;
5702 }
5703 force_data_sync = (caller_flags & UPL_FORCE_DATA_SYNC);
5704 sync_cow_data = !(caller_flags & UPL_COPYOUT_FROM);
5705
5706 if (upl == NULL)
5707 return KERN_INVALID_ARGUMENT;
5708
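/*
 * The map is only read-locked below.  Whenever we have to drop
 * that lock or fail to upgrade it (copy-object sync, data sync,
 * fault handling), the entry may have changed underneath us, so
 * we jump back to REDISCOVER_ENTRY and look the entry up again.
 */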
5709 REDISCOVER_ENTRY:
5710 vm_map_lock_read(map);
5711
5712 if (!vm_map_lookup_entry(map, offset, &entry)) {
5713 vm_map_unlock_read(map);
5714 return KERN_FAILURE;
5715 }
5716
5717 if ((entry->vme_end - offset) < *upl_size) {
5718 *upl_size = (upl_size_t) (entry->vme_end - offset);
5719 assert(*upl_size == entry->vme_end - offset);
5720 }
5721
5722 if (caller_flags & UPL_QUERY_OBJECT_TYPE) {
5723 *flags = 0;
5724
5725 if (!entry->is_sub_map &&
5726 VME_OBJECT(entry) != VM_OBJECT_NULL) {
5727 if (VME_OBJECT(entry)->private)
5728 *flags = UPL_DEV_MEMORY;
5729
5730 if (VME_OBJECT(entry)->phys_contiguous)
5731 *flags |= UPL_PHYS_CONTIG;
5732 }
5733 vm_map_unlock_read(map);
5734 return KERN_SUCCESS;
5735 }
5736
5737 if (VME_OBJECT(entry) == VM_OBJECT_NULL ||
5738 !VME_OBJECT(entry)->phys_contiguous) {
5739 if (*upl_size > MAX_UPL_SIZE_BYTES)
5740 *upl_size = MAX_UPL_SIZE_BYTES;
5741 }
5742
5743 /*
5744 * Create an object if necessary.
5745 */
5746 if (VME_OBJECT(entry) == VM_OBJECT_NULL) {
5747
5748 if (vm_map_lock_read_to_write(map))
5749 goto REDISCOVER_ENTRY;
5750
5751 VME_OBJECT_SET(entry,
5752 vm_object_allocate((vm_size_t)
5753 (entry->vme_end -
5754 entry->vme_start)));
5755 VME_OFFSET_SET(entry, 0);
5756 assert(entry->use_pmap);
5757
5758 vm_map_lock_write_to_read(map);
5759 }
5760
5761 if (!(caller_flags & UPL_COPYOUT_FROM) &&
5762 !entry->is_sub_map &&
5763 !(entry->protection & VM_PROT_WRITE)) {
5764 vm_map_unlock_read(map);
5765 return KERN_PROTECTION_FAILURE;
5766 }
5767
5768 #if CONFIG_EMBEDDED
5769 if (map->pmap != kernel_pmap &&
5770 (caller_flags & UPL_COPYOUT_FROM) &&
5771 (entry->protection & VM_PROT_EXECUTE) &&
5772 !(entry->protection & VM_PROT_WRITE)) {
5773 vm_offset_t kaddr;
5774 vm_size_t ksize;
5775
5776 /*
5777 * We're about to create a read-only UPL backed by
5778 * memory from an executable mapping.
5779 * Wiring the pages would result in the pages being copied
5780 * (due to the "MAP_PRIVATE" mapping) and no longer
5781 * code-signed, so no longer eligible for execution.
5782 * Instead, let's copy the data into a kernel buffer and
5783 * create the UPL from this kernel buffer.
5784 * The kernel buffer is then freed, leaving the UPL holding
5785 * the last reference on the VM object, so the memory will
5786 * be released when the UPL is committed.
5787 */
5788
5789 vm_map_unlock_read(map);
5790 /* allocate kernel buffer */
5791 ksize = round_page(*upl_size);
5792 kaddr = 0;
5793 ret = kmem_alloc_pageable(kernel_map,
5794 &kaddr,
5795 ksize,
5796 tag);
5797 if (ret == KERN_SUCCESS) {
5798 /* copyin the user data */
5799 assert(page_aligned(offset));
5800 ret = copyinmap(map, offset, (void *)kaddr, *upl_size);
5801 }
5802 if (ret == KERN_SUCCESS) {
5803 if (ksize > *upl_size) {
5804 /* zero out the extra space in kernel buffer */
5805 memset((void *)(kaddr + *upl_size),
5806 0,
5807 ksize - *upl_size);
5808 }
5809 /* create the UPL from the kernel buffer */
5810 ret = vm_map_create_upl(kernel_map, kaddr, upl_size,
5811 upl, page_list, count, flags, tag);
5812 }
5813 if (kaddr != 0) {
5814 /* free the kernel buffer */
5815 kmem_free(kernel_map, kaddr, ksize);
5816 kaddr = 0;
5817 ksize = 0;
5818 }
5819 #if DEVELOPMENT || DEBUG
5820 DTRACE_VM4(create_upl_from_executable,
5821 vm_map_t, map,
5822 vm_map_address_t, offset,
5823 upl_size_t, *upl_size,
5824 kern_return_t, ret);
5825 #endif /* DEVELOPMENT || DEBUG */
5826 return ret;
5827 }
5828 #endif /* CONFIG_EMBEDDED */
5829
5830 local_object = VME_OBJECT(entry);
5831 assert(local_object != VM_OBJECT_NULL);
5832
5833 if (!entry->is_sub_map &&
5834 !entry->needs_copy &&
5835 *upl_size != 0 &&
5836 local_object->vo_size > *upl_size && /* partial UPL */
5837 entry->wired_count == 0 && /* No COW for entries that are wired */
5838 (map->pmap != kernel_pmap) && /* alias checks */
5839 (vm_map_entry_should_cow_for_true_share(entry) /* case 1 */
5840 ||
5841 (/* case 2 */
5842 local_object->internal &&
5843 (local_object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC) &&
5844 local_object->ref_count > 1))) {
5845 vm_prot_t prot;
5846
5847 /*
5848 * Case 1:
5849 * Set up the targeted range for copy-on-write to avoid
5850 * applying true_share/copy_delay to the entire object.
5851 *
5852 * Case 2:
5853 * This map entry covers only part of an internal
5854 * object. There could be other map entries covering
5855 * other areas of this object and some of these map
5856 * entries could be marked as "needs_copy", which
5857 * assumes that the object is COPY_SYMMETRIC.
5858 * To avoid marking this object as COPY_DELAY and
5859 * "true_share", let's shadow it and mark the new
5860 * (smaller) object as "true_share" and COPY_DELAY.
5861 */
5862
5863 if (vm_map_lock_read_to_write(map)) {
5864 goto REDISCOVER_ENTRY;
5865 }
5866 vm_map_lock_assert_exclusive(map);
5867 assert(VME_OBJECT(entry) == local_object);
5868
5869 vm_map_clip_start(map,
5870 entry,
5871 vm_map_trunc_page(offset,
5872 VM_MAP_PAGE_MASK(map)));
5873 vm_map_clip_end(map,
5874 entry,
5875 vm_map_round_page(offset + *upl_size,
5876 VM_MAP_PAGE_MASK(map)));
5877 if ((entry->vme_end - offset) < *upl_size) {
5878 *upl_size = (upl_size_t) (entry->vme_end - offset);
5879 assert(*upl_size == entry->vme_end - offset);
5880 }
5881
5882 prot = entry->protection & ~VM_PROT_WRITE;
5883 if (override_nx(map, VME_ALIAS(entry)) && prot)
5884 prot |= VM_PROT_EXECUTE;
5885 vm_object_pmap_protect(local_object,
5886 VME_OFFSET(entry),
5887 entry->vme_end - entry->vme_start,
5888 ((entry->is_shared ||
5889 map->mapped_in_other_pmaps)
5890 ? PMAP_NULL
5891 : map->pmap),
5892 entry->vme_start,
5893 prot);
5894
5895 assert(entry->wired_count == 0);
5896
5897 /*
5898 * Lock the VM object and re-check its status: if it's mapped
5899 * in another address space, we could still be racing with
5900 * another thread holding that other VM map exclusively.
5901 */
5902 vm_object_lock(local_object);
5903 if (local_object->true_share) {
5904 /* object is already in proper state: no COW needed */
5905 assert(local_object->copy_strategy !=
5906 MEMORY_OBJECT_COPY_SYMMETRIC);
5907 } else {
5908 /* not true_share: ask for copy-on-write below */
5909 assert(local_object->copy_strategy ==
5910 MEMORY_OBJECT_COPY_SYMMETRIC);
5911 entry->needs_copy = TRUE;
5912 }
5913 vm_object_unlock(local_object);
5914
5915 vm_map_lock_write_to_read(map);
5916 }
5917
5918 if (entry->needs_copy) {
5919 /*
5920 * Honor copy-on-write for COPY_SYMMETRIC
5921 * strategy.
5922 */
5923 vm_map_t local_map;
5924 vm_object_t object;
5925 vm_object_offset_t new_offset;
5926 vm_prot_t prot;
5927 boolean_t wired;
5928 vm_map_version_t version;
5929 vm_map_t real_map;
5930 vm_prot_t fault_type;
5931
5932 local_map = map;
5933
5934 if (caller_flags & UPL_COPYOUT_FROM) {
5935 fault_type = VM_PROT_READ | VM_PROT_COPY;
5936 vm_counters.create_upl_extra_cow++;
5937 vm_counters.create_upl_extra_cow_pages +=
5938 (entry->vme_end - entry->vme_start) / PAGE_SIZE;
5939 } else {
5940 fault_type = VM_PROT_WRITE;
5941 }
5942 if (vm_map_lookup_locked(&local_map,
5943 offset, fault_type,
5944 OBJECT_LOCK_EXCLUSIVE,
5945 &version, &object,
5946 &new_offset, &prot, &wired,
5947 NULL,
5948 &real_map) != KERN_SUCCESS) {
5949 if (fault_type == VM_PROT_WRITE) {
5950 vm_counters.create_upl_lookup_failure_write++;
5951 } else {
5952 vm_counters.create_upl_lookup_failure_copy++;
5953 }
5954 vm_map_unlock_read(local_map);
5955 return KERN_FAILURE;
5956 }
5957 if (real_map != map)
5958 vm_map_unlock(real_map);
5959 vm_map_unlock_read(local_map);
5960
5961 vm_object_unlock(object);
5962
5963 goto REDISCOVER_ENTRY;
5964 }
5965
5966 if (entry->is_sub_map) {
5967 vm_map_t submap;
5968
5969 submap = VME_SUBMAP(entry);
5970 local_start = entry->vme_start;
5971 local_offset = VME_OFFSET(entry);
5972
5973 vm_map_reference(submap);
5974 vm_map_unlock_read(map);
5975
5976 ret = vm_map_create_upl(submap,
5977 local_offset + (offset - local_start),
5978 upl_size, upl, page_list, count, flags, tag);
5979 vm_map_deallocate(submap);
5980
5981 return ret;
5982 }
5983
5984 if (sync_cow_data &&
5985 (VME_OBJECT(entry)->shadow ||
5986 VME_OBJECT(entry)->copy)) {
5987 local_object = VME_OBJECT(entry);
5988 local_start = entry->vme_start;
5989 local_offset = VME_OFFSET(entry);
5990
5991 vm_object_reference(local_object);
5992 vm_map_unlock_read(map);
5993
5994 if (local_object->shadow && local_object->copy) {
5995 vm_object_lock_request(local_object->shadow,
5996 ((vm_object_offset_t)
5997 ((offset - local_start) +
5998 local_offset) +
5999 local_object->vo_shadow_offset),
6000 *upl_size, FALSE,
6001 MEMORY_OBJECT_DATA_SYNC,
6002 VM_PROT_NO_CHANGE);
6003 }
6004 sync_cow_data = FALSE;
6005 vm_object_deallocate(local_object);
6006
6007 goto REDISCOVER_ENTRY;
6008 }
6009 if (force_data_sync) {
6010 local_object = VME_OBJECT(entry);
6011 local_start = entry->vme_start;
6012 local_offset = VME_OFFSET(entry);
6013
6014 vm_object_reference(local_object);
6015 vm_map_unlock_read(map);
6016
6017 vm_object_lock_request(local_object,
6018 ((vm_object_offset_t)
6019 ((offset - local_start) +
6020 local_offset)),
6021 (vm_object_size_t)*upl_size,
6022 FALSE,
6023 MEMORY_OBJECT_DATA_SYNC,
6024 VM_PROT_NO_CHANGE);
6025
6026 force_data_sync = FALSE;
6027 vm_object_deallocate(local_object);
6028
6029 goto REDISCOVER_ENTRY;
6030 }
6031 if (VME_OBJECT(entry)->private)
6032 *flags = UPL_DEV_MEMORY;
6033 else
6034 *flags = 0;
6035
6036 if (VME_OBJECT(entry)->phys_contiguous)
6037 *flags |= UPL_PHYS_CONTIG;
6038
6039 local_object = VME_OBJECT(entry);
6040 local_offset = VME_OFFSET(entry);
6041 local_start = entry->vme_start;
6042
6043 #if CONFIG_EMBEDDED
6044 /*
6045 * Wiring will copy the pages to the shadow object.
6046 * The shadow object will not be code-signed so
6047 * attempting to execute code from these copied pages
6048 * would trigger a code-signing violation.
6049 */
6050 if (entry->protection & VM_PROT_EXECUTE) {
6051 #if MACH_ASSERT
6052 printf("pid %d[%s] create_upl out of executable range from "
6053 "0x%llx to 0x%llx: side effects may include "
6054 "code-signing violations later on\n",
6055 proc_selfpid(),
6056 (current_task()->bsd_info
6057 ? proc_name_address(current_task()->bsd_info)
6058 : "?"),
6059 (uint64_t) entry->vme_start,
6060 (uint64_t) entry->vme_end);
6061 #endif /* MACH_ASSERT */
6062 DTRACE_VM2(cs_executable_create_upl,
6063 uint64_t, (uint64_t)entry->vme_start,
6064 uint64_t, (uint64_t)entry->vme_end);
6065 cs_executable_create_upl++;
6066 }
6067 #endif /* CONFIG_EMBEDDED */
6068
6069 vm_object_lock(local_object);
6070
6071 /*
6072 * Ensure that this object is "true_share" and "copy_delay" now,
6073 * while we're still holding the VM map lock. After we unlock the map,
6074 * anything could happen to that mapping, including some copy-on-write
6075 * activity. We need to make sure that the IOPL will point at the
6076 * same memory as the mapping.
6077 */
6078 if (local_object->true_share) {
6079 assert(local_object->copy_strategy !=
6080 MEMORY_OBJECT_COPY_SYMMETRIC);
6081 } else if (local_object != kernel_object &&
6082 local_object != compressor_object &&
6083 !local_object->phys_contiguous) {
6084 #if VM_OBJECT_TRACKING_OP_TRUESHARE
6085 if (!local_object->true_share &&
6086 vm_object_tracking_inited) {
6087 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
6088 int num = 0;
6089 num = OSBacktrace(bt,
6090 VM_OBJECT_TRACKING_BTDEPTH);
6091 btlog_add_entry(vm_object_tracking_btlog,
6092 local_object,
6093 VM_OBJECT_TRACKING_OP_TRUESHARE,
6094 bt,
6095 num);
6096 }
6097 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
6098 local_object->true_share = TRUE;
6099 if (local_object->copy_strategy ==
6100 MEMORY_OBJECT_COPY_SYMMETRIC) {
6101 local_object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
6102 }
6103 }
6104
6105 vm_object_reference_locked(local_object);
6106 vm_object_unlock(local_object);
6107
6108 vm_map_unlock_read(map);
6109
6110 ret = vm_object_iopl_request(local_object,
6111 ((vm_object_offset_t)
6112 ((offset - local_start) + local_offset)),
6113 *upl_size,
6114 upl,
6115 page_list,
6116 count,
6117 caller_flags,
6118 tag);
6119 vm_object_deallocate(local_object);
6120
6121 return ret;
6122 }
6123
6124 /*
6125 * Internal routine to enter a UPL into a VM map.
6126 *
6127 * JMM - This should just be doable through the standard
6128 * vm_map_enter() API.
6129 */
6130 kern_return_t
6131 vm_map_enter_upl(
6132 vm_map_t map,
6133 upl_t upl,
6134 vm_map_offset_t *dst_addr)
6135 {
6136 vm_map_size_t size;
6137 vm_object_offset_t offset;
6138 vm_map_offset_t addr;
6139 vm_page_t m;
6140 kern_return_t kr;
6141 int isVectorUPL = 0, curr_upl=0;
6142 upl_t vector_upl = NULL;
6143 vm_offset_t vector_upl_dst_addr = 0;
6144 vm_map_t vector_upl_submap = NULL;
6145 upl_offset_t subupl_offset = 0;
6146 upl_size_t subupl_size = 0;
6147
6148 if (upl == UPL_NULL)
6149 return KERN_INVALID_ARGUMENT;
6150
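/*
 * A vector UPL is mapped by allocating a kernel submap large
 * enough for all of its sub-UPLs and entering each sub-UPL at
 * its recorded offset within that submap; the address returned
 * to the caller is the base of that submap.
 */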
6151 if((isVectorUPL = vector_upl_is_valid(upl))) {
6152 int mapped=0,valid_upls=0;
6153 vector_upl = upl;
6154
6155 upl_lock(vector_upl);
6156 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6157 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6158 if(upl == NULL)
6159 continue;
6160 valid_upls++;
6161 if (UPL_PAGE_LIST_MAPPED & upl->flags)
6162 mapped++;
6163 }
6164
6165 if(mapped) {
6166 if(mapped != valid_upls)
6167 panic("Only %d of the %d sub-upls within the Vector UPL are alread mapped\n", mapped, valid_upls);
6168 else {
6169 upl_unlock(vector_upl);
6170 return KERN_FAILURE;
6171 }
6172 }
6173
6174 kr = kmem_suballoc(map, &vector_upl_dst_addr, vector_upl->size, FALSE,
6175 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
6176 &vector_upl_submap);
6177 if( kr != KERN_SUCCESS )
6178 panic("Vector UPL submap allocation failed\n");
6179 map = vector_upl_submap;
6180 vector_upl_set_submap(vector_upl, vector_upl_submap, vector_upl_dst_addr);
6181 curr_upl=0;
6182 }
6183 else
6184 upl_lock(upl);
6185
6186 process_upl_to_enter:
6187 if(isVectorUPL){
6188 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6189 *dst_addr = vector_upl_dst_addr;
6190 upl_unlock(vector_upl);
6191 return KERN_SUCCESS;
6192 }
6193 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6194 if(upl == NULL)
6195 goto process_upl_to_enter;
6196
6197 vector_upl_get_iostate(vector_upl, upl, &subupl_offset, &subupl_size);
6198 *dst_addr = (vm_map_offset_t)(vector_upl_dst_addr + (vm_map_offset_t)subupl_offset);
6199 } else {
6200 /*
6201 * check to see if already mapped
6202 */
6203 if (UPL_PAGE_LIST_MAPPED & upl->flags) {
6204 upl_unlock(upl);
6205 return KERN_FAILURE;
6206 }
6207 }
6208 if ((!(upl->flags & UPL_SHADOWED)) &&
6209 ((upl->flags & UPL_HAS_BUSY) ||
6210 !((upl->flags & (UPL_DEVICE_MEMORY | UPL_IO_WIRE)) || (upl->map_object->phys_contiguous)))) {
6211
6212 vm_object_t object;
6213 vm_page_t alias_page;
6214 vm_object_offset_t new_offset;
6215 unsigned int pg_num;
6216 wpl_array_t lite_list;
6217
6218 if (upl->flags & UPL_INTERNAL) {
6219 lite_list = (wpl_array_t)
6220 ((((uintptr_t)upl) + sizeof(struct upl))
6221 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6222 } else {
6223 lite_list = (wpl_array_t)(((uintptr_t)upl) + sizeof(struct upl));
6224 }
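/*
 * Layout note: an internal UPL is followed in memory by its
 * upl_page_info_t array (one entry per page) and then by the
 * lite list bitmap; a non-internal UPL has only the bitmap
 * appended, which is why the two offsets above differ.
 */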
6225 object = upl->map_object;
6226 upl->map_object = vm_object_allocate(upl->size);
6227
6228 vm_object_lock(upl->map_object);
6229
6230 upl->map_object->shadow = object;
6231 upl->map_object->pageout = TRUE;
6232 upl->map_object->can_persist = FALSE;
6233 upl->map_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
6234 upl->map_object->vo_shadow_offset = upl->offset - object->paging_offset;
6235 upl->map_object->wimg_bits = object->wimg_bits;
6236 offset = upl->map_object->vo_shadow_offset;
6237 new_offset = 0;
6238 size = upl->size;
6239
6240 upl->flags |= UPL_SHADOWED;
6241
6242 while (size) {
6243 pg_num = (unsigned int) (new_offset / PAGE_SIZE);
6244 assert(pg_num == new_offset / PAGE_SIZE);
6245
6246 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6247
6248 VM_PAGE_GRAB_FICTITIOUS(alias_page);
6249
6250 vm_object_lock(object);
6251
6252 m = vm_page_lookup(object, offset);
6253 if (m == VM_PAGE_NULL) {
6254 panic("vm_upl_map: page missing\n");
6255 }
6256
6257 /*
6258 * Convert the fictitious page to a private
6259 * shadow of the real page.
6260 */
6261 assert(alias_page->vmp_fictitious);
6262 alias_page->vmp_fictitious = FALSE;
6263 alias_page->vmp_private = TRUE;
6264 alias_page->vmp_free_when_done = TRUE;
6265 /*
6266 * since m is a page in the upl it must
6267 * already be wired or BUSY, so it's
6268 * safe to assign the underlying physical
6269 * page to the alias
6270 */
6271 VM_PAGE_SET_PHYS_PAGE(alias_page, VM_PAGE_GET_PHYS_PAGE(m));
6272
6273 vm_object_unlock(object);
6274
6275 vm_page_lockspin_queues();
6276 vm_page_wire(alias_page, VM_KERN_MEMORY_NONE, TRUE);
6277 vm_page_unlock_queues();
6278
6279 vm_page_insert_wired(alias_page, upl->map_object, new_offset, VM_KERN_MEMORY_NONE);
6280
6281 assert(!alias_page->vmp_wanted);
6282 alias_page->vmp_busy = FALSE;
6283 alias_page->vmp_absent = FALSE;
6284 }
6285 size -= PAGE_SIZE;
6286 offset += PAGE_SIZE_64;
6287 new_offset += PAGE_SIZE_64;
6288 }
6289 vm_object_unlock(upl->map_object);
6290 }
6291 if (upl->flags & UPL_SHADOWED)
6292 offset = 0;
6293 else
6294 offset = upl->offset - upl->map_object->paging_offset;
6295
6296 size = upl->size;
6297
6298 vm_object_reference(upl->map_object);
6299
6300 if(!isVectorUPL) {
6301 *dst_addr = 0;
6302 /*
6303 * NEED A UPL_MAP ALIAS
6304 */
6305 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6306 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6307 upl->map_object, offset, FALSE,
6308 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6309
6310 if (kr != KERN_SUCCESS) {
6311 vm_object_deallocate(upl->map_object);
6312 upl_unlock(upl);
6313 return(kr);
6314 }
6315 }
6316 else {
6317 kr = vm_map_enter(map, dst_addr, (vm_map_size_t)size, (vm_map_offset_t) 0,
6318 VM_FLAGS_FIXED, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_OSFMK,
6319 upl->map_object, offset, FALSE,
6320 VM_PROT_DEFAULT, VM_PROT_ALL, VM_INHERIT_DEFAULT);
6321 if(kr)
6322 panic("vm_map_enter failed for a Vector UPL\n");
6323 }
6324 vm_object_lock(upl->map_object);
6325
6326 for (addr = *dst_addr; size > 0; size -= PAGE_SIZE, addr += PAGE_SIZE) {
6327 m = vm_page_lookup(upl->map_object, offset);
6328
6329 if (m) {
6330 m->vmp_pmapped = TRUE;
6331
6332 /* CODE SIGNING ENFORCEMENT: page has been wpmapped,
6333 * but only in kernel space. If this was on a user map,
6334 * we'd have to set the wpmapped bit. */
6335 /* m->vmp_wpmapped = TRUE; */
6336 assert(map->pmap == kernel_pmap);
6337
6338 PMAP_ENTER(map->pmap, addr, m, VM_PROT_DEFAULT, VM_PROT_NONE, 0, TRUE, kr);
6339
6340 assert(kr == KERN_SUCCESS);
6341 #if KASAN
6342 kasan_notify_address(addr, PAGE_SIZE_64);
6343 #endif
6344 }
6345 offset += PAGE_SIZE_64;
6346 }
6347 vm_object_unlock(upl->map_object);
6348
6349 /*
6350 * hold a reference for the mapping
6351 */
6352 upl->ref_count++;
6353 upl->flags |= UPL_PAGE_LIST_MAPPED;
6354 upl->kaddr = (vm_offset_t) *dst_addr;
6355 assert(upl->kaddr == *dst_addr);
6356
6357 if(isVectorUPL)
6358 goto process_upl_to_enter;
6359
6360 upl_unlock(upl);
6361
6362 return KERN_SUCCESS;
6363 }
6364
6365 /*
6366 * Internal routine to remove a UPL mapping from a VM map.
6367 *
6368 * XXX - This should just be doable through a standard
6369 * vm_map_remove() operation. Otherwise, implicit clean-up
6370 * of the target map won't be able to correctly remove
6371 * these (and release the reference on the UPL). Having
6372 * to do this means we can't map these into user-space
6373 * maps yet.
6374 */
6375 kern_return_t
6376 vm_map_remove_upl(
6377 vm_map_t map,
6378 upl_t upl)
6379 {
6380 vm_address_t addr;
6381 upl_size_t size;
6382 int isVectorUPL = 0, curr_upl = 0;
6383 upl_t vector_upl = NULL;
6384
6385 if (upl == UPL_NULL)
6386 return KERN_INVALID_ARGUMENT;
6387
6388 if((isVectorUPL = vector_upl_is_valid(upl))) {
6389 int unmapped=0, valid_upls=0;
6390 vector_upl = upl;
6391 upl_lock(vector_upl);
6392 for(curr_upl=0; curr_upl < MAX_VECTOR_UPL_ELEMENTS; curr_upl++) {
6393 upl = vector_upl_subupl_byindex(vector_upl, curr_upl );
6394 if(upl == NULL)
6395 continue;
6396 valid_upls++;
6397 if (!(UPL_PAGE_LIST_MAPPED & upl->flags))
6398 unmapped++;
6399 }
6400
6401 if(unmapped) {
6402 if(unmapped != valid_upls)
6403 panic("%d of the %d sub-upls within the Vector UPL is/are not mapped\n", unmapped, valid_upls);
6404 else {
6405 upl_unlock(vector_upl);
6406 return KERN_FAILURE;
6407 }
6408 }
6409 curr_upl=0;
6410 }
6411 else
6412 upl_lock(upl);
6413
6414 process_upl_to_remove:
6415 if(isVectorUPL) {
6416 if(curr_upl == MAX_VECTOR_UPL_ELEMENTS) {
6417 vm_map_t v_upl_submap;
6418 vm_offset_t v_upl_submap_dst_addr;
6419 vector_upl_get_submap(vector_upl, &v_upl_submap, &v_upl_submap_dst_addr);
6420
6421 vm_map_remove(map, v_upl_submap_dst_addr, v_upl_submap_dst_addr + vector_upl->size, VM_MAP_REMOVE_NO_FLAGS);
6422 vm_map_deallocate(v_upl_submap);
6423 upl_unlock(vector_upl);
6424 return KERN_SUCCESS;
6425 }
6426
6427 upl = vector_upl_subupl_byindex(vector_upl, curr_upl++ );
6428 if(upl == NULL)
6429 goto process_upl_to_remove;
6430 }
6431
6432 if (upl->flags & UPL_PAGE_LIST_MAPPED) {
6433 addr = upl->kaddr;
6434 size = upl->size;
6435
6436 assert(upl->ref_count > 1);
6437 upl->ref_count--; /* removing mapping ref */
6438
6439 upl->flags &= ~UPL_PAGE_LIST_MAPPED;
6440 upl->kaddr = (vm_offset_t) 0;
6441
6442 if(!isVectorUPL) {
6443 upl_unlock(upl);
6444
6445 vm_map_remove(
6446 map,
6447 vm_map_trunc_page(addr,
6448 VM_MAP_PAGE_MASK(map)),
6449 vm_map_round_page(addr + size,
6450 VM_MAP_PAGE_MASK(map)),
6451 VM_MAP_REMOVE_NO_FLAGS);
6452 return KERN_SUCCESS;
6453 }
6454 else {
6455 /*
6456 * If it's a Vectored UPL, we'll be removing the entire
6457 * submap anyway, so no need to remove individual UPL
6458 * element mappings from within the submap
6459 */
6460 goto process_upl_to_remove;
6461 }
6462 }
6463 upl_unlock(upl);
6464
6465 return KERN_FAILURE;
6466 }
6467
6468
6469 kern_return_t
6470 upl_commit_range(
6471 upl_t upl,
6472 upl_offset_t offset,
6473 upl_size_t size,
6474 int flags,
6475 upl_page_info_t *page_list,
6476 mach_msg_type_number_t count,
6477 boolean_t *empty)
6478 {
6479 upl_size_t xfer_size, subupl_size = size;
6480 vm_object_t shadow_object;
6481 vm_object_t object;
6482 vm_object_t m_object;
6483 vm_object_offset_t target_offset;
6484 upl_offset_t subupl_offset = offset;
6485 int entry;
6486 wpl_array_t lite_list;
6487 int occupied;
6488 int clear_refmod = 0;
6489 int pgpgout_count = 0;
6490 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
6491 struct vm_page_delayed_work *dwp;
6492 int dw_count;
6493 int dw_limit;
6494 int isVectorUPL = 0;
6495 upl_t vector_upl = NULL;
6496 boolean_t should_be_throttled = FALSE;
6497
6498 vm_page_t nxt_page = VM_PAGE_NULL;
6499 int fast_path_possible = 0;
6500 int fast_path_full_commit = 0;
6501 int throttle_page = 0;
6502 int unwired_count = 0;
6503 int local_queue_count = 0;
6504 vm_page_t first_local, last_local;
6505
6506 *empty = FALSE;
6507
6508 if (upl == UPL_NULL)
6509 return KERN_INVALID_ARGUMENT;
6510
6511 if (count == 0)
6512 page_list = NULL;
6513
6514 if((isVectorUPL = vector_upl_is_valid(upl))) {
6515 vector_upl = upl;
6516 upl_lock(vector_upl);
6517 }
6518 else
6519 upl_lock(upl);
6520
6521 process_upl_to_commit:
6522
6523 if(isVectorUPL) {
6524 size = subupl_size;
6525 offset = subupl_offset;
6526 if(size == 0) {
6527 upl_unlock(vector_upl);
6528 return KERN_SUCCESS;
6529 }
6530 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
6531 if(upl == NULL) {
6532 upl_unlock(vector_upl);
6533 return KERN_FAILURE;
6534 }
6535 page_list = UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(upl);
6536 subupl_size -= size;
6537 subupl_offset += size;
6538 }
6539
6540 #if UPL_DEBUG
6541 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
6542 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
6543
6544 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
6545 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
6546
6547 upl->upl_commit_index++;
6548 }
6549 #endif
6550 if (upl->flags & UPL_DEVICE_MEMORY)
6551 xfer_size = 0;
6552 else if ((offset + size) <= upl->size)
6553 xfer_size = size;
6554 else {
6555 if(!isVectorUPL)
6556 upl_unlock(upl);
6557 else {
6558 upl_unlock(vector_upl);
6559 }
6560 return KERN_FAILURE;
6561 }
6562 if (upl->flags & UPL_SET_DIRTY)
6563 flags |= UPL_COMMIT_SET_DIRTY;
6564 if (upl->flags & UPL_CLEAR_DIRTY)
6565 flags |= UPL_COMMIT_CLEAR_DIRTY;
6566
6567 if (upl->flags & UPL_INTERNAL)
6568 lite_list = (wpl_array_t) ((((uintptr_t)upl) + sizeof(struct upl))
6569 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
6570 else
6571 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
6572
6573 object = upl->map_object;
6574
6575 if (upl->flags & UPL_SHADOWED) {
6576 vm_object_lock(object);
6577 shadow_object = object->shadow;
6578 } else {
6579 shadow_object = object;
6580 }
6581 entry = offset/PAGE_SIZE;
6582 target_offset = (vm_object_offset_t)offset;
6583
6584 assert(!(target_offset & PAGE_MASK));
6585 assert(!(xfer_size & PAGE_MASK));
6586
6587 if (upl->flags & UPL_KERNEL_OBJECT)
6588 vm_object_lock_shared(shadow_object);
6589 else
6590 vm_object_lock(shadow_object);
6591
6592 VM_OBJECT_WIRED_PAGE_UPDATE_START(shadow_object);
6593
6594 if (upl->flags & UPL_ACCESS_BLOCKED) {
6595 assert(shadow_object->blocked_access);
6596 shadow_object->blocked_access = FALSE;
6597 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
6598 }
6599
6600 if (shadow_object->code_signed) {
6601 /*
6602 * CODE SIGNING:
6603 * If the object is code-signed, do not let this UPL tell
6604 * us if the pages are valid or not. Let the pages be
6605 * validated by VM the normal way (when they get mapped or
6606 * copied).
6607 */
6608 flags &= ~UPL_COMMIT_CS_VALIDATED;
6609 }
6610 if (! page_list) {
6611 /*
6612 * No page list to get the code-signing info from !?
6613 */
6614 flags &= ~UPL_COMMIT_CS_VALIDATED;
6615 }
6616 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal)
6617 should_be_throttled = TRUE;
6618
6619 dwp = &dw_array[0];
6620 dw_count = 0;
6621 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
6622
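/*
 * Fast path: for an IO_WIRE commit that is not freeing absent
 * pages, is not part of a vector UPL, and targets an object that
 * is neither volatile nor empty purgeable, walk the object's
 * resident page list directly, unwire pages onto a local list,
 * and splice that list into the appropriate page queue in one
 * operation after the main loop instead of queueing per-page
 * delayed work.
 */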
6623 if ((upl->flags & UPL_IO_WIRE) &&
6624 !(flags & UPL_COMMIT_FREE_ABSENT) &&
6625 !isVectorUPL &&
6626 shadow_object->purgable != VM_PURGABLE_VOLATILE &&
6627 shadow_object->purgable != VM_PURGABLE_EMPTY) {
6628
6629 if (!vm_page_queue_empty(&shadow_object->memq)) {
6630
6631 if (size == shadow_object->vo_size) {
6632 nxt_page = (vm_page_t)vm_page_queue_first(&shadow_object->memq);
6633 fast_path_full_commit = 1;
6634 }
6635 fast_path_possible = 1;
6636
6637 if (!VM_DYNAMIC_PAGING_ENABLED() && shadow_object->internal &&
6638 (shadow_object->purgable == VM_PURGABLE_DENY ||
6639 shadow_object->purgable == VM_PURGABLE_NONVOLATILE ||
6640 shadow_object->purgable == VM_PURGABLE_VOLATILE)) {
6641 throttle_page = 1;
6642 }
6643 }
6644 }
6645 first_local = VM_PAGE_NULL;
6646 last_local = VM_PAGE_NULL;
6647
6648 while (xfer_size) {
6649 vm_page_t t, m;
6650
6651 dwp->dw_mask = 0;
6652 clear_refmod = 0;
6653
6654 m = VM_PAGE_NULL;
6655
6656 if (upl->flags & UPL_LITE) {
6657 unsigned int pg_num;
6658
6659 if (nxt_page != VM_PAGE_NULL) {
6660 m = nxt_page;
6661 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
6662 target_offset = m->vmp_offset;
6663 }
6664 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
6665 assert(pg_num == target_offset/PAGE_SIZE);
6666
6667 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
6668 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
6669
6670 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6671 m = vm_page_lookup(shadow_object, target_offset + (upl->offset - shadow_object->paging_offset));
6672 } else
6673 m = NULL;
6674 }
6675 if (upl->flags & UPL_SHADOWED) {
6676 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
6677
6678 t->vmp_free_when_done = FALSE;
6679
6680 VM_PAGE_FREE(t);
6681
6682 if (!(upl->flags & UPL_KERNEL_OBJECT) && m == VM_PAGE_NULL)
6683 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
6684 }
6685 }
6686 if (m == VM_PAGE_NULL)
6687 goto commit_next_page;
6688
6689 m_object = VM_PAGE_OBJECT(m);
6690
6691 if (m->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
6692 assert(m->vmp_busy);
6693
6694 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6695 goto commit_next_page;
6696 }
6697
6698 if (flags & UPL_COMMIT_CS_VALIDATED) {
6699 /*
6700 * CODE SIGNING:
6701 * Set the code signing bits according to
6702 * what the UPL says they should be.
6703 */
6704 m->vmp_cs_validated = page_list[entry].cs_validated;
6705 m->vmp_cs_tainted = page_list[entry].cs_tainted;
6706 m->vmp_cs_nx = page_list[entry].cs_nx;
6707 }
6708 if (flags & UPL_COMMIT_WRITTEN_BY_KERNEL)
6709 m->vmp_written_by_kernel = TRUE;
6710
6711 if (upl->flags & UPL_IO_WIRE) {
6712
6713 if (page_list)
6714 page_list[entry].phys_addr = 0;
6715
6716 if (flags & UPL_COMMIT_SET_DIRTY) {
6717 SET_PAGE_DIRTY(m, FALSE);
6718 } else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6719 m->vmp_dirty = FALSE;
6720
6721 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6722 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6723 /*
6724 * CODE SIGNING:
6725 * This page is no longer dirty
6726 * but could have been modified,
6727 * so it will need to be
6728 * re-validated.
6729 */
6730 m->vmp_cs_validated = FALSE;
6731
6732 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6733
6734 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6735 }
6736 clear_refmod |= VM_MEM_MODIFIED;
6737 }
6738 if (upl->flags & UPL_ACCESS_BLOCKED) {
6739 /*
6740 * We blocked access to the pages in this UPL.
6741 * Clear the "busy" bit and wake up any waiter
6742 * for this page.
6743 */
6744 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6745 }
6746 if (fast_path_possible) {
6747 assert(m_object->purgable != VM_PURGABLE_EMPTY);
6748 assert(m_object->purgable != VM_PURGABLE_VOLATILE);
6749 if (m->vmp_absent) {
6750 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
6751 assert(m->vmp_wire_count == 0);
6752 assert(m->vmp_busy);
6753
6754 m->vmp_absent = FALSE;
6755 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6756 } else {
6757 if (m->vmp_wire_count == 0)
6758 panic("wire_count == 0, m = %p, obj = %p\n", m, shadow_object);
6759 assert(m->vmp_q_state == VM_PAGE_IS_WIRED);
6760
6761 /*
6762 * XXX FBDP need to update some other
6763 * counters here (purgeable_wired_count)
6764 * (ledgers), ...
6765 */
6766 assert(m->vmp_wire_count > 0);
6767 m->vmp_wire_count--;
6768
6769 if (m->vmp_wire_count == 0) {
6770 m->vmp_q_state = VM_PAGE_NOT_ON_Q;
6771 unwired_count++;
6772 }
6773 }
6774 if (m->vmp_wire_count == 0) {
6775 assert(m->vmp_pageq.next == 0 && m->vmp_pageq.prev == 0);
6776
6777 if (last_local == VM_PAGE_NULL) {
6778 assert(first_local == VM_PAGE_NULL);
6779
6780 last_local = m;
6781 first_local = m;
6782 } else {
6783 assert(first_local != VM_PAGE_NULL);
6784
6785 m->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
6786 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(m);
6787 first_local = m;
6788 }
6789 local_queue_count++;
6790
6791 if (throttle_page) {
6792 m->vmp_q_state = VM_PAGE_ON_THROTTLED_Q;
6793 } else {
6794 if (flags & UPL_COMMIT_INACTIVATE) {
6795 if (shadow_object->internal)
6796 m->vmp_q_state = VM_PAGE_ON_INACTIVE_INTERNAL_Q;
6797 else
6798 m->vmp_q_state = VM_PAGE_ON_INACTIVE_EXTERNAL_Q;
6799 } else
6800 m->vmp_q_state = VM_PAGE_ON_ACTIVE_Q;
6801 }
6802 }
6803 } else {
6804 if (flags & UPL_COMMIT_INACTIVATE) {
6805 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6806 clear_refmod |= VM_MEM_REFERENCED;
6807 }
6808 if (m->vmp_absent) {
6809 if (flags & UPL_COMMIT_FREE_ABSENT)
6810 dwp->dw_mask |= DW_vm_page_free;
6811 else {
6812 m->vmp_absent = FALSE;
6813 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
6814
6815 if ( !(dwp->dw_mask & DW_vm_page_deactivate_internal))
6816 dwp->dw_mask |= DW_vm_page_activate;
6817 }
6818 } else
6819 dwp->dw_mask |= DW_vm_page_unwire;
6820 }
6821 goto commit_next_page;
6822 }
6823 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
6824
6825 if (page_list)
6826 page_list[entry].phys_addr = 0;
6827
6828 /*
6829 * make sure to clear the hardware
6830 * modify or reference bits before
6831 * releasing the BUSY bit on this page;
6832 * otherwise we risk losing a legitimate
6833 * change of state
6834 */
6835 if (flags & UPL_COMMIT_CLEAR_DIRTY) {
6836 m->vmp_dirty = FALSE;
6837
6838 clear_refmod |= VM_MEM_MODIFIED;
6839 }
6840 if (m->vmp_laundry)
6841 dwp->dw_mask |= DW_vm_pageout_throttle_up;
6842
6843 if (VM_PAGE_WIRED(m))
6844 m->vmp_free_when_done = FALSE;
6845
6846 if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
6847 m->vmp_cs_validated && !m->vmp_cs_tainted) {
6848 /*
6849 * CODE SIGNING:
6850 * This page is no longer dirty
6851 * but could have been modified,
6852 * so it will need to be
6853 * re-validated.
6854 */
6855 m->vmp_cs_validated = FALSE;
6856
6857 VM_PAGEOUT_DEBUG(vm_cs_validated_resets, 1);
6858
6859 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
6860 }
6861 if (m->vmp_overwriting) {
6862 /*
6863 * the (COPY_OUT_FROM == FALSE) request_page_list case
6864 */
6865 if (m->vmp_busy) {
6866 #if CONFIG_PHANTOM_CACHE
6867 if (m->vmp_absent && !m_object->internal)
6868 dwp->dw_mask |= DW_vm_phantom_cache_update;
6869 #endif
6870 m->vmp_absent = FALSE;
6871
6872 dwp->dw_mask |= DW_clear_busy;
6873 } else {
6874 /*
6875 * alternate (COPY_OUT_FROM == FALSE) page_list case
6876 * Occurs when the original page was wired
6877 * at the time of the list request
6878 */
6879 assert(VM_PAGE_WIRED(m));
6880
6881 dwp->dw_mask |= DW_vm_page_unwire; /* reactivates */
6882 }
6883 m->vmp_overwriting = FALSE;
6884 }
6885 m->vmp_cleaning = FALSE;
6886
6887 if (m->vmp_free_when_done) {
6888 /*
6889 * With the clean queue enabled, UPL_PAGEOUT should
6890 * no longer set the pageout bit. Its pages now go
6891 * to the clean queue.
6892 */
6893 assert(!(flags & UPL_PAGEOUT));
6894 assert(!m_object->internal);
6895
6896 m->vmp_free_when_done = FALSE;
6897
6898 if ((flags & UPL_COMMIT_SET_DIRTY) ||
6899 (m->vmp_pmapped && (pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m)) & VM_MEM_MODIFIED))) {
6900 /*
6901 * page was re-dirtied after we started
6902 * the pageout... reactivate it since
6903 * we don't know whether the on-disk
6904 * copy matches what is now in memory
6905 */
6906 SET_PAGE_DIRTY(m, FALSE);
6907
6908 dwp->dw_mask |= DW_vm_page_activate | DW_PAGE_WAKEUP;
6909
6910 if (upl->flags & UPL_PAGEOUT) {
6911 VM_STAT_INCR(reactivations);
6912 DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
6913 }
6914 } else {
6915 /*
6916 * page has been successfully cleaned
6917 * go ahead and free it for other use
6918 */
6919 if (m_object->internal) {
6920 DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
6921 } else {
6922 DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
6923 }
6924 m->vmp_dirty = FALSE;
6925 m->vmp_busy = TRUE;
6926
6927 dwp->dw_mask |= DW_vm_page_free;
6928 }
6929 goto commit_next_page;
6930 }
6931 /*
6932 * It is part of the semantics of COPYOUT_FROM
6933 * UPLs that a commit implies a cache sync
6934 * between the vm page and the backing store;
6935 * this can be used to strip the precious bit
6936 * as well as to clean
6937 */
6938 if ((upl->flags & UPL_PAGE_SYNC_DONE) || (flags & UPL_COMMIT_CLEAR_PRECIOUS))
6939 m->vmp_precious = FALSE;
6940
6941 if (flags & UPL_COMMIT_SET_DIRTY) {
6942 SET_PAGE_DIRTY(m, FALSE);
6943 } else {
6944 m->vmp_dirty = FALSE;
6945 }
6946
6947 /* with the clean queue on, move *all* cleaned pages to the clean queue */
6948 if (hibernate_cleaning_in_progress == FALSE && !m->vmp_dirty && (upl->flags & UPL_PAGEOUT)) {
6949 pgpgout_count++;
6950
6951 VM_STAT_INCR(pageouts);
6952 DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
6953
6954 dwp->dw_mask |= DW_enqueue_cleaned;
6955 } else if (should_be_throttled == TRUE && (m->vmp_q_state == VM_PAGE_NOT_ON_Q)) {
6956 /*
6957 * page coming back in from being 'frozen'...
6958 * it was dirty before it was frozen, so keep it dirty so
6959 * that vm_page_activate will notice that it really belongs
6960 * on the throttle queue and put it there
6961 */
6962 SET_PAGE_DIRTY(m, FALSE);
6963 dwp->dw_mask |= DW_vm_page_activate;
6964
6965 } else {
6966 if ((flags & UPL_COMMIT_INACTIVATE) && !m->vmp_clustered && (m->vmp_q_state != VM_PAGE_ON_SPECULATIVE_Q)) {
6967 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6968 clear_refmod |= VM_MEM_REFERENCED;
6969 } else if ( !VM_PAGE_PAGEABLE(m)) {
6970
6971 if (m->vmp_clustered || (flags & UPL_COMMIT_SPECULATE))
6972 dwp->dw_mask |= DW_vm_page_speculate;
6973 else if (m->vmp_reference)
6974 dwp->dw_mask |= DW_vm_page_activate;
6975 else {
6976 dwp->dw_mask |= DW_vm_page_deactivate_internal;
6977 clear_refmod |= VM_MEM_REFERENCED;
6978 }
6979 }
6980 }
6981 if (upl->flags & UPL_ACCESS_BLOCKED) {
6982 /*
6983 * We blocked access to the pages in this UPL.
6984 * Clear the "busy" bit on this page before we
6985 * wake up any waiter.
6986 */
6987 dwp->dw_mask |= DW_clear_busy;
6988 }
6989 /*
6990 * Wake up any thread waiting for this page's cleaning to complete.
6991 */
6992 dwp->dw_mask |= DW_PAGE_WAKEUP;
6993
6994 commit_next_page:
6995 if (clear_refmod)
6996 pmap_clear_refmod(VM_PAGE_GET_PHYS_PAGE(m), clear_refmod);
6997
6998 target_offset += PAGE_SIZE_64;
6999 xfer_size -= PAGE_SIZE;
7000 entry++;
7001
7002 if (dwp->dw_mask) {
7003 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7004 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7005
7006 if (dw_count >= dw_limit) {
7007 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7008
7009 dwp = &dw_array[0];
7010 dw_count = 0;
7011 }
7012 } else {
7013 if (dwp->dw_mask & DW_clear_busy)
7014 m->vmp_busy = FALSE;
7015
7016 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7017 PAGE_WAKEUP(m);
7018 }
7019 }
7020 }
7021 if (dw_count)
7022 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7023
7024 if (fast_path_possible) {
7025
7026 assert(shadow_object->purgable != VM_PURGABLE_VOLATILE);
7027 assert(shadow_object->purgable != VM_PURGABLE_EMPTY);
7028
7029 if (local_queue_count || unwired_count) {
7030
7031 if (local_queue_count) {
7032 vm_page_t first_target;
7033 vm_page_queue_head_t *target_queue;
7034
7035 if (throttle_page)
7036 target_queue = &vm_page_queue_throttled;
7037 else {
7038 if (flags & UPL_COMMIT_INACTIVATE) {
7039 if (shadow_object->internal)
7040 target_queue = &vm_page_queue_anonymous;
7041 else
7042 target_queue = &vm_page_queue_inactive;
7043 } else
7044 target_queue = &vm_page_queue_active;
7045 }
7046 /*
7047 * Transfer the entire local queue to the appropriate LRU page queue.
7048 */
7049 vm_page_lockspin_queues();
7050
7051 first_target = (vm_page_t) vm_page_queue_first(target_queue);
7052
7053 if (vm_page_queue_empty(target_queue))
7054 target_queue->prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7055 else
7056 first_target->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(last_local);
7057
7058 target_queue->next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_local);
7059 first_local->vmp_pageq.prev = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(target_queue);
7060 last_local->vmp_pageq.next = VM_PAGE_CONVERT_TO_QUEUE_ENTRY(first_target);
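/*
 * The local chain first_local ... last_local has just been
 * spliced in at the head of target_queue, ahead of the previous
 * first element, with the forward and back links of both ends
 * patched up.
 */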
7061
7062 /*
7063 * Adjust the global page counts.
7064 */
7065 if (throttle_page) {
7066 vm_page_throttled_count += local_queue_count;
7067 } else {
7068 if (flags & UPL_COMMIT_INACTIVATE) {
7069 if (shadow_object->internal)
7070 vm_page_anonymous_count += local_queue_count;
7071 vm_page_inactive_count += local_queue_count;
7072
7073 token_new_pagecount += local_queue_count;
7074 } else
7075 vm_page_active_count += local_queue_count;
7076
7077 if (shadow_object->internal)
7078 vm_page_pageable_internal_count += local_queue_count;
7079 else
7080 vm_page_pageable_external_count += local_queue_count;
7081 }
7082 } else {
7083 vm_page_lockspin_queues();
7084 }
7085 if (unwired_count) {
7086 vm_page_wire_count -= unwired_count;
7087 VM_CHECK_MEMORYSTATUS;
7088 }
7089 vm_page_unlock_queues();
7090
7091 VM_OBJECT_WIRED_PAGE_COUNT(shadow_object, -unwired_count);
7092 }
7093 }
7094 occupied = 1;
7095
7096 if (upl->flags & UPL_DEVICE_MEMORY) {
7097 occupied = 0;
7098 } else if (upl->flags & UPL_LITE) {
7099 int pg_num;
7100 int i;
7101
7102 occupied = 0;
7103
7104 if (!fast_path_full_commit) {
7105 pg_num = upl->size/PAGE_SIZE;
7106 pg_num = (pg_num + 31) >> 5;
7107
7108 for (i = 0; i < pg_num; i++) {
7109 if (lite_list[i] != 0) {
7110 occupied = 1;
7111 break;
7112 }
7113 }
7114 }
7115 } else {
7116 if (vm_page_queue_empty(&upl->map_object->memq))
7117 occupied = 0;
7118 }
7119 if (occupied == 0) {
7120 /*
7121 * If this UPL element belongs to a Vector UPL and is
7122 * empty, then this is the right function to deallocate
7123 * it. So go ahead and set the *empty variable. The flag
7124 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view,
7125 * should be considered relevant for the Vector UPL and not
7126 * the internal UPLs.
7127 */
7128 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7129 *empty = TRUE;
7130
7131 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7132 /*
7133 * this is not a paging object
7134 * so we need to drop the paging reference
7135 * that was taken when we created the UPL
7136 * against this object
7137 */
7138 vm_object_activity_end(shadow_object);
7139 vm_object_collapse(shadow_object, 0, TRUE);
7140 } else {
7141 /*
7142 * we donated the paging reference to
7143 * the map object... vm_pageout_object_terminate
7144 * will drop this reference
7145 */
7146 }
7147 }
7148 VM_OBJECT_WIRED_PAGE_UPDATE_END(shadow_object, shadow_object->wire_tag);
7149 vm_object_unlock(shadow_object);
7150 if (object != shadow_object)
7151 vm_object_unlock(object);
7152
7153 if(!isVectorUPL)
7154 upl_unlock(upl);
7155 else {
7156 /*
7157 * If we completed our operations on an UPL that is
7158 * part of a Vectored UPL and if empty is TRUE, then
7159 * we should go ahead and deallocate this UPL element.
7160 * Then we check if this was the last of the UPL elements
7161 * within that Vectored UPL. If so, set empty to TRUE
7162 * so that in ubc_upl_commit_range or ubc_upl_commit, we
7163 * can go ahead and deallocate the Vector UPL too.
7164 */
7165 if(*empty==TRUE) {
7166 *empty = vector_upl_set_subupl(vector_upl, upl, 0);
7167 upl_deallocate(upl);
7168 }
7169 goto process_upl_to_commit;
7170 }
7171 if (pgpgout_count) {
7172 DTRACE_VM2(pgpgout, int, pgpgout_count, (uint64_t *), NULL);
7173 }
7174
7175 return KERN_SUCCESS;
7176 }
7177
7178 kern_return_t
7179 upl_abort_range(
7180 upl_t upl,
7181 upl_offset_t offset,
7182 upl_size_t size,
7183 int error,
7184 boolean_t *empty)
7185 {
7186 upl_page_info_t *user_page_list = NULL;
7187 upl_size_t xfer_size, subupl_size = size;
7188 vm_object_t shadow_object;
7189 vm_object_t object;
7190 vm_object_offset_t target_offset;
7191 upl_offset_t subupl_offset = offset;
7192 int entry;
7193 wpl_array_t lite_list;
7194 int occupied;
7195 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7196 struct vm_page_delayed_work *dwp;
7197 int dw_count;
7198 int dw_limit;
7199 int isVectorUPL = 0;
7200 upl_t vector_upl = NULL;
7201
7202 *empty = FALSE;
7203
7204 if (upl == UPL_NULL)
7205 return KERN_INVALID_ARGUMENT;
7206
7207 if ( (upl->flags & UPL_IO_WIRE) && !(error & UPL_ABORT_DUMP_PAGES) )
7208 return upl_commit_range(upl, offset, size, UPL_COMMIT_FREE_ABSENT, NULL, 0, empty);
7209
7210 if((isVectorUPL = vector_upl_is_valid(upl))) {
7211 vector_upl = upl;
7212 upl_lock(vector_upl);
7213 }
7214 else
7215 upl_lock(upl);
7216
7217 process_upl_to_abort:
7218 if(isVectorUPL) {
7219 size = subupl_size;
7220 offset = subupl_offset;
7221 if(size == 0) {
7222 upl_unlock(vector_upl);
7223 return KERN_SUCCESS;
7224 }
7225 upl = vector_upl_subupl_byoffset(vector_upl, &offset, &size);
7226 if(upl == NULL) {
7227 upl_unlock(vector_upl);
7228 return KERN_FAILURE;
7229 }
7230 subupl_size -= size;
7231 subupl_offset += size;
7232 }
7233
7234 *empty = FALSE;
7235
7236 #if UPL_DEBUG
7237 if (upl->upl_commit_index < UPL_DEBUG_COMMIT_RECORDS) {
7238 (void) OSBacktrace(&upl->upl_commit_records[upl->upl_commit_index].c_retaddr[0], UPL_DEBUG_STACK_FRAMES);
7239
7240 upl->upl_commit_records[upl->upl_commit_index].c_beg = offset;
7241 upl->upl_commit_records[upl->upl_commit_index].c_end = (offset + size);
7242 upl->upl_commit_records[upl->upl_commit_index].c_aborted = 1;
7243
7244 upl->upl_commit_index++;
7245 }
7246 #endif
7247 if (upl->flags & UPL_DEVICE_MEMORY)
7248 xfer_size = 0;
7249 else if ((offset + size) <= upl->size)
7250 xfer_size = size;
7251 else {
7252 if(!isVectorUPL)
7253 upl_unlock(upl);
7254 else {
7255 upl_unlock(vector_upl);
7256 }
7257
7258 return KERN_FAILURE;
7259 }
7260 if (upl->flags & UPL_INTERNAL) {
7261 lite_list = (wpl_array_t)
7262 ((((uintptr_t)upl) + sizeof(struct upl))
7263 + ((upl->size/PAGE_SIZE) * sizeof(upl_page_info_t)));
7264
7265 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
7266 } else {
7267 lite_list = (wpl_array_t)
7268 (((uintptr_t)upl) + sizeof(struct upl));
7269 }
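/*
 * Layout implied by the pointer math above: an INTERNAL upl is
 * immediately followed in memory by its upl_page_info_t array and
 * then by the "lite" bitmap; a non-internal upl is followed by the
 * bitmap alone.  Each 32-bit word of the bitmap covers 32 pages of
 * the UPL.
 */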
7270 object = upl->map_object;
7271
7272 if (upl->flags & UPL_SHADOWED) {
7273 vm_object_lock(object);
7274 shadow_object = object->shadow;
7275 } else
7276 shadow_object = object;
7277
7278 entry = offset/PAGE_SIZE;
7279 target_offset = (vm_object_offset_t)offset;
7280
7281 assert(!(target_offset & PAGE_MASK));
7282 assert(!(xfer_size & PAGE_MASK));
7283
7284 if (upl->flags & UPL_KERNEL_OBJECT)
7285 vm_object_lock_shared(shadow_object);
7286 else
7287 vm_object_lock(shadow_object);
7288
7289 if (upl->flags & UPL_ACCESS_BLOCKED) {
7290 assert(shadow_object->blocked_access);
7291 shadow_object->blocked_access = FALSE;
7292 vm_object_wakeup(object, VM_OBJECT_EVENT_UNBLOCKED);
7293 }
7294
7295 dwp = &dw_array[0];
7296 dw_count = 0;
7297 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
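/*
 * Page state changes are batched through dw_array: each iteration
 * below collects DW_* flags in dwp->dw_mask and a full batch
 * (dw_limit entries) is flushed via vm_page_do_delayed_work(), so
 * the page queues lock is taken once per batch rather than once per
 * page; pages needing only a busy-clear/wakeup are handled inline.
 */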
7298
7299 if ((error & UPL_ABORT_DUMP_PAGES) && (upl->flags & UPL_KERNEL_OBJECT))
7300 panic("upl_abort_range: kernel_object being DUMPED");
7301
7302 while (xfer_size) {
7303 vm_page_t t, m;
7304 unsigned int pg_num;
7305 boolean_t needed;
7306
7307 pg_num = (unsigned int) (target_offset/PAGE_SIZE);
7308 assert(pg_num == target_offset/PAGE_SIZE);
7309
7310 needed = FALSE;
7311
7312 if (user_page_list)
7313 needed = user_page_list[pg_num].needed;
7314
7315 dwp->dw_mask = 0;
7316 m = VM_PAGE_NULL;
7317
7318 if (upl->flags & UPL_LITE) {
7319
7320 if (lite_list[pg_num>>5] & (1 << (pg_num & 31))) {
7321 lite_list[pg_num>>5] &= ~(1 << (pg_num & 31));
7322
7323 if ( !(upl->flags & UPL_KERNEL_OBJECT))
7324 m = vm_page_lookup(shadow_object, target_offset +
7325 (upl->offset - shadow_object->paging_offset));
7326 }
7327 }
7328 if (upl->flags & UPL_SHADOWED) {
7329 if ((t = vm_page_lookup(object, target_offset)) != VM_PAGE_NULL) {
7330 t->vmp_free_when_done = FALSE;
7331
7332 VM_PAGE_FREE(t);
7333
7334 if (m == VM_PAGE_NULL)
7335 m = vm_page_lookup(shadow_object, target_offset + object->vo_shadow_offset);
7336 }
7337 }
7338 if ((upl->flags & UPL_KERNEL_OBJECT))
7339 goto abort_next_page;
7340
7341 if (m != VM_PAGE_NULL) {
7342
7343 assert(m->vmp_q_state != VM_PAGE_USED_BY_COMPRESSOR);
7344
7345 if (m->vmp_absent) {
7346 boolean_t must_free = TRUE;
7347
7348 /*
7349 * COPYOUT = FALSE case
7350 * check for error conditions which must
7351 * be passed back to the page's customer
7352 */
7353 if (error & UPL_ABORT_RESTART) {
7354 m->vmp_restart = TRUE;
7355 m->vmp_absent = FALSE;
7356 m->vmp_unusual = TRUE;
7357 must_free = FALSE;
7358 } else if (error & UPL_ABORT_UNAVAILABLE) {
7359 m->vmp_restart = FALSE;
7360 m->vmp_unusual = TRUE;
7361 must_free = FALSE;
7362 } else if (error & UPL_ABORT_ERROR) {
7363 m->vmp_restart = FALSE;
7364 m->vmp_absent = FALSE;
7365 m->vmp_error = TRUE;
7366 m->vmp_unusual = TRUE;
7367 must_free = FALSE;
7368 }
7369 if (m->vmp_clustered && needed == FALSE) {
7370 /*
7371 * This page was a part of a speculative
7372 * read-ahead initiated by the kernel
7373 * itself. No one is expecting this
7374 * page and no one will clean up its
7375 * error state if it ever becomes valid
7376 * in the future.
7377 * We have to free it here.
7378 */
7379 must_free = TRUE;
7380 }
7381 m->vmp_cleaning = FALSE;
7382
7383 if (m->vmp_overwriting && !m->vmp_busy) {
7384 /*
7385 * this shouldn't happen since
7386 * this is an 'absent' page, but
7387 * it doesn't hurt to check for
7388 * the 'alternate' method of
7389 * stabilizing the page...
7390 * we will mark 'busy' to be cleared
7391 * in the following code which will
7392 * take care of the primary stabilization
7393 * method (i.e. setting 'busy' to TRUE)
7394 */
7395 dwp->dw_mask |= DW_vm_page_unwire;
7396 }
7397 m->vmp_overwriting = FALSE;
7398
7399 dwp->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP);
7400
7401 if (must_free == TRUE)
7402 dwp->dw_mask |= DW_vm_page_free;
7403 else
7404 dwp->dw_mask |= DW_vm_page_activate;
7405 } else {
7406 /*
7407 * Handle the trusted pager throttle.
7408 */
7409 if (m->vmp_laundry)
7410 dwp->dw_mask |= DW_vm_pageout_throttle_up;
7411
7412 if (upl->flags & UPL_ACCESS_BLOCKED) {
7413 /*
7414 * We blocked access to the pages in this UPL.
7415 * Clear the "busy" bit and wake up any waiter
7416 * for this page.
7417 */
7418 dwp->dw_mask |= DW_clear_busy;
7419 }
7420 if (m->vmp_overwriting) {
7421 if (m->vmp_busy)
7422 dwp->dw_mask |= DW_clear_busy;
7423 else {
7424 /*
7425 * deal with the 'alternate' method
7426 * of stabilizing the page...
7427 * we will either free the page
7428 * or mark 'busy' to be cleared
7429 * in the following code which will
7430 * take care of the primary stabilization
7431 * method (i.e. setting 'busy' to TRUE)
7432 */
7433 dwp->dw_mask |= DW_vm_page_unwire;
7434 }
7435 m->vmp_overwriting = FALSE;
7436 }
7437 m->vmp_free_when_done = FALSE;
7438 m->vmp_cleaning = FALSE;
7439
7440 if (error & UPL_ABORT_DUMP_PAGES) {
7441 pmap_disconnect(VM_PAGE_GET_PHYS_PAGE(m));
7442
7443 dwp->dw_mask |= DW_vm_page_free;
7444 } else {
7445 if (!(dwp->dw_mask & DW_vm_page_unwire)) {
7446 if (error & UPL_ABORT_REFERENCE) {
7447 /*
7448 * we've been told to explicitly
7449 * reference this page... for
7450 * file I/O, this is done by
7451 * implementing an LRU on the inactive q
7452 */
7453 dwp->dw_mask |= DW_vm_page_lru;
7454
7455 } else if ( !VM_PAGE_PAGEABLE(m))
7456 dwp->dw_mask |= DW_vm_page_deactivate_internal;
7457 }
7458 dwp->dw_mask |= DW_PAGE_WAKEUP;
7459 }
7460 }
7461 }
7462 abort_next_page:
7463 target_offset += PAGE_SIZE_64;
7464 xfer_size -= PAGE_SIZE;
7465 entry++;
7466
7467 if (dwp->dw_mask) {
7468 if (dwp->dw_mask & ~(DW_clear_busy | DW_PAGE_WAKEUP)) {
7469 VM_PAGE_ADD_DELAYED_WORK(dwp, m, dw_count);
7470
7471 if (dw_count >= dw_limit) {
7472 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7473
7474 dwp = &dw_array[0];
7475 dw_count = 0;
7476 }
7477 } else {
7478 if (dwp->dw_mask & DW_clear_busy)
7479 m->vmp_busy = FALSE;
7480
7481 if (dwp->dw_mask & DW_PAGE_WAKEUP)
7482 PAGE_WAKEUP(m);
7483 }
7484 }
7485 }
7486 if (dw_count)
7487 vm_page_do_delayed_work(shadow_object, VM_KERN_MEMORY_NONE, &dw_array[0], dw_count);
7488
7489 occupied = 1;
7490
7491 if (upl->flags & UPL_DEVICE_MEMORY) {
7492 occupied = 0;
7493 } else if (upl->flags & UPL_LITE) {
7494 int pg_num;
7495 int i;
7496
7497 pg_num = upl->size/PAGE_SIZE;
7498 pg_num = (pg_num + 31) >> 5;
7499 occupied = 0;
7500
7501 for (i = 0; i < pg_num; i++) {
7502 if (lite_list[i] != 0) {
7503 occupied = 1;
7504 break;
7505 }
7506 }
7507 } else {
7508 if (vm_page_queue_empty(&upl->map_object->memq))
7509 occupied = 0;
7510 }
7511 if (occupied == 0) {
7512 /*
7513 * If this UPL element belongs to a Vector UPL and is
7514 * empty, then this is the right function to deallocate
7515 * it. So go ahead and set the *empty variable. The flag
7516 * UPL_COMMIT_NOTIFY_EMPTY, from the caller's point of view
7517 * should be considered relevant for the Vector UPL and
7518 * not the internal UPLs.
7519 */
7520 if ((upl->flags & UPL_COMMIT_NOTIFY_EMPTY) || isVectorUPL)
7521 *empty = TRUE;
7522
7523 if (object == shadow_object && !(upl->flags & UPL_KERNEL_OBJECT)) {
7524 /*
7525 * this is not a paging object
7526 * so we need to drop the paging reference
7527 * that was taken when we created the UPL
7528 * against this object
7529 */
7530 vm_object_activity_end(shadow_object);
7531 vm_object_collapse(shadow_object, 0, TRUE);
7532 } else {
7533 /*
7534 * we donated the paging reference to
7535 * the map object... vm_pageout_object_terminate
7536 * will drop this reference
7537 */
7538 }
7539 }
7540 vm_object_unlock(shadow_object);
7541 if (object != shadow_object)
7542 vm_object_unlock(object);
7543
7544 if(!isVectorUPL)
7545 upl_unlock(upl);
7546 else {
7547 /*
7548 * If we completed our operations on an UPL that is
7549 * part of a Vectored UPL and if empty is TRUE, then
7550 * we should go ahead and deallocate this UPL element.
7551 * Then we check if this was the last of the UPL elements
7552 * within that Vectored UPL. If so, set empty to TRUE
7553 * so that in ubc_upl_abort_range or ubc_upl_abort, we
7554 * can go ahead and deallocate the Vector UPL too.
7555 */
7556 if(*empty == TRUE) {
7557 *empty = vector_upl_set_subupl(vector_upl, upl,0);
7558 upl_deallocate(upl);
7559 }
7560 goto process_upl_to_abort;
7561 }
7562
7563 return KERN_SUCCESS;
7564 }
7565
7566
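/*
 * upl_abort: abort the entire UPL by calling upl_abort_range() over
 * [0, upl->size) with the given error flags; the "empty" result is
 * computed but not passed back to the caller.
 */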
7567 kern_return_t
7568 upl_abort(
7569 upl_t upl,
7570 int error)
7571 {
7572 boolean_t empty;
7573
7574 if (upl == UPL_NULL)
7575 return KERN_INVALID_ARGUMENT;
7576
7577 return upl_abort_range(upl, 0, upl->size, error, &empty);
7578 }
7579
7580
7581 /* an option on commit should be wire */
7582 kern_return_t
7583 upl_commit(
7584 upl_t upl,
7585 upl_page_info_t *page_list,
7586 mach_msg_type_number_t count)
7587 {
7588 boolean_t empty;
7589
7590 if (upl == UPL_NULL)
7591 return KERN_INVALID_ARGUMENT;
7592
7593 return upl_commit_range(upl, 0, upl->size, 0, page_list, count, &empty);
7594 }
7595
7596
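/*
 * iopl_valid_data: walk the pages backing a plain UPL_IO_WIRE upl
 * (no device memory, shadowing, blocked access or internal page
 * list -- anything else panics) and convert any still-busy, absent
 * pages into valid, dirty, wired pages, then update the object's
 * wired-page accounting and the global vm_page_wire_count.
 */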
7597 void
7598 iopl_valid_data(
7599 upl_t upl,
7600 vm_tag_t tag)
7601 {
7602 vm_object_t object;
7603 vm_offset_t offset;
7604 vm_page_t m, nxt_page = VM_PAGE_NULL;
7605 upl_size_t size;
7606 int wired_count = 0;
7607
7608 if (upl == NULL)
7609 panic("iopl_valid_data: NULL upl");
7610 if (vector_upl_is_valid(upl))
7611 panic("iopl_valid_data: vector upl");
7612 if ((upl->flags & (UPL_DEVICE_MEMORY|UPL_SHADOWED|UPL_ACCESS_BLOCKED|UPL_IO_WIRE|UPL_INTERNAL)) != UPL_IO_WIRE)
7613 panic("iopl_valid_data: unsupported upl, flags = %x", upl->flags);
7614
7615 object = upl->map_object;
7616
7617 if (object == kernel_object || object == compressor_object)
7618 panic("iopl_valid_data: object == kernel or compressor");
7619
7620 if (object->purgable == VM_PURGABLE_VOLATILE ||
7621 object->purgable == VM_PURGABLE_EMPTY)
7622 panic("iopl_valid_data: object %p purgable %d",
7623 object, object->purgable);
7624
7625 size = upl->size;
7626
7627 vm_object_lock(object);
7628 VM_OBJECT_WIRED_PAGE_UPDATE_START(object);
7629
7630 if (object->vo_size == size && object->resident_page_count == (size / PAGE_SIZE))
7631 nxt_page = (vm_page_t)vm_page_queue_first(&object->memq);
7632 else
7633 offset = 0 + upl->offset - object->paging_offset;
7634
7635 while (size) {
7636
7637 if (nxt_page != VM_PAGE_NULL) {
7638 m = nxt_page;
7639 nxt_page = (vm_page_t)vm_page_queue_next(&nxt_page->vmp_listq);
7640 } else {
7641 m = vm_page_lookup(object, offset);
7642 offset += PAGE_SIZE;
7643
7644 if (m == VM_PAGE_NULL)
7645 panic("iopl_valid_data: missing expected page at offset %lx", (long)offset);
7646 }
7647 if (m->vmp_busy) {
7648 if (!m->vmp_absent)
7649 panic("iopl_valid_data: busy page w/o absent");
7650
7651 if (m->vmp_pageq.next || m->vmp_pageq.prev)
7652 panic("iopl_valid_data: busy+absent page on page queue");
7653 if (m->vmp_reusable) {
7654 panic("iopl_valid_data: %p is reusable", m);
7655 }
7656
7657 m->vmp_absent = FALSE;
7658 m->vmp_dirty = TRUE;
7659 assert(m->vmp_q_state == VM_PAGE_NOT_ON_Q);
7660 assert(m->vmp_wire_count == 0);
7661 m->vmp_wire_count++;
7662 assert(m->vmp_wire_count);
7663 if (m->vmp_wire_count == 1) {
7664 m->vmp_q_state = VM_PAGE_IS_WIRED;
7665 wired_count++;
7666 } else {
7667 panic("iopl_valid_data: %p already wired\n", m);
7668 }
7669
7670 PAGE_WAKEUP_DONE(m);
7671 }
7672 size -= PAGE_SIZE;
7673 }
7674 if (wired_count) {
7675
7676 VM_OBJECT_WIRED_PAGE_COUNT(object, wired_count);
7677 assert(object->resident_page_count >= object->wired_page_count);
7678
7679 /* no need to adjust purgeable accounting for this object: */
7680 assert(object->purgable != VM_PURGABLE_VOLATILE);
7681 assert(object->purgable != VM_PURGABLE_EMPTY);
7682
7683 vm_page_lockspin_queues();
7684 vm_page_wire_count += wired_count;
7685 vm_page_unlock_queues();
7686 }
7687 VM_OBJECT_WIRED_PAGE_UPDATE_END(object, tag);
7688 vm_object_unlock(object);
7689 }
7690
7691
7692 void
7693 vm_object_set_pmap_cache_attr(
7694 vm_object_t object,
7695 upl_page_info_array_t user_page_list,
7696 unsigned int num_pages,
7697 boolean_t batch_pmap_op)
7698 {
7699 unsigned int cache_attr = 0;
7700
7701 cache_attr = object->wimg_bits & VM_WIMG_MASK;
7702 assert(user_page_list);
7703 if (cache_attr != VM_WIMG_USE_DEFAULT) {
7704 PMAP_BATCH_SET_CACHE_ATTR(object, user_page_list, cache_attr, num_pages, batch_pmap_op);
7705 }
7706 }
7707
7708
7709 boolean_t vm_object_iopl_wire_full(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t);
7710 kern_return_t vm_object_iopl_wire_empty(vm_object_t, upl_t, upl_page_info_array_t, wpl_array_t, upl_control_flags_t, vm_tag_t, vm_object_offset_t *, int, int*);
7711
7712
7713
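/*
 * vm_object_iopl_wire_full: fast path for an object whose pages are
 * all resident.  Walks the object's memq once, wiring each page and
 * recording it in the lite bitmap / user_page_list; returns FALSE
 * (sending the caller to the slow path) as soon as it finds a page
 * that is busy, fictitious, absent, in error, being cleaned, marked
 * restart, in the laundry, or in need of coherency handling under
 * UPL_REQUEST_FORCE_COHERENCY.
 */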
7714 boolean_t
7715 vm_object_iopl_wire_full(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7716 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag)
7717 {
7718 vm_page_t dst_page;
7719 unsigned int entry;
7720 int page_count;
7721 int delayed_unlock = 0;
7722 boolean_t retval = TRUE;
7723 ppnum_t phys_page;
7724
7725 vm_object_lock_assert_exclusive(object);
7726 assert(object->purgable != VM_PURGABLE_VOLATILE);
7727 assert(object->purgable != VM_PURGABLE_EMPTY);
7728 assert(object->pager == NULL);
7729 assert(object->copy == NULL);
7730 assert(object->shadow == NULL);
7731
7732 page_count = object->resident_page_count;
7733 dst_page = (vm_page_t)vm_page_queue_first(&object->memq);
7734
7735 vm_page_lock_queues();
7736
7737 while (page_count--) {
7738
7739 if (dst_page->vmp_busy ||
7740 dst_page->vmp_fictitious ||
7741 dst_page->vmp_absent ||
7742 dst_page->vmp_error ||
7743 dst_page->vmp_cleaning ||
7744 dst_page->vmp_restart ||
7745 dst_page->vmp_laundry) {
7746 retval = FALSE;
7747 goto done;
7748 }
7749 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
7750 retval = FALSE;
7751 goto done;
7752 }
7753 dst_page->vmp_reference = TRUE;
7754
7755 vm_page_wire(dst_page, tag, FALSE);
7756
7757 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7758 SET_PAGE_DIRTY(dst_page, FALSE);
7759 }
7760 entry = (unsigned int)(dst_page->vmp_offset / PAGE_SIZE);
7761 assert(entry >= 0 && entry < object->resident_page_count);
7762 lite_list[entry>>5] |= 1 << (entry & 31);
7763
7764 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7765
7766 if (phys_page > upl->highest_page)
7767 upl->highest_page = phys_page;
7768
7769 if (user_page_list) {
7770 user_page_list[entry].phys_addr = phys_page;
7771 user_page_list[entry].absent = dst_page->vmp_absent;
7772 user_page_list[entry].dirty = dst_page->vmp_dirty;
7773 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
7774 user_page_list[entry].precious = dst_page->vmp_precious;
7775 user_page_list[entry].device = FALSE;
7776 user_page_list[entry].speculative = FALSE;
7777 user_page_list[entry].cs_validated = FALSE;
7778 user_page_list[entry].cs_tainted = FALSE;
7779 user_page_list[entry].cs_nx = FALSE;
7780 user_page_list[entry].needed = FALSE;
7781 user_page_list[entry].mark = FALSE;
7782 }
7783 if (delayed_unlock++ > 256) {
7784 delayed_unlock = 0;
7785 lck_mtx_yield(&vm_page_queue_lock);
7786
7787 VM_CHECK_MEMORYSTATUS;
7788 }
7789 dst_page = (vm_page_t)vm_page_queue_next(&dst_page->vmp_listq);
7790 }
7791 done:
7792 vm_page_unlock_queues();
7793
7794 VM_CHECK_MEMORYSTATUS;
7795
7796 return (retval);
7797 }
7798
7799
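/*
 * vm_object_iopl_wire_empty: fast path for an object with no
 * resident pages.  Grabs page_count fresh pages (waiting, possibly
 * interruptibly, when none are free), zero-fills them or marks them
 * absent, wires the non-absent ones, inserts them into the object,
 * and defers the owner's ledger/footprint credit until the end.
 */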
7800 kern_return_t
7801 vm_object_iopl_wire_empty(vm_object_t object, upl_t upl, upl_page_info_array_t user_page_list,
7802 wpl_array_t lite_list, upl_control_flags_t cntrl_flags, vm_tag_t tag, vm_object_offset_t *dst_offset,
7803 int page_count, int* page_grab_count)
7804 {
7805 vm_page_t dst_page;
7806 boolean_t no_zero_fill = FALSE;
7807 int interruptible;
7808 int pages_wired = 0;
7809 int pages_inserted = 0;
7810 int entry = 0;
7811 uint64_t delayed_ledger_update = 0;
7812 kern_return_t ret = KERN_SUCCESS;
7813 int grab_options;
7814 ppnum_t phys_page;
7815
7816 vm_object_lock_assert_exclusive(object);
7817 assert(object->purgable != VM_PURGABLE_VOLATILE);
7818 assert(object->purgable != VM_PURGABLE_EMPTY);
7819 assert(object->pager == NULL);
7820 assert(object->copy == NULL);
7821 assert(object->shadow == NULL);
7822
7823 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
7824 interruptible = THREAD_ABORTSAFE;
7825 else
7826 interruptible = THREAD_UNINT;
7827
7828 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
7829 no_zero_fill = TRUE;
7830
7831 grab_options = 0;
7832 #if CONFIG_SECLUDED_MEMORY
7833 if (object->can_grab_secluded) {
7834 grab_options |= VM_PAGE_GRAB_SECLUDED;
7835 }
7836 #endif /* CONFIG_SECLUDED_MEMORY */
7837
7838 while (page_count--) {
7839
7840 while ((dst_page = vm_page_grab_options(grab_options))
7841 == VM_PAGE_NULL) {
7842
7843 OSAddAtomic(page_count, &vm_upl_wait_for_pages);
7844
7845 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
7846
7847 if (vm_page_wait(interruptible) == FALSE) {
7848 /*
7849 * interrupted case
7850 */
7851 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7852
7853 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
7854
7855 ret = MACH_SEND_INTERRUPTED;
7856 goto done;
7857 }
7858 OSAddAtomic(-page_count, &vm_upl_wait_for_pages);
7859
7860 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
7861 }
7862 if (no_zero_fill == FALSE)
7863 vm_page_zero_fill(dst_page);
7864 else
7865 dst_page->vmp_absent = TRUE;
7866
7867 dst_page->vmp_reference = TRUE;
7868
7869 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
7870 SET_PAGE_DIRTY(dst_page, FALSE);
7871 }
7872 if (dst_page->vmp_absent == FALSE) {
7873 assert(dst_page->vmp_q_state == VM_PAGE_NOT_ON_Q);
7874 assert(dst_page->vmp_wire_count == 0);
7875 dst_page->vmp_wire_count++;
7876 dst_page->vmp_q_state = VM_PAGE_IS_WIRED;
7877 assert(dst_page->vmp_wire_count);
7878 pages_wired++;
7879 PAGE_WAKEUP_DONE(dst_page);
7880 }
7881 pages_inserted++;
7882
7883 vm_page_insert_internal(dst_page, object, *dst_offset, tag, FALSE, TRUE, TRUE, TRUE, &delayed_ledger_update);
7884
7885 lite_list[entry>>5] |= 1 << (entry & 31);
7886
7887 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
7888
7889 if (phys_page > upl->highest_page)
7890 upl->highest_page = phys_page;
7891
7892 if (user_page_list) {
7893 user_page_list[entry].phys_addr = phys_page;
7894 user_page_list[entry].absent = dst_page->vmp_absent;
7895 user_page_list[entry].dirty = dst_page->vmp_dirty;
7896 user_page_list[entry].free_when_done = FALSE;
7897 user_page_list[entry].precious = FALSE;
7898 user_page_list[entry].device = FALSE;
7899 user_page_list[entry].speculative = FALSE;
7900 user_page_list[entry].cs_validated = FALSE;
7901 user_page_list[entry].cs_tainted = FALSE;
7902 user_page_list[entry].cs_nx = FALSE;
7903 user_page_list[entry].needed = FALSE;
7904 user_page_list[entry].mark = FALSE;
7905 }
7906 entry++;
7907 *dst_offset += PAGE_SIZE_64;
7908 }
7909 done:
7910 if (pages_wired) {
7911 vm_page_lockspin_queues();
7912 vm_page_wire_count += pages_wired;
7913 vm_page_unlock_queues();
7914 }
7915 if (pages_inserted) {
7916 if (object->internal) {
7917 OSAddAtomic(pages_inserted, &vm_page_internal_count);
7918 } else {
7919 OSAddAtomic(pages_inserted, &vm_page_external_count);
7920 }
7921 }
7922 if (delayed_ledger_update) {
7923 task_t owner;
7924 int ledger_idx_volatile;
7925 int ledger_idx_nonvolatile;
7926 int ledger_idx_volatile_compressed;
7927 int ledger_idx_nonvolatile_compressed;
7928 boolean_t do_footprint;
7929
7930 owner = VM_OBJECT_OWNER(object);
7931 assert(owner);
7932
7933 vm_object_ledger_tag_ledgers(object,
7934 &ledger_idx_volatile,
7935 &ledger_idx_nonvolatile,
7936 &ledger_idx_volatile_compressed,
7937 &ledger_idx_nonvolatile_compressed,
7938 &do_footprint);
7939
7940 /* more non-volatile bytes */
7941 ledger_credit(owner->ledger,
7942 ledger_idx_nonvolatile,
7943 delayed_ledger_update);
7944 if (do_footprint) {
7945 /* more footprint */
7946 ledger_credit(owner->ledger,
7947 task_ledgers.phys_footprint,
7948 delayed_ledger_update);
7949 }
7950 }
7951
7952 assert(page_grab_count);
7953 *page_grab_count = pages_inserted;
7954
7955 return (ret);
7956 }
7957
7958
7959
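/*
 * vm_object_iopl_request: build an IO-wire UPL directly against a VM
 * object.  Device (phys_contiguous) objects are described without any
 * page-level work; otherwise pages are wired via one of the two fast
 * paths above when the request covers the whole object, or located /
 * faulted in one at a time on the slow path below, with each page's
 * physical address and state recorded in the lite bitmap and the
 * optional user_page_list.
 */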
7960 kern_return_t
7961 vm_object_iopl_request(
7962 vm_object_t object,
7963 vm_object_offset_t offset,
7964 upl_size_t size,
7965 upl_t *upl_ptr,
7966 upl_page_info_array_t user_page_list,
7967 unsigned int *page_list_count,
7968 upl_control_flags_t cntrl_flags,
7969 vm_tag_t tag)
7970 {
7971 vm_page_t dst_page;
7972 vm_object_offset_t dst_offset;
7973 upl_size_t xfer_size;
7974 upl_t upl = NULL;
7975 unsigned int entry;
7976 wpl_array_t lite_list = NULL;
7977 int no_zero_fill = FALSE;
7978 unsigned int size_in_pages;
7979 int page_grab_count = 0;
7980 u_int32_t psize;
7981 kern_return_t ret;
7982 vm_prot_t prot;
7983 struct vm_object_fault_info fault_info = {};
7984 struct vm_page_delayed_work dw_array[DEFAULT_DELAYED_WORK_LIMIT];
7985 struct vm_page_delayed_work *dwp;
7986 int dw_count;
7987 int dw_limit;
7988 int dw_index;
7989 boolean_t caller_lookup;
7990 int io_tracking_flag = 0;
7991 int interruptible;
7992 ppnum_t phys_page;
7993
7994 boolean_t set_cache_attr_needed = FALSE;
7995 boolean_t free_wired_pages = FALSE;
7996 boolean_t fast_path_empty_req = FALSE;
7997 boolean_t fast_path_full_req = FALSE;
7998
7999 if (cntrl_flags & ~UPL_VALID_FLAGS) {
8000 /*
8001 * For forward compatibility's sake,
8002 * reject any unknown flag.
8003 */
8004 return KERN_INVALID_VALUE;
8005 }
8006 if (vm_lopage_needed == FALSE)
8007 cntrl_flags &= ~UPL_NEED_32BIT_ADDR;
8008
8009 if (cntrl_flags & UPL_NEED_32BIT_ADDR) {
8010 if ( (cntrl_flags & (UPL_SET_IO_WIRE | UPL_SET_LITE)) != (UPL_SET_IO_WIRE | UPL_SET_LITE))
8011 return KERN_INVALID_VALUE;
8012
8013 if (object->phys_contiguous) {
8014 if ((offset + object->vo_shadow_offset) >= (vm_object_offset_t)max_valid_dma_address)
8015 return KERN_INVALID_ADDRESS;
8016
8017 if (((offset + object->vo_shadow_offset) + size) >= (vm_object_offset_t)max_valid_dma_address)
8018 return KERN_INVALID_ADDRESS;
8019 }
8020 }
8021 if (cntrl_flags & (UPL_NOZEROFILL | UPL_NOZEROFILLIO))
8022 no_zero_fill = TRUE;
8023
8024 if (cntrl_flags & UPL_COPYOUT_FROM)
8025 prot = VM_PROT_READ;
8026 else
8027 prot = VM_PROT_READ | VM_PROT_WRITE;
8028
8029 if ((!object->internal) && (object->paging_offset != 0))
8030 panic("vm_object_iopl_request: external object with non-zero paging offset\n");
8031
8032 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_START, size, cntrl_flags, prot, 0);
8033
8034 #if CONFIG_IOSCHED || UPL_DEBUG
8035 if ((object->io_tracking && object != kernel_object) || upl_debug_enabled)
8036 io_tracking_flag |= UPL_CREATE_IO_TRACKING;
8037 #endif
8038
8039 #if CONFIG_IOSCHED
8040 if (object->io_tracking) {
8041 /* Check if we're dealing with the kernel object. We do not support expedite on kernel object UPLs */
8042 if (object != kernel_object)
8043 io_tracking_flag |= UPL_CREATE_EXPEDITE_SUP;
8044 }
8045 #endif
8046
8047 if (object->phys_contiguous)
8048 psize = PAGE_SIZE;
8049 else
8050 psize = size;
8051
8052 if (cntrl_flags & UPL_SET_INTERNAL) {
8053 upl = upl_create(UPL_CREATE_INTERNAL | UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8054
8055 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8056 lite_list = (wpl_array_t) (((uintptr_t)user_page_list) +
8057 ((psize / PAGE_SIZE) * sizeof(upl_page_info_t)));
8058 if (size == 0) {
8059 user_page_list = NULL;
8060 lite_list = NULL;
8061 }
8062 } else {
8063 upl = upl_create(UPL_CREATE_LITE | io_tracking_flag, UPL_IO_WIRE, psize);
8064
8065 lite_list = (wpl_array_t) (((uintptr_t)upl) + sizeof(struct upl));
8066 if (size == 0) {
8067 lite_list = NULL;
8068 }
8069 }
8070 if (user_page_list)
8071 user_page_list[0].device = FALSE;
8072 *upl_ptr = upl;
8073
8074 if (cntrl_flags & UPL_NOZEROFILLIO) {
8075 DTRACE_VM4(upl_nozerofillio,
8076 vm_object_t, object,
8077 vm_object_offset_t, offset,
8078 upl_size_t, size,
8079 upl_t, upl);
8080 }
8081
8082 upl->map_object = object;
8083 upl->size = size;
8084
8085 size_in_pages = size / PAGE_SIZE;
8086
8087 if (object == kernel_object &&
8088 !(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS))) {
8089 upl->flags |= UPL_KERNEL_OBJECT;
8090 #if UPL_DEBUG
8091 vm_object_lock(object);
8092 #else
8093 vm_object_lock_shared(object);
8094 #endif
8095 } else {
8096 vm_object_lock(object);
8097 vm_object_activity_begin(object);
8098 }
8099 /*
8100 * paging in progress also protects the paging_offset
8101 */
8102 upl->offset = offset + object->paging_offset;
8103
8104 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8105 /*
8106 * The user requested that access to the pages in this UPL
8107 * be blocked until the UPL is committed or aborted.
8108 */
8109 upl->flags |= UPL_ACCESS_BLOCKED;
8110 }
8111
8112 #if CONFIG_IOSCHED || UPL_DEBUG
8113 if (upl->flags & UPL_TRACKED_BY_OBJECT) {
8114 vm_object_activity_begin(object);
8115 queue_enter(&object->uplq, upl, upl_t, uplq);
8116 }
8117 #endif
8118
8119 if (object->phys_contiguous) {
8120
8121 if (upl->flags & UPL_ACCESS_BLOCKED) {
8122 assert(!object->blocked_access);
8123 object->blocked_access = TRUE;
8124 }
8125
8126 vm_object_unlock(object);
8127
8128 /*
8129 * don't need any shadow mappings for this one
8130 * since it is already I/O memory
8131 */
8132 upl->flags |= UPL_DEVICE_MEMORY;
8133
8134 upl->highest_page = (ppnum_t) ((offset + object->vo_shadow_offset + size - 1)>>PAGE_SHIFT);
8135
8136 if (user_page_list) {
8137 user_page_list[0].phys_addr = (ppnum_t) ((offset + object->vo_shadow_offset)>>PAGE_SHIFT);
8138 user_page_list[0].device = TRUE;
8139 }
8140 if (page_list_count != NULL) {
8141 if (upl->flags & UPL_INTERNAL)
8142 *page_list_count = 0;
8143 else
8144 *page_list_count = 1;
8145 }
8146
8147 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8148 return KERN_SUCCESS;
8149 }
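/*
 * Not device memory: from here on every page in the range has to be
 * located (or created) and wired individually.
 */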
8150 if (object != kernel_object && object != compressor_object) {
8151 /*
8152 * Protect user space from future COW operations
8153 */
8154 #if VM_OBJECT_TRACKING_OP_TRUESHARE
8155 if (!object->true_share &&
8156 vm_object_tracking_inited) {
8157 void *bt[VM_OBJECT_TRACKING_BTDEPTH];
8158 int num = 0;
8159
8160 num = OSBacktrace(bt,
8161 VM_OBJECT_TRACKING_BTDEPTH);
8162 btlog_add_entry(vm_object_tracking_btlog,
8163 object,
8164 VM_OBJECT_TRACKING_OP_TRUESHARE,
8165 bt,
8166 num);
8167 }
8168 #endif /* VM_OBJECT_TRACKING_OP_TRUESHARE */
8169
8170 vm_object_lock_assert_exclusive(object);
8171 object->true_share = TRUE;
8172
8173 if (object->copy_strategy == MEMORY_OBJECT_COPY_SYMMETRIC)
8174 object->copy_strategy = MEMORY_OBJECT_COPY_DELAY;
8175 }
8176
8177 if (!(cntrl_flags & UPL_COPYOUT_FROM) &&
8178 object->copy != VM_OBJECT_NULL) {
8179 /*
8180 * Honor copy-on-write obligations
8181 *
8182 * The caller is gathering these pages and
8183 * might modify their contents. We need to
8184 * make sure that the copy object has its own
8185 * private copies of these pages before we let
8186 * the caller modify them.
8187 *
8188 * NOTE: someone else could map the original object
8189 * after we've done this copy-on-write here, and they
8190 * could then see an inconsistent picture of the memory
8191 * while it's being modified via the UPL. To prevent this,
8192 * we would have to block access to these pages until the
8193 * UPL is released. We could use the UPL_BLOCK_ACCESS
8194 * code path for that...
8195 */
8196 vm_object_update(object,
8197 offset,
8198 size,
8199 NULL,
8200 NULL,
8201 FALSE, /* should_return */
8202 MEMORY_OBJECT_COPY_SYNC,
8203 VM_PROT_NO_CHANGE);
8204 VM_PAGEOUT_DEBUG(iopl_cow, 1);
8205 VM_PAGEOUT_DEBUG(iopl_cow_pages, (size >> PAGE_SHIFT));
8206 }
8207 if (!(cntrl_flags & (UPL_NEED_32BIT_ADDR | UPL_BLOCK_ACCESS)) &&
8208 object->purgable != VM_PURGABLE_VOLATILE &&
8209 object->purgable != VM_PURGABLE_EMPTY &&
8210 object->copy == NULL &&
8211 size == object->vo_size &&
8212 offset == 0 &&
8213 object->shadow == NULL &&
8214 object->pager == NULL)
8215 {
8216 if (object->resident_page_count == size_in_pages)
8217 {
8218 assert(object != compressor_object);
8219 assert(object != kernel_object);
8220 fast_path_full_req = TRUE;
8221 }
8222 else if (object->resident_page_count == 0)
8223 {
8224 assert(object != compressor_object);
8225 assert(object != kernel_object);
8226 fast_path_empty_req = TRUE;
8227 set_cache_attr_needed = TRUE;
8228 }
8229 }
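/*
 * Fast-path eligibility, per the checks above: the request must cover
 * the whole object, neither UPL_NEED_32BIT_ADDR nor UPL_BLOCK_ACCESS
 * may be asked for, and the object must have no shadow, no pager, no
 * copy object and must not be volatile/empty purgeable.  A fully
 * resident object goes to vm_object_iopl_wire_full(), a completely
 * empty one to vm_object_iopl_wire_empty(); anything else takes the
 * per-page slow path.
 */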
8230
8231 if (cntrl_flags & UPL_SET_INTERRUPTIBLE)
8232 interruptible = THREAD_ABORTSAFE;
8233 else
8234 interruptible = THREAD_UNINT;
8235
8236 entry = 0;
8237
8238 xfer_size = size;
8239 dst_offset = offset;
8240 dw_count = 0;
8241
8242 if (fast_path_full_req) {
8243
8244 if (vm_object_iopl_wire_full(object, upl, user_page_list, lite_list, cntrl_flags, tag) == TRUE)
8245 goto finish;
8246 /*
8247 * we couldn't complete the processing of this request on the fast path
8248 * so fall through to the slow path and finish up
8249 */
8250
8251 } else if (fast_path_empty_req) {
8252
8253 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8254 ret = KERN_MEMORY_ERROR;
8255 goto return_err;
8256 }
8257 ret = vm_object_iopl_wire_empty(object, upl, user_page_list, lite_list, cntrl_flags, tag, &dst_offset, size_in_pages, &page_grab_count);
8258
8259 if (ret) {
8260 free_wired_pages = TRUE;
8261 goto return_err;
8262 }
8263 goto finish;
8264 }
8265
8266 fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
8267 fault_info.lo_offset = offset;
8268 fault_info.hi_offset = offset + xfer_size;
8269 fault_info.mark_zf_absent = TRUE;
8270 fault_info.interruptible = interruptible;
8271 fault_info.batch_pmap_op = TRUE;
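/*
 * fault_info describes the whole remaining range to vm_fault_page():
 * sequential access, the caller's interruptibility, zero-filled pages
 * reported as absent, and pmap operations batched.
 */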
8272
8273 dwp = &dw_array[0];
8274 dw_limit = DELAYED_WORK_LIMIT(DEFAULT_DELAYED_WORK_LIMIT);
8275
8276 while (xfer_size) {
8277 vm_fault_return_t result;
8278
8279 dwp->dw_mask = 0;
8280
8281 if (fast_path_full_req) {
8282 /*
8283 * if we get here, it means that we ran into a page
8284 * state we couldn't handle in the fast path and
8285 * bailed out to the slow path... since the order
8286 * we look at pages is different between the 2 paths,
8287 * the following check is needed to determine whether
8288 * this page was already processed in the fast path
8289 */
8290 if (lite_list[entry>>5] & (1 << (entry & 31)))
8291 goto skip_page;
8292 }
8293 dst_page = vm_page_lookup(object, dst_offset);
8294
8295 if (dst_page == VM_PAGE_NULL ||
8296 dst_page->vmp_busy ||
8297 dst_page->vmp_error ||
8298 dst_page->vmp_restart ||
8299 dst_page->vmp_absent ||
8300 dst_page->vmp_fictitious) {
8301
8302 if (object == kernel_object)
8303 panic("vm_object_iopl_request: missing/bad page in kernel object\n");
8304 if (object == compressor_object)
8305 panic("vm_object_iopl_request: missing/bad page in compressor object\n");
8306
8307 if (cntrl_flags & UPL_REQUEST_NO_FAULT) {
8308 ret = KERN_MEMORY_ERROR;
8309 goto return_err;
8310 }
8311 set_cache_attr_needed = TRUE;
8312
8313 /*
8314 * We just looked up the page and the result remains valid
8315 * until the object lock is released, so send it to
8316 * vm_fault_page() (as "dst_page"), to avoid having to
8317 * look it up again there.
8318 */
8319 caller_lookup = TRUE;
8320
8321 do {
8322 vm_page_t top_page;
8323 kern_return_t error_code;
8324
8325 fault_info.cluster_size = xfer_size;
8326
8327 vm_object_paging_begin(object);
8328
8329 result = vm_fault_page(object, dst_offset,
8330 prot | VM_PROT_WRITE, FALSE,
8331 caller_lookup,
8332 &prot, &dst_page, &top_page,
8333 (int *)0,
8334 &error_code, no_zero_fill,
8335 FALSE, &fault_info);
8336
8337 /* our lookup is no longer valid at this point */
8338 caller_lookup = FALSE;
8339
8340 switch (result) {
8341
8342 case VM_FAULT_SUCCESS:
8343 page_grab_count++;
8344
8345 if ( !dst_page->vmp_absent) {
8346 PAGE_WAKEUP_DONE(dst_page);
8347 } else {
8348 /*
8349 * we only get back an absent page if we
8350 * requested that it not be zero-filled
8351 * because we are about to fill it via I/O
8352 *
8353 * absent pages should be left BUSY
8354 * to prevent them from being faulted
8355 * into an address space before we've
8356 * had a chance to complete the I/O on
8357 * them since they may contain info that
8358 * shouldn't be seen by the faulting task
8359 */
8360 }
8361 /*
8362 * Release paging references and
8363 * top-level placeholder page, if any.
8364 */
8365 if (top_page != VM_PAGE_NULL) {
8366 vm_object_t local_object;
8367
8368 local_object = VM_PAGE_OBJECT(top_page);
8369
8370 /*
8371 * comparing 2 packed pointers
8372 */
8373 if (top_page->vmp_object != dst_page->vmp_object) {
8374 vm_object_lock(local_object);
8375 VM_PAGE_FREE(top_page);
8376 vm_object_paging_end(local_object);
8377 vm_object_unlock(local_object);
8378 } else {
8379 VM_PAGE_FREE(top_page);
8380 vm_object_paging_end(local_object);
8381 }
8382 }
8383 vm_object_paging_end(object);
8384 break;
8385
8386 case VM_FAULT_RETRY:
8387 vm_object_lock(object);
8388 break;
8389
8390 case VM_FAULT_MEMORY_SHORTAGE:
8391 OSAddAtomic((size_in_pages - entry), &vm_upl_wait_for_pages);
8392
8393 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_START, vm_upl_wait_for_pages, 0, 0, 0);
8394
8395 if (vm_page_wait(interruptible)) {
8396 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8397
8398 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, 0);
8399 vm_object_lock(object);
8400
8401 break;
8402 }
8403 OSAddAtomic(-(size_in_pages - entry), &vm_upl_wait_for_pages);
8404
8405 VM_DEBUG_EVENT(vm_iopl_page_wait, VM_IOPL_PAGE_WAIT, DBG_FUNC_END, vm_upl_wait_for_pages, 0, 0, -1);
8406
8407 /* fall thru */
8408
8409 case VM_FAULT_INTERRUPTED:
8410 error_code = MACH_SEND_INTERRUPTED;
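/* fall thru */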
8411 case VM_FAULT_MEMORY_ERROR:
8412 memory_error:
8413 ret = (error_code ? error_code: KERN_MEMORY_ERROR);
8414
8415 vm_object_lock(object);
8416 goto return_err;
8417
8418 case VM_FAULT_SUCCESS_NO_VM_PAGE:
8419 /* success but no page: fail */
8420 vm_object_paging_end(object);
8421 vm_object_unlock(object);
8422 goto memory_error;
8423
8424 default:
8425 panic("vm_object_iopl_request: unexpected error"
8426 " 0x%x from vm_fault_page()\n", result);
8427 }
8428 } while (result != VM_FAULT_SUCCESS);
8429
8430 }
8431 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8432
8433 if (upl->flags & UPL_KERNEL_OBJECT)
8434 goto record_phys_addr;
8435
8436 if (dst_page->vmp_q_state == VM_PAGE_USED_BY_COMPRESSOR) {
8437 dst_page->vmp_busy = TRUE;
8438 goto record_phys_addr;
8439 }
8440
8441 if (dst_page->vmp_cleaning) {
8442 /*
8443 * Someone else is cleaning this page in place.
8444 * In theory, we should be able to proceed and use this
8445 * page, but they'll probably end up clearing the "busy"
8446 * bit on it in upl_commit_range(); since they didn't set
8447 * it, they would clear our "busy" bit and open
8448 * us to race conditions.
8449 * We'd better wait for the cleaning to complete and
8450 * then try again.
8451 */
8452 VM_PAGEOUT_DEBUG(vm_object_iopl_request_sleep_for_cleaning, 1);
8453 PAGE_SLEEP(object, dst_page, THREAD_UNINT);
8454 continue;
8455 }
8456 if (dst_page->vmp_laundry)
8457 vm_pageout_steal_laundry(dst_page, FALSE);
8458
8459 if ( (cntrl_flags & UPL_NEED_32BIT_ADDR) &&
8460 phys_page >= (max_valid_dma_address >> PAGE_SHIFT) ) {
8461 vm_page_t low_page;
8462 int refmod;
8463
8464 /*
8465 * support devices that can't DMA above 32 bits
8466 * by substituting pages from a pool of low address
8467 * memory for any pages we find above the 4G mark...
8468 * we can't substitute if the page is already wired because
8469 * we don't know whether that physical address has been
8470 * handed out to some other 64 bit capable DMA device to use
8471 */
8472 if (VM_PAGE_WIRED(dst_page)) {
8473 ret = KERN_PROTECTION_FAILURE;
8474 goto return_err;
8475 }
8476 low_page = vm_page_grablo();
8477
8478 if (low_page == VM_PAGE_NULL) {
8479 ret = KERN_RESOURCE_SHORTAGE;
8480 goto return_err;
8481 }
8482 /*
8483 * from here until the vm_page_replace completes
8484 * we mustn't drop the object lock... we don't
8485 * want anyone refaulting this page in and using
8486 * it after we disconnect it... we want the fault
8487 * to find the new page being substituted.
8488 */
8489 if (dst_page->vmp_pmapped)
8490 refmod = pmap_disconnect(phys_page);
8491 else
8492 refmod = 0;
8493
8494 if (!dst_page->vmp_absent)
8495 vm_page_copy(dst_page, low_page);
8496
8497 low_page->vmp_reference = dst_page->vmp_reference;
8498 low_page->vmp_dirty = dst_page->vmp_dirty;
8499 low_page->vmp_absent = dst_page->vmp_absent;
8500
8501 if (refmod & VM_MEM_REFERENCED)
8502 low_page->vmp_reference = TRUE;
8503 if (refmod & VM_MEM_MODIFIED) {
8504 SET_PAGE_DIRTY(low_page, FALSE);
8505 }
8506
8507 vm_page_replace(low_page, object, dst_offset);
8508
8509 dst_page = low_page;
8510 /*
8511 * vm_page_grablo returned the page marked
8512 * BUSY... we don't need a PAGE_WAKEUP_DONE
8513 * here, because we've never dropped the object lock
8514 */
8515 if ( !dst_page->vmp_absent)
8516 dst_page->vmp_busy = FALSE;
8517
8518 phys_page = VM_PAGE_GET_PHYS_PAGE(dst_page);
8519 }
8520 if ( !dst_page->vmp_busy)
8521 dwp->dw_mask |= DW_vm_page_wire;
8522
8523 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8524 /*
8525 * Mark the page "busy" to block any future page fault
8526 * on this page in addition to wiring it.
8527 * We'll also remove the mapping
8528 * of all these pages before leaving this routine.
8529 */
8530 assert(!dst_page->vmp_fictitious);
8531 dst_page->vmp_busy = TRUE;
8532 }
8533 /*
8534 * expect the page to be used
8535 * page queues lock must be held to set 'reference'
8536 */
8537 dwp->dw_mask |= DW_set_reference;
8538
8539 if (!(cntrl_flags & UPL_COPYOUT_FROM)) {
8540 SET_PAGE_DIRTY(dst_page, TRUE);
8541 }
8542 if ((cntrl_flags & UPL_REQUEST_FORCE_COHERENCY) && dst_page->vmp_written_by_kernel == TRUE) {
8543 pmap_sync_page_attributes_phys(phys_page);
8544 dst_page->vmp_written_by_kernel = FALSE;
8545 }
8546
8547 record_phys_addr:
8548 if (dst_page->vmp_busy)
8549 upl->flags |= UPL_HAS_BUSY;
8550
8551 lite_list[entry>>5] |= 1 << (entry & 31);
8552
8553 if (phys_page > upl->highest_page)
8554 upl->highest_page = phys_page;
8555
8556 if (user_page_list) {
8557 user_page_list[entry].phys_addr = phys_page;
8558 user_page_list[entry].free_when_done = dst_page->vmp_free_when_done;
8559 user_page_list[entry].absent = dst_page->vmp_absent;
8560 user_page_list[entry].dirty = dst_page->vmp_dirty;
8561 user_page_list[entry].precious = dst_page->vmp_precious;
8562 user_page_list[entry].device = FALSE;
8563 user_page_list[entry].needed = FALSE;
8564 if (dst_page->vmp_clustered == TRUE)
8565 user_page_list[entry].speculative = (dst_page->vmp_q_state == VM_PAGE_ON_SPECULATIVE_Q) ? TRUE : FALSE;
8566 else
8567 user_page_list[entry].speculative = FALSE;
8568 user_page_list[entry].cs_validated = dst_page->vmp_cs_validated;
8569 user_page_list[entry].cs_tainted = dst_page->vmp_cs_tainted;
8570 user_page_list[entry].cs_nx = dst_page->vmp_cs_nx;
8571 user_page_list[entry].mark = FALSE;
8572 }
8573 if (object != kernel_object && object != compressor_object) {
8574 /*
8575 * someone is explicitly grabbing this page...
8576 * update clustered and speculative state
8577 *
8578 */
8579 if (dst_page->vmp_clustered)
8580 VM_PAGE_CONSUME_CLUSTERED(dst_page);
8581 }
8582 skip_page:
8583 entry++;
8584 dst_offset += PAGE_SIZE_64;
8585 xfer_size -= PAGE_SIZE;
8586
8587 if (dwp->dw_mask) {
8588 VM_PAGE_ADD_DELAYED_WORK(dwp, dst_page, dw_count);
8589
8590 if (dw_count >= dw_limit) {
8591 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8592
8593 dwp = &dw_array[0];
8594 dw_count = 0;
8595 }
8596 }
8597 }
8598 assert(entry == size_in_pages);
8599
8600 if (dw_count)
8601 vm_page_do_delayed_work(object, tag, &dw_array[0], dw_count);
8602 finish:
8603 if (user_page_list && set_cache_attr_needed == TRUE)
8604 vm_object_set_pmap_cache_attr(object, user_page_list, size_in_pages, TRUE);
8605
8606 if (page_list_count != NULL) {
8607 if (upl->flags & UPL_INTERNAL)
8608 *page_list_count = 0;
8609 else if (*page_list_count > size_in_pages)
8610 *page_list_count = size_in_pages;
8611 }
8612 vm_object_unlock(object);
8613
8614 if (cntrl_flags & UPL_BLOCK_ACCESS) {
8615 /*
8616 * We've marked all the pages "busy" so that future
8617 * page faults will block.
8618 * Now remove the mapping for these pages, so that they
8619 * can't be accessed without causing a page fault.
8620 */
8621 vm_object_pmap_protect(object, offset, (vm_object_size_t)size,
8622 PMAP_NULL, 0, VM_PROT_NONE);
8623 assert(!object->blocked_access);
8624 object->blocked_access = TRUE;
8625 }
8626
8627 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, KERN_SUCCESS, 0, 0);
8628 return KERN_SUCCESS;
8629
8630 return_err:
8631 dw_index = 0;
8632
8633 for (; offset < dst_offset; offset += PAGE_SIZE) {
8634 boolean_t need_unwire;
8635
8636 dst_page = vm_page_lookup(object, offset);
8637
8638 if (dst_page == VM_PAGE_NULL)
8639 panic("vm_object_iopl_request: Wired page missing. \n");
8640
8641 /*
8642 * if we've already processed this page in an earlier
8643 * dw_do_work, we need to undo the wiring... we will
8644 * leave the dirty and reference bits on if they
8645 * were set, since we don't have a good way of knowing
8646 * what the previous state was and we won't get here
8647 * under any normal circumstances... we will always
8648 * clear BUSY and wakeup any waiters via vm_page_free
8649 * or PAGE_WAKEUP_DONE
8650 */
8651 need_unwire = TRUE;
8652
8653 if (dw_count) {
8654 if (dw_array[dw_index].dw_m == dst_page) {
8655 /*
8656 * still in the deferred work list
8657 * which means we haven't yet called
8658 * vm_page_wire on this page
8659 */
8660 need_unwire = FALSE;
8661
8662 dw_index++;
8663 dw_count--;
8664 }
8665 }
8666 vm_page_lock_queues();
8667
8668 if (dst_page->vmp_absent || free_wired_pages == TRUE) {
8669 vm_page_free(dst_page);
8670
8671 need_unwire = FALSE;
8672 } else {
8673 if (need_unwire == TRUE)
8674 vm_page_unwire(dst_page, TRUE);
8675
8676 PAGE_WAKEUP_DONE(dst_page);
8677 }
8678 vm_page_unlock_queues();
8679
8680 if (need_unwire == TRUE)
8681 VM_STAT_INCR(reactivations);
8682 }
8683 #if UPL_DEBUG
8684 upl->upl_state = 2;
8685 #endif
8686 if (! (upl->flags & UPL_KERNEL_OBJECT)) {
8687 vm_object_activity_end(object);
8688 vm_object_collapse(object, 0, TRUE);
8689 }
8690 vm_object_unlock(object);
8691 upl_destroy(upl);
8692
8693 VM_DEBUG_CONSTANT_EVENT(vm_object_iopl_request, VM_IOPL_REQUEST, DBG_FUNC_END, page_grab_count, ret, 0, 0);
8694 return ret;
8695 }
8696
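/*
 * upl_transpose: exchange the backing store of the two (equal-size,
 * full-object, non-vector) UPLs' objects via vm_object_transpose(),
 * then repoint each UPL's map_object and fix up the objects' uplq
 * tracking lists where UPL_TRACKED_BY_OBJECT is set.
 */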
8697 kern_return_t
8698 upl_transpose(
8699 upl_t upl1,
8700 upl_t upl2)
8701 {
8702 kern_return_t retval;
8703 boolean_t upls_locked;
8704 vm_object_t object1, object2;
8705
8706 if (upl1 == UPL_NULL || upl2 == UPL_NULL || upl1 == upl2 || ((upl1->flags & UPL_VECTOR)==UPL_VECTOR) || ((upl2->flags & UPL_VECTOR)==UPL_VECTOR)) {
8707 return KERN_INVALID_ARGUMENT;
8708 }
8709
8710 upls_locked = FALSE;
8711
8712 /*
8713 * Since we need to lock both UPLs at the same time,
8714 * avoid deadlocks by always taking locks in the same order.
8715 */
8716 if (upl1 < upl2) {
8717 upl_lock(upl1);
8718 upl_lock(upl2);
8719 } else {
8720 upl_lock(upl2);
8721 upl_lock(upl1);
8722 }
8723 upls_locked = TRUE; /* the UPLs will need to be unlocked */
8724
8725 object1 = upl1->map_object;
8726 object2 = upl2->map_object;
8727
8728 if (upl1->offset != 0 || upl2->offset != 0 ||
8729 upl1->size != upl2->size) {
8730 /*
8731 * We deal only with full objects, not subsets.
8732 * That's because we exchange the entire backing store info
8733 * for the objects: pager, resident pages, etc... We can't do
8734 * only part of it.
8735 */
8736 retval = KERN_INVALID_VALUE;
8737 goto done;
8738 }
8739
8740 /*
8741 * Transpose the VM objects' backing store.
8742 */
8743 retval = vm_object_transpose(object1, object2,
8744 (vm_object_size_t) upl1->size);
8745
8746 if (retval == KERN_SUCCESS) {
8747 /*
8748 * Make each UPL point to the correct VM object, i.e. the
8749 * object holding the pages that the UPL refers to...
8750 */
8751 #if CONFIG_IOSCHED || UPL_DEBUG
8752 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8753 vm_object_lock(object1);
8754 vm_object_lock(object2);
8755 }
8756 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8757 queue_remove(&object1->uplq, upl1, upl_t, uplq);
8758 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8759 queue_remove(&object2->uplq, upl2, upl_t, uplq);
8760 #endif
8761 upl1->map_object = object2;
8762 upl2->map_object = object1;
8763
8764 #if CONFIG_IOSCHED || UPL_DEBUG
8765 if (upl1->flags & UPL_TRACKED_BY_OBJECT)
8766 queue_enter(&object2->uplq, upl1, upl_t, uplq);
8767 if (upl2->flags & UPL_TRACKED_BY_OBJECT)
8768 queue_enter(&object1->uplq, upl2, upl_t, uplq);
8769 if ((upl1->flags & UPL_TRACKED_BY_OBJECT) || (upl2->flags & UPL_TRACKED_BY_OBJECT)) {
8770 vm_object_unlock(object2);
8771 vm_object_unlock(object1);
8772 }
8773 #endif
8774 }
8775
8776 done:
8777 /*
8778 * Cleanup.
8779 */
8780 if (upls_locked) {
8781 upl_unlock(upl1);
8782 upl_unlock(upl2);
8783 upls_locked = FALSE;
8784 }
8785
8786 return retval;
8787 }
8788
8789 void
8790 upl_range_needed(
8791 upl_t upl,
8792 int index,
8793 int count)
8794 {
8795 upl_page_info_t *user_page_list;
8796 int size_in_pages;
8797
8798 if ( !(upl->flags & UPL_INTERNAL) || count <= 0)
8799 return;
8800
8801 size_in_pages = upl->size / PAGE_SIZE;
8802
8803 user_page_list = (upl_page_info_t *) (((uintptr_t)upl) + sizeof(struct upl));
8804
8805 while (count-- && index < size_in_pages)
8806 user_page_list[index++].needed = TRUE;
8807 }
8808
8809
8810 /*
8811 * Reserve of virtual addresses in the kernel address space.
8812 * We need to map the physical pages in the kernel, so that we
8813 * can call the code-signing or slide routines with a kernel
8814 * virtual address. We keep this pool of pre-allocated kernel
8815 * virtual addresses so that we don't have to scan the kernel's
8816 * virtual address space each time we need to work with
8817 * a physical page.
8818 */
8819 decl_simple_lock_data(,vm_paging_lock)
8820 #define VM_PAGING_NUM_PAGES 64
8821 vm_map_offset_t vm_paging_base_address = 0;
8822 boolean_t vm_paging_page_inuse[VM_PAGING_NUM_PAGES] = { FALSE, };
8823 int vm_paging_max_index = 0;
8824 int vm_paging_page_waiter = 0;
8825 int vm_paging_page_waiter_total = 0;
8826
8827 unsigned long vm_paging_no_kernel_page = 0;
8828 unsigned long vm_paging_objects_mapped = 0;
8829 unsigned long vm_paging_pages_mapped = 0;
8830 unsigned long vm_paging_objects_mapped_slow = 0;
8831 unsigned long vm_paging_pages_mapped_slow = 0;
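/*
 * vm_paging_page_inuse[] tracks which slots of the pre-allocated
 * window are currently mapped, vm_paging_page_waiter counts threads
 * blocked waiting for a free slot, and the remaining counters record
 * how often the fast pool was used versus the slow vm_map_enter()
 * path.
 */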
8832
8833 void
8834 vm_paging_map_init(void)
8835 {
8836 kern_return_t kr;
8837 vm_map_offset_t page_map_offset;
8838 vm_map_entry_t map_entry;
8839
8840 assert(vm_paging_base_address == 0);
8841
8842 /*
8843 * Initialize our pool of pre-allocated kernel
8844 * virtual addresses.
8845 */
8846 page_map_offset = 0;
8847 kr = vm_map_find_space(kernel_map,
8848 &page_map_offset,
8849 VM_PAGING_NUM_PAGES * PAGE_SIZE,
8850 0,
8851 0,
8852 VM_MAP_KERNEL_FLAGS_NONE,
8853 VM_KERN_MEMORY_NONE,
8854 &map_entry);
8855 if (kr != KERN_SUCCESS) {
8856 panic("vm_paging_map_init: kernel_map full\n");
8857 }
8858 VME_OBJECT_SET(map_entry, kernel_object);
8859 VME_OFFSET_SET(map_entry, page_map_offset);
8860 map_entry->protection = VM_PROT_NONE;
8861 map_entry->max_protection = VM_PROT_NONE;
8862 map_entry->permanent = TRUE;
8863 vm_object_reference(kernel_object);
8864 vm_map_unlock(kernel_map);
8865
8866 assert(vm_paging_base_address == 0);
8867 vm_paging_base_address = page_map_offset;
8868 }
8869
8870 /*
8871 * vm_paging_map_object:
8872 * Maps part of a VM object's pages in the kernel
8873 * virtual address space, using the pre-allocated
8874 * kernel virtual addresses, if possible.
8875 * Context:
8876 * The VM object is locked. This lock will get
8877 * dropped and re-acquired though, so the caller
8878 * must make sure the VM object is kept alive
8879 * (by holding a VM map that has a reference
8880 * on it, for example, or taking an extra reference).
8881 * The page should also be kept busy to prevent
8882 * it from being reclaimed.
8883 */
8884 kern_return_t
8885 vm_paging_map_object(
8886 vm_page_t page,
8887 vm_object_t object,
8888 vm_object_offset_t offset,
8889 vm_prot_t protection,
8890 boolean_t can_unlock_object,
8891 vm_map_size_t *size, /* IN/OUT */
8892 vm_map_offset_t *address, /* OUT */
8893 boolean_t *need_unmap) /* OUT */
8894 {
8895 kern_return_t kr;
8896 vm_map_offset_t page_map_offset;
8897 vm_map_size_t map_size;
8898 vm_object_offset_t object_offset;
8899 int i;
8900
8901 if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
8902 /* use permanent 1-to-1 kernel mapping of physical memory ? */
8903 #if __x86_64__
8904 *address = (vm_map_offset_t)
8905 PHYSMAP_PTOV((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) <<
8906 PAGE_SHIFT);
8907 *need_unmap = FALSE;
8908 return KERN_SUCCESS;
8909 #elif __arm__ || __arm64__
8910 *address = (vm_map_offset_t)
8911 phystokv((pmap_paddr_t)VM_PAGE_GET_PHYS_PAGE(page) << PAGE_SHIFT);
8912 *need_unmap = FALSE;
8913 return KERN_SUCCESS;
8914 #else
8915 #warn "vm_paging_map_object: no 1-to-1 kernel mapping of physical memory..."
8916 #endif
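/*
 * No 1-to-1 physical mapping on this configuration: fall through and
 * borrow a slot from the pre-allocated VM_PAGING_NUM_PAGES window
 * below.
 */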
8917
8918 assert(page->vmp_busy);
8919 /*
8920 * Use one of the pre-allocated kernel virtual addresses
8921 * and just enter the VM page in the kernel address space
8922 * at that virtual address.
8923 */
8924 simple_lock(&vm_paging_lock);
8925
8926 /*
8927 * Try and find an available kernel virtual address
8928 * from our pre-allocated pool.
8929 */
8930 page_map_offset = 0;
8931 for (;;) {
8932 for (i = 0; i < VM_PAGING_NUM_PAGES; i++) {
8933 if (vm_paging_page_inuse[i] == FALSE) {
8934 page_map_offset =
8935 vm_paging_base_address +
8936 (i * PAGE_SIZE);
8937 break;
8938 }
8939 }
8940 if (page_map_offset != 0) {
8941 /* found a space to map our page ! */
8942 break;
8943 }
8944
8945 if (can_unlock_object) {
8946 /*
8947 * If we can afford to unlock the VM object,
8948 * let's take the slow path now...
8949 */
8950 break;
8951 }
8952 /*
8953 * We can't afford to unlock the VM object, so
8954 * let's wait for a space to become available...
8955 */
8956 vm_paging_page_waiter_total++;
8957 vm_paging_page_waiter++;
8958 kr = assert_wait((event_t)&vm_paging_page_waiter, THREAD_UNINT);
8959 if (kr == THREAD_WAITING) {
8960 simple_unlock(&vm_paging_lock);
8961 kr = thread_block(THREAD_CONTINUE_NULL);
8962 simple_lock(&vm_paging_lock);
8963 }
8964 vm_paging_page_waiter--;
8965 /* ... and try again */
8966 }
8967
8968 if (page_map_offset != 0) {
8969 /*
8970 * We found a kernel virtual address;
8971 * map the physical page to that virtual address.
8972 */
8973 if (i > vm_paging_max_index) {
8974 vm_paging_max_index = i;
8975 }
8976 vm_paging_page_inuse[i] = TRUE;
8977 simple_unlock(&vm_paging_lock);
8978
8979 page->vmp_pmapped = TRUE;
8980
8981 /*
8982 * Keep the VM object locked over the PMAP_ENTER
8983 * and the actual use of the page by the kernel,
8984 * or this pmap mapping might get undone by a
8985 * vm_object_pmap_protect() call...
8986 */
8987 PMAP_ENTER(kernel_pmap,
8988 page_map_offset,
8989 page,
8990 protection,
8991 VM_PROT_NONE,
8992 0,
8993 TRUE,
8994 kr);
8995 assert(kr == KERN_SUCCESS);
8996 vm_paging_objects_mapped++;
8997 vm_paging_pages_mapped++;
8998 *address = page_map_offset;
8999 *need_unmap = TRUE;
9000
9001 #if KASAN
9002 kasan_notify_address(page_map_offset, PAGE_SIZE);
9003 #endif
9004
9005 /* all done and mapped, ready to use ! */
9006 return KERN_SUCCESS;
9007 }
9008
9009 /*
9010 * We ran out of pre-allocated kernel virtual
9011 * addresses. Just map the page in the kernel
9012 * the slow and regular way.
9013 */
9014 vm_paging_no_kernel_page++;
9015 simple_unlock(&vm_paging_lock);
9016 }
9017
9018 if (! can_unlock_object) {
9019 *address = 0;
9020 *size = 0;
9021 *need_unmap = FALSE;
9022 return KERN_NOT_SUPPORTED;
9023 }
9024
9025 object_offset = vm_object_trunc_page(offset);
9026 map_size = vm_map_round_page(*size,
9027 VM_MAP_PAGE_MASK(kernel_map));
9028
9029 /*
9030 * Try and map the required range of the object
9031 * in the kernel_map
9032 */
9033
9034 vm_object_reference_locked(object); /* for the map entry */
9035 vm_object_unlock(object);
9036
9037 kr = vm_map_enter(kernel_map,
9038 address,
9039 map_size,
9040 0,
9041 VM_FLAGS_ANYWHERE,
9042 VM_MAP_KERNEL_FLAGS_NONE,
9043 VM_KERN_MEMORY_NONE,
9044 object,
9045 object_offset,
9046 FALSE,
9047 protection,
9048 VM_PROT_ALL,
9049 VM_INHERIT_NONE);
9050 if (kr != KERN_SUCCESS) {
9051 *address = 0;
9052 *size = 0;
9053 *need_unmap = FALSE;
9054 vm_object_deallocate(object); /* for the map entry */
9055 vm_object_lock(object);
9056 return kr;
9057 }
9058
9059 *size = map_size;
9060
9061 /*
9062 * Enter the mapped pages in the page table now.
9063 */
9064 vm_object_lock(object);
9065 /*
9066 * VM object must be kept locked from before PMAP_ENTER()
9067 * until after the kernel is done accessing the page(s).
9068 * Otherwise, the pmap mappings in the kernel could be
9069 * undone by a call to vm_object_pmap_protect().
9070 */
9071
9072 for (page_map_offset = 0;
9073 map_size != 0;
9074 map_size -= PAGE_SIZE_64, page_map_offset += PAGE_SIZE_64) {
9075
9076 page = vm_page_lookup(object, offset + page_map_offset);
9077 if (page == VM_PAGE_NULL) {
9078 printf("vm_paging_map_object: no page !?");
9079 vm_object_unlock(object);
9080 kr = vm_map_remove(kernel_map, *address, *size,
9081 VM_MAP_REMOVE_NO_FLAGS);
9082 assert(kr == KERN_SUCCESS);
9083 *address = 0;
9084 *size = 0;
9085 *need_unmap = FALSE;
9086 vm_object_lock(object);
9087 return KERN_MEMORY_ERROR;
9088 }
9089 page->vmp_pmapped = TRUE;
9090
9091 //assert(pmap_verify_free(VM_PAGE_GET_PHYS_PAGE(page)));
9092 PMAP_ENTER(kernel_pmap,
9093 *address + page_map_offset,
9094 page,
9095 protection,
9096 VM_PROT_NONE,
9097 0,
9098 TRUE,
9099 kr);
9100 assert(kr == KERN_SUCCESS);
9101 #if KASAN
9102 kasan_notify_address(*address + page_map_offset, PAGE_SIZE);
9103 #endif
9104 }
9105
9106 vm_paging_objects_mapped_slow++;
9107 vm_paging_pages_mapped_slow += (unsigned long) (map_size / PAGE_SIZE_64);
9108
9109 *need_unmap = TRUE;
9110
9111 return KERN_SUCCESS;
9112 }
9113
9114 /*
9115 * vm_paging_unmap_object:
9116 * Unmaps part of a VM object's pages from the kernel
9117 * virtual address space.
9118 * Context:
9119 * The VM object is locked. The lock may be
9120 * dropped and re-acquired during this call.
9121 */
9122 void
9123 vm_paging_unmap_object(
9124 vm_object_t object,
9125 vm_map_offset_t start,
9126 vm_map_offset_t end)
9127 {
9128 kern_return_t kr;
9129 int i;
9130
9131 if ((vm_paging_base_address == 0) ||
9132 (start < vm_paging_base_address) ||
9133 (end > (vm_paging_base_address
9134 + (VM_PAGING_NUM_PAGES * PAGE_SIZE)))) {
9135 /*
9136 * We didn't use our pre-allocated pool of
9137 * kernel virtual addresses. Deallocate the
9138 * virtual memory.
9139 */
9140 if (object != VM_OBJECT_NULL) {
9141 vm_object_unlock(object);
9142 }
9143 kr = vm_map_remove(kernel_map, start, end,
9144 VM_MAP_REMOVE_NO_FLAGS);
9145 if (object != VM_OBJECT_NULL) {
9146 vm_object_lock(object);
9147 }
9148 assert(kr == KERN_SUCCESS);
9149 } else {
9150 /*
9151 * We used a kernel virtual address from our
9152 * pre-allocated pool. Put it back in the pool
9153 * for next time.
9154 */
9155 assert(end - start == PAGE_SIZE);
9156 i = (int) ((start - vm_paging_base_address) >> PAGE_SHIFT);
9157 assert(i >= 0 && i < VM_PAGING_NUM_PAGES);
9158
9159 /* undo the pmap mapping */
9160 pmap_remove(kernel_pmap, start, end);
9161
9162 simple_lock(&vm_paging_lock);
9163 vm_paging_page_inuse[i] = FALSE;
9164 if (vm_paging_page_waiter) {
9165 thread_wakeup(&vm_paging_page_waiter);
9166 }
9167 simple_unlock(&vm_paging_lock);
9168 }
9169 }
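
/*
 * Illustrative sketch (not part of the build): the caller-side pattern
 * for vm_paging_map_object() / vm_paging_unmap_object().  The VM object
 * stays locked across the mapping and the actual access, per the locking
 * rules documented above.  "my_object", "my_page" and "my_offset" are
 * hypothetical, and the argument order follows the prototype earlier in
 * this file; treat this as a sketch, not a reference.
 */
#if 0
static kern_return_t
vm_paging_access_example(
	vm_object_t		my_object,
	vm_page_t		my_page,
	vm_object_offset_t	my_offset)
{
	vm_map_size_t	ksize;
	vm_map_offset_t	kaddr;
	boolean_t	need_unmap;
	kern_return_t	kr;

	vm_object_lock(my_object);

	ksize = PAGE_SIZE;
	kr = vm_paging_map_object(my_page,
				  my_object,
				  my_offset,
				  VM_PROT_READ | VM_PROT_WRITE,
				  FALSE,		/* can_unlock_object */
				  &ksize,		/* IN/OUT */
				  &kaddr,		/* OUT */
				  &need_unmap);		/* OUT */
	if (kr != KERN_SUCCESS) {
		vm_object_unlock(my_object);
		return kr;
	}

	/* ... access the page through "kaddr" while the object stays locked ... */

	if (need_unmap) {
		vm_paging_unmap_object(my_object, kaddr, kaddr + ksize);
	}
	vm_object_unlock(my_object);

	return kr;
}
#endif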
9170
9171
9172 /*
9173 * The page's VM object (page->vmp_object) must be locked by the caller.
9174 */
9175 void
9176 vm_pageout_steal_laundry(vm_page_t page, boolean_t queues_locked)
9177 {
9178 if (!queues_locked) {
9179 vm_page_lockspin_queues();
9180 }
9181
9182 page->vmp_free_when_done = FALSE;
9183 /*
9184 * We need to drop the laundry count and may also
9185 * need to remove the page from the I/O paging
9186 * queue; vm_pageout_throttle_up handles both
9187 * cases.
9188 *
9189 * The laundry and pageout_queue flags are cleared.
9190 */
9191 vm_pageout_throttle_up(page);
9192
9193 if (!queues_locked) {
9194 vm_page_unlock_queues();
9195 }
9196 }
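
/*
 * Illustrative sketch (not part of the build): the two ways a caller can
 * invoke vm_pageout_steal_laundry(), depending on whether it already
 * holds the page-queues lock.  "m" is a hypothetical page whose VM
 * object is locked, as required above.
 */
#if 0
	/* caller does not hold the page-queues lock: */
	vm_pageout_steal_laundry(m, FALSE);

	/* caller already holds the page-queues lock: */
	vm_page_lockspin_queues();
	vm_pageout_steal_laundry(m, TRUE);
	vm_page_unlock_queues();
#endif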
9197
9198 upl_t
9199 vector_upl_create(vm_offset_t upl_offset)
9200 {
9201 int vector_upl_size = sizeof(struct _vector_upl);
9202 int i=0;
9203 upl_t upl;
9204 vector_upl_t vector_upl = (vector_upl_t)kalloc(vector_upl_size);
9205
9206 upl = upl_create(0,UPL_VECTOR,0);
9207 upl->vector_upl = vector_upl;
9208 upl->offset = upl_offset;
9209 vector_upl->size = 0;
9210 vector_upl->offset = upl_offset;
9211 vector_upl->invalid_upls=0;
9212 vector_upl->num_upls=0;
9213 vector_upl->pagelist = NULL;
9214
9215 for(i=0; i < MAX_VECTOR_UPL_ELEMENTS ; i++) {
9216 vector_upl->upl_iostates[i].size = 0;
9217 vector_upl->upl_iostates[i].offset = 0;
9218
9219 }
9220 return upl;
9221 }
9222
9223 void
9224 vector_upl_deallocate(upl_t upl)
9225 {
9226 if(upl) {
9227 vector_upl_t vector_upl = upl->vector_upl;
9228 if(vector_upl) {
9229 if(vector_upl->invalid_upls != vector_upl->num_upls)
9230 panic("Deallocating non-empty Vectored UPL\n");
9231 kfree(vector_upl->pagelist,(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)));
9232 vector_upl->invalid_upls=0;
9233 vector_upl->num_upls = 0;
9234 vector_upl->pagelist = NULL;
9235 vector_upl->size = 0;
9236 vector_upl->offset = 0;
9237 kfree(vector_upl, sizeof(struct _vector_upl));
9238 vector_upl = (vector_upl_t)0xfeedfeed;
9239 }
9240 else
9241 panic("vector_upl_deallocate was passed a non-vectored upl\n");
9242 }
9243 else
9244 panic("vector_upl_deallocate was passed a NULL upl\n");
9245 }
9246
9247 boolean_t
9248 vector_upl_is_valid(upl_t upl)
9249 {
9250 if(upl && ((upl->flags & UPL_VECTOR)==UPL_VECTOR)) {
9251 vector_upl_t vector_upl = upl->vector_upl;
9252 if(vector_upl == NULL || vector_upl == (vector_upl_t)0xfeedfeed || vector_upl == (vector_upl_t)0xfeedbeef)
9253 return FALSE;
9254 else
9255 return TRUE;
9256 }
9257 return FALSE;
9258 }
9259
9260 boolean_t
9261 vector_upl_set_subupl(upl_t upl,upl_t subupl, uint32_t io_size)
9262 {
9263 if(vector_upl_is_valid(upl)) {
9264 vector_upl_t vector_upl = upl->vector_upl;
9265
9266 if(vector_upl) {
9267 if(subupl) {
9268 if(io_size) {
9269 if(io_size < PAGE_SIZE)
9270 io_size = PAGE_SIZE;
9271 subupl->vector_upl = (void*)vector_upl;
9272 vector_upl->upl_elems[vector_upl->num_upls++] = subupl;
9273 vector_upl->size += io_size;
9274 upl->size += io_size;
9275 }
9276 else {
9277 uint32_t i=0,invalid_upls=0;
9278 for(i = 0; i < vector_upl->num_upls; i++) {
9279 if(vector_upl->upl_elems[i] == subupl)
9280 break;
9281 }
9282 if(i == vector_upl->num_upls)
9283 panic("Trying to remove sub-upl when none exists");
9284
9285 vector_upl->upl_elems[i] = NULL;
9286 invalid_upls = hw_atomic_add(&(vector_upl)->invalid_upls, 1);
9287 if(invalid_upls == vector_upl->num_upls)
9288 return TRUE;
9289 else
9290 return FALSE;
9291 }
9292 }
9293 else
9294 panic("vector_upl_set_subupl was passed a NULL upl element\n");
9295 }
9296 else
9297 panic("vector_upl_set_subupl was passed a non-vectored upl\n");
9298 }
9299 else
9300 panic("vector_upl_set_subupl was passed a NULL upl\n");
9301
9302 return FALSE;
9303 }
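
/*
 * Illustrative sketch (not part of the build): the basic lifecycle of a
 * vectored UPL using the routines above and below.  The sub-UPLs
 * "subupl_a"/"subupl_b", their I/O sizes and "upl_offset" are
 * hypothetical; in practice they come from the cluster layer.
 */
#if 0
	upl_t		vupl;
	boolean_t	empty;

	/* allocate the vector UPL for a given offset */
	vupl = vector_upl_create(upl_offset);

	/* attach sub-UPLs; each one grows the vector UPL's size */
	vector_upl_set_subupl(vupl, subupl_a, io_size_a);
	vector_upl_set_subupl(vupl, subupl_b, io_size_b);

	/* record where each sub-UPL's I/O lives within the vector UPL */
	vector_upl_set_iostate(vupl, subupl_a, 0, io_size_a);
	vector_upl_set_iostate(vupl, subupl_b, io_size_a, io_size_b);

	/* build the flattened page list once all sub-UPLs are attached */
	vector_upl_set_pagelist(vupl);

	/*
	 * As each sub-UPL completes, passing io_size == 0 marks it
	 * invalid; when the last one is invalidated, TRUE is returned
	 * and the vector UPL can be torn down (in the real code this
	 * happens when the wrapping UPL is destroyed).
	 */
	(void) vector_upl_set_subupl(vupl, subupl_a, 0);
	empty = vector_upl_set_subupl(vupl, subupl_b, 0);
	if (empty) {
		vector_upl_deallocate(vupl);
	}
#endif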
9304
9305 void
9306 vector_upl_set_pagelist(upl_t upl)
9307 {
9308 if(vector_upl_is_valid(upl)) {
9309 uint32_t i=0;
9310 vector_upl_t vector_upl = upl->vector_upl;
9311
9312 if(vector_upl) {
9313 vm_offset_t pagelist_size=0, cur_upl_pagelist_size=0;
9314
9315 vector_upl->pagelist = (upl_page_info_array_t)kalloc(sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE));
9316
9317 for(i=0; i < vector_upl->num_upls; i++) {
9318 cur_upl_pagelist_size = sizeof(struct upl_page_info) * vector_upl->upl_elems[i]->size/PAGE_SIZE;
9319 bcopy(UPL_GET_INTERNAL_PAGE_LIST_SIMPLE(vector_upl->upl_elems[i]), (char*)vector_upl->pagelist + pagelist_size, cur_upl_pagelist_size);
9320 pagelist_size += cur_upl_pagelist_size;
9321 if(vector_upl->upl_elems[i]->highest_page > upl->highest_page)
9322 upl->highest_page = vector_upl->upl_elems[i]->highest_page;
9323 }
9324 assert( pagelist_size == (sizeof(struct upl_page_info)*(vector_upl->size/PAGE_SIZE)) );
9325 }
9326 else
9327 panic("vector_upl_set_pagelist was passed a non-vectored upl\n");
9328 }
9329 else
9330 panic("vector_upl_set_pagelist was passed a NULL upl\n");
9331
9332 }
9333
9334 upl_t
9335 vector_upl_subupl_byindex(upl_t upl, uint32_t index)
9336 {
9337 if(vector_upl_is_valid(upl)) {
9338 vector_upl_t vector_upl = upl->vector_upl;
9339 if(vector_upl) {
9340 if(index < vector_upl->num_upls)
9341 return vector_upl->upl_elems[index];
9342 }
9343 else
9344 panic("vector_upl_subupl_byindex was passed a non-vectored upl\n");
9345 }
9346 return NULL;
9347 }
9348
9349 upl_t
9350 vector_upl_subupl_byoffset(upl_t upl, upl_offset_t *upl_offset, upl_size_t *upl_size)
9351 {
9352 if(vector_upl_is_valid(upl)) {
9353 uint32_t i=0;
9354 vector_upl_t vector_upl = upl->vector_upl;
9355
9356 if(vector_upl) {
9357 upl_t subupl = NULL;
9358 vector_upl_iostates_t subupl_state;
9359
9360 for(i=0; i < vector_upl->num_upls; i++) {
9361 subupl = vector_upl->upl_elems[i];
9362 subupl_state = vector_upl->upl_iostates[i];
9363 if( *upl_offset <= (subupl_state.offset + subupl_state.size - 1)) {
9364 /* We could have been passed an offset/size pair that belongs
9365 * to a UPL element that has already been committed/aborted.
9366 * If so, return NULL.
9367 */
9368 if(subupl == NULL)
9369 return NULL;
9370 if((subupl_state.offset + subupl_state.size) < (*upl_offset + *upl_size)) {
9371 *upl_size = (subupl_state.offset + subupl_state.size) - *upl_offset;
9372 if(*upl_size > subupl_state.size)
9373 *upl_size = subupl_state.size;
9374 }
9375 if(*upl_offset >= subupl_state.offset)
9376 *upl_offset -= subupl_state.offset;
9377 else if(i)
9378 panic("Vector UPL offset miscalculation\n");
9379 return subupl;
9380 }
9381 }
9382 }
9383 else
9384 panic("vector_upl_subupl_byoffset was passed a non-vectored UPL\n");
9385 }
9386 return NULL;
9387 }
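
/*
 * Illustrative sketch (not part of the build): translating a range
 * within a vectored UPL into the owning sub-UPL's coordinates with
 * vector_upl_subupl_byoffset().  "vupl", "io_offset" and "io_size" are
 * hypothetical inputs from an I/O completion path.
 */
#if 0
	upl_offset_t	offset;
	upl_size_t	size;
	upl_t		subupl;

	offset = io_offset;	/* range start, relative to the vector UPL */
	size = io_size;		/* range length */

	subupl = vector_upl_subupl_byoffset(vupl, &offset, &size);
	if (subupl != NULL) {
		/*
		 * "offset" is now relative to the sub-UPL and "size" has
		 * been clipped to that sub-UPL's recorded iostate, so the
		 * caller can commit or abort exactly that range on "subupl".
		 */
	}
#endif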
9388
9389 void
9390 vector_upl_get_submap(upl_t upl, vm_map_t *v_upl_submap, vm_offset_t *submap_dst_addr)
9391 {
9392 *v_upl_submap = NULL;
9393
9394 if(vector_upl_is_valid(upl)) {
9395 vector_upl_t vector_upl = upl->vector_upl;
9396 if(vector_upl) {
9397 *v_upl_submap = vector_upl->submap;
9398 *submap_dst_addr = vector_upl->submap_dst_addr;
9399 }
9400 else
9401 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9402 }
9403 else
9404 panic("vector_upl_get_submap was passed a null UPL\n");
9405 }
9406
9407 void
9408 vector_upl_set_submap(upl_t upl, vm_map_t submap, vm_offset_t submap_dst_addr)
9409 {
9410 if(vector_upl_is_valid(upl)) {
9411 vector_upl_t vector_upl = upl->vector_upl;
9412 if(vector_upl) {
9413 vector_upl->submap = submap;
9414 vector_upl->submap_dst_addr = submap_dst_addr;
9415 }
9416 else
9417 panic("vector_upl_get_submap was passed a non-vectored UPL\n");
9418 }
9419 else
9420 panic("vector_upl_get_submap was passed a NULL UPL\n");
9421 }
9422
9423 void
9424 vector_upl_set_iostate(upl_t upl, upl_t subupl, upl_offset_t offset, upl_size_t size)
9425 {
9426 if(vector_upl_is_valid(upl)) {
9427 uint32_t i = 0;
9428 vector_upl_t vector_upl = upl->vector_upl;
9429
9430 if(vector_upl) {
9431 for(i = 0; i < vector_upl->num_upls; i++) {
9432 if(vector_upl->upl_elems[i] == subupl)
9433 break;
9434 }
9435
9436 if(i == vector_upl->num_upls)
9437 panic("setting sub-upl iostate when none exists");
9438
9439 vector_upl->upl_iostates[i].offset = offset;
9440 if(size < PAGE_SIZE)
9441 size = PAGE_SIZE;
9442 vector_upl->upl_iostates[i].size = size;
9443 }
9444 else
9445 panic("vector_upl_set_iostate was passed a non-vectored UPL\n");
9446 }
9447 else
9448 panic("vector_upl_set_iostate was passed a NULL UPL\n");
9449 }
9450
9451 void
9452 vector_upl_get_iostate(upl_t upl, upl_t subupl, upl_offset_t *offset, upl_size_t *size)
9453 {
9454 if(vector_upl_is_valid(upl)) {
9455 uint32_t i = 0;
9456 vector_upl_t vector_upl = upl->vector_upl;
9457
9458 if(vector_upl) {
9459 for(i = 0; i < vector_upl->num_upls; i++) {
9460 if(vector_upl->upl_elems[i] == subupl)
9461 break;
9462 }
9463
9464 if(i == vector_upl->num_upls)
9465 panic("getting sub-upl iostate when none exists");
9466
9467 *offset = vector_upl->upl_iostates[i].offset;
9468 *size = vector_upl->upl_iostates[i].size;
9469 }
9470 else
9471 panic("vector_upl_get_iostate was passed a non-vectored UPL\n");
9472 }
9473 else
9474 panic("vector_upl_get_iostate was passed a NULL UPL\n");
9475 }
9476
9477 void
9478 vector_upl_get_iostate_byindex(upl_t upl, uint32_t index, upl_offset_t *offset, upl_size_t *size)
9479 {
9480 if(vector_upl_is_valid(upl)) {
9481 vector_upl_t vector_upl = upl->vector_upl;
9482 if(vector_upl) {
9483 if(index < vector_upl->num_upls) {
9484 *offset = vector_upl->upl_iostates[index].offset;
9485 *size = vector_upl->upl_iostates[index].size;
9486 }
9487 else
9488 *offset = *size = 0;
9489 }
9490 else
9491 panic("vector_upl_get_iostate_byindex was passed a non-vectored UPL\n");
9492 }
9493 else
9494 panic("vector_upl_get_iostate_byindex was passed a NULL UPL\n");
9495 }
9496
9497 upl_page_info_t *
9498 upl_get_internal_vectorupl_pagelist(upl_t upl)
9499 {
9500 return ((vector_upl_t)(upl->vector_upl))->pagelist;
9501 }
9502
9503 void *
9504 upl_get_internal_vectorupl(upl_t upl)
9505 {
9506 return upl->vector_upl;
9507 }
9508
9509 vm_size_t
9510 upl_get_internal_pagelist_offset(void)
9511 {
9512 return sizeof(struct upl);
9513 }
9514
9515 void
9516 upl_clear_dirty(
9517 upl_t upl,
9518 boolean_t value)
9519 {
9520 if (value) {
9521 upl->flags |= UPL_CLEAR_DIRTY;
9522 } else {
9523 upl->flags &= ~UPL_CLEAR_DIRTY;
9524 }
9525 }
9526
9527 void
9528 upl_set_referenced(
9529 upl_t upl,
9530 boolean_t value)
9531 {
9532 upl_lock(upl);
9533 if (value) {
9534 upl->ext_ref_count++;
9535 } else {
9536 if (!upl->ext_ref_count) {
9537 panic("upl_set_referenced not %p\n", upl);
9538 }
9539 upl->ext_ref_count--;
9540 }
9541 upl_unlock(upl);
9542 }
9543
9544 #if CONFIG_IOSCHED
9545 void
9546 upl_set_blkno(
9547 upl_t upl,
9548 vm_offset_t upl_offset,
9549 int io_size,
9550 int64_t blkno)
9551 {
9552 int i,j;
9553 if ((upl->flags & UPL_EXPEDITE_SUPPORTED) == 0)
9554 return;
9555
9556 assert(upl->upl_reprio_info != 0);
9557 for(i = (int)(upl_offset / PAGE_SIZE), j = 0; j < io_size; i++, j += PAGE_SIZE) {
9558 UPL_SET_REPRIO_INFO(upl, i, blkno, io_size);
9559 }
9560 }
9561 #endif
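
/*
 * Illustrative sketch (not part of the build, CONFIG_IOSCHED only): which
 * page slots upl_set_blkno() updates.  The loop above starts at page
 * index upl_offset / PAGE_SIZE and advances one page per PAGE_SIZE of
 * io_size, so with 4K pages the call below records the block number and
 * I/O size for page indices 3 and 4.  "upl" and "blkno" are hypothetical.
 */
#if 0
	upl_set_blkno(upl, 0x3000 /* upl_offset */, 0x2000 /* io_size */, blkno);
#endif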
9562
9563 void inline memoryshot(unsigned int event, unsigned int control)
9564 {
9565 if (vm_debug_events) {
9566 KERNEL_DEBUG_CONSTANT1((MACHDBG_CODE(DBG_MACH_VM_PRESSURE, event)) | control,
9567 vm_page_active_count, vm_page_inactive_count,
9568 vm_page_free_count, vm_page_speculative_count,
9569 vm_page_throttled_count);
9570 } else {
9571 (void) event;
9572 (void) control;
9573 }
9574
9575 }
9576
9577 #ifdef MACH_BSD
9578
9579 boolean_t upl_device_page(upl_page_info_t *upl)
9580 {
9581 return(UPL_DEVICE_PAGE(upl));
9582 }
9583 boolean_t upl_page_present(upl_page_info_t *upl, int index)
9584 {
9585 return(UPL_PAGE_PRESENT(upl, index));
9586 }
9587 boolean_t upl_speculative_page(upl_page_info_t *upl, int index)
9588 {
9589 return(UPL_SPECULATIVE_PAGE(upl, index));
9590 }
9591 boolean_t upl_dirty_page(upl_page_info_t *upl, int index)
9592 {
9593 return(UPL_DIRTY_PAGE(upl, index));
9594 }
9595 boolean_t upl_valid_page(upl_page_info_t *upl, int index)
9596 {
9597 return(UPL_VALID_PAGE(upl, index));
9598 }
9599 ppnum_t upl_phys_page(upl_page_info_t *upl, int index)
9600 {
9601 return(UPL_PHYS_PAGE(upl, index));
9602 }
9603
9604 void upl_page_set_mark(upl_page_info_t *upl, int index, boolean_t v)
9605 {
9606 upl[index].mark = v;
9607 }
9608
9609 boolean_t upl_page_get_mark(upl_page_info_t *upl, int index)
9610 {
9611 return upl[index].mark;
9612 }
9613
9614 void
9615 vm_countdirtypages(void)
9616 {
9617 vm_page_t m;
9618 int dpages;
9619 int pgopages;
9620 int precpages;
9621
9622
9623 dpages=0;
9624 pgopages=0;
9625 precpages=0;
9626
9627 vm_page_lock_queues();
9628 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_inactive);
9629 do {
9630 if (m ==(vm_page_t )0) break;
9631
9632 if(m->vmp_dirty) dpages++;
9633 if(m->vmp_free_when_done) pgopages++;
9634 if(m->vmp_precious) precpages++;
9635
9636 assert(VM_PAGE_OBJECT(m) != kernel_object);
9637 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9638 if (m ==(vm_page_t )0) break;
9639
9640 } while (!vm_page_queue_end(&vm_page_queue_inactive, (vm_page_queue_entry_t) m));
9641 vm_page_unlock_queues();
9642
9643 vm_page_lock_queues();
9644 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_throttled);
9645 do {
9646 if (m ==(vm_page_t )0) break;
9647
9648 dpages++;
9649 assert(m->vmp_dirty);
9650 assert(!m->vmp_free_when_done);
9651 assert(VM_PAGE_OBJECT(m) != kernel_object);
9652 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9653 if (m ==(vm_page_t )0) break;
9654
9655 } while (!vm_page_queue_end(&vm_page_queue_throttled, (vm_page_queue_entry_t) m));
9656 vm_page_unlock_queues();
9657
9658 vm_page_lock_queues();
9659 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_anonymous);
9660 do {
9661 if (m ==(vm_page_t )0) break;
9662
9663 if(m->vmp_dirty) dpages++;
9664 if(m->vmp_free_when_done) pgopages++;
9665 if(m->vmp_precious) precpages++;
9666
9667 assert(VM_PAGE_OBJECT(m) != kernel_object);
9668 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9669 if (m ==(vm_page_t )0) break;
9670
9671 } while (!vm_page_queue_end(&vm_page_queue_anonymous, (vm_page_queue_entry_t) m));
9672 vm_page_unlock_queues();
9673
9674 printf("IN Q: %d : %d : %d\n", dpages, pgopages, precpages);
9675
9676 dpages=0;
9677 pgopages=0;
9678 precpages=0;
9679
9680 vm_page_lock_queues();
9681 m = (vm_page_t) vm_page_queue_first(&vm_page_queue_active);
9682
9683 do {
9684 if(m == (vm_page_t )0) break;
9685 if(m->vmp_dirty) dpages++;
9686 if(m->vmp_free_when_done) pgopages++;
9687 if(m->vmp_precious) precpages++;
9688
9689 assert(VM_PAGE_OBJECT(m) != kernel_object);
9690 m = (vm_page_t) vm_page_queue_next(&m->vmp_pageq);
9691 if(m == (vm_page_t )0) break;
9692
9693 } while (!vm_page_queue_end(&vm_page_queue_active, (vm_page_queue_entry_t) m));
9694 vm_page_unlock_queues();
9695
9696 printf("AC Q: %d : %d : %d\n", dpages, pgopages, precpages);
9697
9698 }
9699 #endif /* MACH_BSD */
9700
9701
9702 #if CONFIG_IOSCHED
9703 int upl_get_cached_tier(upl_t upl)
9704 {
9705 assert(upl);
9706 if (upl->flags & UPL_TRACKED_BY_OBJECT)
9707 return (upl->upl_priority);
9708 return (-1);
9709 }
9710 #endif /* CONFIG_IOSCHED */
9711
9712
9713 void upl_callout_iodone(upl_t upl)
9714 {
9715 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9716
9717 if (upl_ctx) {
9718 void (*iodone_func)(void *, int) = upl_ctx->io_done;
9719
9720 assert(upl_ctx->io_done);
9721
9722 (*iodone_func)(upl_ctx->io_context, upl_ctx->io_error);
9723 }
9724 }
9725
9726 void upl_set_iodone(upl_t upl, void *upl_iodone)
9727 {
9728 upl->upl_iodone = (struct upl_io_completion *)upl_iodone;
9729 }
9730
9731 void upl_set_iodone_error(upl_t upl, int error)
9732 {
9733 struct upl_io_completion *upl_ctx = upl->upl_iodone;
9734
9735 if (upl_ctx)
9736 upl_ctx->io_error = error;
9737 }
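
/*
 * Illustrative sketch (not part of the build): wiring up a UPL I/O
 * completion callback with the routines above.  "my_iodone",
 * "my_issue_io" and "my_context" are hypothetical, and the error value
 * is only an example.
 */
#if 0
static void
my_iodone(void *context, int error)
{
	/* hypothetical completion handler */
}

static void
my_issue_io(upl_t upl, void *my_context)
{
	struct upl_io_completion io_ctx = {
		.io_context = my_context,
		.io_done = my_iodone,
		.io_error = 0,
	};

	upl_set_iodone(upl, &io_ctx);

	/* ... issue the I/O; a failure is recorded on the UPL ... */
	upl_set_iodone_error(upl, EIO);

	/* when the I/O completes, the callback fires with the context and error */
	upl_callout_iodone(upl);
}
#endif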
9738
9739
9740 ppnum_t upl_get_highest_page(
9741 upl_t upl)
9742 {
9743 return upl->highest_page;
9744 }
9745
9746 upl_size_t upl_get_size(
9747 upl_t upl)
9748 {
9749 return upl->size;
9750 }
9751
9752 upl_t upl_associated_upl(upl_t upl)
9753 {
9754 return upl->associated_upl;
9755 }
9756
9757 void upl_set_associated_upl(upl_t upl, upl_t associated_upl)
9758 {
9759 upl->associated_upl = associated_upl;
9760 }
9761
9762 struct vnode * upl_lookup_vnode(upl_t upl)
9763 {
9764 if (!upl->map_object->internal)
9765 return vnode_pager_lookup_vnode(upl->map_object->pager);
9766 else
9767 return NULL;
9768 }
9769
9770 #if UPL_DEBUG
9771 kern_return_t upl_ubc_alias_set(upl_t upl, uintptr_t alias1, uintptr_t alias2)
9772 {
9773 upl->ubc_alias1 = alias1;
9774 upl->ubc_alias2 = alias2;
9775 return KERN_SUCCESS;
9776 }
9777 int upl_ubc_alias_get(upl_t upl, uintptr_t * al, uintptr_t * al2)
9778 {
9779 if(al)
9780 *al = upl->ubc_alias1;
9781 if(al2)
9782 *al2 = upl->ubc_alias2;
9783 return KERN_SUCCESS;
9784 }
9785 #endif /* UPL_DEBUG */
9786
9787 #if VM_PRESSURE_EVENTS
9788 /*
9789 * Upward trajectory.
9790 */
9791 extern boolean_t vm_compressor_low_on_space(void);
9792
9793 boolean_t
9794 VM_PRESSURE_NORMAL_TO_WARNING(void) {
9795
9796 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9797
9798 /* Available pages below our threshold */
9799 if (memorystatus_available_pages < memorystatus_available_pages_pressure) {
9800 /* No frozen processes to kill */
9801 if (memorystatus_frozen_count == 0) {
9802 /* Not enough suspended processes available. */
9803 if (memorystatus_suspended_count < MEMORYSTATUS_SUSPENDED_THRESHOLD) {
9804 return TRUE;
9805 }
9806 }
9807 }
9808 return FALSE;
9809
9810 } else {
9811 return ((AVAILABLE_NON_COMPRESSED_MEMORY < VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) ? 1 : 0);
9812 }
9813 }
9814
9815 boolean_t
9816 VM_PRESSURE_WARNING_TO_CRITICAL(void) {
9817
9818 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9819
9820 /* Available pages below our threshold */
9821 if (memorystatus_available_pages < memorystatus_available_pages_critical) {
9822 return TRUE;
9823 }
9824 return FALSE;
9825 } else {
9826 return (vm_compressor_low_on_space() || (AVAILABLE_NON_COMPRESSED_MEMORY < ((12 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9827 }
9828 }
9829
9830 /*
9831 * Downward trajectory.
9832 */
9833 boolean_t
9834 VM_PRESSURE_WARNING_TO_NORMAL(void) {
9835
9836 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9837
9838 /* Available pages above our threshold */
9839 unsigned int target_threshold = (unsigned int) (memorystatus_available_pages_pressure + ((15 * memorystatus_available_pages_pressure) / 100));
9840 if (memorystatus_available_pages > target_threshold) {
9841 return TRUE;
9842 }
9843 return FALSE;
9844 } else {
9845 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((12 * VM_PAGE_COMPRESSOR_COMPACT_THRESHOLD) / 10)) ? 1 : 0);
9846 }
9847 }
9848
9849 boolean_t
9850 VM_PRESSURE_CRITICAL_TO_WARNING(void) {
9851
9852 if ( !VM_CONFIG_COMPRESSOR_IS_ACTIVE) {
9853
9854 /* Available pages above our threshold */
9855 unsigned int target_threshold = (unsigned int)(memorystatus_available_pages_critical + ((15 * memorystatus_available_pages_critical) / 100));
9856 if (memorystatus_available_pages > target_threshold) {
9857 return TRUE;
9858 }
9859 return FALSE;
9860 } else {
9861 return ((AVAILABLE_NON_COMPRESSED_MEMORY > ((14 * VM_PAGE_COMPRESSOR_SWAP_UNTHROTTLE_THRESHOLD) / 10)) ? 1 : 0);
9862 }
9863 }
9864 #endif /* VM_PRESSURE_EVENTS */
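
/*
 * Illustrative sketch (not part of the build): the hysteresis arithmetic
 * used by the non-compressor pressure transitions above.  Returning from
 * WARNING to NORMAL requires 15% more available pages than the level
 * that triggered the WARNING transition, so the two checks cannot
 * oscillate around a single value.  The page count below is hypothetical.
 */
#if 0
	unsigned int pressure_level = 10000;	/* memorystatus_available_pages_pressure */
	unsigned int back_to_normal;

	/* NORMAL -> WARNING fires once available pages drop below 10000 */
	/* WARNING -> NORMAL needs 10000 + (15 * 10000) / 100 = 11500 pages */
	back_to_normal = pressure_level + ((15 * pressure_level) / 100);
#endif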
9865
9866
9867
9868 #define VM_TEST_COLLAPSE_COMPRESSOR 0
9869 #define VM_TEST_WIRE_AND_EXTRACT 0
9870 #define VM_TEST_PAGE_WIRE_OVERFLOW_PANIC 0
9871 #if __arm64__
9872 #define VM_TEST_KERNEL_OBJECT_FAULT 0
9873 #endif /* __arm64__ */
9874 #define VM_TEST_DEVICE_PAGER_TRANSPOSE (DEVELOPMENT || DEBUG)
9875
9876 #if VM_TEST_COLLAPSE_COMPRESSOR
9877 extern boolean_t vm_object_collapse_compressor_allowed;
9878 #include <IOKit/IOLib.h>
9879 static void
9880 vm_test_collapse_compressor(void)
9881 {
9882 vm_object_size_t backing_size, top_size;
9883 vm_object_t backing_object, top_object;
9884 vm_map_offset_t backing_offset, top_offset;
9885 unsigned char *backing_address, *top_address;
9886 kern_return_t kr;
9887
9888 printf("VM_TEST_COLLAPSE_COMPRESSOR:\n");
9889
9890 /* create backing object */
9891 backing_size = 15 * PAGE_SIZE;
9892 backing_object = vm_object_allocate(backing_size);
9893 assert(backing_object != VM_OBJECT_NULL);
9894 printf("VM_TEST_COLLAPSE_COMPRESSOR: created backing object %p\n",
9895 backing_object);
9896 /* map backing object */
9897 backing_offset = 0;
9898 kr = vm_map_enter(kernel_map, &backing_offset, backing_size, 0,
9899 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9900 backing_object, 0, FALSE,
9901 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9902 assert(kr == KERN_SUCCESS);
9903 backing_address = (unsigned char *) backing_offset;
9904 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9905 "mapped backing object %p at 0x%llx\n",
9906 backing_object, (uint64_t) backing_offset);
9907 /* populate with pages to be compressed in backing object */
9908 backing_address[0x1*PAGE_SIZE] = 0xB1;
9909 backing_address[0x4*PAGE_SIZE] = 0xB4;
9910 backing_address[0x7*PAGE_SIZE] = 0xB7;
9911 backing_address[0xa*PAGE_SIZE] = 0xBA;
9912 backing_address[0xd*PAGE_SIZE] = 0xBD;
9913 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9914 "populated pages to be compressed in "
9915 "backing_object %p\n", backing_object);
9916 /* compress backing object */
9917 vm_object_pageout(backing_object);
9918 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing backing_object %p\n",
9919 backing_object);
9920 /* wait for all the pages to be gone */
9921 while (*(volatile int *)&backing_object->resident_page_count != 0)
9922 IODelay(10);
9923 printf("VM_TEST_COLLAPSE_COMPRESSOR: backing_object %p compressed\n",
9924 backing_object);
9925 /* populate with pages to be resident in backing object */
9926 backing_address[0x0*PAGE_SIZE] = 0xB0;
9927 backing_address[0x3*PAGE_SIZE] = 0xB3;
9928 backing_address[0x6*PAGE_SIZE] = 0xB6;
9929 backing_address[0x9*PAGE_SIZE] = 0xB9;
9930 backing_address[0xc*PAGE_SIZE] = 0xBC;
9931 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9932 "populated pages to be resident in "
9933 "backing_object %p\n", backing_object);
9934 /* leave the other pages absent */
9935 /* mess with the paging_offset of the backing_object */
9936 assert(backing_object->paging_offset == 0);
9937 backing_object->paging_offset = 0x3000;
9938
9939 /* create top object */
9940 top_size = 9 * PAGE_SIZE;
9941 top_object = vm_object_allocate(top_size);
9942 assert(top_object != VM_OBJECT_NULL);
9943 printf("VM_TEST_COLLAPSE_COMPRESSOR: created top object %p\n",
9944 top_object);
9945 /* map top object */
9946 top_offset = 0;
9947 kr = vm_map_enter(kernel_map, &top_offset, top_size, 0,
9948 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE,
9949 top_object, 0, FALSE,
9950 VM_PROT_DEFAULT, VM_PROT_DEFAULT, VM_INHERIT_DEFAULT);
9951 assert(kr == KERN_SUCCESS);
9952 top_address = (unsigned char *) top_offset;
9953 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9954 "mapped top object %p at 0x%llx\n",
9955 top_object, (uint64_t) top_offset);
9956 /* populate with pages to be compressed in top object */
9957 top_address[0x3*PAGE_SIZE] = 0xA3;
9958 top_address[0x4*PAGE_SIZE] = 0xA4;
9959 top_address[0x5*PAGE_SIZE] = 0xA5;
9960 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9961 "populated pages to be compressed in "
9962 "top_object %p\n", top_object);
9963 /* compress top object */
9964 vm_object_pageout(top_object);
9965 printf("VM_TEST_COLLAPSE_COMPRESSOR: compressing top_object %p\n",
9966 top_object);
9967 /* wait for all the pages to be gone */
9968 while (top_object->resident_page_count != 0)
9969 IODelay(10);
9970 printf("VM_TEST_COLLAPSE_COMPRESSOR: top_object %p compressed\n",
9971 top_object);
9972 /* populate with pages to be resident in top object */
9973 top_address[0x0*PAGE_SIZE] = 0xA0;
9974 top_address[0x1*PAGE_SIZE] = 0xA1;
9975 top_address[0x2*PAGE_SIZE] = 0xA2;
9976 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9977 "populated pages to be resident in "
9978 "top_object %p\n", top_object);
9979 /* leave the other pages absent */
9980
9981 /* link the 2 objects */
9982 vm_object_reference(backing_object);
9983 top_object->shadow = backing_object;
9984 top_object->vo_shadow_offset = 0x3000;
9985 printf("VM_TEST_COLLAPSE_COMPRESSOR: linked %p and %p\n",
9986 top_object, backing_object);
9987
9988 /* unmap backing object */
9989 vm_map_remove(kernel_map,
9990 backing_offset,
9991 backing_offset + backing_size,
9992 VM_MAP_REMOVE_NO_FLAGS);
9993 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
9994 "unmapped backing_object %p [0x%llx:0x%llx]\n",
9995 backing_object,
9996 (uint64_t) backing_offset,
9997 (uint64_t) (backing_offset + backing_size));
9998
9999 /* collapse */
10000 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsing %p\n", top_object);
10001 vm_object_lock(top_object);
10002 vm_object_collapse(top_object, 0, FALSE);
10003 vm_object_unlock(top_object);
10004 printf("VM_TEST_COLLAPSE_COMPRESSOR: collapsed %p\n", top_object);
10005
10006 /* did it work? */
10007 if (top_object->shadow != VM_OBJECT_NULL) {
10008 printf("VM_TEST_COLLAPSE_COMPRESSOR: not collapsed\n");
10009 printf("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10010 if (vm_object_collapse_compressor_allowed) {
10011 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10012 }
10013 } else {
10014 /* check the contents of the mapping */
10015 unsigned char expect[9] =
10016 { 0xA0, 0xA1, 0xA2, /* resident in top */
10017 0xA3, 0xA4, 0xA5, /* compressed in top */
10018 0xB9, /* resident in backing + shadow_offset */
10019 0xBD, /* compressed in backing + shadow_offset + paging_offset */
10020 0x00 }; /* absent in both */
10021 unsigned char actual[9];
10022 unsigned int i, errors;
10023
10024 errors = 0;
10025 for (i = 0; i < sizeof (actual); i++) {
10026 actual[i] = (unsigned char) top_address[i*PAGE_SIZE];
10027 if (actual[i] != expect[i]) {
10028 errors++;
10029 }
10030 }
10031 printf("VM_TEST_COLLAPSE_COMPRESSOR: "
10032 "actual [%x %x %x %x %x %x %x %x %x] "
10033 "expect [%x %x %x %x %x %x %x %x %x] "
10034 "%d errors\n",
10035 actual[0], actual[1], actual[2], actual[3],
10036 actual[4], actual[5], actual[6], actual[7],
10037 actual[8],
10038 expect[0], expect[1], expect[2], expect[3],
10039 expect[4], expect[5], expect[6], expect[7],
10040 expect[8],
10041 errors);
10042 if (errors) {
10043 panic("VM_TEST_COLLAPSE_COMPRESSOR: FAIL\n");
10044 } else {
10045 printf("VM_TEST_COLLAPSE_COMPRESSOR: PASS\n");
10046 }
10047 }
10048 }
10049 #else /* VM_TEST_COLLAPSE_COMPRESSOR */
10050 #define vm_test_collapse_compressor()
10051 #endif /* VM_TEST_COLLAPSE_COMPRESSOR */
10052
10053 #if VM_TEST_WIRE_AND_EXTRACT
10054 extern ledger_template_t task_ledger_template;
10055 #include <mach/mach_vm.h>
10056 extern ppnum_t vm_map_get_phys_page(vm_map_t map,
10057 vm_offset_t offset);
10058 static void
10059 vm_test_wire_and_extract(void)
10060 {
10061 ledger_t ledger;
10062 vm_map_t user_map, wire_map;
10063 mach_vm_address_t user_addr, wire_addr;
10064 mach_vm_size_t user_size, wire_size;
10065 mach_vm_offset_t cur_offset;
10066 vm_prot_t cur_prot, max_prot;
10067 ppnum_t user_ppnum, wire_ppnum;
10068 kern_return_t kr;
10069
10070 ledger = ledger_instantiate(task_ledger_template,
10071 LEDGER_CREATE_ACTIVE_ENTRIES);
10072 user_map = vm_map_create(pmap_create(ledger, 0, PMAP_CREATE_64BIT),
10073 0x100000000ULL,
10074 0x200000000ULL,
10075 TRUE);
10076 wire_map = vm_map_create(NULL,
10077 0x100000000ULL,
10078 0x200000000ULL,
10079 TRUE);
10080 user_addr = 0;
10081 user_size = 0x10000;
10082 kr = mach_vm_allocate(user_map,
10083 &user_addr,
10084 user_size,
10085 VM_FLAGS_ANYWHERE);
10086 assert(kr == KERN_SUCCESS);
10087 wire_addr = 0;
10088 wire_size = user_size;
10089 kr = mach_vm_remap(wire_map,
10090 &wire_addr,
10091 wire_size,
10092 0,
10093 VM_FLAGS_ANYWHERE,
10094 user_map,
10095 user_addr,
10096 FALSE,
10097 &cur_prot,
10098 &max_prot,
10099 VM_INHERIT_NONE);
10100 assert(kr == KERN_SUCCESS);
10101 for (cur_offset = 0;
10102 cur_offset < wire_size;
10103 cur_offset += PAGE_SIZE) {
10104 kr = vm_map_wire_and_extract(wire_map,
10105 wire_addr + cur_offset,
10106 VM_PROT_DEFAULT | VM_PROT_MEMORY_TAG_MAKE(VM_KERN_MEMORY_OSFMK),
10107 TRUE,
10108 &wire_ppnum);
10109 assert(kr == KERN_SUCCESS);
10110 user_ppnum = vm_map_get_phys_page(user_map,
10111 user_addr + cur_offset);
10112 printf("VM_TEST_WIRE_AND_EXTRACT: kr=0x%x "
10113 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10114 kr,
10115 user_map, user_addr + cur_offset, user_ppnum,
10116 wire_map, wire_addr + cur_offset, wire_ppnum);
10117 if (kr != KERN_SUCCESS ||
10118 wire_ppnum == 0 ||
10119 wire_ppnum != user_ppnum) {
10120 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10121 }
10122 }
10123 cur_offset -= PAGE_SIZE;
10124 kr = vm_map_wire_and_extract(wire_map,
10125 wire_addr + cur_offset,
10126 VM_PROT_DEFAULT,
10127 TRUE,
10128 &wire_ppnum);
10129 assert(kr == KERN_SUCCESS);
10130 printf("VM_TEST_WIRE_AND_EXTRACT: re-wire kr=0x%x "
10131 "user[%p:0x%llx:0x%x] wire[%p:0x%llx:0x%x]\n",
10132 kr,
10133 user_map, user_addr + cur_offset, user_ppnum,
10134 wire_map, wire_addr + cur_offset, wire_ppnum);
10135 if (kr != KERN_SUCCESS ||
10136 wire_ppnum == 0 ||
10137 wire_ppnum != user_ppnum) {
10138 panic("VM_TEST_WIRE_AND_EXTRACT: FAIL\n");
10139 }
10140
10141 printf("VM_TEST_WIRE_AND_EXTRACT: PASS\n");
10142 }
10143 #else /* VM_TEST_WIRE_AND_EXTRACT */
10144 #define vm_test_wire_and_extract()
10145 #endif /* VM_TEST_WIRE_AND_EXTRACT */
10146
10147 #if VM_TEST_PAGE_WIRE_OVERFLOW_PANIC
10148 static void
10149 vm_test_page_wire_overflow_panic(void)
10150 {
10151 vm_object_t object;
10152 vm_page_t page;
10153
10154 printf("VM_TEST_PAGE_WIRE_OVERFLOW_PANIC: starting...\n");
10155
10156 object = vm_object_allocate(PAGE_SIZE);
10157 vm_object_lock(object);
10158 page = vm_page_alloc(object, 0x0);
10159 vm_page_lock_queues();
10160 do {
10161 vm_page_wire(page, 1, FALSE);
10162 } while (page->wire_count != 0);
10163 vm_page_unlock_queues();
10164 vm_object_unlock(object);
10165 panic("FBDP(%p,%p): wire_count overflow not detected\n",
10166 object, page);
10167 }
10168 #else /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10169 #define vm_test_page_wire_overflow_panic()
10170 #endif /* VM_TEST_PAGE_WIRE_OVERFLOW_PANIC */
10171
10172 #if __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT
10173 extern int copyinframe(vm_address_t fp, char *frame, boolean_t is64bit);
10174 static void
10175 vm_test_kernel_object_fault(void)
10176 {
10177 kern_return_t kr;
10178 vm_offset_t stack;
10179 uintptr_t frameb[2];
10180 int ret;
10181
10182 kr = kernel_memory_allocate(kernel_map, &stack,
10183 kernel_stack_size + (2*PAGE_SIZE),
10184 0,
10185 (KMA_KSTACK | KMA_KOBJECT |
10186 KMA_GUARD_FIRST | KMA_GUARD_LAST),
10187 VM_KERN_MEMORY_STACK);
10188 if (kr != KERN_SUCCESS) {
10189 panic("VM_TEST_KERNEL_OBJECT_FAULT: kernel_memory_allocate kr 0x%x\n", kr);
10190 }
10191 ret = copyinframe((uintptr_t)stack, (char *)frameb, TRUE);
10192 if (ret != 0) {
10193 printf("VM_TEST_KERNEL_OBJECT_FAULT: PASS\n");
10194 } else {
10195 printf("VM_TEST_KERNEL_OBJECT_FAULT: FAIL\n");
10196 }
10197 vm_map_remove(kernel_map,
10198 stack,
10199 stack + kernel_stack_size + (2*PAGE_SIZE),
10200 VM_MAP_REMOVE_KUNWIRE);
10201 stack = 0;
10202 }
10203 #else /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10204 #define vm_test_kernel_object_fault()
10205 #endif /* __arm64__ && VM_TEST_KERNEL_OBJECT_FAULT */
10206
10207 #if VM_TEST_DEVICE_PAGER_TRANSPOSE
10208 static void
10209 vm_test_device_pager_transpose(void)
10210 {
10211 memory_object_t device_pager;
10212 vm_object_t anon_object, device_object;
10213 vm_size_t size;
10214 vm_map_offset_t anon_mapping, device_mapping;
10215 kern_return_t kr;
10216
10217 size = 3 * PAGE_SIZE;
10218 anon_object = vm_object_allocate(size);
10219 assert(anon_object != VM_OBJECT_NULL);
10220 device_pager = device_pager_setup(NULL, 0, size, 0);
10221 assert(device_pager != NULL);
10222 device_object = memory_object_to_vm_object(device_pager);
10223 assert(device_object != VM_OBJECT_NULL);
10224 anon_mapping = 0;
10225 kr = vm_map_enter(kernel_map, &anon_mapping, size, 0,
10226 VM_FLAGS_ANYWHERE, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_NONE,
10227 anon_object, 0, FALSE, VM_PROT_DEFAULT, VM_PROT_ALL,
10228 VM_INHERIT_DEFAULT);
10229 assert(kr == KERN_SUCCESS);
10230 device_mapping = 0;
10231 kr = vm_map_enter_mem_object(kernel_map, &device_mapping, size, 0,
10232 VM_FLAGS_ANYWHERE,
10233 VM_MAP_KERNEL_FLAGS_NONE,
10234 VM_KERN_MEMORY_NONE,
10235 (void *)device_pager, 0, FALSE,
10236 VM_PROT_DEFAULT, VM_PROT_ALL,
10237 VM_INHERIT_DEFAULT);
10238 assert(kr == KERN_SUCCESS);
10239 memory_object_deallocate(device_pager);
10240
10241 vm_object_lock(anon_object);
10242 vm_object_activity_begin(anon_object);
10243 anon_object->blocked_access = TRUE;
10244 vm_object_unlock(anon_object);
10245 vm_object_lock(device_object);
10246 vm_object_activity_begin(device_object);
10247 device_object->blocked_access = TRUE;
10248 vm_object_unlock(device_object);
10249
10250 assert(anon_object->ref_count == 1);
10251 assert(!anon_object->named);
10252 assert(device_object->ref_count == 2);
10253 assert(device_object->named);
10254
10255 kr = vm_object_transpose(device_object, anon_object, size);
10256 assert(kr == KERN_SUCCESS);
10257
10258 vm_object_lock(anon_object);
10259 vm_object_activity_end(anon_object);
10260 anon_object->blocked_access = FALSE;
10261 vm_object_unlock(anon_object);
10262 vm_object_lock(device_object);
10263 vm_object_activity_end(device_object);
10264 device_object->blocked_access = FALSE;
10265 vm_object_unlock(device_object);
10266
10267 assert(anon_object->ref_count == 2);
10268 assert(anon_object->named);
10269 kr = vm_deallocate(kernel_map, anon_mapping, size);
10270 assert(kr == KERN_SUCCESS);
10271 assert(device_object->ref_count == 1);
10272 assert(!device_object->named);
10273 kr = vm_deallocate(kernel_map, device_mapping, size);
10274 assert(kr == KERN_SUCCESS);
10275
10276 printf("VM_TEST_DEVICE_PAGER_TRANSPOSE: PASS\n");
10277 }
10278 #else /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10279 #define vm_test_device_pager_transpose()
10280 #endif /* VM_TEST_DEVICE_PAGER_TRANSPOSE */
10281
10282 void
10283 vm_tests(void)
10284 {
10285 vm_test_collapse_compressor();
10286 vm_test_wire_and_extract();
10287 vm_test_page_wire_overflow_panic();
10288 vm_test_kernel_object_fault();
10289 vm_test_device_pager_transpose();
10290 }