]> git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/vm/vm_page.h
xnu-1699.22.81.tar.gz
[apple/xnu.git] / osfmk / vm / vm_page.h
index 7ab8fe83eec4b930fa001c91633a4309ac64a3de..543a0c6f5d447fb68ec15c6a67491a316e93722b 100644 (file)
@@ -1,23 +1,29 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
@@ -60,6 +66,8 @@
 #ifndef        _VM_VM_PAGE_H_
 #define _VM_VM_PAGE_H_
 
+#include <debug.h>
+
 #include <mach/boolean.h>
 #include <mach/vm_prot.h>
 #include <mach/vm_param.h>
 #include <kern/lock.h>
 
 #include <kern/macro_help.h>
+#include <libkern/OSAtomic.h>
+
 
 /* 
- * Each page entered on the inactive queue obtains a ticket from a
- * particular ticket roll.  Pages granted tickets from a particular 
- * roll  generally flow through the queue as a group.  In this way when a
- * page with a ticket from a particular roll is pulled from the top of the
- * queue it is extremely likely that the pages near the top will have tickets
- * from the same or adjacent rolls.  In this way the proximity to the top
- * of the queue can be loosely ascertained by determining the identity of
- * the roll the pages ticket came from. 
+ * VM_PAGE_MIN_SPECULATIVE_AGE_Q through VM_PAGE_MAX_SPECULATIVE_AGE_Q
+ * represent a set of aging bins that are 'protected'...
+ *
+ * VM_PAGE_SPECULATIVE_AGED_Q is a list of the speculative pages that have
+ * not yet been 'claimed' but have been aged out of the protected bins.
+ * This occurs in vm_page_speculate when it advances to the next bin
+ * and discovers that it is still occupied... at that point, all of the
+ * pages in that bin are moved to the VM_PAGE_SPECULATIVE_AGED_Q.  The pages
+ * in that bin are all guaranteed to have reached at least the maximum age
+ * we allow for a protected page... they can be older if there is no
+ * memory pressure to pull them from the bin, or there are no new speculative pages
+ * being generated to push them out.
+ * This list is the one that vm_pageout_scan will prefer when looking
+ * for pages to move to the underweight free list.
+ * 
+ * VM_PAGE_MAX_SPECULATIVE_AGE_Q * VM_PAGE_SPECULATIVE_Q_AGE_MS
+ * defines the amount of time a speculative page is normally
+ * allowed to live in the 'protected' state (i.e. not available
+ * to be stolen if vm_pageout_scan is running and looking for
+ * pages)...  however, if the total number of speculative pages
+ * in the protected state exceeds our limit (defined in vm_pageout.c)
+ * and there are none available in VM_PAGE_SPECULATIVE_AGED_Q, then
+ * vm_pageout_scan is allowed to steal pages from the protected
+ * bucket even if they are underage.
+ *
+ * vm_pageout_scan is also allowed to pull pages from a protected
+ * bin if the bin has reached the "age of consent" we've set
  */
+#define VM_PAGE_MAX_SPECULATIVE_AGE_Q  10      /* highest-numbered 'protected' aging bin */
+#define VM_PAGE_MIN_SPECULATIVE_AGE_Q  1       /* lowest-numbered 'protected' aging bin */
+#define VM_PAGE_SPECULATIVE_AGED_Q     0       /* bin holding pages aged out of the protected bins */
+
+#define VM_PAGE_SPECULATIVE_Q_AGE_MS   500     /* multiplied by VM_PAGE_MAX_SPECULATIVE_AGE_Q to get the normal protected lifetime */
+
+struct vm_speculative_age_q {
+       /*
+        * memory queue for speculative pages via clustered pageins
+        */
+        queue_head_t   age_q;
+        mach_timespec_t        age_ts; /* NOTE(review): presumably the time at which this bin ages out -- confirm in vm_page_speculate() */
+};
 
 
-extern int     vm_page_ticket_roll;
-extern int     vm_page_ticket;
 
+extern
+struct vm_speculative_age_q    vm_page_queue_speculative[];
+
+extern int                     speculative_steal_index;
+extern int                     speculative_age_index;
+extern unsigned int            vm_page_speculative_q_age_ms;
 
-#define VM_PAGE_TICKETS_IN_ROLL  512
-#define VM_PAGE_TICKET_ROLL_IDS  16
 
 /*
  *     Management of resident (logical) pages.
@@ -115,36 +159,66 @@ extern int        vm_page_ticket;
  */
 
 struct vm_page {
-       queue_chain_t   pageq;          /* queue info for FIFO
-                                        * queue or free list (P) */
+       queue_chain_t   pageq;          /* queue info for FIFO */
+                                       /* queue or free list (P) */
+
        queue_chain_t   listq;          /* all pages in same object (O) */
        struct vm_page  *next;          /* VP bucket link (O) */
 
        vm_object_t     object;         /* which object am I in (O&P) */
        vm_object_offset_t offset;      /* offset into that object (O,P) */
 
+       /*
+        * The following word of flags is protected
+        * by the "page queues" lock.
+        *
+        * we use the 'wire_count' field to store the local
+        * queue id if local queues are enabled...
+        * see the comments at 'VM_PAGE_QUEUES_REMOVE' as to
+        * why this is safe to do
+        */
+#define local_id wire_count
        unsigned int    wire_count:16,  /* how many wired down maps use me? (O&P) */
-                       page_ticket:4,  /* age of the page on the       */
-                                       /* inactive queue.                  */
        /* boolean_t */ inactive:1,     /* page is in inactive list (P) */
+                       zero_fill:1,
                        active:1,       /* page is in active list (P) */
+                       pageout_queue:1,/* page is on queue for pageout (P) */
+                       speculative:1,  /* page is on speculative list (P) */
                        laundry:1,      /* page is being cleaned now (P)*/
                        free:1,         /* page is on free list (P) */
                        reference:1,    /* page has been used (P) */
-                       pageout:1,      /* page wired & busy for pageout (P) */
                        gobbled:1,      /* page used internally (P) */
                        private:1,      /* Page should not be returned to
-                                        *  the free list (O) */
-                       zero_fill:1,
-                       :0;
+                                        *  the free list (P) */
+                       throttled:1,    /* pager is not responding (P) */
+                       local:1,
+                       no_cache:1,     /* page is not to be cached and should
+                                        * be reused ahead of other pages (P) */
+                       __unused_pageq_bits:3;  /* 3 bits available here */
+
+       ppnum_t         phys_page;      /* Physical address of page, passed
+                                        *  to pmap_enter (read-only) */
 
+       /*
+        * The following word of flags is protected
+        * by the "VM object" lock.
+        */
        unsigned int
-                       page_error:8,   /* error from I/O operations */
        /* boolean_t */ busy:1,         /* page is in transit (O) */
                        wanted:1,       /* someone is waiting for page (O) */
                        tabled:1,       /* page is in VP table (O) */
                        fictitious:1,   /* Physical page doesn't exist (O) */
-                       no_isync:1,     /* page has not been instruction synced */
+       /*
+        * IMPORTANT: the "pmapped" bit can be turned on while holding the
+        * VM object "shared" lock.  See vm_fault_enter().
+        * This is OK as long as it's the only bit in this bit field that
+        * can be updated without holding the VM object "exclusive" lock.
+        */
+                       pmapped:1,      /* page has been entered at some
+                                        * point into a pmap (O **shared**) */
+                       wpmapped:1,     /* page has been entered at some
+                                        * point into a pmap for write (O) */
+                       pageout:1,      /* page wired & busy for pageout (O) */
                        absent:1,       /* Data has been requested, but is
                                         *  not yet available (O) */
                        error:1,        /* Data manager was unable to provide
@@ -160,31 +234,59 @@ struct vm_page {
                        restart:1,      /* Page was pushed higher in shadow
                                           chain by copy_call-related pagers;
                                           start again at top of chain */
-                       lock_supplied:1,/* protection supplied by pager (O) */
-       /* vm_prot_t */ page_lock:3,    /* Uses prohibited by pager (O) */
-       /* vm_prot_t */ unlock_request:3,/* Outstanding unlock request (O) */
                        unusual:1,      /* Page is absent, error, restart or
                                           page locked */
-                       discard_request:1,/* a memory_object_discard_request()
-                                          * has been sent */
+                       encrypted:1,    /* encrypted for secure swap (O) */
+                       encrypted_cleaning:1,   /* encrypting page */
                        list_req_pending:1, /* pagein/pageout alt mechanism */
                                            /* allows creation of list      */
                                            /* requests on pages that are   */
                                            /* actively being paged.        */
-                       dump_cleaning:1;   /* set by the pageout daemon when */
+                       dump_cleaning:1,   /* set by the pageout daemon when */
                                           /* a page being cleaned is       */
                                           /* encountered and targeted as   */
                                           /* a pageout candidate           */
-        /* we've used up all 32 bits */
-
-       vm_offset_t     phys_addr;      /* Physical address of page, passed
-                                        *  to pmap_enter (read-only) */
+                       cs_validated:1,    /* code-signing: page was checked */ 
+                       cs_tainted:1,      /* code-signing: page is tainted */
+                       reusable:1,
+                       lopage:1,
+                       slid:1,
+                       __unused_object_bits:7;  /* 7 bits available here */
+
+#if __LP64__
+       unsigned int __unused_padding;  /* Pad structure explicitly
+                                       * to 8-byte multiple for LP64 */
+#endif
 };
 
+#define DEBUG_ENCRYPTED_SWAP   1       /* non-zero: ASSERT_PAGE_DECRYPTED uses panic(); zero: it uses assert() */
+#if DEBUG_ENCRYPTED_SWAP
+#define ASSERT_PAGE_DECRYPTED(page)                                    \
+       MACRO_BEGIN                                                     \
+       if ((page)->encrypted) {                                        \
+               panic("VM page %p should not be encrypted here\n",      \
+                     (page));                                          \
+       }                                                               \
+       MACRO_END
+#else  /* DEBUG_ENCRYPTED_SWAP */
+#define ASSERT_PAGE_DECRYPTED(page) assert(!(page)->encrypted)
+#endif /* DEBUG_ENCRYPTED_SWAP */
+
 typedef struct vm_page *vm_page_t;
 
+
+typedef struct vm_locks_array {
+       char    pad  __attribute__ ((aligned (64)));   /* leading pad, 64-byte aligned */
+       lck_mtx_t       vm_page_queue_lock2 __attribute__ ((aligned (64)));    /* accessed through the vm_page_queue_lock macro */
+       lck_mtx_t       vm_page_queue_free_lock2 __attribute__ ((aligned (64)));       /* accessed through the vm_page_queue_free_lock macro */
+       char    pad2  __attribute__ ((aligned (64)));  /* trailing pad, 64-byte aligned */
+} vm_locks_array_t;    /* NOTE(review): the 64-byte alignment presumably keeps each lock on its own cache line -- confirm */
+
+
+#define VM_PAGE_WIRED(m)       ((!(m)->local && (m)->wire_count))      /* wire_count doubles as local_id (see struct vm_page), so it is only read as a wire count when 'local' is clear */
 #define VM_PAGE_NULL           ((vm_page_t) 0)
-#define NEXT_PAGE(m)    ((vm_page_t) (m)->pageq.next)
+#define NEXT_PAGE(m)           ((vm_page_t) (m)->pageq.next)
+#define NEXT_PAGE_PTR(m)       ((vm_page_t *) &(m)->pageq.next)        /* address of (m)'s pageq.next link */
 
 /*
  * XXX The unusual bit should not be necessary.  Most of the bit
@@ -196,33 +298,116 @@ typedef struct vm_page   *vm_page_t;
  *     some useful check on a page structure.
  */
 
-#define VM_PAGE_CHECK(mem)
+#define VM_PAGE_CHECK(mem)                     \
+       MACRO_BEGIN                             \
+       VM_PAGE_QUEUES_ASSERT(mem, 1);          \
+       MACRO_END
+
+/*     Page coloring:
+ *
+ *     The free page list is actually n lists, one per color,
+ *     where the number of colors is a function of the machine's
+ *     cache geometry set at system initialization.  To disable
+ *     coloring, set vm_colors to 1 and vm_color_mask to 0.
+ *     The boot-arg "colors" may be used to override vm_colors.
+ *     Note that there is little harm in having more colors than needed.
+ */
+#define MAX_COLORS      128
+#define        DEFAULT_COLORS  32
+
+extern
+unsigned int   vm_colors;              /* must be in range 1..MAX_COLORS */
+extern
+unsigned int   vm_color_mask;          /* must be (vm_colors-1) */
+extern
+unsigned int   vm_cache_geometry_colors; /* optimal #colors based on cache geometry */
+
+/*
+ * Wired memory is a very limited resource and we can't let users exhaust it
+ * and deadlock the entire system.  We enforce the following limits:
+ * 
+ * vm_user_wire_limit (default: all memory minus vm_global_no_user_wire_amount)
+ *     how much memory can be user-wired in one user task
+ *
+ * vm_global_user_wire_limit (default: same as vm_user_wire_limit)
+ *     how much memory can be user-wired in all user tasks
+ *
+ * vm_global_no_user_wire_amount (default: VM_NOT_USER_WIREABLE)
+ *     how much memory must remain user-unwired at any time
+ */
+#define VM_NOT_USER_WIREABLE (64*1024*1024)    /* 64MB */
+extern
+vm_map_size_t  vm_user_wire_limit;
+extern
+vm_map_size_t  vm_global_user_wire_limit;
+extern
+vm_map_size_t  vm_global_no_user_wire_amount;
 
 /*
  *     Each pageable resident page falls into one of three lists:
  *
  *     free    
- *             Available for allocation now.
+ *             Available for allocation now.  The free list is
+ *             actually an array of lists, one per color.
  *     inactive
  *             Not referenced in any map, but still has an
  *             object/offset-page mapping, and may be dirty.
  *             This is the list of pages that should be
- *             paged out next.
+ *             paged out next.  There are actually two
+ *             inactive lists, one for pages brought in from
+ *             disk or other backing store, and another
+ *             for "zero-filled" pages.  See vm_pageout_scan()
+ *             for the distinction and usage.
  *     active
  *             A list of pages which have been placed in
  *             at least one physical map.  This list is
  *             ordered, in LRU-like fashion.
  */
 
+
+#define VPL_LOCK_SPIN 1        /* tested with #ifdef: defined (any value) selects the spin-lock variant */
+
+struct vpl {
+       unsigned int    vpl_count;      /* NOTE(review): presumably the number of pages on vpl_queue -- confirm */
+       queue_head_t    vpl_queue;
+#ifdef VPL_LOCK_SPIN
+       lck_spin_t      vpl_lock;
+#else
+       lck_mtx_t       vpl_lock;
+       lck_mtx_ext_t   vpl_lock_ext;
+#endif
+};
+
+struct vplq {
+       union {
+               char   cache_line_pad[128];     /* makes every struct vplq at least 128 bytes */
+               struct vpl vpl;
+       } vpl_un;
+};
 extern
-vm_page_t      vm_page_queue_free;     /* memory free queue */
+unsigned int   vm_page_local_q_count;
+extern
+struct vplq    *vm_page_local_q;
+extern
+unsigned int   vm_page_local_q_soft_limit;
+extern
+unsigned int   vm_page_local_q_hard_limit;
+extern
+vm_locks_array_t vm_page_locks;
+
 extern
-vm_page_t      vm_page_queue_fictitious;       /* fictitious free queue */
+queue_head_t   vm_page_queue_free[MAX_COLORS]; /* memory free queue */
+extern
+queue_head_t   vm_lopage_queue_free;           /* low memory free queue */
 extern
 queue_head_t   vm_page_queue_active;   /* active memory queue */
 extern
-queue_head_t   vm_page_queue_inactive; /* inactive memory queue */
+queue_head_t   vm_page_queue_inactive; /* inactive memory queue for normal pages */
+extern
 queue_head_t   vm_page_queue_zf;       /* inactive memory queue for zero fill */
+extern
+queue_head_t   vm_page_queue_throttled;        /* memory queue for throttled pageout pages */
 
 extern
 vm_offset_t    first_phys_addr;        /* physical address for first_page */
@@ -230,51 +415,94 @@ extern
 vm_offset_t    last_phys_addr;         /* physical address for last_page */
 
 extern
-int    vm_page_free_count;     /* How many pages are free? */
+unsigned int   vm_page_free_count;     /* How many pages are free? (sum of all colors) */
+extern
+unsigned int   vm_page_fictitious_count;/* How many fictitious pages are free? */
+extern
+unsigned int   vm_page_active_count;   /* How many pages are active? */
+extern
+unsigned int   vm_page_inactive_count; /* How many pages are inactive? */
+extern
+unsigned int   vm_page_throttled_count;/* How many inactives are throttled */
+extern
+unsigned int   vm_page_speculative_count;      /* How many speculative pages are unclaimed? */
+extern
+unsigned int   vm_page_wire_count;     /* How many pages are wired? */
 extern
-int    vm_page_fictitious_count;/* How many fictitious pages are free? */
+unsigned int   vm_page_free_target;    /* How many do we want free? */
 extern
-int    vm_page_active_count;   /* How many pages are active? */
+unsigned int   vm_page_free_min;       /* When to wakeup pageout */
 extern
-int    vm_page_inactive_count; /* How many pages are inactive? */
+unsigned int   vm_page_throttle_limit; /* When to throttle new page creation */
 extern
-int    vm_page_wire_count;     /* How many pages are wired? */
+uint32_t       vm_page_creation_throttle;      /* When to throttle new page creation */
 extern
-int    vm_page_free_target;    /* How many do we want free? */
+unsigned int   vm_page_inactive_target;/* How many do we want inactive? */
 extern
-int    vm_page_free_min;       /* When to wakeup pageout */
+unsigned int   vm_page_inactive_min;   /* When to wake up pageout */
 extern
-int    vm_page_inactive_target;/* How many do we want inactive? */
+unsigned int   vm_page_free_reserved;  /* How many pages reserved to do pageout */
 extern
-int    vm_page_free_reserved;  /* How many pages reserved to do pageout */
+unsigned int   vm_page_throttle_count; /* Count of page allocations throttled */
 extern
-int    vm_page_laundry_count;  /* How many pages being laundered? */
+unsigned int   vm_page_gobble_count;
 
-decl_mutex_data(,vm_page_queue_lock)
-                               /* lock on active and inactive page queues */
-decl_mutex_data(,vm_page_queue_free_lock)
-                               /* lock on free page queue */
-decl_simple_lock_data(extern,vm_page_preppin_lock)     /* lock for prep/pin */
-decl_mutex_data(,vm_page_zero_fill_lock)
+#if DEVELOPMENT || DEBUG
+extern
+unsigned int   vm_page_speculative_used;
+#endif
+
+extern
+unsigned int   vm_page_purgeable_count;/* How many pages are purgeable now ? */
+extern
+unsigned int   vm_page_purgeable_wired_count;/* How many purgeable pages are wired now ? */
+extern
+uint64_t       vm_page_purged_count;   /* How many pages got purged so far ? */
 
 extern unsigned int    vm_page_free_wanted;
                                /* how many threads are waiting for memory */
 
-extern vm_offset_t     vm_page_fictitious_addr;
+extern unsigned int    vm_page_free_wanted_privileged;
+                               /* how many VM privileged threads are waiting for memory */
+
+extern ppnum_t vm_page_fictitious_addr;
                                /* (fake) phys_addr of fictitious pages */
 
+extern ppnum_t vm_page_guard_addr;
+                               /* (fake) phys_addr of guard pages */
+
+
+extern boolean_t       vm_page_deactivate_hint;
+
+/*
+   0 = all pages available (default)
+   1 = disable high memory (cap max pages to 4G)
+   2 = prefer high memory
+*/   
+extern int             vm_himemory_mode;
+
+extern boolean_t       vm_lopage_needed;
+extern uint32_t                vm_lopage_free_count;
+extern uint32_t                vm_lopage_free_limit;
+extern uint32_t                vm_lopage_lowater;
+extern boolean_t       vm_lopage_refill;
+extern uint64_t                max_valid_dma_address;
+extern ppnum_t         max_valid_low_ppnum;
+
 /*
  * Prototypes for functions exported by this module.
  */
 extern void            vm_page_bootstrap(
                                        vm_offset_t     *startp,
-                                       vm_offset_t     *endp);
+                                       vm_offset_t     *endp) __attribute__((section("__TEXT, initcode")));
 
-extern void            vm_page_module_init(void);
+extern void            vm_page_module_init(void) __attribute__((section("__TEXT, initcode")));
+                                       
+extern void            vm_page_init_local_q(void);
 
 extern void            vm_page_create(
-                                       vm_offset_t     start,
-                                       vm_offset_t     end);
+                                       ppnum_t         start,
+                                       ppnum_t         end);
 
 extern vm_page_t       vm_page_lookup(
                                        vm_object_t             object,
@@ -282,28 +510,22 @@ extern vm_page_t  vm_page_lookup(
 
 extern vm_page_t       vm_page_grab_fictitious(void);
 
+extern vm_page_t       vm_page_grab_guard(void);
+
 extern void            vm_page_release_fictitious(
                                        vm_page_t page);
 
-extern boolean_t       vm_page_convert(
-                                       vm_page_t       page);
-
 extern void            vm_page_more_fictitious(void);
 
 extern int             vm_pool_low(void);
 
 extern vm_page_t       vm_page_grab(void);
 
-extern void            vm_page_release(
-                                       vm_page_t       page);
+extern vm_page_t       vm_page_grablo(void);
 
-extern void            vm_page_release_limbo(
+extern void            vm_page_release(
                                        vm_page_t       page);
 
-extern void            vm_page_limbo_exchange(
-                                       vm_page_t       limbo_m,
-                                       vm_page_t       new_m);
-
 extern boolean_t       vm_page_wait(
                                        int             interruptible );
 
@@ -311,12 +533,25 @@ extern vm_page_t  vm_page_alloc(
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
+extern vm_page_t       vm_page_alloclo(
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset);
+
+extern vm_page_t       vm_page_alloc_guard(
+       vm_object_t             object,
+       vm_object_offset_t      offset);
+
 extern void            vm_page_init(
                                        vm_page_t       page,
-                                       vm_offset_t     phys_addr);
+                                       ppnum_t         phys_page,
+                                       boolean_t       lopage);
 
 extern void            vm_page_free(
-                                       vm_page_t       page);
+                                       vm_page_t       page);
+
+extern void            vm_page_free_unlocked(
+                                       vm_page_t       page,
+                                       boolean_t       remove_from_hash);
 
 extern void            vm_page_activate(
                                        vm_page_t       page);
@@ -324,23 +559,50 @@ extern void               vm_page_activate(
 extern void            vm_page_deactivate(
                                        vm_page_t       page);
 
+extern void            vm_page_deactivate_internal(
+                                       vm_page_t       page,
+                                       boolean_t       clear_hw_reference);
+
+extern void            vm_page_lru(
+                                       vm_page_t       page);
+
+extern void            vm_page_speculate(
+                                       vm_page_t       page,
+                                       boolean_t       new);
+
+extern void            vm_page_speculate_ageit(
+                                       struct vm_speculative_age_q *aq);
+
+extern void            vm_page_reactivate_all_throttled(void);
+
+extern void            vm_page_reactivate_local(uint32_t lid, boolean_t force, boolean_t nolocks);
+
 extern void            vm_page_rename(
                                        vm_page_t               page,
                                        vm_object_t             new_object,
-                                       vm_object_offset_t      new_offset);
+                                       vm_object_offset_t      new_offset,
+                                       boolean_t               encrypted_ok);
 
 extern void            vm_page_insert(
                                        vm_page_t               page,
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
+extern void            vm_page_insert_internal(
+                                       vm_page_t               page,
+                                       vm_object_t             object,
+                                       vm_object_offset_t      offset,
+                                       boolean_t               queues_lock_held,
+                                       boolean_t               insert_in_hash);
+
 extern void            vm_page_replace(
                                        vm_page_t               mem,
                                        vm_object_t             object,
                                        vm_object_offset_t      offset);
 
 extern void            vm_page_remove(
-                                       vm_page_t       page);
+                                       vm_page_t       page,
+                                       boolean_t       remove_from_hash);
 
 extern void            vm_page_zero_fill(
                                        vm_page_t       page);
@@ -365,13 +627,29 @@ extern void               vm_page_wire(
                                        vm_page_t       page);
 
 extern void            vm_page_unwire(
-                                       vm_page_t       page);
+                                       vm_page_t       page,
+                                       boolean_t       queueit);
 
 extern void            vm_set_page_size(void);
 
 extern void            vm_page_gobble(
                                        vm_page_t      page);
 
+extern void            vm_page_validate_cs(vm_page_t   page);
+extern void            vm_page_validate_cs_mapped(
+       vm_page_t       page,
+       const void      *kaddr);
+
+extern void            vm_page_free_prepare_queues(
+                                       vm_page_t       page);
+
+extern void            vm_page_free_prepare_object(
+                                       vm_page_t       page,
+                                       boolean_t       remove_from_hash);
+
+extern void            vm_check_memorystatus(void);
+
+
 /*
  *     Functions implemented as macros. m->wanted and m->busy are
  *     protected by the object lock.
@@ -404,9 +682,7 @@ extern void         vm_page_gobble(
 
 #define VM_PAGE_FREE(p)                        \
                MACRO_BEGIN                     \
-               vm_page_lock_queues();          \
-               vm_page_free(p);                \
-               vm_page_unlock_queues();        \
+               vm_page_free_unlocked(p, TRUE); \
                MACRO_END
 
 #define VM_PAGE_GRAB_FICTITIOUS(M)                                     \
@@ -415,41 +691,237 @@ extern void              vm_page_gobble(
                        vm_page_more_fictitious();                      \
                MACRO_END
 
-#define VM_PAGE_THROTTLED()                                            \
-               (vm_page_free_count < (vm_page_free_target -            \
-                ((vm_page_free_target-vm_page_free_min)>>2)))
-
 #define        VM_PAGE_WAIT()          ((void)vm_page_wait(THREAD_UNINT))
 
-#define vm_page_lock_queues()  mutex_lock(&vm_page_queue_lock)
-#define vm_page_unlock_queues()        mutex_unlock(&vm_page_queue_lock)
-#define vm_page_pin_lock()     simple_lock(&vm_page_preppin_lock)
-#define vm_page_pin_unlock()   simple_unlock(&vm_page_preppin_lock)
+#define vm_page_queue_lock (vm_page_locks.vm_page_queue_lock2)
+#define vm_page_queue_free_lock (vm_page_locks.vm_page_queue_free_lock2)
+
+#define vm_page_lock_queues()  lck_mtx_lock(&vm_page_queue_lock)
+#define vm_page_unlock_queues()        lck_mtx_unlock(&vm_page_queue_lock)
+
+#define vm_page_lockspin_queues()      lck_mtx_lock_spin(&vm_page_queue_lock)
+#define vm_page_trylockspin_queues()   lck_mtx_try_lock_spin(&vm_page_queue_lock)
+#define vm_page_lockconvert_queues()   lck_mtx_convert_spin(&vm_page_queue_lock)
 
+#ifdef VPL_LOCK_SPIN   /* vpl_lock is driven through the lck_spin_* calls */
+#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_spin_init(&(vlq)->vpl_lock, vpl_grp, vpl_attr)
+#define VPL_LOCK(vpl) lck_spin_lock(vpl)
+#define VPL_UNLOCK(vpl) lck_spin_unlock(vpl)
+#else                  /* !VPL_LOCK_SPIN: vpl_lock is driven through the lck_mtx_* calls */
+#define VPL_LOCK_INIT(vlq, vpl_grp, vpl_attr) lck_mtx_init_ext(&(vlq)->vpl_lock, &(vlq)->vpl_lock_ext, vpl_grp, vpl_attr)
+#define VPL_LOCK(vpl) lck_mtx_lock_spin(vpl)
+#define VPL_UNLOCK(vpl) lck_mtx_unlock(vpl)
+#endif                 /* VPL_LOCK_SPIN */
+
+#if MACH_ASSERT
+extern void vm_page_queues_assert(vm_page_t mem, int val);
+#define VM_PAGE_QUEUES_ASSERT(mem, val)        vm_page_queues_assert((mem), (val))
+#else
+#define VM_PAGE_QUEUES_ASSERT(mem, val)
+#endif
+
+
+/*
+ * 'vm_fault_enter' will place newly created pages (zero-fill and COW) onto the
+ * local queues if they exist... it's the only spot in the system where we add pages
+ * to those queues...  once on those queues, those pages can only move to one of the
+ * global page queues or the free queues... they NEVER move from local q to local q.
+ * the 'local' state is stable when VM_PAGE_QUEUES_REMOVE is called since we're behind
+ * the global vm_page_queue_lock at this point...  we still need to take the local lock
+ * in case this operation is being run on a different CPU than the local queue's identity,
+ * but we don't have to worry about the page moving to a global queue or becoming wired
+ * while we're grabbing the local lock since those operations would require the global
+ * vm_page_queue_lock to be held, and we already own it.
+ *
+ * this is why it's safe to utilize the wire_count field in the vm_page_t as the local_id...
+ * 'wired' and local are ALWAYS mutually exclusive conditions.
+ */
 #define VM_PAGE_QUEUES_REMOVE(mem)                             \
        MACRO_BEGIN                                             \
-       if (mem->active) {                                      \
-               assert(!mem->inactive);                         \
+       VM_PAGE_QUEUES_ASSERT(mem, 1);                          \
+       assert(!mem->laundry);                                  \
+       assert(!mem->pageout_queue);                            \
+       if (mem->local) {       /* page is on a vm_page_local_q[] queue */ \
+               struct vpl      *lq;                            \
+               assert(mem->object != kernel_object);           \
+               assert(!mem->inactive && !mem->speculative);    \
+               assert(!mem->active && !mem->throttled);        \
+               assert(!mem->fictitious);                       \
+               lq = &vm_page_local_q[mem->local_id].vpl_un.vpl;        \
+               VPL_LOCK(&lq->vpl_lock);        /* the local queue has its own lock */ \
+               queue_remove(&lq->vpl_queue,                    \
+                            mem, vm_page_t, pageq);            \
+               mem->local = FALSE;                             \
+               mem->local_id = 0;                              \
+               lq->vpl_count--;                                \
+               VPL_UNLOCK(&lq->vpl_lock);                      \
+       }                                                       \
+                                                               \
+       else if (mem->active) { /* page is on vm_page_queue_active */ \
+               assert(mem->object != kernel_object);           \
+               assert(!mem->inactive && !mem->speculative);    \
+               assert(!mem->throttled);                        \
+               assert(!mem->fictitious);                       \
                queue_remove(&vm_page_queue_active,             \
                        mem, vm_page_t, pageq);                 \
                mem->active = FALSE;                            \
-               if (!mem->fictitious)                           \
-                       vm_page_active_count--;                 \
+               vm_page_active_count--;                         \
        }                                                       \
                                                                \
-       if (mem->inactive) {                                    \
-               assert(!mem->active);                           \
+       else if (mem->inactive) {       /* on vm_page_queue_zf or vm_page_queue_inactive */ \
+               assert(mem->object != kernel_object);           \
+               assert(!mem->active && !mem->speculative);      \
+               assert(!mem->throttled);                        \
+               assert(!mem->fictitious);                       \
                if (mem->zero_fill) {                           \
                        queue_remove(&vm_page_queue_zf,         \
                        mem, vm_page_t, pageq);                 \
+                       vm_zf_queue_count--;    /* zf pages are counted here and in the inactive count below */ \
                } else {                                        \
                        queue_remove(&vm_page_queue_inactive,   \
                        mem, vm_page_t, pageq);                 \
                }                                               \
                mem->inactive = FALSE;                          \
-               if (!mem->fictitious)                           \
-                       vm_page_inactive_count--;               \
+               vm_page_inactive_count--;                       \
+               vm_purgeable_q_advance_all();                   \
+       }                                                       \
+                                                               \
+       else if (mem->throttled) {      /* page is on vm_page_queue_throttled */ \
+               assert(!mem->active && !mem->inactive);         \
+               assert(!mem->speculative);                      \
+               assert(!mem->fictitious);                       \
+               queue_remove(&vm_page_queue_throttled,          \
+                            mem, vm_page_t, pageq);            \
+               mem->throttled = FALSE;                         \
+               vm_page_throttled_count--;                      \
+       }                                                       \
+                                                               \
+       else if (mem->speculative) {                            \
+               assert(!mem->active && !mem->inactive);         \
+               assert(!mem->throttled);                        \
+               assert(!mem->fictitious);                       \
+                remque(&mem->pageq);   /* unlinked via the page's own links; no queue head is named */ \
+               mem->speculative = FALSE;                       \
+               vm_page_speculative_count--;                    \
+       }                                                       \
+                                                               \
+       else if (mem->pageq.next || mem->pageq.prev)    /* linked, yet no queue flag is set */ \
+               panic("VM_PAGE_QUEUES_REMOVE: unmarked page on Q");     \
+       mem->pageq.next = NULL; /* every path leaves the links cleared */ \
+       mem->pageq.prev = NULL;                                 \
+       VM_PAGE_QUEUES_ASSERT(mem, 0);                          \
+       MACRO_END
+
+/* Mark 'mem' inactive and queue it: on vm_page_queue_zf if zero_fill, else on vm_page_queue_inactive. */
+#define VM_PAGE_ENQUEUE_INACTIVE(mem, first)                   \
+       MACRO_BEGIN                                             \
+       VM_PAGE_QUEUES_ASSERT(mem, 0);                          \
+       assert(!(mem)->fictitious);                             \
+       assert(!(mem)->laundry);                                \
+       assert(!(mem)->pageout_queue);                          \
+       if ((mem)->zero_fill) {                                 \
+               if ((first) == TRUE)    /* parenthesized: 'first' may be an expression */ \
+                       queue_enter_first(&vm_page_queue_zf, mem, vm_page_t, pageq);    \
+               else                                            \
+                       queue_enter(&vm_page_queue_zf, mem, vm_page_t, pageq);          \
+               vm_zf_queue_count++;                            \
+       } else {                                                \
+               if ((first) == TRUE)                            \
+                       queue_enter_first(&vm_page_queue_inactive, mem, vm_page_t, pageq); \
+               else                                            \
+                       queue_enter(&vm_page_queue_inactive, mem, vm_page_t, pageq);    \
+       }                                                       \
+       (mem)->inactive = TRUE;                                 \
+       vm_page_inactive_count++;       /* zero-fill pages are counted here too */ \
+       token_new_pagecount++;                                  \
+       MACRO_END
+
+
+#if DEVELOPMENT || DEBUG       /* the counter is only updated in these builds */
+#define VM_PAGE_SPECULATIVE_USED_ADD()                         \
+       MACRO_BEGIN                                             \
+       OSAddAtomic(1, &vm_page_speculative_used);      /* add 1 to vm_page_speculative_used */ \
+       MACRO_END
+#else                          /* !(DEVELOPMENT || DEBUG): expands to nothing */
+#define        VM_PAGE_SPECULATIVE_USED_ADD()
+#endif                         /* DEVELOPMENT || DEBUG */
+
+/* If 'mem' is marked clustered: bump its object's pages_used, clear the mark, and call VM_PAGE_SPECULATIVE_USED_ADD(). */
+#define VM_PAGE_CONSUME_CLUSTERED(mem)                         \
+       MACRO_BEGIN                                             \
+       if ((mem)->clustered) {                                 \
+               assert((mem)->object);  /* pages_used lives in the owning object */ \
+               (mem)->object->pages_used++;                    \
+               (mem)->clustered = FALSE;                       \
+               VM_PAGE_SPECULATIVE_USED_ADD();                 \
        }                                                       \
        MACRO_END
 
+
+       
+#define DW_vm_page_unwire              0x01
+#define DW_vm_page_wire                        0x02
+#define DW_vm_page_free                        0x04
+#define DW_vm_page_activate            0x08
+#define DW_vm_page_deactivate_internal 0x10
+#define DW_vm_page_speculate           0x20
+#define DW_vm_page_lru                 0x40
+#define DW_vm_pageout_throttle_up      0x80
+#define DW_PAGE_WAKEUP                 0x100
+#define DW_clear_busy                  0x200
+#define DW_clear_reference             0x400
+#define DW_set_reference               0x800
+#define DW_move_page                   0x1000
+#define DW_VM_PAGE_QUEUES_REMOVE       0x2000
+#define DW_set_list_req_pending                0x4000
+
+struct vm_page_delayed_work {
+       vm_page_t       dw_m;           /* page this entry refers to (stored by VM_PAGE_ADD_DELAYED_WORK) */
+       int             dw_mask;        /* bitwise OR of DW_* flags */
+};
+
+void vm_page_do_delayed_work(vm_object_t object, struct vm_page_delayed_work *dwp, int dw_count);
+
+extern unsigned int vm_max_delayed_work_limit;
+
+#define DEFAULT_DELAYED_WORK_LIMIT     32
+/* Yields 'max', or vm_max_delayed_work_limit when that is smaller; 'max' may be evaluated twice. */
+#define DELAYED_WORK_LIMIT(max)        ((vm_max_delayed_work_limit >= (max) ? (max) : vm_max_delayed_work_limit))
+
+/*
+ * vm_page_do_delayed_work may need to drop the object lock...
+ * if it does, we need the pages it's looking at to
+ * be held stable via the busy bit, so if busy isn't already
+ * set, we need to set it and ask vm_page_do_delayed_work
+ * to clear it and wakeup anyone that might have blocked on
+ * it once we're done processing the page.
+ *
+ * additionally, we can't call vm_page_do_delayed_work with
+ * list_req_pending == TRUE since it may need to 
+ * drop the object lock before dealing
+ * with this page and because list_req_pending == TRUE, 
+ * busy == TRUE will NOT protect this page from being stolen
+ * so clear list_req_pending and ask vm_page_do_delayed_work
+ * to re-set it once it holds both the pageq and object locks
+ */
+
+#define VM_PAGE_ADD_DELAYED_WORK(dwp, mem, dw_cnt)             \
+       MACRO_BEGIN                                             \
+       if ((mem)->busy == FALSE) {     /* we set busy ourselves... */ \
+               (mem)->busy = TRUE;                             \
+               if ( !((dwp)->dw_mask & DW_vm_page_free))       /* ...so request clear+wakeup unless freeing */ \
+                       (dwp)->dw_mask |= (DW_clear_busy | DW_PAGE_WAKEUP); \
+       }                                                       \
+       if ((mem)->list_req_pending) {  /* cleared here; flag asks for it to be set again */ \
+               (mem)->list_req_pending = FALSE;                \
+               (dwp)->dw_mask |= DW_set_list_req_pending;      \
+       }                                                       \
+       (dwp)->dw_m = (mem);                                    \
+       (dwp)++;        /* advance the caller's cursor to the next entry */ \
+       (dw_cnt)++;     /* increment the macro argument, not a captured 'dw_count' */ \
+       MACRO_END
+
+extern vm_page_t vm_object_page_grab(vm_object_t);
+
+
 #endif /* _VM_VM_PAGE_H_ */