git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/default_pager/dp_backing_store.c
xnu-2422.1.72.tar.gz
[apple/xnu.git] / osfmk / default_pager / dp_backing_store.c
index effec727ca954115c2b4ea9fdb2c828db116ebe4..b861ccf821528f7e423da516edf9c57f22e256a3 100644 (file)
@@ -1,24 +1,29 @@
-
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  * 
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
  * 
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  * 
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 /*
  * @OSF_COPYRIGHT@
  *             Paging File Management.
  */
 
+#include <mach/host_priv.h>
 #include <mach/memory_object_control.h>
 #include <mach/memory_object_server.h>
-#include "default_pager_internal.h"
+#include <mach/upl.h>
+#include <default_pager/default_pager_internal.h>
 #include <default_pager/default_pager_alerts.h>
+#include <default_pager/default_pager_object_server.h>
+
+#include <ipc/ipc_types.h>
 #include <ipc/ipc_port.h>
 #include <ipc/ipc_space.h>
+
+#include <kern/kern_types.h>
+#include <kern/host.h>
 #include <kern/queue.h>
 #include <kern/counters.h>
 #include <kern/sched_prim.h>
+
 #include <vm/vm_kern.h> 
 #include <vm/vm_pageout.h>
-/* CDY CDY */
 #include <vm/vm_map.h>
+#include <vm/vm_object.h>
+#include <vm/vm_protos.h>
+
+
+/* todo - need large internal object support */
 
 /*
  * ALLOC_STRIDE... the maximum number of bytes allocated from
 #define ALLOC_STRIDE  (1024 * 1024 * 1024)
 int physical_transfer_cluster_count = 0;
 
-#define VM_SUPER_CLUSTER       0x20000
-#define VM_SUPER_PAGES          32
+#define VM_SUPER_CLUSTER       0x40000
+#define VM_SUPER_PAGES          (VM_SUPER_CLUSTER / PAGE_SIZE)
 
 /*
  * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
  * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
  */
+#define VSTRUCT_MIN_CLSHIFT    0
+
 #define VSTRUCT_DEF_CLSHIFT    2
-int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
 int default_pager_clsize = 0;
 
+int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
+
 /* statistics */
 unsigned int clustered_writes[VM_SUPER_PAGES+1];
 unsigned int clustered_reads[VM_SUPER_PAGES+1];
@@ -115,7 +136,7 @@ int async_requests_out;
 #define VS_ASYNC_REUSE 1
 struct vs_async *vs_async_free_list;
 
-mutex_t default_pager_async_lock;      /* Protects globals above */
+lck_mtx_t      default_pager_async_lock;       /* Protects globals above */
 
 
 int vs_alloc_async_failed = 0;                 /* statistics */
@@ -127,25 +148,38 @@ void vs_free_async(struct vs_async *vsa); /* forward */
 #define VS_ALLOC_ASYNC()       vs_alloc_async()
 #define VS_FREE_ASYNC(vsa)     vs_free_async(vsa)
 
-#define VS_ASYNC_LOCK()                mutex_lock(&default_pager_async_lock)
-#define VS_ASYNC_UNLOCK()      mutex_unlock(&default_pager_async_lock)
-#define VS_ASYNC_LOCK_INIT()   mutex_init(&default_pager_async_lock,  \
-                                               ETAP_IO_DEV_PAGEH)
+#define VS_ASYNC_LOCK()                lck_mtx_lock(&default_pager_async_lock)
+#define VS_ASYNC_UNLOCK()      lck_mtx_unlock(&default_pager_async_lock)
+#define VS_ASYNC_LOCK_INIT()   lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr)
+#define VS_ASYNC_LOCK_DESTROY()        lck_mtx_destroy(&default_pager_async_lock, &default_pager_lck_grp)
 #define VS_ASYNC_LOCK_ADDR()   (&default_pager_async_lock)
 /*
  *  Paging Space Hysteresis triggers and the target notification port
  *
  */ 
-
+unsigned int   dp_pages_free_drift_count = 0;
+unsigned int   dp_pages_free_drifted_max = 0;
 unsigned int   minimum_pages_remaining = 0;
 unsigned int   maximum_pages_free = 0;
 ipc_port_t     min_pages_trigger_port = NULL;
 ipc_port_t     max_pages_trigger_port = NULL;
 
+#if CONFIG_FREEZE
+boolean_t      use_emergency_swap_file_first = TRUE;
+#else
+boolean_t      use_emergency_swap_file_first = FALSE;
+#endif
 boolean_t      bs_low = FALSE;
 int            backing_store_release_trigger_disable = 0;
+boolean_t      backing_store_stop_compaction = FALSE;
+boolean_t      backing_store_abort_compaction = FALSE;
 
+/* Have we decided if swap needs to be encrypted yet ? */
+boolean_t      dp_encryption_inited = FALSE;
+/* Should we encrypt swap ? */
+boolean_t      dp_encryption = FALSE;
+
+boolean_t      dp_isssd = FALSE;
 
 /*
  * Object sizes are rounded up to the next power of 2,
@@ -156,9 +190,10 @@ vm_size_t  max_doubled_size = 4 * 1024 * 1024;     /* 4 meg */
 /*
  * List of all backing store and segments.
  */
+MACH_PORT_FACE         emergency_segment_backing_store;
 struct backing_store_list_head backing_store_list;
 paging_segment_t       paging_segments[MAX_NUM_PAGING_SEGMENTS];
-mutex_t                        paging_segments_lock;
+lck_mtx_t                      paging_segments_lock;
 int                    paging_segment_max = 0;
 int                    paging_segment_count = 0;
 int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
@@ -171,14 +206,46 @@ int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
  * likely to be deprecated.
  */
 unsigned  int  dp_pages_free = 0;
+unsigned  int  dp_pages_reserve = 0;
 unsigned  int  cluster_transfer_minimum = 100;
 
-kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int);     /* forward */
-kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int);     /* forward */
+/* 
+ * Trim state 
+ */
+struct ps_vnode_trim_data {
+       struct vnode *vp;
+       dp_offset_t   offset;
+       dp_size_t     length;
+};
+
+/* forward declarations */
+kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int);    /* forward */
+kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int);    /* forward */
+default_pager_thread_t *get_read_buffer( void );
+kern_return_t ps_vstruct_transfer_from_segment(
+       vstruct_t        vs,
+       paging_segment_t segment,
+       upl_t            upl);
+kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
+kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *);    /* forward */
+kern_return_t vs_cluster_transfer(
+       vstruct_t       vs,
+       dp_offset_t     offset,
+       dp_size_t       cnt,
+       upl_t           upl);
+vs_map_t vs_get_map_entry(
+       vstruct_t       vs, 
+       dp_offset_t     offset);
+
+kern_return_t
+default_pager_backing_store_delete_internal( MACH_PORT_FACE );
 
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data);
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length);
 
 default_pager_thread_t *
-get_read_buffer()
+get_read_buffer( void )
 {
        int     i;
 
@@ -191,9 +258,7 @@ get_read_buffer()
                          return  dpt_array[i];
                        }
                }
-               assert_wait(&dpt_array, THREAD_UNINT);
-               DPT_UNLOCK(dpt_lock);
-               thread_block((void(*)(void))0);
+               DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
        }
 }
 
@@ -313,10 +378,10 @@ int default_pager_info_verbose = 1;
 
 void
 bs_global_info(
-       vm_size_t       *totalp,
-       vm_size_t       *freep)
+       uint64_t        *totalp,
+       uint64_t        *freep)
 {
-       vm_size_t               pages_total, pages_free;
+       uint64_t                pages_total, pages_free;
        paging_segment_t        ps;
        int                     i;
 
@@ -334,9 +399,9 @@ bs_global_info(
                 */
                pages_total += ps->ps_pgnum;
                pages_free += ps->ps_clcount << ps->ps_clshift;
-               DEBUG(DEBUG_BS_INTERNAL,
-                     ("segment #%d: %d total, %d free\n",
-                      i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
+               DP_DEBUG(DEBUG_BS_INTERNAL,
+                        ("segment #%d: %d total, %d free\n",
+                         i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
        }
        *totalp = pages_total;
        *freep = pages_free;
@@ -393,7 +458,7 @@ backing_store_lookup(
 
        if ((port == MACH_PORT_NULL) || port_is_vs(port))
 */
-       if ((port == MACH_PORT_NULL))
+       if (port == MACH_PORT_NULL)
                return BACKING_STORE_NULL;
 
        BSL_LOCK();
@@ -415,10 +480,10 @@ void backing_store_add(backing_store_t);  /* forward */
 
 void
 backing_store_add(
-       backing_store_t bs)
+       __unused backing_store_t bs)
 {
-       MACH_PORT_FACE          port = bs->bs_port;
-       MACH_PORT_FACE          pset = default_pager_default_set;
+//     MACH_PORT_FACE          port = bs->bs_port;
+//     MACH_PORT_FACE          pset = default_pager_default_set;
        kern_return_t           kr = KERN_SUCCESS;
 
        if (kr != KERN_SUCCESS)
@@ -515,7 +580,7 @@ default_pager_backing_store_create(
 {
        backing_store_t bs;
        MACH_PORT_FACE  port;
-       kern_return_t   kr;
+//     kern_return_t   kr;
        struct vstruct_alias *alias_struct;
 
        if (pager != default_pager_object)
@@ -526,20 +591,23 @@ default_pager_backing_store_create(
        ipc_port_make_send(port);
        assert (port != IP_NULL);
 
-       DEBUG(DEBUG_BS_EXTERNAL,
-             ("priority=%d clsize=%d bs_port=0x%x\n",
-              priority, clsize, (int) backing_store));
+       DP_DEBUG(DEBUG_BS_EXTERNAL,
+                ("priority=%d clsize=%d bs_port=0x%x\n",
+                 priority, clsize, (int) backing_store));
 
        alias_struct = (struct vstruct_alias *) 
                                kalloc(sizeof (struct vstruct_alias));
        if(alias_struct != NULL) {
                alias_struct->vs = (struct vstruct *)bs;
-               alias_struct->name = ISVS;
-               port->alias = (int) alias_struct;
+               alias_struct->name = &default_pager_ops;
+               port->ip_alias = (uintptr_t) alias_struct;
        }
        else {
                ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
-               kfree((vm_offset_t)bs, sizeof (struct backing_store));
+
+               BS_LOCK_DESTROY(bs);
+               kfree(bs, sizeof (struct backing_store));
+
                return KERN_RESOURCE_SHORTAGE;
        }
 
@@ -552,7 +620,7 @@ default_pager_backing_store_create(
                priority = BS_MINPRI;
        bs->bs_priority = priority;
 
-       bs->bs_clsize = bs_get_global_clsize(atop(clsize));
+       bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
 
        BSL_LOCK();
        queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
@@ -616,7 +684,7 @@ default_pager_backing_store_info(
        basic->bs_pages_out_fail= bs->bs_pages_out_fail;
 
        basic->bs_priority      = bs->bs_priority;
-       basic->bs_clsize        = ptoa(bs->bs_clsize);  /* in bytes */
+       basic->bs_clsize        = ptoa_32(bs->bs_clsize);       /* in bytes */
 
        BS_UNLOCK(bs);
 
@@ -624,6 +692,7 @@ default_pager_backing_store_info(
 }
 
 int ps_delete(paging_segment_t);       /* forward */
+boolean_t current_thread_aborted(void);
 
 int
 ps_delete(
@@ -650,12 +719,7 @@ ps_delete(
 
 
        while(backing_store_release_trigger_disable != 0) {
-               assert_wait((event_t) 
-                       &backing_store_release_trigger_disable, 
-                       THREAD_UNINT);
-               VSL_UNLOCK();
-               thread_block((void (*)(void)) 0);
-               VSL_LOCK();
+               VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);
        }
 
        /* we will choose instead to hold a send right */
@@ -670,6 +734,10 @@ ps_delete(
        if ((vs_count != 0) && (vs != NULL))
                vs->vs_async_pending += 1;  /* hold parties calling  */
                                            /* vs_async_wait */
+
+       if (bs_low == FALSE)
+               backing_store_abort_compaction = FALSE;
+
        VS_UNLOCK(vs);
        VSL_UNLOCK();
        while((vs_count != 0) && (vs != NULL)) {
@@ -690,35 +758,33 @@ ps_delete(
                        error = KERN_FAILURE;
                else {
                        vm_object_t     transfer_object;
-                       int             count;
+                       unsigned int    count;
                        upl_t           upl;
+                       int             upl_flags;
 
-                       transfer_object = vm_object_allocate(VM_SUPER_CLUSTER);
+                       transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
                        count = 0;
+                       upl_flags = (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE |
+                                    UPL_SET_LITE | UPL_SET_INTERNAL);
+                       if (dp_encryption) {
+                               /* mark the pages as "encrypted" when they come in */
+                               upl_flags |= UPL_ENCRYPT;
+                       }
                        error = vm_object_upl_request(transfer_object, 
                                (vm_object_offset_t)0, VM_SUPER_CLUSTER,
-                               &upl, NULL, &count,
-                               UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
-                                           | UPL_SET_INTERNAL);
+                               &upl, NULL, &count, upl_flags);
+
                        if(error == KERN_SUCCESS) {
-#ifndef ubc_sync_working
-                               upl_commit(upl, NULL);
-                               upl_deallocate(upl);
-                               error = ps_vstruct_transfer_from_segment(
-                                               vs, ps, transfer_object);
-#else
                                error = ps_vstruct_transfer_from_segment(
                                                        vs, ps, upl);
-                               upl_commit(upl, NULL);
+                               upl_commit(upl, NULL, 0);
                                upl_deallocate(upl);
-#endif
-                               vm_object_deallocate(transfer_object);
                        } else {
-                               vm_object_deallocate(transfer_object);
                                error = KERN_FAILURE;
                        }
+                       vm_object_deallocate(transfer_object);
                }
-               if(error) {
+               if(error || current_thread_aborted()) {
                        VS_LOCK(vs);
                        vs->vs_async_pending -= 1;  /* release vs_async_wait */
                        if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
@@ -734,12 +800,8 @@ ps_delete(
                VSL_LOCK(); 
 
                while(backing_store_release_trigger_disable != 0) {
-                       assert_wait((event_t) 
-                               &backing_store_release_trigger_disable, 
-                               THREAD_UNINT);
-                       VSL_UNLOCK();
-                       thread_block((void (*)(void)) 0);
-                       VSL_LOCK();
+                       VSL_SLEEP(&backing_store_release_trigger_disable,
+                                 THREAD_UNINT);
                }
 
                next_vs = (vstruct_t) queue_next(&(vs->vs_links));
@@ -773,7 +835,7 @@ ps_delete(
 
 
 kern_return_t
-default_pager_backing_store_delete(
+default_pager_backing_store_delete_internal(
        MACH_PORT_FACE backing_store)
 {
        backing_store_t         bs;
@@ -781,28 +843,35 @@ default_pager_backing_store_delete(
        paging_segment_t        ps;
        int                     error;
        int                     interim_pages_removed = 0;
-       kern_return_t           kr;
+       boolean_t               dealing_with_emergency_segment = ( backing_store == emergency_segment_backing_store );
 
        if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
                return KERN_INVALID_ARGUMENT;
 
-#if 0
-       /* not implemented */
-       BS_UNLOCK(bs);
-       return KERN_FAILURE;
-#endif
-
-    restart:
+restart:
        PSL_LOCK();
        error = KERN_SUCCESS;
        for (i = 0; i <= paging_segment_max; i++) {
                ps = paging_segments[i];
                if (ps != PAGING_SEGMENT_NULL &&
                    ps->ps_bs == bs &&
-                   ! ps->ps_going_away) {
+                   ! IS_PS_GOING_AWAY(ps)) {
                        PS_LOCK(ps);
+                       
+                       if( IS_PS_GOING_AWAY(ps) || !IS_PS_OK_TO_USE(ps)) {
+                       /* 
+                        * Someone is already busy reclamining this paging segment.
+                        * If it's the emergency segment we are looking at then check
+                        * that someone has not already recovered it and set the right
+                        * state i.e. online but not activated.
+                        */
+                               PS_UNLOCK(ps);
+                               continue;
+                       }
+
                        /* disable access to this segment */
-                       ps->ps_going_away = TRUE;
+                       ps->ps_state &= ~PS_CAN_USE;
+                       ps->ps_state |= PS_GOING_AWAY;
                        PS_UNLOCK(ps);
                        /*
                         * The "ps" segment is "off-line" now,
@@ -843,10 +912,26 @@ default_pager_backing_store_delete(
                        ps = paging_segments[i];
                        if (ps != PAGING_SEGMENT_NULL &&
                            ps->ps_bs == bs &&
-                           ps->ps_going_away) {
+                           IS_PS_GOING_AWAY(ps)) {
                                PS_LOCK(ps);
+                               
+                               if( !IS_PS_GOING_AWAY(ps)) {
+                                       PS_UNLOCK(ps);
+                                       continue;
+                               }
+                               /* Handle the special clusters that came in while we let go the lock*/  
+                               if( ps->ps_special_clusters) {
+                                       dp_pages_free += ps->ps_special_clusters << ps->ps_clshift;
+                                       ps->ps_pgcount += ps->ps_special_clusters << ps->ps_clshift;
+                                       ps->ps_clcount += ps->ps_special_clusters;
+                                       if ( ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) {
+                                               ps_select_array[ps->ps_bs->bs_priority] = 0;
+                                       }
+                                       ps->ps_special_clusters = 0;
+                               }
                                /* re-enable access to this segment */
-                               ps->ps_going_away = FALSE;
+                               ps->ps_state &= ~PS_GOING_AWAY;
+                               ps->ps_state |= PS_CAN_USE;
                                PS_UNLOCK(ps);
                        }
                }
@@ -860,13 +945,22 @@ default_pager_backing_store_delete(
                ps = paging_segments[i];
                if (ps != PAGING_SEGMENT_NULL &&
                    ps->ps_bs == bs) { 
-                       if(ps->ps_going_away) {
-                               paging_segments[i] = PAGING_SEGMENT_NULL;
-                               paging_segment_count--;
-                               PS_LOCK(ps);
-                               kfree((vm_offset_t)ps->ps_bmap, 
-                                               RMAPSIZE(ps->ps_ncls));
-                               kfree((vm_offset_t)ps, sizeof *ps);
+                       if(IS_PS_GOING_AWAY(ps)) {
+                               if(IS_PS_EMERGENCY_SEGMENT(ps)) {
+                                       PS_LOCK(ps);
+                                       ps->ps_state &= ~PS_GOING_AWAY;
+                                       ps->ps_special_clusters = 0;
+                                       ps->ps_pgcount = ps->ps_pgnum;
+                                       ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
+                                       dp_pages_reserve += ps->ps_pgcount;
+                                       PS_UNLOCK(ps);
+                               } else {
+                                       paging_segments[i] = PAGING_SEGMENT_NULL;
+                                       paging_segment_count--;
+                                       PS_LOCK(ps);
+                                       kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+                                       kfree(ps, sizeof *ps);
+                               }
                        }
                }
        }
@@ -880,6 +974,11 @@ default_pager_backing_store_delete(
 
        PSL_UNLOCK();
 
+       if( dealing_with_emergency_segment ) {
+               BS_UNLOCK(bs);
+               return KERN_SUCCESS;
+       }
+
        /*
         * All the segments have been deleted.
         * We can remove the backing store.
@@ -888,9 +987,9 @@ default_pager_backing_store_delete(
        /*
         * Disable lookups of this backing store.
         */
-       if((void *)bs->bs_port->alias != NULL)
-               kfree((vm_offset_t) bs->bs_port->alias, 
-                               sizeof (struct vstruct_alias));
+       if((void *)bs->bs_port->ip_alias != NULL)
+               kfree((void *) bs->bs_port->ip_alias,
+                     sizeof (struct vstruct_alias));
        ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
        bs->bs_port = MACH_PORT_NULL;
        BS_UNLOCK(bs);
@@ -906,11 +1005,22 @@ default_pager_backing_store_delete(
        /*
         * Free the backing store structure.
         */
-       kfree((vm_offset_t)bs, sizeof *bs);
+       BS_LOCK_DESTROY(bs);
+       kfree(bs, sizeof *bs);
 
        return KERN_SUCCESS;
 }
 
+kern_return_t
+default_pager_backing_store_delete(
+       MACH_PORT_FACE backing_store) 
+{
+       if( backing_store != emergency_segment_backing_store ) {
+               default_pager_backing_store_delete_internal(emergency_segment_backing_store);
+       }
+       return(default_pager_backing_store_delete_internal(backing_store));
+}
+
 int    ps_enter(paging_segment_t);     /* forward */
 
 int
@@ -1005,7 +1115,8 @@ default_pager_add_segment(
        PS_LOCK_INIT(ps);
        ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
        if (!ps->ps_bmap) {
-               kfree((vm_offset_t)ps, sizeof *ps);
+               PS_LOCK_DESTROY(ps);
+               kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
        }
@@ -1013,12 +1124,22 @@ default_pager_add_segment(
                clrbit(ps->ps_bmap, i);
        }
 
-       ps->ps_going_away = FALSE;
+       if(paging_segment_count == 0) {
+               ps->ps_state = PS_EMERGENCY_SEGMENT;
+               if(use_emergency_swap_file_first) {
+                       ps->ps_state |= PS_CAN_USE;
+               }
+       } else {
+               ps->ps_state = PS_CAN_USE;
+       }
+
        ps->ps_bs = bs;
 
        if ((error = ps_enter(ps)) != 0) {
-               kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
-               kfree((vm_offset_t)ps, sizeof *ps);
+               kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+
+               PS_LOCK_DESTROY(ps);
+               kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
        }
@@ -1028,15 +1149,19 @@ default_pager_add_segment(
        BS_UNLOCK(bs);
 
        PSL_LOCK();
-       dp_pages_free += ps->ps_pgcount;
+       if(IS_PS_OK_TO_USE(ps)) {
+               dp_pages_free += ps->ps_pgcount;
+       } else {
+               dp_pages_reserve += ps->ps_pgcount;
+       }
        PSL_UNLOCK();
 
        bs_more_space(ps->ps_clcount);
 
-       DEBUG(DEBUG_BS_INTERNAL,
-             ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
-              device, offset, count, record_size,
-              ps->ps_record_shift, ps->ps_pgnum));
+       DP_DEBUG(DEBUG_BS_INTERNAL,
+                ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
+                 device, offset, count, record_size,
+                 ps->ps_record_shift, ps->ps_pgnum));
 
        return KERN_SUCCESS;
 }
@@ -1092,7 +1217,7 @@ vs_alloc_async(void)
 {
        struct vs_async *vsa;
        MACH_PORT_FACE  reply_port;
-       kern_return_t   kr;
+//     kern_return_t   kr;
 
        VS_ASYNC_LOCK();
        if (vs_async_free_list == NULL) {
@@ -1110,8 +1235,8 @@ vs_alloc_async(void)
                                kalloc(sizeof (struct vstruct_alias));
                        if(alias_struct != NULL) {
                                alias_struct->vs = (struct vstruct *)vsa;
-                               alias_struct->name = ISVS;
-                               reply_port->alias = (int) alias_struct;
+                               alias_struct->name = &default_pager_ops;
+                               reply_port->ip_alias = (uintptr_t) alias_struct;
                                vsa->reply_port = reply_port;
                                vs_alloc_async_count++;
                        }
@@ -1119,8 +1244,7 @@ vs_alloc_async(void)
                                vs_alloc_async_failed++;
                                ipc_port_dealloc_kernel((MACH_PORT_FACE) 
                                                                (reply_port));
-                               kfree((vm_offset_t)vsa, 
-                                               sizeof (struct vs_async));
+                               kfree(vsa, sizeof (struct vs_async));
                                vsa = NULL;
                        }
                }
@@ -1163,8 +1287,8 @@ vs_alloc_async(void)
                                kalloc(sizeof (struct vstruct_alias));
                        if(alias_struct != NULL) {
                                alias_struct->vs = reply_port;
-                               alias_struct->name = ISVS;
-                               reply_port->alias = (int) vsa;
+                               alias_struct->name = &default_pager_ops;
+                               reply_port->defpager_importance.alias = (int) vsa;
                                vsa->reply_port = reply_port;
                                vs_alloc_async_count++;
                        }
@@ -1172,8 +1296,7 @@ vs_alloc_async(void)
                                vs_alloc_async_failed++;
                                ipc_port_dealloc_kernel((MACH_PORT_FACE) 
                                                                (reply_port));
-                               kfree((vm_offset_t) vsa, 
-                                               sizeof (struct vs_async));
+                               kfree(vsa, sizeof (struct vs_async));
                                vsa = NULL;
                        }
        }
@@ -1189,8 +1312,8 @@ vs_free_async(
        kern_return_t   kr;
 
        reply_port = vsa->reply_port;
-       kfree((vm_offset_t) reply_port->alias, sizeof (struct vstuct_alias));
-       kfree((vm_offset_t) vsa, sizeof (struct vs_async));
+       kfree(reply_port->ip_alias, sizeof (struct vstuct_alias));
+       kfree(vsa, sizeof (struct vs_async));
        ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
 #if 0
        VS_ASYNC_LOCK();
@@ -1205,10 +1328,10 @@ zone_t  vstruct_zone;
 
 vstruct_t
 ps_vstruct_create(
-       vm_size_t size)
+       dp_size_t size)
 {
        vstruct_t       vs;
-       int             i;
+       unsigned int    i;
 
        vs = (vstruct_t) zalloc(vstruct_zone);
        if (vs == VSTRUCT_NULL) {
@@ -1220,23 +1343,15 @@ ps_vstruct_create(
        /*
         * The following fields will be provided later.
         */
-       vs->vs_mem_obj = NULL;
+       vs->vs_pager_ops = NULL;
        vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
        vs->vs_references = 1;
        vs->vs_seqno = 0;
 
-#ifdef MACH_KERNEL
        vs->vs_waiting_seqno = FALSE;
        vs->vs_waiting_read = FALSE;
        vs->vs_waiting_write = FALSE;
        vs->vs_waiting_async = FALSE;
-#else
-       mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO);
-       mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD);
-       mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE);
-       mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS);
-       mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC);
-#endif
 
        vs->vs_readers = 0;
        vs->vs_writers = 0;
@@ -1244,7 +1359,7 @@ ps_vstruct_create(
        vs->vs_errors = 0;
 
        vs->vs_clshift = local_log2(bs_get_global_clsize(0));
-       vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
+       vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
        vs->vs_async_pending = 0;
 
        /*
@@ -1261,14 +1376,14 @@ ps_vstruct_create(
                vs->vs_indirect = FALSE;
        }
        vs->vs_xfer_pending = FALSE;
-       DEBUG(DEBUG_VS_INTERNAL,
-             ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
+       DP_DEBUG(DEBUG_VS_INTERNAL,
+                ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
 
        /*
         * Check to see that we got the space.
         */
        if (!vs->vs_dmap) {
-               kfree((vm_offset_t)vs, sizeof *vs);
+               kfree(vs, sizeof *vs);
                return VSTRUCT_NULL;
        }
 
@@ -1289,12 +1404,12 @@ ps_vstruct_create(
        return vs;
 }
 
-paging_segment_t ps_select_segment(int, int *);        /* forward */
+paging_segment_t ps_select_segment(unsigned int, int *);       /* forward */
 
 paging_segment_t
 ps_select_segment(
-       int     shift,
-       int     *psindex)
+       unsigned int    shift,
+       int             *psindex)
 {
        paging_segment_t        ps;
        int                     i;
@@ -1307,34 +1422,49 @@ ps_select_segment(
 
        PSL_LOCK();
        if (paging_segment_count == 1) {
-               paging_segment_t lps;   /* used to avoid extra PS_UNLOCK */
+               paging_segment_t lps = PAGING_SEGMENT_NULL;     /* used to avoid extra PS_UNLOCK */
                ipc_port_t trigger = IP_NULL;
 
                ps = paging_segments[paging_segment_max];
                *psindex = paging_segment_max;
                PS_LOCK(ps);
-               if (ps->ps_going_away) {
-                       /* this segment is being turned off */
-                       lps = PAGING_SEGMENT_NULL;
-               } else {
-                       ASSERT(ps->ps_clshift >= shift);
+               if( !IS_PS_EMERGENCY_SEGMENT(ps) ) {
+                       panic("Emergency paging segment missing\n");
+               }
+               ASSERT(ps->ps_clshift >= shift);
+               if(IS_PS_OK_TO_USE(ps)) {
                        if (ps->ps_clcount) {
                                ps->ps_clcount--;
                                dp_pages_free -=  1 << ps->ps_clshift;
+                               ps->ps_pgcount -=  1 << ps->ps_clshift;
                                if(min_pages_trigger_port && 
                                  (dp_pages_free < minimum_pages_remaining)) {
                                        trigger = min_pages_trigger_port;
                                        min_pages_trigger_port = NULL;
                                        bs_low = TRUE;
+                                       backing_store_abort_compaction = TRUE;
                                }
                                lps = ps;
-                       } else
-                               lps = PAGING_SEGMENT_NULL;
-               }
+                       } 
+               } 
                PS_UNLOCK(ps);
+               
+               if( lps == PAGING_SEGMENT_NULL ) {
+                       if(dp_pages_free) {
+                               dp_pages_free_drift_count++;
+                               if(dp_pages_free > dp_pages_free_drifted_max) {
+                                       dp_pages_free_drifted_max = dp_pages_free;
+                               }
+                               dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free));
+                       }
+                       dp_pages_free = 0;
+               }
+
                PSL_UNLOCK();
 
                if (trigger != IP_NULL) {
+                       dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
                        default_pager_space_alert(trigger, HI_WAT_ALERT);
                        ipc_port_release_send(trigger);
                }
@@ -1342,6 +1472,14 @@ ps_select_segment(
        }
 
        if (paging_segment_count == 0) {
+               if(dp_pages_free) {
+                       dp_pages_free_drift_count++;
+                       if(dp_pages_free > dp_pages_free_drifted_max) {
+                               dp_pages_free_drifted_max = dp_pages_free;
+                       }
+                       dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free));
+               }
+               dp_pages_free = 0;
                PSL_UNLOCK();
                return PAGING_SEGMENT_NULL;
        }
@@ -1383,35 +1521,40 @@ ps_select_segment(
                                 * >= that of the vstruct.
                                 */
                                PS_LOCK(ps);
-                               if (ps->ps_going_away) {
-                                       /* this segment is being turned off */
-                               } else if ((ps->ps_clcount) &&
-                                          (ps->ps_clshift >= shift)) {
-                                       ipc_port_t trigger = IP_NULL;
-
-                                       ps->ps_clcount--;
-                                       dp_pages_free -=  1 << ps->ps_clshift;
-                                       if(min_pages_trigger_port && 
-                                               (dp_pages_free < 
-                                               minimum_pages_remaining)) {
-                                               trigger = min_pages_trigger_port;
-                                               min_pages_trigger_port = NULL;
-                                       }
-                                       PS_UNLOCK(ps);
-                                       /*
-                                        * found one, quit looking.
-                                        */
-                                       ps_select_array[i] = j;
-                                       PSL_UNLOCK();
-                                       
-                                       if (trigger != IP_NULL) {
-                                               default_pager_space_alert(
-                                                       trigger,
-                                                       HI_WAT_ALERT);
-                                               ipc_port_release_send(trigger);
+                               if (IS_PS_OK_TO_USE(ps)) {
+                                       if ((ps->ps_clcount) &&
+                                                  (ps->ps_clshift >= shift)) {
+                                               ipc_port_t trigger = IP_NULL;
+
+                                               ps->ps_clcount--;
+                                               dp_pages_free -=  1 << ps->ps_clshift;
+                                               ps->ps_pgcount -=  1 << ps->ps_clshift;
+                                               if(min_pages_trigger_port && 
+                                                       (dp_pages_free < 
+                                                       minimum_pages_remaining)) {
+                                                       trigger = min_pages_trigger_port;
+                                                       min_pages_trigger_port = NULL;
+                                                       bs_low = TRUE;
+                                                       backing_store_abort_compaction = TRUE;
+                                               }
+                                               PS_UNLOCK(ps);
+                                               /*
+                                                * found one, quit looking.
+                                                */
+                                               ps_select_array[i] = j;
+                                               PSL_UNLOCK();
+                                               
+                                               if (trigger != IP_NULL) {
+                                                       dprintf(("ps_select_segment - send HI_WAT_ALERT\n"));
+
+                                                       default_pager_space_alert(
+                                                               trigger,
+                                                               HI_WAT_ALERT);
+                                                       ipc_port_release_send(trigger);
+                                               }
+                                               *psindex = j;
+                                               return ps;
                                        }
-                                       *psindex = j;
-                                       return ps;
                                }
                                PS_UNLOCK(ps);
                        }
@@ -1425,22 +1568,31 @@ ps_select_segment(
                        j++;
                }
        }
+       
+       if(dp_pages_free) {
+               dp_pages_free_drift_count++;
+               if(dp_pages_free > dp_pages_free_drifted_max) {
+                       dp_pages_free_drifted_max = dp_pages_free;
+               }
+               dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count,dp_pages_free));
+       }
+       dp_pages_free = 0;
        PSL_UNLOCK();
        return PAGING_SEGMENT_NULL;
 }
 
-vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
+dp_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
 
-vm_offset_t
+dp_offset_t
 ps_allocate_cluster(
        vstruct_t               vs,
        int                     *psindex,
        paging_segment_t        use_ps)
 {
-       int                     byte_num;
+       unsigned int            byte_num;
        int                     bit_num = 0;
        paging_segment_t        ps;
-       vm_offset_t             cluster;
+       dp_offset_t             cluster;
        ipc_port_t              trigger = IP_NULL;
 
        /*
@@ -1466,50 +1618,132 @@ ps_allocate_cluster(
         * This and the ordering of the paging segment "going_away" bit setting
         * protects us.
         */
+retry:
        if (use_ps != PAGING_SEGMENT_NULL) {
                ps = use_ps;
                PSL_LOCK();
                PS_LOCK(ps);
+
+               ASSERT(ps->ps_clcount != 0);
+
                ps->ps_clcount--;
                dp_pages_free -=  1 << ps->ps_clshift;
+               ps->ps_pgcount -=  1 << ps->ps_clshift;
                if(min_pages_trigger_port && 
                                (dp_pages_free < minimum_pages_remaining)) {
                        trigger = min_pages_trigger_port;
                        min_pages_trigger_port = NULL;
+                       bs_low = TRUE;
+                       backing_store_abort_compaction = TRUE;
                }
                PSL_UNLOCK();
                PS_UNLOCK(ps);
                if (trigger != IP_NULL) {
+                       dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
                        default_pager_space_alert(trigger, HI_WAT_ALERT);
                        ipc_port_release_send(trigger);
                }
 
        } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
                   PAGING_SEGMENT_NULL) {
-#if 0
-               bs_no_paging_space(TRUE);
-#endif
-#if 0
-               if (verbose)
-#endif
-                       dprintf(("no space in available paging segments; "
-                                "swapon suggested\n"));
-               /* the count got off maybe, reset to zero */
+               static clock_sec_t lastnotify = 0;
+               clock_sec_t now;
+               clock_nsec_t nanoseconds_dummy;
+               
+               /* 
+                * Don't immediately jump to the emergency segment. Give the
+                * dynamic pager a chance to create its first normal swap file.
+                * Unless, of course the very first normal swap file can't be 
+                * created due to some problem and we didn't expect that problem
+                * i.e. use_emergency_swap_file_first was never set to true initially.
+                * It then gets set in the swap file creation error handling.
+                */
+               if(paging_segment_count > 1 || use_emergency_swap_file_first == TRUE) {
+                       
+                       ps = paging_segments[EMERGENCY_PSEG_INDEX];
+                       if(IS_PS_EMERGENCY_SEGMENT(ps) && !IS_PS_GOING_AWAY(ps)) {
+                               PSL_LOCK();
+                               PS_LOCK(ps);
+                               
+                               if(IS_PS_GOING_AWAY(ps)) {
+                                       /* Someone de-activated the emergency paging segment*/
+                                       PS_UNLOCK(ps);
+                                       PSL_UNLOCK();
+
+                               } else if(dp_pages_free) {
+                                       /* 
+                                        * Someone has already activated the emergency paging segment 
+                                        * OR
+                                        * Between us having rec'd a NULL segment from ps_select_segment
+                                        * and reaching here a new normal segment could have been added.
+                                        * E.g. we get NULL segment and another thread just added the
+                                        * new swap file. Hence check to see if we have more dp_pages_free
+                                        * before activating the emergency segment.
+                                        */
+                                       PS_UNLOCK(ps);
+                                       PSL_UNLOCK();
+                                       goto retry;
+                               
+                               } else if(!IS_PS_OK_TO_USE(ps) && ps->ps_clcount) {
+                                       /*
+                                        * PS_CAN_USE is only reset from the emergency segment when it's
+                                        * been successfully recovered. So it's legal to have an emergency
+                                        * segment that has PS_CAN_USE but no clusters because its recovery
+                                        * failed.
+                                        */
+                                       backing_store_t bs = ps->ps_bs;
+                                       ps->ps_state |= PS_CAN_USE;
+                                       if(ps_select_array[bs->bs_priority] == BS_FULLPRI ||
+                                               ps_select_array[bs->bs_priority] == BS_NOPRI) {
+                                               ps_select_array[bs->bs_priority] = 0;
+                                       }
+                                       dp_pages_free += ps->ps_pgcount;
+                                       dp_pages_reserve -= ps->ps_pgcount;
+                                       PS_UNLOCK(ps);
+                                       PSL_UNLOCK();
+                                       dprintf(("Switching ON Emergency paging segment\n"));
+                                       goto retry;
+                               }
+
+                               PS_UNLOCK(ps);
+                               PSL_UNLOCK();
+                       }
+               }
+               
+               /*
+                * Emit a notification of the low-paging resource condition
+                * but don't issue it more than once every five seconds.  This
+                * prevents us from overflowing logs with thousands of
+                * repetitions of the message.
+                */
+               clock_get_system_nanotime(&now, &nanoseconds_dummy);
+               if (paging_segment_count > 1 && (now > lastnotify + 5)) {
+                       /* With an activated emergency paging segment we still
+                        * didn't get any clusters. This could mean that the 
+                        * emergency paging segment is exhausted.
+                        */
+                       dprintf(("System is out of paging space.\n"));
+                       lastnotify = now;
+               }
+
                PSL_LOCK();
-               dp_pages_free = 0;
+               
                if(min_pages_trigger_port) {
                        trigger = min_pages_trigger_port;
                        min_pages_trigger_port = NULL;
                        bs_low = TRUE;
+                       backing_store_abort_compaction = TRUE;
                }
                PSL_UNLOCK();
                if (trigger != IP_NULL) {
+                       dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n"));
+
                        default_pager_space_alert(trigger, HI_WAT_ALERT);
                        ipc_port_release_send(trigger);
                }
-               return (vm_offset_t) -1;
+               return (dp_offset_t) -1;
        }
-       ASSERT(ps->ps_clcount != 0);
 
        /*
         * Look for an available cluster.  At the end of the loop,
@@ -1540,16 +1774,15 @@ ps_allocate_cluster(
        return cluster;
 }
 
-void ps_deallocate_cluster(paging_segment_t, vm_offset_t);     /* forward */
+void ps_deallocate_cluster(paging_segment_t, dp_offset_t);     /* forward */
 
 void
 ps_deallocate_cluster(
        paging_segment_t        ps,
-       vm_offset_t             cluster)
+       dp_offset_t             cluster)
 {
-       ipc_port_t trigger = IP_NULL;
 
-       if (cluster >= (vm_offset_t) ps->ps_ncls)
+       if (cluster >= ps->ps_ncls)
                panic("ps_deallocate_cluster: Invalid cluster number");
 
        /*
@@ -1559,15 +1792,13 @@ ps_deallocate_cluster(
        PSL_LOCK();
        PS_LOCK(ps);
        clrbit(ps->ps_bmap, cluster);
-       ++ps->ps_clcount;
-       dp_pages_free +=  1 << ps->ps_clshift;
-       if(max_pages_trigger_port
-               && (backing_store_release_trigger_disable == 0)
-               && (dp_pages_free > maximum_pages_free)) {
-               trigger = max_pages_trigger_port;
-               max_pages_trigger_port = NULL;
+       if( IS_PS_OK_TO_USE(ps)) {
+               ++ps->ps_clcount;
+               ps->ps_pgcount +=  1 << ps->ps_clshift;
+               dp_pages_free +=  1 << ps->ps_clshift;
+       } else {
+               ps->ps_special_clusters += 1;
        }
-       PSL_UNLOCK();
 
        /*
         * Move the hint down to the freed cluster if it is
@@ -1577,54 +1808,51 @@ ps_deallocate_cluster(
                ps->ps_hint = (cluster/NBBY);
        }
 
-       PS_UNLOCK(ps);
 
        /*
         * If we're freeing space on a full priority, reset the array.
         */
-       PSL_LOCK();
-       if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
+       if ( IS_PS_OK_TO_USE(ps) && ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
                ps_select_array[ps->ps_bs->bs_priority] = 0;
+       PS_UNLOCK(ps);
        PSL_UNLOCK();
 
-       if (trigger != IP_NULL) {
-               VSL_LOCK();
-               if(backing_store_release_trigger_disable != 0) {
-                       assert_wait((event_t) 
-                           &backing_store_release_trigger_disable, 
-                           THREAD_UNINT);
-                       VSL_UNLOCK();
-                       thread_block((void (*)(void)) 0);
-               } else {
-                       VSL_UNLOCK();
-               }
-               default_pager_space_alert(trigger, LO_WAT_ALERT);
-               ipc_port_release_send(trigger);
-       }
-
        return;
 }
 
-void ps_dealloc_vsmap(struct vs_map *, vm_size_t);     /* forward */
+void ps_dealloc_vsmap(struct vs_map *, dp_size_t);     /* forward */
 
 void
 ps_dealloc_vsmap(
        struct vs_map   *vsmap,
-       vm_size_t       size)
+       dp_size_t       size)
 {
-       int i;
-       for (i = 0; i < size; i++)
-               if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
+       unsigned int i;
+       struct ps_vnode_trim_data trim_data;
+
+       ps_vnode_trim_init(&trim_data);
+
+       for (i = 0; i < size; i++) {
+               if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) {
+                       ps_vnode_trim_more(&trim_data,
+                                             &vsmap[i],
+                                             VSM_PS(vsmap[i])->ps_clshift,
+                                             vm_page_size << VSM_PS(vsmap[i])->ps_clshift);
                        ps_deallocate_cluster(VSM_PS(vsmap[i]),
                                              VSM_CLOFF(vsmap[i]));
+               } else {
+                       ps_vnode_trim_now(&trim_data);
+               }
+       }
+       ps_vnode_trim_now(&trim_data);
 }
 
 void
 ps_vstruct_dealloc(
        vstruct_t vs)
 {
-       int     i;
-       spl_t   s;
+       unsigned int    i;
+//     spl_t   s;
 
        VS_MAP_LOCK(vs);
 
@@ -1641,31 +1869,159 @@ ps_vstruct_dealloc(
                for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
                        if (vs->vs_imap[i] != NULL) {
                                ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
-                               kfree((vm_offset_t)vs->vs_imap[i], 
-                                                       CLMAP_THRESHOLD);
+                               kfree(vs->vs_imap[i], CLMAP_THRESHOLD);
                        }
                }
-               kfree((vm_offset_t)vs->vs_imap, 
-                                       INDIRECT_CLMAP_SIZE(vs->vs_size));
+               kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
        } else {
                /*
                 * Direct map.  Free used clusters, then memory.
                 */
                ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
-               kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
+               kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
        }
        VS_MAP_UNLOCK(vs);
 
        bs_commit(- vs->vs_size);
 
-       zfree(vstruct_zone, (vm_offset_t)vs);
+       VS_MAP_LOCK_DESTROY(vs);
+
+       zfree(vstruct_zone, vs);
+}
+
+kern_return_t
+ps_vstruct_reclaim(
+       vstruct_t vs,
+       boolean_t return_to_vm,
+       boolean_t reclaim_backing_store)
+{
+       unsigned int    i, j;
+       struct vs_map   *vsmap;
+       boolean_t       vsmap_all_clear, vsimap_all_clear;
+       struct vm_object_fault_info fault_info;
+       int             clmap_off;
+       unsigned int    vsmap_size;
+       kern_return_t   kr = KERN_SUCCESS;
+
+       VS_MAP_LOCK(vs);
+
+       fault_info.cluster_size = VM_SUPER_CLUSTER;
+       fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL;
+       fault_info.user_tag = 0;
+       fault_info.lo_offset = 0;
+       fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift);
+       fault_info.io_sync = reclaim_backing_store;
+       fault_info.batch_pmap_op = FALSE;
+
+       /*
+        * If this is an indirect structure, then we walk through the valid
+        * (non-zero) indirect pointers and deallocate the clusters
+        * associated with each used map entry (via ps_dealloc_vsmap).
+        * When all of the clusters in an indirect block have been
+        * freed, we deallocate the block.  When all of the indirect
+        * blocks have been deallocated we deallocate the memory
+        * holding the indirect pointers.
+        */
+       if (vs->vs_indirect) {
+               vsimap_all_clear = TRUE;
+               for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
+                       vsmap = vs->vs_imap[i];
+                       if (vsmap == NULL)
+                               continue;
+                       /* loop on clusters in this indirect map */
+                       clmap_off = (vm_page_size * CLMAP_ENTRIES *
+                                    VSCLSIZE(vs) * i);
+                       if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
+                               vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
+                       else
+                               vsmap_size = CLMAP_ENTRIES;
+                       vsmap_all_clear = TRUE;
+                       if (return_to_vm) {
+                               for (j = 0; j < vsmap_size;) {
+                                       if (VSM_ISCLR(vsmap[j]) ||
+                                           VSM_ISERR(vsmap[j])) {
+                                               j++;
+                                               clmap_off += vm_page_size * VSCLSIZE(vs);
+                                               continue;
+                                       }
+                                       VS_MAP_UNLOCK(vs);
+                                       kr = pvs_cluster_read(
+                                               vs,
+                                               clmap_off,
+                                               (dp_size_t) -1, /* read whole cluster */
+                                               &fault_info);
+
+                                       VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+                                       if (kr != KERN_SUCCESS) {
+                                               vsmap_all_clear = FALSE;
+                                               vsimap_all_clear = FALSE;
+
+                                               kr = KERN_MEMORY_ERROR;
+                                               goto out;
+                                       }
+                               }
+                       }
+                       if (vsmap_all_clear) {
+                               ps_dealloc_vsmap(vsmap, CLMAP_ENTRIES);
+                               kfree(vsmap, CLMAP_THRESHOLD);
+                               vs->vs_imap[i] = NULL;
+                       }
+               }
+               if (vsimap_all_clear) {
+//                     kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
+               }
+       } else {
+               /*
+                * Direct map.  Free used clusters, then memory.
+                */
+               vsmap = vs->vs_dmap;
+               if (vsmap == NULL) {
+                       goto out;
+               }
+               vsmap_all_clear = TRUE;
+               /* loop on clusters in the direct map */
+               if (return_to_vm) {
+                       for (j = 0; j < vs->vs_size;) {
+                               if (VSM_ISCLR(vsmap[j]) ||
+                                   VSM_ISERR(vsmap[j])) {
+                                       j++;
+                                       continue;
+                               }
+                               clmap_off = vm_page_size * (j << vs->vs_clshift);
+                               VS_MAP_UNLOCK(vs);
+                               kr = pvs_cluster_read(
+                                       vs,
+                                       clmap_off,
+                                       (dp_size_t) -1, /* read whole cluster */
+                                       &fault_info);
+
+                               VS_MAP_LOCK(vs); /* XXX what if it changed ? */
+                               if (kr != KERN_SUCCESS) {
+                                       vsmap_all_clear = FALSE;
+
+                                       kr = KERN_MEMORY_ERROR;
+                                       goto out;
+                               } else {
+//                                     VSM_CLR(vsmap[j]);
+                               }
+                       }
+               }
+               if (vsmap_all_clear) {
+                       ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
+//                     kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
+               }
+       }
+out:
+       VS_MAP_UNLOCK(vs);
+
+       return kr;
 }
 
-int ps_map_extend(vstruct_t, int);     /* forward */
+int ps_map_extend(vstruct_t, unsigned int);    /* forward */
 
 int ps_map_extend(
        vstruct_t       vs,
-       int             new_size)
+       unsigned int    new_size)
 {
        struct vs_map   **new_imap;
        struct vs_map   *new_dmap = NULL;
@@ -1727,7 +2083,7 @@ int ps_map_extend(
                        /* Allocate an indirect page */
                        if ((new_imap[0] = (struct vs_map *)
                             kalloc(CLMAP_THRESHOLD)) == NULL) {
-                               kfree((vm_offset_t)new_imap, new_map_size);
+                               kfree(new_imap, new_map_size);
                                return -1;
                        }
                        new_dmap = new_imap[0];
@@ -1768,29 +2124,29 @@ int ps_map_extend(
        bs_commit(new_size - vs->vs_size);
        vs->vs_size = new_size;
        if (old_map)
-               kfree((vm_offset_t)old_map, old_map_size);
+               kfree(old_map, old_map_size);
        return 0;
 }
 
-vm_offset_t
+dp_offset_t
 ps_clmap(
        vstruct_t       vs,
-       vm_offset_t     offset,
+       dp_offset_t     offset,
        struct clmap    *clmap,
        int             flag,
-       vm_size_t       size,
+       dp_size_t       size,
        int             error)
 {
-       vm_offset_t     cluster;        /* The cluster of offset.       */
-       vm_offset_t     newcl;          /* The new cluster allocated.   */
-       vm_offset_t     newoff;
-       int             i;
+       dp_offset_t     cluster;        /* The cluster of offset.       */
+       dp_offset_t     newcl;          /* The new cluster allocated.   */
+       dp_offset_t     newoff;
+       unsigned int    i;
        struct vs_map   *vsmap;
 
        VS_MAP_LOCK(vs);
 
        ASSERT(vs->vs_dmap);
-       cluster = atop(offset) >> vs->vs_clshift;
+       cluster = atop_32(offset) >> vs->vs_clshift;
 
        /*
         * Initialize cluster error value
@@ -1804,11 +2160,11 @@ ps_clmap(
                if (flag == CL_FIND) {
                        /* Do not allocate if just doing a lookup */
                        VS_MAP_UNLOCK(vs);
-                       return (vm_offset_t) -1;
+                       return (dp_offset_t) -1;
                }
                if (ps_map_extend(vs, cluster + 1)) {
                        VS_MAP_UNLOCK(vs);
-                       return (vm_offset_t) -1;
+                       return (dp_offset_t) -1;
                }
        }
 
@@ -1830,14 +2186,14 @@ ps_clmap(
                if (vsmap == NULL) {
                        if (flag == CL_FIND) {
                                VS_MAP_UNLOCK(vs);
-                               return (vm_offset_t) -1;
+                               return (dp_offset_t) -1;
                        }
 
                        /* Allocate the indirect block */
                        vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
                        if (vsmap == NULL) {
                                VS_MAP_UNLOCK(vs);
-                               return (vm_offset_t) -1;
+                               return (dp_offset_t) -1;
                        }
                        /* Initialize the cluster offsets */
                        for (i = 0; i < CLMAP_ENTRIES; i++)
@@ -1861,7 +2217,7 @@ ps_clmap(
        if (VSM_ISERR(*vsmap)) {
                clmap->cl_error = VSM_GETERR(*vsmap);
                VS_MAP_UNLOCK(vs);
-               return (vm_offset_t) -1;
+               return (dp_offset_t) -1;
        } else if (VSM_ISCLR(*vsmap)) {
                int psindex;
 
@@ -1875,16 +2231,16 @@ ps_clmap(
                                VSM_SETERR(*vsmap, error);
                        }
                        VS_MAP_UNLOCK(vs);
-                       return (vm_offset_t) -1;
+                       return (dp_offset_t) -1;
                } else {
                        /*
                         * Attempt to allocate a cluster from the paging segment
                         */
                        newcl = ps_allocate_cluster(vs, &psindex,
                                                    PAGING_SEGMENT_NULL);
-                       if (newcl == -1) {
+                       if (newcl == (dp_offset_t) -1) {
                                VS_MAP_UNLOCK(vs);
-                               return (vm_offset_t) -1;
+                               return (dp_offset_t) -1;
                        }
                        VSM_CLR(*vsmap);
                        VSM_SETCLOFF(*vsmap, newcl);
@@ -1906,14 +2262,14 @@ ps_clmap(
         * relatively quick.
         */
        ASSERT(trunc_page(offset) == offset);
-       newcl = ptoa(newcl) << vs->vs_clshift;
+       newcl = ptoa_32(newcl) << vs->vs_clshift;
        newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
        if (flag == CL_ALLOC) {
                /*
                 * set bits in the allocation bitmap according to which
                 * pages were requested.  size is in bytes.
                 */
-               i = atop(newoff);
+               i = atop_32(newoff);
                while ((size > 0) && (i < VSCLSIZE(vs))) {
                        VSM_SETALLOC(*vsmap, i);
                        i++;
@@ -1926,7 +2282,7 @@ ps_clmap(
                 * Offset is not cluster aligned, so number of pages
                 * and bitmaps must be adjusted
                 */
-               clmap->cl_numpages -= atop(newoff);
+               clmap->cl_numpages -= atop_32(newoff);
                CLMAP_SHIFT(clmap, vs);
                CLMAP_SHIFTALLOC(clmap, vs);
        }
@@ -1943,7 +2299,7 @@ ps_clmap(
         * entire cluster is in error.
         */
        if (size && flag == CL_FIND) {
-               vm_offset_t off = (vm_offset_t) 0;
+               dp_offset_t off = (dp_offset_t) 0;
 
                if (!error) {
                        for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
@@ -1955,7 +2311,7 @@ ps_clmap(
                } else {
                        BS_STAT(clmap->cl_ps->ps_bs,
                                clmap->cl_ps->ps_bs->bs_pages_out_fail +=
-                                       atop(size));
+                                       atop_32(size));
                        off = VSM_CLOFF(*vsmap);
                        VSM_SETERR(*vsmap, error);
                }
@@ -1963,34 +2319,37 @@ ps_clmap(
                 * Deallocate cluster if error, and no valid pages
                 * already present.
                 */
-               if (off != (vm_offset_t) 0)
+               if (off != (dp_offset_t) 0)
                        ps_deallocate_cluster(clmap->cl_ps, off);
                VS_MAP_UNLOCK(vs);
-               return (vm_offset_t) 0;
+               return (dp_offset_t) 0;
        } else
                VS_MAP_UNLOCK(vs);
 
-       DEBUG(DEBUG_VS_INTERNAL,
-             ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
-              newcl+newoff, (int) vs, (int) vsmap, flag));
-       DEBUG(DEBUG_VS_INTERNAL,
-             ("        clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
-              (int) clmap->cl_ps, clmap->cl_numpages,
-              (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
+       DP_DEBUG(DEBUG_VS_INTERNAL,
+                ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
+                 newcl+newoff, (int) vs, (int) vsmap, flag));
+       DP_DEBUG(DEBUG_VS_INTERNAL,
+                ("     clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
+                 (int) clmap->cl_ps, clmap->cl_numpages,
+                 (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
 
        return (newcl + newoff);
 }
 
-void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t);    /* forward */
+void ps_clunmap(vstruct_t, dp_offset_t, dp_size_t);    /* forward */
 
 void
 ps_clunmap(
        vstruct_t       vs,
-       vm_offset_t     offset,
-       vm_size_t       length)
+       dp_offset_t     offset,
+       dp_size_t       length)
 {
-       vm_offset_t             cluster; /* The cluster number of offset */
+       dp_offset_t             cluster; /* The cluster number of offset */
        struct vs_map           *vsmap;
+       struct ps_vnode_trim_data trim_data;
+
+       ps_vnode_trim_init(&trim_data);
 
        VS_MAP_LOCK(vs);
 
@@ -1999,20 +2358,22 @@ ps_clunmap(
         * clusters and map entries as encountered.
         */
        while (length > 0) {
-               vm_offset_t     newoff;
-               int             i;
+               dp_offset_t     newoff;
+               unsigned int    i;
 
-               cluster = atop(offset) >> vs->vs_clshift;
+               cluster = atop_32(offset) >> vs->vs_clshift;
                if (vs->vs_indirect)    /* indirect map */
                        vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
                else
                        vsmap = vs->vs_dmap;
                if (vsmap == NULL) {
+                       ps_vnode_trim_now(&trim_data);
                        VS_MAP_UNLOCK(vs);
                        return;
                }
                vsmap += cluster%CLMAP_ENTRIES;
                if (VSM_ISCLR(*vsmap)) {
+                       ps_vnode_trim_now(&trim_data);
                        length -= vm_page_size;
                        offset += vm_page_size;
                        continue;
@@ -2022,12 +2383,12 @@ ps_clunmap(
                 * paging segment cluster pages.
                 * Optimize for entire cluster cleraing.
                 */
-               if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) {
+               if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) {
                        /*
                         * Not cluster aligned.
                         */
                        ASSERT(trunc_page(newoff) == newoff);
-                       i = atop(newoff);
+                       i = atop_32(newoff);
                } else
                        i = 0;
                while ((i < VSCLSIZE(vs)) && (length > 0)) {
@@ -2041,23 +2402,30 @@ ps_clunmap(
                /*
                 * If map entry is empty, clear and deallocate cluster.
                 */
-               if (!VSM_ALLOC(*vsmap)) {
+               if (!VSM_BMAP(*vsmap)) {
+                       ps_vnode_trim_more(&trim_data, 
+                                             vsmap,
+                                             vs->vs_clshift,
+                                             VSCLSIZE(vs) * vm_page_size);
                        ps_deallocate_cluster(VSM_PS(*vsmap),
                                              VSM_CLOFF(*vsmap));
                        VSM_CLR(*vsmap);
+               } else {
+                       ps_vnode_trim_now(&trim_data);
                }
        }
+       ps_vnode_trim_now(&trim_data);
 
        VS_MAP_UNLOCK(vs);
 }
 
-void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */
+void ps_vs_write_complete(vstruct_t, dp_offset_t, dp_size_t, int); /* forward */
 
 void
 ps_vs_write_complete(
        vstruct_t       vs,
-       vm_offset_t     offset,
-       vm_size_t       size,
+       dp_offset_t     offset,
+       dp_size_t       size,
        int             error)
 {
        struct clmap    clmap;
@@ -2075,19 +2443,19 @@ ps_vs_write_complete(
        (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
 }
 
-void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int);   /* forward */
+void vs_cl_write_complete(vstruct_t, paging_segment_t, dp_offset_t, vm_offset_t, dp_size_t, boolean_t, int);   /* forward */
 
 void
 vs_cl_write_complete(
-       vstruct_t               vs,
-       paging_segment_t        ps,
-       vm_offset_t             offset,
-       vm_offset_t             addr,
-       vm_size_t               size,
-       boolean_t               async,
-       int                     error)
+       vstruct_t                       vs,
+       __unused paging_segment_t       ps,
+       dp_offset_t                     offset,
+       __unused vm_offset_t            addr,
+       dp_size_t                       size,
+       boolean_t                       async,
+       int                             error)
 {
-       kern_return_t   kr;
+//     kern_return_t   kr;
 
        if (error) {
                /*
@@ -2098,7 +2466,7 @@ vs_cl_write_complete(
                dprintf(("write failed error = 0x%x\n", error));
                /* add upl_abort code here */
        } else
-               GSTAT(global_stats.gs_pages_out += atop(size));
+               GSTAT(global_stats.gs_pages_out += atop_32(size));
        /*
         * Notify the vstruct mapping code, so it can do its accounting.
         */
@@ -2111,7 +2479,6 @@ vs_cl_write_complete(
                if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
                        vs->vs_waiting_async = FALSE;
                        VS_UNLOCK(vs);
-                       /* mutex_unlock(&vs->vs_waiting_async); */
                        thread_wakeup(&vs->vs_async_pending);
                } else {
                        VS_UNLOCK(vs);
@@ -2131,7 +2498,7 @@ device_write_reply(
        struct vs_async *vsa;
 
        vsa = (struct vs_async *)
-               ((struct vstruct_alias *)(reply_port->alias))->vs;
+               ((struct vstruct_alias *)(reply_port->ip_alias))->vs;
 
        if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
                device_code = KERN_FAILURE;
@@ -2146,11 +2513,7 @@ device_write_reply(
                if(vsa->vsa_error) {
                   /* need to consider error condition.  re-write data or */
                   /* throw it away here. */
-                  vm_offset_t  ioaddr;
-                  if(vm_map_copyout(kernel_map, &ioaddr,
-                                (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS)
-                  panic("vs_cluster_write: unable to copy source list\n");
-                  vm_deallocate(kernel_map, ioaddr, vsa->vsa_size);
+                  vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr);
                }
                ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset, 
                                                vsa->vsa_size, vsa->vsa_error);
@@ -2185,11 +2548,11 @@ device_read_reply(
 {
        struct vs_async *vsa;
        vsa = (struct vs_async *)
-               ((struct vstruct_alias *)(reply_port->alias))->vs;
+               ((struct vstruct_alias *)(reply_port->defpager_importance.alias))->vs;
        vsa->vsa_addr = (vm_offset_t)data;
        vsa->vsa_size = (vm_size_t)dataCnt;
        vsa->vsa_error = return_code;
-       thread_wakeup(&vsa->vsa_lock);
+       thread_wakeup(&vsa);
        return KERN_SUCCESS;
 }
 
@@ -2227,12 +2590,10 @@ device_open_reply(
        return KERN_SUCCESS;
 }
 
-kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
-
 kern_return_t
 ps_read_device(
        paging_segment_t        ps,
-       vm_offset_t             offset,
+       dp_offset_t             offset,
        vm_offset_t             *bufferp,
        unsigned int            size,
        unsigned int            *residualp,
@@ -2247,14 +2608,13 @@ ps_read_device(
        vm_offset_t     buf_ptr;
        unsigned int    records_read;
        struct vs_async *vsa;   
-       mutex_t vs_waiting_read_reply;
 
        device_t        device;
        vm_map_copy_t   device_data = NULL;
        default_pager_thread_t *dpt = NULL;
 
        device = dev_port_lookup(ps->ps_device);
-       clustered_reads[atop(size)]++;
+       clustered_reads[atop_32(size)]++;
 
        dev_offset = (ps->ps_offset +
                      (offset >> (vm_page_shift - ps->ps_record_shift)));
@@ -2271,7 +2631,6 @@ ps_read_device(
                        vsa->vsa_size = 0;
                        vsa->vsa_ps = NULL;
                }
-               mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO);
                ip_lock(vsa->reply_port);
                vsa->reply_port->ip_sorights++;
                ip_reference(vsa->reply_port);
@@ -2287,8 +2646,8 @@ ps_read_device(
                                 (io_buf_ptr_t *) &dev_buffer,
                                 (mach_msg_type_number_t *) &bytes_read);
                if(kr == MIG_NO_REPLY) { 
-                       assert_wait(&vsa->vsa_lock, THREAD_UNINT);
-                       thread_block((void(*)(void))0);
+                       assert_wait(&vsa, THREAD_UNINT);
+                       thread_block(THREAD_CONTINUE_NULL);
 
                        dev_buffer = vsa->vsa_addr;
                        bytes_read = (unsigned int)vsa->vsa_size;
@@ -2327,9 +2686,9 @@ ps_read_device(
                records_read = (bytes_read >>
                                (vm_page_shift - ps->ps_record_shift));
                dev_offset += records_read;
-               DEBUG(DEBUG_VS_INTERNAL,
-                     ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
-                      dev_buffer, bytes_read));
+               DP_DEBUG(DEBUG_VS_INTERNAL,
+                        ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
+                         dev_buffer, bytes_read));
                if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
                    != KERN_SUCCESS)
                        Panic("dealloc buf");
@@ -2338,7 +2697,7 @@ ps_read_device(
        *residualp = size - total_read;
        if((dev_buffer != *bufferp) && (total_read != 0)) {
                vm_offset_t temp_buffer;
-               vm_allocate(kernel_map, &temp_buffer, total_read, TRUE);
+               vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
                memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
                if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read, 
                        VM_MAP_COPYIN_OPT_SRC_DESTROY | 
@@ -2368,12 +2727,10 @@ ps_read_device(
        return KERN_SUCCESS;
 }
 
-kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *);    /* forward */
-
 kern_return_t
 ps_write_device(
        paging_segment_t        ps,
-       vm_offset_t             offset,
+       dp_offset_t             offset,
        vm_offset_t             addr,
        unsigned int            size,
        struct vs_async         *vsa)
@@ -2386,7 +2743,7 @@ ps_write_device(
 
 
 
-       clustered_writes[atop(size)]++;
+       clustered_writes[atop_32(size)]++;
 
        dev_offset = (ps->ps_offset +
                      (offset >> (vm_page_shift - ps->ps_record_shift)));
@@ -2423,7 +2780,7 @@ ps_write_device(
                                         "device_write_request returned ",
                                         kr, addr, size, offset));
                        BS_STAT(ps->ps_bs,
-                               ps->ps_bs->bs_pages_out_fail += atop(size));
+                               ps->ps_bs->bs_pages_out_fail += atop_32(size));
                        /* do the completion notification to free resources */
                        device_write_reply(reply_port, kr, 0);
                        return PAGER_ERROR;
@@ -2449,7 +2806,7 @@ ps_write_device(
                                 "device_write returned ",
                                 kr, addr, size, offset));
                        BS_STAT(ps->ps_bs,
-                               ps->ps_bs->bs_pages_out_fail += atop(size));
+                               ps->ps_bs->bs_pages_out_fail += atop_32(size));
                        return PAGER_ERROR;
                }
                if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
@@ -2475,116 +2832,287 @@ ps_write_device(
 
 kern_return_t
 ps_read_device(
-       paging_segment_t        ps,
-       vm_offset_t             offset,
-       vm_offset_t             *bufferp,
-       unsigned int            size,
-       unsigned int            *residualp,
-       int                     flags)
+       __unused paging_segment_t       ps,
+       __unused dp_offset_t            offset,
+       __unused vm_offset_t            *bufferp,
+       __unused unsigned int           size,
+       __unused unsigned int           *residualp,
+       __unused int                            flags)
 {
   panic("ps_read_device not supported");
+  return KERN_FAILURE;
 }
 
+kern_return_t
 ps_write_device(
-       paging_segment_t        ps,
-       vm_offset_t             offset,
-       vm_offset_t             addr,
-       unsigned int            size,
-       struct vs_async         *vsa)
+       __unused paging_segment_t       ps,
+       __unused dp_offset_t            offset,
+       __unused vm_offset_t            addr,
+       __unused unsigned int           size,
+       __unused struct vs_async        *vsa)
 {
   panic("ps_write_device not supported");
+  return KERN_FAILURE;
 }
 
 #endif /* DEVICE_PAGING */
-void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t);       /* forward */
+void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t);     /* forward */
 
 void
 pvs_object_data_provided(
-       vstruct_t       vs,
-       upl_t           upl,
-       vm_offset_t     offset,
-       vm_size_t       size)
+       __unused vstruct_t              vs,
+       __unused upl_t                  upl,
+       __unused upl_offset_t   offset,
+       upl_size_t                              size)
 {
+#if    RECLAIM_SWAP
+       boolean_t       empty;
+#endif
 
-       DEBUG(DEBUG_VS_INTERNAL,
-             ("buffer=0x%x,offset=0x%x,size=0x%x\n",
-              upl, offset, size));
+       DP_DEBUG(DEBUG_VS_INTERNAL,
+                ("buffer=0x%x,offset=0x%x,size=0x%x\n",
+                 upl, offset, size));
 
        ASSERT(size > 0);
-       GSTAT(global_stats.gs_pages_in += atop(size));
-
-
-#if    USE_PRECIOUS
-       ps_clunmap(vs, offset, size);
-#endif /* USE_PRECIOUS */
+       GSTAT(global_stats.gs_pages_in += atop_32(size));
+
+/* check upl iosync flag instead of using RECLAIM_SWAP*/
+#if    RECLAIM_SWAP
+       if (size != upl->size) {
+               if (size) {
+                       ps_clunmap(vs, offset, size);
+                       upl_commit_range(upl, 0, size, 0, NULL, 0, &empty);
+               }
+               upl_abort(upl, UPL_ABORT_ERROR);
+               upl_deallocate(upl);
+       } else {
+               ps_clunmap(vs, offset, size);
+               upl_commit(upl, NULL, 0);
+               upl_deallocate(upl);
+       }
+#endif /* RECLAIM_SWAP */
 
 }
 
+static memory_object_offset_t   last_start;
+static vm_size_t               last_length;
+
+/*
+ * A "cnt" of 0 means that the caller just wants to check if the page at
+ * offset "vs_offset" exists in the backing store.  That page hasn't been
+ * prepared, so no need to release it.
+ *
+ * A "cnt" of -1 means that the caller wants to bring back from the backing
+ * store all existing pages in the cluster containing "vs_offset".
+ */
 kern_return_t
 pvs_cluster_read(
        vstruct_t       vs,
-       vm_offset_t     vs_offset,
-       vm_size_t       cnt)
+       dp_offset_t     vs_offset,
+       dp_size_t       cnt,
+        void           *fault_info)
 {
-       upl_t                   upl;
        kern_return_t           error = KERN_SUCCESS;
-       int                     size;
+       unsigned int            size;
        unsigned int            residual;
        unsigned int            request_flags;
-       int                     seg_index;
-       int                     pages_in_cl;
+       int                     io_flags = 0;
+       int                     seg_index;
+       int                     pages_in_cl;
        int                     cl_size;
        int                     cl_mask;
-       int                     cl_index;
-       int                     xfer_size;
-       vm_offset_t       ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
-       paging_segment_t        psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+       int                     cl_index;
+       unsigned int            xfer_size;
+       dp_offset_t             orig_vs_offset;
+       dp_offset_t       ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
+       paging_segment_t        psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
        struct clmap            clmap;
+       upl_t                   upl;
+       unsigned int            page_list_count;
+       memory_object_offset_t  cluster_start;
+       vm_size_t               cluster_length;
+       uint32_t                io_streaming;
+       int                     i;
+       boolean_t               io_sync = FALSE;
+       boolean_t               reclaim_all = FALSE;
 
        pages_in_cl = 1 << vs->vs_clshift;
        cl_size = pages_in_cl * vm_page_size;
        cl_mask = cl_size - 1;
 
+       request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
+       
+       if (cnt == (dp_size_t) -1)
+               reclaim_all = TRUE;
+
+       if (reclaim_all == TRUE) {
+               /*
+                * We've been called from ps_vstruct_reclaim() to move all
+                * the object's swapped pages back to VM pages.
+                * This can put memory pressure on the system, so we do want
+                * to wait for free pages, to avoid getting in the way of the
+                * vm_pageout_scan() thread.
+                * Let's not use UPL_NOBLOCK in this case.
+                */
+               vs_offset &= ~cl_mask;
+               i = pages_in_cl;
+       } else {
+               i = 1;
+
+               /*
+                * if the I/O cluster size == PAGE_SIZE, we don't want to set
+                * the UPL_NOBLOCK since we may be trying to recover from a
+                * previous partial pagein I/O that occurred because we were low
+                * on memory and bailed early in order to honor the UPL_NOBLOCK...
+                * since we're only asking for a single page, we can block w/o fear
+                * of tying up pages while waiting for more to become available
+                */
+               if (fault_info == NULL || ((vm_object_fault_info_t)fault_info)->cluster_size > PAGE_SIZE)
+                       request_flags |= UPL_NOBLOCK;
+       }
+
+again:
+       cl_index = (vs_offset & cl_mask) / vm_page_size;
+
+        if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) ||
+           !CLMAP_ISSET(clmap, cl_index)) {
+               /*
+                * the needed page doesn't exist in the backing store...
+                * we don't want to try to do any I/O, just abort the
+                * page and let the fault handler provide a zero-fill
+                */
+               if (cnt == 0) {
+                       /*
+                        * The caller was just poking at us to see if
+                        * the page has been paged out.  No need to 
+                        * mess with the page at all.
+                        * Just let the caller know we don't have that page.
+                        */
+                       return KERN_FAILURE;
+               }
+               if (reclaim_all == TRUE) {
+                       i--;
+                       if (i == 0) {
+                               /* no more pages in this cluster */
+                               return KERN_FAILURE;
+                       }
+                       /* try the next page in this cluster */
+                       vs_offset += vm_page_size;
+                       goto again;
+               }
+
+               page_list_count = 0;
+
+               memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
+                                               PAGE_SIZE, PAGE_SIZE, 
+                                               &upl, NULL, &page_list_count,
+                                               request_flags  | UPL_SET_INTERNAL);
+               upl_range_needed(upl, 0, 1);
+
+               if (clmap.cl_error)
+                       upl_abort(upl, UPL_ABORT_ERROR);
+               else
+                       upl_abort(upl, UPL_ABORT_UNAVAILABLE);
+               upl_deallocate(upl);
+
+               return KERN_SUCCESS;
+       }
+
+       if (cnt == 0) {
+               /*
+                * The caller was just poking at us to see if
+                * the page has been paged out.  No need to 
+                * mess with the page at all.
+                * Just let the caller know we do have that page.
+                */
+               return KERN_SUCCESS;
+       }
+       
+       if(((vm_object_fault_info_t)fault_info)->io_sync == TRUE ) {
+               io_sync = TRUE;
+       } else {
+#if RECLAIM_SWAP
+               io_sync = TRUE;
+#endif /* RECLAIM_SWAP */
+       }
+
+       if( io_sync == TRUE ) {
+
+               io_flags |= UPL_IOSYNC | UPL_NOCOMMIT;
+#if USE_PRECIOUS
+               request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE;
+#else  /* USE_PRECIOUS */
+               request_flags |= UPL_REQUEST_SET_DIRTY;
+#endif /* USE_PRECIOUS */
+       }
+
+       assert(dp_encryption_inited);
+       if (dp_encryption) {
+               /*
+                * ENCRYPTED SWAP:
+                * request that the UPL be prepared for
+                * decryption.
+                */
+               request_flags |= UPL_ENCRYPT;
+               io_flags |= UPL_PAGING_ENCRYPTED;
+       }
+       orig_vs_offset = vs_offset;
+
+       assert(cnt != 0);
+       cnt = VM_SUPER_CLUSTER;
+       cluster_start = (memory_object_offset_t) vs_offset;
+       cluster_length = (vm_size_t) cnt;
+       io_streaming = 0;
+
+       /*
+        * determine how big a speculative I/O we should try for...
+        */
+       if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) {
+               assert(vs_offset >= (dp_offset_t) cluster_start &&
+                      vs_offset < (dp_offset_t) (cluster_start + cluster_length));
+               vs_offset = (dp_offset_t) cluster_start;
+               cnt = (dp_size_t) cluster_length;
+       } else {
+               cluster_length = PAGE_SIZE;
+               cnt = PAGE_SIZE;
+       }
+
+       if (io_streaming)
+                io_flags |= UPL_IOSTREAMING;
+
+       last_start = cluster_start;
+       last_length = cluster_length;
+
        /*
         * This loop will be executed multiple times until the entire
-        * request has been satisfied... if the request spans cluster
+        * range has been looked at or we issue an I/O... if the request spans cluster
         * boundaries, the clusters will be checked for logical continunity,
-        * if contiguous the I/O request will span multiple clusters, otherwise
-        * it will be broken up into the minimal set of I/O's
-        *
-        * If there are holes in a request (either unallocated pages in a paging
-        * segment or an unallocated paging segment), we stop
-        * reading at the hole, inform the VM of any data read, inform
-        * the VM of an unavailable range, then loop again, hoping to
-        * find valid pages later in the requested range.  This continues until
-        * the entire range has been examined, and read, if present.
+        * if contiguous the I/O request will span multiple clusters...
+        * at most only 1 I/O will be issued... it will encompass the original offset
         */
-
-#if    USE_PRECIOUS
-       request_flags = UPL_NO_SYNC |  UPL_CLEAN_IN_PLACE | UPL_PRECIOUS;
-#else
-       request_flags = UPL_NO_SYNC |  UPL_CLEAN_IN_PLACE ;
-#endif
-       while (cnt && (error == KERN_SUCCESS)) {
+       while (cnt && error == KERN_SUCCESS) {
                int     ps_info_valid;
-               int     page_list_count;
 
-               if (cnt > VM_SUPER_CLUSTER)
+               if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
+                       size = VM_SUPER_CLUSTER;
+                       size -= vs_offset & cl_mask;
+               } else if (cnt > VM_SUPER_CLUSTER)
                        size = VM_SUPER_CLUSTER;
                else
                        size = cnt;
+
                cnt -= size;
 
                ps_info_valid = 0;
                seg_index     = 0;
 
                while (size > 0 && error == KERN_SUCCESS) {
-                       int           abort_size;
+                       unsigned int  abort_size;
+                       unsigned int  lsize;
                        int           failed_size;
                        int           beg_pseg;
                        int           beg_indx;
-                       vm_offset_t   cur_offset;
-
+                       dp_offset_t   cur_offset;
 
                        if ( !ps_info_valid) {
                                ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
@@ -2594,30 +3122,16 @@ pvs_cluster_read(
                        /*
                         * skip over unallocated physical segments 
                         */
-                       if (ps_offset[seg_index] == (vm_offset_t) -1) {
+                       if (ps_offset[seg_index] == (dp_offset_t) -1) {
                                abort_size = cl_size - (vs_offset & cl_mask);
                                abort_size = MIN(abort_size, size);
 
-                               page_list_count = 0;
-                               memory_object_super_upl_request(
-                                       vs->vs_control,
-                                       (memory_object_offset_t)vs_offset,
-                                       abort_size, abort_size, 
-                                       &upl, NULL, &page_list_count,
-                                       request_flags);
-
-                               if (clmap.cl_error) {
-                                       upl_abort(upl, UPL_ABORT_ERROR);
-                               } else {
-                                       upl_abort(upl, UPL_ABORT_UNAVAILABLE);
-                               }
-                               upl_deallocate(upl);
-
-                               size       -= abort_size;
-                               vs_offset  += abort_size;
+                               size      -= abort_size;
+                               vs_offset += abort_size;
 
                                seg_index++;
                                ps_info_valid = 0;
+
                                continue;
                        }
                        cl_index = (vs_offset & cl_mask) / vm_page_size;
@@ -2631,24 +3145,8 @@ pvs_cluster_read(
                                abort_size += vm_page_size;
                        }
                        if (abort_size) {
-                               /*
-                                * Let VM system know about holes in clusters.
-                                */
-                               GSTAT(global_stats.gs_pages_unavail += atop(abort_size));
-
-                               page_list_count = 0;
-                               memory_object_super_upl_request(
-                                       vs->vs_control,
-                                       (memory_object_offset_t)vs_offset,
-                                       abort_size, abort_size, 
-                                       &upl, NULL, &page_list_count,
-                                       request_flags);
-
-                               upl_abort(upl, UPL_ABORT_UNAVAILABLE);
-                               upl_deallocate(upl);
-
-                               size       -= abort_size;
-                               vs_offset  += abort_size;
+                               size      -= abort_size;
+                               vs_offset += abort_size;
 
                                if (cl_index == pages_in_cl) {
                                        /*
@@ -2657,6 +3155,7 @@ pvs_cluster_read(
                                         */
                                        seg_index++;
                                        ps_info_valid = 0;
+
                                        continue;
                                }
                                if (size == 0)
@@ -2679,7 +3178,8 @@ pvs_cluster_read(
 
                                while (cl_index < pages_in_cl && xfer_size < size) {
                                        /*
-                                        * accumulate allocated pages within a physical segment
+                                        * accumulate allocated pages within 
+                                        * a physical segment
                                         */
                                        if (CLMAP_ISSET(clmap, cl_index)) {
                                                xfer_size  += vm_page_size;
@@ -2693,74 +3193,99 @@ pvs_cluster_read(
                                }
                                if (cl_index < pages_in_cl || xfer_size >= size) {
                                        /*
-                                        * we've hit an unallocated page or the
-                                        * end of this request... go fire the I/O
+                                        * we've hit an unallocated page or 
+                                        * the end of this request... see if
+                                        * it's time to fire the I/O
                                         */
                                        break;
                                }
                                /*
-                                * we've hit the end of the current physical segment
-                                * and there's more to do, so try moving to the next one
+                                * we've hit the end of the current physical
+                                * segment and there's more to do, so try 
+                                * moving to the next one
                                 */
                                seg_index++;
                                  
                                ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
-                               psp[seg_index]       = CLMAP_PS(clmap);
+                               psp[seg_index] = CLMAP_PS(clmap);
                                ps_info_valid = 1;
 
                                if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
                                        /*
-                                        * if the physical segment we're about to step into
-                                        * is not contiguous to the one we're currently
-                                        * in, or it's in a different paging file, or
+                                        * if the physical segment we're about 
+                                        * to step into is not contiguous to 
+                                        * the one we're currently in, or it's 
+                                        * in a different paging file, or
                                         * it hasn't been allocated....
-                                        * we stop here and generate the I/O
+                                        * we stop this run and go check
+                                        * to see if it's time to fire the I/O
                                         */
                                        break;
                                }
                                /*
-                                * start with first page of the next physical segment
+                                * start with first page of the next physical
+                                * segment
                                 */
                                cl_index = 0;
                        }
-                       if (xfer_size) {
+                       if (xfer_size == 0) {
                                /*
-                                * we have a contiguous range of allocated pages
-                                * to read from
+                                * no I/O to generate for this segment
                                 */
-                               page_list_count = 0;
-                               memory_object_super_upl_request(vs->vs_control,
-                                               (memory_object_offset_t)vs_offset,
-                                               xfer_size, xfer_size, 
-                                               &upl, NULL, &page_list_count,
-                                               request_flags | UPL_SET_INTERNAL);
-
-                               error = ps_read_file(psp[beg_pseg], upl, (vm_offset_t) 0, 
-                                               ps_offset[beg_pseg] + (beg_indx * vm_page_size), xfer_size, &residual, 0);
-                       } else
                                continue;
+                       }
+                       if (cur_offset <= orig_vs_offset) {
+                               /*
+                                * we've hit a hole in our speculative cluster
+                                * before the offset that we're really after...
+                                * don't issue the I/O since it doesn't encompass
+                                * the original offset and we're looking to only
+                                * pull in the speculative pages if they can be
+                                * made part of a single I/O
+                                */
+                               size      -= xfer_size;
+                               vs_offset += xfer_size;
+
+                               continue;
+                       }
+                       /*
+                        * we have a contiguous range of allocated pages
+                        * to read from that encompasses the original offset
+                        */
+                       page_list_count = 0;
+                       memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
+                                                       xfer_size, xfer_size, 
+                                                       &upl, NULL, &page_list_count,
+                                                       request_flags | UPL_SET_INTERNAL);
+
+                       error = ps_read_file(psp[beg_pseg], 
+                                            upl, (upl_offset_t) 0, 
+                                            ps_offset[beg_pseg] + (beg_indx * vm_page_size), 
+                                            xfer_size, &residual, io_flags);
 
-                       failed_size = 0;
 
                        /*
-                        * Adjust counts and send response to VM.  Optimize for the
-                        * common case, i.e. no error and/or partial data.
-                        * If there was an error, then we need to error the entire
-                        * range, even if some data was successfully read.
-                        * If there was a partial read we may supply some
+                        * Adjust counts and send response to VM.  Optimize 
+                        * for the common case, i.e. no error and/or partial
+                        * data. If there was an error, then we need to error
+                        * the entire range, even if some data was successfully
+                        * read. If there was a partial read we may supply some
                         * data and may error some as well.  In all cases the
-                        * VM must receive some notification for every page in the
-                        * range.
+                        * VM must receive some notification for every page 
+                        * in the range.
                         */
                        if ((error == KERN_SUCCESS) && (residual == 0)) {
                                /*
-                                * Got everything we asked for, supply the data to
-                                * the VM.  Note that as a side effect of supplying
-                                * the data, the buffer holding the supplied data is
-                                * deallocated from the pager's address space.
+                                * Got everything we asked for, supply the data
+                                * to the VM.  Note that as a side effect of 
+                                * supplying the data, the buffer holding the 
+                                * supplied data is deallocated from the pager's
+                                *  address space.
                                 */
-                               pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
+                               lsize = xfer_size;
+                               failed_size = 0;
                        } else {
+                               lsize = 0;
                                failed_size = xfer_size;
 
                                if (error == KERN_SUCCESS) {
@@ -2770,8 +3295,7 @@ pvs_cluster_read(
                                                 * and no data moved, we turn it into
                                                 * an error, assuming we're reading at
                                                 * or beyong EOF.
-                                                * Fall through and error the entire
-                                                * range.
+                                                * Fall through and error the entire range.
                                                 */
                                                error = KERN_FAILURE;
                                        } else {
@@ -2784,33 +3308,40 @@ pvs_cluster_read(
                                                 * Fall through and error the remainder
                                                 * of the range, if any.
                                                 */
-                                               int fill, lsize;
+                                               int fill;
 
-                                               fill = residual & ~vm_page_size;
+                                               fill = residual & (vm_page_size - 1);
                                                lsize = (xfer_size - residual) + fill;
-                                               pvs_object_data_provided(vs, upl, vs_offset, lsize);
 
-                                               if (lsize < xfer_size) {
+                                               if (lsize < xfer_size)
                                                        failed_size = xfer_size - lsize;
+
+                                               if (reclaim_all == FALSE)
                                                        error = KERN_FAILURE;
-                                               }
                                        }
                                } 
                        }
-                       /*
-                        * If there was an error in any part of the range, tell
-                        * the VM. Note that error is explicitly checked again since
-                        * it can be modified above.
-                        */
-                       if (error != KERN_SUCCESS) {
+                       pvs_object_data_provided(vs, upl, vs_offset, lsize);
+
+                       if (failed_size) {
+                               /*
+                                * There was an error in some part of the range, tell
+                                * the VM. Note that error is explicitly checked again
+                                * since it can be modified above.
+                                */
                                BS_STAT(psp[beg_pseg]->ps_bs,
-                                       psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop(failed_size));
+                                       psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
                        }
-                       size       -= xfer_size;
-                       vs_offset  += xfer_size;
+                       /*
+                        * we've issued a single I/O that encompassed the original offset
+                        * at this point we either met our speculative request length or 
+                        * we ran into a 'hole' (i.e. page not present in the cluster, cluster
+                        * not present or not physically contiguous to the previous one), so
+                        * we're done issuing I/O at this point
+                        */
+                       return (error);
                }
-
-       } /* END while (cnt && (error == 0)) */
+       }
        return error;
 }
 
@@ -2820,63 +3351,92 @@ kern_return_t
 vs_cluster_write(
        vstruct_t       vs,
        upl_t           internal_upl,
-       vm_offset_t     offset,
-       vm_size_t       cnt,
+       upl_offset_t    offset,
+       upl_size_t      cnt,
        boolean_t       dp_internal,
        int             flags)
 {
-       vm_offset_t     size;
-       vm_offset_t     transfer_size;
+       upl_size_t      transfer_size;
        int             error = 0;
        struct clmap    clmap;
 
-       vm_offset_t     actual_offset;  /* Offset within paging segment */
+       dp_offset_t     actual_offset;  /* Offset within paging segment */
        paging_segment_t ps;
-       vm_offset_t     subx_size;
-       vm_offset_t     mobj_base_addr;
-       vm_offset_t     mobj_target_addr;
-       int             mobj_size;
-
-       struct vs_async *vsa;
-       vm_map_copy_t   copy;
+       dp_offset_t     mobj_base_addr;
+       dp_offset_t     mobj_target_addr;
 
        upl_t           upl;
        upl_page_info_t *pl;
        int             page_index;
+       unsigned int    page_max_index;
        int             list_size;
-       int             cl_size;
+       int             pages_in_cl;
+       unsigned int    cl_size;
+       int             base_index;
+       unsigned int    seg_size;
+       unsigned int    upl_offset_in_object;
+       boolean_t       minimal_clustering = FALSE;
+       boolean_t       found_dirty;
+
+       if (!dp_encryption_inited) {
+               /*
+                * ENCRYPTED SWAP:
+                * Once we've started using swap, we
+                * can't change our mind on whether
+                * it needs to be encrypted or
+                * not.
+                */
+               dp_encryption_inited = TRUE;
+       }
+       if (dp_encryption) {
+               /*
+                * ENCRYPTED SWAP:
+                * the UPL will need to be encrypted...
+                */
+               flags |= UPL_PAGING_ENCRYPTED;
+       }
+
+       pages_in_cl = 1 << vs->vs_clshift;
+       cl_size = pages_in_cl * vm_page_size;
        
+#if CONFIG_FREEZE
+       minimal_clustering = TRUE;
+#else
+       if (dp_isssd == TRUE)
+               minimal_clustering = TRUE;
+#endif
        if (!dp_internal) {
-               int          page_list_count;
+               unsigned int page_list_count;
                int          request_flags;
-               int          super_size;
+               unsigned int super_size;
                int          first_dirty;
                int          num_dirty;
                int          num_of_pages;
                int          seg_index;
-               int          pages_in_cl;
-               int          must_abort;
-               vm_offset_t  upl_offset;
-               vm_offset_t  seg_offset;
-               vm_offset_t  ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
-               paging_segment_t   psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
+               upl_offset_t  upl_offset;
+               upl_offset_t  upl_offset_aligned;
+               dp_offset_t  seg_offset;
+               dp_offset_t  ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];
+               paging_segment_t   psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];
 
 
-               pages_in_cl = 1 << vs->vs_clshift;
-               cl_size = pages_in_cl * vm_page_size;
-
-               if (bs_low) {
+               if (bs_low)
                        super_size = cl_size;
-
-                       request_flags = UPL_NOBLOCK |
-                               UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | 
-                               UPL_NO_SYNC | UPL_SET_INTERNAL;
-               } else {
+               else
                        super_size = VM_SUPER_CLUSTER;
 
-                       request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
-                               UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | 
-                               UPL_NO_SYNC | UPL_SET_INTERNAL;
+               request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
+                               UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | 
+                               UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
+
+               if (dp_encryption) {
+                       /*
+                        * ENCRYPTED SWAP:
+                        * request that the UPL be prepared for
+                        * encryption.
+                        */
+                       request_flags |= UPL_ENCRYPT;
+                       flags |= UPL_PAGING_ENCRYPTED;
                }
 
                page_list_count = 0;
@@ -2884,47 +3444,113 @@ vs_cluster_write(
                                (memory_object_offset_t)offset,
                                cnt, super_size, 
                                &upl, NULL, &page_list_count,
-                               request_flags | UPL_PAGEOUT);
+                               request_flags | UPL_FOR_PAGEOUT);
+
+               /*
+                * The default pager does not handle objects larger than
+                * 4GB, so it does not deal with offset that don't fit in
+                * 32-bit.  Cast down upl->offset now and make sure we
+                * did not lose any valuable bits.
+                */
+               upl_offset_in_object = (unsigned int) upl->offset;
+               assert(upl->offset == upl_offset_in_object);
 
                pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
 
+               seg_size = cl_size - (upl_offset_in_object % cl_size);
+               upl_offset_aligned = upl_offset_in_object & ~(cl_size - 1);
+               page_index = 0;
+               page_max_index = upl->size / PAGE_SIZE;
+               found_dirty = TRUE;
+
                for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) {
 
-                       ps_offset[seg_index] = ps_clmap(vs, upl->offset + (seg_index * cl_size),
-                                                     &clmap, CL_ALLOC, 
-                                                     transfer_size < cl_size ? 
-                                                     transfer_size : cl_size, 0);
+                       unsigned int    seg_pgcnt;
 
-                       if (ps_offset[seg_index] == (vm_offset_t) -1) {
-                               upl_abort(upl, 0);
-                               upl_deallocate(upl);
-                               
-                               return KERN_FAILURE;
+                       seg_pgcnt = seg_size / PAGE_SIZE;
 
-                       }
-                       psp[seg_index] = CLMAP_PS(clmap);
+                       if (minimal_clustering == TRUE) {
+                               unsigned int    non_dirty;
 
-                       if (transfer_size > cl_size) {
-                               transfer_size -= cl_size;
+                               non_dirty = 0;
+                               found_dirty = FALSE;
+
+                               for (; non_dirty < seg_pgcnt; non_dirty++) {
+                                       if ((page_index + non_dirty) >= page_max_index)
+                                               break;
+
+                                       if (UPL_DIRTY_PAGE(pl, page_index + non_dirty) ||
+                                           UPL_PRECIOUS_PAGE(pl, page_index + non_dirty)) {
+                                               found_dirty = TRUE;
+                                               break;
+                                       }
+                               }
+                       }
+                       if (found_dirty == TRUE) {
+                               ps_offset[seg_index] = 
+                                       ps_clmap(vs, 
+                                                upl_offset_aligned,
+                                                &clmap, CL_ALLOC, 
+                                                cl_size, 0); 
+
+                               if (ps_offset[seg_index] == (dp_offset_t) -1) {
+                                       upl_abort(upl, 0);
+                                       upl_deallocate(upl);
+                               
+                                       return KERN_FAILURE;
+                               }
+                               psp[seg_index] = CLMAP_PS(clmap);
+                       }
+                       if (transfer_size > seg_size) {
+                               page_index += seg_pgcnt;
+                               transfer_size -= seg_size;
+                               upl_offset_aligned += cl_size;
+                               seg_size = cl_size;
                                seg_index++;
                        } else
                                transfer_size = 0;
                }
-               for (page_index = 0, num_of_pages = upl->size / vm_page_size; page_index < num_of_pages; ) {
+               /*
+                * Ignore any non-present pages at the end of the
+                * UPL.
+                */
+               for (page_index = upl->size / vm_page_size; page_index > 0;)  {
+                       if (UPL_PAGE_PRESENT(pl, --page_index)) {
+                               page_index++;
+                               break;
+                       }
+               }
+               if (page_index == 0) {
+                       /*
+                        * no pages in the UPL
+                        * abort and return
+                        */
+                       upl_abort(upl, 0);
+                       upl_deallocate(upl);
+
+                       return KERN_SUCCESS;
+               }
+               num_of_pages = page_index;
+
+               base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE;
+
+               for (page_index = 0; page_index < num_of_pages; ) {
                        /*
                         * skip over non-dirty pages
                         */
                        for ( ; page_index < num_of_pages; page_index++) {
-                               if (UPL_DIRTY_PAGE(pl, page_index) || UPL_PRECIOUS_PAGE(pl, page_index))
+                               if (UPL_DIRTY_PAGE(pl, page_index) 
+                                       || UPL_PRECIOUS_PAGE(pl, page_index))
                                        /*
                                         * this is a page we need to write
-                                        * go see if we can buddy it up with others
-                                        * that are contiguous to it
+                                        * go see if we can buddy it up with 
+                                        * others that are contiguous to it
                                         */
                                        break;
                                /*
-                                * if the page is not-dirty, but present we need to commit it...
-                                * this is an unusual case since we only asked for dirty pages
+                                * if the page is not-dirty, but present we 
+                                * need to commit it...  This is an unusual 
+                                * case since we only asked for dirty pages
                                 */
                                if (UPL_PAGE_PRESENT(pl, page_index)) {
                                        boolean_t empty = FALSE;
@@ -2933,10 +3559,13 @@ vs_cluster_write(
                                                 vm_page_size, 
                                                 UPL_COMMIT_NOTIFY_EMPTY,
                                                 pl,
-                                                MAX_UPL_TRANSFER,
+                                                page_list_count,
                                                 &empty);
-                                       if (empty)
+                                       if (empty) {
+                                               assert(page_index == 
+                                                      num_of_pages - 1);
                                                upl_deallocate(upl);
+                                       }
                                }
                        }
                        if (page_index == num_of_pages)
@@ -2946,14 +3575,16 @@ vs_cluster_write(
                                break;
 
                        /*
-                        * gather up contiguous dirty pages... we have at least 1
-                        * otherwise we would have bailed above
+                        * gather up contiguous dirty pages... we have at 
+                        * least 1 * otherwise we would have bailed above
                         * make sure that each physical segment that we step
                         * into is contiguous to the one we're currently in
                         * if it's not, we have to stop and write what we have
                         */
-                       for (first_dirty = page_index; page_index < num_of_pages; ) {
-                               if ( !UPL_DIRTY_PAGE(pl, page_index) && !UPL_PRECIOUS_PAGE(pl, page_index))
+                       for (first_dirty = page_index; 
+                                       page_index < num_of_pages; ) {
+                               if ( !UPL_DIRTY_PAGE(pl, page_index) 
+                                       && !UPL_PRECIOUS_PAGE(pl, page_index))
                                        break;
                                page_index++;
                                /*
@@ -2965,62 +3596,63 @@ vs_cluster_write(
                                        int cur_seg;
                                        int nxt_seg;
 
-                                       cur_seg = (page_index - 1) / pages_in_cl;
-                                       nxt_seg = page_index / pages_in_cl;
+                                       cur_seg = (base_index + (page_index - 1))/pages_in_cl;
+                                       nxt_seg = (base_index + page_index)/pages_in_cl;
 
                                        if (cur_seg != nxt_seg) {
                                                if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
-                                                       /*
-                                                        * if the segment we're about to step into
-                                                        * is not contiguous to the one we're currently
-                                                        * in, or it's in a different paging file....
-                                                        * we stop here and generate the I/O
-                                                        */
+                                               /*
+                                                * if the segment we're about 
+                                                * to step into is not 
+                                                * contiguous to the one we're 
+                                                * currently in, or it's in a 
+                                                * different paging file....
+                                                * we stop here and generate 
+                                                * the I/O
+                                                */
                                                        break;
                                        }
                                }
                        }
                        num_dirty = page_index - first_dirty;
-                       must_abort = 1;
 
                        if (num_dirty) {
                                upl_offset = first_dirty * vm_page_size;
-                               seg_index  = first_dirty / pages_in_cl;
-                               seg_offset = upl_offset - (seg_index * cl_size);
                                transfer_size = num_dirty * vm_page_size;
 
-                               error = ps_write_file(psp[seg_index], upl, upl_offset,
-                                                     ps_offset[seg_index] + seg_offset, transfer_size, flags);
-
-                               if (error == 0) {
-                                       while (transfer_size) {
-                                               int seg_size;
+                               while (transfer_size) {
 
-                                               if ((seg_size = cl_size - (upl_offset % cl_size)) > transfer_size)
-                                                       seg_size = transfer_size;
+                                       if ((seg_size = cl_size - 
+                                               ((upl_offset_in_object +
+                                                 upl_offset) % cl_size)) 
+                                                       > transfer_size)
+                                               seg_size = transfer_size;
 
-                                               ps_vs_write_complete(vs, upl->offset + upl_offset, seg_size, error);
+                                       ps_vs_write_complete(
+                                               vs, 
+                                               (upl_offset_in_object +
+                                                upl_offset), 
+                                               seg_size, error);
 
-                                               transfer_size -= seg_size;
-                                               upl_offset += seg_size;
-                                       }
-                                       must_abort = 0;
+                                       transfer_size -= seg_size;
+                                       upl_offset += seg_size;
                                }
-                       }
-                       if (must_abort) {
-                               boolean_t empty = FALSE;
-                               upl_abort_range(upl,
-                                               first_dirty * vm_page_size, 
-                                               num_dirty   * vm_page_size,
-                                               UPL_ABORT_NOTIFY_EMPTY,
-                                               &empty);
-                               if (empty)
-                                       upl_deallocate(upl);
+                               upl_offset = first_dirty * vm_page_size;
+                               transfer_size = num_dirty * vm_page_size;
+
+                               seg_index  = (base_index + first_dirty) / pages_in_cl;
+                               seg_offset = (upl_offset_in_object + upl_offset) % cl_size;
+
+                               error = ps_write_file(psp[seg_index], 
+                                               upl, upl_offset,
+                                               ps_offset[seg_index] 
+                                                               + seg_offset, 
+                                               transfer_size, flags);
                        }
                }
 
        } else {
-               assert(cnt  <= (vm_page_size << vs->vs_clshift));
+               assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift));
                list_size = cnt;
 
                page_index = 0;
@@ -3037,23 +3669,23 @@ vs_cluster_write(
                                &clmap, CL_ALLOC, 
                                transfer_size < cl_size ? 
                                        transfer_size : cl_size, 0);
-                       if(actual_offset == (vm_offset_t) -1) {
+                       if(actual_offset == (dp_offset_t) -1) {
                                error = 1;
                                break;
                        }
                        cnt = MIN(transfer_size, 
-                               CLMAP_NPGS(clmap) * vm_page_size);
+                                 (unsigned) CLMAP_NPGS(clmap) * vm_page_size);
                        ps = CLMAP_PS(clmap);
                        /* Assume that the caller has given us contiguous */
                        /* pages */
                        if(cnt) {
+                               ps_vs_write_complete(vs, mobj_target_addr, 
+                                                               cnt, error);
                                error = ps_write_file(ps, internal_upl,
                                                0, actual_offset,
                                                cnt, flags);
                                if (error)
                                        break;
-                               ps_vs_write_complete(vs, mobj_target_addr, 
-                                                               cnt, error);
                           }
                        if (error)
                                break;
@@ -3078,7 +3710,7 @@ ps_vstruct_allocated_size(
 {
        int             num_pages;
        struct vs_map   *vsmap;
-       int             i, j, k;
+       unsigned int    i, j, k;
 
        num_pages = 0;
        if (vs->vs_indirect) {
@@ -3116,19 +3748,19 @@ ps_vstruct_allocated_size(
                }
        }
 
-       return ptoa(num_pages);
+       return ptoa_32(num_pages);
 }
 
-size_t
+unsigned int
 ps_vstruct_allocated_pages(
        vstruct_t               vs,
        default_pager_page_t    *pages,
-       size_t                  pages_size)
+       unsigned int            pages_size)
 {
-       int             num_pages;
+       unsigned int    num_pages;
        struct vs_map   *vsmap;
-       vm_offset_t     offset;
-       int             i, j, k;
+       dp_offset_t     offset;
+       unsigned int    i, j, k;
 
        num_pages = 0;
        offset = 0;
@@ -3191,16 +3823,12 @@ kern_return_t
 ps_vstruct_transfer_from_segment(
        vstruct_t        vs,
        paging_segment_t segment,
-#ifndef ubc_sync_working
-       vm_object_t     transfer_object)
-#else
        upl_t            upl)
-#endif
 {
        struct vs_map   *vsmap;
-       struct vs_map   old_vsmap;
-       struct vs_map   new_vsmap;
-       int             i, j, k;
+//     struct vs_map   old_vsmap;
+//     struct vs_map   new_vsmap;
+       unsigned int    i, j;
 
        VS_LOCK(vs);    /* block all work on this vstruct */
                        /* can't allow the normal multiple write */
@@ -3223,8 +3851,8 @@ ps_vstruct_transfer_from_segment(
        VS_UNLOCK(vs);
 vs_changed:
        if (vs->vs_indirect) {
-               int     vsmap_size;
-               int     clmap_off;
+               unsigned int    vsmap_size;
+               int             clmap_off;
                /* loop on indirect maps */
                for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
                        vsmap = vs->vs_imap[i];
@@ -3246,11 +3874,7 @@ vs_changed:
                                        (vm_page_size * (j << vs->vs_clshift))
                                        + clmap_off, 
                                        vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
-                                       transfer_object)
-#else
                                        upl)
-#endif
                                                != KERN_SUCCESS) {
                                   VS_LOCK(vs);
                                   vs->vs_xfer_pending = FALSE;
@@ -3263,6 +3887,14 @@ vs_changed:
                                vs->vs_xfer_pending = FALSE;
                                VS_UNLOCK(vs);
                                vs_finish_write(vs);
+
+                               if (backing_store_abort_compaction || backing_store_stop_compaction) {
+                                       backing_store_abort_compaction = FALSE;
+                                       dprintf(("ps_vstruct_transfer_from_segment - ABORTED\n"));
+                                       return KERN_FAILURE;
+                               }
+                               vnode_pager_throttle();
+
                                VS_LOCK(vs);
                                vs->vs_xfer_pending = TRUE;
                                vs_wait_for_sync_writers(vs);
@@ -3292,11 +3924,7 @@ vs_changed:
                        if(vs_cluster_transfer(vs, 
                                vm_page_size * (j << vs->vs_clshift), 
                                vm_page_size << vs->vs_clshift,
-#ifndef ubc_sync_working
-                               transfer_object) != KERN_SUCCESS) {
-#else
                                upl) != KERN_SUCCESS) {
-#endif
                           VS_LOCK(vs);
                           vs->vs_xfer_pending = FALSE;
                           VS_UNLOCK(vs);
@@ -3310,10 +3938,10 @@ vs_changed:
                        vs_finish_write(vs);
                        VS_LOCK(vs);
                        vs->vs_xfer_pending = TRUE;
-                       VS_UNLOCK(vs);
                        vs_wait_for_sync_writers(vs);
                        vs_start_write(vs);
                        vs_wait_for_readers(vs);
+                       VS_UNLOCK(vs);
                        if (vs->vs_indirect) {
                                goto vs_changed;
                        }
@@ -3332,12 +3960,12 @@ vs_changed:
 vs_map_t
 vs_get_map_entry(
        vstruct_t       vs, 
-       vm_offset_t     offset)
+       dp_offset_t     offset)
 {
        struct vs_map   *vsmap;
-       vm_offset_t     cluster;
+       dp_offset_t     cluster;
 
-       cluster = atop(offset) >> vs->vs_clshift;
+       cluster = atop_32(offset) >> vs->vs_clshift;
        if (vs->vs_indirect) {
                long    ind_block = cluster/CLMAP_ENTRIES;
 
@@ -3354,33 +3982,26 @@ vs_get_map_entry(
 kern_return_t
 vs_cluster_transfer(
        vstruct_t       vs,
-       vm_offset_t     offset,
-       vm_size_t       cnt,
-#ifndef ubc_sync_working
-       vm_object_t     transfer_object)
-#else
+       dp_offset_t     offset,
+       dp_size_t       cnt,
        upl_t           upl)
-#endif
 {
-       vm_offset_t             actual_offset;
+       dp_offset_t             actual_offset;
        paging_segment_t        ps;
        struct clmap            clmap;
        kern_return_t           error = KERN_SUCCESS;
-       int                     size, size_wanted, i;
-       unsigned int            residual;
-       int                     unavail_size;
-       default_pager_thread_t  *dpt;
-       boolean_t               dealloc;
-       struct  vs_map          *vsmap_ptr;
+       unsigned int            size, size_wanted;
+       int                     i;
+       unsigned int            residual = 0;
+       unsigned int            unavail_size;
+//     default_pager_thread_t  *dpt;
+//     boolean_t               dealloc;
+       struct  vs_map          *vsmap_ptr = NULL;
        struct  vs_map          read_vsmap;
        struct  vs_map          original_read_vsmap;
        struct  vs_map          write_vsmap;
-       upl_t                   sync_upl;
-#ifndef ubc_sync_working
-       upl_t                   upl;
-#endif
-
-       vm_offset_t     ioaddr;
+//     upl_t                           sync_upl;
+//     vm_offset_t                     ioaddr;
 
        /* vs_cluster_transfer reads in the pages of a cluster and
         * then writes these pages back to new backing store.  The
@@ -3411,7 +4032,7 @@ vs_cluster_transfer(
                vsmap_ptr = vs_get_map_entry(vs, offset);
                actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
 
-               if (actual_offset == (vm_offset_t) -1) {
+               if (actual_offset == (dp_offset_t) -1) {
 
                        /*
                         * Nothing left to write in this cluster at least
@@ -3464,6 +4085,7 @@ vs_cluster_transfer(
 
                if (size == 0) {
                        ASSERT(unavail_size);
+                       ps_clunmap(vs, offset, unavail_size);
                        cnt -= unavail_size;
                        offset += unavail_size;
                        if((offset & ((vm_page_size << vs->vs_clshift) - 1)) 
@@ -3482,34 +4104,20 @@ vs_cluster_transfer(
                        original_read_vsmap = *vsmap_ptr;
 
                if(ps->ps_segtype == PS_PARTITION) {
+                       panic("swap partition not supported\n");
+                       /*NOTREACHED*/
+                       error = KERN_FAILURE;
+                       residual = size;
 /*
-                       NEED TO BE WITH SYNC & NO COMMIT
+                       NEED TO ISSUE WITH SYNC & NO COMMIT
                        error = ps_read_device(ps, actual_offset, &buffer,
                                       size, &residual, flags);
 */
                } else {
-#ifndef ubc_sync_working
-                       int page_list_count = 0;
-
-                       error = vm_object_upl_request(transfer_object, 
-(vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
-                                       size, &upl, NULL, &page_list_count,
-                                       UPL_NO_SYNC | UPL_CLEAN_IN_PLACE 
-                                                   | UPL_SET_INTERNAL);
-                       if (error == KERN_SUCCESS) {
-                               error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, 
-                                                       size, &residual, 0);
-                               if(error)
-                                       upl_commit(upl, NULL);
-                                       upl_deallocate(upl);
-                       }
-                                       
-#else
-                       /* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/
-                       error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset, 
+                       /* NEED TO ISSUE WITH SYNC & NO COMMIT */
+                       error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset, 
                                        size, &residual, 
-                                       (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD));
-#endif
+                                       (UPL_IOSYNC | UPL_NOCOMMIT | (dp_encryption ? UPL_PAGING_ENCRYPTED : 0)));
                }
 
                read_vsmap = *vsmap_ptr;
@@ -3523,7 +4131,6 @@ vs_cluster_transfer(
                 * 
                 */
                if ((error == KERN_SUCCESS) && (residual == 0)) {
-                       int page_list_count = 0;
 
                        /*
                         * Got everything we asked for, supply the data to
@@ -3538,20 +4145,8 @@ vs_cluster_transfer(
                        /* the vm_map_copy_page_discard call              */
                        *vsmap_ptr = write_vsmap;
 
-#ifndef ubc_sync_working
-                       error = vm_object_upl_request(transfer_object, 
-                                       (vm_object_offset_t)
-                                       (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
-                                        size, &upl, NULL, &page_list_count,
-                                        UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_INTERNAL);
-                       if(vs_cluster_write(vs, upl, offset, 
-                                       size, TRUE, 0) != KERN_SUCCESS) {
-                               upl_commit(upl, NULL);
-                               upl_deallocate(upl);
-#else
                        if(vs_cluster_write(vs, upl, offset, 
                                        size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
-#endif
                                error = KERN_FAILURE;
                                if(!(VSM_ISCLR(*vsmap_ptr))) {
                                        /* unmap the new backing store object */
@@ -3569,6 +4164,7 @@ vs_cluster_transfer(
                                        */
                                        write_vsmap = *vsmap_ptr;
                                        *vsmap_ptr = read_vsmap;
+                                       ps_clunmap(vs, offset, size);
                                } else {
                                        /* discard the old backing object */
                                        write_vsmap = *vsmap_ptr;
@@ -3630,14 +4226,16 @@ vs_cluster_transfer(
 }
 
 kern_return_t
-default_pager_add_file(MACH_PORT_FACE backing_store,
-       int             *vp,
+default_pager_add_file(
+       MACH_PORT_FACE  backing_store,
+       vnode_ptr_t     vp,
        int             record_size,
-       long            size)
+       vm_size_t       size)
 {
        backing_store_t         bs;
        paging_segment_t        ps;
        int                     i;
+       unsigned int            j;
        int                     error;
 
        if ((bs = backing_store_lookup(backing_store))
@@ -3676,31 +4274,44 @@ default_pager_add_file(MACH_PORT_FACE backing_store,
        ps->ps_vnode = (struct vnode *)vp;
        ps->ps_offset = 0;
        ps->ps_record_shift = local_log2(vm_page_size / record_size);
-       ps->ps_recnum = size;
-       ps->ps_pgnum = size >> ps->ps_record_shift;
+       assert((dp_size_t) size == size);
+       ps->ps_recnum = (dp_size_t) size;
+       ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift;
 
        ps->ps_pgcount = ps->ps_pgnum;
        ps->ps_clshift = local_log2(bs->bs_clsize);
        ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
+       ps->ps_special_clusters = 0;
        ps->ps_hint = 0;
 
        PS_LOCK_INIT(ps);
        ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
        if (!ps->ps_bmap) {
-               kfree((vm_offset_t)ps, sizeof *ps);
+               PS_LOCK_DESTROY(ps);
+               kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
        }
-       for (i = 0; i < ps->ps_ncls; i++) {
-               clrbit(ps->ps_bmap, i);
+       for (j = 0; j < ps->ps_ncls; j++) {
+               clrbit(ps->ps_bmap, j);
        }
 
-       ps->ps_going_away = FALSE;
+       if(paging_segment_count == 0) {
+               ps->ps_state = PS_EMERGENCY_SEGMENT;
+               if(use_emergency_swap_file_first) {
+                       ps->ps_state |= PS_CAN_USE;
+               }
+               emergency_segment_backing_store = backing_store;
+       } else {
+               ps->ps_state = PS_CAN_USE;
+       }
+       
        ps->ps_bs = bs;
 
        if ((error = ps_enter(ps)) != 0) {
-               kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
-               kfree((vm_offset_t)ps, sizeof *ps);
+               kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
+               PS_LOCK_DESTROY(ps);
+               kfree(ps, sizeof *ps);
                BS_UNLOCK(bs);
                return KERN_RESOURCE_SHORTAGE;
        }
@@ -3708,17 +4319,42 @@ default_pager_add_file(MACH_PORT_FACE backing_store,
        bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
        bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
        PSL_LOCK();
-       dp_pages_free += ps->ps_pgcount;
+       if(IS_PS_OK_TO_USE(ps)) {
+               dp_pages_free += ps->ps_pgcount;
+       } else {
+               dp_pages_reserve += ps->ps_pgcount;
+       }
        PSL_UNLOCK();
 
        BS_UNLOCK(bs);
 
        bs_more_space(ps->ps_clcount);
 
-       DEBUG(DEBUG_BS_INTERNAL,
-             ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
-              device, offset, size, record_size,
-              ps->ps_record_shift, ps->ps_pgnum));
+       /*
+        * If the paging segment being activated is not the emergency
+        * segment and we notice that the emergency segment is being
+        * used then we help recover it. If all goes well, the
+        * emergency segment will be back to its original state of
+        * online but not activated (till it's needed the next time).
+        */
+#if CONFIG_FREEZE
+       if (!memorystatus_freeze_enabled)
+#endif
+       {
+               ps = paging_segments[EMERGENCY_PSEG_INDEX];
+               if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
+                       if(default_pager_backing_store_delete(emergency_segment_backing_store)) {
+                               dprintf(("Failed to recover emergency paging segment\n"));
+                       } else {
+                               dprintf(("Recovered emergency paging segment\n"));
+                       }
+               }
+       }
+       
+       DP_DEBUG(DEBUG_BS_INTERNAL,
+                ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
+                 device, offset, (dp_size_t) size, record_size,
+                 ps->ps_record_shift, ps->ps_pgnum));
 
        return KERN_SUCCESS;
 }
@@ -3729,9 +4365,9 @@ kern_return_t
 ps_read_file(
        paging_segment_t        ps,
        upl_t                   upl,
-       vm_offset_t             upl_offset,
-       vm_offset_t             offset,
-       unsigned int            size,
+       upl_offset_t            upl_offset,
+       dp_offset_t             offset,
+       upl_size_t              size,
        unsigned int            *residualp,
        int                     flags)
 {
@@ -3739,14 +4375,17 @@ ps_read_file(
        int                     error = 0;
        int                     result;
 
+       assert(dp_encryption_inited);
 
-       clustered_reads[atop(size)]++;
+       clustered_reads[atop_32(size)]++;
 
        f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
        
-       /* for transfer case we need to pass uploffset and flags */
-       error = vnode_pagein(ps->ps_vnode, 
-                                  upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);
+       /*
+        * for transfer case we need to pass uploffset and flags
+        */
+       assert((upl_size_t) size == size);
+       error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL);
 
        /* The vnode_pagein semantic is somewhat at odds with the existing   */
        /* device_read semantic.  Partial reads are not experienced at this  */
@@ -3768,21 +4407,29 @@ kern_return_t
 ps_write_file(
        paging_segment_t        ps,
        upl_t                   upl,
-       vm_offset_t             upl_offset,
-       vm_offset_t             offset,
+       upl_offset_t            upl_offset,
+       dp_offset_t             offset,
        unsigned int            size,
        int                     flags)
 {
        vm_object_offset_t      f_offset;
        kern_return_t           result;
 
-       int             error = 0;
+       assert(dp_encryption_inited);
 
-       clustered_writes[atop(size)]++;
+       clustered_writes[atop_32(size)]++;
        f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
 
-       if (vnode_pageout(ps->ps_vnode,
-                               upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
+       if (flags & UPL_PAGING_ENCRYPTED) {
+               /*
+                * ENCRYPTED SWAP:
+                * encrypt all the pages that we're going
+                * to pageout.
+                */
+               upl_encrypt(upl, upl_offset, size);
+       }
+       assert((upl_size_t) size == size);
+       if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL))
                result = KERN_FAILURE;
        else
                result = KERN_SUCCESS;
@@ -3790,28 +4437,109 @@ ps_write_file(
        return result;
 }
 
+static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data)
+{
+#pragma unused(data)
+}
+
+static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data)
+{
+#pragma unused(data)
+}
+
+static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length)
+{
+#pragma unused(data, map, shift, length)
+}
+
 kern_return_t
-default_pager_triggers(MACH_PORT_FACE default_pager,
+default_pager_triggers( __unused MACH_PORT_FACE default_pager,
        int             hi_wat,
        int             lo_wat,
        int             flags,
        MACH_PORT_FACE  trigger_port)
 {
-       MACH_PORT_FACE release;
+       MACH_PORT_FACE release = IPC_PORT_NULL;
        kern_return_t kr;
+       clock_sec_t now;
+       clock_nsec_t nanoseconds_dummy;
+       static clock_sec_t error_notify = 0;
 
        PSL_LOCK();
-       if (flags == HI_WAT_ALERT) {
+       if (flags == SWAP_ENCRYPT_ON) {
+               /* ENCRYPTED SWAP: turn encryption on */
+               release = trigger_port;
+               if (!dp_encryption_inited) {
+                       dp_encryption_inited = TRUE;
+                       dp_encryption = TRUE;
+                       kr = KERN_SUCCESS;
+               } else {
+                       kr = KERN_FAILURE;
+               }
+       } else if (flags == SWAP_ENCRYPT_OFF) {
+               /* ENCRYPTED SWAP: turn encryption off */
+               release = trigger_port;
+               if (!dp_encryption_inited) {
+                       dp_encryption_inited = TRUE;
+                       dp_encryption = FALSE;
+                       kr = KERN_SUCCESS;
+               } else {
+                       kr = KERN_FAILURE;
+               }
+       } else if (flags == HI_WAT_ALERT) {
                release = min_pages_trigger_port;
-               min_pages_trigger_port = trigger_port;
-               minimum_pages_remaining = hi_wat/vm_page_size;
-               bs_low = FALSE;
-               kr = KERN_SUCCESS;
+#if CONFIG_FREEZE
+               /* High and low water signals aren't applicable when freeze is */
+               /* enabled, so release the trigger ports here and return       */
+               /* KERN_FAILURE.                                               */
+               if (memorystatus_freeze_enabled) {
+                       if (IP_VALID( trigger_port )){
+                               ipc_port_release_send( trigger_port );
+                       }
+                       min_pages_trigger_port = IPC_PORT_NULL;
+                       kr = KERN_FAILURE;
+               }
+               else
+#endif
+               {
+                       min_pages_trigger_port = trigger_port;
+                       minimum_pages_remaining = hi_wat/vm_page_size;
+                       bs_low = FALSE;
+                       kr = KERN_SUCCESS;
+               }
        } else if (flags ==  LO_WAT_ALERT) {
                release = max_pages_trigger_port;
-               max_pages_trigger_port = trigger_port;
-               maximum_pages_free = lo_wat/vm_page_size;
+#if CONFIG_FREEZE
+               if (memorystatus_freeze_enabled) {
+                       if (IP_VALID( trigger_port )){
+                               ipc_port_release_send( trigger_port );
+                       }
+                       max_pages_trigger_port = IPC_PORT_NULL;
+                       kr = KERN_FAILURE;
+               }
+               else
+#endif
+               {
+                       max_pages_trigger_port = trigger_port;
+                       maximum_pages_free = lo_wat/vm_page_size;
+                       kr = KERN_SUCCESS;
+               }
+       } else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) {
+               use_emergency_swap_file_first = TRUE;
+               release = trigger_port;
                kr = KERN_SUCCESS;
+       } else if (flags == SWAP_FILE_CREATION_ERROR) {
+               release = trigger_port;
+               kr = KERN_SUCCESS;
+               if( paging_segment_count == 1) {
+                       use_emergency_swap_file_first = TRUE;
+               }
+               no_paging_space_action();
+               clock_get_system_nanotime(&now, &nanoseconds_dummy);
+               if (now > error_notify + 5) {
+                       dprintf(("Swap File Error.\n"));
+                       error_notify = now;
+               }
        } else {
                release = trigger_port;
                kr =  KERN_INVALID_ARGUMENT;
@@ -3823,3 +4551,83 @@ default_pager_triggers(MACH_PORT_FACE default_pager,
        
        return kr;
 }
+
+/*
+ * Monitor the amount of available backing store vs. the amount of
+ * required backing store, notify a listener (if present) when 
+ * backing store may safely be removed.
+ *
+ * We attempt to avoid the situation where backing store is 
+ * discarded en masse, as this can lead to thrashing as the
+ * backing store is compacted.
+ */
+
+#define PF_INTERVAL    3       /* time between free level checks */
+#define PF_LATENCY     10      /* number of intervals before release */
+
+static int dp_pages_free_low_count = 0;
+thread_call_t default_pager_backing_store_monitor_callout;
+
+void
+default_pager_backing_store_monitor(__unused thread_call_param_t p1,
+                                                                       __unused thread_call_param_t p2)
+{
+//     unsigned long long      average;
+       ipc_port_t              trigger;
+       uint64_t                deadline;
+
+       /*
+        * We determine whether it will be safe to release some
+        * backing store by watching the free page level.  If
+        * it remains below the maximum_pages_free threshold for
+        * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
+        * then we deem it safe.
+        *
+        * Note that this establishes a maximum rate at which backing
+        * store will be released, as each notification (currently)
+        * only results in a single backing store object being
+        * released.
+        */
+       if (dp_pages_free > maximum_pages_free) {
+               dp_pages_free_low_count++;
+       } else {
+               dp_pages_free_low_count = 0;
+       }
+
+       /* decide whether to send notification */
+       trigger = IP_NULL;
+       if (max_pages_trigger_port &&
+           (backing_store_release_trigger_disable == 0) &&
+           (dp_pages_free_low_count > PF_LATENCY)) {
+               trigger = max_pages_trigger_port;
+               max_pages_trigger_port = NULL;
+       }
+
+       /* send notification */
+       if (trigger != IP_NULL) {
+               VSL_LOCK();
+               if(backing_store_release_trigger_disable != 0) {
+                       assert_wait((event_t) 
+                                   &backing_store_release_trigger_disable, 
+                                   THREAD_UNINT);
+                       VSL_UNLOCK();
+                       thread_block(THREAD_CONTINUE_NULL);
+               } else {
+                       VSL_UNLOCK();
+               }
+               dprintf(("default_pager_backing_store_monitor - send LO_WAT_ALERT\n"));
+
+               default_pager_space_alert(trigger, LO_WAT_ALERT);
+               ipc_port_release_send(trigger);
+               dp_pages_free_low_count = 0;
+       }
+
+       clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
+       thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
+}
+
+#if CONFIG_FREEZE
+unsigned int default_pager_swap_pages_free() {
+       return dp_pages_free;
+}
+#endif