git.saurik.com Git - apple/xnu.git/blobdiff - osfmk/vm/bsd_vm.c
xnu-3247.1.106.tar.gz
[apple/xnu.git] / osfmk / vm / bsd_vm.c
index e4cad6a94a2bde78b4e59cabb4f6f4d645942c79..f7e485bde671ff30e51db3dbddf9cc8b1e9746e4 100644 (file)
@@ -1,31 +1,29 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
  *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
- * This file contains Original Code and/or Modifications of Original Code 
- * as defined in and that are subject to the Apple Public Source License 
- * Version 2.0 (the 'License'). You may not use this file except in 
- * compliance with the License.  The rights granted to you under the 
- * License may not be used to create, or enable the creation or 
- * redistribution of, unlawful or unlicensed copies of an Apple operating 
- * system, or to circumvent, violate, or enable the circumvention or 
- * violation of, any terms of an Apple operating system software license 
- * agreement.
- *
- * Please obtain a copy of the License at 
- * http://www.opensource.apple.com/apsl/ and read it before using this 
- * file.
- *
- * The Original Code and all software distributed under the License are 
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 
- * Please see the License for the specific language governing rights and 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
  * limitations under the License.
- *
- * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 
 #include <sys/errno.h>
@@ -41,6 +39,7 @@
 #include <mach/upl.h>
 #include <mach/thread_act.h>
 
+#include <kern/assert.h>
 #include <kern/host.h>
 #include <kern/thread.h>
 
@@ -55,6 +54,8 @@
 #include <vm/memory_object.h>
 #include <vm/vm_pageout.h>
 #include <vm/vm_protos.h>
+#include <vm/vm_purgeable_internal.h>
+
 
 /* BSD VM COMPONENT INTERFACES */
 int
@@ -91,26 +92,6 @@ mach_get_vm_end(vm_map_t map)
        return( vm_map_last_entry(map)->vme_end);
 }
 
-/*
- * Legacy routines to get the start and end for a vm_map_t.  They
- * return them in the vm_offset_t format.  So, they should only be
- * called on maps that are the same size as the kernel map for
- * accurate results.
- */
-vm_offset_t
-get_vm_start(
-       vm_map_t map)
-{
-       return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
-}
-
-vm_offset_t
-get_vm_end(
-       vm_map_t map)
-{
-       return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
-}
-
 /* 
  * BSD VNODE PAGER 
  */
@@ -125,19 +106,23 @@ const struct memory_object_pager_ops vnode_pager_ops = {
        vnode_pager_data_initialize,
        vnode_pager_data_unlock,
        vnode_pager_synchronize,
-       vnode_pager_unmap,
+       vnode_pager_map,
+       vnode_pager_last_unmap,
+       NULL, /* data_reclaim */
        "vnode pager"
 };
 
 typedef struct vnode_pager {
+       struct ipc_object_header        pager_header;   /* fake ip_kotype()             */
        memory_object_pager_ops_t pager_ops;    /* == &vnode_pager_ops       */
-       unsigned int            pager_ikot;     /* JMM: fake ip_kotype()     */
        unsigned int            ref_count;      /* reference count           */
        memory_object_control_t control_handle; /* mem object control handle */
        struct vnode            *vnode_handle;  /* vnode handle              */
 } *vnode_pager_t;
 
 
+#define pager_ikot pager_header.io_bits
+
 ipc_port_t
 trigger_name_to_port(                  /* forward */
        mach_port_t);
@@ -145,7 +130,9 @@ trigger_name_to_port(                       /* forward */
 kern_return_t
 vnode_pager_cluster_read(              /* forward */
        vnode_pager_t, 
-       vm_object_offset_t, 
+       vm_object_offset_t,
+       vm_object_offset_t,
+       uint32_t,
        vm_size_t);
 
 void
@@ -190,14 +177,20 @@ int pagerdebug=0;
 #define PAGER_DEBUG(LEVEL, A)
 #endif
 
+extern int proc_resetpcontrol(int);
+
+#if DEVELOPMENT || DEBUG
+extern unsigned long vm_cs_validated_resets;
+#endif
+
 /*
- *     Routine:        macx_triggers
+ *     Routine:        mach_macx_triggers
  *     Function:
  *             Syscall interface to set the call backs for low and
  *             high water marks.
  */
 int
-macx_triggers(
+mach_macx_triggers(
        struct macx_triggers_args *args)
 {
        int     hi_water = args->hi_water;
@@ -215,16 +208,16 @@ macx_triggers(
                return EINVAL;
        }
 
-       if ((flags & SWAP_ENCRYPT_ON) &&
-           (flags & SWAP_ENCRYPT_OFF)) {
+       if (((flags & SWAP_ENCRYPT_ON) && (flags & SWAP_ENCRYPT_OFF)) || 
+           ((flags & SWAP_COMPACT_ENABLE) && (flags & SWAP_COMPACT_DISABLE))) {
                /* can't have it both ways */
                return EINVAL;
        }
 
-       if (default_pager_init_flag == 0) {
+       if (default_pager_init_flag == 0) {
                start_def_pager(NULL);
                default_pager_init_flag = 1;
-       }
+       }
 
        if (flags & SWAP_ENCRYPT_ON) {
                /* ENCRYPTED SWAP: tell default_pager to encrypt */
@@ -240,6 +233,33 @@ macx_triggers(
                                       IP_NULL);
        }
 
+       if (flags & USE_EMERGENCY_SWAP_FILE_FIRST) {
+               /*
+                * Time to switch to the emergency segment.
+                */
+               return default_pager_triggers(default_pager,
+                                       0, 0, 
+                                       USE_EMERGENCY_SWAP_FILE_FIRST,
+                                       IP_NULL);
+       }
+
+       if (flags & SWAP_FILE_CREATION_ERROR) {
+               /* 
+                * For some reason, the dynamic pager failed to create a swap file.
+                */
+               trigger_port = trigger_name_to_port(trigger_name);
+               if(trigger_port == NULL) {
+                       return EINVAL;
+               }
+               /* trigger_port is locked and active */
+               ipc_port_make_send_locked(trigger_port); 
+               ip_unlock(trigger_port);
+               default_pager_triggers(default_pager,
+                                       0, 0, 
+                                       SWAP_FILE_CREATION_ERROR,
+                                       trigger_port);
+       }
+
        if (flags & HI_WAT_ALERT) {
                trigger_port = trigger_name_to_port(trigger_name);
                if(trigger_port == NULL) {
@@ -247,7 +267,7 @@ macx_triggers(
                }
                /* trigger_port is locked and active */
                ipc_port_make_send_locked(trigger_port); 
-               /* now unlocked */
+               ip_unlock(trigger_port);
                default_pager_triggers(default_pager, 
                                       hi_water, low_water,
                                       HI_WAT_ALERT, trigger_port);
@@ -260,12 +280,24 @@ macx_triggers(
                }
                /* trigger_port is locked and active */
                ipc_port_make_send_locked(trigger_port);
-               /* and now its unlocked */
+               ip_unlock(trigger_port);
                default_pager_triggers(default_pager, 
                                       hi_water, low_water,
                                       LO_WAT_ALERT, trigger_port);
        }
 
+
+       if (flags & PROC_RESUME) {
+
+               /*
+                * For this call, hi_water is used to pass in the pid of the process we want to resume
+                * or unthrottle.  This is of course restricted to the superuser (checked inside of 
+                * proc_resetpcontrol).
+                */
+
+               return proc_resetpcontrol(hi_water);
+       }
+
        /*
         * Set thread scheduling priority and policy for the current thread
         * it is assumed for the time being that the thread setting the alert
@@ -273,7 +305,7 @@ macx_triggers(
         *
         * XXX This does not belong in the kernel XXX
         */
-       {
+       if (flags & HI_WAT_ALERT) {
                thread_precedence_policy_data_t         pre;
                thread_extended_policy_data_t           ext;
 
@@ -289,9 +321,13 @@ macx_triggers(
                                  THREAD_PRECEDENCE_POLICY,
                                  (thread_policy_t)&pre,
                                  THREAD_PRECEDENCE_POLICY_COUNT);
+
+               current_thread()->options |= TH_OPT_VMPRIV;
        }
  
-       current_thread()->options |= TH_OPT_VMPRIV;
+       if (flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE)) {
+               return macx_backing_store_compaction(flags & (SWAP_COMPACT_DISABLE | SWAP_COMPACT_ENABLE));
+       }
 
        return 0;
 }
@@ -310,7 +346,7 @@ trigger_name_to_port(
                return (NULL);
 
        space  = current_space();
-       if(ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
+       if(ipc_port_translate_receive(space, CAST_MACH_PORT_TO_NAME(trigger_name),
                                                &trigger_port) != KERN_SUCCESS)
                return (NULL);
        return trigger_port;
@@ -327,7 +363,8 @@ memory_object_control_uiomove(
        void            *       uio,
        int                     start_offset,
        int                     io_requested,
-       int                     mark_dirty)
+       int                     mark_dirty,
+       int                     take_reference)
 {
        vm_object_t             object;
        vm_page_t               dst_page;
@@ -336,9 +373,9 @@ memory_object_control_uiomove(
        int                     cur_run;
        int                     cur_needed;
        int                     i;
+       int                     orig_offset;
        vm_page_t               page_run[MAX_RUN];
 
-
        object = memory_object_control_to_vm_object(control);
        if (object == VM_OBJECT_NULL) {
                return (0);
@@ -357,6 +394,7 @@ memory_object_control_uiomove(
                vm_object_unlock(object);
                return 0;
        }
+       orig_offset = start_offset;
            
        while (io_requested && retval == 0) {
 
@@ -369,22 +407,26 @@ memory_object_control_uiomove(
 
                        if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
                                break;
-                       /*
-                        * Sync up on getting the busy bit
-                        */
-                       if ((dst_page->busy || dst_page->cleaning)) {
-                               /*
+
+
+                       if (dst_page->busy || dst_page->cleaning) {
+                               /*
                                 * someone else is playing with the page... if we've
                                 * already collected pages into this run, go ahead
                                 * and process now, we can't block on this
                                 * page while holding other pages in the BUSY state
                                 * otherwise we will wait
                                 */
-                               if (cur_run)
-                                       break;
-                               PAGE_SLEEP(object, dst_page, THREAD_UNINT);
+                               if (cur_run)
+                                       break;
+                               PAGE_SLEEP(object, dst_page, THREAD_UNINT);
                                continue;
                        }
+                       if (dst_page->laundry) {
+                               dst_page->pageout = FALSE;
+                               
+                               vm_pageout_steal_laundry(dst_page, FALSE);
+                       }
                        /*
                         * this routine is only called when copying
                         * to/from real files... no need to consider
@@ -392,8 +434,22 @@ memory_object_control_uiomove(
                         */
                        assert(!dst_page->encrypted);
 
-                       if (mark_dirty)
-                               dst_page->dirty = TRUE;
+                       if (mark_dirty) {
+                               SET_PAGE_DIRTY(dst_page, FALSE);
+                               if (dst_page->cs_validated && 
+                                   !dst_page->cs_tainted) {
+                                       /*
+                                        * CODE SIGNING:
+                                        * We're modifying a code-signed
+                                        * page: force revalidate
+                                        */
+                                       dst_page->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+                                        vm_cs_validated_resets++;
+#endif
+                                       pmap_disconnect(dst_page->phys_page);
+                               }
+                       }
                        dst_page->busy = TRUE;
 
                        page_run[cur_run++] = dst_page;
@@ -402,8 +458,9 @@ memory_object_control_uiomove(
                }
                if (cur_run == 0)
                        /*
-                        * we hit a 'hole' in the cache
-                        * we bail at this point
+                        * we hit a 'hole' in the cache or
+                        * a page we don't want to try to handle,
+                        * so bail at this point
                         * we'll unlock the object below
                         */
                        break;
@@ -416,7 +473,7 @@ memory_object_control_uiomove(
                        if ((xsize = PAGE_SIZE - start_offset) > io_requested)
                                xsize = io_requested;
 
-                       if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
+                       if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << PAGE_SHIFT) + start_offset), xsize, uio)) )
                                break;
 
                        io_requested -= xsize;
@@ -424,11 +481,41 @@ memory_object_control_uiomove(
                }
                vm_object_lock(object);
 
+               /*
+                * if we have more than 1 page to work on
+                * in the current run, or the original request
+                * started at offset 0 of the page, or we're
+                * processing multiple batches, we will move
+                * the pages to the tail of the inactive queue
+                * to implement an LRU for read/write accesses
+                *
+                * the check for orig_offset == 0 is there to 
+                * mitigate the cost of small (< page_size) requests
+                * to the same page (this way we only move it once)
+                */
+               if (take_reference && (cur_run > 1 || orig_offset == 0)) {
+
+                       vm_page_lockspin_queues();
+
+                       for (i = 0; i < cur_run; i++)
+                               vm_page_lru(page_run[i]);
+
+                       vm_page_unlock_queues();
+               }
                for (i = 0; i < cur_run; i++) {
                        dst_page = page_run[i];
 
+                       /*
+                        * someone is explicitly referencing this page...
+                        * update clustered and speculative state
+                        * 
+                        */
+                       if (dst_page->clustered)
+                               VM_PAGE_CONSUME_CLUSTERED(dst_page);
+
                        PAGE_WAKEUP_DONE(dst_page);
                }
+               orig_offset = 0;
        }
        vm_object_unlock(object);
 
@@ -447,9 +534,14 @@ vnode_pager_bootstrap(void)
        size = (vm_size_t) sizeof(struct vnode_pager);
        vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
                                PAGE_SIZE, "vnode pager structures");
-#ifdef __i386__
+       zone_change(vnode_pager_zone, Z_CALLERACCT, FALSE);
+       zone_change(vnode_pager_zone, Z_NOENCRYPT, TRUE);
+
+
+#if CONFIG_CODE_DECRYPTION
        apple_protect_pager_bootstrap();
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
+       swapfile_pager_bootstrap();
        return;
 }
 
@@ -478,14 +570,14 @@ vnode_pager_init(memory_object_t mem_obj,
 #if !DEBUG
                 __unused
 #endif
-                vm_size_t pg_size)
+                memory_object_cluster_size_t pg_size)
 {
        vnode_pager_t   vnode_object;
        kern_return_t   kr;
        memory_object_attr_info_data_t  attributes;
 
 
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %lx\n", mem_obj, control, (unsigned long)pg_size));
 
        if (control == MEMORY_OBJECT_CONTROL_NULL)
                return KERN_INVALID_ARGUMENT;
@@ -520,7 +612,7 @@ kern_return_t
 vnode_pager_data_return(
         memory_object_t                mem_obj,
         memory_object_offset_t offset,
-        vm_size_t              data_cnt,
+        memory_object_cluster_size_t           data_cnt,
         memory_object_offset_t *resid_offset,
        int                     *io_error,
        __unused boolean_t              dirty,
@@ -540,7 +632,7 @@ kern_return_t
 vnode_pager_data_initialize(
        __unused memory_object_t                mem_obj,
        __unused memory_object_offset_t offset,
-       __unused vm_size_t              data_cnt)
+       __unused memory_object_cluster_size_t           data_cnt)
 {
        panic("vnode_pager_data_initialize");
        return KERN_FAILURE;
@@ -550,12 +642,62 @@ kern_return_t
 vnode_pager_data_unlock(
        __unused memory_object_t                mem_obj,
        __unused memory_object_offset_t offset,
-       __unused vm_size_t              size,
+       __unused memory_object_size_t           size,
        __unused vm_prot_t              desired_access)
 {
        return KERN_FAILURE;
 }
 
+kern_return_t                          /* Report, via *isinuse, whether the vnode behind a vnode-pager memory object is in use. */
+vnode_pager_get_isinuse(
+       memory_object_t         mem_obj,        /* memory object to query; dereferenced below without a NULL check */
+       uint32_t                *isinuse)       /* out: 1 if mem_obj is not a vnode pager, otherwise the vnode_pager_isinuse() result */
+{
+       vnode_pager_t   vnode_object;
+
+       if (mem_obj->mo_pager_ops != &vnode_pager_ops) {        /* object is not managed by the vnode pager */
+               *isinuse = 1;                   /* the out-parameter is still written on this error path */
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+
+       *isinuse = vnode_pager_isinuse(vnode_object->vnode_handle);
+       return KERN_SUCCESS;
+}
+
+kern_return_t                          /* Fetch the throttle I/O limit for the vnode behind a vnode-pager memory object. */
+vnode_pager_get_throttle_io_limit(
+       memory_object_t         mem_obj,        /* memory object to query; dereferenced below without a NULL check */
+       uint32_t                *limit)         /* out: handed to vnode_pager_return_throttle_io_limit(); not written on the error path */
+{
+       vnode_pager_t   vnode_object;
+
+       if (mem_obj->mo_pager_ops != &vnode_pager_ops)  /* object is not managed by the vnode pager */
+               return KERN_INVALID_ARGUMENT;
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+
+       (void)vnode_pager_return_throttle_io_limit(vnode_object->vnode_handle, limit); /* helper's return value is deliberately discarded */
+       return KERN_SUCCESS;            /* success is reported regardless of what the helper returned */
+}
+
+kern_return_t                          /* Report, via *isSSD, the vnode_pager_isSSD() result for the vnode behind a vnode-pager memory object. */
+vnode_pager_get_isSSD(
+       memory_object_t         mem_obj,        /* memory object to query; dereferenced below without a NULL check */
+       boolean_t               *isSSD)         /* out: written only on the success path */
+{
+       vnode_pager_t   vnode_object;
+
+       if (mem_obj->mo_pager_ops != &vnode_pager_ops)  /* object is not managed by the vnode pager */
+               return KERN_INVALID_ARGUMENT;
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+
+       *isSSD = vnode_pager_isSSD(vnode_object->vnode_handle);
+       return KERN_SUCCESS;
+}
+
 kern_return_t
 vnode_pager_get_object_size(
        memory_object_t         mem_obj,
@@ -575,10 +717,13 @@ vnode_pager_get_object_size(
 }
 
 kern_return_t
-vnode_pager_get_object_pathname(
+vnode_pager_get_object_name(
        memory_object_t         mem_obj,
        char                    *pathname,
-       vm_size_t               *length_p)
+       vm_size_t               pathname_len,
+       char                    *filename,
+       vm_size_t               filename_len,
+       boolean_t               *truncated_path_p)
 {
        vnode_pager_t   vnode_object;
 
@@ -588,15 +733,19 @@ vnode_pager_get_object_pathname(
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
-       return vnode_pager_get_pathname(vnode_object->vnode_handle,
-                                       pathname,
-                                       length_p);
+       return vnode_pager_get_name(vnode_object->vnode_handle,
+                                   pathname,
+                                   pathname_len,
+                                   filename,
+                                   filename_len,
+                                   truncated_path_p);
 }
 
 kern_return_t
-vnode_pager_get_object_filename(
-       memory_object_t mem_obj,
-       char            **filename)
+vnode_pager_get_object_mtime(
+       memory_object_t         mem_obj,
+       struct timespec         *mtime,
+       struct timespec         *cs_mtime)
 {
        vnode_pager_t   vnode_object;
 
@@ -606,9 +755,47 @@ vnode_pager_get_object_filename(
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
-       return vnode_pager_get_filename(vnode_object->vnode_handle,
-                                       filename);
+       return vnode_pager_get_mtime(vnode_object->vnode_handle,
+                                    mtime,
+                                    cs_mtime);
+}
+
+kern_return_t                          /* Retrieve the code-signing blobs pointer for the vnode behind a vnode-pager memory object. */
+vnode_pager_get_object_cs_blobs(
+       memory_object_t mem_obj,        /* memory object to query; may be MEMORY_OBJECT_NULL (rejected below) */
+       void            **blobs)        /* out: passed through unchanged to vnode_pager_get_cs_blobs() */
+{
+       vnode_pager_t   vnode_object;
+
+       if (mem_obj == MEMORY_OBJECT_NULL ||            /* reject a null object before dereferencing it ... */
+           mem_obj->mo_pager_ops != &vnode_pager_ops) {        /* ... and any object not managed by the vnode pager */
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+
+       return vnode_pager_get_cs_blobs(vnode_object->vnode_handle,     /* helper's status is returned to the caller as-is */
+                                       blobs);
+}
+
+#if CHECK_CS_VALIDATION_BITMAP
+kern_return_t                          /* Forward a validation-bitmap request to ubc_cs_check_validation_bitmap() for the backing vnode. */
+vnode_pager_cs_check_validation_bitmap( 
+       memory_object_t mem_obj,        /* memory object to query; may be MEMORY_OBJECT_NULL (rejected below) */
+       memory_object_offset_t  offset, /* passed through unchanged to the ubc helper */
+        int            optype  )       /* passed through unchanged; meaning is defined by the ubc helper (not visible here) */
+{
+       vnode_pager_t   vnode_object;
+
+       if (mem_obj == MEMORY_OBJECT_NULL ||            /* reject a null object before dereferencing it ... */
+           mem_obj->mo_pager_ops != &vnode_pager_ops) {        /* ... and any object not managed by the vnode pager */
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+       return ubc_cs_check_validation_bitmap( vnode_object->vnode_handle, offset, optype );
 }
+#endif /* CHECK_CS_VALIDATION_BITMAP */
 
 /*
  *
@@ -617,21 +804,27 @@ kern_return_t
 vnode_pager_data_request(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
-       vm_size_t               length,
-#if !DEBUG
-       __unused
-#endif
-vm_prot_t              protection_required)
+       __unused memory_object_cluster_size_t   length,
+       __unused vm_prot_t      desired_access,
+       memory_object_fault_info_t      fault_info)
 {
-       register vnode_pager_t  vnode_object;
-
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj, offset, length, protection_required));
+       vnode_pager_t           vnode_object;
+       memory_object_offset_t  base_offset;
+       vm_size_t               size;
+       uint32_t                io_streaming = 0;
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
-       PAGER_DEBUG(PAGER_PAGEIN, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj, offset, length, protection_required, vnode_object));
-               
-       return vnode_pager_cluster_read(vnode_object, offset, length);
+       size = MAX_UPL_TRANSFER_BYTES;
+       base_offset = offset;
+
+       if (memory_object_cluster_size(vnode_object->control_handle, &base_offset, &size, &io_streaming, fault_info) != KERN_SUCCESS)
+               size = PAGE_SIZE;
+
+       assert(offset >= base_offset &&
+              offset < base_offset + size);
+
+       return vnode_pager_cluster_read(vnode_object, base_offset, offset, io_streaming, size);
 }
 
 /*
@@ -658,7 +851,7 @@ vnode_pager_deallocate(
 {
        register vnode_pager_t  vnode_object;
 
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %x\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %p\n", mem_obj));
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
@@ -681,7 +874,7 @@ vnode_pager_terminate(
 #endif
        memory_object_t mem_obj)
 {
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %x\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %p\n", mem_obj));
 
        return(KERN_SUCCESS);
 }
@@ -693,12 +886,12 @@ kern_return_t
 vnode_pager_synchronize(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
-       vm_size_t               length,
+       memory_object_size_t            length,
        __unused vm_sync_t              sync_flags)
 {
        register vnode_pager_t  vnode_object;
 
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %x\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %p\n", mem_obj));
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
@@ -711,12 +904,36 @@ vnode_pager_synchronize(
  *
  */
 kern_return_t
-vnode_pager_unmap(
+vnode_pager_map(                       /* Pager "map" operation: calls ubc_map() on the backing vnode with the given protection. */
+       memory_object_t         mem_obj,        /* memory object being mapped; resolved via vnode_pager_lookup() */
+       vm_prot_t               prot)           /* protection value, forwarded unchanged to ubc_map() */
+{
+       vnode_pager_t           vnode_object;
+       int                     ret;            /* raw ubc_map() result */
+       kern_return_t           kr;             /* Mach status derived from ret */
+
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
+
+       vnode_object = vnode_pager_lookup(mem_obj);     /* memory object -> vnode_pager_t */
+
+       ret = ubc_map(vnode_object->vnode_handle, prot);
+
+       if (ret != 0) {                 /* any nonzero ubc_map() result collapses to KERN_FAILURE; the specific code is not propagated */
+               kr = KERN_FAILURE;
+       } else {
+               kr = KERN_SUCCESS;
+       }
+
+       return kr;
+}
+
+kern_return_t
+vnode_pager_last_unmap(
        memory_object_t         mem_obj)
 {
        register vnode_pager_t  vnode_object;
 
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %x\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
@@ -725,6 +942,7 @@ vnode_pager_unmap(
 }
 
 
+
 /*
  *
  */
@@ -737,9 +955,7 @@ vnode_pager_cluster_write(
        int                  *  io_error,
        int                     upl_flags)
 {
-        vm_size_t       size;
-       upl_t           upl = NULL;
-       int             request_flags;
+       vm_size_t       size;
        int             errno;
 
        if (upl_flags & UPL_MSYNC) {
@@ -750,20 +966,11 @@ vnode_pager_cluster_write(
                        upl_flags |= UPL_KEEPCACHED;
 
                while (cnt) {
-                       kern_return_t   kr;
-
-                       size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */
-
-                       request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
-                                       UPL_SET_INTERNAL | UPL_SET_LITE;
-
-                       kr = memory_object_upl_request(vnode_object->control_handle, 
-                                                      offset, size, &upl, NULL, NULL, request_flags);
-                       if (kr != KERN_SUCCESS)
-                               panic("vnode_pager_cluster_write: upl request failed\n");
+                       size = (cnt < MAX_UPL_TRANSFER_BYTES) ? cnt : MAX_UPL_TRANSFER_BYTES; /* effective max */
 
+                       assert((upl_size_t) size == size);
                        vnode_pageout(vnode_object->vnode_handle, 
-                                     upl, (vm_offset_t)0, offset, size, upl_flags, &errno);
+                                     NULL, (upl_offset_t)0, offset, (upl_size_t)size, upl_flags, &errno);
 
                        if ( (upl_flags & UPL_KEEPCACHED) ) {
                                if ( (*io_error = errno) )
@@ -778,9 +985,6 @@ vnode_pager_cluster_write(
        } else {
                vm_object_offset_t      vnode_size;
                vm_object_offset_t      base_offset;
-               vm_object_t             object;
-               vm_page_t               target_page;
-               int                     ticket;
 
                /*
                 * this is the pageout path
@@ -794,11 +998,11 @@ vnode_pager_cluster_write(
                         * and then clip the size to insure we
                         * don't request past the end of the underlying file
                         */
-                       size = PAGE_SIZE * MAX_UPL_TRANSFER;
+                       size = MAX_UPL_TRANSFER_BYTES;
                        base_offset = offset & ~((signed)(size - 1));
 
                        if ((base_offset + size) > vnode_size)
-                               size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
+                               size = round_page(((vm_size_t)(vnode_size - base_offset)));
                } else {
                        /*
                         * we've been requested to page out a page beyond the current
@@ -810,36 +1014,10 @@ vnode_pager_cluster_write(
                        base_offset = offset;
                        size = PAGE_SIZE;
                }
-               object = memory_object_control_to_vm_object(vnode_object->control_handle);
-
-               if (object == VM_OBJECT_NULL)
-                       panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");
-
-               request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
-                               UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
-                               UPL_SET_INTERNAL | UPL_SET_LITE;
-
-               vm_object_lock(object);
-
-               if ((target_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
-                       /*
-                        * only pick up pages whose ticket number matches
-                        * the ticket number of the page orginally targeted
-                        * for pageout
-                        */
-                       ticket = target_page->page_ticket;
-
-                       request_flags |= ((ticket << UPL_PAGE_TICKET_SHIFT) & UPL_PAGE_TICKET_MASK);
-               }
-               vm_object_unlock(object);
-
-               vm_object_upl_request(object, base_offset, size,
-                                     &upl, NULL, NULL, request_flags);
-               if (upl == NULL)
-                       panic("vnode_pager_cluster_write: upl request failed\n");
-
+               assert((upl_size_t) size == size);
                vnode_pageout(vnode_object->vnode_handle,
-                              upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
+                             NULL, (upl_offset_t)(offset - base_offset), base_offset, (upl_size_t) size,
+                             (upl_flags & UPL_IOSYNC) | UPL_VNODE_PAGER, NULL);
        }
 }
 
@@ -850,20 +1028,27 @@ vnode_pager_cluster_write(
 kern_return_t
 vnode_pager_cluster_read(
        vnode_pager_t           vnode_object,
+       vm_object_offset_t      base_offset,
        vm_object_offset_t      offset,
+       uint32_t                io_streaming,
        vm_size_t               cnt)
 {
        int             local_error = 0;
        int             kret;
+       int             flags = 0;
 
        assert(! (cnt & PAGE_MASK));
 
+       if (io_streaming)
+               flags |= UPL_IOSTREAMING;
+
+       assert((upl_size_t) cnt == cnt);
        kret = vnode_pagein(vnode_object->vnode_handle,
                            (upl_t) NULL,
-                           (vm_offset_t) NULL,
-                           offset,
-                           cnt,
-                           0,
+                           (upl_offset_t) (offset - base_offset),
+                           base_offset,
+                           (upl_size_t) cnt,
+                           flags,
                            &local_error);
 /*
        if(kret == PAGER_ABSENT) {
@@ -881,8 +1066,9 @@ vnode_pager_cluster_read(
                            UPL_CLEAN_IN_PLACE |
                            UPL_SET_INTERNAL);
                count = 0;
+               assert((upl_size_t) cnt == cnt);
                kr = memory_object_upl_request(vnode_object->control_handle,
-                                              offset, cnt,
+                                              base_offset, (upl_size_t) cnt,
                                               &upl, NULL, &count, uplflags);
                if (kr == KERN_SUCCESS) {
                        upl_abort(upl, 0);
@@ -933,7 +1119,7 @@ vnode_object_create(
         * The vm_map call takes both named entry ports and raw memory
         * objects in the same parameter.  We need to make sure that
         * vm_map does not see this object as a named entry port.  So,
-        * we reserve the second word in the object for a fake ip_kotype
+        * we reserve the first word in the object for a fake ip_kotype
         * setting - that will tell vm_map to use it as a memory object.
         */
        vnode_object->pager_ops = &vnode_pager_ops;
@@ -964,14 +1150,14 @@ vnode_pager_lookup(
 
 #include <sys/bsdtask_info.h>
 
-static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uint32_t * vnodeaddr, uint32_t * vid);
+static int fill_vnodeinfoforaddr( vm_map_entry_t entry, uintptr_t * vnodeaddr, uint32_t * vid);
 
 
 int
-fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uint32_t  *vnodeaddr, uint32_t  *vid)
+fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
 {
 
-       vm_map_t map = task->map;
+       vm_map_t map;
        vm_map_offset_t address = (vm_map_offset_t )arg;
        vm_map_entry_t          tmp_entry;
        vm_map_entry_t          entry;
@@ -979,16 +1165,23 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
        vm_region_extended_info_data_t extended;
        vm_region_top_info_data_t top;
 
-
-       if (map == VM_MAP_NULL) 
-               return(0);
-
+           task_lock(task);
+           map = task->map;
+           if (map == VM_MAP_NULL) 
+           {
+                       task_unlock(task);
+                       return(0);
+           }
+           vm_map_reference(map); 
+           task_unlock(task);
+           
            vm_map_lock_read(map);
 
            start = address;
            if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
                if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
                        vm_map_unlock_read(map);
+                       vm_map_deallocate(map); 
                        return(0);
                }
            } else {
@@ -997,13 +1190,13 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
 
            start = entry->vme_start;
 
-           pinfo->pri_offset = entry->offset;
+           pinfo->pri_offset = VME_OFFSET(entry);
            pinfo->pri_protection = entry->protection;
            pinfo->pri_max_protection = entry->max_protection;
            pinfo->pri_inheritance = entry->inheritance;
            pinfo->pri_behavior = entry->behavior;
            pinfo->pri_user_wired_count = entry->user_wired_count;
-           pinfo->pri_user_tag = entry->alias;
+           pinfo->pri_user_tag = VME_ALIAS(entry);
 
            if (entry->is_sub_map) {
                pinfo->pri_flags |= PROC_REGION_SUBMAP;
@@ -1014,7 +1207,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
 
 
            extended.protection = entry->protection;
-           extended.user_tag = entry->alias;
+           extended.user_tag = VME_ALIAS(entry);
            extended.pages_resident = 0;
            extended.pages_swapped_out = 0;
            extended.pages_shared_now_private = 0;
@@ -1022,7 +1215,7 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
            extended.external_pager = 0;
            extended.shadow_depth = 0;
 
-           vm_map_region_walk(map, start, entry, entry->offset, entry->vme_end - start, &extended);
+           vm_map_region_walk(map, start, entry, VME_OFFSET(entry), entry->vme_end - start, &extended);
 
            if (extended.external_pager && extended.ref_count == 2 && extended.share_mode == SM_SHARED)
                    extended.share_mode = SM_PRIVATE;
@@ -1049,22 +1242,104 @@ fill_procregioninfo(task_t task, uint64_t arg, struct proc_regioninfo_internal *
            pinfo->pri_depth = 0;
        
            if ((vnodeaddr != 0) && (entry->is_sub_map == 0)) {
-               *vnodeaddr = (uint32_t)0;
+               *vnodeaddr = (uintptr_t)0;
 
                if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid) ==0) {
                        vm_map_unlock_read(map);
+                       vm_map_deallocate(map); 
                        return(1);
                }
            }
 
            vm_map_unlock_read(map);
+           vm_map_deallocate(map); 
            return(1);
 }
 
+int
+fill_procregioninfo_onlymappedvnodes(task_t task, uint64_t arg, struct proc_regioninfo_internal *pinfo, uintptr_t *vnodeaddr, uint32_t  *vid)
+{
+       /* Describe the first non-submap entry at or after address 'arg' for which fill_vnodeinfoforaddr() succeeds; 1 if found, else 0. */
+       vm_map_t map;
+       vm_map_offset_t address = (vm_map_offset_t )arg;
+       vm_map_entry_t          tmp_entry;
+       vm_map_entry_t          entry;
+       /* Take our own reference on the task's map while the task lock is held; a task without a map has nothing to report. */
+       task_lock(task);
+       map = task->map;
+       if (map == VM_MAP_NULL) 
+       {
+               task_unlock(task);
+               return(0);
+       }
+       vm_map_reference(map); 
+       task_unlock(task);
+       /* From here on, every return path drops both the read lock and the map reference taken above. */
+       vm_map_lock_read(map);
+       /* If the lookup fails, continue from tmp_entry->vme_next; give up when that is already vm_map_to_entry(map). */
+       if (!vm_map_lookup_entry(map, address, &tmp_entry)) {
+               if ((entry = tmp_entry->vme_next) == vm_map_to_entry(map)) {
+                       vm_map_unlock_read(map);
+                       vm_map_deallocate(map); 
+                       return(0);
+               }
+       } else {
+               entry = tmp_entry;
+       }
+       /* Scan forward through the entry list; reaching vm_map_to_entry(map) terminates the walk. */
+       while (entry != vm_map_to_entry(map)) {
+               *vnodeaddr = 0;
+               *vid = 0;
+               /* Both out-pointers were just written without a NULL check, so callers must pass valid pointers. */
+               if (entry->is_sub_map == 0) {
+                       if (fill_vnodeinfoforaddr(entry, vnodeaddr, vid)) {
+                               /* A non-zero result is treated as "vnode found": report this entry's attributes. */
+                               pinfo->pri_offset = VME_OFFSET(entry);
+                               pinfo->pri_protection = entry->protection;
+                               pinfo->pri_max_protection = entry->max_protection;
+                               pinfo->pri_inheritance = entry->inheritance;
+                               pinfo->pri_behavior = entry->behavior;
+                               pinfo->pri_user_wired_count = entry->user_wired_count;
+                               pinfo->pri_user_tag = VME_ALIAS(entry);
+                               /* NOTE(review): pri_flags is only OR-ed here, never cleared -- presumably the caller zeroes *pinfo; confirm. */
+                               if (entry->is_shared)
+                                       pinfo->pri_flags |= PROC_REGION_SHARED;
+                               /* Page statistics are not gathered on this path; the fields below are simply reported as zero. */
+                               pinfo->pri_pages_resident = 0;
+                               pinfo->pri_pages_shared_now_private = 0;
+                               pinfo->pri_pages_swapped_out = 0;
+                               pinfo->pri_pages_dirtied = 0;
+                               pinfo->pri_ref_count = 0;
+                               pinfo->pri_shadow_depth = 0;
+                               pinfo->pri_share_mode = 0;
+                               
+                               pinfo->pri_private_pages_resident = 0;
+                               pinfo->pri_shared_pages_resident = 0;
+                               pinfo->pri_obj_id = 0;
+                               
+                               pinfo->pri_address = (uint64_t)entry->vme_start;
+                               pinfo->pri_size = (uint64_t)(entry->vme_end - entry->vme_start);
+                               pinfo->pri_depth = 0;
+                               /* Found: release the read lock and the map reference before reporting success. */
+                               vm_map_unlock_read(map);
+                               vm_map_deallocate(map); 
+                               return(1);
+                       }
+               }
+
+               /* Keep searching for a vnode-backed mapping */
+               entry = entry->vme_next;
+       }
+       /* The walk ended without any entry being accepted; *vnodeaddr and *vid were left at 0 by the last iteration. */
+       vm_map_unlock_read(map);
+       vm_map_deallocate(map); 
+       return(0);
+}
+
 static int
 fill_vnodeinfoforaddr(
        vm_map_entry_t                  entry,
-       uint32_t * vnodeaddr,
+       uintptr_t * vnodeaddr,
        uint32_t * vid)
 {
        vm_object_t     top_object, object;
@@ -1081,7 +1356,7 @@ fill_vnodeinfoforaddr(
                 * The last object in the shadow chain has the
                 * relevant pager information.
                 */
-               top_object = entry->object.vm_object;
+               top_object = VME_OBJECT(entry);
                if (top_object == VM_OBJECT_NULL) {
                        object = VM_OBJECT_NULL;
                        shadow_depth = 0;
@@ -1129,14 +1404,14 @@ fill_vnodeinfoforaddr(
 kern_return_t 
 vnode_pager_get_object_vnode (
        memory_object_t         mem_obj,
-       uint32_t * vnodeaddr,
+       uintptr_t * vnodeaddr,
        uint32_t * vid)
 {
        vnode_pager_t   vnode_object;
 
        vnode_object = vnode_pager_lookup(mem_obj);
        if (vnode_object->vnode_handle)  {
-               *vnodeaddr = (uint32_t)vnode_object->vnode_handle;
+               *vnodeaddr = (uintptr_t)vnode_object->vnode_handle;
                *vid = (uint32_t)vnode_vid((void *)vnode_object->vnode_handle); 
 
                return(KERN_SUCCESS);
@@ -1145,3 +1420,74 @@ vnode_pager_get_object_vnode (
        return(KERN_FAILURE);
 }
 
+#if CONFIG_IOSCHED
+kern_return_t
+vnode_pager_get_object_devvp(
+       memory_object_t         mem_obj,
+       uintptr_t               *devvp)
+{
+       struct vnode    *vp;
+       uint32_t        vid;    /* filled in by the lookup below but not used afterwards */
+       /* Store in *devvp whatever vnode_mountdevvp() yields for mem_obj's vnode; KERN_SUCCESS only when that value is non-zero. */
+       if(vnode_pager_get_object_vnode(mem_obj, (uintptr_t *)&vp, (uint32_t *)&vid) != KERN_SUCCESS)
+               return (KERN_FAILURE);  /* *devvp is left untouched on this path */
+       *devvp = (uintptr_t)vnode_mountdevvp(vp);       /* written even when the result turns out to be zero */
+       if (*devvp)
+               return (KERN_SUCCESS);  
+       return (KERN_FAILURE);
+}
+#endif /* CONFIG_IOSCHED */
+
+/*
+ * Find the underlying vnode object for the given vm_map_entry.  If found, return with the
+ * object locked, otherwise return NULL with nothing locked.
+ */
+
+vm_object_t
+find_vnode_object(
+       vm_map_entry_t  entry
+)
+{
+       vm_object_t                     top_object, object;
+       memory_object_t                 memory_object;
+       memory_object_pager_ops_t       pager_ops;
+       /* Sub-map entries and entries with no VM object fall straight through to the VM_OBJECT_NULL return, taking no lock. */
+       if (!entry->is_sub_map) {
+
+               /*
+                * The last object in the shadow chain has the
+                * relevant pager information.
+                */
+
+               top_object = VME_OBJECT(entry);
+
+               if (top_object) {
+                       vm_object_lock(top_object);
+                       /* Descend the shadow chain, locking each shadow before unlocking its parent so one object is always held. */
+                       for (object = top_object; object->shadow != VM_OBJECT_NULL; object = object->shadow) {
+                               vm_object_lock(object->shadow);
+                               vm_object_unlock(object);
+                       }
+                       /* 'object' is now the end of the chain and still locked; the NULL test below cannot fail at this point. */
+                       if (object && !object->internal && object->pager_ready && !object->terminating &&
+                           object->alive) {
+                               memory_object = object->pager;
+                               pager_ops = memory_object->mo_pager_ops;
+                               /* NOTE(review): object->pager is dereferenced unchecked -- presumably pager_ready implies it is set; confirm. */
+                               /*
+                                * If this object points to the vnode_pager_ops, then we found what we're
+                                * looking for.  Otherwise, this vm_map_entry doesn't have an underlying
+                                * vnode and so we fall through to the bottom and return NULL.
+                                */
+
+                               if (pager_ops == &vnode_pager_ops) 
+                                       return object;          /* we return with the object locked */
+                       }
+                       /* Not a vnode-backed object: drop the one lock still held before falling through to the NULL return. */
+                       vm_object_unlock(object);
+               }
+
+       }
+
+       return(VM_OBJECT_NULL);
+}