/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/thread_act.h>

#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>
/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}
/*
 * Legacy routines to get the start and end for a vm_map_t.  They
 * return them in the vm_offset_t format.  So, they should only be
 * called on maps that are the same size as the kernel map for
 * accurate results.
 */
vm_offset_t
get_vm_start(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}
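/*
 * BSD VNODE PAGER
 *
 * The vnode_pager_* entry points below implement a memory-object pager
 * backed by a vnode: the VM system calls them to page data in and out,
 * and they translate those requests into vnode_pagein()/vnode_pageout()
 * calls on the unified buffer cache.
 */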
/* until component support available */
int	vnode_pager_workaround;

typedef struct vnode_pager {
	int			*pager;		/* pager workaround pointer  */
	unsigned int		pager_ikot;	/* JMM: fake ip_kotype()     */
	unsigned int		ref_count;	/* reference count           */
	memory_object_control_t control_handle;	/* mem object control handle */
	struct vnode		*vnode_handle;	/* vnode handle              */
} *vnode_pager_t;
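/*
 * The pager/pager_ikot pair at the top of the structure stands in for the
 * header of an IPC object, so callers that apply ip_kotype() to the pager
 * see IKOT_MEMORY_OBJECT and treat it as a memory object rather than a
 * named entry port (see the comment in vnode_object_create() below).
 */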
ipc_port_t	trigger_name_to_port(mach_port_t);		/* forward */

kern_return_t	vnode_pager_cluster_read(vnode_pager_t,
			vm_object_offset_t, vm_size_t);		/* forward */

void		vnode_pager_cluster_write(vnode_pager_t,
			vm_object_offset_t, vm_size_t,
			vm_object_offset_t *, int *, int);	/* forward */

vnode_pager_t	vnode_object_create(struct vnode *);		/* forward */
vnode_pager_t	vnode_pager_lookup(memory_object_t);		/* forward */

zone_t	vnode_pager_zone;
#define VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define MAX_VNODE	10000
#if DEBUG
int pagerdebug = 0;
#define PAGER_ALL	0xffffffff
#define PAGER_INIT	0x00000001
#define PAGER_PAGEIN	0x00000002
#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif
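/*
 * Usage sketch (DEBUG kernels only):
 *	PAGER_DEBUG(PAGER_PAGEIN, ("pagein: obj %p offset %llx\n", mem_obj, offset));
 * The message is printed only when all of the requested LEVEL bits are set
 * in pagerdebug.
 */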
/*
 *	Routine:	macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int		hi_water = args->hi_water;
	int		low_water = args->low_water;
	int		flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t		trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					 &default_pager, 0);
	if (kr != KERN_SUCCESS) {
		return EINVAL;
	}
	if ((flags & SWAP_ENCRYPT_ON) &&
	    (flags & SWAP_ENCRYPT_OFF)) {
		/* can't have it both ways */
		return EINVAL;
	}
	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}
	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}
	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if (trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}
	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	{
		thread_precedence_policy_data_t	pre;
		thread_extended_policy_data_t	ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);
	}

	current_thread()->options |= TH_OPT_VMPRIV;

	return 0;
}
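/*
 * Translate a trigger port name in the caller's IPC space into an
 * ipc_port_t.  The port must be a receive right; it is returned locked
 * and active, or NULL if the name is 0 or cannot be translated.
 */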
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if (ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
				       &trigger_port) != KERN_SUCCESS)
		return (NULL);

	return trigger_port;
}
extern int	uiomove64(addr64_t, int, void *);

#define MAX_RUN	32
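/*
 * Copy data between a memory object's resident pages and a uio: gather a
 * run of up to MAX_RUN pages under the object lock, marking them busy,
 * drop the lock while uiomove64() copies to or from their physical
 * addresses, then relock and wake the pages up.  Returns the uiomove64()
 * error, if any.
 */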
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void			*uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty)
{
	vm_object_t	object;
	vm_page_t	dst_page;
	int		xsize;
	int		retval = 0;
	int		cur_run, cur_needed, i;
	vm_page_t	page_run[MAX_RUN];
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);
	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}
	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * ENCRYPTED SWAP:
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty)
				dst_page->dirty = TRUE;
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache
			 * we bail at this point
			 * we'll unlock the object below
			 */
			break;

		vm_object_unlock(object);
		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;
			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			PAGE_WAKEUP_DONE(dst_page);
		}
	}
	vm_object_unlock(object);

	return (retval);
}
void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				 PAGE_SIZE, "vnode pager structures");
	return;
}
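/*
 * Wrap a vnode in a vnode_pager structure so that it can be handed to the
 * VM system as a memory object.
 */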
memory_object_t
vnode_pager_setup(
	struct vnode			*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}
kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		 memory_object_control_t control,
#if !DEBUG
		 __unused
#endif
		 vm_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;
	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}
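/*
 * Called by the VM system to push dirty data back to the vnode; all of the
 * work is done by vnode_pager_cluster_write() below.
 */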
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}
kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}
kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}
kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
#if !DEBUG
	__unused
#endif
	vm_prot_t		protection_required)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj, offset, length, protection_required));

	vnode_object = vnode_pager_lookup(mem_obj);

	PAGER_DEBUG(PAGER_PAGEIN, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj, offset, length, protection_required, vnode_object));

	return vnode_pager_cluster_read(vnode_object, offset, length);
}
void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}
kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %x\n", mem_obj));

	return(KERN_SUCCESS);
}
kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}
kern_return_t
vnode_pager_unmap(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}
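/*
 * Push a range of the file back to disk.  The UPL_MSYNC path walks the
 * range in chunks of at most MAX_UPL_TRANSFER pages on behalf of msync();
 * the other path builds a single page-aligned cluster around the page
 * being paged out, clipped so it does not extend past the end of the file.
 */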
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	upl_t		upl = NULL;
	int		request_flags;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;
		while (cnt) {
			kern_return_t	kr;

			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
					UPL_SET_INTERNAL | UPL_SET_LITE;

			kr = memory_object_upl_request(vnode_object->control_handle,
						       offset, size, &upl, NULL, NULL, request_flags);
			if (kr != KERN_SUCCESS)
				panic("vnode_pager_cluster_write: upl request failed\n");

			vnode_pageout(vnode_object->vnode_handle,
				      upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;
		vm_object_t		object;
		vm_page_t		target_page;
		int			ticket;
		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);
		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to insure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		object = memory_object_control_to_vm_object(vnode_object->control_handle);

		if (object == VM_OBJECT_NULL)
			panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");
		request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_SET_INTERNAL | UPL_SET_LITE;
		vm_object_lock(object);

		if ((target_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/*
			 * only pick up pages whose ticket number matches
			 * the ticket number of the page originally targeted
			 * for pageout
			 */
			ticket = target_page->page_ticket;

			request_flags |= ((ticket << UPL_PAGE_TICKET_SHIFT) & UPL_PAGE_TICKET_MASK);
		}
		vm_object_unlock(object);
		vm_object_upl_request(object, base_offset, size,
				      &upl, NULL, NULL, request_flags);
		if (upl == NULL)
			panic("vnode_pager_cluster_write: upl request failed\n");

		vnode_pageout(vnode_object->vnode_handle,
			      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
	}
}
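/*
 * Ask the vnode to page in [offset, offset + cnt).  If the pagein fails,
 * a UPL is requested over the same range so that any pages left behind
 * can be cleaned up before the failure is reported back to the fault path.
 */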
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;

	assert(! (cnt & PAGE_MASK));

	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL, (vm_offset_t) NULL,
			    offset, cnt, 0, &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here: 1 corresponds to PAGER_ABSENT,
	defined in bsd/vm/vm_pager.h.  However, we should not be including
	that file here; it is a layering violation.
*/
	if (kret == 1) {
		int		uplflags;
		upl_t		upl = NULL;
		int		count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		kr = memory_object_upl_request(vnode_object->control_handle,
					       offset, cnt,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}
void
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_workaround, cnt);
}
vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager = &vnode_pager_workaround;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}
vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager == &vnode_pager_workaround);
	return (vnode_object);
}