2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_HEADER_START@
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
20 * @APPLE_LICENSE_HEADER_END@
23 #include <sys/errno.h>
25 #include <mach/mach_types.h>
26 #include <mach/mach_traps.h>
27 #include <mach/host_priv.h>
28 #include <mach/kern_return.h>
29 #include <mach/memory_object_control.h>
30 #include <mach/memory_object_types.h>
31 #include <mach/port.h>
32 #include <mach/policy.h>
34 #include <mach/thread_act.h>
36 #include <kern/host.h>
37 #include <kern/thread.h>
39 #include <ipc/ipc_port.h>
40 #include <ipc/ipc_space.h>
42 #include <default_pager/default_pager_types.h>
43 #include <default_pager/default_pager_object_server.h>
45 #include <vm/vm_map.h>
46 #include <vm/vm_kern.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/memory_object.h>
49 #include <vm/vm_pageout.h>
50 #include <vm/vm_protos.h>
52 /* BSD VM COMPONENT INTERFACES */
72 return(map
->hdr
.nentries
);
/*
 * mach_get_vm_start: return the vme_start field of the entry that
 * vm_map_first_entry() yields for `map`.
 */
76 mach_get_vm_start(vm_map_t map
)
78 return( vm_map_first_entry(map
)->vme_start
);
/*
 * mach_get_vm_end: return the vme_end field of the entry that
 * vm_map_last_entry() yields for `map`.
 */
82 mach_get_vm_end(vm_map_t map
)
84 return( vm_map_last_entry(map
)->vme_end
);
88 * Legacy routines to get the start and end for a vm_map_t. They
89 * return them in the vm_offset_t format. So, they should only be
90 * called on maps that are the same size as the kernel map for
97 return(CAST_DOWN(vm_offset_t
, vm_map_first_entry(map
)->vme_start
));
104 return(CAST_DOWN(vm_offset_t
, vm_map_last_entry(map
)->vme_end
));
111 /* until component support available */
/*
 * Tag object for vnode pagers.  Its address is stored in the `pager`
 * field of every struct vnode_pager when the pager is created, and the
 * memory_object_t -> vnode_pager_t conversion asserts that the field
 * still holds this address.  The address is also handed to
 * memory_object_free_from_cache().
 */
112 int vnode_pager_workaround
;
114 typedef struct vnode_pager
{
115 int *pager
; /* pager workaround pointer */
116 unsigned int pager_ikot
; /* JMM: fake ip_kotype() */
117 unsigned int ref_count
; /* reference count */
118 memory_object_control_t control_handle
; /* mem object control handle */
119 struct vnode
*vnode_handle
; /* vnode handle */
124 trigger_name_to_port( /* forward */
128 vnode_pager_cluster_read( /* forward */
134 vnode_pager_cluster_write( /* forward */
138 vm_object_offset_t
*,
144 vnode_object_create( /* forward */
148 vnode_pager_lookup( /* forward */
/*
 * Zone backing struct vnode_pager allocations.  It is created with
 * zinit() in vnode_pager_bootstrap() (element size
 * sizeof(struct vnode_pager), zone name "vnode pager structures"),
 * allocated from with zalloc() when a pager object is created, and
 * returned to with zfree() in vnode_pager_deallocate().
 */
151 zone_t vnode_pager_zone
;
154 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
156 /* TODO: Should be set dynamically by vnode_pager_init() */
157 #define CLUSTER_SHIFT 1
159 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
160 #define MAX_VNODE 10000
166 #define PAGER_ALL 0xffffffff
167 #define PAGER_INIT 0x00000001
168 #define PAGER_PAGEIN 0x00000002
/*
 * PAGER_DEBUG(LEVEL, A): emit a debug trace when every bit of LEVEL is
 * set in `pagerdebug`.
 *
 *   LEVEL - bit mask (PAGER_ALL, PAGER_INIT, PAGER_PAGEIN, or a
 *           combination); evaluated twice, so it must have no side
 *           effects.
 *   A     - a parenthesized printf argument list, e.g. ("x=%d\n", x).
 *
 * The body is wrapped in do { } while (0) so the macro behaves as a single
 * statement (safe in an unbraced if/else), and LEVEL is parenthesized so a
 * compound mask such as (PAGER_INIT | PAGER_PAGEIN) is tested as a whole.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
172 #define PAGER_DEBUG(LEVEL, A)
176 * Routine: macx_triggers
178 * Syscall interface to set the call backs for low and
183 struct macx_triggers_args
*args
)
185 int hi_water
= args
->hi_water
;
186 int low_water
= args
->low_water
;
187 int flags
= args
->flags
;
188 mach_port_t trigger_name
= args
->alert_port
;
190 memory_object_default_t default_pager
;
191 ipc_port_t trigger_port
;
193 default_pager
= MEMORY_OBJECT_DEFAULT_NULL
;
194 kr
= host_default_memory_manager(host_priv_self(),
196 if(kr
!= KERN_SUCCESS
) {
200 if ((flags
& SWAP_ENCRYPT_ON
) &&
201 (flags
& SWAP_ENCRYPT_OFF
)) {
202 /* can't have it both ways */
206 if (flags
& SWAP_ENCRYPT_ON
) {
207 /* ENCRYPTED SWAP: tell default_pager to encrypt */
208 default_pager_triggers(default_pager
,
212 } else if (flags
& SWAP_ENCRYPT_OFF
) {
213 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
214 default_pager_triggers(default_pager
,
220 if (flags
& HI_WAT_ALERT
) {
221 trigger_port
= trigger_name_to_port(trigger_name
);
222 if(trigger_port
== NULL
) {
225 /* trigger_port is locked and active */
226 ipc_port_make_send_locked(trigger_port
);
228 default_pager_triggers(default_pager
,
230 HI_WAT_ALERT
, trigger_port
);
233 if (flags
& LO_WAT_ALERT
) {
234 trigger_port
= trigger_name_to_port(trigger_name
);
235 if(trigger_port
== NULL
) {
238 /* trigger_port is locked and active */
239 ipc_port_make_send_locked(trigger_port
);
240 /* and now it's unlocked */
241 default_pager_triggers(default_pager
,
243 LO_WAT_ALERT
, trigger_port
);
247 * Set thread scheduling priority and policy for the current thread
248 * it is assumed for the time being that the thread setting the alert
249 * is the same one which will be servicing it.
251 * XXX This does not belong in the kernel XXX
254 thread_precedence_policy_data_t pre
;
255 thread_extended_policy_data_t ext
;
257 ext
.timeshare
= FALSE
;
258 pre
.importance
= INT32_MAX
;
260 thread_policy_set(current_thread(),
261 THREAD_EXTENDED_POLICY
,
262 (thread_policy_t
)&ext
,
263 THREAD_EXTENDED_POLICY_COUNT
);
265 thread_policy_set(current_thread(),
266 THREAD_PRECEDENCE_POLICY
,
267 (thread_policy_t
)&pre
,
268 THREAD_PRECEDENCE_POLICY_COUNT
);
271 current_thread()->options
|= TH_OPT_VMPRIV
;
280 trigger_name_to_port(
281 mach_port_t trigger_name
)
283 ipc_port_t trigger_port
;
286 if (trigger_name
== 0)
289 space
= current_space();
290 if(ipc_port_translate_receive(space
, (mach_port_name_t
)trigger_name
,
291 &trigger_port
) != KERN_SUCCESS
)
297 extern int uiomove64(addr64_t
, int, void *);
301 memory_object_control_uiomove(
302 memory_object_control_t control
,
303 memory_object_offset_t offset
,
316 vm_page_t page_run
[MAX_RUN
];
319 object
= memory_object_control_to_vm_object(control
);
320 if (object
== VM_OBJECT_NULL
) {
323 assert(!object
->internal
);
325 vm_object_lock(object
);
327 if (mark_dirty
&& object
->copy
!= VM_OBJECT_NULL
) {
329 * We can't modify the pages without honoring
330 * copy-on-write obligations first, so fall off
331 * this optimized path and fall back to the regular
334 vm_object_unlock(object
);
338 while (io_requested
&& retval
== 0) {
340 cur_needed
= (start_offset
+ io_requested
+ (PAGE_SIZE
- 1)) / PAGE_SIZE
;
342 if (cur_needed
> MAX_RUN
)
343 cur_needed
= MAX_RUN
;
345 for (cur_run
= 0; cur_run
< cur_needed
; ) {
347 if ((dst_page
= vm_page_lookup(object
, offset
)) == VM_PAGE_NULL
)
350 * Sync up on getting the busy bit
352 if ((dst_page
->busy
|| dst_page
->cleaning
)) {
354 * someone else is playing with the page... if we've
355 * already collected pages into this run, go ahead
356 * and process now, we can't block on this
357 * page while holding other pages in the BUSY state
358 * otherwise we will wait
362 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
366 * this routine is only called when copying
367 * to/from real files... no need to consider
368 * encrypted swap pages
370 assert(!dst_page
->encrypted
);
373 dst_page
->dirty
= TRUE
;
374 dst_page
->busy
= TRUE
;
376 page_run
[cur_run
++] = dst_page
;
378 offset
+= PAGE_SIZE_64
;
382 * we hit a 'hole' in the cache
383 * we bail at this point
384 * we'll unlock the object below
387 vm_object_unlock(object
);
389 for (i
= 0; i
< cur_run
; i
++) {
391 dst_page
= page_run
[i
];
393 if ((xsize
= PAGE_SIZE
- start_offset
) > io_requested
)
394 xsize
= io_requested
;
396 if ( (retval
= uiomove64((addr64_t
)(((addr64_t
)(dst_page
->phys_page
) << 12) + start_offset
), xsize
, uio
)) )
399 io_requested
-= xsize
;
402 vm_object_lock(object
);
404 for (i
= 0; i
< cur_run
; i
++) {
405 dst_page
= page_run
[i
];
407 PAGE_WAKEUP_DONE(dst_page
);
410 vm_object_unlock(object
);
420 vnode_pager_bootstrap(void)
422 register vm_size_t size
;
424 size
= (vm_size_t
) sizeof(struct vnode_pager
);
425 vnode_pager_zone
= zinit(size
, (vm_size_t
) MAX_VNODE
*size
,
426 PAGE_SIZE
, "vnode pager structures");
436 __unused memory_object_t pager
)
438 vnode_pager_t vnode_object
;
440 vnode_object
= vnode_object_create(vp
);
441 if (vnode_object
== VNODE_PAGER_NULL
)
442 panic("vnode_pager_setup: vnode_object_create() failed");
443 return((memory_object_t
)vnode_object
);
450 vnode_pager_init(memory_object_t mem_obj
,
451 memory_object_control_t control
,
457 vnode_pager_t vnode_object
;
459 memory_object_attr_info_data_t attributes
;
462 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_init: %p, %p, %x\n", mem_obj
, control
, pg_size
));
464 if (control
== MEMORY_OBJECT_CONTROL_NULL
)
465 return KERN_INVALID_ARGUMENT
;
467 vnode_object
= vnode_pager_lookup(mem_obj
);
469 memory_object_control_reference(control
);
471 vnode_object
->control_handle
= control
;
473 attributes
.copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
474 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
475 attributes
.cluster_size
= (1 << (PAGE_SHIFT
));
476 attributes
.may_cache_object
= TRUE
;
477 attributes
.temporary
= TRUE
;
479 kr
= memory_object_change_attributes(
481 MEMORY_OBJECT_ATTRIBUTE_INFO
,
482 (memory_object_info_t
) &attributes
,
483 MEMORY_OBJECT_ATTR_INFO_COUNT
);
484 if (kr
!= KERN_SUCCESS
)
485 panic("vnode_pager_init: memory_object_change_attributes() failed");
487 return(KERN_SUCCESS
);
494 vnode_pager_data_return(
495 memory_object_t mem_obj
,
496 memory_object_offset_t offset
,
498 memory_object_offset_t
*resid_offset
,
500 __unused boolean_t dirty
,
501 __unused boolean_t kernel_copy
,
504 register vnode_pager_t vnode_object
;
506 vnode_object
= vnode_pager_lookup(mem_obj
);
508 vnode_pager_cluster_write(vnode_object
, offset
, data_cnt
, resid_offset
, io_error
, upl_flags
);
514 vnode_pager_data_initialize(
515 __unused memory_object_t mem_obj
,
516 __unused memory_object_offset_t offset
,
517 __unused vm_size_t data_cnt
)
519 panic("vnode_pager_data_initialize");
524 vnode_pager_data_unlock(
525 __unused memory_object_t mem_obj
,
526 __unused memory_object_offset_t offset
,
527 __unused vm_size_t size
,
528 __unused vm_prot_t desired_access
)
534 vnode_pager_get_object_size(
535 memory_object_t mem_obj
,
536 memory_object_offset_t
*length
)
538 vnode_pager_t vnode_object
;
540 vnode_object
= vnode_pager_lookup(mem_obj
);
542 *length
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
550 vnode_pager_data_request(
551 memory_object_t mem_obj
,
552 memory_object_offset_t offset
,
557 vm_prot_t protection_required
)
559 register vnode_pager_t vnode_object
;
561 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj
, offset
, length
, protection_required
));
563 vnode_object
= vnode_pager_lookup(mem_obj
);
565 PAGER_DEBUG(PAGER_PAGEIN
, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj
, offset
, length
, protection_required
, vnode_object
));
567 return vnode_pager_cluster_read(vnode_object
, offset
, length
);
/*
 * vnode_pager_reference: take one more reference on the vnode pager
 * behind `mem_obj`.
 *
 *   mem_obj - memory object handle; converted to its vnode_pager_t with
 *             vnode_pager_lookup().
 *
 * The pager's ref_count is bumped by 1 with hw_atomic_add().
 */
574 vnode_pager_reference(
575 memory_object_t mem_obj
)
577 register vnode_pager_t vnode_object
;
578 unsigned int new_ref_count
;
580 vnode_object
= vnode_pager_lookup(mem_obj
);
/* NOTE(review): presumably hw_atomic_add() returns the updated count -- confirm */
581 new_ref_count
= hw_atomic_add(&vnode_object
->ref_count
, 1);
/* the caller is expected to already hold a reference, so the result must exceed 1 */
582 assert(new_ref_count
> 1);
589 vnode_pager_deallocate(
590 memory_object_t mem_obj
)
592 register vnode_pager_t vnode_object
;
594 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_deallocate: %x\n", mem_obj
));
596 vnode_object
= vnode_pager_lookup(mem_obj
);
598 if (hw_atomic_sub(&vnode_object
->ref_count
, 1) == 0) {
599 if (vnode_object
->vnode_handle
!= NULL
) {
600 vnode_pager_vrele(vnode_object
->vnode_handle
);
602 zfree(vnode_pager_zone
, vnode_object
);
611 vnode_pager_terminate(
615 memory_object_t mem_obj
)
617 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_terminate: %x\n", mem_obj
));
619 return(KERN_SUCCESS
);
626 vnode_pager_synchronize(
627 memory_object_t mem_obj
,
628 memory_object_offset_t offset
,
630 __unused vm_sync_t sync_flags
)
632 register vnode_pager_t vnode_object
;
634 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_synchronize: %x\n", mem_obj
));
636 vnode_object
= vnode_pager_lookup(mem_obj
);
638 memory_object_synchronize_completed(vnode_object
->control_handle
, offset
, length
);
640 return (KERN_SUCCESS
);
648 memory_object_t mem_obj
)
650 register vnode_pager_t vnode_object
;
652 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_unmap: %x\n", mem_obj
));
654 vnode_object
= vnode_pager_lookup(mem_obj
);
656 ubc_unmap(vnode_object
->vnode_handle
);
665 vnode_pager_cluster_write(
666 vnode_pager_t vnode_object
,
667 vm_object_offset_t offset
,
669 vm_object_offset_t
* resid_offset
,
678 if (upl_flags
& UPL_MSYNC
) {
680 upl_flags
|= UPL_VNODE_PAGER
;
682 if ( (upl_flags
& UPL_IOSYNC
) && io_error
)
683 upl_flags
|= UPL_KEEPCACHED
;
688 size
= (cnt
< (PAGE_SIZE
* MAX_UPL_TRANSFER
)) ? cnt
: (PAGE_SIZE
* MAX_UPL_TRANSFER
); /* effective max */
690 request_flags
= UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
| UPL_CLEAN_IN_PLACE
|
691 UPL_SET_INTERNAL
| UPL_SET_LITE
;
693 kr
= memory_object_upl_request(vnode_object
->control_handle
,
694 offset
, size
, &upl
, NULL
, NULL
, request_flags
);
695 if (kr
!= KERN_SUCCESS
)
696 panic("vnode_pager_cluster_write: upl request failed\n");
698 vnode_pageout(vnode_object
->vnode_handle
,
699 upl
, (vm_offset_t
)0, offset
, size
, upl_flags
, &errno
);
701 if ( (upl_flags
& UPL_KEEPCACHED
) ) {
702 if ( (*io_error
= errno
) )
709 *resid_offset
= offset
;
712 vm_object_offset_t vnode_size
;
713 vm_object_offset_t base_offset
;
715 vm_page_t target_page
;
719 * this is the pageout path
721 vnode_size
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
723 if (vnode_size
> (offset
+ PAGE_SIZE
)) {
725 * preset the maximum size of the cluster
726 * and put us on a nice cluster boundary...
727 * and then clip the size to ensure we
728 * don't request past the end of the underlying file
730 size
= PAGE_SIZE
* MAX_UPL_TRANSFER
;
731 base_offset
= offset
& ~((signed)(size
- 1));
733 if ((base_offset
+ size
) > vnode_size
)
734 size
= round_page_32(((vm_size_t
)(vnode_size
- base_offset
)));
737 * we've been requested to page out a page beyond the current
738 * end of the 'file'... don't try to cluster in this case...
739 * we still need to send this page through because it might
740 * be marked precious and the underlying filesystem may need
741 * to do something with it (besides page it out)...
743 base_offset
= offset
;
746 object
= memory_object_control_to_vm_object(vnode_object
->control_handle
);
748 if (object
== VM_OBJECT_NULL
)
749 panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");
751 request_flags
= UPL_NOBLOCK
| UPL_FOR_PAGEOUT
| UPL_CLEAN_IN_PLACE
|
752 UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
|
753 UPL_SET_INTERNAL
| UPL_SET_LITE
;
755 vm_object_lock(object
);
757 if ((target_page
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
759 * only pick up pages whose ticket number matches
760 * the ticket number of the page originally targeted
763 ticket
= target_page
->page_ticket
;
765 request_flags
|= ((ticket
<< UPL_PAGE_TICKET_SHIFT
) & UPL_PAGE_TICKET_MASK
);
767 vm_object_unlock(object
);
769 vm_object_upl_request(object
, base_offset
, size
,
770 &upl
, NULL
, NULL
, request_flags
);
772 panic("vnode_pager_cluster_write: upl request failed\n");
774 vnode_pageout(vnode_object
->vnode_handle
,
775 upl
, (vm_offset_t
)0, upl
->offset
, upl
->size
, UPL_VNODE_PAGER
, NULL
);
784 vnode_pager_cluster_read(
785 vnode_pager_t vnode_object
,
786 vm_object_offset_t offset
,
792 assert(! (cnt
& PAGE_MASK
));
794 kret
= vnode_pagein(vnode_object
->vnode_handle
,
802 if(kret == PAGER_ABSENT) {
803 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
804 defined in bsd/vm/vm_pager.h. However, we should not be including
805 that file here; it is a layering violation.
813 uplflags
= (UPL_NO_SYNC
|
817 kr
= memory_object_upl_request(vnode_object
->control_handle
,
819 &upl
, NULL
, &count
, uplflags
);
820 if (kr
== KERN_SUCCESS
) {
825 * We couldn't gather the page list, probably
826 * because the memory object doesn't have a link
827 * to a VM object anymore (forced unmount, for
828 * example). Just return an error to the vm_fault()
829 * path and let it handle it.
845 vnode_pager_release_from_cache(
848 memory_object_free_from_cache(
849 &realhost
, &vnode_pager_workaround
, cnt
);
859 register vnode_pager_t vnode_object
;
861 vnode_object
= (struct vnode_pager
*) zalloc(vnode_pager_zone
);
862 if (vnode_object
== VNODE_PAGER_NULL
)
863 return(VNODE_PAGER_NULL
);
866 * The vm_map call takes both named entry ports and raw memory
867 * objects in the same parameter. We need to make sure that
868 * vm_map does not see this object as a named entry port. So,
869 * we reserve the second word in the object for a fake ip_kotype
870 * setting - that will tell vm_map to use it as a memory object.
872 vnode_object
->pager
= &vnode_pager_workaround
;
873 vnode_object
->pager_ikot
= IKOT_MEMORY_OBJECT
;
874 vnode_object
->ref_count
= 1;
875 vnode_object
->control_handle
= MEMORY_OBJECT_CONTROL_NULL
;
876 vnode_object
->vnode_handle
= vp
;
878 return(vnode_object
);
886 memory_object_t name
)
888 vnode_pager_t vnode_object
;
890 vnode_object
= (vnode_pager_t
)name
;
891 assert(vnode_object
->pager
== &vnode_pager_workaround
);
892 return (vnode_object
);