/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */

#include <sys/errno.h>

#include <mach/mach_types.h>
#include <mach/mach_traps.h>
#include <mach/host_priv.h>
#include <mach/kern_return.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/port.h>
#include <mach/policy.h>
#include <mach/upl.h>
#include <mach/thread_act.h>

#include <kern/host.h>
#include <kern/thread.h>

#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <default_pager/default_pager_types.h>
#include <default_pager/default_pager_object_server.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/memory_object.h>
#include <vm/vm_protos.h>

/* BSD VM COMPONENT INTERFACES */
int
get_map_nentries(
	vm_map_t map)
{
	return(map->hdr.nentries);
}

mach_vm_offset_t
mach_get_vm_start(vm_map_t map)
{
	return( vm_map_first_entry(map)->vme_start);
}

mach_vm_offset_t
mach_get_vm_end(vm_map_t map)
{
	return( vm_map_last_entry(map)->vme_end);
}

/*
 * Legacy routines to get the start and end for a vm_map_t.  They
 * return them in the vm_offset_t format.  So, they should only be
 * called on maps that are the same size as the kernel map for
 * accurate results.
 */
vm_offset_t
get_vm_start(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_first_entry(map)->vme_start));
}

vm_offset_t
get_vm_end(
	vm_map_t map)
{
	return(CAST_DOWN(vm_offset_t, vm_map_last_entry(map)->vme_end));
}

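/*
 * Example of the truncation hazard noted above (hypothetical
 * addresses): on a 64-bit map, an entry starting at 0x100001000
 * comes back from get_vm_start() as CAST_DOWN(vm_offset_t,
 * 0x100001000), losing the high bits when vm_offset_t is 32 bits
 * wide - hence the restriction to kernel-sized maps.
 */
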
/* until component support available */
int	vnode_pager_workaround;

typedef struct vnode_pager {
	int			*pager;		/* pager workaround pointer	*/
	unsigned int		pager_ikot;	/* JMM: fake ip_kotype()	*/
	unsigned int		ref_count;	/* reference count		*/
	memory_object_control_t	control_handle;	/* mem object control handle	*/
	struct vnode		*vnode_handle;	/* vnode handle			*/
} *vnode_pager_t;

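/*
 * Note on the layout above: a vnode_pager_t is handed out as a raw
 * memory_object_t, so its first word must identify it as a pager
 * (it points at vnode_pager_workaround, which vnode_pager_lookup()
 * asserts on) and its second word carries a fake ip_kotype so that
 * vm_map does not mistake the object for a named entry port (see the
 * comment in vnode_object_create() below).
 */
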
ipc_port_t
trigger_name_to_port(			/* forward */
	mach_port_t);

kern_return_t
vnode_pager_cluster_read(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t);

void
vnode_pager_cluster_write(		/* forward */
	vnode_pager_t,
	vm_object_offset_t,
	vm_size_t,
	vm_object_offset_t *,
	int *,
	int);

vnode_pager_t
vnode_object_create(			/* forward */
	struct vnode *);

vnode_pager_t
vnode_pager_lookup(			/* forward */
	memory_object_t);

zone_t	vnode_pager_zone;

#define	VNODE_PAGER_NULL	((vnode_pager_t) 0)

/* TODO: Should be set dynamically by vnode_pager_init() */
#define	CLUSTER_SHIFT	1

/* TODO: Should be set dynamically by vnode_pager_bootstrap() */
#define	MAX_VNODE	10000

#if DEBUG
int pagerdebug = 0;

#define	PAGER_ALL	0xffffffff
#define	PAGER_INIT	0x00000001
#define	PAGER_PAGEIN	0x00000002

#define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
#else
#define PAGER_DEBUG(LEVEL, A)
#endif

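/*
 * Usage sketch: on a DEBUG kernel with pagerdebug set to include
 * PAGER_PAGEIN,
 *
 *	PAGER_DEBUG(PAGER_PAGEIN, ("pagein: %x\n", mem_obj));
 *
 * expands to the printf; on release kernels the macro and its
 * arguments compile away entirely.
 */
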
/*
 *	Routine:	macx_triggers
 *	Function:
 *		Syscall interface to set the callbacks for low and
 *		high water marks.
 */
int
macx_triggers(
	struct macx_triggers_args *args)
{
	int	hi_water = args->hi_water;
	int	low_water = args->low_water;
	int	flags = args->flags;
	mach_port_t	trigger_name = args->alert_port;
	kern_return_t	kr;
	memory_object_default_t	default_pager;
	ipc_port_t	trigger_port;

	default_pager = MEMORY_OBJECT_DEFAULT_NULL;
	kr = host_default_memory_manager(host_priv_self(),
					&default_pager, 0);
	if(kr != KERN_SUCCESS) {
		return EINVAL;
	}

	if ((flags & SWAP_ENCRYPT_ON) &&
	    (flags & SWAP_ENCRYPT_OFF)) {
		/* can't have it both ways */
		return EINVAL;
	}

	if (flags & SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: tell default_pager to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_ON,
				       IP_NULL);
	} else if (flags & SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: tell default_pager not to encrypt */
		default_pager_triggers(default_pager,
				       0, 0,
				       SWAP_ENCRYPT_OFF,
				       IP_NULL);
	}

	if (flags & HI_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       HI_WAT_ALERT, trigger_port);
	}

	if (flags & LO_WAT_ALERT) {
		trigger_port = trigger_name_to_port(trigger_name);
		if(trigger_port == NULL) {
			return EINVAL;
		}
		/* trigger_port is locked and active */
		ipc_port_make_send_locked(trigger_port);
		/* and now it's unlocked */
		default_pager_triggers(default_pager,
				       hi_water, low_water,
				       LO_WAT_ALERT, trigger_port);
	}

	/*
	 * Set thread scheduling priority and policy for the current thread;
	 * it is assumed for the time being that the thread setting the alert
	 * is the same one which will be servicing it.
	 *
	 * XXX This does not belong in the kernel XXX
	 */
	{
		thread_precedence_policy_data_t		pre;
		thread_extended_policy_data_t		ext;

		ext.timeshare = FALSE;
		pre.importance = INT32_MAX;

		thread_policy_set(current_thread(),
				  THREAD_EXTENDED_POLICY,
				  (thread_policy_t)&ext,
				  THREAD_EXTENDED_POLICY_COUNT);

		thread_policy_set(current_thread(),
				  THREAD_PRECEDENCE_POLICY,
				  (thread_policy_t)&pre,
				  THREAD_PRECEDENCE_POLICY_COUNT);
	}

	current_thread()->options |= TH_OPT_VMPRIV;

	return 0;
}

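/*
 * trigger_name_to_port:
 *	Translate a receive right name in the caller's IPC space into
 *	the underlying ipc_port_t.  Returns NULL for an invalid name;
 *	on success the port comes back locked and active, which is why
 *	macx_triggers() above uses ipc_port_make_send_locked() on it.
 */
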
ipc_port_t
trigger_name_to_port(
	mach_port_t	trigger_name)
{
	ipc_port_t	trigger_port;
	ipc_space_t	space;

	if (trigger_name == 0)
		return (NULL);

	space = current_space();
	if(ipc_port_translate_receive(space, (mach_port_name_t)trigger_name,
						&trigger_port) != KERN_SUCCESS)
		return (NULL);
	return trigger_port;
}

extern int	uiomove64(addr64_t, int, void *);
#define	MAX_RUN	32

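/*
 * memory_object_control_uiomove:
 *	Optimized copy path between a uio and the resident pages of the
 *	VM object behind `control'.  Pages are gathered in runs of up to
 *	MAX_RUN, held busy while uiomove64() moves data to or from their
 *	physical pages, then released.  The routine backs out (returning
 *	0 with no error) when copy-on-write obligations or a hole in the
 *	cache make the fast path unsafe.
 */
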
int
memory_object_control_uiomove(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	void		*	uio,
	int			start_offset,
	int			io_requested,
	int			mark_dirty)
{
	vm_object_t	object;
	vm_page_t	dst_page;
	int		xsize;
	int		retval = 0;
	int		cur_run;
	int		cur_needed;
	int		i;
	vm_page_t	page_run[MAX_RUN];

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL) {
		return (0);
	}
	assert(!object->internal);

	vm_object_lock(object);

	if (mark_dirty && object->copy != VM_OBJECT_NULL) {
		/*
		 * We can't modify the pages without honoring
		 * copy-on-write obligations first, so fall off
		 * this optimized path and fall back to the regular
		 * path.
		 */
		vm_object_unlock(object);
		return 0;
	}

	while (io_requested && retval == 0) {

		cur_needed = (start_offset + io_requested + (PAGE_SIZE - 1)) / PAGE_SIZE;

		if (cur_needed > MAX_RUN)
			cur_needed = MAX_RUN;

		for (cur_run = 0; cur_run < cur_needed; ) {

			if ((dst_page = vm_page_lookup(object, offset)) == VM_PAGE_NULL)
				break;
			/*
			 * Sync up on getting the busy bit
			 */
			if ((dst_page->busy || dst_page->cleaning)) {
				/*
				 * someone else is playing with the page... if we've
				 * already collected pages into this run, go ahead
				 * and process now, we can't block on this
				 * page while holding other pages in the BUSY state
				 * otherwise we will wait
				 */
				if (cur_run)
					break;
				PAGE_SLEEP(object, dst_page, THREAD_UNINT);
				continue;
			}
			/*
			 * ENCRYPTED SWAP:
			 * this routine is only called when copying
			 * to/from real files... no need to consider
			 * encrypted swap pages
			 */
			assert(!dst_page->encrypted);

			if (mark_dirty)
				dst_page->dirty = TRUE;
			dst_page->busy = TRUE;

			page_run[cur_run++] = dst_page;

			offset += PAGE_SIZE_64;
		}
		if (cur_run == 0)
			/*
			 * we hit a 'hole' in the cache
			 * we bail at this point
			 * we'll unlock the object below
			 */
			break;
		vm_object_unlock(object);

		for (i = 0; i < cur_run; i++) {

			dst_page = page_run[i];

			if ((xsize = PAGE_SIZE - start_offset) > io_requested)
				xsize = io_requested;

			if ( (retval = uiomove64((addr64_t)(((addr64_t)(dst_page->phys_page) << 12) + start_offset), xsize, uio)) )
				break;

			io_requested -= xsize;
			start_offset = 0;
		}
		vm_object_lock(object);

		for (i = 0; i < cur_run; i++) {
			dst_page = page_run[i];

			PAGE_WAKEUP_DONE(dst_page);
		}
	}
	vm_object_unlock(object);

	return (retval);
}

void
vnode_pager_bootstrap(void)
{
	register vm_size_t	size;

	size = (vm_size_t) sizeof(struct vnode_pager);
	vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
				PAGE_SIZE, "vnode pager structures");
	return;
}

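/*
 * vnode_pager_setup:
 *	Create and return the memory object used by the VM layer to
 *	page against the given vnode.  Panics if the backing
 *	vnode_pager structure cannot be allocated.
 */
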
memory_object_t
vnode_pager_setup(
	struct vnode	*vp,
	__unused memory_object_t	pager)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_object_create(vp);
	if (vnode_object == VNODE_PAGER_NULL)
		panic("vnode_pager_setup: vnode_object_create() failed");
	return((memory_object_t)vnode_object);
}

kern_return_t
vnode_pager_init(memory_object_t mem_obj,
		memory_object_control_t control,
#if !DEBUG
		__unused
#endif
		vm_size_t pg_size)
{
	vnode_pager_t	vnode_object;
	kern_return_t	kr;
	memory_object_attr_info_data_t	attributes;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_init: %p, %p, %x\n", mem_obj, control, pg_size));

	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return KERN_INVALID_ARGUMENT;

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_control_reference(control);

	vnode_object->control_handle = control;

	attributes.copy_strategy = MEMORY_OBJECT_COPY_DELAY;
	/* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
	attributes.cluster_size = (1 << (PAGE_SHIFT));
	attributes.may_cache_object = TRUE;
	attributes.temporary = TRUE;

	kr = memory_object_change_attributes(
					control,
					MEMORY_OBJECT_ATTRIBUTE_INFO,
					(memory_object_info_t) &attributes,
					MEMORY_OBJECT_ATTR_INFO_COUNT);
	if (kr != KERN_SUCCESS)
		panic("vnode_pager_init: memory_object_change_attributes() failed");

	return(KERN_SUCCESS);
}

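/*
 * vnode_pager_data_return:
 *	Pageout entry point: the VM system hands back dirty data, which
 *	is funneled into vnode_pager_cluster_write() for clustering and
 *	the eventual vnode_pageout() call.
 */
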
kern_return_t
vnode_pager_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		data_cnt,
	memory_object_offset_t	*resid_offset,
	int			*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	int			upl_flags)
{
	register vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	vnode_pager_cluster_write(vnode_object, offset, data_cnt, resid_offset, io_error, upl_flags);

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_data_initialize(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		data_cnt)
{
	panic("vnode_pager_data_initialize");
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	return KERN_FAILURE;
}

kern_return_t
vnode_pager_get_object_size(
	memory_object_t		mem_obj,
	memory_object_offset_t	*length)
{
	vnode_pager_t	vnode_object;

	vnode_object = vnode_pager_lookup(mem_obj);

	*length = vnode_pager_get_filesize(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

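/*
 * vnode_pager_data_request:
 *	Pagein entry point: translate the memory object into its
 *	vnode_pager and hand the request to vnode_pager_cluster_read().
 */
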
kern_return_t
vnode_pager_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
#if !DEBUG
	__unused
#endif
	vm_prot_t		protection_required)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj, offset, length, protection_required));

	vnode_object = vnode_pager_lookup(mem_obj);

	PAGER_DEBUG(PAGER_PAGEIN, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj, offset, length, protection_required, vnode_object));

	return vnode_pager_cluster_read(vnode_object, offset, length);
}

void
vnode_pager_reference(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;
	unsigned int		new_ref_count;

	vnode_object = vnode_pager_lookup(mem_obj);
	new_ref_count = hw_atomic_add(&vnode_object->ref_count, 1);
	assert(new_ref_count > 1);
}

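/*
 * vnode_pager_deallocate:
 *	Drop a reference taken by vnode_pager_reference().  The count
 *	is maintained with hw_atomic_add/hw_atomic_sub; when the last
 *	reference goes away, the vnode is released and the pager
 *	structure is returned to its zone.
 */
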
void
vnode_pager_deallocate(
	memory_object_t		mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_deallocate: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	if (hw_atomic_sub(&vnode_object->ref_count, 1) == 0) {
		if (vnode_object->vnode_handle != NULL) {
			vnode_pager_vrele(vnode_object->vnode_handle);
		}
		zfree(vnode_pager_zone, vnode_object);
	}
	return;
}

kern_return_t
vnode_pager_terminate(
#if !DEBUG
	__unused
#endif
	memory_object_t	mem_obj)
{
	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_terminate: %x\n", mem_obj));

	return(KERN_SUCCESS);
}

kern_return_t
vnode_pager_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	sync_flags)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_synchronize: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	memory_object_synchronize_completed(vnode_object->control_handle, offset, length);

	return (KERN_SUCCESS);
}

kern_return_t
vnode_pager_unmap(
	memory_object_t	mem_obj)
{
	register vnode_pager_t	vnode_object;

	PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %x\n", mem_obj));

	vnode_object = vnode_pager_lookup(mem_obj);

	ubc_unmap(vnode_object->vnode_handle);
	return KERN_SUCCESS;
}

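/*
 * vnode_pager_cluster_write:
 *	Two cases.  For msync-driven requests (UPL_MSYNC) the given
 *	range is pushed out in chunks of at most
 *	PAGE_SIZE * MAX_UPL_TRANSFER bytes.  For pageout-driven requests
 *	a cluster is built around the target page, aligned to the
 *	cluster size and clipped to the end of the file, before being
 *	handed to vnode_pageout().
 */
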
void
vnode_pager_cluster_write(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt,
	vm_object_offset_t	*resid_offset,
	int			*io_error,
	int			upl_flags)
{
	vm_size_t	size;
	upl_t		upl = NULL;
	int		request_flags;
	int		errno;

	if (upl_flags & UPL_MSYNC) {

		upl_flags |= UPL_VNODE_PAGER;

		if ( (upl_flags & UPL_IOSYNC) && io_error)
			upl_flags |= UPL_KEEPCACHED;

		while (cnt) {
			kern_return_t	kr;

			size = (cnt < (PAGE_SIZE * MAX_UPL_TRANSFER)) ? cnt : (PAGE_SIZE * MAX_UPL_TRANSFER); /* effective max */

			request_flags = UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | UPL_CLEAN_IN_PLACE |
					UPL_SET_INTERNAL | UPL_SET_LITE;

			kr = memory_object_upl_request(vnode_object->control_handle,
						       offset, size, &upl, NULL, NULL, request_flags);
			if (kr != KERN_SUCCESS)
				panic("vnode_pager_cluster_write: upl request failed\n");

			vnode_pageout(vnode_object->vnode_handle,
				      upl, (vm_offset_t)0, offset, size, upl_flags, &errno);

			if ( (upl_flags & UPL_KEEPCACHED) ) {
				if ( (*io_error = errno) )
					break;
			}
			cnt    -= size;
			offset += size;
		}
		if (resid_offset)
			*resid_offset = offset;

	} else {
		vm_object_offset_t	vnode_size;
		vm_object_offset_t	base_offset;
		vm_object_t		object;
		vm_page_t		target_page;
		int			ticket;

		/*
		 * this is the pageout path
		 */
		vnode_size = vnode_pager_get_filesize(vnode_object->vnode_handle);

		if (vnode_size > (offset + PAGE_SIZE)) {
			/*
			 * preset the maximum size of the cluster
			 * and put us on a nice cluster boundary...
			 * and then clip the size to ensure we
			 * don't request past the end of the underlying file
			 */
			size = PAGE_SIZE * MAX_UPL_TRANSFER;
			base_offset = offset & ~((signed)(size - 1));

			if ((base_offset + size) > vnode_size)
				size = round_page_32(((vm_size_t)(vnode_size - base_offset)));
		} else {
			/*
			 * we've been requested to page out a page beyond the current
			 * end of the 'file'... don't try to cluster in this case...
			 * we still need to send this page through because it might
			 * be marked precious and the underlying filesystem may need
			 * to do something with it (besides page it out)...
			 */
			base_offset = offset;
			size = PAGE_SIZE;
		}
		object = memory_object_control_to_vm_object(vnode_object->control_handle);

		if (object == VM_OBJECT_NULL)
			panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");

		request_flags = UPL_NOBLOCK | UPL_FOR_PAGEOUT | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_SET_INTERNAL | UPL_SET_LITE;

		vm_object_lock(object);

		if ((target_page = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			/*
			 * only pick up pages whose ticket number matches
			 * the ticket number of the page originally targeted
			 * for pageout
			 */
			ticket = target_page->page_ticket;

			request_flags |= ((ticket << UPL_PAGE_TICKET_SHIFT) & UPL_PAGE_TICKET_MASK);
		}
		vm_object_unlock(object);

		vm_object_upl_request(object, base_offset, size,
				      &upl, NULL, NULL, request_flags);
		if (upl == NULL)
			panic("vnode_pager_cluster_write: upl request failed\n");

		vnode_pageout(vnode_object->vnode_handle,
			      upl, (vm_offset_t)0, upl->offset, upl->size, UPL_VNODE_PAGER, NULL);
	}
}

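/*
 * vnode_pager_cluster_read:
 *	Pagein worker: calls vnode_pagein() for a page-aligned range.
 *	If the pager reports the data absent, a single-page UPL is
 *	requested and aborted so the vm_fault() path sees a clean
 *	failure rather than a hang.
 */
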
kern_return_t
vnode_pager_cluster_read(
	vnode_pager_t		vnode_object,
	vm_object_offset_t	offset,
	vm_size_t		cnt)
{
	int		local_error = 0;
	int		kret;

	assert(! (cnt & PAGE_MASK));

	kret = vnode_pagein(vnode_object->vnode_handle,
			    (upl_t) NULL,
			    (vm_offset_t) NULL,
			    offset,
			    cnt,
			    0,
			    &local_error);
/*
	if(kret == PAGER_ABSENT) {
	Need to work out the defs here, 1 corresponds to PAGER_ABSENT
	defined in bsd/vm/vm_pager.h  However, we should not be including
	that file here, as it is a layering violation.
*/
	if (kret == 1) {
		int	uplflags;
		upl_t	upl = NULL;
		int	count = 0;
		kern_return_t	kr;

		uplflags = (UPL_NO_SYNC |
			    UPL_CLEAN_IN_PLACE |
			    UPL_SET_INTERNAL);
		count = 0;
		kr = memory_object_upl_request(vnode_object->control_handle,
					       offset, PAGE_SIZE,
					       &upl, NULL, &count, uplflags);
		if (kr == KERN_SUCCESS) {
			upl_abort(upl, 0);
			upl_deallocate(upl);
		} else {
			/*
			 * We couldn't gather the page list, probably
			 * because the memory object doesn't have a link
			 * to a VM object anymore (forced unmount, for
			 * example).  Just return an error to the vm_fault()
			 * path and let it handle it.
			 */
		}

		return KERN_FAILURE;
	}

	return KERN_SUCCESS;
}

kern_return_t
vnode_pager_release_from_cache(
	int	*cnt)
{
	memory_object_free_from_cache(
			&realhost, &vnode_pager_workaround, cnt);

	return KERN_SUCCESS;
}

vnode_pager_t
vnode_object_create(
	struct vnode *vp)
{
	register vnode_pager_t	vnode_object;

	vnode_object = (struct vnode_pager *) zalloc(vnode_pager_zone);
	if (vnode_object == VNODE_PAGER_NULL)
		return(VNODE_PAGER_NULL);

	/*
	 * The vm_map call takes both named entry ports and raw memory
	 * objects in the same parameter.  We need to make sure that
	 * vm_map does not see this object as a named entry port.  So,
	 * we reserve the second word in the object for a fake ip_kotype
	 * setting - that will tell vm_map to use it as a memory object.
	 */
	vnode_object->pager = &vnode_pager_workaround;
	vnode_object->pager_ikot = IKOT_MEMORY_OBJECT;
	vnode_object->ref_count = 1;
	vnode_object->control_handle = MEMORY_OBJECT_CONTROL_NULL;
	vnode_object->vnode_handle = vp;

	return(vnode_object);
}

vnode_pager_t
vnode_pager_lookup(
	memory_object_t	name)
{
	vnode_pager_t	vnode_object;

	vnode_object = (vnode_pager_t)name;
	assert(vnode_object->pager == &vnode_pager_workaround);
	return (vnode_object);
}