2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <sys/errno.h>
31 #include <mach/mach_types.h>
32 #include <mach/mach_traps.h>
33 #include <mach/host_priv.h>
34 #include <mach/kern_return.h>
35 #include <mach/memory_object_control.h>
36 #include <mach/memory_object_types.h>
37 #include <mach/port.h>
38 #include <mach/policy.h>
40 #include <mach/thread_act.h>
42 #include <kern/host.h>
43 #include <kern/thread.h>
45 #include <ipc/ipc_port.h>
46 #include <ipc/ipc_space.h>
48 #include <default_pager/default_pager_types.h>
49 #include <default_pager/default_pager_object_server.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_kern.h>
53 #include <vm/vm_pageout.h>
54 #include <vm/memory_object.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_protos.h>
58 /* BSD VM COMPONENT INTERFACES */
78 return(map
->hdr
.nentries
);
82 mach_get_vm_start(vm_map_t map
)
84 return( vm_map_first_entry(map
)->vme_start
);
88 mach_get_vm_end(vm_map_t map
)
90 return( vm_map_last_entry(map
)->vme_end
);
94 * Legacy routines to get the start and end for a vm_map_t. They
95 * return them in the vm_offset_t format. So, they should only be
96 * called on maps that are the same size as the kernel map for
103 return(CAST_DOWN(vm_offset_t
, vm_map_first_entry(map
)->vme_start
));
110 return(CAST_DOWN(vm_offset_t
, vm_map_last_entry(map
)->vme_end
));
117 /* until component support available */
118 int vnode_pager_workaround
;
120 typedef struct vnode_pager
{
121 int *pager
; /* pager workaround pointer */
122 unsigned int pager_ikot
; /* JMM: fake ip_kotype() */
123 unsigned int ref_count
; /* reference count */
124 memory_object_control_t control_handle
; /* mem object control handle */
125 struct vnode
*vnode_handle
; /* vnode handle */
130 trigger_name_to_port( /* forward */
134 vnode_pager_cluster_read( /* forward */
140 vnode_pager_cluster_write( /* forward */
144 vm_object_offset_t
*,
150 vnode_object_create( /* forward */
154 vnode_pager_lookup( /* forward */
157 zone_t vnode_pager_zone
;
160 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
162 /* TODO: Should be set dynamically by vnode_pager_init() */
163 #define CLUSTER_SHIFT 1
165 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
166 #define MAX_VNODE 10000
172 #define PAGER_ALL 0xffffffff
173 #define PAGER_INIT 0x00000001
174 #define PAGER_PAGEIN 0x00000002
176 #define PAGER_DEBUG(LEVEL, A) {if ((pagerdebug & LEVEL)==LEVEL){printf A;}}
178 #define PAGER_DEBUG(LEVEL, A)
182 * Routine: macx_triggers
184 * Syscall interface to set the call backs for low and
189 struct macx_triggers_args
*args
)
191 int hi_water
= args
->hi_water
;
192 int low_water
= args
->low_water
;
193 int flags
= args
->flags
;
194 mach_port_t trigger_name
= args
->alert_port
;
196 memory_object_default_t default_pager
;
197 ipc_port_t trigger_port
;
199 default_pager
= MEMORY_OBJECT_DEFAULT_NULL
;
200 kr
= host_default_memory_manager(host_priv_self(),
202 if(kr
!= KERN_SUCCESS
) {
206 if ((flags
& SWAP_ENCRYPT_ON
) &&
207 (flags
& SWAP_ENCRYPT_OFF
)) {
208 /* can't have it both ways */
212 if (flags
& SWAP_ENCRYPT_ON
) {
213 /* ENCRYPTED SWAP: tell default_pager to encrypt */
214 default_pager_triggers(default_pager
,
218 } else if (flags
& SWAP_ENCRYPT_OFF
) {
219 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
220 default_pager_triggers(default_pager
,
226 if (flags
& HI_WAT_ALERT
) {
227 trigger_port
= trigger_name_to_port(trigger_name
);
228 if(trigger_port
== NULL
) {
231 /* trigger_port is locked and active */
232 ipc_port_make_send_locked(trigger_port
);
234 default_pager_triggers(default_pager
,
236 HI_WAT_ALERT
, trigger_port
);
239 if (flags
& LO_WAT_ALERT
) {
240 trigger_port
= trigger_name_to_port(trigger_name
);
241 if(trigger_port
== NULL
) {
244 /* trigger_port is locked and active */
245 ipc_port_make_send_locked(trigger_port
);
246 /* and now it's unlocked */
247 default_pager_triggers(default_pager
,
249 LO_WAT_ALERT
, trigger_port
);
253 * Set thread scheduling priority and policy for the current thread.
254 * It is assumed for the time being that the thread setting the alert
255 * is the same one which will be servicing it.
257 * XXX This does not belong in the kernel XXX
260 thread_precedence_policy_data_t pre
;
261 thread_extended_policy_data_t ext
;
263 ext
.timeshare
= FALSE
;
264 pre
.importance
= INT32_MAX
;
266 thread_policy_set(current_thread(),
267 THREAD_EXTENDED_POLICY
,
268 (thread_policy_t
)&ext
,
269 THREAD_EXTENDED_POLICY_COUNT
);
271 thread_policy_set(current_thread(),
272 THREAD_PRECEDENCE_POLICY
,
273 (thread_policy_t
)&pre
,
274 THREAD_PRECEDENCE_POLICY_COUNT
);
277 current_thread()->options
|= TH_OPT_VMPRIV
;
286 trigger_name_to_port(
287 mach_port_t trigger_name
)
289 ipc_port_t trigger_port
;
292 if (trigger_name
== 0)
295 space
= current_space();
296 if(ipc_port_translate_receive(space
, (mach_port_name_t
)trigger_name
,
297 &trigger_port
) != KERN_SUCCESS
)
303 extern int uiomove64(addr64_t
, int, void *);
307 memory_object_control_uiomove(
308 memory_object_control_t control
,
309 memory_object_offset_t offset
,
322 vm_page_t page_run
[MAX_RUN
];
325 object
= memory_object_control_to_vm_object(control
);
326 if (object
== VM_OBJECT_NULL
) {
329 assert(!object
->internal
);
331 vm_object_lock(object
);
333 if (mark_dirty
&& object
->copy
!= VM_OBJECT_NULL
) {
335 * We can't modify the pages without honoring
336 * copy-on-write obligations first, so fall off
337 * this optimized path and fall back to the regular
340 vm_object_unlock(object
);
344 while (io_requested
&& retval
== 0) {
346 cur_needed
= (start_offset
+ io_requested
+ (PAGE_SIZE
- 1)) / PAGE_SIZE
;
348 if (cur_needed
> MAX_RUN
)
349 cur_needed
= MAX_RUN
;
351 for (cur_run
= 0; cur_run
< cur_needed
; ) {
353 if ((dst_page
= vm_page_lookup(object
, offset
)) == VM_PAGE_NULL
)
356 * Sync up on getting the busy bit
358 if ((dst_page
->busy
|| dst_page
->cleaning
)) {
360 * someone else is playing with the page... if we've
361 * already collected pages into this run, go ahead
362 * and process now, we can't block on this
363 * page while holding other pages in the BUSY state
364 * otherwise we will wait
368 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
372 * this routine is only called when copying
373 * to/from real files... no need to consider
374 * encrypted swap pages
376 assert(!dst_page
->encrypted
);
379 dst_page
->dirty
= TRUE
;
380 dst_page
->busy
= TRUE
;
382 page_run
[cur_run
++] = dst_page
;
384 offset
+= PAGE_SIZE_64
;
388 * we hit a 'hole' in the cache
389 * we bail at this point
390 * we'll unlock the object below
393 vm_object_unlock(object
);
395 for (i
= 0; i
< cur_run
; i
++) {
397 dst_page
= page_run
[i
];
399 if ((xsize
= PAGE_SIZE
- start_offset
) > io_requested
)
400 xsize
= io_requested
;
402 if ( (retval
= uiomove64((addr64_t
)(((addr64_t
)(dst_page
->phys_page
) << 12) + start_offset
), xsize
, uio
)) )
405 io_requested
-= xsize
;
408 vm_object_lock(object
);
410 for (i
= 0; i
< cur_run
; i
++) {
411 dst_page
= page_run
[i
];
413 PAGE_WAKEUP_DONE(dst_page
);
416 vm_object_unlock(object
);
426 vnode_pager_bootstrap(void)
428 register vm_size_t size
;
430 size
= (vm_size_t
) sizeof(struct vnode_pager
);
431 vnode_pager_zone
= zinit(size
, (vm_size_t
) MAX_VNODE
*size
,
432 PAGE_SIZE
, "vnode pager structures");
442 __unused memory_object_t pager
)
444 vnode_pager_t vnode_object
;
446 vnode_object
= vnode_object_create(vp
);
447 if (vnode_object
== VNODE_PAGER_NULL
)
448 panic("vnode_pager_setup: vnode_object_create() failed");
449 return((memory_object_t
)vnode_object
);
456 vnode_pager_init(memory_object_t mem_obj
,
457 memory_object_control_t control
,
463 vnode_pager_t vnode_object
;
465 memory_object_attr_info_data_t attributes
;
468 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_init: %p, %p, %x\n", mem_obj
, control
, pg_size
));
470 if (control
== MEMORY_OBJECT_CONTROL_NULL
)
471 return KERN_INVALID_ARGUMENT
;
473 vnode_object
= vnode_pager_lookup(mem_obj
);
475 memory_object_control_reference(control
);
477 vnode_object
->control_handle
= control
;
479 attributes
.copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
480 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
481 attributes
.cluster_size
= (1 << (PAGE_SHIFT
));
482 attributes
.may_cache_object
= TRUE
;
483 attributes
.temporary
= TRUE
;
485 kr
= memory_object_change_attributes(
487 MEMORY_OBJECT_ATTRIBUTE_INFO
,
488 (memory_object_info_t
) &attributes
,
489 MEMORY_OBJECT_ATTR_INFO_COUNT
);
490 if (kr
!= KERN_SUCCESS
)
491 panic("vnode_pager_init: memory_object_change_attributes() failed");
493 return(KERN_SUCCESS
);
500 vnode_pager_data_return(
501 memory_object_t mem_obj
,
502 memory_object_offset_t offset
,
504 memory_object_offset_t
*resid_offset
,
506 __unused boolean_t dirty
,
507 __unused boolean_t kernel_copy
,
510 register vnode_pager_t vnode_object
;
512 vnode_object
= vnode_pager_lookup(mem_obj
);
514 vnode_pager_cluster_write(vnode_object
, offset
, data_cnt
, resid_offset
, io_error
, upl_flags
);
520 vnode_pager_data_initialize(
521 __unused memory_object_t mem_obj
,
522 __unused memory_object_offset_t offset
,
523 __unused vm_size_t data_cnt
)
525 panic("vnode_pager_data_initialize");
530 vnode_pager_data_unlock(
531 __unused memory_object_t mem_obj
,
532 __unused memory_object_offset_t offset
,
533 __unused vm_size_t size
,
534 __unused vm_prot_t desired_access
)
540 vnode_pager_get_object_size(
541 memory_object_t mem_obj
,
542 memory_object_offset_t
*length
)
544 vnode_pager_t vnode_object
;
546 vnode_object
= vnode_pager_lookup(mem_obj
);
548 *length
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
556 vnode_pager_data_request(
557 memory_object_t mem_obj
,
558 memory_object_offset_t offset
,
563 vm_prot_t protection_required
)
565 register vnode_pager_t vnode_object
;
567 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj
, offset
, length
, protection_required
));
569 vnode_object
= vnode_pager_lookup(mem_obj
);
571 PAGER_DEBUG(PAGER_PAGEIN
, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj
, offset
, length
, protection_required
, vnode_object
));
573 return vnode_pager_cluster_read(vnode_object
, offset
, length
);
580 vnode_pager_reference(
581 memory_object_t mem_obj
)
583 register vnode_pager_t vnode_object
;
584 unsigned int new_ref_count
;
586 vnode_object
= vnode_pager_lookup(mem_obj
);
587 new_ref_count
= hw_atomic_add(&vnode_object
->ref_count
, 1);
588 assert(new_ref_count
> 1);
595 vnode_pager_deallocate(
596 memory_object_t mem_obj
)
598 register vnode_pager_t vnode_object
;
600 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_deallocate: %x\n", mem_obj
));
602 vnode_object
= vnode_pager_lookup(mem_obj
);
604 if (hw_atomic_sub(&vnode_object
->ref_count
, 1) == 0) {
605 if (vnode_object
->vnode_handle
!= NULL
) {
606 vnode_pager_vrele(vnode_object
->vnode_handle
);
608 zfree(vnode_pager_zone
, vnode_object
);
617 vnode_pager_terminate(
621 memory_object_t mem_obj
)
623 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_terminate: %x\n", mem_obj
));
625 return(KERN_SUCCESS
);
632 vnode_pager_synchronize(
633 memory_object_t mem_obj
,
634 memory_object_offset_t offset
,
636 __unused vm_sync_t sync_flags
)
638 register vnode_pager_t vnode_object
;
640 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_synchronize: %x\n", mem_obj
));
642 vnode_object
= vnode_pager_lookup(mem_obj
);
644 memory_object_synchronize_completed(vnode_object
->control_handle
, offset
, length
);
646 return (KERN_SUCCESS
);
654 memory_object_t mem_obj
)
656 register vnode_pager_t vnode_object
;
658 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_unmap: %x\n", mem_obj
));
660 vnode_object
= vnode_pager_lookup(mem_obj
);
662 ubc_unmap(vnode_object
->vnode_handle
);
671 vnode_pager_cluster_write(
672 vnode_pager_t vnode_object
,
673 vm_object_offset_t offset
,
675 vm_object_offset_t
* resid_offset
,
684 if (upl_flags
& UPL_MSYNC
) {
686 upl_flags
|= UPL_VNODE_PAGER
;
688 if ( (upl_flags
& UPL_IOSYNC
) && io_error
)
689 upl_flags
|= UPL_KEEPCACHED
;
694 size
= (cnt
< (PAGE_SIZE
* MAX_UPL_TRANSFER
)) ? cnt
: (PAGE_SIZE
* MAX_UPL_TRANSFER
); /* effective max */
696 request_flags
= UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
| UPL_CLEAN_IN_PLACE
|
697 UPL_SET_INTERNAL
| UPL_SET_LITE
;
699 kr
= memory_object_upl_request(vnode_object
->control_handle
,
700 offset
, size
, &upl
, NULL
, NULL
, request_flags
);
701 if (kr
!= KERN_SUCCESS
)
702 panic("vnode_pager_cluster_write: upl request failed\n");
704 vnode_pageout(vnode_object
->vnode_handle
,
705 upl
, (vm_offset_t
)0, offset
, size
, upl_flags
, &errno
);
707 if ( (upl_flags
& UPL_KEEPCACHED
) ) {
708 if ( (*io_error
= errno
) )
715 *resid_offset
= offset
;
718 vm_object_offset_t vnode_size
;
719 vm_object_offset_t base_offset
;
721 vm_page_t target_page
;
725 * this is the pageout path
727 vnode_size
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
729 if (vnode_size
> (offset
+ PAGE_SIZE
)) {
731 * preset the maximum size of the cluster
732 * and put us on a nice cluster boundary...
733 * and then clip the size to ensure we
734 * don't request past the end of the underlying file
736 size
= PAGE_SIZE
* MAX_UPL_TRANSFER
;
737 base_offset
= offset
& ~((signed)(size
- 1));
739 if ((base_offset
+ size
) > vnode_size
)
740 size
= round_page_32(((vm_size_t
)(vnode_size
- base_offset
)));
743 * we've been requested to page out a page beyond the current
744 * end of the 'file'... don't try to cluster in this case...
745 * we still need to send this page through because it might
746 * be marked precious and the underlying filesystem may need
747 * to do something with it (besides page it out)...
749 base_offset
= offset
;
752 object
= memory_object_control_to_vm_object(vnode_object
->control_handle
);
754 if (object
== VM_OBJECT_NULL
)
755 panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");
757 request_flags
= UPL_NOBLOCK
| UPL_FOR_PAGEOUT
| UPL_CLEAN_IN_PLACE
|
758 UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
|
759 UPL_SET_INTERNAL
| UPL_SET_LITE
;
761 vm_object_lock(object
);
763 if ((target_page
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
765 * only pick up pages whose ticket number matches
766 * the ticket number of the page originally targeted
769 ticket
= target_page
->page_ticket
;
771 request_flags
|= ((ticket
<< UPL_PAGE_TICKET_SHIFT
) & UPL_PAGE_TICKET_MASK
);
773 vm_object_unlock(object
);
775 vm_object_upl_request(object
, base_offset
, size
,
776 &upl
, NULL
, NULL
, request_flags
);
778 panic("vnode_pager_cluster_write: upl request failed\n");
780 vnode_pageout(vnode_object
->vnode_handle
,
781 upl
, (vm_offset_t
)0, upl
->offset
, upl
->size
, UPL_VNODE_PAGER
, NULL
);
790 vnode_pager_cluster_read(
791 vnode_pager_t vnode_object
,
792 vm_object_offset_t offset
,
798 assert(! (cnt
& PAGE_MASK
));
800 kret
= vnode_pagein(vnode_object
->vnode_handle
,
808 if(kret == PAGER_ABSENT) {
809 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
810 defined in bsd/vm/vm_pager.h. However, we should not be including
811 that file here; it is a layering violation.
819 uplflags
= (UPL_NO_SYNC
|
823 kr
= memory_object_upl_request(vnode_object
->control_handle
,
825 &upl
, NULL
, &count
, uplflags
);
826 if (kr
== KERN_SUCCESS
) {
831 * We couldn't gather the page list, probably
832 * because the memory object doesn't have a link
833 * to a VM object anymore (forced unmount, for
834 * example). Just return an error to the vm_fault()
835 * path and let it handle it.
851 vnode_pager_release_from_cache(
854 memory_object_free_from_cache(
855 &realhost
, &vnode_pager_workaround
, cnt
);
865 register vnode_pager_t vnode_object
;
867 vnode_object
= (struct vnode_pager
*) zalloc(vnode_pager_zone
);
868 if (vnode_object
== VNODE_PAGER_NULL
)
869 return(VNODE_PAGER_NULL
);
872 * The vm_map call takes both named entry ports and raw memory
873 * objects in the same parameter. We need to make sure that
874 * vm_map does not see this object as a named entry port. So,
875 * we reserve the second word in the object for a fake ip_kotype
876 * setting - that will tell vm_map to use it as a memory object.
878 vnode_object
->pager
= &vnode_pager_workaround
;
879 vnode_object
->pager_ikot
= IKOT_MEMORY_OBJECT
;
880 vnode_object
->ref_count
= 1;
881 vnode_object
->control_handle
= MEMORY_OBJECT_CONTROL_NULL
;
882 vnode_object
->vnode_handle
= vp
;
884 return(vnode_object
);
892 memory_object_t name
)
894 vnode_pager_t vnode_object
;
896 vnode_object
= (vnode_pager_t
)name
;
897 assert(vnode_object
->pager
== &vnode_pager_workaround
);
898 return (vnode_object
);