2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
31 #include <sys/errno.h>
33 #include <mach/mach_types.h>
34 #include <mach/mach_traps.h>
35 #include <mach/host_priv.h>
36 #include <mach/kern_return.h>
37 #include <mach/memory_object_control.h>
38 #include <mach/memory_object_types.h>
39 #include <mach/port.h>
40 #include <mach/policy.h>
42 #include <mach/thread_act.h>
44 #include <kern/host.h>
45 #include <kern/thread.h>
47 #include <ipc/ipc_port.h>
48 #include <ipc/ipc_space.h>
50 #include <default_pager/default_pager_types.h>
51 #include <default_pager/default_pager_object_server.h>
53 #include <vm/vm_map.h>
54 #include <vm/vm_kern.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/memory_object.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/vm_protos.h>
60 /* BSD VM COMPONENT INTERFACES */
80 return(map
->hdr
.nentries
);
84 mach_get_vm_start(vm_map_t map
)
86 return( vm_map_first_entry(map
)->vme_start
);
90 mach_get_vm_end(vm_map_t map
)
92 return( vm_map_last_entry(map
)->vme_end
);
96 * Legacy routines to get the start and end for a vm_map_t. They
97 * return them in the vm_offset_t format. So, they should only be
98 * called on maps that are the same size as the kernel map for
105 return(CAST_DOWN(vm_offset_t
, vm_map_first_entry(map
)->vme_start
));
112 return(CAST_DOWN(vm_offset_t
, vm_map_last_entry(map
)->vme_end
));
119 /* until component support available */
120 int vnode_pager_workaround
;
122 typedef struct vnode_pager
{
123 int *pager
; /* pager workaround pointer */
124 unsigned int pager_ikot
; /* JMM: fake ip_kotype() */
125 unsigned int ref_count
; /* reference count */
126 memory_object_control_t control_handle
; /* mem object control handle */
127 struct vnode
*vnode_handle
; /* vnode handle */
132 trigger_name_to_port( /* forward */
136 vnode_pager_cluster_read( /* forward */
142 vnode_pager_cluster_write( /* forward */
146 vm_object_offset_t
*,
152 vnode_object_create( /* forward */
156 vnode_pager_lookup( /* forward */
159 zone_t vnode_pager_zone
;
162 #define VNODE_PAGER_NULL ((vnode_pager_t) 0)
164 /* TODO: Should be set dynamically by vnode_pager_init() */
165 #define CLUSTER_SHIFT 1
167 /* TODO: Should be set dynamically by vnode_pager_bootstrap() */
168 #define MAX_VNODE 10000
/* Debug levels tested against the `pagerdebug` mask by PAGER_DEBUG(). */
#define PAGER_ALL		0xffffffff
#define PAGER_INIT		0x00000001
#define PAGER_PAGEIN		0x00000002

/*
 * PAGER_DEBUG(LEVEL, A)
 *
 *	Print the parenthesised printf argument list `A` when every bit of
 *	LEVEL is set in `pagerdebug`.
 *
 *	The expansion is wrapped in do { } while (0) so that the usual
 *	"PAGER_DEBUG(...);" form is a single statement and can be used as
 *	the body of an unbraced if/else.  LEVEL is parenthesised so that
 *	compound expressions such as (PAGER_INIT | PAGER_PAGEIN) are
 *	evaluated as a unit.
 */
#define PAGER_DEBUG(LEVEL, A)						\
	do {								\
		if ((pagerdebug & (LEVEL)) == (LEVEL)) {		\
			printf A;					\
		}							\
	} while (0)
180 #define PAGER_DEBUG(LEVEL, A)
184 * Routine: macx_triggers
186 * Syscall interface to set the call backs for low and
191 struct macx_triggers_args
*args
)
193 int hi_water
= args
->hi_water
;
194 int low_water
= args
->low_water
;
195 int flags
= args
->flags
;
196 mach_port_t trigger_name
= args
->alert_port
;
198 memory_object_default_t default_pager
;
199 ipc_port_t trigger_port
;
201 default_pager
= MEMORY_OBJECT_DEFAULT_NULL
;
202 kr
= host_default_memory_manager(host_priv_self(),
204 if(kr
!= KERN_SUCCESS
) {
208 if ((flags
& SWAP_ENCRYPT_ON
) &&
209 (flags
& SWAP_ENCRYPT_OFF
)) {
210 /* can't have it both ways */
214 if (flags
& SWAP_ENCRYPT_ON
) {
215 /* ENCRYPTED SWAP: tell default_pager to encrypt */
216 default_pager_triggers(default_pager
,
220 } else if (flags
& SWAP_ENCRYPT_OFF
) {
221 /* ENCRYPTED SWAP: tell default_pager not to encrypt */
222 default_pager_triggers(default_pager
,
228 if (flags
& HI_WAT_ALERT
) {
229 trigger_port
= trigger_name_to_port(trigger_name
);
230 if(trigger_port
== NULL
) {
233 /* trigger_port is locked and active */
234 ipc_port_make_send_locked(trigger_port
);
236 default_pager_triggers(default_pager
,
238 HI_WAT_ALERT
, trigger_port
);
241 if (flags
& LO_WAT_ALERT
) {
242 trigger_port
= trigger_name_to_port(trigger_name
);
243 if(trigger_port
== NULL
) {
246 /* trigger_port is locked and active */
247 ipc_port_make_send_locked(trigger_port
);
248 /* and now it's unlocked */
249 default_pager_triggers(default_pager
,
251 LO_WAT_ALERT
, trigger_port
);
255 * Set thread scheduling priority and policy for the current thread
256 * it is assumed for the time being that the thread setting the alert
257 * is the same one which will be servicing it.
259 * XXX This does not belong in the kernel XXX
262 thread_precedence_policy_data_t pre
;
263 thread_extended_policy_data_t ext
;
265 ext
.timeshare
= FALSE
;
266 pre
.importance
= INT32_MAX
;
268 thread_policy_set(current_thread(),
269 THREAD_EXTENDED_POLICY
,
270 (thread_policy_t
)&ext
,
271 THREAD_EXTENDED_POLICY_COUNT
);
273 thread_policy_set(current_thread(),
274 THREAD_PRECEDENCE_POLICY
,
275 (thread_policy_t
)&pre
,
276 THREAD_PRECEDENCE_POLICY_COUNT
);
279 current_thread()->options
|= TH_OPT_VMPRIV
;
/*
 * trigger_name_to_port:
 *
 *	Translate the port name `trigger_name` into an ipc_port_t using the
 *	current task's IPC space.
 *
 *	Visible steps: a name of 0 is tested for first; otherwise the name
 *	is looked up with ipc_port_translate_receive() in current_space()
 *	and the result is compared against KERN_SUCCESS.
 *
 *	NOTE(review): the statements executed on each outcome (and the
 *	final return) are not present in this listing.  macx_triggers()
 *	treats a NULL result as failure and a non-NULL result as a locked,
 *	active port -- confirm against the full source.
 */
288 trigger_name_to_port(
289 mach_port_t trigger_name
)
291 ipc_port_t trigger_port
;
294 if (trigger_name
== 0)
297 space
= current_space();
298 if(ipc_port_translate_receive(space
, (mach_port_name_t
)trigger_name
,
299 &trigger_port
) != KERN_SUCCESS
)
305 extern int uiomove64(addr64_t
, int, void *);
309 memory_object_control_uiomove(
310 memory_object_control_t control
,
311 memory_object_offset_t offset
,
324 vm_page_t page_run
[MAX_RUN
];
327 object
= memory_object_control_to_vm_object(control
);
328 if (object
== VM_OBJECT_NULL
) {
331 assert(!object
->internal
);
333 vm_object_lock(object
);
335 if (mark_dirty
&& object
->copy
!= VM_OBJECT_NULL
) {
337 * We can't modify the pages without honoring
338 * copy-on-write obligations first, so fall off
339 * this optimized path and fall back to the regular
342 vm_object_unlock(object
);
346 while (io_requested
&& retval
== 0) {
348 cur_needed
= (start_offset
+ io_requested
+ (PAGE_SIZE
- 1)) / PAGE_SIZE
;
350 if (cur_needed
> MAX_RUN
)
351 cur_needed
= MAX_RUN
;
353 for (cur_run
= 0; cur_run
< cur_needed
; ) {
355 if ((dst_page
= vm_page_lookup(object
, offset
)) == VM_PAGE_NULL
)
358 * Sync up on getting the busy bit
360 if ((dst_page
->busy
|| dst_page
->cleaning
)) {
362 * someone else is playing with the page... if we've
363 * already collected pages into this run, go ahead
364 * and process now, we can't block on this
365 * page while holding other pages in the BUSY state
366 * otherwise we will wait
370 PAGE_SLEEP(object
, dst_page
, THREAD_UNINT
);
374 * this routine is only called when copying
375 * to/from real files... no need to consider
376 * encrypted swap pages
378 assert(!dst_page
->encrypted
);
381 dst_page
->dirty
= TRUE
;
382 dst_page
->busy
= TRUE
;
384 page_run
[cur_run
++] = dst_page
;
386 offset
+= PAGE_SIZE_64
;
390 * we hit a 'hole' in the cache
391 * we bail at this point
392 * we'll unlock the object below
395 vm_object_unlock(object
);
397 for (i
= 0; i
< cur_run
; i
++) {
399 dst_page
= page_run
[i
];
401 if ((xsize
= PAGE_SIZE
- start_offset
) > io_requested
)
402 xsize
= io_requested
;
404 if ( (retval
= uiomove64((addr64_t
)(((addr64_t
)(dst_page
->phys_page
) << 12) + start_offset
), xsize
, uio
)) )
407 io_requested
-= xsize
;
410 vm_object_lock(object
);
412 for (i
= 0; i
< cur_run
; i
++) {
413 dst_page
= page_run
[i
];
415 PAGE_WAKEUP_DONE(dst_page
);
418 vm_object_unlock(object
);
428 vnode_pager_bootstrap(void)
430 register vm_size_t size
;
432 size
= (vm_size_t
) sizeof(struct vnode_pager
);
433 vnode_pager_zone
= zinit(size
, (vm_size_t
) MAX_VNODE
*size
,
434 PAGE_SIZE
, "vnode pager structures");
/*
 * vnode_pager_setup (name taken from the panic message below; the line
 * carrying the function name and the `vp` parameter is not present in
 * this listing).
 *
 *	Create a vnode pager object for `vp` with vnode_object_create() and
 *	return it cast to memory_object_t.  Panics if the creation returns
 *	VNODE_PAGER_NULL.  The `pager` argument is marked __unused.
 */
444 __unused memory_object_t pager
)
446 vnode_pager_t vnode_object
;
448 vnode_object
= vnode_object_create(vp
);
449 if (vnode_object
== VNODE_PAGER_NULL
)
450 panic("vnode_pager_setup: vnode_object_create() failed");
451 return((memory_object_t
)vnode_object
);
/*
 * vnode_pager_init:
 *
 *	Attach the memory object control handle `control` to the vnode
 *	pager behind `mem_obj` and set the memory object's attributes.
 *
 *	Returns KERN_INVALID_ARGUMENT when `control` is
 *	MEMORY_OBJECT_CONTROL_NULL, KERN_SUCCESS otherwise.  Panics if
 *	memory_object_change_attributes() does not return KERN_SUCCESS.
 *
 *	NOTE(review): the declarations of `pg_size` and `kr`, and the first
 *	argument of memory_object_change_attributes(), are not present in
 *	this listing.
 */
458 vnode_pager_init(memory_object_t mem_obj
,
459 memory_object_control_t control
,
465 vnode_pager_t vnode_object
;
467 memory_object_attr_info_data_t attributes
;
470 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_init: %p, %p, %x\n", mem_obj
, control
, pg_size
));
472 if (control
== MEMORY_OBJECT_CONTROL_NULL
)
473 return KERN_INVALID_ARGUMENT
;
475 vnode_object
= vnode_pager_lookup(mem_obj
);
/* take a reference on the control handle before storing it in the pager */
477 memory_object_control_reference(control
);
479 vnode_object
->control_handle
= control
;
/* attributes: delayed copy, one-page cluster, cacheable, temporary */
481 attributes
.copy_strategy
= MEMORY_OBJECT_COPY_DELAY
;
482 /* attributes.cluster_size = (1 << (CLUSTER_SHIFT + PAGE_SHIFT));*/
483 attributes
.cluster_size
= (1 << (PAGE_SHIFT
));
484 attributes
.may_cache_object
= TRUE
;
485 attributes
.temporary
= TRUE
;
487 kr
= memory_object_change_attributes(
489 MEMORY_OBJECT_ATTRIBUTE_INFO
,
490 (memory_object_info_t
) &attributes
,
491 MEMORY_OBJECT_ATTR_INFO_COUNT
);
492 if (kr
!= KERN_SUCCESS
)
493 panic("vnode_pager_init: memory_object_change_attributes() failed");
495 return(KERN_SUCCESS
);
/*
 * vnode_pager_data_return:
 *
 *	Look up the vnode pager behind `mem_obj` and pass the request to
 *	vnode_pager_cluster_write() with `offset`, `data_cnt`,
 *	`resid_offset`, `io_error` and `upl_flags`.  The `dirty` and
 *	`kernel_copy` arguments are marked __unused.
 *
 *	NOTE(review): the declarations of `data_cnt`, `io_error` and
 *	`upl_flags`, and the function's return statement, are not present
 *	in this listing.
 */
502 vnode_pager_data_return(
503 memory_object_t mem_obj
,
504 memory_object_offset_t offset
,
506 memory_object_offset_t
*resid_offset
,
508 __unused boolean_t dirty
,
509 __unused boolean_t kernel_copy
,
512 register vnode_pager_t vnode_object
;
514 vnode_object
= vnode_pager_lookup(mem_obj
);
516 vnode_pager_cluster_write(vnode_object
, offset
, data_cnt
, resid_offset
, io_error
, upl_flags
);
/*
 * vnode_pager_data_initialize:
 *
 *	Not supported by the vnode pager: every argument is marked __unused
 *	and the body panics with the routine's name.
 */
522 vnode_pager_data_initialize(
523 __unused memory_object_t mem_obj
,
524 __unused memory_object_offset_t offset
,
525 __unused vm_size_t data_cnt
)
527 panic("vnode_pager_data_initialize");
/*
 * vnode_pager_data_unlock:
 *
 *	Every argument is marked __unused, so the routine does not act on
 *	the request.
 *
 *	NOTE(review): the body (and the value it returns) is not present in
 *	this listing.
 */
532 vnode_pager_data_unlock(
533 __unused memory_object_t mem_obj
,
534 __unused memory_object_offset_t offset
,
535 __unused vm_size_t size
,
536 __unused vm_prot_t desired_access
)
/*
 * vnode_pager_get_object_size:
 *
 *	Store in *length the value vnode_pager_get_filesize() reports for
 *	the vnode attached to the pager behind `mem_obj`.
 *
 *	NOTE(review): the return statement is not present in this listing.
 */
542 vnode_pager_get_object_size(
543 memory_object_t mem_obj
,
544 memory_object_offset_t
*length
)
546 vnode_pager_t vnode_object
;
548 vnode_object
= vnode_pager_lookup(mem_obj
);
550 *length
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
/*
 * vnode_pager_data_request:
 *
 *	Page-in entry point: look up the vnode pager behind `mem_obj` and
 *	return the result of vnode_pager_cluster_read() for `offset` and
 *	`length`.  `protection_required` is only used in the debug output.
 *
 *	NOTE(review): the declaration of `length` is not present in this
 *	listing.
 */
558 vnode_pager_data_request(
559 memory_object_t mem_obj
,
560 memory_object_offset_t offset
,
565 vm_prot_t protection_required
)
567 register vnode_pager_t vnode_object
;
569 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_data_request: %x, %x, %x, %x\n", mem_obj
, offset
, length
, protection_required
));
571 vnode_object
= vnode_pager_lookup(mem_obj
);
573 PAGER_DEBUG(PAGER_PAGEIN
, ("vnode_pager_data_request: %x, %x, %x, %x, vnode_object %x\n", mem_obj
, offset
, length
, protection_required
, vnode_object
));
575 return vnode_pager_cluster_read(vnode_object
, offset
, length
);
582 vnode_pager_reference(
583 memory_object_t mem_obj
)
585 register vnode_pager_t vnode_object
;
586 unsigned int new_ref_count
;
588 vnode_object
= vnode_pager_lookup(mem_obj
);
589 new_ref_count
= hw_atomic_add(&vnode_object
->ref_count
, 1);
590 assert(new_ref_count
> 1);
597 vnode_pager_deallocate(
598 memory_object_t mem_obj
)
600 register vnode_pager_t vnode_object
;
602 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_deallocate: %x\n", mem_obj
));
604 vnode_object
= vnode_pager_lookup(mem_obj
);
606 if (hw_atomic_sub(&vnode_object
->ref_count
, 1) == 0) {
607 if (vnode_object
->vnode_handle
!= NULL
) {
608 vnode_pager_vrele(vnode_object
->vnode_handle
);
610 zfree(vnode_pager_zone
, vnode_object
);
/*
 * vnode_pager_terminate:
 *
 *	Nothing to tear down here: apart from the optional debug message
 *	the routine just returns KERN_SUCCESS.
 *
 *	NOTE(review): lines between the routine name and the parameter are
 *	not present in this listing.
 */
619 vnode_pager_terminate(
623 memory_object_t mem_obj
)
625 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_terminate: %x\n", mem_obj
));
627 return(KERN_SUCCESS
);
/*
 * vnode_pager_synchronize:
 *
 *	Report the synchronize request as complete straight away by
 *	calling memory_object_synchronize_completed() on the pager's
 *	control handle with `offset` and `length`, then return
 *	KERN_SUCCESS.  `sync_flags` is marked __unused.
 *
 *	NOTE(review): the declaration of `length` is not present in this
 *	listing.
 */
634 vnode_pager_synchronize(
635 memory_object_t mem_obj
,
636 memory_object_offset_t offset
,
638 __unused vm_sync_t sync_flags
)
640 register vnode_pager_t vnode_object
;
642 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_synchronize: %x\n", mem_obj
));
644 vnode_object
= vnode_pager_lookup(mem_obj
);
646 memory_object_synchronize_completed(vnode_object
->control_handle
, offset
, length
);
648 return (KERN_SUCCESS
);
/*
 * vnode_pager_unmap (name taken from the debug message below; the line
 * carrying the function name is not present in this listing).
 *
 *	Look up the vnode pager behind `mem_obj` and call ubc_unmap() on
 *	its vnode.
 *
 *	NOTE(review): the return statement is not present in this listing.
 */
656 memory_object_t mem_obj
)
658 register vnode_pager_t vnode_object
;
660 PAGER_DEBUG(PAGER_ALL
, ("vnode_pager_unmap: %x\n", mem_obj
));
662 vnode_object
= vnode_pager_lookup(mem_obj
);
664 ubc_unmap(vnode_object
->vnode_handle
);
673 vnode_pager_cluster_write(
674 vnode_pager_t vnode_object
,
675 vm_object_offset_t offset
,
677 vm_object_offset_t
* resid_offset
,
686 if (upl_flags
& UPL_MSYNC
) {
688 upl_flags
|= UPL_VNODE_PAGER
;
690 if ( (upl_flags
& UPL_IOSYNC
) && io_error
)
691 upl_flags
|= UPL_KEEPCACHED
;
696 size
= (cnt
< (PAGE_SIZE
* MAX_UPL_TRANSFER
)) ? cnt
: (PAGE_SIZE
* MAX_UPL_TRANSFER
); /* effective max */
698 request_flags
= UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
| UPL_CLEAN_IN_PLACE
|
699 UPL_SET_INTERNAL
| UPL_SET_LITE
;
701 kr
= memory_object_upl_request(vnode_object
->control_handle
,
702 offset
, size
, &upl
, NULL
, NULL
, request_flags
);
703 if (kr
!= KERN_SUCCESS
)
704 panic("vnode_pager_cluster_write: upl request failed\n");
706 vnode_pageout(vnode_object
->vnode_handle
,
707 upl
, (vm_offset_t
)0, offset
, size
, upl_flags
, &errno
);
709 if ( (upl_flags
& UPL_KEEPCACHED
) ) {
710 if ( (*io_error
= errno
) )
717 *resid_offset
= offset
;
720 vm_object_offset_t vnode_size
;
721 vm_object_offset_t base_offset
;
723 vm_page_t target_page
;
727 * this is the pageout path
729 vnode_size
= vnode_pager_get_filesize(vnode_object
->vnode_handle
);
731 if (vnode_size
> (offset
+ PAGE_SIZE
)) {
733 * preset the maximum size of the cluster
734 * and put us on a nice cluster boundary...
735 * and then clip the size to ensure we
736 * don't request past the end of the underlying file
738 size
= PAGE_SIZE
* MAX_UPL_TRANSFER
;
739 base_offset
= offset
& ~((signed)(size
- 1));
741 if ((base_offset
+ size
) > vnode_size
)
742 size
= round_page_32(((vm_size_t
)(vnode_size
- base_offset
)));
745 * we've been requested to page out a page beyond the current
746 * end of the 'file'... don't try to cluster in this case...
747 * we still need to send this page through because it might
748 * be marked precious and the underlying filesystem may need
749 * to do something with it (besides page it out)...
751 base_offset
= offset
;
754 object
= memory_object_control_to_vm_object(vnode_object
->control_handle
);
756 if (object
== VM_OBJECT_NULL
)
757 panic("vnode_pager_cluster_write: NULL vm_object in control handle\n");
759 request_flags
= UPL_NOBLOCK
| UPL_FOR_PAGEOUT
| UPL_CLEAN_IN_PLACE
|
760 UPL_RET_ONLY_DIRTY
| UPL_COPYOUT_FROM
|
761 UPL_SET_INTERNAL
| UPL_SET_LITE
;
763 vm_object_lock(object
);
765 if ((target_page
= vm_page_lookup(object
, offset
)) != VM_PAGE_NULL
) {
767 * only pick up pages whose ticket number matches
768 * the ticket number of the page originally targeted
771 ticket
= target_page
->page_ticket
;
773 request_flags
|= ((ticket
<< UPL_PAGE_TICKET_SHIFT
) & UPL_PAGE_TICKET_MASK
);
775 vm_object_unlock(object
);
777 vm_object_upl_request(object
, base_offset
, size
,
778 &upl
, NULL
, NULL
, request_flags
);
780 panic("vnode_pager_cluster_write: upl request failed\n");
782 vnode_pageout(vnode_object
->vnode_handle
,
783 upl
, (vm_offset_t
)0, upl
->offset
, upl
->size
, UPL_VNODE_PAGER
, NULL
);
792 vnode_pager_cluster_read(
793 vnode_pager_t vnode_object
,
794 vm_object_offset_t offset
,
800 assert(! (cnt
& PAGE_MASK
));
802 kret
= vnode_pagein(vnode_object
->vnode_handle
,
810 if(kret == PAGER_ABSENT) {
811 Need to work out the defs here, 1 corresponds to PAGER_ABSENT
812 defined in bsd/vm/vm_pager.h However, we should not be including
813 that file here it is a layering violation.
821 uplflags
= (UPL_NO_SYNC
|
825 kr
= memory_object_upl_request(vnode_object
->control_handle
,
827 &upl
, NULL
, &count
, uplflags
);
828 if (kr
== KERN_SUCCESS
) {
833 * We couldn't gather the page list, probably
834 * because the memory object doesn't have a link
835 * to a VM object anymore (forced unmount, for
836 * example). Just return an error to the vm_fault()
837 * path and let it handle it.
/*
 * vnode_pager_release_from_cache:
 *
 *	Forward to memory_object_free_from_cache() for `realhost`, passing
 *	the address of vnode_pager_workaround and `cnt`.
 *
 *	NOTE(review): the declaration of `cnt` and the rest of the body are
 *	not present in this listing.
 */
853 vnode_pager_release_from_cache(
856 memory_object_free_from_cache(
857 &realhost
, &vnode_pager_workaround
, cnt
);
/*
 * Body of the vnode pager constructor (presumably vnode_object_create();
 * the line carrying the function name and the `vp` parameter is not
 * present in this listing).
 *
 *	Allocate a struct vnode_pager from vnode_pager_zone and initialise
 *	it: workaround pointer, fake ip_kotype, a reference count of 1, no
 *	control handle yet, and `vp` as the vnode handle.  Returns
 *	VNODE_PAGER_NULL if zalloc() returns no element.
 */
867 register vnode_pager_t vnode_object
;
869 vnode_object
= (struct vnode_pager
*) zalloc(vnode_pager_zone
);
870 if (vnode_object
== VNODE_PAGER_NULL
)
871 return(VNODE_PAGER_NULL
);
874 * The vm_map call takes both named entry ports and raw memory
875 * objects in the same parameter. We need to make sure that
876 * vm_map does not see this object as a named entry port. So,
877 * we reserve the second word in the object for a fake ip_kotype
878 * setting - that will tell vm_map to use it as a memory object.
880 vnode_object
->pager
= &vnode_pager_workaround
;
881 vnode_object
->pager_ikot
= IKOT_MEMORY_OBJECT
;
/* the creator holds the first reference */
882 vnode_object
->ref_count
= 1;
883 vnode_object
->control_handle
= MEMORY_OBJECT_CONTROL_NULL
;
884 vnode_object
->vnode_handle
= vp
;
886 return(vnode_object
);
/*
 * Pager lookup (presumably vnode_pager_lookup(); the line carrying the
 * function name is not present in this listing).
 *
 *	Convert the memory object `name` to a vnode_pager_t with a plain
 *	cast.  The assertion checks that the object's first word is the
 *	address of vnode_pager_workaround, i.e. that it really is a vnode
 *	pager.  There is no check for a NULL `name`.
 */
894 memory_object_t name
)
896 vnode_pager_t vnode_object
;
898 vnode_object
= (vnode_pager_t
)name
;
899 assert(vnode_object
->pager
== &vnode_pager_workaround
);
900 return (vnode_object
);