/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Paging File Management.
 */
#include <mach/host_priv.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>

#include <default_pager/default_pager_internal.h>
#include <default_pager/default_pager_alerts.h>
#include <default_pager/default_pager_object_server.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <kern/kern_types.h>
#include <kern/host.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_protos.h>
/* todo - need large internal object support */

/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future
 */
#define ALLOC_STRIDE	(1024 * 1024 * 1024)
int physical_transfer_cluster_count = 0;

#define VM_SUPER_CLUSTER	0x40000
#define VM_SUPER_PAGES		(VM_SUPER_CLUSTER / PAGE_SIZE)

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_MIN_CLSHIFT	0

#define VSTRUCT_DEF_CLSHIFT	2
int default_pager_clsize = 0;

int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;

unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
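/*
 * Illustrative arithmetic (editor's note, not original source): with 4 KB
 * pages, VM_SUPER_CLUSTER = 0x40000 bytes gives VM_SUPER_PAGES = 64, so the
 * clustered_writes/clustered_reads histograms above have 65 buckets
 * (0..64 pages per I/O).  Pages per vstruct cluster is 1 << clshift, i.e.
 * 4 pages for VSTRUCT_DEF_CLSHIFT == 2.
 */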
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list: head of list of to-be-completed I/O ops
 *	async_num_queued: number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out: number of pages of requests not completed.
 */
struct vs_async	*vs_async_list;
int		async_num_queued;
int		async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async	*vs_async_free_list;

lck_mtx_t	default_pager_async_lock;	/* Protects globals above */

int	vs_alloc_async_failed = 0;		/* statistics */
int	vs_alloc_async_count = 0;		/* statistics */
struct vs_async *vs_alloc_async(void);		/* forward */
void	vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		lck_mtx_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	lck_mtx_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr)
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
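/*
 * Editor's sketch (illustrative, not original source): the globals above are
 * expected to be touched only between VS_ASYNC_LOCK()/VS_ASYNC_UNLOCK(), e.g.:
 */
#if 0	/* example only -- not compiled */
static void
example_async_counter_update(void)
{
	VS_ASYNC_LOCK();
	vs_alloc_async_count++;	/* any global guarded by default_pager_async_lock */
	VS_ASYNC_UNLOCK();
}
#endif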
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	dp_pages_free_drift_count = 0;
unsigned int	dp_pages_free_drifted_max = 0;
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	use_emergency_swap_file_first = FALSE;
boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
boolean_t	backing_store_stop_compaction = FALSE;

/* Have we decided if swap needs to be encrypted yet ? */
boolean_t	dp_encryption_inited = FALSE;
/* Should we encrypt swap ? */
boolean_t	dp_encryption = FALSE;

boolean_t	dp_isssd = FALSE;
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
MACH_PORT_FACE			emergency_segment_backing_store;
struct backing_store_list_head	backing_store_list;
paging_segment_t		paging_segments[MAX_NUM_PAGING_SEGMENTS];
lck_mtx_t			paging_segments_lock;
int				paging_segment_max = 0;
int				paging_segment_count = 0;
int				ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };

/*
 * Total pages free in system
 * This differs from clusters committed/avail which is a measure of the
 * over commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	dp_pages_reserve = 0;
unsigned int	cluster_transfer_minimum = 100;
/* forward declarations */
kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *get_read_buffer( void );
kern_return_t ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
	upl_t		 upl);
kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int);	/* forward */
kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *);	/* forward */
kern_return_t vs_cluster_transfer(
	vstruct_t	vs,
	dp_offset_t	offset,
	dp_size_t	cnt,
	upl_t		upl);
vs_map_t vs_get_map_entry(
	vstruct_t	vs,
	dp_offset_t	offset);

kern_return_t
default_pager_backing_store_delete_internal( MACH_PORT_FACE );
default_pager_thread_t *
get_read_buffer( void )
{
	int	i;

	DPT_LOCK(dpt_lock);
	while(TRUE) {
		for (i=0; i<default_pager_internal_count; i++) {
			if(dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
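/*
 * Editor's note (illustrative): ps_read_device() below uses this helper when
 * a device returns less data than requested; it stages the partial reads in
 * dpt->dpt_buffer and releases the thread structure by clearing
 * dpt->checked_out and issuing a wakeup on &dpt_array when it is done.
 */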
254 * List of all backing store.
257 queue_init(&backing_store_list
.bsl_queue
);
260 VS_ASYNC_LOCK_INIT();
262 vs_async_free_list
= NULL
;
263 #endif /* VS_ASYNC_REUSE */
265 for (i
= 0; i
< VM_SUPER_PAGES
+ 1; i
++) {
266 clustered_writes
[i
] = 0;
267 clustered_reads
[i
] = 0;
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t out_of_memory)
{

	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}

void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
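/*
 * Editor's note (illustrative): bs_more_space() and bs_commit() below track
 * over-commitment as clusters_committed - clusters_available; when that
 * difference goes positive the "paging space over-committed" warning is
 * printed, and clusters_committed_peak records the worst shortfall seen so
 * the recovery message can report it.
 */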
301 * Account for new paging space.
303 clusters_available
+= nclusters
;
305 if (clusters_available
>= clusters_committed
) {
306 if (verbose
&& user_warned
) {
307 printf("%s%s - %d excess clusters now.\n",
309 "paging space is OK now",
310 clusters_available
- clusters_committed
);
312 clusters_committed_peak
= 0;
315 if (verbose
&& user_warned
) {
316 printf("%s%s - still short of %d clusters.\n",
318 "WARNING: paging space over-committed",
319 clusters_committed
- clusters_available
);
320 clusters_committed_peak
-= nclusters
;
333 clusters_committed
+= nclusters
;
334 if (clusters_committed
> clusters_available
) {
335 if (verbose
&& !user_warned
) {
337 printf("%s%s - short of %d clusters.\n",
339 "WARNING: paging space over-committed",
340 clusters_committed
- clusters_available
);
342 if (clusters_committed
> clusters_committed_peak
) {
343 clusters_committed_peak
= clusters_committed
;
346 if (verbose
&& user_warned
) {
347 printf("%s%s - was short of up to %d clusters.\n",
349 "paging space is OK now",
350 clusters_committed_peak
- clusters_available
);
352 clusters_committed_peak
= 0;
360 int default_pager_info_verbose
= 1;
367 uint64_t pages_total
, pages_free
;
372 pages_total
= pages_free
= 0;
373 for (i
= 0; i
<= paging_segment_max
; i
++) {
374 ps
= paging_segments
[i
];
375 if (ps
== PAGING_SEGMENT_NULL
)
		/*
		 * no need to lock: by the time this data
		 * gets back to any remote requestor it
		 * will be obsolete anyway
		 */
383 pages_total
+= ps
->ps_pgnum
;
384 pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
385 DP_DEBUG(DEBUG_BS_INTERNAL
,
386 ("segment #%d: %d total, %d free\n",
387 i
, ps
->ps_pgnum
, ps
->ps_clcount
<< ps
->ps_clshift
));
389 *totalp
= pages_total
;
391 if (verbose
&& user_warned
&& default_pager_info_verbose
) {
392 if (clusters_available
< clusters_committed
) {
393 printf("%s %d clusters committed, %d available.\n",
402 backing_store_t
backing_store_alloc(void); /* forward */
405 backing_store_alloc(void)
409 bs
= (backing_store_t
) kalloc(sizeof (struct backing_store
));
410 if (bs
== BACKING_STORE_NULL
)
411 panic("backing_store_alloc: no memory");
414 bs
->bs_port
= MACH_PORT_NULL
;
417 bs
->bs_pages_total
= 0;
419 bs
->bs_pages_in_fail
= 0;
420 bs
->bs_pages_out
= 0;
421 bs
->bs_pages_out_fail
= 0;
426 backing_store_t
backing_store_lookup(MACH_PORT_FACE
); /* forward */
/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
	MACH_PORT_FACE port)
{
	backing_store_t	bs;

/*
	port is currently backed with a vs structure in the alias field
	we could create an ISBS alias and a port_is_bs call but frankly
	I see no reason for the test, the bs->port == port check below
	will work properly on junk entries.

	if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;
448 queue_iterate(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
451 if (bs
->bs_port
== port
) {
453 /* Success, return it locked. */
459 return BACKING_STORE_NULL
;
462 void backing_store_add(backing_store_t
); /* forward */
466 __unused backing_store_t bs
)
468 // MACH_PORT_FACE port = bs->bs_port;
469 // MACH_PORT_FACE pset = default_pager_default_set;
470 kern_return_t kr
= KERN_SUCCESS
;
472 if (kr
!= KERN_SUCCESS
)
473 panic("backing_store_add: add to set");
478 * Set up default page shift, but only if not already
479 * set and argument is within range.
482 bs_set_default_clsize(unsigned int npages
)
489 if (default_pager_clsize
== 0) /* if not yet set */
490 vstruct_def_clshift
= local_log2(npages
);
496 int bs_get_global_clsize(int clsize
); /* forward */
499 bs_get_global_clsize(
503 memory_object_default_t dmm
;
507 * Only allow setting of cluster size once. If called
508 * with no cluster size (default), we use the compiled-in default
509 * for the duration. The same cluster size is used for all
512 if (default_pager_clsize
== 0) {
514 * Keep cluster size in bit shift because it's quicker
515 * arithmetic, and easier to keep at a power of 2.
517 if (clsize
!= NO_CLSIZE
) {
518 for (i
= 0; (1 << i
) < clsize
; i
++);
519 if (i
> MAX_CLUSTER_SHIFT
)
520 i
= MAX_CLUSTER_SHIFT
;
521 vstruct_def_clshift
= i
;
523 default_pager_clsize
= (1 << vstruct_def_clshift
);
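		/*
		 * Editor's sketch (illustrative, not original source): the loop
		 * above rounds the requested cluster size up to a power of two
		 * by finding the smallest shift that covers it, capped at
		 * MAX_CLUSTER_SHIFT.  A hypothetical stand-alone restatement:
		 */
#if 0	/* example only -- not compiled */
static int
example_cluster_shift(int clsize_pages, int max_shift)
{
	int i;

	for (i = 0; (1 << i) < clsize_pages; i++)
		continue;
	/* e.g. clsize_pages = 5 -> i = 3 (an 8-page cluster); 4 -> i = 2 */
	return (i > max_shift) ? max_shift : i;
}
#endif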
526 * Let the user know the new (and definitive) cluster size.
529 printf("%scluster size = %d page%s\n",
530 my_name
, default_pager_clsize
,
531 (default_pager_clsize
== 1) ? "" : "s");
534 * Let the kernel know too, in case it hasn't used the
535 * default value provided in main() yet.
537 dmm
= default_pager_object
;
538 clsize
= default_pager_clsize
* vm_page_size
; /* in bytes */
539 kr
= host_default_memory_manager(host_priv_self(),
542 memory_object_default_deallocate(dmm
);
544 if (kr
!= KERN_SUCCESS
) {
545 panic("bs_get_global_cl_size:host_default_memory_manager");
547 if (dmm
!= default_pager_object
) {
548 panic("bs_get_global_cl_size:there is another default pager");
551 ASSERT(default_pager_clsize
> 0 &&
552 (default_pager_clsize
& (default_pager_clsize
- 1)) == 0);
554 return default_pager_clsize
;
558 default_pager_backing_store_create(
559 memory_object_default_t pager
,
561 int clsize
, /* in bytes */
562 MACH_PORT_FACE
*backing_store
)
567 struct vstruct_alias
*alias_struct
;
569 if (pager
!= default_pager_object
)
570 return KERN_INVALID_ARGUMENT
;
572 bs
= backing_store_alloc();
573 port
= ipc_port_alloc_kernel();
574 ipc_port_make_send(port
);
575 assert (port
!= IP_NULL
);
577 DP_DEBUG(DEBUG_BS_EXTERNAL
,
578 ("priority=%d clsize=%d bs_port=0x%x\n",
579 priority
, clsize
, (int) backing_store
));
581 alias_struct
= (struct vstruct_alias
*)
582 kalloc(sizeof (struct vstruct_alias
));
583 if(alias_struct
!= NULL
) {
584 alias_struct
->vs
= (struct vstruct
*)bs
;
585 alias_struct
->name
= &default_pager_ops
;
586 port
->alias
= (uintptr_t) alias_struct
;
589 ipc_port_dealloc_kernel((MACH_PORT_FACE
)(port
));
590 kfree(bs
, sizeof (struct backing_store
));
591 return KERN_RESOURCE_SHORTAGE
;
595 if (priority
== DEFAULT_PAGER_BACKING_STORE_MAXPRI
)
596 priority
= BS_MAXPRI
;
597 else if (priority
== BS_NOPRI
)
598 priority
= BS_MAXPRI
;
600 priority
= BS_MINPRI
;
601 bs
->bs_priority
= priority
;
603 bs
->bs_clsize
= bs_get_global_clsize(atop_32(clsize
));
606 queue_enter(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
610 backing_store_add(bs
);
612 *backing_store
= port
;
617 default_pager_backing_store_info(
618 MACH_PORT_FACE backing_store
,
619 backing_store_flavor_t flavour
,
620 backing_store_info_t info
,
621 mach_msg_type_number_t
*size
)
624 backing_store_basic_info_t basic
;
628 if (flavour
!= BACKING_STORE_BASIC_INFO
||
629 *size
< BACKING_STORE_BASIC_INFO_COUNT
)
630 return KERN_INVALID_ARGUMENT
;
632 basic
= (backing_store_basic_info_t
)info
;
633 *size
= BACKING_STORE_BASIC_INFO_COUNT
;
635 VSTATS_LOCK(&global_stats
.gs_lock
);
636 basic
->pageout_calls
= global_stats
.gs_pageout_calls
;
637 basic
->pagein_calls
= global_stats
.gs_pagein_calls
;
638 basic
->pages_in
= global_stats
.gs_pages_in
;
639 basic
->pages_out
= global_stats
.gs_pages_out
;
640 basic
->pages_unavail
= global_stats
.gs_pages_unavail
;
641 basic
->pages_init
= global_stats
.gs_pages_init
;
642 basic
->pages_init_writes
= global_stats
.gs_pages_init_writes
;
643 VSTATS_UNLOCK(&global_stats
.gs_lock
);
645 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
646 return KERN_INVALID_ARGUMENT
;
648 basic
->bs_pages_total
= bs
->bs_pages_total
;
650 bs
->bs_pages_free
= 0;
651 for (i
= 0; i
<= paging_segment_max
; i
++) {
652 ps
= paging_segments
[i
];
653 if (ps
!= PAGING_SEGMENT_NULL
&& ps
->ps_bs
== bs
) {
655 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
660 basic
->bs_pages_free
= bs
->bs_pages_free
;
661 basic
->bs_pages_in
= bs
->bs_pages_in
;
662 basic
->bs_pages_in_fail
= bs
->bs_pages_in_fail
;
663 basic
->bs_pages_out
= bs
->bs_pages_out
;
664 basic
->bs_pages_out_fail
= bs
->bs_pages_out_fail
;
666 basic
->bs_priority
= bs
->bs_priority
;
667 basic
->bs_clsize
= ptoa_32(bs
->bs_clsize
); /* in bytes */
674 int ps_delete(paging_segment_t
); /* forward */
675 boolean_t
current_thread_aborted(void);
682 kern_return_t error
= KERN_SUCCESS
;
	VSL_LOCK();	/* get the lock on the list of vs's */

	/* The lock relationship and sequence is fairly complicated */
	/* this code looks at a live list, locking and unlocking the list */
	/* as it traverses it.  It depends on the locking behavior of */
	/* default_pager_no_senders.  no_senders always locks the vstruct */
	/* targeted for removal before locking the vstruct list.  However */
	/* it will remove that member of the list without locking its */
	/* neighbors.  We can be sure when we hold a lock on a vstruct */
	/* it cannot be removed from the list but we must hold the list */
	/* lock to be sure that its pointers to its neighbors are valid. */
	/* Also, we can hold off destruction of a vstruct when the list */
	/* lock and the vs locks are not being held by bumping the */
	/* vs_async_pending count. */
701 while(backing_store_release_trigger_disable
!= 0) {
702 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
705 /* we will choose instead to hold a send right */
706 vs_count
= vstruct_list
.vsl_count
;
707 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
708 if(vs
== (vstruct_t
)&vstruct_list
) {
713 vs_async_wait(vs
); /* wait for any pending async writes */
714 if ((vs_count
!= 0) && (vs
!= NULL
))
715 vs
->vs_async_pending
+= 1; /* hold parties calling */
719 while((vs_count
!= 0) && (vs
!= NULL
)) {
		/* We take the count of AMO's before beginning the */
		/* transfer of the target segment. */
		/* We are guaranteed that the target segment cannot get */
		/* more users.  We also know that queue entries are */
		/* made at the back of the list.  If some of the entries */
		/* we would check disappear while we are traversing the */
		/* list then we will either check new entries which */
		/* do not have any backing store in the target segment */
		/* or re-check old entries.  This might not be optimal */
		/* but it will always be correct.  The alternative is to */
		/* take a snapshot of the list. */
733 if(dp_pages_free
< cluster_transfer_minimum
)
734 error
= KERN_FAILURE
;
736 vm_object_t transfer_object
;
740 transfer_object
= vm_object_allocate((vm_object_size_t
)VM_SUPER_CLUSTER
);
742 error
= vm_object_upl_request(transfer_object
,
743 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
745 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
| UPL_SET_LITE
| UPL_SET_INTERNAL
);
747 if(error
== KERN_SUCCESS
) {
748 error
= ps_vstruct_transfer_from_segment(
750 upl_commit(upl
, NULL
, 0);
753 error
= KERN_FAILURE
;
755 vm_object_deallocate(transfer_object
);
757 if(error
|| current_thread_aborted() || backing_store_stop_compaction
) {
759 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
760 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
761 vs
->vs_waiting_async
= FALSE
;
763 thread_wakeup(&vs
->vs_async_pending
);
772 while(backing_store_release_trigger_disable
!= 0) {
773 VSL_SLEEP(&backing_store_release_trigger_disable
,
777 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
778 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
779 (vs
!= next_vs
) && (vs_count
!= 1)) {
781 vs_async_wait(next_vs
); /* wait for any */
782 /* pending async writes */
783 next_vs
->vs_async_pending
+= 1; /* hold parties */
784 /* calling vs_async_wait */
789 vs
->vs_async_pending
-= 1;
790 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
791 vs
->vs_waiting_async
= FALSE
;
793 thread_wakeup(&vs
->vs_async_pending
);
797 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
808 default_pager_backing_store_delete_internal(
809 MACH_PORT_FACE backing_store
)
815 int interim_pages_removed
= 0;
816 boolean_t dealing_with_emergency_segment
= ( backing_store
== emergency_segment_backing_store
);
818 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
819 return KERN_INVALID_ARGUMENT
;
823 error
= KERN_SUCCESS
;
824 for (i
= 0; i
<= paging_segment_max
; i
++) {
825 ps
= paging_segments
[i
];
826 if (ps
!= PAGING_SEGMENT_NULL
&&
828 ! IS_PS_GOING_AWAY(ps
)) {
831 if( IS_PS_GOING_AWAY(ps
) || !IS_PS_OK_TO_USE(ps
)) {
				/*
				 * Someone is already busy reclaiming this paging segment.
				 * If it's the emergency segment we are looking at then check
				 * that someone has not already recovered it and set the right
				 * state i.e. online but not activated.
				 */
842 /* disable access to this segment */
843 ps
->ps_state
&= ~PS_CAN_USE
;
844 ps
->ps_state
|= PS_GOING_AWAY
;
847 * The "ps" segment is "off-line" now,
848 * we can try and delete it...
850 if(dp_pages_free
< (cluster_transfer_minimum
852 error
= KERN_FAILURE
;
856 /* remove all pages associated with the */
857 /* segment from the list of free pages */
858 /* when transfer is through, all target */
859 /* segment pages will appear to be free */
861 dp_pages_free
-= ps
->ps_pgcount
;
862 interim_pages_removed
+= ps
->ps_pgcount
;
864 error
= ps_delete(ps
);
866 if (error
!= KERN_SUCCESS
) {
868 * We couldn't delete the segment,
869 * probably because there's not enough
870 * virtual memory left.
871 * Re-enable all the segments.
880 if (error
!= KERN_SUCCESS
) {
881 for (i
= 0; i
<= paging_segment_max
; i
++) {
882 ps
= paging_segments
[i
];
883 if (ps
!= PAGING_SEGMENT_NULL
&&
885 IS_PS_GOING_AWAY(ps
)) {
888 if( !IS_PS_GOING_AWAY(ps
)) {
892 /* Handle the special clusters that came in while we let go the lock*/
893 if( ps
->ps_special_clusters
) {
894 dp_pages_free
+= ps
->ps_special_clusters
<< ps
->ps_clshift
;
895 ps
->ps_pgcount
+= ps
->ps_special_clusters
<< ps
->ps_clshift
;
896 ps
->ps_clcount
+= ps
->ps_special_clusters
;
897 if ( ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
) {
898 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
900 ps
->ps_special_clusters
= 0;
902 /* re-enable access to this segment */
903 ps
->ps_state
&= ~PS_GOING_AWAY
;
904 ps
->ps_state
|= PS_CAN_USE
;
908 dp_pages_free
+= interim_pages_removed
;
914 for (i
= 0; i
<= paging_segment_max
; i
++) {
915 ps
= paging_segments
[i
];
916 if (ps
!= PAGING_SEGMENT_NULL
&&
918 if(IS_PS_GOING_AWAY(ps
)) {
919 if(IS_PS_EMERGENCY_SEGMENT(ps
)) {
921 ps
->ps_state
&= ~PS_GOING_AWAY
;
922 ps
->ps_special_clusters
= 0;
923 ps
->ps_pgcount
= ps
->ps_pgnum
;
924 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
925 dp_pages_reserve
+= ps
->ps_pgcount
;
928 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
929 paging_segment_count
--;
931 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
932 kfree(ps
, sizeof *ps
);
938 /* Scan the entire ps array separately to make certain we find the */
939 /* proper paging_segment_max */
940 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
941 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
942 paging_segment_max
= i
;
947 if( dealing_with_emergency_segment
) {
953 * All the segments have been deleted.
954 * We can remove the backing store.
958 * Disable lookups of this backing store.
960 if((void *)bs
->bs_port
->alias
!= NULL
)
961 kfree((void *) bs
->bs_port
->alias
,
962 sizeof (struct vstruct_alias
));
963 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
964 bs
->bs_port
= MACH_PORT_NULL
;
968 * Remove backing store from backing_store list.
971 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
976 * Free the backing store structure.
978 kfree(bs
, sizeof *bs
);
984 default_pager_backing_store_delete(
985 MACH_PORT_FACE backing_store
)
987 if( backing_store
!= emergency_segment_backing_store
) {
988 default_pager_backing_store_delete_internal(emergency_segment_backing_store
);
990 return(default_pager_backing_store_delete_internal(backing_store
));
993 int ps_enter(paging_segment_t
); /* forward */
1003 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
1004 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
1008 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
1009 paging_segments
[i
] = ps
;
1010 if (i
> paging_segment_max
)
1011 paging_segment_max
= i
;
1012 paging_segment_count
++;
1013 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
1014 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
1015 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1019 return KERN_RESOURCE_SHORTAGE
;
1026 #ifdef DEVICE_PAGING
1028 default_pager_add_segment(
1029 MACH_PORT_FACE backing_store
,
1030 MACH_PORT_FACE device
,
1036 paging_segment_t ps
;
1040 if ((bs
= backing_store_lookup(backing_store
))
1041 == BACKING_STORE_NULL
)
1042 return KERN_INVALID_ARGUMENT
;
1045 for (i
= 0; i
<= paging_segment_max
; i
++) {
1046 ps
= paging_segments
[i
];
1047 if (ps
== PAGING_SEGMENT_NULL
)
1051 * Check for overlap on same device.
1053 if (!(ps
->ps_device
!= device
1054 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
1055 || offset
+ count
<= ps
->ps_offset
)) {
1058 return KERN_INVALID_ARGUMENT
;
1064 * Set up the paging segment
1066 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
1067 if (ps
== PAGING_SEGMENT_NULL
) {
1069 return KERN_RESOURCE_SHORTAGE
;
1072 ps
->ps_segtype
= PS_PARTITION
;
1073 ps
->ps_device
= device
;
1074 ps
->ps_offset
= offset
;
1075 ps
->ps_record_shift
= local_log2(vm_page_size
/ record_size
);
1076 ps
->ps_recnum
= count
;
1077 ps
->ps_pgnum
= count
>> ps
->ps_record_shift
;
1079 ps
->ps_pgcount
= ps
->ps_pgnum
;
1080 ps
->ps_clshift
= local_log2(bs
->bs_clsize
);
1081 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
1085 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
1087 kfree(ps
, sizeof *ps
);
1089 return KERN_RESOURCE_SHORTAGE
;
1091 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
1092 clrbit(ps
->ps_bmap
, i
);
1095 if(paging_segment_count
== 0) {
1096 ps
->ps_state
= PS_EMERGENCY_SEGMENT
;
1097 if(use_emergency_swap_file_first
) {
1098 ps
->ps_state
|= PS_CAN_USE
;
1101 ps
->ps_state
= PS_CAN_USE
;
1106 if ((error
= ps_enter(ps
)) != 0) {
1107 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1108 kfree(ps
, sizeof *ps
);
1110 return KERN_RESOURCE_SHORTAGE
;
1113 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1114 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1118 if(IS_PS_OK_TO_USE(ps
)) {
1119 dp_pages_free
+= ps
->ps_pgcount
;
1121 dp_pages_reserve
+= ps
->ps_pgcount
;
1125 bs_more_space(ps
->ps_clcount
);
1127 DP_DEBUG(DEBUG_BS_INTERNAL
,
1128 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1129 device
, offset
, count
, record_size
,
1130 ps
->ps_record_shift
, ps
->ps_pgnum
));
1132 return KERN_SUCCESS
;
1138 MACH_PORT_FACE master
)
1140 security_token_t null_security_token
= {
1143 MACH_PORT_FACE device
;
1144 int info
[DEV_GET_SIZE_COUNT
];
1145 mach_msg_type_number_t info_count
;
1146 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1147 unsigned int rec_size
;
1150 MACH_PORT_FACE reply_port
;
1152 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1153 null_security_token
, dev_name
, &device
))
1156 info_count
= DEV_GET_SIZE_COUNT
;
1157 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1158 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1159 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1160 clsize
= bs_get_global_clsize(0);
1161 if (!default_pager_backing_store_create(
1162 default_pager_object
,
1163 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1164 (clsize
* vm_page_size
),
1166 if (!default_pager_add_segment(bs
, device
,
1167 0, count
, rec_size
)) {
1170 ipc_port_release_receive(bs
);
1174 ipc_port_release_send(device
);
1177 #endif /* DEVICE_PAGING */
1182 vs_alloc_async(void)
1184 struct vs_async
*vsa
;
1185 MACH_PORT_FACE reply_port
;
1186 // kern_return_t kr;
1189 if (vs_async_free_list
== NULL
) {
1191 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1194 * Try allocating a reply port named after the
1195 * address of the vs_async structure.
1197 struct vstruct_alias
*alias_struct
;
1199 reply_port
= ipc_port_alloc_kernel();
1200 alias_struct
= (struct vstruct_alias
*)
1201 kalloc(sizeof (struct vstruct_alias
));
1202 if(alias_struct
!= NULL
) {
1203 alias_struct
->vs
= (struct vstruct
*)vsa
;
1204 alias_struct
->name
= &default_pager_ops
;
1205 reply_port
->alias
= (uintptr_t) alias_struct
;
1206 vsa
->reply_port
= reply_port
;
1207 vs_alloc_async_count
++;
1210 vs_alloc_async_failed
++;
1211 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1213 kfree(vsa
, sizeof (struct vs_async
));
1218 vsa
= vs_async_free_list
;
1219 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1228 struct vs_async
*vsa
)
1231 vsa
->vsa_next
= vs_async_free_list
;
1232 vs_async_free_list
= vsa
;
1236 #else /* VS_ASYNC_REUSE */
1239 vs_alloc_async(void)
1241 struct vs_async
*vsa
;
1242 MACH_PORT_FACE reply_port
;
1245 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1248 * Try allocating a reply port named after the
1249 * address of the vs_async structure.
1251 reply_port
= ipc_port_alloc_kernel();
1252 alias_struct
= (vstruct_alias
*)
1253 kalloc(sizeof (struct vstruct_alias
));
1254 if(alias_struct
!= NULL
) {
1255 alias_struct
->vs
= reply_port
;
1256 alias_struct
->name
= &default_pager_ops
;
1257 reply_port
->alias
= (int) vsa
;
1258 vsa
->reply_port
= reply_port
;
1259 vs_alloc_async_count
++;
1262 vs_alloc_async_failed
++;
1263 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1265 kfree(vsa
, sizeof (struct vs_async
));
1275 struct vs_async
*vsa
)
1277 MACH_PORT_FACE reply_port
;
1280 reply_port
= vsa
->reply_port
;
1281 kfree(reply_port
->alias
, sizeof (struct vstuct_alias
));
1282 kfree(vsa
, sizeof (struct vs_async
));
1283 ipc_port_dealloc_kernel((MACH_PORT_FACE
) (reply_port
));
1286 vs_alloc_async_count
--;
1291 #endif /* VS_ASYNC_REUSE */
1293 zone_t vstruct_zone
;
1302 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1303 if (vs
== VSTRUCT_NULL
) {
1304 return VSTRUCT_NULL
;
1310 * The following fields will be provided later.
1312 vs
->vs_pager_ops
= NULL
;
1313 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1314 vs
->vs_references
= 1;
1317 vs
->vs_waiting_seqno
= FALSE
;
1318 vs
->vs_waiting_read
= FALSE
;
1319 vs
->vs_waiting_write
= FALSE
;
1320 vs
->vs_waiting_async
= FALSE
;
1327 vs
->vs_clshift
= local_log2(bs_get_global_clsize(0));
1328 vs
->vs_size
= ((atop_32(round_page_32(size
)) - 1) >> vs
->vs_clshift
) + 1;
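	/*
	 * Editor's note (illustrative): vs_size is the object size in
	 * clusters, rounded up.  With vs_clshift = 2 (4 pages/cluster), a
	 * 5-page object gives ((5 - 1) >> 2) + 1 = 2 clusters, and an exact
	 * multiple such as 8 pages gives ((8 - 1) >> 2) + 1 = 2 as well.
	 */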
1329 vs
->vs_async_pending
= 0;
1332 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1333 * depending on the size of the memory object.
1335 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1336 vs
->vs_imap
= (struct vs_map
**)
1337 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1338 vs
->vs_indirect
= TRUE
;
1340 vs
->vs_dmap
= (struct vs_map
*)
1341 kalloc(CLMAP_SIZE(vs
->vs_size
));
1342 vs
->vs_indirect
= FALSE
;
1344 vs
->vs_xfer_pending
= FALSE
;
1345 DP_DEBUG(DEBUG_VS_INTERNAL
,
1346 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1349 * Check to see that we got the space.
1352 kfree(vs
, sizeof *vs
);
1353 return VSTRUCT_NULL
;
1357 * Zero the indirect pointers, or clear the direct pointers.
1359 if (vs
->vs_indirect
)
1360 memset(vs
->vs_imap
, 0,
1361 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1363 for (i
= 0; i
< vs
->vs_size
; i
++)
1364 VSM_CLR(vs
->vs_dmap
[i
]);
1366 VS_MAP_LOCK_INIT(vs
);
1368 bs_commit(vs
->vs_size
);
1373 paging_segment_t
ps_select_segment(unsigned int, int *); /* forward */
1380 paging_segment_t ps
;
1385 * Optimize case where there's only one segment.
1386 * paging_segment_max will index the one and only segment.
1390 if (paging_segment_count
== 1) {
1391 paging_segment_t lps
= PAGING_SEGMENT_NULL
; /* used to avoid extra PS_UNLOCK */
1392 ipc_port_t trigger
= IP_NULL
;
1394 ps
= paging_segments
[paging_segment_max
];
1395 *psindex
= paging_segment_max
;
1397 if( !IS_PS_EMERGENCY_SEGMENT(ps
) ) {
1398 panic("Emergency paging segment missing\n");
1400 ASSERT(ps
->ps_clshift
>= shift
);
1401 if(IS_PS_OK_TO_USE(ps
)) {
1402 if (ps
->ps_clcount
) {
1404 dp_pages_free
-= 1 << ps
->ps_clshift
;
1405 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1406 if(min_pages_trigger_port
&&
1407 (dp_pages_free
< minimum_pages_remaining
)) {
1408 trigger
= min_pages_trigger_port
;
1409 min_pages_trigger_port
= NULL
;
1417 if( lps
== PAGING_SEGMENT_NULL
) {
1419 dp_pages_free_drift_count
++;
1420 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1421 dp_pages_free_drifted_max
= dp_pages_free
;
1423 dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free
));
1430 if (trigger
!= IP_NULL
) {
1431 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1432 ipc_port_release_send(trigger
);
1437 if (paging_segment_count
== 0) {
1439 dp_pages_free_drift_count
++;
1440 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1441 dp_pages_free_drifted_max
= dp_pages_free
;
1443 dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free
));
1447 return PAGING_SEGMENT_NULL
;
1451 i
>= BS_MINPRI
; i
--) {
1454 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1455 (ps_select_array
[i
] == BS_FULLPRI
))
1457 start_index
= ps_select_array
[i
];
1459 if(!(paging_segments
[start_index
])) {
1461 physical_transfer_cluster_count
= 0;
1463 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1464 (((paging_segments
[start_index
])->ps_clshift
)
1465 + vm_page_shift
))) {
1466 physical_transfer_cluster_count
= 0;
1467 j
= start_index
+ 1;
1469 physical_transfer_cluster_count
+=1;
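			/*
			 * Editor's note (illustrative): ALLOC_STRIDE is in bytes,
			 * so the rotation point above works out to
			 * ALLOC_STRIDE >> (ps_clshift + vm_page_shift) clusters
			 * per segment.  With 4 KB pages (vm_page_shift = 12),
			 * 4-page clusters (ps_clshift = 2) and the 1 GB default
			 * stride, that is 2^30 >> 14 = 65536 clusters before
			 * moving to the next swap file.
			 */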
1471 if(start_index
== 0)
1472 start_index
= paging_segment_max
;
1474 start_index
= start_index
- 1;
1478 if (j
> paging_segment_max
)
1480 if ((ps
= paging_segments
[j
]) &&
1481 (ps
->ps_bs
->bs_priority
== i
)) {
1483 * Force the ps cluster size to be
1484 * >= that of the vstruct.
1487 if (IS_PS_OK_TO_USE(ps
)) {
1488 if ((ps
->ps_clcount
) &&
1489 (ps
->ps_clshift
>= shift
)) {
1490 ipc_port_t trigger
= IP_NULL
;
1493 dp_pages_free
-= 1 << ps
->ps_clshift
;
1494 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1495 if(min_pages_trigger_port
&&
1497 minimum_pages_remaining
)) {
1498 trigger
= min_pages_trigger_port
;
1499 min_pages_trigger_port
= NULL
;
1503 * found one, quit looking.
1505 ps_select_array
[i
] = j
;
1508 if (trigger
!= IP_NULL
) {
1509 default_pager_space_alert(
1512 ipc_port_release_send(trigger
);
1520 if (j
== start_index
) {
1522 * none at this priority -- mark it full
1524 ps_select_array
[i
] = BS_FULLPRI
;
1532 dp_pages_free_drift_count
++;
1533 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1534 dp_pages_free_drifted_max
= dp_pages_free
;
1536 dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count
,dp_pages_free
));
1540 return PAGING_SEGMENT_NULL
;
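/*
 * Editor's note (illustrative): ps_select_array[] holds, for each backing
 * store priority, the index of the segment to try next (BS_NOPRI when the
 * priority is unused, BS_FULLPRI when every segment at that priority is
 * full).  ps_select_segment() above walks priorities from BS_MAXPRI down to
 * BS_MINPRI and round-robins within a priority, which is what spreads
 * clusters across multiple swap files.
 */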
1543 dp_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1546 ps_allocate_cluster(
1549 paging_segment_t use_ps
)
1551 unsigned int byte_num
;
1553 paging_segment_t ps
;
1554 dp_offset_t cluster
;
1555 ipc_port_t trigger
= IP_NULL
;
1558 * Find best paging segment.
1559 * ps_select_segment will decrement cluster count on ps.
1560 * Must pass cluster shift to find the most appropriate segment.
1562 /* NOTE: The addition of paging segment delete capability threatened
1563 * to seriously complicate the treatment of paging segments in this
1564 * module and the ones that call it (notably ps_clmap), because of the
1565 * difficulty in assuring that the paging segment would continue to
1566 * exist between being unlocked and locked. This was
1567 * avoided because all calls to this module are based in either
1568 * dp_memory_object calls which rely on the vs lock, or by
1569 * the transfer function which is part of the segment delete path.
1570 * The transfer function which is part of paging segment delete is
1571 * protected from multiple callers by the backing store lock.
1572 * The paging segment delete function treats mappings to a paging
1573 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1574 * while data is transferred to the remaining segments. This is in
1575 * line with the view that incomplete or in-transition mappings between
1576 * data, a vstruct, and backing store are protected by the vs lock.
1577 * This and the ordering of the paging segment "going_away" bit setting
1581 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1586 ASSERT(ps
->ps_clcount
!= 0);
1589 dp_pages_free
-= 1 << ps
->ps_clshift
;
1590 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1591 if(min_pages_trigger_port
&&
1592 (dp_pages_free
< minimum_pages_remaining
)) {
1593 trigger
= min_pages_trigger_port
;
1594 min_pages_trigger_port
= NULL
;
1598 if (trigger
!= IP_NULL
) {
1599 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1600 ipc_port_release_send(trigger
);
1603 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1604 PAGING_SEGMENT_NULL
) {
1605 static clock_sec_t lastnotify
= 0;
1607 clock_nsec_t nanoseconds_dummy
;
		/*
		 * Don't immediately jump to the emergency segment. Give the
		 * dynamic pager a chance to create its first normal swap file.
		 * Unless, of course, the very first normal swap file can't be
		 * created due to some problem and we didn't expect that problem
		 * i.e. use_emergency_swap_file_first was never set to true initially.
		 * It then gets set in the swap file creation error handling.
		 */
1617 if(paging_segment_count
> 1 || use_emergency_swap_file_first
== TRUE
) {
1619 ps
= paging_segments
[EMERGENCY_PSEG_INDEX
];
1620 if(IS_PS_EMERGENCY_SEGMENT(ps
) && !IS_PS_GOING_AWAY(ps
)) {
1624 if(IS_PS_GOING_AWAY(ps
)) {
1625 /* Someone de-activated the emergency paging segment*/
1629 } else if(dp_pages_free
) {
1631 * Someone has already activated the emergency paging segment
1633 * Between us having rec'd a NULL segment from ps_select_segment
1634 * and reaching here a new normal segment could have been added.
1635 * E.g. we get NULL segment and another thread just added the
1636 * new swap file. Hence check to see if we have more dp_pages_free
1637 * before activating the emergency segment.
1643 } else if(!IS_PS_OK_TO_USE(ps
) && ps
->ps_clcount
) {
1645 * PS_CAN_USE is only reset from the emergency segment when it's
1646 * been successfully recovered. So it's legal to have an emergency
1647 * segment that has PS_CAN_USE but no clusters because it's recovery
1650 backing_store_t bs
= ps
->ps_bs
;
1651 ps
->ps_state
|= PS_CAN_USE
;
1652 if(ps_select_array
[bs
->bs_priority
] == BS_FULLPRI
||
1653 ps_select_array
[bs
->bs_priority
] == BS_NOPRI
) {
1654 ps_select_array
[bs
->bs_priority
] = 0;
1656 dp_pages_free
+= ps
->ps_pgcount
;
1657 dp_pages_reserve
-= ps
->ps_pgcount
;
1660 dprintf(("Switching ON Emergency paging segment\n"));
1670 * Emit a notification of the low-paging resource condition
1671 * but don't issue it more than once every five seconds. This
1672 * prevents us from overflowing logs with thousands of
1673 * repetitions of the message.
1675 clock_get_system_nanotime(&now
, &nanoseconds_dummy
);
1676 if (paging_segment_count
> 1 && (now
> lastnotify
+ 5)) {
1677 /* With an activated emergency paging segment we still
1678 * didn't get any clusters. This could mean that the
1679 * emergency paging segment is exhausted.
1681 dprintf(("System is out of paging space.\n"));
1687 if(min_pages_trigger_port
) {
1688 trigger
= min_pages_trigger_port
;
1689 min_pages_trigger_port
= NULL
;
1693 if (trigger
!= IP_NULL
) {
1694 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1695 ipc_port_release_send(trigger
);
1697 return (dp_offset_t
) -1;
1701 * Look for an available cluster. At the end of the loop,
1702 * byte_num is the byte offset and bit_num is the bit offset of the
1703 * first zero bit in the paging segment bitmap.
1706 byte_num
= ps
->ps_hint
;
1707 for (; byte_num
< howmany(ps
->ps_ncls
, NBBY
); byte_num
++) {
1708 if (*(ps
->ps_bmap
+ byte_num
) != BYTEMASK
) {
1709 for (bit_num
= 0; bit_num
< NBBY
; bit_num
++) {
1710 if (isclr((ps
->ps_bmap
+ byte_num
), bit_num
))
1713 ASSERT(bit_num
!= NBBY
);
1717 ps
->ps_hint
= byte_num
;
1718 cluster
= (byte_num
*NBBY
) + bit_num
;
1720 /* Space was reserved, so this must be true */
1721 ASSERT(cluster
< ps
->ps_ncls
);
1723 setbit(ps
->ps_bmap
, cluster
);
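	/*
	 * Editor's sketch (illustrative, not original source): the scan above
	 * skips fully-set bytes of the allocation bitmap and then probes the
	 * bits of the first partial byte.  A hypothetical stand-alone version,
	 * assuming NBBY bits per byte:
	 */
#if 0	/* example only -- not compiled */
static int
example_find_free_cluster(const unsigned char *bmap, unsigned int ncls)
{
	unsigned int byte_num, bit_num;

	for (byte_num = 0; byte_num < (ncls + NBBY - 1) / NBBY; byte_num++) {
		if (bmap[byte_num] != 0xff) {	/* at least one clear bit */
			for (bit_num = 0; bit_num < NBBY; bit_num++) {
				if ((bmap[byte_num] & (1 << bit_num)) == 0)
					return (int)(byte_num * NBBY + bit_num);
			}
		}
	}
	return -1;	/* no free cluster */
}
#endif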
1729 void ps_deallocate_cluster(paging_segment_t
, dp_offset_t
); /* forward */
1732 ps_deallocate_cluster(
1733 paging_segment_t ps
,
1734 dp_offset_t cluster
)
1737 if (cluster
>= ps
->ps_ncls
)
1738 panic("ps_deallocate_cluster: Invalid cluster number");
1741 * Lock the paging segment, clear the cluster's bitmap and increment the
1742 * number of free cluster.
1746 clrbit(ps
->ps_bmap
, cluster
);
1747 if( IS_PS_OK_TO_USE(ps
)) {
1749 ps
->ps_pgcount
+= 1 << ps
->ps_clshift
;
1750 dp_pages_free
+= 1 << ps
->ps_clshift
;
1752 ps
->ps_special_clusters
+= 1;
1756 * Move the hint down to the freed cluster if it is
1757 * less than the current hint.
1759 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1760 ps
->ps_hint
= (cluster
/NBBY
);
1765 * If we're freeing space on a full priority, reset the array.
1767 if ( IS_PS_OK_TO_USE(ps
) && ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1768 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1775 void ps_dealloc_vsmap(struct vs_map
*, dp_size_t
); /* forward */
1779 struct vs_map
*vsmap
,
1783 for (i
= 0; i
< size
; i
++)
1784 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1785 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1786 VSM_CLOFF(vsmap
[i
]));
1799 * If this is an indirect structure, then we walk through the valid
1800 * (non-zero) indirect pointers and deallocate the clusters
1801 * associated with each used map entry (via ps_dealloc_vsmap).
1802 * When all of the clusters in an indirect block have been
1803 * freed, we deallocate the block. When all of the indirect
1804 * blocks have been deallocated we deallocate the memory
1805 * holding the indirect pointers.
1807 if (vs
->vs_indirect
) {
1808 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1809 if (vs
->vs_imap
[i
] != NULL
) {
1810 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1811 kfree(vs
->vs_imap
[i
], CLMAP_THRESHOLD
);
1814 kfree(vs
->vs_imap
, INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1817 * Direct map. Free used clusters, then memory.
1819 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1820 kfree(vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1824 bs_commit(- vs
->vs_size
);
1826 zfree(vstruct_zone
, vs
);
1829 int ps_map_extend(vstruct_t
, unsigned int); /* forward */
1833 unsigned int new_size
)
1835 struct vs_map
**new_imap
;
1836 struct vs_map
*new_dmap
= NULL
;
1839 void *old_map
= NULL
;
1840 int old_map_size
= 0;
1842 if (vs
->vs_size
>= new_size
) {
1844 * Someone has already done the work.
1850 * If the new size extends into the indirect range, then we have one
1851 * of two cases: we are going from indirect to indirect, or we are
1852 * going from direct to indirect. If we are going from indirect to
1853 * indirect, then it is possible that the new size will fit in the old
1854 * indirect map. If this is the case, then just reset the size of the
1855 * vstruct map and we are done. If the new size will not
1856 * fit into the old indirect map, then we have to allocate a new
1857 * indirect map and copy the old map pointers into this new map.
1859 * If we are going from direct to indirect, then we have to allocate a
1860 * new indirect map and copy the old direct pages into the first
1861 * indirect page of the new map.
1862 * NOTE: allocating memory here is dangerous, as we're in the
1865 if (INDIRECT_CLMAP(new_size
)) {
1866 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1869 * Get a new indirect map and zero it.
1871 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1872 if (vs
->vs_indirect
&&
1873 (new_map_size
== old_map_size
)) {
1874 bs_commit(new_size
- vs
->vs_size
);
1875 vs
->vs_size
= new_size
;
1879 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1880 if (new_imap
== NULL
) {
1883 memset(new_imap
, 0, new_map_size
);
1885 if (vs
->vs_indirect
) {
1886 /* Copy old entries into new map */
1887 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1888 /* Arrange to free the old map */
1889 old_map
= (void *) vs
->vs_imap
;
1891 } else { /* Old map was a direct map */
1892 /* Allocate an indirect page */
1893 if ((new_imap
[0] = (struct vs_map
*)
1894 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1895 kfree(new_imap
, new_map_size
);
1898 new_dmap
= new_imap
[0];
1899 newdsize
= CLMAP_ENTRIES
;
1903 newdsize
= new_size
;
1905 * If the new map is a direct map, then the old map must
1906 * also have been a direct map. All we have to do is
1907 * to allocate a new direct map, copy the old entries
1908 * into it and free the old map.
1910 if ((new_dmap
= (struct vs_map
*)
1911 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1917 /* Free the old map */
1918 old_map
= (void *) vs
->vs_dmap
;
1919 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1921 /* Copy info from the old map into the new map */
1922 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1924 /* Initialize the rest of the new map */
1925 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1926 VSM_CLR(new_dmap
[i
]);
1929 vs
->vs_imap
= new_imap
;
1930 vs
->vs_indirect
= TRUE
;
1932 vs
->vs_dmap
= new_dmap
;
1933 bs_commit(new_size
- vs
->vs_size
);
1934 vs
->vs_size
= new_size
;
1936 kfree(old_map
, old_map_size
);
1944 struct clmap
*clmap
,
1949 dp_offset_t cluster
; /* The cluster of offset. */
1950 dp_offset_t newcl
; /* The new cluster allocated. */
1953 struct vs_map
*vsmap
;
1957 ASSERT(vs
->vs_dmap
);
1958 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1961 * Initialize cluster error value
1963 clmap
->cl_error
= 0;
1966 * If the object has grown, extend the page map.
1968 if (cluster
>= vs
->vs_size
) {
1969 if (flag
== CL_FIND
) {
1970 /* Do not allocate if just doing a lookup */
1972 return (dp_offset_t
) -1;
1974 if (ps_map_extend(vs
, cluster
+ 1)) {
1976 return (dp_offset_t
) -1;
1981 * Look for the desired cluster. If the map is indirect, then we
1982 * have a two level lookup. First find the indirect block, then
1983 * find the actual cluster. If the indirect block has not yet
1984 * been allocated, then do so. If the cluster has not yet been
1985 * allocated, then do so.
1987 * If any of the allocations fail, then return an error.
1988 * Don't allocate if just doing a lookup.
1990 if (vs
->vs_indirect
) {
1991 long ind_block
= cluster
/CLMAP_ENTRIES
;
1993 /* Is the indirect block allocated? */
1994 vsmap
= vs
->vs_imap
[ind_block
];
1995 if (vsmap
== NULL
) {
1996 if (flag
== CL_FIND
) {
1998 return (dp_offset_t
) -1;
2001 /* Allocate the indirect block */
2002 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
2003 if (vsmap
== NULL
) {
2005 return (dp_offset_t
) -1;
2007 /* Initialize the cluster offsets */
2008 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
2010 vs
->vs_imap
[ind_block
] = vsmap
;
2013 vsmap
= vs
->vs_dmap
;
2016 vsmap
+= cluster%CLMAP_ENTRIES
;
2019 * At this point, vsmap points to the struct vs_map desired.
2021 * Look in the map for the cluster, if there was an error on a
2022 * previous write, flag it and return. If it is not yet
2023 * allocated, then allocate it, if we're writing; if we're
2024 * doing a lookup and the cluster's not allocated, return error.
2026 if (VSM_ISERR(*vsmap
)) {
2027 clmap
->cl_error
= VSM_GETERR(*vsmap
);
2029 return (dp_offset_t
) -1;
2030 } else if (VSM_ISCLR(*vsmap
)) {
2033 if (flag
== CL_FIND
) {
2035 * If there's an error and the entry is clear, then
2036 * we've run out of swap space. Record the error
2040 VSM_SETERR(*vsmap
, error
);
2043 return (dp_offset_t
) -1;
2046 * Attempt to allocate a cluster from the paging segment
2048 newcl
= ps_allocate_cluster(vs
, &psindex
,
2049 PAGING_SEGMENT_NULL
);
2050 if (newcl
== (dp_offset_t
) -1) {
2052 return (dp_offset_t
) -1;
2055 VSM_SETCLOFF(*vsmap
, newcl
);
2056 VSM_SETPS(*vsmap
, psindex
);
2059 newcl
= VSM_CLOFF(*vsmap
);
2062 * Fill in pertinent fields of the clmap
2064 clmap
->cl_ps
= VSM_PS(*vsmap
);
2065 clmap
->cl_numpages
= VSCLSIZE(vs
);
2066 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
2069 * Byte offset in paging segment is byte offset to cluster plus
2070 * byte offset within cluster. It looks ugly, but should be
2073 ASSERT(trunc_page(offset
) == offset
);
2074 newcl
= ptoa_32(newcl
) << vs
->vs_clshift
;
2075 newoff
= offset
& ((1<<(vm_page_shift
+ vs
->vs_clshift
)) - 1);
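	/*
	 * Worked example (editor's note, illustrative): with vm_page_shift = 12
	 * (4 KB pages) and vs_clshift = 2 (4 pages/cluster), a cluster covers
	 * 1 << (12 + 2) = 16 KB.  For segment cluster newcl = 3 and offset
	 * 0x5000, the two lines above yield ptoa_32(3) << 2 = 0xC000 as the
	 * cluster's byte offset in the paging segment and
	 * newoff = 0x5000 & 0x3FFF = 0x1000 as the byte offset within the
	 * cluster.
	 */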
2076 if (flag
== CL_ALLOC
) {
2078 * set bits in the allocation bitmap according to which
2079 * pages were requested. size is in bytes.
2081 i
= atop_32(newoff
);
2082 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
2083 VSM_SETALLOC(*vsmap
, i
);
2085 size
-= vm_page_size
;
2088 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
2091 * Offset is not cluster aligned, so number of pages
2092 * and bitmaps must be adjusted
2094 clmap
->cl_numpages
-= atop_32(newoff
);
2095 CLMAP_SHIFT(clmap
, vs
);
2096 CLMAP_SHIFTALLOC(clmap
, vs
);
2101 * The setting of valid bits and handling of write errors
2102 * must be done here, while we hold the lock on the map.
2103 * It logically should be done in ps_vs_write_complete().
2104 * The size and error information has been passed from
2105 * ps_vs_write_complete(). If the size parameter is non-zero,
2106 * then there is work to be done. If error is also non-zero,
2107 * then the error number is recorded in the cluster and the
2108 * entire cluster is in error.
2110 if (size
&& flag
== CL_FIND
) {
2111 dp_offset_t off
= (dp_offset_t
) 0;
2114 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
2116 VSM_SETPG(*vsmap
, i
);
2117 size
-= vm_page_size
;
2119 ASSERT(i
<= VSCLSIZE(vs
));
2121 BS_STAT(clmap
->cl_ps
->ps_bs
,
2122 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
2124 off
= VSM_CLOFF(*vsmap
);
2125 VSM_SETERR(*vsmap
, error
);
2128 * Deallocate cluster if error, and no valid pages
2131 if (off
!= (dp_offset_t
) 0)
2132 ps_deallocate_cluster(clmap
->cl_ps
, off
);
2134 return (dp_offset_t
) 0;
2138 DP_DEBUG(DEBUG_VS_INTERNAL
,
2139 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
2140 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
2141 DP_DEBUG(DEBUG_VS_INTERNAL
,
2142 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
2143 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
2144 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
2146 return (newcl
+ newoff
);
2149 void ps_clunmap(vstruct_t
, dp_offset_t
, dp_size_t
); /* forward */
2157 dp_offset_t cluster
; /* The cluster number of offset */
2158 struct vs_map
*vsmap
;
2163 * Loop through all clusters in this range, freeing paging segment
2164 * clusters and map entries as encountered.
2166 while (length
> 0) {
2170 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
2171 if (vs
->vs_indirect
) /* indirect map */
2172 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
2174 vsmap
= vs
->vs_dmap
;
2175 if (vsmap
== NULL
) {
2179 vsmap
+= cluster%CLMAP_ENTRIES
;
2180 if (VSM_ISCLR(*vsmap
)) {
2181 length
-= vm_page_size
;
2182 offset
+= vm_page_size
;
		/*
		 * We've got a valid mapping.  Clear it and deallocate
		 * paging segment cluster pages.
		 * Optimize for entire cluster clearing.
		 */
2190 if ( (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) ) {
2192 * Not cluster aligned.
2194 ASSERT(trunc_page(newoff
) == newoff
);
2195 i
= atop_32(newoff
);
2198 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
2199 VSM_CLRPG(*vsmap
, i
);
2200 VSM_CLRALLOC(*vsmap
, i
);
2201 length
-= vm_page_size
;
2202 offset
+= vm_page_size
;
2207 * If map entry is empty, clear and deallocate cluster.
2209 if (!VSM_ALLOC(*vsmap
)) {
2210 ps_deallocate_cluster(VSM_PS(*vsmap
),
2219 void ps_vs_write_complete(vstruct_t
, dp_offset_t
, dp_size_t
, int); /* forward */
2222 ps_vs_write_complete(
2231 * Get the struct vsmap for this cluster.
2232 * Use READ, even though it was written, because the
2233 * cluster MUST be present, unless there was an error
2234 * in the original ps_clmap (e.g. no space), in which
2235 * case, nothing happens.
2237 * Must pass enough information to ps_clmap to allow it
2238 * to set the vs_map structure bitmap under lock.
2240 (void) ps_clmap(vs
, offset
, &clmap
, CL_FIND
, size
, error
);
2243 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, dp_offset_t
, vm_offset_t
, dp_size_t
, boolean_t
, int); /* forward */
2246 vs_cl_write_complete(
2248 __unused paging_segment_t ps
,
2250 __unused vm_offset_t addr
,
2255 // kern_return_t kr;
2259 * For internal objects, the error is recorded on a
2260 * per-cluster basis by ps_clmap() which is called
2261 * by ps_vs_write_complete() below.
2263 dprintf(("write failed error = 0x%x\n", error
));
2264 /* add upl_abort code here */
2266 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2268 * Notify the vstruct mapping code, so it can do its accounting.
2270 ps_vs_write_complete(vs
, offset
, size
, error
);
2274 ASSERT(vs
->vs_async_pending
> 0);
2275 vs
->vs_async_pending
-= size
;
2276 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2277 vs
->vs_waiting_async
= FALSE
;
2279 thread_wakeup(&vs
->vs_async_pending
);
2286 #ifdef DEVICE_PAGING
2287 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2291 MACH_PORT_FACE reply_port
,
2292 kern_return_t device_code
,
2293 io_buf_len_t bytes_written
)
2295 struct vs_async
*vsa
;
2297 vsa
= (struct vs_async
*)
2298 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2300 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2301 device_code
= KERN_FAILURE
;
2304 vsa
->vsa_error
= device_code
;
2307 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2308 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2309 /* revisit when async disk segments redone */
2310 if(vsa
->vsa_error
) {
2311 /* need to consider error condition. re-write data or */
2312 /* throw it away here. */
2313 vm_map_copy_discard((vm_map_copy_t
)vsa
->vsa_addr
);
2315 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2316 vsa
->vsa_size
, vsa
->vsa_error
);
2318 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2319 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2324 return KERN_SUCCESS
;
2327 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2329 device_write_reply_inband(
2330 MACH_PORT_FACE reply_port
,
2331 kern_return_t return_code
,
2332 io_buf_len_t bytes_written
)
2334 panic("device_write_reply_inband: illegal");
2335 return KERN_SUCCESS
;
2338 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2341 MACH_PORT_FACE reply_port
,
2342 kern_return_t return_code
,
2344 mach_msg_type_number_t dataCnt
)
2346 struct vs_async
*vsa
;
2347 vsa
= (struct vs_async
*)
2348 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2349 vsa
->vsa_addr
= (vm_offset_t
)data
;
2350 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2351 vsa
->vsa_error
= return_code
;
2352 thread_wakeup(&vsa
);
2353 return KERN_SUCCESS
;
2356 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2358 device_read_reply_inband(
2359 MACH_PORT_FACE reply_port
,
2360 kern_return_t return_code
,
2361 io_buf_ptr_inband_t data
,
2362 mach_msg_type_number_t dataCnt
)
2364 panic("device_read_reply_inband: illegal");
2365 return KERN_SUCCESS
;
2368 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2370 device_read_reply_overwrite(
2371 MACH_PORT_FACE reply_port
,
2372 kern_return_t return_code
,
2373 io_buf_len_t bytes_read
)
2375 panic("device_read_reply_overwrite: illegal\n");
2376 return KERN_SUCCESS
;
2379 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2382 MACH_PORT_FACE reply_port
,
2383 kern_return_t return_code
,
2384 MACH_PORT_FACE device_port
)
2386 panic("device_open_reply: illegal\n");
2387 return KERN_SUCCESS
;
kern_return_t
ps_read_device(
    paging_segment_t ps,
    dp_offset_t offset,
    vm_offset_t *bufferp,
    unsigned int size,
    unsigned int *residualp,
    int flags)
{
    kern_return_t kr;
    recnum_t dev_offset;
    unsigned int bytes_wanted;
    unsigned int bytes_read;
    unsigned int total_read;
    vm_offset_t dev_buffer;
    vm_offset_t buf_ptr;
    unsigned int records_read;
    struct vs_async *vsa;
    MACH_PORT_FACE device;

    vm_map_copy_t device_data = NULL;
    default_pager_thread_t *dpt = NULL;

    device = dev_port_lookup(ps->ps_device);
    clustered_reads[atop_32(size)]++;

    dev_offset = (ps->ps_offset +
                  (offset >> (vm_page_shift - ps->ps_record_shift)));
    bytes_wanted = size;
    total_read = 0;
    *bufferp = (vm_offset_t)NULL;

    do {
        vsa = VS_ALLOC_ASYNC();
        vsa->vsa_offset = 0;

        ip_lock(vsa->reply_port);
        vsa->reply_port->ip_sorights++;
        ip_reference(vsa->reply_port);
        ip_unlock(vsa->reply_port);
        kr = ds_device_read_common(device,
                                   vsa->reply_port,
                                   (mach_msg_type_name_t)
                                       MACH_MSG_TYPE_MOVE_SEND_ONCE,
                                   (dev_mode_t) 0,
                                   dev_offset,
                                   bytes_wanted,
                                   (IO_READ | IO_CALL),
                                   (io_buf_ptr_t *) &dev_buffer,
                                   (mach_msg_type_number_t *) &bytes_read);
        if (kr == MIG_NO_REPLY) {
            assert_wait(&vsa, THREAD_UNINT);
            thread_block(THREAD_CONTINUE_NULL);

            dev_buffer = vsa->vsa_addr;
            bytes_read = (unsigned int)vsa->vsa_size;
            kr = vsa->vsa_error;
        }
        if (kr != KERN_SUCCESS || bytes_read == 0) {
            break;
        }
        total_read += bytes_read;

        /*
         * If we got the entire range, use the returned dev_buffer.
         */
        if (bytes_read == size) {
            *bufferp = (vm_offset_t)dev_buffer;
            break;
        }
        dprintf(("read only %d bytes out of %d\n",
                 bytes_read, bytes_wanted));
        if (dpt == NULL) {
            dpt = get_read_buffer();
            buf_ptr = dpt->dpt_buffer;
            *bufferp = (vm_offset_t)buf_ptr;
        }
        /*
         * Otherwise, copy the data into the provided buffer (*bufferp)
         * and append the rest of the range as it comes in.
         */
        memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
        buf_ptr += bytes_read;
        bytes_wanted -= bytes_read;
        records_read = (bytes_read >>
                        (vm_page_shift - ps->ps_record_shift));
        dev_offset += records_read;
        DP_DEBUG(DEBUG_VS_INTERNAL,
                 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
                  dev_buffer, bytes_read));
        if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
            != KERN_SUCCESS)
            Panic("dealloc buf");
    } while (bytes_wanted);

    *residualp = size - total_read;
    if ((dev_buffer != *bufferp) && (total_read != 0)) {
        vm_offset_t temp_buffer;
        vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
        memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
        if (vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
                                    VM_MAP_COPYIN_OPT_SRC_DESTROY |
                                    VM_MAP_COPYIN_OPT_STEAL_PAGES |
                                    VM_MAP_COPYIN_OPT_PMAP_ENTER,
                                    (vm_map_copy_t *)&device_data, FALSE))
            panic("ps_read_device: cannot copyin locally provided buffer\n");
    }
    else if ((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)) {
        if (vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
                                    VM_MAP_COPYIN_OPT_SRC_DESTROY |
                                    VM_MAP_COPYIN_OPT_STEAL_PAGES |
                                    VM_MAP_COPYIN_OPT_PMAP_ENTER,
                                    (vm_map_copy_t *)&device_data, FALSE))
            panic("ps_read_device: cannot copyin backing store provided buffer\n");
    }

    *bufferp = (vm_offset_t)device_data;

    if (dpt != NULL) {
        /* Free the receive buffer */
        dpt->checked_out = 0;
        thread_wakeup(&dpt_array);
    }
    return KERN_SUCCESS;
}
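
/*
 * ps_write_device: write a range to a device-backed paging segment.
 * If a vs_async structure is supplied the write is issued
 * asynchronously and completion is reported through
 * device_write_reply(); otherwise the write is performed
 * synchronously, looping until the whole range has been written.
 */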
kern_return_t
ps_write_device(
    paging_segment_t ps,
    dp_offset_t offset,
    vm_offset_t addr,
    unsigned int size,
    struct vs_async *vsa)
{
    recnum_t dev_offset;
    io_buf_len_t bytes_to_write, bytes_written;
    recnum_t records_written;
    kern_return_t kr;
    MACH_PORT_FACE reply_port;
    MACH_PORT_FACE device;

    clustered_writes[atop_32(size)]++;

    dev_offset = (ps->ps_offset +
                  (offset >> (vm_page_shift - ps->ps_record_shift)));
    bytes_to_write = size;

    if (vsa) {
        /*
         * Asynchronous write.
         */
        reply_port = vsa->reply_port;
        ip_lock(reply_port);
        reply_port->ip_sorights++;
        ip_reference(reply_port);
        ip_unlock(reply_port);

        device = dev_port_lookup(ps->ps_device);

        vsa->vsa_addr = addr;
        kr = ds_device_write_common(device,
                                    reply_port,
                                    (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
                                    (dev_mode_t) 0,
                                    dev_offset,
                                    (io_buf_ptr_t) addr,
                                    size,
                                    (IO_WRITE | IO_CALL),
                                    &bytes_written);
        if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
            dprintf(("%s0x%x, addr=0x%x,"
                     "size=0x%x,offset=0x%x\n",
                     "device_write_request returned ",
                     kr, addr, size, offset));
            BS_STAT(ps->ps_bs,
                    ps->ps_bs->bs_pages_out_fail += atop_32(size));
            /* do the completion notification to free resources */
            device_write_reply(reply_port, kr, 0);
        }
    } else {
        do {
            /*
             * Synchronous write.
             */
            device = dev_port_lookup(ps->ps_device);
            kr = ds_device_write_common(device,
                                        IP_NULL, 0,
                                        (dev_mode_t) 0,
                                        dev_offset,
                                        (io_buf_ptr_t) addr,
                                        size,
                                        (IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
                                        &bytes_written);
            if (kr != KERN_SUCCESS) {
                dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
                         "device_write returned ",
                         kr, addr, size, offset));
                BS_STAT(ps->ps_bs,
                        ps->ps_bs->bs_pages_out_fail += atop_32(size));
                return PAGER_ERROR;
            }
            if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
                Panic("fragmented write");
            records_written = (bytes_written >>
                               (vm_page_shift - ps->ps_record_shift));
            dev_offset += records_written;

            if (bytes_written != bytes_to_write) {
                dprintf(("wrote only %d bytes out of %d\n",
                         bytes_written, bytes_to_write));
            }
            bytes_to_write -= bytes_written;
            addr += bytes_written;
        } while (bytes_to_write > 0);
    }
    return PAGER_SUCCESS;
}
#else /* !DEVICE_PAGING */

kern_return_t
ps_read_device(
    __unused paging_segment_t ps,
    __unused dp_offset_t offset,
    __unused vm_offset_t *bufferp,
    __unused unsigned int size,
    __unused unsigned int *residualp,
    __unused int flags)
{
    panic("ps_read_device not supported");
    return KERN_FAILURE;
}

kern_return_t
ps_write_device(
    __unused paging_segment_t ps,
    __unused dp_offset_t offset,
    __unused vm_offset_t addr,
    __unused unsigned int size,
    __unused struct vs_async *vsa)
{
    panic("ps_write_device not supported");
    return KERN_FAILURE;
}

#endif /* DEVICE_PAGING */
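
/*
 * pvs_object_data_provided: bookkeeping hook invoked once a cluster
 * read has handed its pages to the VM.  It updates the global page-in
 * statistics and, when precious pages are in use, releases the
 * backing-store mapping for the range just supplied.
 */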
void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t);    /* forward */

void
pvs_object_data_provided(
    __unused vstruct_t vs,
    __unused upl_t upl,
    __unused upl_offset_t offset,
    upl_size_t size)
{
    DP_DEBUG(DEBUG_VS_INTERNAL,
             ("buffer=0x%x,offset=0x%x,size=0x%x\n",
              upl, offset, size));

    GSTAT(global_stats.gs_pages_in += atop_32(size));

#if USE_PRECIOUS
    ps_clunmap(vs, offset, size);
#endif    /* USE_PRECIOUS */
}

static memory_object_offset_t last_start;
static vm_size_t last_length;
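
/*
 * pvs_cluster_read: satisfy a page-in request from the backing store.
 * The routine first checks whether the faulting page is present at
 * all; if so, it asks the VM how large a speculative cluster to try
 * for, then walks the cluster map looking for the longest physically
 * contiguous run of allocated pages that includes the original
 * offset, and issues at most one I/O for that run.
 */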
kern_return_t
pvs_cluster_read(
    vstruct_t vs,
    dp_offset_t vs_offset,
    dp_size_t cnt,
    void *fault_info)
{
    kern_return_t error = KERN_SUCCESS;
    unsigned int size;
    unsigned int residual;
    unsigned int request_flags;
    int io_flags = 0;
    unsigned int pages_in_cl;
    unsigned int cl_size;
    unsigned int cl_mask;
    unsigned int cl_index;
    unsigned int xfer_size;
    dp_offset_t orig_vs_offset;
    dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
    paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT];
    struct clmap clmap;
    upl_t upl;
    unsigned int page_list_count;
    memory_object_offset_t cluster_start;
    vm_size_t cluster_length;
    uint32_t io_streaming;

    pages_in_cl = 1 << vs->vs_clshift;
    cl_size = pages_in_cl * vm_page_size;
    cl_mask = cl_size - 1;

#if USE_PRECIOUS
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
#else
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
#endif
    cl_index = (vs_offset & cl_mask) / vm_page_size;

    if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) ||
        !CLMAP_ISSET(clmap, cl_index)) {
        /*
         * the needed page doesn't exist in the backing store...
         * we don't want to try to do any I/O, just abort the
         * page and let the fault handler provide a zero-fill
         */
        if (cnt == 0) {
            /*
             * The caller was just poking at us to see if
             * the page has been paged out. No need to
             * mess with the page at all.
             * Just let the caller know we don't have that page.
             */
            return KERN_FAILURE;
        }

        page_list_count = 0;

        memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
                                        PAGE_SIZE, PAGE_SIZE,
                                        &upl, NULL, &page_list_count,
                                        request_flags | UPL_SET_INTERNAL);

        if (clmap.cl_error)
            upl_abort(upl, UPL_ABORT_ERROR);
        else
            upl_abort(upl, UPL_ABORT_UNAVAILABLE);
        upl_deallocate(upl);

        return KERN_SUCCESS;
    }

    if (cnt == 0) {
        /*
         * The caller was just poking at us to see if
         * the page has been paged out. No need to
         * mess with the page at all.
         * Just let the caller know we do have that page.
         */
        return KERN_SUCCESS;
    }

    assert(dp_encryption_inited);
    if (dp_encryption) {
        /*
         * ENCRYPTED SWAP:
         * request that the UPL be prepared for
         * decryption.
         */
        request_flags |= UPL_ENCRYPT;
    }
    orig_vs_offset = vs_offset;

    cnt = VM_SUPER_CLUSTER;
    cluster_start = (memory_object_offset_t) vs_offset;
    cluster_length = (vm_size_t) cnt;
    io_streaming = 0;

    /*
     * determine how big a speculative I/O we should try for...
     */
    if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) {
        assert(vs_offset >= (dp_offset_t) cluster_start &&
               vs_offset < (dp_offset_t) (cluster_start + cluster_length));
        vs_offset = (dp_offset_t) cluster_start;
        cnt = (dp_size_t) cluster_length;
    } else {
        cluster_length = PAGE_SIZE;
        cnt = PAGE_SIZE;
    }

    if (io_streaming)
        io_flags |= UPL_IOSTREAMING;

    last_start = cluster_start;
    last_length = cluster_length;

    /*
     * This loop will be executed multiple times until the entire
     * range has been looked at or we issue an I/O... if the request spans cluster
     * boundaries, the clusters will be checked for logical continuity,
     * if contiguous the I/O request will span multiple clusters...
     * at most only 1 I/O will be issued... it will encompass the original offset
     */
    while (cnt && error == KERN_SUCCESS) {
        int ps_info_valid;
        int seg_index;

        if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
            size = VM_SUPER_CLUSTER;
            size -= vs_offset & cl_mask;
        } else if (cnt > VM_SUPER_CLUSTER)
            size = VM_SUPER_CLUSTER;
        else
            size = cnt;

        cnt -= size;

        ps_info_valid = 0;
        seg_index = 0;

        while (size > 0 && error == KERN_SUCCESS) {
            unsigned int abort_size;
            unsigned int lsize;
            unsigned int failed_size;
            int beg_pseg;
            int beg_indx;
            dp_offset_t cur_offset;

            if ( !ps_info_valid) {
                ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
                psp[seg_index] = CLMAP_PS(clmap);
                ps_info_valid = 1;
            }
            /*
             * skip over unallocated physical segments
             */
            if (ps_offset[seg_index] == (dp_offset_t) -1) {
                abort_size = cl_size - (vs_offset & cl_mask);
                abort_size = MIN(abort_size, size);

                size -= abort_size;
                vs_offset += abort_size;

                seg_index++;
                ps_info_valid = 0;
                continue;
            }
            cl_index = (vs_offset & cl_mask) / vm_page_size;

            for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
                /*
                 * skip over unallocated pages
                 */
                if (CLMAP_ISSET(clmap, cl_index))
                    break;
                abort_size += vm_page_size;
            }
            if (abort_size) {
                size -= abort_size;
                vs_offset += abort_size;

                if (cl_index == pages_in_cl) {
                    /*
                     * if we're at the end of this physical cluster
                     * then bump to the next one and continue looking
                     */
                    seg_index++;
                    ps_info_valid = 0;
                    continue;
                }
                if (size == 0)
                    break;
            }
            /*
             * remember the starting point of the first allocated page
             * for the I/O we're about to issue
             */
            beg_pseg = seg_index;
            beg_indx = cl_index;
            cur_offset = vs_offset;

            /*
             * calculate the size of the I/O that we can do...
             * this may span multiple physical segments if
             * they are contiguous
             */
            for (xfer_size = 0; xfer_size < size; ) {

                while (cl_index < pages_in_cl && xfer_size < size) {
                    /*
                     * accumulate allocated pages within
                     * a physical segment
                     */
                    if (CLMAP_ISSET(clmap, cl_index)) {
                        xfer_size += vm_page_size;
                        cur_offset += vm_page_size;
                        cl_index++;

                        BS_STAT(psp[seg_index]->ps_bs,
                                psp[seg_index]->ps_bs->bs_pages_in++);
                    } else
                        break;
                }
                if (cl_index < pages_in_cl || xfer_size >= size) {
                    /*
                     * we've hit an unallocated page or
                     * the end of this request... see if
                     * it's time to fire the I/O
                     */
                    break;
                }
                /*
                 * we've hit the end of the current physical
                 * segment and there's more to do, so try
                 * moving to the next one
                 */
                seg_index++;

                ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
                psp[seg_index] = CLMAP_PS(clmap);
                ps_info_valid = 1;

                if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
                    /*
                     * if the physical segment we're about
                     * to step into is not contiguous to
                     * the one we're currently in, or it's
                     * in a different paging file, or
                     * it hasn't been allocated....
                     * we stop this run and go check
                     * to see if it's time to fire the I/O
                     */
                    break;
                }
                /*
                 * start with first page of the next physical
                 * segment
                 */
                cl_index = 0;
            }
            if (xfer_size == 0) {
                /*
                 * no I/O to generate for this segment
                 */
                break;
            }
            if (cur_offset <= orig_vs_offset) {
                /*
                 * we've hit a hole in our speculative cluster
                 * before the offset that we're really after...
                 * don't issue the I/O since it doesn't encompass
                 * the original offset and we're looking to only
                 * pull in the speculative pages if they can be
                 * made part of a single I/O
                 */
                size -= xfer_size;
                vs_offset += xfer_size;

                continue;
            }
            /*
             * we have a contiguous range of allocated pages
             * to read from that encompasses the original offset
             */
            page_list_count = 0;
            memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
                                            xfer_size, xfer_size,
                                            &upl, NULL, &page_list_count,
                                            request_flags | UPL_SET_INTERNAL | UPL_NOBLOCK);

            error = ps_read_file(psp[beg_pseg],
                                 upl, (upl_offset_t) 0,
                                 ps_offset[beg_pseg] + (beg_indx * vm_page_size),
                                 xfer_size, &residual, io_flags);

            failed_size = 0;

            /*
             * Adjust counts and send response to VM. Optimize
             * for the common case, i.e. no error and/or partial
             * data. If there was an error, then we need to error
             * the entire range, even if some data was successfully
             * read. If there was a partial read we may supply some
             * data and may error some as well. In all cases the
             * VM must receive some notification for every page
             * in the range.
             */
            if ((error == KERN_SUCCESS) && (residual == 0)) {
                /*
                 * Got everything we asked for, supply the data
                 * to the VM. Note that as a side effect of
                 * supplying the data, the buffer holding the
                 * supplied data is deallocated from the pager's
                 * address space.
                 */
                pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
            } else {
                failed_size = xfer_size;

                if (error == KERN_SUCCESS) {
                    if (residual == xfer_size) {
                        /*
                         * If a read operation returns no error
                         * and no data moved, we turn it into
                         * an error, assuming we're reading at
                         * or beyond EOF.
                         * Fall through and error the entire range.
                         */
                        error = KERN_FAILURE;
                    } else {
                        /*
                         * Otherwise, we have partial read. If
                         * the part read is a integral number
                         * of pages supply it. Otherwise round
                         * it up to a page boundary, zero fill
                         * the unread part, and supply it.
                         * Fall through and error the remainder
                         * of the range, if any.
                         */
                        unsigned int fill;

                        fill = residual & ~vm_page_size;
                        lsize = (xfer_size - residual) + fill;

                        pvs_object_data_provided(vs, upl, vs_offset, lsize);

                        if (lsize < xfer_size) {
                            failed_size = xfer_size - lsize;
                            error = KERN_FAILURE;
                        }
                    }
                }
                if (error != KERN_SUCCESS) {
                    /*
                     * There was an error in some part of the range, tell
                     * the VM. Note that error is explicitly checked again
                     * since it can be modified above.
                     */
                    BS_STAT(psp[beg_pseg]->ps_bs,
                            psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
                }
            }
            /*
             * we've issued a single I/O that encompassed the original offset
             * at this point we either met our speculative request length or
             * we ran into a 'hole' (i.e. page not present in the cluster, cluster
             * not present or not physically contiguous to the previous one), so
             * we're done issuing I/O at this point
             */
            return error;
        }
    }
    return error;
}
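
/*
 * vs_cluster_write: push a set of dirty pages out to the backing store.
 * In the common (not dp_internal) case the routine builds a UPL over
 * the candidate range, walks it looking for runs of contiguous dirty
 * or precious pages that stay within physically contiguous paging
 * segment space, and writes each run with ps_write_file(); pages that
 * turn out to be clean are committed back untouched.
 */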
int vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
    vstruct_t vs,
    upl_t internal_upl,
    upl_offset_t offset,
    upl_size_t cnt,
    boolean_t dp_internal,
    int flags)
{
    upl_size_t transfer_size;
    int error = 0;
    struct clmap clmap;

    dp_offset_t actual_offset;    /* Offset within paging segment */
    paging_segment_t ps;
    dp_offset_t mobj_base_addr;
    dp_offset_t mobj_target_addr;

    upl_t upl;
    upl_page_info_t *pl;
    int page_index;
    unsigned int page_max_index;
    upl_size_t list_size;
    int pages_in_cl;
    unsigned int cl_size;
    int base_index;
    unsigned int seg_size;
    unsigned int upl_offset_in_object;
    boolean_t minimal_clustering = FALSE;
    boolean_t found_dirty;

    pages_in_cl = 1 << vs->vs_clshift;
    cl_size = pages_in_cl * vm_page_size;

#if CONFIG_FREEZE
    minimal_clustering = TRUE;
#else
    if (dp_isssd == TRUE)
        minimal_clustering = TRUE;
#endif
    if (!dp_internal) {
        unsigned int page_list_count;
        int request_flags;
        unsigned int super_size;
        int first_dirty;
        int num_dirty;
        int num_of_pages;
        int seg_index;
        upl_offset_t upl_offset;
        upl_offset_t upl_offset_aligned;
        dp_offset_t seg_offset;
        dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];
        paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1];

        if (bs_low)
            super_size = cl_size;
        else
            super_size = VM_SUPER_CLUSTER;

        request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
            UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
            UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;

        if (!dp_encryption_inited) {
            /*
             * ENCRYPTED SWAP:
             * Once we've started using swap, we
             * can't change our mind on whether
             * it needs to be encrypted or not.
             */
            dp_encryption_inited = TRUE;
        }
        if (dp_encryption) {
            /*
             * ENCRYPTED SWAP:
             * request that the UPL be prepared for
             * encryption.
             */
            request_flags |= UPL_ENCRYPT;
            flags |= UPL_PAGING_ENCRYPTED;
        }

        page_list_count = 0;
        memory_object_super_upl_request(vs->vs_control,
                (memory_object_offset_t)offset,
                cnt, super_size,
                &upl, NULL, &page_list_count,
                request_flags | UPL_FOR_PAGEOUT);

        /*
         * The default pager does not handle objects larger than
         * 4GB, so it does not deal with offset that don't fit in
         * 32-bit. Cast down upl->offset now and make sure we
         * did not lose any valuable bits.
         */
        upl_offset_in_object = (unsigned int) upl->offset;
        assert(upl->offset == upl_offset_in_object);

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        seg_size = cl_size - (upl_offset_in_object % cl_size);
        upl_offset_aligned = upl_offset_in_object & ~(cl_size - 1);
        page_index = 0;
        page_max_index = upl->size / PAGE_SIZE;
        found_dirty = TRUE;

        for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) {
            unsigned int seg_pgcnt;

            seg_pgcnt = seg_size / PAGE_SIZE;

            if (minimal_clustering == TRUE) {
                unsigned int non_dirty;

                non_dirty = 0;
                found_dirty = FALSE;

                for (; non_dirty < seg_pgcnt; non_dirty++) {
                    if ((page_index + non_dirty) >= page_max_index)
                        break;

                    if (UPL_DIRTY_PAGE(pl, page_index + non_dirty) ||
                        UPL_PRECIOUS_PAGE(pl, page_index + non_dirty)) {
                        found_dirty = TRUE;
                        break;
                    }
                }
            }
            if (found_dirty == TRUE) {
                ps_offset[seg_index] =
                    ps_clmap(vs,
                             upl_offset_aligned,
                             &clmap, CL_ALLOC,
                             cl_size, 0);

                if (ps_offset[seg_index] == (dp_offset_t) -1) {
                    upl_abort(upl, 0);
                    upl_deallocate(upl);

                    return KERN_FAILURE;
                }
                psp[seg_index] = CLMAP_PS(clmap);
            }
            if (transfer_size > seg_size) {
                page_index += seg_pgcnt;
                transfer_size -= seg_size;
                upl_offset_aligned += cl_size;
                seg_size = cl_size;
                seg_index++;
            } else
                transfer_size = 0;
        }
        /*
         * Ignore any non-present pages at the end of the
         * UPL.
         */
        for (page_index = upl->size / vm_page_size; page_index > 0;)
            if (UPL_PAGE_PRESENT(pl, --page_index))
                break;
        num_of_pages = page_index + 1;

        base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE;

        for (page_index = 0; page_index < num_of_pages; ) {
            /*
             * skip over non-dirty pages
             */
            for ( ; page_index < num_of_pages; page_index++) {
                if (UPL_DIRTY_PAGE(pl, page_index)
                    || UPL_PRECIOUS_PAGE(pl, page_index))
                    /*
                     * this is a page we need to write
                     * go see if we can buddy it up with
                     * others that are contiguous to it
                     */
                    break;
                /*
                 * if the page is not-dirty, but present we
                 * need to commit it... This is an unusual
                 * case since we only asked for dirty pages
                 */
                if (UPL_PAGE_PRESENT(pl, page_index)) {
                    boolean_t empty = FALSE;
                    upl_commit_range(upl,
                                     page_index * vm_page_size,
                                     vm_page_size,
                                     UPL_COMMIT_NOTIFY_EMPTY,
                                     pl,
                                     page_list_count,
                                     &empty);
                    if (empty) {
                        assert(page_index ==
                               num_of_pages - 1);
                        upl_deallocate(upl);
                    }
                }
            }
            if (page_index == num_of_pages)
                /*
                 * no more pages to look at, we're out of here
                 */
                break;

            /*
             * gather up contiguous dirty pages... we have at
             * least 1 * otherwise we would have bailed above
             * make sure that each physical segment that we step
             * into is contiguous to the one we're currently in
             * if it's not, we have to stop and write what we have
             */
            for (first_dirty = page_index;
                 page_index < num_of_pages; ) {
                if ( !UPL_DIRTY_PAGE(pl, page_index)
                     && !UPL_PRECIOUS_PAGE(pl, page_index))
                    break;
                page_index++;
                /*
                 * if we just looked at the last page in the UPL
                 * we don't need to check for physical segment
                 * continuity
                 */
                if (page_index < num_of_pages) {
                    int cur_seg;
                    int nxt_seg;

                    cur_seg = (base_index + (page_index - 1))/pages_in_cl;
                    nxt_seg = (base_index + page_index)/pages_in_cl;

                    if (cur_seg != nxt_seg) {
                        if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
                            /*
                             * if the segment we're about
                             * to step into is not
                             * contiguous to the one we're
                             * currently in, or it's in a
                             * different paging file....
                             * we stop here and generate
                             * the I/O
                             */
                            break;
                    }
                }
            }
            num_dirty = page_index - first_dirty;

            if (num_dirty) {
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                while (transfer_size) {

                    if ((seg_size = cl_size -
                            ((upl_offset_in_object +
                              upl_offset) % cl_size))
                            > transfer_size)
                        seg_size = transfer_size;

                    ps_vs_write_complete(
                        vs,
                        (upl_offset_in_object +
                         upl_offset),
                        seg_size, error);

                    transfer_size -= seg_size;
                    upl_offset += seg_size;
                }
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                seg_index = (base_index + first_dirty) / pages_in_cl;
                seg_offset = (upl_offset_in_object + upl_offset) % cl_size;

                error = ps_write_file(psp[seg_index],
                                      upl, upl_offset,
                                      ps_offset[seg_index]
                                          + seg_offset,
                                      transfer_size, flags);
            } else {
                boolean_t empty = FALSE;
                upl_abort_range(upl,
                                first_dirty * vm_page_size,
                                num_dirty * vm_page_size,
                                UPL_ABORT_NOTIFY_EMPTY,
                                &empty);
                if (empty) {
                    assert(page_index == num_of_pages);
                    upl_deallocate(upl);
                }
            }
        }
    } else {
        assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift));
        list_size = cnt;

        /* The caller provides a mapped_data which is derived  */
        /* from a temporary object. The targeted pages are     */
        /* guaranteed to be set at offset 0 in the mapped_data */
        /* The actual offset however must still be derived     */
        /* from the offset in the vs in question               */
        mobj_base_addr = offset;
        mobj_target_addr = mobj_base_addr;

        for (transfer_size = list_size; transfer_size != 0;) {
            actual_offset = ps_clmap(vs, mobj_target_addr,
                                     &clmap, CL_ALLOC,
                                     transfer_size < cl_size ?
                                     transfer_size : cl_size, 0);
            if (actual_offset == (dp_offset_t) -1) {
                error = 1;
                break;
            }
            cnt = MIN(transfer_size,
                      (unsigned) CLMAP_NPGS(clmap) * vm_page_size);
            ps = CLMAP_PS(clmap);
            /* Assume that the caller has given us contiguous */
            /* pages */
            if (cnt) {
                ps_vs_write_complete(vs, mobj_target_addr,
                                     cnt, error);
                error = ps_write_file(ps, internal_upl,
                                      0, actual_offset,
                                      cnt, flags);
                if (error)
                    break;
            }
            actual_offset += cnt;
            mobj_target_addr += cnt;
            transfer_size -= cnt;
        }
    }
    if (error)
        return KERN_FAILURE;
    else
        return KERN_SUCCESS;
}
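
/*
 * ps_vstruct_allocated_size: return, in bytes, how much backing store
 * is currently allocated to a vstruct by counting the bits set in its
 * direct or indirect cluster maps.
 */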
vm_size_t
ps_vstruct_allocated_size(
    vstruct_t vs)
{
    unsigned int num_pages;
    struct vs_map *vsmap;
    unsigned int i, j, k;

    num_pages = 0;
    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL)
                continue;
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]))
                    continue;
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k))
                        num_pages++;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL)
            return 0;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]))
                continue;
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k))
                    num_pages++;
            }
        }
    }
    return ptoa_32(num_pages);
}
unsigned int
ps_vstruct_allocated_pages(
    vstruct_t vs,
    default_pager_page_t *pages,
    unsigned int pages_size)
{
    unsigned int num_pages;
    struct vs_map *vsmap;
    dp_offset_t offset;
    unsigned int i, j, k;

    num_pages = 0;
    offset = 0;

    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL) {
                offset += (vm_page_size * CLMAP_ENTRIES *
                           VSCLSIZE(vs));
                continue;
            }
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j])) {
                    offset += vm_page_size * VSCLSIZE(vs);
                    continue;
                }
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                        num_pages++;
                        if (num_pages < pages_size)
                            pages++->dpp_offset =
                                offset;
                    }
                    offset += vm_page_size;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL)
            return 0;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j])) {
                offset += vm_page_size * VSCLSIZE(vs);
                continue;
            }
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                    num_pages++;
                    if (num_pages < pages_size)
                        pages++->dpp_offset = offset;
                }
                offset += vm_page_size;
            }
        }
    }
    return num_pages;
}
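
/*
 * ps_vstruct_transfer_from_segment: migrate every cluster of a vstruct
 * that lives on the given paging segment to other backing store, via
 * vs_cluster_transfer().  The vstruct is temporarily marked
 * vs_xfer_pending so that conflicting writers are held off while each
 * cluster is moved.
 */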
kern_return_t
ps_vstruct_transfer_from_segment(
    vstruct_t vs,
    paging_segment_t segment,
    upl_t upl)
{
    struct vs_map *vsmap;
//    struct vs_map old_vsmap;
//    struct vs_map new_vsmap;
    unsigned int i, j;

    VS_LOCK(vs);    /* block all work on this vstruct */
                    /* can't allow the normal multiple write */
                    /* semantic because writes may conflict */
    vs->vs_xfer_pending = TRUE;
    vs_wait_for_sync_writers(vs);
    vs_start_write(vs);
    vs_wait_for_readers(vs);
    /* we will unlock the vs to allow other writes while transferring */
    /* and will be guaranteed of the persistance of the vs struct */
    /* because the caller of ps_vstruct_transfer_from_segment bumped */
    /* vs_async_pending */
    /* OK we now have guaranteed no other parties are accessing this */
    /* vs. Now that we are also supporting simple lock versions of */
    /* vs_lock we cannot hold onto VS_LOCK as we may block below. */
    /* our purpose in holding it before was the multiple write case */
    /* we now use the boolean xfer_pending to do that. We can use */
    /* a boolean instead of a count because we have guaranteed single */
    /* file access to this code in its caller */
    VS_UNLOCK(vs);
vs_changed:
    if (vs->vs_indirect) {
        unsigned int vsmap_size;
        int clmap_off;

        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL)
                continue;
            /* loop on clusters in this indirect map */
            clmap_off = (vm_page_size * CLMAP_ENTRIES *
                         VSCLSIZE(vs) * i);
            if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
                vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
            else
                vsmap_size = CLMAP_ENTRIES;
            for (j = 0; j < vsmap_size; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]) ||
                    (VSM_PS(vsmap[j]) != segment))
                    continue;
                if (vs_cluster_transfer(vs,
                        (vm_page_size * (j << vs->vs_clshift))
                        + clmap_off,
                        vm_page_size << vs->vs_clshift,
                        upl) != KERN_SUCCESS) {
                    VS_LOCK(vs);
                    vs->vs_xfer_pending = FALSE;
                    VS_UNLOCK(vs);
                    vs_finish_write(vs);
                    return KERN_FAILURE;
                }
                /* allow other readers/writers during transfer*/
                VS_LOCK(vs);
                vs->vs_xfer_pending = FALSE;
                VS_UNLOCK(vs);
                vs_finish_write(vs);

                VS_LOCK(vs);
                vs->vs_xfer_pending = TRUE;
                vs_wait_for_sync_writers(vs);
                vs_start_write(vs);
                vs_wait_for_readers(vs);
                VS_UNLOCK(vs);
                if (!(vs->vs_indirect)) {
                    goto vs_changed;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL) {
            VS_LOCK(vs);
            vs->vs_xfer_pending = FALSE;
            VS_UNLOCK(vs);
            vs_finish_write(vs);
            return KERN_SUCCESS;
        }
        /* loop on clusters in the direct map */
        for (j = 0; j < vs->vs_size; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]) ||
                (VSM_PS(vsmap[j]) != segment))
                continue;
            if (vs_cluster_transfer(vs,
                    vm_page_size * (j << vs->vs_clshift),
                    vm_page_size << vs->vs_clshift,
                    upl) != KERN_SUCCESS) {
                VS_LOCK(vs);
                vs->vs_xfer_pending = FALSE;
                VS_UNLOCK(vs);
                vs_finish_write(vs);
                return KERN_FAILURE;
            }
            /* allow other readers/writers during transfer*/
            VS_LOCK(vs);
            vs->vs_xfer_pending = FALSE;
            VS_UNLOCK(vs);
            vs_finish_write(vs);

            VS_LOCK(vs);
            vs->vs_xfer_pending = TRUE;
            vs_wait_for_sync_writers(vs);
            vs_start_write(vs);
            vs_wait_for_readers(vs);
            VS_UNLOCK(vs);
            if (vs->vs_indirect) {
                goto vs_changed;
            }
        }
    }

    VS_LOCK(vs);
    vs->vs_xfer_pending = FALSE;
    VS_UNLOCK(vs);
    vs_finish_write(vs);
    return KERN_SUCCESS;
}
vs_map_t
vs_get_map_entry(
    vstruct_t vs,
    dp_offset_t offset)
{
    struct vs_map *vsmap;
    dp_offset_t cluster;

    cluster = atop_32(offset) >> vs->vs_clshift;
    if (vs->vs_indirect) {
        long ind_block = cluster/CLMAP_ENTRIES;

        /* Is the indirect block allocated? */
        vsmap = vs->vs_imap[ind_block];
        if (vsmap == (vs_map_t) NULL)
            return vsmap;
    } else
        vsmap = vs->vs_dmap;
    vsmap += cluster%CLMAP_ENTRIES;
    return vsmap;
}
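
/*
 * vs_cluster_transfer: move one cluster's worth of data from its
 * current (off-line) paging segment to freshly allocated backing
 * store.  Each run of valid pages is read with ps_read_file() and
 * immediately rewritten with vs_cluster_write(); on any error the
 * original cluster map entry is restored so the old backing store
 * remains valid.
 */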
kern_return_t
vs_cluster_transfer(
    vstruct_t vs,
    dp_offset_t offset,
    dp_size_t cnt,
    upl_t upl)
{
    dp_offset_t actual_offset;
    paging_segment_t ps;
    struct clmap clmap;
    kern_return_t error = KERN_SUCCESS;
    unsigned int size, size_wanted;
    int i;
    unsigned int residual = 0;
    unsigned int unavail_size;
//    default_pager_thread_t *dpt;
//    boolean_t dealloc;
    struct vs_map *vsmap_ptr = NULL;
    struct vs_map read_vsmap;
    struct vs_map original_read_vsmap;
    struct vs_map write_vsmap;
//    vm_offset_t ioaddr;

    /* vs_cluster_transfer reads in the pages of a cluster and
     * then writes these pages back to new backing store. The
     * segment the pages are being read from is assumed to have
     * been taken off-line and is no longer considered for new
     * space requests.
     */

    /*
     * This loop will be executed once per cluster referenced.
     * Typically this means once, since it's unlikely that the
     * VM system will ask for anything spanning cluster boundaries.
     *
     * If there are holes in a cluster (in a paging segment), we stop
     * reading at the hole, then loop again, hoping to
     * find valid pages later in the cluster. This continues until
     * the entire range has been examined, and read, if present. The
     * pages are written as they are read. If a failure occurs after
     * some pages are written the unmap call at the bottom of the loop
     * recovers the backing store and the old backing store remains
     * intact.
     */

    VSM_CLR(write_vsmap);
    VSM_CLR(original_read_vsmap);
    /* grab the actual object's pages to sync with I/O */
    while (cnt && (error == KERN_SUCCESS)) {
        vsmap_ptr = vs_get_map_entry(vs, offset);
        actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

        if (actual_offset == (dp_offset_t) -1) {
            /*
             * Nothing left to write in this cluster at least
             * set write cluster information for any previous
             * write, clear for next cluster, if there is one
             */
            unsigned int local_size, clmask, clsize;

            clsize = vm_page_size << vs->vs_clshift;
            clmask = clsize - 1;
            local_size = clsize - (offset & clmask);
            ASSERT(local_size);
            local_size = MIN(local_size, cnt);

            /* This cluster has no data in it beyond what may */
            /* have been found on a previous iteration through */
            /* the loop "write_vsmap" */
            *vsmap_ptr = write_vsmap;
            VSM_CLR(write_vsmap);
            VSM_CLR(original_read_vsmap);

            cnt -= local_size;
            offset += local_size;
            continue;
        }

        /*
         * Count up contiguous available or unavailable
         * pages.
         */
        ps = CLMAP_PS(clmap);
        ASSERT(ps);
        size = 0;
        unavail_size = 0;
        for (i = 0;
             (size < cnt) && (unavail_size < cnt) &&
             (i < CLMAP_NPGS(clmap)); i++) {
            if (CLMAP_ISSET(clmap, i)) {
                if (unavail_size != 0)
                    break;
                size += vm_page_size;
                BS_STAT(ps->ps_bs,
                        ps->ps_bs->bs_pages_in++);
            } else {
                if (size != 0)
                    break;
                unavail_size += vm_page_size;
            }
        }

        if (size == 0) {
            ASSERT(unavail_size);
            ps_clunmap(vs, offset, unavail_size);
            cnt -= unavail_size;
            offset += unavail_size;
            if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
                == 0) {
                /* There is no more to transfer in this
                   cluster
                */
                *vsmap_ptr = write_vsmap;
                VSM_CLR(write_vsmap);
                VSM_CLR(original_read_vsmap);
            }
            continue;
        }

        if (VSM_ISCLR(original_read_vsmap))
            original_read_vsmap = *vsmap_ptr;

        if (ps->ps_segtype == PS_PARTITION) {
            panic("swap partition not supported\n");
            /*NOTREACHED*/
            error = KERN_FAILURE;
/*
            NEED TO ISSUE WITH SYNC & NO COMMIT
            error = ps_read_device(ps, actual_offset, &buffer,
                                   size, &residual, flags);
*/
        } else {
            /* NEED TO ISSUE WITH SYNC & NO COMMIT */
            error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
                                 size, &residual,
                                 (UPL_IOSYNC | UPL_NOCOMMIT));
        }

        read_vsmap = *vsmap_ptr;

        /*
         * Adjust counts and put data in new BS. Optimize for the
         * common case, i.e. no error and/or partial data.
         * If there was an error, then we need to error the entire
         * range, even if some data was successfully read.
         */
        if ((error == KERN_SUCCESS) && (residual == 0)) {
            /*
             * Got everything we asked for, supply the data to
             * the new BS. Note that as a side effect of supplying
             * the data, the buffer holding the supplied data is
             * deallocated from the pager's address space unless
             * the write is unsuccessful.
             */

            /* note buffer will be cleaned up in all cases by */
            /* internal_cluster_write or if an error on write */
            /* the vm_map_copy_page_discard call */
            *vsmap_ptr = write_vsmap;

            if (vs_cluster_write(vs, upl, offset,
                    size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
                error = KERN_FAILURE;
                if (!(VSM_ISCLR(*vsmap_ptr))) {
                    /* unmap the new backing store object */
                    ps_clunmap(vs, offset, size);
                }
                /* original vsmap */
                *vsmap_ptr = original_read_vsmap;
                VSM_CLR(write_vsmap);
            } else {
                if ((offset + size) &
                    ((vm_page_size << vs->vs_clshift)
                     - 1)) {
                    /* There is more to transfer in this
                       cluster
                    */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                    ps_clunmap(vs, offset, size);
                } else {
                    /* discard the old backing object */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                    ps_clunmap(vs, offset, size);
                    *vsmap_ptr = write_vsmap;
                    VSM_CLR(write_vsmap);
                    VSM_CLR(original_read_vsmap);
                }
            }
        } else {
            if (error == KERN_SUCCESS) {
                if (residual == size) {
                    /*
                     * If a read operation returns no error
                     * and no data moved, we turn it into
                     * an error, assuming we're reading at
                     * or beyond EOF.
                     * Fall through and error the entire
                     * range.
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                } else {
                    /*
                     * Otherwise, we have partial read.
                     * This is also considered an error
                     * for the purposes of cluster transfer
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                }
            }
        }
        cnt -= size;
        offset += size;

    } /* END while (cnt && (error == 0)) */
    if (!VSM_ISCLR(write_vsmap))
        *vsmap_ptr = write_vsmap;

    return error;
}
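
/*
 * default_pager_add_file: attach a swap file (vnode) to a backing
 * store object as a new paging segment.  The segment's bookkeeping
 * (record geometry, cluster bitmap, free counts) is initialized here,
 * and the first segment created is reserved as the emergency segment.
 */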
kern_return_t
default_pager_add_file(
    MACH_PORT_FACE backing_store,
    vnode_ptr_t vp,
    int record_size,
    vm_size_t size)
{
    backing_store_t bs;
    paging_segment_t ps;
    int i;
    unsigned int j;
    int error;

    if ((bs = backing_store_lookup(backing_store))
        == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps == PAGING_SEGMENT_NULL)
            continue;
        if (ps->ps_segtype != PS_FILE)
            continue;

        /*
         * Check for overlap on same device.
         */
        if (ps->ps_vnode == (struct vnode *)vp) {
            return KERN_INVALID_ARGUMENT;
        }
    }

    /*
     * Set up the paging segment
     */
    ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
    if (ps == PAGING_SEGMENT_NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }

    ps->ps_segtype = PS_FILE;
    ps->ps_vnode = (struct vnode *)vp;
    ps->ps_record_shift = local_log2(vm_page_size / record_size);
    assert((dp_size_t) size == size);
    ps->ps_recnum = (dp_size_t) size;
    ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift;

    ps->ps_pgcount = ps->ps_pgnum;
    ps->ps_clshift = local_log2(bs->bs_clsize);
    ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
    ps->ps_special_clusters = 0;

    ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
    if (!ps->ps_bmap) {
        kfree(ps, sizeof *ps);
        return KERN_RESOURCE_SHORTAGE;
    }
    for (j = 0; j < ps->ps_ncls; j++) {
        clrbit(ps->ps_bmap, j);
    }

    if (paging_segment_count == 0) {
        ps->ps_state = PS_EMERGENCY_SEGMENT;
        if (use_emergency_swap_file_first) {
            ps->ps_state |= PS_CAN_USE;
        }
        emergency_segment_backing_store = backing_store;
    } else {
        ps->ps_state = PS_CAN_USE;
    }

    if ((error = ps_enter(ps)) != 0) {
        kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
        kfree(ps, sizeof *ps);
        return KERN_RESOURCE_SHORTAGE;
    }

    bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
    bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

    if (IS_PS_OK_TO_USE(ps)) {
        dp_pages_free += ps->ps_pgcount;
    } else {
        dp_pages_reserve += ps->ps_pgcount;
    }

    bs_more_space(ps->ps_clcount);

    /*
     * If the paging segment being activated is not the emergency
     * segment and we notice that the emergency segment is being
     * used then we help recover it. If all goes well, the
     * emergency segment will be back to its original state of
     * online but not activated (till it's needed the next time).
     */
    ps = paging_segments[EMERGENCY_PSEG_INDEX];
    if (IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
        if (default_pager_backing_store_delete(emergency_segment_backing_store)) {
            dprintf(("Failed to recover emergency paging segment\n"));
        } else {
            dprintf(("Recovered emergency paging segment\n"));
        }
    }

    DP_DEBUG(DEBUG_BS_INTERNAL,
             ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
              device, offset, (dp_size_t) size, record_size,
              ps->ps_record_shift, ps->ps_pgnum));

    return KERN_SUCCESS;
}
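
/*
 * ps_read_file / ps_write_file: move a UPL's worth of data between a
 * file-backed paging segment and the VM system by way of
 * vnode_pagein() and vnode_pageout().
 */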
kern_return_t
ps_read_file(
    paging_segment_t ps,
    upl_t upl,
    upl_offset_t upl_offset,
    dp_offset_t offset,
    upl_size_t size,
    unsigned int *residualp,
    int flags)
{
    vm_object_offset_t f_offset;
    int error = 0;
    int result;

    assert(dp_encryption_inited);

    clustered_reads[atop_32(size)]++;

    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    /*
     * for transfer case we need to pass uploffset and flags
     */
    assert((upl_size_t) size == size);
    error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL);

    /* The vnode_pagein semantic is somewhat at odds with the existing */
    /* device_read semantic. Partial reads are not experienced at this */
    /* level. It is up to the bit map code and cluster read code to */
    /* check that requested data locations are actually backed, and the */
    /* pagein code to either read all of the requested data or return an */
    /* error. */

    if (error)
        result = KERN_FAILURE;
    else {
        *residualp = 0;
        result = KERN_SUCCESS;
    }
    return result;
}
kern_return_t
ps_write_file(
    paging_segment_t ps,
    upl_t upl,
    upl_offset_t upl_offset,
    dp_offset_t offset,
    unsigned int size,
    int flags)
{
    vm_object_offset_t f_offset;
    kern_return_t result;

    assert(dp_encryption_inited);

    clustered_writes[atop_32(size)]++;
    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    if (flags & UPL_PAGING_ENCRYPTED) {
        /*
         * ENCRYPTED SWAP:
         * encrypt all the pages that we're going
         * to pageout.
         */
        upl_encrypt(upl, upl_offset, size);
    }
    assert((upl_size_t) size == size);
    if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL))
        result = KERN_FAILURE;
    else
        result = KERN_SUCCESS;

    return result;
}
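
/*
 * default_pager_triggers: control interface used to register
 * notification ports and adjust pager behaviour: turning swap
 * encryption on or off, setting the high and low water mark alert
 * ports, selecting the emergency swap file policy, and reporting swap
 * file creation errors.
 */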
kern_return_t
default_pager_triggers( __unused MACH_PORT_FACE default_pager,
    int hi_wat,
    int lo_wat,
    int flags,
    MACH_PORT_FACE trigger_port)
{
    MACH_PORT_FACE release;
    kern_return_t kr;
    clock_sec_t now;
    clock_nsec_t nanoseconds_dummy;
    static clock_sec_t error_notify = 0;

    if (flags == SWAP_ENCRYPT_ON) {
        /* ENCRYPTED SWAP: turn encryption on */
        release = trigger_port;
        if (!dp_encryption_inited) {
            dp_encryption_inited = TRUE;
            dp_encryption = TRUE;
            kr = KERN_SUCCESS;
        } else {
            kr = KERN_FAILURE;
        }
    } else if (flags == SWAP_ENCRYPT_OFF) {
        /* ENCRYPTED SWAP: turn encryption off */
        release = trigger_port;
        if (!dp_encryption_inited) {
            dp_encryption_inited = TRUE;
            dp_encryption = FALSE;
            kr = KERN_SUCCESS;
        } else {
            kr = KERN_FAILURE;
        }
    } else if (flags == HI_WAT_ALERT) {
        release = min_pages_trigger_port;
        min_pages_trigger_port = trigger_port;
        minimum_pages_remaining = hi_wat/vm_page_size;
        kr = KERN_SUCCESS;
    } else if (flags == LO_WAT_ALERT) {
        release = max_pages_trigger_port;
        max_pages_trigger_port = trigger_port;
        maximum_pages_free = lo_wat/vm_page_size;
        kr = KERN_SUCCESS;
    } else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) {
        use_emergency_swap_file_first = TRUE;
        release = trigger_port;
        kr = KERN_SUCCESS;
    } else if (flags == SWAP_FILE_CREATION_ERROR) {
        release = trigger_port;
        kr = KERN_SUCCESS;

        if (paging_segment_count == 1) {
            use_emergency_swap_file_first = TRUE;
        }
        no_paging_space_action();
        clock_get_system_nanotime(&now, &nanoseconds_dummy);
        if (now > error_notify + 5) {
            dprintf(("Swap File Error.\n"));
            error_notify = now;
        }
    } else {
        release = trigger_port;
        kr = KERN_INVALID_ARGUMENT;
    }

    if (IP_VALID(release))
        ipc_port_release_send(release);

    return kr;
}
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL	3	/* time between free level checks */
#define PF_LATENCY	10	/* number of intervals before release */

static int dp_pages_free_low_count = 0;
thread_call_t default_pager_backing_store_monitor_callout;

void
default_pager_backing_store_monitor(__unused thread_call_param_t p1,
                                    __unused thread_call_param_t p2)
{
//    unsigned long long average;
    ipc_port_t trigger;
    uint64_t deadline;

    /*
     * We determine whether it will be safe to release some
     * backing store by watching the free page level. If
     * it remains below the maximum_pages_free threshold for
     * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
     * then we deem it safe.
     *
     * Note that this establishes a maximum rate at which backing
     * store will be released, as each notification (currently)
     * only results in a single backing store object being
     * released.
     */
    if (dp_pages_free > maximum_pages_free) {
        dp_pages_free_low_count++;
    } else {
        dp_pages_free_low_count = 0;
    }

    /* decide whether to send notification */
    trigger = IP_NULL;
    if (max_pages_trigger_port &&
        (backing_store_release_trigger_disable == 0) &&
        (dp_pages_free_low_count > PF_LATENCY)) {
        trigger = max_pages_trigger_port;
        max_pages_trigger_port = NULL;
    }

    /* send notification */
    if (trigger != IP_NULL) {
        if (backing_store_release_trigger_disable != 0) {
            assert_wait((event_t)
                        &backing_store_release_trigger_disable,
                        THREAD_UNINT);
            thread_block(THREAD_CONTINUE_NULL);
        }
        default_pager_space_alert(trigger, LO_WAT_ALERT);
        ipc_port_release_send(trigger);
        dp_pages_free_low_count = 0;
    }

    clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
    thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
}