/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Paging File Management.
 */
#include <mach/host_priv.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>

#include <default_pager/default_pager_internal.h>
#include <default_pager/default_pager_alerts.h>
#include <default_pager/default_pager_object_server.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <kern/kern_types.h>
#include <kern/host.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_protos.h>
/* todo - need large internal object support */

/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future
 */
#define	ALLOC_STRIDE	(1024 * 1024 * 1024)
int physical_transfer_cluster_count = 0;

#define	VM_SUPER_CLUSTER	0x40000
#define	VM_SUPER_PAGES		64

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define	VSTRUCT_DEF_CLSHIFT	2
int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;
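
/*
 * Illustrative sketch (not part of the original source): how the cluster
 * shift above translates into pages and bytes per cluster. It uses only
 * identifiers declared in this file (vstruct_def_clshift, vm_page_size);
 * the helper name is hypothetical and the block is compiled out.
 */
#if 0
static void
example_cluster_geometry(void)
{
	/* 1 << clshift pages per cluster: clshift 2 => 4 pages/cluster */
	unsigned int pages_per_cluster = 1 << vstruct_def_clshift;

	/* bytes per cluster = pages per cluster * page size */
	vm_size_t bytes_per_cluster = pages_per_cluster * vm_page_size;

	printf("cluster = %u pages, %lu bytes\n",
	       pages_per_cluster, (unsigned long) bytes_per_cluster);
}
#endif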
unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list: head of list of to-be-completed I/O ops
 *	async_num_queued: number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out: number of pages of requests not completed.
 */
struct vs_async *vs_async_list;
int	async_num_queued;
int	async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async *vs_async_free_list;

lck_mtx_t	default_pager_async_lock;	/* Protects globals above */
int	vs_alloc_async_failed = 0;		/* statistics */
int	vs_alloc_async_count = 0;		/* statistics */
struct vs_async *vs_alloc_async(void);		/* forward */
void vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		lck_mtx_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	lck_mtx_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr)
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
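
/*
 * Illustrative sketch (not part of the original source): the locking
 * discipline the macros above express. Any access to the vs_async free
 * list and its counters is bracketed by VS_ASYNC_LOCK()/VS_ASYNC_UNLOCK().
 * The helper name is hypothetical and the block is compiled out.
 */
#if 0
static struct vs_async *
example_pop_free_async(void)
{
	struct vs_async *vsa;

	VS_ASYNC_LOCK();		/* serialize with other allocators */
	vsa = vs_async_free_list;
	if (vsa != NULL)
		vs_async_free_list = vsa->vsa_next;
	VS_ASYNC_UNLOCK();

	return vsa;
}
#endif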
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	dp_pages_free_drift_count = 0;
unsigned int	dp_pages_free_drifted_max = 0;
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	use_emergency_swap_file_first = FALSE;
boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
boolean_t	backing_store_stop_compaction = FALSE;

/* Have we decided if swap needs to be encrypted yet ? */
boolean_t	dp_encryption_inited = FALSE;
/* Should we encrypt swap ? */
boolean_t	dp_encryption = FALSE;
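
/*
 * Illustrative sketch (not part of the original source): the hysteresis
 * pattern used further down (e.g. in ps_select_segment and
 * ps_allocate_cluster) when free paging space crosses the low-water mark.
 * In the real call sites the check runs under the paging-segment lock; the
 * trigger port is claimed once, then the alert is sent and the send right
 * released. The helper name is hypothetical and the block is compiled out.
 */
#if 0
static void
example_low_space_check(void)
{
	ipc_port_t trigger = IP_NULL;

	/* claim the trigger at most once while below the threshold */
	if (min_pages_trigger_port &&
	    (dp_pages_free < minimum_pages_remaining)) {
		trigger = min_pages_trigger_port;
		min_pages_trigger_port = NULL;
	}

	if (trigger != IP_NULL) {
		default_pager_space_alert(trigger, HI_WAT_ALERT);
		ipc_port_release_send(trigger);
	}
}
#endif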
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
MACH_PORT_FACE emergency_segment_backing_store;
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
lck_mtx_t paging_segments_lock;
int paging_segment_max = 0;
int paging_segment_count = 0;
int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
/*
 * Total pages free in system
 * This differs from clusters committed/avail which is a measure of the
 * over commitment of paging segments to backing store. An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	dp_pages_reserve = 0;
unsigned int	cluster_transfer_minimum = 100;
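
/*
 * Illustrative sketch (not part of the original source): how free pages
 * are derived from the paging segment table, mirroring the accounting in
 * bs_global_info() below (free clusters scaled by the per-segment cluster
 * shift). The helper name is hypothetical and the block is compiled out.
 */
#if 0
static unsigned int
example_count_free_pages(void)
{
	unsigned int	 pages_free = 0;
	paging_segment_t ps;
	int		 i;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		/* each free cluster holds (1 << ps_clshift) pages */
		pages_free += ps->ps_clcount << ps->ps_clshift;
	}
	return pages_free;
}
#endif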
/* forward declarations */
kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *get_read_buffer( void );
kern_return_t ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
	upl_t		 upl);
kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int);	/* forward */
kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *);	/* forward */
kern_return_t vs_cluster_transfer(
	vstruct_t	vs,
	dp_offset_t	offset,
	dp_size_t	cnt,
	upl_t		upl);
vs_map_t vs_get_map_entry(
	vstruct_t	vs,
	dp_offset_t	offset);

kern_return_t
default_pager_backing_store_delete_internal( MACH_PORT_FACE );
default_pager_thread_t *
get_read_buffer( void )
{
	int	i;

	DPT_LOCK(dpt_lock);
	while(TRUE) {
		for (i=0; i<default_pager_internal_count; i++) {
			if(dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
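
/*
 * Illustrative sketch (not part of the original source): the check-out /
 * check-in protocol callers such as ps_read_device() follow around
 * get_read_buffer(). The buffer is released by clearing checked_out and
 * waking any waiter sleeping on dpt_array. The helper name is hypothetical
 * and the block is compiled out.
 */
#if 0
static void
example_use_read_buffer(void)
{
	default_pager_thread_t	*dpt;
	vm_offset_t		 buf_ptr;

	dpt = get_read_buffer();	/* may block until a buffer is free */
	buf_ptr = dpt->dpt_buffer;

	/* ... copy partially-read data into buf_ptr ... */

	/* check the buffer back in and wake anyone waiting for one */
	dpt->checked_out = 0;
	thread_wakeup(&dpt_array);
}
#endif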
void
bs_initialize(void)
{
	int	i;

	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
#if	VS_ASYNC_REUSE
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
	}
}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t out_of_memory)
{

	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}

void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
void
bs_more_space(
	int	nclusters)
{
	BSL_LOCK();
	/*
	 * Account for new paging space.
	 */
	clusters_available += nclusters;

	if (clusters_available >= clusters_committed) {
		if (verbose && user_warned) {
			printf("%s%s - %d excess clusters now.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_available - clusters_committed);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - still short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
			clusters_committed_peak -= nclusters;
		}
	}
	BSL_UNLOCK();

	return;
}

void
bs_commit(
	int	nclusters)
{
	BSL_LOCK();
	clusters_committed += nclusters;
	if (clusters_committed > clusters_available) {
		if (verbose && !user_warned) {
			user_warned = TRUE;
			printf("%s%s - short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
		}
		if (clusters_committed > clusters_committed_peak) {
			clusters_committed_peak = clusters_committed;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - was short of up to %d clusters.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_committed_peak - clusters_available);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	}
	BSL_UNLOCK();

	return;
}
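
/*
 * Illustrative sketch (not part of the original source): how the
 * commit/available counters above are driven elsewhere in this file.
 * bs_commit() is called with a vstruct's cluster count when it is created
 * (and with a negative count when it is torn down), while bs_more_space()
 * is called with the cluster count of a newly added paging segment. The
 * helper name is hypothetical and the block is compiled out.
 */
#if 0
static void
example_commit_accounting(vstruct_t vs, paging_segment_t ps)
{
	/* a new paging segment contributes its clusters as available space */
	bs_more_space(ps->ps_clcount);

	/* a new vstruct commits its (possibly not yet written) clusters */
	bs_commit(vs->vs_size);

	/* ... the object lives, pages in and out ... */

	/* tearing the vstruct down releases the commitment */
	bs_commit(- vs->vs_size);
}
#endif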
int	default_pager_info_verbose = 1;

void
bs_global_info(
	uint64_t	*totalp,
	uint64_t	*freep)
{
	uint64_t		pages_total, pages_free;
	paging_segment_t	ps;
	int			i;

	PSL_LOCK();
	pages_total = pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;

		/*
		 * no need to lock: by the time this data
		 * gets back to any remote requestor it
		 * will be obsolete anyways
		 */
		pages_total += ps->ps_pgnum;
		pages_free += ps->ps_clcount << ps->ps_clshift;
		DP_DEBUG(DEBUG_BS_INTERNAL,
			 ("segment #%d: %d total, %d free\n",
			  i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
	}
	*totalp = pages_total;
	*freep = pages_free;
	if (verbose && user_warned && default_pager_info_verbose) {
		if (clusters_available < clusters_committed) {
			printf("%s %d clusters committed, %d available.\n",
			       my_name,
			       clusters_committed,
			       clusters_available);
		}
	}
	PSL_UNLOCK();
}
backing_store_t backing_store_alloc(void);	/* forward */

backing_store_t
backing_store_alloc(void)
{
	backing_store_t bs;

	bs = (backing_store_t) kalloc(sizeof (struct backing_store));
	if (bs == BACKING_STORE_NULL)
		panic("backing_store_alloc: no memory");

	BS_LOCK_INIT(bs);
	bs->bs_port = MACH_PORT_NULL;
	bs->bs_priority = 0;
	bs->bs_clsize = 0;
	bs->bs_pages_total = 0;
	bs->bs_pages_in = 0;
	bs->bs_pages_in_fail = 0;
	bs->bs_pages_out = 0;
	bs->bs_pages_out_fail = 0;

	return bs;
}

backing_store_t backing_store_lookup(MACH_PORT_FACE);	/* forward */
/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
	MACH_PORT_FACE port)
{
	backing_store_t	bs;

/*
	port is currently backed with a vs structure in the alias field
	we could create an ISBS alias and a port_is_bs call but frankly
	I see no reason for the test, the bs->port == port check below
	will work properly on junk entries.

	if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;

	BSL_LOCK();
	queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
		      bs_links) {
		BS_LOCK(bs);
		if (bs->bs_port == port) {
			BSL_UNLOCK();
			/* Success, return it locked. */
			return bs;
		}
		BS_UNLOCK(bs);
	}
	BSL_UNLOCK();
	return BACKING_STORE_NULL;
}
void backing_store_add(backing_store_t);	/* forward */

void
backing_store_add(
	__unused backing_store_t bs)
{
//	MACH_PORT_FACE	port = bs->bs_port;
//	MACH_PORT_FACE	pset = default_pager_default_set;
	kern_return_t	kr = KERN_SUCCESS;

	if (kr != KERN_SUCCESS)
		panic("backing_store_add: add to set");

}
/*
 * Set up default page shift, but only if not already
 * set and argument is within range.
 */
boolean_t
bs_set_default_clsize(unsigned int npages)
{
	switch(npages){
	case 1:
	case 2:
	case 4:
	case 8:
		if (default_pager_clsize == 0)	/* if not yet set */
			vstruct_def_clshift = local_log2(npages);
		return(TRUE);
	}
	return(FALSE);
}

int bs_get_global_clsize(int clsize);	/* forward */
int
bs_get_global_clsize(
	int	clsize)
{
	int	i;
	memory_object_default_t	dmm;
	kern_return_t		kr;

	/*
	 * Only allow setting of cluster size once. If called
	 * with no cluster size (default), we use the compiled-in default
	 * for the duration. The same cluster size is used for all
	 * paging segments.
	 */
	if (default_pager_clsize == 0) {
		/*
		 * Keep cluster size in bit shift because it's quicker
		 * arithmetic, and easier to keep at a power of 2.
		 */
		if (clsize != NO_CLSIZE) {
			for (i = 0; (1 << i) < clsize; i++);
			if (i > MAX_CLUSTER_SHIFT)
				i = MAX_CLUSTER_SHIFT;
			vstruct_def_clshift = i;
		}
		default_pager_clsize = (1 << vstruct_def_clshift);

		/*
		 * Let the user know the new (and definitive) cluster size.
		 */
		if (verbose)
			printf("%scluster size = %d page%s\n",
			       my_name, default_pager_clsize,
			       (default_pager_clsize == 1) ? "" : "s");

		/*
		 * Let the kernel know too, in case it hasn't used the
		 * default value provided in main() yet.
		 */
		dmm = default_pager_object;
		clsize = default_pager_clsize * vm_page_size;	/* in bytes */
		kr = host_default_memory_manager(host_priv_self(),
						 &dmm,
						 clsize);
		memory_object_default_deallocate(dmm);

		if (kr != KERN_SUCCESS) {
			panic("bs_get_global_cl_size:host_default_memory_manager");
		}
		if (dmm != default_pager_object) {
			panic("bs_get_global_cl_size:there is another default pager");
		}
	}
	ASSERT(default_pager_clsize > 0 &&
	       (default_pager_clsize & (default_pager_clsize - 1)) == 0);

	return default_pager_clsize;
}
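
/*
 * Illustrative sketch (not part of the original source): the same
 * round-up-to-power-of-2 logic bs_get_global_clsize() applies to a
 * requested cluster size, shown on its own. For example, a request of 6
 * pages yields shift 3 (8 pages/cluster), capped at MAX_CLUSTER_SHIFT.
 * The helper name is hypothetical and the block is compiled out.
 */
#if 0
static int
example_clsize_to_shift(int clsize)
{
	int i;

	/* smallest i such that (1 << i) >= clsize */
	for (i = 0; (1 << i) < clsize; i++);

	/* never exceed the compile-time maximum cluster shift */
	if (i > MAX_CLUSTER_SHIFT)
		i = MAX_CLUSTER_SHIFT;

	return i;
}
#endif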
kern_return_t
default_pager_backing_store_create(
	memory_object_default_t	pager,
	int			priority,
	int			clsize,		/* in bytes */
	MACH_PORT_FACE		*backing_store)
{
	backing_store_t	bs;
	MACH_PORT_FACE	port;
	struct vstruct_alias *alias_struct;

	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	bs = backing_store_alloc();
	port = ipc_port_alloc_kernel();
	ipc_port_make_send(port);
	assert (port != IP_NULL);

	DP_DEBUG(DEBUG_BS_EXTERNAL,
		 ("priority=%d clsize=%d bs_port=0x%x\n",
		  priority, clsize, (int) backing_store));

	alias_struct = (struct vstruct_alias *)
		kalloc(sizeof (struct vstruct_alias));
	if(alias_struct != NULL) {
		alias_struct->vs = (struct vstruct *)bs;
		alias_struct->name = &default_pager_ops;
		port->alias = (uintptr_t) alias_struct;
	}
	else {
		ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
		kfree(bs, sizeof (struct backing_store));
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_port = port;
	if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
		priority = BS_MAXPRI;
	else if (priority == BS_NOPRI)
		priority = BS_MAXPRI;
	else
		priority = BS_MINPRI;
	bs->bs_priority = priority;

	bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));

	BSL_LOCK();
	queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
		    bs_links);
	BSL_UNLOCK();

	backing_store_add(bs);

	*backing_store = port;
	return KERN_SUCCESS;
}
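
/*
 * Illustrative sketch (not part of the original source): the creation
 * sequence, modeled on the DEVICE_PAGING bootstrap path further down in
 * this file. A backing store is created at maximum priority with the
 * negotiated cluster size, then a device segment is attached to it. The
 * helper name and the device/count/record_size values are hypothetical,
 * and the block is compiled out.
 */
#if 0
static void
example_create_backing_store(MACH_PORT_FACE device,
			     recnum_t count,
			     unsigned int record_size)
{
	MACH_PORT_FACE	bs = MACH_PORT_NULL;
	int		clsize;

	/* negotiate the global cluster size (in pages), then scale to bytes */
	clsize = bs_get_global_clsize(0);

	if (!default_pager_backing_store_create(
		    default_pager_object,
		    DEFAULT_PAGER_BACKING_STORE_MAXPRI,
		    (clsize * vm_page_size),
		    &bs)) {
		/* attach the whole device as one paging segment */
		if (!default_pager_add_segment(bs, device,
					       0, count, record_size)) {
			return;
		}
		ipc_port_release_receive(bs);
	}
}
#endif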
612 default_pager_backing_store_info(
613 MACH_PORT_FACE backing_store
,
614 backing_store_flavor_t flavour
,
615 backing_store_info_t info
,
616 mach_msg_type_number_t
*size
)
619 backing_store_basic_info_t basic
;
623 if (flavour
!= BACKING_STORE_BASIC_INFO
||
624 *size
< BACKING_STORE_BASIC_INFO_COUNT
)
625 return KERN_INVALID_ARGUMENT
;
627 basic
= (backing_store_basic_info_t
)info
;
628 *size
= BACKING_STORE_BASIC_INFO_COUNT
;
630 VSTATS_LOCK(&global_stats
.gs_lock
);
631 basic
->pageout_calls
= global_stats
.gs_pageout_calls
;
632 basic
->pagein_calls
= global_stats
.gs_pagein_calls
;
633 basic
->pages_in
= global_stats
.gs_pages_in
;
634 basic
->pages_out
= global_stats
.gs_pages_out
;
635 basic
->pages_unavail
= global_stats
.gs_pages_unavail
;
636 basic
->pages_init
= global_stats
.gs_pages_init
;
637 basic
->pages_init_writes
= global_stats
.gs_pages_init_writes
;
638 VSTATS_UNLOCK(&global_stats
.gs_lock
);
640 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
641 return KERN_INVALID_ARGUMENT
;
643 basic
->bs_pages_total
= bs
->bs_pages_total
;
645 bs
->bs_pages_free
= 0;
646 for (i
= 0; i
<= paging_segment_max
; i
++) {
647 ps
= paging_segments
[i
];
648 if (ps
!= PAGING_SEGMENT_NULL
&& ps
->ps_bs
== bs
) {
650 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
655 basic
->bs_pages_free
= bs
->bs_pages_free
;
656 basic
->bs_pages_in
= bs
->bs_pages_in
;
657 basic
->bs_pages_in_fail
= bs
->bs_pages_in_fail
;
658 basic
->bs_pages_out
= bs
->bs_pages_out
;
659 basic
->bs_pages_out_fail
= bs
->bs_pages_out_fail
;
661 basic
->bs_priority
= bs
->bs_priority
;
662 basic
->bs_clsize
= ptoa_32(bs
->bs_clsize
); /* in bytes */
669 int ps_delete(paging_segment_t
); /* forward */
670 boolean_t
current_thread_aborted(void);
677 kern_return_t error
= KERN_SUCCESS
;
680 VSL_LOCK(); /* get the lock on the list of vs's */
/* The lock relationship and sequence are fairly complicated:     */
/* this code looks at a live list, locking and unlocking the list */
/* as it traverses it. It depends on the locking behavior of      */
/* default_pager_no_senders. no_senders always locks the vstruct  */
/* targeted for removal before locking the vstruct list. However  */
/* it will remove that member of the list without locking its     */
/* neighbors. We can be sure when we hold a lock on a vstruct     */
/* it cannot be removed from the list but we must hold the list   */
/* lock to be sure that its pointers to its neighbors are valid.  */
/* Also, we can hold off destruction of a vstruct when the list   */
/* lock and the vs locks are not being held by bumping the        */
/* vs_async_pending count.                                        */
696 while(backing_store_release_trigger_disable
!= 0) {
697 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
700 /* we will choose instead to hold a send right */
701 vs_count
= vstruct_list
.vsl_count
;
702 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
703 if(vs
== (vstruct_t
)&vstruct_list
) {
708 vs_async_wait(vs
); /* wait for any pending async writes */
709 if ((vs_count
!= 0) && (vs
!= NULL
))
710 vs
->vs_async_pending
+= 1; /* hold parties calling */
714 while((vs_count
!= 0) && (vs
!= NULL
)) {
/* We take the count of AMOs before beginning the        */
/* transfer of the target segment.                       */
/* We are guaranteed that the target segment cannot get  */
/* more users. We also know that queue entries are       */
/* made at the back of the list. If some of the entries  */
/* we would check disappear while we are traversing the  */
/* list then we will either check new entries which      */
/* do not have any backing store in the target segment   */
/* or re-check old entries. This might not be optimal    */
/* but it will always be correct. The alternative is to  */
/* take a snapshot of the list.                          */
728 if(dp_pages_free
< cluster_transfer_minimum
)
729 error
= KERN_FAILURE
;
731 vm_object_t transfer_object
;
735 transfer_object
= vm_object_allocate((vm_object_size_t
)VM_SUPER_CLUSTER
);
737 error
= vm_object_upl_request(transfer_object
,
738 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
740 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
| UPL_SET_LITE
| UPL_SET_INTERNAL
);
742 if(error
== KERN_SUCCESS
) {
743 error
= ps_vstruct_transfer_from_segment(
745 upl_commit(upl
, NULL
, 0);
748 error
= KERN_FAILURE
;
750 vm_object_deallocate(transfer_object
);
752 if(error
|| current_thread_aborted() || backing_store_stop_compaction
) {
754 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
755 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
756 vs
->vs_waiting_async
= FALSE
;
758 thread_wakeup(&vs
->vs_async_pending
);
767 while(backing_store_release_trigger_disable
!= 0) {
768 VSL_SLEEP(&backing_store_release_trigger_disable
,
772 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
773 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
774 (vs
!= next_vs
) && (vs_count
!= 1)) {
776 vs_async_wait(next_vs
); /* wait for any */
777 /* pending async writes */
778 next_vs
->vs_async_pending
+= 1; /* hold parties */
779 /* calling vs_async_wait */
784 vs
->vs_async_pending
-= 1;
785 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
786 vs
->vs_waiting_async
= FALSE
;
788 thread_wakeup(&vs
->vs_async_pending
);
792 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
803 default_pager_backing_store_delete_internal(
804 MACH_PORT_FACE backing_store
)
810 int interim_pages_removed
= 0;
811 boolean_t dealing_with_emergency_segment
= ( backing_store
== emergency_segment_backing_store
);
813 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
814 return KERN_INVALID_ARGUMENT
;
818 error
= KERN_SUCCESS
;
819 for (i
= 0; i
<= paging_segment_max
; i
++) {
820 ps
= paging_segments
[i
];
821 if (ps
!= PAGING_SEGMENT_NULL
&&
823 ! IS_PS_GOING_AWAY(ps
)) {
826 if( IS_PS_GOING_AWAY(ps
) || !IS_PS_OK_TO_USE(ps
)) {
 * Someone is already busy reclaiming this paging segment.
 * If it's the emergency segment we are looking at then check
 * that someone has not already recovered it and set the right
 * state i.e. online but not activated.
837 /* disable access to this segment */
838 ps
->ps_state
&= ~PS_CAN_USE
;
839 ps
->ps_state
|= PS_GOING_AWAY
;
842 * The "ps" segment is "off-line" now,
843 * we can try and delete it...
845 if(dp_pages_free
< (cluster_transfer_minimum
847 error
= KERN_FAILURE
;
851 /* remove all pages associated with the */
852 /* segment from the list of free pages */
853 /* when transfer is through, all target */
854 /* segment pages will appear to be free */
856 dp_pages_free
-= ps
->ps_pgcount
;
857 interim_pages_removed
+= ps
->ps_pgcount
;
859 error
= ps_delete(ps
);
861 if (error
!= KERN_SUCCESS
) {
863 * We couldn't delete the segment,
864 * probably because there's not enough
865 * virtual memory left.
866 * Re-enable all the segments.
875 if (error
!= KERN_SUCCESS
) {
876 for (i
= 0; i
<= paging_segment_max
; i
++) {
877 ps
= paging_segments
[i
];
878 if (ps
!= PAGING_SEGMENT_NULL
&&
880 IS_PS_GOING_AWAY(ps
)) {
883 if( !IS_PS_GOING_AWAY(ps
)) {
887 /* Handle the special clusters that came in while we let go the lock*/
888 if( ps
->ps_special_clusters
) {
889 dp_pages_free
+= ps
->ps_special_clusters
<< ps
->ps_clshift
;
890 ps
->ps_pgcount
+= ps
->ps_special_clusters
<< ps
->ps_clshift
;
891 ps
->ps_clcount
+= ps
->ps_special_clusters
;
892 if ( ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
) {
893 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
895 ps
->ps_special_clusters
= 0;
897 /* re-enable access to this segment */
898 ps
->ps_state
&= ~PS_GOING_AWAY
;
899 ps
->ps_state
|= PS_CAN_USE
;
903 dp_pages_free
+= interim_pages_removed
;
909 for (i
= 0; i
<= paging_segment_max
; i
++) {
910 ps
= paging_segments
[i
];
911 if (ps
!= PAGING_SEGMENT_NULL
&&
913 if(IS_PS_GOING_AWAY(ps
)) {
914 if(IS_PS_EMERGENCY_SEGMENT(ps
)) {
916 ps
->ps_state
&= ~PS_GOING_AWAY
;
917 ps
->ps_special_clusters
= 0;
918 ps
->ps_pgcount
= ps
->ps_pgnum
;
919 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
921 dp_pages_reserve
+= interim_pages_removed
;
923 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
924 paging_segment_count
--;
926 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
927 kfree(ps
, sizeof *ps
);
933 /* Scan the entire ps array separately to make certain we find the */
934 /* proper paging_segment_max */
935 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
936 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
937 paging_segment_max
= i
;
942 if( dealing_with_emergency_segment
) {
948 * All the segments have been deleted.
949 * We can remove the backing store.
953 * Disable lookups of this backing store.
955 if((void *)bs
->bs_port
->alias
!= NULL
)
956 kfree((void *) bs
->bs_port
->alias
,
957 sizeof (struct vstruct_alias
));
958 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
959 bs
->bs_port
= MACH_PORT_NULL
;
963 * Remove backing store from backing_store list.
966 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
971 * Free the backing store structure.
973 kfree(bs
, sizeof *bs
);
979 default_pager_backing_store_delete(
980 MACH_PORT_FACE backing_store
)
982 if( backing_store
!= emergency_segment_backing_store
) {
983 default_pager_backing_store_delete_internal(emergency_segment_backing_store
);
985 return(default_pager_backing_store_delete_internal(backing_store
));
988 int ps_enter(paging_segment_t
); /* forward */
998 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
999 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
1003 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
1004 paging_segments
[i
] = ps
;
1005 if (i
> paging_segment_max
)
1006 paging_segment_max
= i
;
1007 paging_segment_count
++;
1008 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
1009 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
1010 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1014 return KERN_RESOURCE_SHORTAGE
;
1021 #ifdef DEVICE_PAGING
1023 default_pager_add_segment(
1024 MACH_PORT_FACE backing_store
,
1025 MACH_PORT_FACE device
,
1031 paging_segment_t ps
;
1035 if ((bs
= backing_store_lookup(backing_store
))
1036 == BACKING_STORE_NULL
)
1037 return KERN_INVALID_ARGUMENT
;
1040 for (i
= 0; i
<= paging_segment_max
; i
++) {
1041 ps
= paging_segments
[i
];
1042 if (ps
== PAGING_SEGMENT_NULL
)
1046 * Check for overlap on same device.
1048 if (!(ps
->ps_device
!= device
1049 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
1050 || offset
+ count
<= ps
->ps_offset
)) {
1053 return KERN_INVALID_ARGUMENT
;
1059 * Set up the paging segment
1061 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
1062 if (ps
== PAGING_SEGMENT_NULL
) {
1064 return KERN_RESOURCE_SHORTAGE
;
1067 ps
->ps_segtype
= PS_PARTITION
;
1068 ps
->ps_device
= device
;
1069 ps
->ps_offset
= offset
;
1070 ps
->ps_record_shift
= local_log2(vm_page_size
/ record_size
);
1071 ps
->ps_recnum
= count
;
1072 ps
->ps_pgnum
= count
>> ps
->ps_record_shift
;
1074 ps
->ps_pgcount
= ps
->ps_pgnum
;
1075 ps
->ps_clshift
= local_log2(bs
->bs_clsize
);
1076 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
1080 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
1082 kfree(ps
, sizeof *ps
);
1084 return KERN_RESOURCE_SHORTAGE
;
1086 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
1087 clrbit(ps
->ps_bmap
, i
);
1090 if(paging_segment_count
== 0) {
1091 ps
->ps_state
= PS_EMERGENCY_SEGMENT
;
1092 if(use_emergency_swap_file_first
) {
1093 ps
->ps_state
|= PS_CAN_USE
;
1096 ps
->ps_state
= PS_CAN_USE
;
1101 if ((error
= ps_enter(ps
)) != 0) {
1102 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1103 kfree(ps
, sizeof *ps
);
1105 return KERN_RESOURCE_SHORTAGE
;
1108 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1109 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1113 if(IS_PS_OK_TO_USE(ps
)) {
1114 dp_pages_free
+= ps
->ps_pgcount
;
1116 dp_pages_reserve
+= ps
->ps_pgcount
;
1120 bs_more_space(ps
->ps_clcount
);
1122 DP_DEBUG(DEBUG_BS_INTERNAL
,
1123 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1124 device
, offset
, count
, record_size
,
1125 ps
->ps_record_shift
, ps
->ps_pgnum
));
1127 return KERN_SUCCESS
;
1133 MACH_PORT_FACE master
)
1135 security_token_t null_security_token
= {
1138 MACH_PORT_FACE device
;
1139 int info
[DEV_GET_SIZE_COUNT
];
1140 mach_msg_type_number_t info_count
;
1141 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1142 unsigned int rec_size
;
1145 MACH_PORT_FACE reply_port
;
1147 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1148 null_security_token
, dev_name
, &device
))
1151 info_count
= DEV_GET_SIZE_COUNT
;
1152 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1153 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1154 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1155 clsize
= bs_get_global_clsize(0);
1156 if (!default_pager_backing_store_create(
1157 default_pager_object
,
1158 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1159 (clsize
* vm_page_size
),
1161 if (!default_pager_add_segment(bs
, device
,
1162 0, count
, rec_size
)) {
1165 ipc_port_release_receive(bs
);
1169 ipc_port_release_send(device
);
1172 #endif /* DEVICE_PAGING */
1177 vs_alloc_async(void)
1179 struct vs_async
*vsa
;
1180 MACH_PORT_FACE reply_port
;
1181 // kern_return_t kr;
1184 if (vs_async_free_list
== NULL
) {
1186 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1189 * Try allocating a reply port named after the
1190 * address of the vs_async structure.
1192 struct vstruct_alias
*alias_struct
;
1194 reply_port
= ipc_port_alloc_kernel();
1195 alias_struct
= (struct vstruct_alias
*)
1196 kalloc(sizeof (struct vstruct_alias
));
1197 if(alias_struct
!= NULL
) {
1198 alias_struct
->vs
= (struct vstruct
*)vsa
;
1199 alias_struct
->name
= &default_pager_ops
;
1200 reply_port
->alias
= (uintptr_t) alias_struct
;
1201 vsa
->reply_port
= reply_port
;
1202 vs_alloc_async_count
++;
1205 vs_alloc_async_failed
++;
1206 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1208 kfree(vsa
, sizeof (struct vs_async
));
1213 vsa
= vs_async_free_list
;
1214 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1223 struct vs_async
*vsa
)
1226 vsa
->vsa_next
= vs_async_free_list
;
1227 vs_async_free_list
= vsa
;
1231 #else /* VS_ASYNC_REUSE */
1234 vs_alloc_async(void)
1236 struct vs_async
*vsa
;
1237 MACH_PORT_FACE reply_port
;
1240 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1243 * Try allocating a reply port named after the
1244 * address of the vs_async structure.
1246 reply_port
= ipc_port_alloc_kernel();
1247 alias_struct
= (vstruct_alias
*)
1248 kalloc(sizeof (struct vstruct_alias
));
1249 if(alias_struct
!= NULL
) {
1250 alias_struct
->vs
= reply_port
;
1251 alias_struct
->name
= &default_pager_ops
;
1252 reply_port
->alias
= (int) vsa
;
1253 vsa
->reply_port
= reply_port
;
1254 vs_alloc_async_count
++;
1257 vs_alloc_async_failed
++;
1258 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1260 kfree(vsa
, sizeof (struct vs_async
));
1270 struct vs_async
*vsa
)
1272 MACH_PORT_FACE reply_port
;
1275 reply_port
= vsa
->reply_port
;
1276 kfree(reply_port
->alias
, sizeof (struct vstuct_alias
));
1277 kfree(vsa
, sizeof (struct vs_async
));
1278 ipc_port_dealloc_kernel((MACH_PORT_FACE
) (reply_port
));
1281 vs_alloc_async_count
--;
1286 #endif /* VS_ASYNC_REUSE */
1288 zone_t vstruct_zone
;
1297 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1298 if (vs
== VSTRUCT_NULL
) {
1299 return VSTRUCT_NULL
;
1305 * The following fields will be provided later.
1307 vs
->vs_pager_ops
= NULL
;
1308 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1309 vs
->vs_references
= 1;
1312 vs
->vs_waiting_seqno
= FALSE
;
1313 vs
->vs_waiting_read
= FALSE
;
1314 vs
->vs_waiting_write
= FALSE
;
1315 vs
->vs_waiting_async
= FALSE
;
1322 vs
->vs_clshift
= local_log2(bs_get_global_clsize(0));
1323 vs
->vs_size
= ((atop_32(round_page_32(size
)) - 1) >> vs
->vs_clshift
) + 1;
1324 vs
->vs_async_pending
= 0;
1327 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1328 * depending on the size of the memory object.
1330 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1331 vs
->vs_imap
= (struct vs_map
**)
1332 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1333 vs
->vs_indirect
= TRUE
;
1335 vs
->vs_dmap
= (struct vs_map
*)
1336 kalloc(CLMAP_SIZE(vs
->vs_size
));
1337 vs
->vs_indirect
= FALSE
;
1339 vs
->vs_xfer_pending
= FALSE
;
1340 DP_DEBUG(DEBUG_VS_INTERNAL
,
1341 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1344 * Check to see that we got the space.
1347 kfree(vs
, sizeof *vs
);
1348 return VSTRUCT_NULL
;
1352 * Zero the indirect pointers, or clear the direct pointers.
1354 if (vs
->vs_indirect
)
1355 memset(vs
->vs_imap
, 0,
1356 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1358 for (i
= 0; i
< vs
->vs_size
; i
++)
1359 VSM_CLR(vs
->vs_dmap
[i
]);
1361 VS_MAP_LOCK_INIT(vs
);
1363 bs_commit(vs
->vs_size
);
1368 paging_segment_t
ps_select_segment(unsigned int, int *); /* forward */
1375 paging_segment_t ps
;
1380 * Optimize case where there's only one segment.
1381 * paging_segment_max will index the one and only segment.
1385 if (paging_segment_count
== 1) {
1386 paging_segment_t lps
= PAGING_SEGMENT_NULL
; /* used to avoid extra PS_UNLOCK */
1387 ipc_port_t trigger
= IP_NULL
;
1389 ps
= paging_segments
[paging_segment_max
];
1390 *psindex
= paging_segment_max
;
1392 if( !IS_PS_EMERGENCY_SEGMENT(ps
) ) {
1393 panic("Emergency paging segment missing\n");
1395 ASSERT(ps
->ps_clshift
>= shift
);
1396 if(IS_PS_OK_TO_USE(ps
)) {
1397 if (ps
->ps_clcount
) {
1399 dp_pages_free
-= 1 << ps
->ps_clshift
;
1400 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1401 if(min_pages_trigger_port
&&
1402 (dp_pages_free
< minimum_pages_remaining
)) {
1403 trigger
= min_pages_trigger_port
;
1404 min_pages_trigger_port
= NULL
;
1412 if( lps
== PAGING_SEGMENT_NULL
) {
1414 dp_pages_free_drift_count
++;
1415 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1416 dp_pages_free_drifted_max
= dp_pages_free
;
1418 dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free
));
1425 if (trigger
!= IP_NULL
) {
1426 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1427 ipc_port_release_send(trigger
);
1432 if (paging_segment_count
== 0) {
1434 dp_pages_free_drift_count
++;
1435 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1436 dp_pages_free_drifted_max
= dp_pages_free
;
1438 dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free
));
1442 return PAGING_SEGMENT_NULL
;
1446 i
>= BS_MINPRI
; i
--) {
1449 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1450 (ps_select_array
[i
] == BS_FULLPRI
))
1452 start_index
= ps_select_array
[i
];
1454 if(!(paging_segments
[start_index
])) {
1456 physical_transfer_cluster_count
= 0;
1458 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1459 (((paging_segments
[start_index
])->ps_clshift
)
1460 + vm_page_shift
))) {
1461 physical_transfer_cluster_count
= 0;
1462 j
= start_index
+ 1;
1464 physical_transfer_cluster_count
+=1;
1466 if(start_index
== 0)
1467 start_index
= paging_segment_max
;
1469 start_index
= start_index
- 1;
1473 if (j
> paging_segment_max
)
1475 if ((ps
= paging_segments
[j
]) &&
1476 (ps
->ps_bs
->bs_priority
== i
)) {
1478 * Force the ps cluster size to be
1479 * >= that of the vstruct.
1482 if (IS_PS_OK_TO_USE(ps
)) {
1483 if ((ps
->ps_clcount
) &&
1484 (ps
->ps_clshift
>= shift
)) {
1485 ipc_port_t trigger
= IP_NULL
;
1488 dp_pages_free
-= 1 << ps
->ps_clshift
;
1489 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1490 if(min_pages_trigger_port
&&
1492 minimum_pages_remaining
)) {
1493 trigger
= min_pages_trigger_port
;
1494 min_pages_trigger_port
= NULL
;
1498 * found one, quit looking.
1500 ps_select_array
[i
] = j
;
1503 if (trigger
!= IP_NULL
) {
1504 default_pager_space_alert(
1507 ipc_port_release_send(trigger
);
1515 if (j
== start_index
) {
1517 * none at this priority -- mark it full
1519 ps_select_array
[i
] = BS_FULLPRI
;
1527 dp_pages_free_drift_count
++;
1528 if(dp_pages_free
> dp_pages_free_drifted_max
) {
1529 dp_pages_free_drifted_max
= dp_pages_free
;
1531 dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count
,dp_pages_free
));
1535 return PAGING_SEGMENT_NULL
;
1538 dp_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1541 ps_allocate_cluster(
1544 paging_segment_t use_ps
)
1546 unsigned int byte_num
;
1548 paging_segment_t ps
;
1549 dp_offset_t cluster
;
1550 ipc_port_t trigger
= IP_NULL
;
1553 * Find best paging segment.
1554 * ps_select_segment will decrement cluster count on ps.
1555 * Must pass cluster shift to find the most appropriate segment.
1557 /* NOTE: The addition of paging segment delete capability threatened
1558 * to seriously complicate the treatment of paging segments in this
1559 * module and the ones that call it (notably ps_clmap), because of the
1560 * difficulty in assuring that the paging segment would continue to
1561 * exist between being unlocked and locked. This was
1562 * avoided because all calls to this module are based in either
1563 * dp_memory_object calls which rely on the vs lock, or by
1564 * the transfer function which is part of the segment delete path.
1565 * The transfer function which is part of paging segment delete is
1566 * protected from multiple callers by the backing store lock.
1567 * The paging segment delete function treats mappings to a paging
1568 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1569 * while data is transferred to the remaining segments. This is in
1570 * line with the view that incomplete or in-transition mappings between
1571 * data, a vstruct, and backing store are protected by the vs lock.
1572 * This and the ordering of the paging segment "going_away" bit setting
1576 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1581 ASSERT(ps
->ps_clcount
!= 0);
1584 dp_pages_free
-= 1 << ps
->ps_clshift
;
1585 ps
->ps_pgcount
-= 1 << ps
->ps_clshift
;
1586 if(min_pages_trigger_port
&&
1587 (dp_pages_free
< minimum_pages_remaining
)) {
1588 trigger
= min_pages_trigger_port
;
1589 min_pages_trigger_port
= NULL
;
1593 if (trigger
!= IP_NULL
) {
1594 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1595 ipc_port_release_send(trigger
);
1598 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1599 PAGING_SEGMENT_NULL
) {
1600 static clock_sec_t lastnotify
= 0;
1602 clock_nsec_t nanoseconds_dummy
;
 * Don't immediately jump to the emergency segment. Give the
 * dynamic pager a chance to create its first normal swap file.
 * Unless, of course, the very first normal swap file can't be
 * created due to some problem that we didn't expect,
 * i.e. use_emergency_swap_file_first was never set to true initially.
 * It then gets set in the swap file creation error handling.
1612 if(paging_segment_count
> 1 || use_emergency_swap_file_first
== TRUE
) {
1614 ps
= paging_segments
[EMERGENCY_PSEG_INDEX
];
1615 if(IS_PS_EMERGENCY_SEGMENT(ps
) && !IS_PS_GOING_AWAY(ps
)) {
1619 if(IS_PS_GOING_AWAY(ps
)) {
1620 /* Someone de-activated the emergency paging segment*/
1624 } else if(dp_pages_free
) {
1626 * Someone has already activated the emergency paging segment
1628 * Between us having rec'd a NULL segment from ps_select_segment
1629 * and reaching here a new normal segment could have been added.
1630 * E.g. we get NULL segment and another thread just added the
1631 * new swap file. Hence check to see if we have more dp_pages_free
1632 * before activating the emergency segment.
1638 } else if(!IS_PS_OK_TO_USE(ps
) && ps
->ps_clcount
) {
1640 * PS_CAN_USE is only reset from the emergency segment when it's
1641 * been successfully recovered. So it's legal to have an emergency
1642 * segment that has PS_CAN_USE but no clusters because it's recovery
1645 backing_store_t bs
= ps
->ps_bs
;
1646 ps
->ps_state
|= PS_CAN_USE
;
1647 if(ps_select_array
[bs
->bs_priority
] == BS_FULLPRI
||
1648 ps_select_array
[bs
->bs_priority
] == BS_NOPRI
) {
1649 ps_select_array
[bs
->bs_priority
] = 0;
1651 dp_pages_free
+= ps
->ps_pgcount
;
1652 dp_pages_reserve
-= ps
->ps_pgcount
;
1655 dprintf(("Switching ON Emergency paging segment\n"));
1665 * Emit a notification of the low-paging resource condition
1666 * but don't issue it more than once every five seconds. This
1667 * prevents us from overflowing logs with thousands of
1668 * repetitions of the message.
1670 clock_get_system_nanotime(&now
, &nanoseconds_dummy
);
1671 if (paging_segment_count
> 1 && (now
> lastnotify
+ 5)) {
1672 /* With an activated emergency paging segment we still
1673 * didn't get any clusters. This could mean that the
1674 * emergency paging segment is exhausted.
1676 dprintf(("System is out of paging space.\n"));
1682 if(min_pages_trigger_port
) {
1683 trigger
= min_pages_trigger_port
;
1684 min_pages_trigger_port
= NULL
;
1688 if (trigger
!= IP_NULL
) {
1689 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1690 ipc_port_release_send(trigger
);
1692 return (dp_offset_t
) -1;
1696 * Look for an available cluster. At the end of the loop,
1697 * byte_num is the byte offset and bit_num is the bit offset of the
1698 * first zero bit in the paging segment bitmap.
1701 byte_num
= ps
->ps_hint
;
1702 for (; byte_num
< howmany(ps
->ps_ncls
, NBBY
); byte_num
++) {
1703 if (*(ps
->ps_bmap
+ byte_num
) != BYTEMASK
) {
1704 for (bit_num
= 0; bit_num
< NBBY
; bit_num
++) {
1705 if (isclr((ps
->ps_bmap
+ byte_num
), bit_num
))
1708 ASSERT(bit_num
!= NBBY
);
1712 ps
->ps_hint
= byte_num
;
1713 cluster
= (byte_num
*NBBY
) + bit_num
;
1715 /* Space was reserved, so this must be true */
1716 ASSERT(cluster
< ps
->ps_ncls
);
1718 setbit(ps
->ps_bmap
, cluster
);
1724 void ps_deallocate_cluster(paging_segment_t
, dp_offset_t
); /* forward */
1727 ps_deallocate_cluster(
1728 paging_segment_t ps
,
1729 dp_offset_t cluster
)
1732 if (cluster
>= ps
->ps_ncls
)
1733 panic("ps_deallocate_cluster: Invalid cluster number");
1736 * Lock the paging segment, clear the cluster's bitmap and increment the
1737 * number of free cluster.
1741 clrbit(ps
->ps_bmap
, cluster
);
1742 if( IS_PS_OK_TO_USE(ps
)) {
1744 ps
->ps_pgcount
+= 1 << ps
->ps_clshift
;
1745 dp_pages_free
+= 1 << ps
->ps_clshift
;
1747 ps
->ps_special_clusters
+= 1;
1751 * Move the hint down to the freed cluster if it is
1752 * less than the current hint.
1754 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1755 ps
->ps_hint
= (cluster
/NBBY
);
1760 * If we're freeing space on a full priority, reset the array.
1762 if ( IS_PS_OK_TO_USE(ps
) && ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1763 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1770 void ps_dealloc_vsmap(struct vs_map
*, dp_size_t
); /* forward */
1774 struct vs_map
*vsmap
,
1778 for (i
= 0; i
< size
; i
++)
1779 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1780 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1781 VSM_CLOFF(vsmap
[i
]));
1794 * If this is an indirect structure, then we walk through the valid
1795 * (non-zero) indirect pointers and deallocate the clusters
1796 * associated with each used map entry (via ps_dealloc_vsmap).
1797 * When all of the clusters in an indirect block have been
1798 * freed, we deallocate the block. When all of the indirect
1799 * blocks have been deallocated we deallocate the memory
1800 * holding the indirect pointers.
1802 if (vs
->vs_indirect
) {
1803 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1804 if (vs
->vs_imap
[i
] != NULL
) {
1805 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1806 kfree(vs
->vs_imap
[i
], CLMAP_THRESHOLD
);
1809 kfree(vs
->vs_imap
, INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1812 * Direct map. Free used clusters, then memory.
1814 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1815 kfree(vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1819 bs_commit(- vs
->vs_size
);
1821 zfree(vstruct_zone
, vs
);
1824 int ps_map_extend(vstruct_t
, unsigned int); /* forward */
1828 unsigned int new_size
)
1830 struct vs_map
**new_imap
;
1831 struct vs_map
*new_dmap
= NULL
;
1834 void *old_map
= NULL
;
1835 int old_map_size
= 0;
1837 if (vs
->vs_size
>= new_size
) {
1839 * Someone has already done the work.
1845 * If the new size extends into the indirect range, then we have one
1846 * of two cases: we are going from indirect to indirect, or we are
1847 * going from direct to indirect. If we are going from indirect to
1848 * indirect, then it is possible that the new size will fit in the old
1849 * indirect map. If this is the case, then just reset the size of the
1850 * vstruct map and we are done. If the new size will not
1851 * fit into the old indirect map, then we have to allocate a new
1852 * indirect map and copy the old map pointers into this new map.
1854 * If we are going from direct to indirect, then we have to allocate a
1855 * new indirect map and copy the old direct pages into the first
1856 * indirect page of the new map.
1857 * NOTE: allocating memory here is dangerous, as we're in the
1860 if (INDIRECT_CLMAP(new_size
)) {
1861 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1864 * Get a new indirect map and zero it.
1866 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1867 if (vs
->vs_indirect
&&
1868 (new_map_size
== old_map_size
)) {
1869 bs_commit(new_size
- vs
->vs_size
);
1870 vs
->vs_size
= new_size
;
1874 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1875 if (new_imap
== NULL
) {
1878 memset(new_imap
, 0, new_map_size
);
1880 if (vs
->vs_indirect
) {
1881 /* Copy old entries into new map */
1882 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1883 /* Arrange to free the old map */
1884 old_map
= (void *) vs
->vs_imap
;
1886 } else { /* Old map was a direct map */
1887 /* Allocate an indirect page */
1888 if ((new_imap
[0] = (struct vs_map
*)
1889 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1890 kfree(new_imap
, new_map_size
);
1893 new_dmap
= new_imap
[0];
1894 newdsize
= CLMAP_ENTRIES
;
1898 newdsize
= new_size
;
1900 * If the new map is a direct map, then the old map must
1901 * also have been a direct map. All we have to do is
1902 * to allocate a new direct map, copy the old entries
1903 * into it and free the old map.
1905 if ((new_dmap
= (struct vs_map
*)
1906 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1912 /* Free the old map */
1913 old_map
= (void *) vs
->vs_dmap
;
1914 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1916 /* Copy info from the old map into the new map */
1917 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1919 /* Initialize the rest of the new map */
1920 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1921 VSM_CLR(new_dmap
[i
]);
1924 vs
->vs_imap
= new_imap
;
1925 vs
->vs_indirect
= TRUE
;
1927 vs
->vs_dmap
= new_dmap
;
1928 bs_commit(new_size
- vs
->vs_size
);
1929 vs
->vs_size
= new_size
;
1931 kfree(old_map
, old_map_size
);
1939 struct clmap
*clmap
,
1944 dp_offset_t cluster
; /* The cluster of offset. */
1945 dp_offset_t newcl
; /* The new cluster allocated. */
1948 struct vs_map
*vsmap
;
1952 ASSERT(vs
->vs_dmap
);
1953 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1956 * Initialize cluster error value
1958 clmap
->cl_error
= 0;
1961 * If the object has grown, extend the page map.
1963 if (cluster
>= vs
->vs_size
) {
1964 if (flag
== CL_FIND
) {
1965 /* Do not allocate if just doing a lookup */
1967 return (dp_offset_t
) -1;
1969 if (ps_map_extend(vs
, cluster
+ 1)) {
1971 return (dp_offset_t
) -1;
1976 * Look for the desired cluster. If the map is indirect, then we
1977 * have a two level lookup. First find the indirect block, then
1978 * find the actual cluster. If the indirect block has not yet
1979 * been allocated, then do so. If the cluster has not yet been
1980 * allocated, then do so.
1982 * If any of the allocations fail, then return an error.
1983 * Don't allocate if just doing a lookup.
1985 if (vs
->vs_indirect
) {
1986 long ind_block
= cluster
/CLMAP_ENTRIES
;
1988 /* Is the indirect block allocated? */
1989 vsmap
= vs
->vs_imap
[ind_block
];
1990 if (vsmap
== NULL
) {
1991 if (flag
== CL_FIND
) {
1993 return (dp_offset_t
) -1;
1996 /* Allocate the indirect block */
1997 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
1998 if (vsmap
== NULL
) {
2000 return (dp_offset_t
) -1;
2002 /* Initialize the cluster offsets */
2003 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
2005 vs
->vs_imap
[ind_block
] = vsmap
;
2008 vsmap
= vs
->vs_dmap
;
2011 vsmap
+= cluster%CLMAP_ENTRIES
;
2014 * At this point, vsmap points to the struct vs_map desired.
2016 * Look in the map for the cluster, if there was an error on a
2017 * previous write, flag it and return. If it is not yet
2018 * allocated, then allocate it, if we're writing; if we're
2019 * doing a lookup and the cluster's not allocated, return error.
2021 if (VSM_ISERR(*vsmap
)) {
2022 clmap
->cl_error
= VSM_GETERR(*vsmap
);
2024 return (dp_offset_t
) -1;
2025 } else if (VSM_ISCLR(*vsmap
)) {
2028 if (flag
== CL_FIND
) {
2030 * If there's an error and the entry is clear, then
2031 * we've run out of swap space. Record the error
2035 VSM_SETERR(*vsmap
, error
);
2038 return (dp_offset_t
) -1;
2041 * Attempt to allocate a cluster from the paging segment
2043 newcl
= ps_allocate_cluster(vs
, &psindex
,
2044 PAGING_SEGMENT_NULL
);
2045 if (newcl
== (dp_offset_t
) -1) {
2047 return (dp_offset_t
) -1;
2050 VSM_SETCLOFF(*vsmap
, newcl
);
2051 VSM_SETPS(*vsmap
, psindex
);
2054 newcl
= VSM_CLOFF(*vsmap
);
2057 * Fill in pertinent fields of the clmap
2059 clmap
->cl_ps
= VSM_PS(*vsmap
);
2060 clmap
->cl_numpages
= VSCLSIZE(vs
);
2061 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
2064 * Byte offset in paging segment is byte offset to cluster plus
2065 * byte offset within cluster. It looks ugly, but should be
2068 ASSERT(trunc_page(offset
) == offset
);
2069 newcl
= ptoa_32(newcl
) << vs
->vs_clshift
;
2070 newoff
= offset
& ((1<<(vm_page_shift
+ vs
->vs_clshift
)) - 1);
2071 if (flag
== CL_ALLOC
) {
2073 * set bits in the allocation bitmap according to which
2074 * pages were requested. size is in bytes.
2076 i
= atop_32(newoff
);
2077 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
2078 VSM_SETALLOC(*vsmap
, i
);
2080 size
-= vm_page_size
;
2083 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
2086 * Offset is not cluster aligned, so number of pages
2087 * and bitmaps must be adjusted
2089 clmap
->cl_numpages
-= atop_32(newoff
);
2090 CLMAP_SHIFT(clmap
, vs
);
2091 CLMAP_SHIFTALLOC(clmap
, vs
);
2096 * The setting of valid bits and handling of write errors
2097 * must be done here, while we hold the lock on the map.
2098 * It logically should be done in ps_vs_write_complete().
2099 * The size and error information has been passed from
2100 * ps_vs_write_complete(). If the size parameter is non-zero,
2101 * then there is work to be done. If error is also non-zero,
2102 * then the error number is recorded in the cluster and the
2103 * entire cluster is in error.
2105 if (size
&& flag
== CL_FIND
) {
2106 dp_offset_t off
= (dp_offset_t
) 0;
2109 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
2111 VSM_SETPG(*vsmap
, i
);
2112 size
-= vm_page_size
;
2114 ASSERT(i
<= VSCLSIZE(vs
));
2116 BS_STAT(clmap
->cl_ps
->ps_bs
,
2117 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
2119 off
= VSM_CLOFF(*vsmap
);
2120 VSM_SETERR(*vsmap
, error
);
2123 * Deallocate cluster if error, and no valid pages
2126 if (off
!= (dp_offset_t
) 0)
2127 ps_deallocate_cluster(clmap
->cl_ps
, off
);
2129 return (dp_offset_t
) 0;
2133 DP_DEBUG(DEBUG_VS_INTERNAL
,
2134 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
2135 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
2136 DP_DEBUG(DEBUG_VS_INTERNAL
,
2137 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
2138 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
2139 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
2141 return (newcl
+ newoff
);
2144 void ps_clunmap(vstruct_t
, dp_offset_t
, dp_size_t
); /* forward */
2152 dp_offset_t cluster
; /* The cluster number of offset */
2153 struct vs_map
*vsmap
;
2158 * Loop through all clusters in this range, freeing paging segment
2159 * clusters and map entries as encountered.
2161 while (length
> 0) {
2165 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
2166 if (vs
->vs_indirect
) /* indirect map */
2167 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
2169 vsmap
= vs
->vs_dmap
;
2170 if (vsmap
== NULL
) {
2174 vsmap
+= cluster%CLMAP_ENTRIES
;
2175 if (VSM_ISCLR(*vsmap
)) {
2176 length
-= vm_page_size
;
2177 offset
+= vm_page_size
;
 * We've got a valid mapping. Clear it and deallocate
 * paging segment cluster pages.
 * Optimize for entire cluster clearing.
2185 if ( (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) ) {
2187 * Not cluster aligned.
2189 ASSERT(trunc_page(newoff
) == newoff
);
2190 i
= atop_32(newoff
);
2193 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
2194 VSM_CLRPG(*vsmap
, i
);
2195 VSM_CLRALLOC(*vsmap
, i
);
2196 length
-= vm_page_size
;
2197 offset
+= vm_page_size
;
2202 * If map entry is empty, clear and deallocate cluster.
2204 if (!VSM_ALLOC(*vsmap
)) {
2205 ps_deallocate_cluster(VSM_PS(*vsmap
),
2214 void ps_vs_write_complete(vstruct_t
, dp_offset_t
, dp_size_t
, int); /* forward */
2217 ps_vs_write_complete(
2226 * Get the struct vsmap for this cluster.
2227 * Use READ, even though it was written, because the
2228 * cluster MUST be present, unless there was an error
2229 * in the original ps_clmap (e.g. no space), in which
2230 * case, nothing happens.
2232 * Must pass enough information to ps_clmap to allow it
2233 * to set the vs_map structure bitmap under lock.
2235 (void) ps_clmap(vs
, offset
, &clmap
, CL_FIND
, size
, error
);
2238 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, dp_offset_t
, vm_offset_t
, dp_size_t
, boolean_t
, int); /* forward */
2241 vs_cl_write_complete(
2243 __unused paging_segment_t ps
,
2245 __unused vm_offset_t addr
,
2250 // kern_return_t kr;
2254 * For internal objects, the error is recorded on a
2255 * per-cluster basis by ps_clmap() which is called
2256 * by ps_vs_write_complete() below.
2258 dprintf(("write failed error = 0x%x\n", error
));
2259 /* add upl_abort code here */
2261 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2263 * Notify the vstruct mapping code, so it can do its accounting.
2265 ps_vs_write_complete(vs
, offset
, size
, error
);
2269 ASSERT(vs
->vs_async_pending
> 0);
2270 vs
->vs_async_pending
-= size
;
2271 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2272 vs
->vs_waiting_async
= FALSE
;
2274 thread_wakeup(&vs
->vs_async_pending
);
2281 #ifdef DEVICE_PAGING
2282 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2286 MACH_PORT_FACE reply_port
,
2287 kern_return_t device_code
,
2288 io_buf_len_t bytes_written
)
2290 struct vs_async
*vsa
;
2292 vsa
= (struct vs_async
*)
2293 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2295 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2296 device_code
= KERN_FAILURE
;
2299 vsa
->vsa_error
= device_code
;
2302 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2303 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2304 /* revisit when async disk segments redone */
2305 if(vsa
->vsa_error
) {
2306 /* need to consider error condition. re-write data or */
2307 /* throw it away here. */
2308 vm_map_copy_discard((vm_map_copy_t
)vsa
->vsa_addr
);
2310 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2311 vsa
->vsa_size
, vsa
->vsa_error
);
2313 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2314 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2319 return KERN_SUCCESS
;
2322 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2324 device_write_reply_inband(
2325 MACH_PORT_FACE reply_port
,
2326 kern_return_t return_code
,
2327 io_buf_len_t bytes_written
)
2329 panic("device_write_reply_inband: illegal");
2330 return KERN_SUCCESS
;
2333 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2336 MACH_PORT_FACE reply_port
,
2337 kern_return_t return_code
,
2339 mach_msg_type_number_t dataCnt
)
2341 struct vs_async
*vsa
;
2342 vsa
= (struct vs_async
*)
2343 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2344 vsa
->vsa_addr
= (vm_offset_t
)data
;
2345 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2346 vsa
->vsa_error
= return_code
;
2347 thread_wakeup(&vsa
);
2348 return KERN_SUCCESS
;
2351 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2353 device_read_reply_inband(
2354 MACH_PORT_FACE reply_port
,
2355 kern_return_t return_code
,
2356 io_buf_ptr_inband_t data
,
2357 mach_msg_type_number_t dataCnt
)
2359 panic("device_read_reply_inband: illegal");
2360 return KERN_SUCCESS
;
2363 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2365 device_read_reply_overwrite(
2366 MACH_PORT_FACE reply_port
,
2367 kern_return_t return_code
,
2368 io_buf_len_t bytes_read
)
2370 panic("device_read_reply_overwrite: illegal\n");
2371 return KERN_SUCCESS
;
2374 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2377 MACH_PORT_FACE reply_port
,
2378 kern_return_t return_code
,
2379 MACH_PORT_FACE device_port
)
2381 panic("device_open_reply: illegal\n");
2382 return KERN_SUCCESS
;
kern_return_t
ps_read_device(
	paging_segment_t	ps,
	dp_offset_t		offset,
	vm_offset_t		*bufferp,
	unsigned int		size,
	unsigned int		*residualp,
	int			flags)
{
	kern_return_t	kr;
	recnum_t	dev_offset;
	unsigned int	bytes_wanted;
	unsigned int	bytes_read;
	unsigned int	total_read;
	vm_offset_t	dev_buffer;
	vm_offset_t	buf_ptr;
	unsigned int	records_read;
	struct vs_async	*vsa;

	device_t	device;
	vm_map_copy_t	device_data = NULL;
	default_pager_thread_t *dpt = NULL;

	device = dev_port_lookup(ps->ps_device);
	clustered_reads[atop_32(size)]++;

	dev_offset = (ps->ps_offset +
		      (offset >> (vm_page_shift - ps->ps_record_shift)));
	bytes_wanted = size;
	total_read = 0;
	*bufferp = (vm_offset_t)NULL;

	do {
		vsa = VS_ALLOC_ASYNC();
		vsa->vsa_offset = 0;

		ip_lock(vsa->reply_port);
		vsa->reply_port->ip_sorights++;
		ip_reference(vsa->reply_port);
		ip_unlock(vsa->reply_port);
		kr = ds_device_read_common(device,
					   vsa->reply_port,
					   (mach_msg_type_name_t)
						MACH_MSG_TYPE_MOVE_SEND_ONCE,
					   (dev_mode_t) 0,
					   dev_offset,
					   bytes_wanted,
					   (IO_READ | IO_CALL),
					   (io_buf_ptr_t *) &dev_buffer,
					   (mach_msg_type_number_t *) &bytes_read);
		if (kr == MIG_NO_REPLY) {
			assert_wait(&vsa, THREAD_UNINT);
			thread_block(THREAD_CONTINUE_NULL);

			dev_buffer = vsa->vsa_addr;
			bytes_read = (unsigned int)vsa->vsa_size;
			kr = vsa->vsa_error;
		}
		VS_FREE_ASYNC(vsa);
		if (kr != KERN_SUCCESS || bytes_read == 0) {
			break;
		}
		total_read += bytes_read;

		/*
		 * If we got the entire range, use the returned dev_buffer.
		 */
		if (bytes_read == size) {
			*bufferp = (vm_offset_t)dev_buffer;
			break;
		}
		dprintf(("read only %d bytes out of %d\n",
			 bytes_read, bytes_wanted));
		if (dpt == NULL) {
			dpt = get_read_buffer();
			buf_ptr = dpt->dpt_buffer;
			*bufferp = (vm_offset_t)buf_ptr;
		}
		/*
		 * Otherwise, copy the data into the provided buffer (*bufferp)
		 * and append the rest of the range as it comes in.
		 */
		memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
		buf_ptr += bytes_read;
		bytes_wanted -= bytes_read;
		records_read = (bytes_read >>
				(vm_page_shift - ps->ps_record_shift));
		dev_offset += records_read;
		DP_DEBUG(DEBUG_VS_INTERNAL,
			 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
			  dev_buffer, bytes_read));
		if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
		    != KERN_SUCCESS)
			Panic("dealloc buf");
	} while (bytes_wanted);

	*residualp = size - total_read;
	if ((dev_buffer != *bufferp) && (total_read != 0)) {
		vm_offset_t temp_buffer;
		vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
		memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
		if (vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
					    VM_MAP_COPYIN_OPT_SRC_DESTROY |
					    VM_MAP_COPYIN_OPT_STEAL_PAGES |
					    VM_MAP_COPYIN_OPT_PMAP_ENTER,
					    (vm_map_copy_t *)&device_data, FALSE))
			panic("ps_read_device: cannot copyin locally provided buffer\n");
	} else if ((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)) {
		if (vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
					    VM_MAP_COPYIN_OPT_SRC_DESTROY |
					    VM_MAP_COPYIN_OPT_STEAL_PAGES |
					    VM_MAP_COPYIN_OPT_PMAP_ENTER,
					    (vm_map_copy_t *)&device_data, FALSE))
			panic("ps_read_device: cannot copyin backing store provided buffer\n");
	}

	*bufferp = (vm_offset_t)device_data;

	if (dpt != NULL) {
		/* Free the receive buffer */
		dpt->checked_out = 0;
		thread_wakeup(&dpt_array);
	}
	return KERN_SUCCESS;
}
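/*
 * Added worked example (illustrative numbers, assuming 4 KB pages and
 * 512-byte device records, i.e. vm_page_shift == 12 and ps_record_shift == 3):
 * a byte offset into the segment is converted to a device record number by
 * shifting right by (12 - 3) == 9, so offset 0x10000 (64 KB) becomes record
 * 128, which is then biased by ps->ps_offset to address the segment's region
 * of the device.
 */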
kern_return_t
ps_write_device(
	paging_segment_t	ps,
	dp_offset_t		offset,
	vm_offset_t		addr,
	unsigned int		size,
	struct vs_async		*vsa)
{
	recnum_t	dev_offset;
	io_buf_len_t	bytes_to_write, bytes_written;
	recnum_t	records_written;
	kern_return_t	kr;
	MACH_PORT_FACE	reply_port;

	clustered_writes[atop_32(size)]++;

	dev_offset = (ps->ps_offset +
		      (offset >> (vm_page_shift - ps->ps_record_shift)));
	bytes_to_write = size;

	if (vsa) {
		/*
		 * Asynchronous write.
		 */
		reply_port = vsa->reply_port;
		ip_lock(reply_port);
		reply_port->ip_sorights++;
		ip_reference(reply_port);
		ip_unlock(reply_port);
		{
			device_t	device;
			device = dev_port_lookup(ps->ps_device);

			vsa->vsa_addr = addr;
			kr = ds_device_write_common(device,
					reply_port,
					(mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
					(dev_mode_t) 0,
					dev_offset,
					(io_buf_ptr_t) addr,
					size,
					(IO_WRITE | IO_CALL),
					&bytes_written);
		}
		if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
			dprintf(("%s0x%x, addr=0x%x,"
				 "size=0x%x,offset=0x%x\n",
				 "device_write_request returned ",
				 kr, addr, size, offset));
			BS_STAT(ps->ps_bs,
				ps->ps_bs->bs_pages_out_fail += atop_32(size));
			/* do the completion notification to free resources */
			device_write_reply(reply_port, kr, 0);
			return PAGER_ERROR;
		}
	} else do {
		/*
		 * Synchronous write.
		 */
		{
			device_t	device;
			device = dev_port_lookup(ps->ps_device);
			kr = ds_device_write_common(device,
					IP_NULL, 0,
					(dev_mode_t) 0,
					dev_offset,
					(io_buf_ptr_t) addr,
					size,
					(IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
					&bytes_written);
		}
		if (kr != KERN_SUCCESS) {
			dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
				 "device_write returned ",
				 kr, addr, size, offset));
			BS_STAT(ps->ps_bs,
				ps->ps_bs->bs_pages_out_fail += atop_32(size));
			return PAGER_ERROR;
		}
		if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
			Panic("fragmented write");
		records_written = (bytes_written >>
				   (vm_page_shift - ps->ps_record_shift));
		dev_offset += records_written;
		if (bytes_written != bytes_to_write) {
			dprintf(("wrote only %d bytes out of %d\n",
				 bytes_written, bytes_to_write));
		}
		bytes_to_write -= bytes_written;
		addr += bytes_written;
	} while (bytes_to_write > 0);

	return PAGER_SUCCESS;
}
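/*
 * Added note: (vm_page_size >> ps->ps_record_shift) equals the device record
 * size in bytes (ps_record_shift being log2(page_size / record_size)), so the
 * "fragmented write" panic fires when the device reports a byte count that is
 * not a whole number of records.
 */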
#else /* !DEVICE_PAGING */

kern_return_t
ps_read_device(
	__unused paging_segment_t	ps,
	__unused dp_offset_t		offset,
	__unused vm_offset_t		*bufferp,
	__unused unsigned int		size,
	__unused unsigned int		*residualp,
	__unused int			flags)
{
	panic("ps_read_device not supported");
	return KERN_FAILURE;
}

kern_return_t
ps_write_device(
	__unused paging_segment_t	ps,
	__unused dp_offset_t		offset,
	__unused vm_offset_t		addr,
	__unused unsigned int		size,
	__unused struct vs_async	*vsa)
{
	panic("ps_write_device not supported");
	return KERN_FAILURE;
}

#endif /* DEVICE_PAGING */
void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t);	/* forward */

void
pvs_object_data_provided(
	__unused vstruct_t	vs,
	__unused upl_t		upl,
	__unused upl_offset_t	offset,
	upl_size_t		size)
{
	DP_DEBUG(DEBUG_VS_INTERNAL,
		 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
		  upl, offset, size));

	GSTAT(global_stats.gs_pages_in += atop_32(size));

#if	USE_PRECIOUS
	ps_clunmap(vs, offset, size);
#endif	/* USE_PRECIOUS */
}
static memory_object_offset_t	last_start;
static vm_size_t		last_length;

kern_return_t
pvs_cluster_read(
	vstruct_t	vs,
	dp_offset_t	vs_offset,
	dp_size_t	cnt,
	void		*fault_info)
{
	kern_return_t		error = KERN_SUCCESS;
	unsigned int		size;
	unsigned int		residual;
	unsigned int		request_flags;
	int			io_flags = 0;
	int			seg_index;
	int			pages_in_cl;
	int			cl_size;
	int			cl_mask;
	int			cl_index;
	unsigned int		xfer_size;
	dp_offset_t		orig_vs_offset;
	dp_offset_t		ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	paging_segment_t	psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	struct clmap		clmap;
	upl_t			upl;
	unsigned int		page_list_count;
	memory_object_offset_t	cluster_start;
	vm_size_t		cluster_length;
	uint32_t		io_streaming;

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;
	cl_mask = cl_size - 1;

#if	USE_PRECIOUS
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
#else
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
#endif
	cl_index = (vs_offset & cl_mask) / vm_page_size;

	if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) ||
	    !CLMAP_ISSET(clmap, cl_index)) {
		/*
		 * the needed page doesn't exist in the backing store...
		 * we don't want to try to do any I/O, just abort the
		 * page and let the fault handler provide a zero-fill
		 */
		if (cnt == 0) {
			/*
			 * The caller was just poking at us to see if
			 * the page has been paged out.  No need to
			 * mess with the page at all.
			 * Just let the caller know we don't have that page.
			 */
			return KERN_FAILURE;
		}

		page_list_count = 0;

		memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
						PAGE_SIZE, PAGE_SIZE,
						&upl, NULL, &page_list_count,
						request_flags | UPL_SET_INTERNAL);
		if (clmap.cl_error)
			upl_abort(upl, UPL_ABORT_ERROR);
		else
			upl_abort(upl, UPL_ABORT_UNAVAILABLE);
		upl_deallocate(upl);

		return KERN_SUCCESS;
	}

	if (cnt == 0) {
		/*
		 * The caller was just poking at us to see if
		 * the page has been paged out.  No need to
		 * mess with the page at all.
		 * Just let the caller know we do have that page.
		 */
		return KERN_SUCCESS;
	}

	assert(dp_encryption_inited);
	if (dp_encryption) {
		/*
		 * ENCRYPTED SWAP:
		 * request that the UPL be prepared for
		 * decryption.
		 */
		request_flags |= UPL_ENCRYPT;
	}
	orig_vs_offset = vs_offset;

	cnt = VM_SUPER_CLUSTER;
	cluster_start = (memory_object_offset_t) vs_offset;
	cluster_length = (vm_size_t) cnt;
	io_streaming = 0;

	/*
	 * determine how big a speculative I/O we should try for...
	 */
	if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) {
		assert(vs_offset >= (dp_offset_t) cluster_start &&
		       vs_offset < (dp_offset_t) (cluster_start + cluster_length));
		vs_offset = (dp_offset_t) cluster_start;
		cnt = (dp_size_t) cluster_length;
	} else {
		cluster_length = PAGE_SIZE;
		cnt = PAGE_SIZE;
	}

	if (io_streaming)
		io_flags |= UPL_IOSTREAMING;

	last_start  = cluster_start;
	last_length = cluster_length;

	/*
	 * This loop will be executed multiple times until the entire
	 * range has been looked at or we issue an I/O... if the request spans cluster
	 * boundaries, the clusters will be checked for logical continuity,
	 * if contiguous the I/O request will span multiple clusters...
	 * at most only 1 I/O will be issued... it will encompass the original offset
	 */
	while (cnt && error == KERN_SUCCESS) {
		int	ps_info_valid;

		if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
			size = VM_SUPER_CLUSTER;
			size -= vs_offset & cl_mask;
		} else if (cnt > VM_SUPER_CLUSTER)
			size = VM_SUPER_CLUSTER;
		else
			size = cnt;

		cnt -= size;

		ps_info_valid = 0;
		seg_index     = 0;

		while (size > 0 && error == KERN_SUCCESS) {
			unsigned int	abort_size;
			int		failed_size;
			int		beg_pseg;
			int		beg_indx;
			dp_offset_t	cur_offset;

			if ( !ps_info_valid) {
				ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
				psp[seg_index]       = CLMAP_PS(clmap);
				ps_info_valid = 1;
			}
			/*
			 * skip over unallocated physical segments
			 */
			if (ps_offset[seg_index] == (dp_offset_t) -1) {
				abort_size = cl_size - (vs_offset & cl_mask);
				abort_size = MIN(abort_size, size);

				size      -= abort_size;
				vs_offset += abort_size;

				seg_index++;
				ps_info_valid = 0;

				continue;
			}
			cl_index = (vs_offset & cl_mask) / vm_page_size;

			for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
				/*
				 * skip over unallocated pages
				 */
				if (CLMAP_ISSET(clmap, cl_index))
					break;
				abort_size += vm_page_size;
			}
			if (abort_size) {
				size      -= abort_size;
				vs_offset += abort_size;

				if (cl_index == pages_in_cl) {
					/*
					 * if we're at the end of this physical cluster
					 * then bump to the next one and continue looking
					 */
					seg_index++;
					ps_info_valid = 0;

					continue;
				}
				if (size == 0)
					break;
			}
			/*
			 * remember the starting point of the first allocated page
			 * for the I/O we're about to issue
			 */
			beg_pseg   = seg_index;
			beg_indx   = cl_index;
			cur_offset = vs_offset;

			/*
			 * calculate the size of the I/O that we can do...
			 * this may span multiple physical segments if
			 * they are contiguous
			 */
			for (xfer_size = 0; xfer_size < size; ) {

				while (cl_index < pages_in_cl && xfer_size < size) {
					/*
					 * accumulate allocated pages within
					 * a physical segment
					 */
					if (CLMAP_ISSET(clmap, cl_index)) {
						xfer_size  += vm_page_size;
						cur_offset += vm_page_size;
						cl_index++;

						BS_STAT(psp[seg_index]->ps_bs,
							psp[seg_index]->ps_bs->bs_pages_in++);
					} else
						break;
				}
				if (cl_index < pages_in_cl || xfer_size >= size) {
					/*
					 * we've hit an unallocated page or
					 * the end of this request... see if
					 * it's time to fire the I/O
					 */
					break;
				}
				/*
				 * we've hit the end of the current physical
				 * segment and there's more to do, so try
				 * moving to the next one
				 */
				seg_index++;

				ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
				psp[seg_index]       = CLMAP_PS(clmap);
				ps_info_valid = 1;

				if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
					/*
					 * if the physical segment we're about
					 * to step into is not contiguous to
					 * the one we're currently in, or it's
					 * in a different paging file, or
					 * it hasn't been allocated....
					 * we stop this run and go check
					 * to see if it's time to fire the I/O
					 */
					break;
				}
				/*
				 * start with first page of the next physical
				 * segment
				 */
				cl_index = 0;
			}
			if (xfer_size == 0) {
				/*
				 * no I/O to generate for this segment
				 */
				break;
			}
			if (cur_offset <= orig_vs_offset) {
				/*
				 * we've hit a hole in our speculative cluster
				 * before the offset that we're really after...
				 * don't issue the I/O since it doesn't encompass
				 * the original offset and we're looking to only
				 * pull in the speculative pages if they can be
				 * made part of a single I/O
				 */
				size      -= xfer_size;
				vs_offset += xfer_size;

				continue;
			}
			/*
			 * we have a contiguous range of allocated pages
			 * to read from that encompasses the original offset
			 */
			page_list_count = 0;
			memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
							xfer_size, xfer_size,
							&upl, NULL, &page_list_count,
							request_flags | UPL_SET_INTERNAL | UPL_NOBLOCK);

			error = ps_read_file(psp[beg_pseg],
					     upl, (upl_offset_t) 0,
					     ps_offset[beg_pseg] + (beg_indx * vm_page_size),
					     xfer_size, &residual, io_flags);

			failed_size = 0;

			/*
			 * Adjust counts and send response to VM.  Optimize
			 * for the common case, i.e. no error and/or partial
			 * data. If there was an error, then we need to error
			 * the entire range, even if some data was successfully
			 * read. If there was a partial read we may supply some
			 * data and may error some as well.  In all cases the
			 * VM must receive some notification for every page
			 * in the range.
			 */
			if ((error == KERN_SUCCESS) && (residual == 0)) {
				/*
				 * Got everything we asked for, supply the data
				 * to the VM.  Note that as a side effect of
				 * supplying the data, the buffer holding the
				 * supplied data is deallocated from the pager's
				 * address space.
				 */
				pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
			} else {
				failed_size = xfer_size;

				if (error == KERN_SUCCESS) {
					if (residual == xfer_size) {
						/*
						 * If a read operation returns no error
						 * and no data moved, we turn it into
						 * an error, assuming we're reading at
						 * or beyond the end of the object.
						 * Fall through and error the entire range.
						 */
						error = KERN_FAILURE;
					} else {
						/*
						 * Otherwise, we have partial read. If
						 * the part read is a integral number
						 * of pages supply it. Otherwise round
						 * it up to a page boundary, zero fill
						 * the unread part, and supply it.
						 * Fall through and error the remainder
						 * of the range, if any.
						 */
						int		fill;
						unsigned int	lsize;

						fill  = residual & ~vm_page_size;
						lsize = (xfer_size - residual) + fill;

						pvs_object_data_provided(vs, upl, vs_offset, lsize);

						if (lsize < xfer_size) {
							failed_size = xfer_size - lsize;
							error = KERN_FAILURE;
						}
					}
				}
			}
			if (error != KERN_SUCCESS) {
				/*
				 * There was an error in some part of the range, tell
				 * the VM. Note that error is explicitly checked again
				 * since it can be modified above.
				 */
				BS_STAT(psp[beg_pseg]->ps_bs,
					psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
			}
			/*
			 * we've issued a single I/O that encompassed the original offset
			 * at this point we either met our speculative request length or
			 * we ran into a 'hole' (i.e. page not present in the cluster, cluster
			 * not present or not physically contiguous to the previous one), so
			 * we're done issuing I/O at this point
			 */
			return error;
		}
	}
	return error;
}
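/*
 * Added worked example (illustrative numbers, not from the original source):
 * with 4 KB pages and vs_clshift == 2, pages_in_cl == 4 and cl_size == 16 KB,
 * so cl_mask == 0x3fff.  A fault at vs_offset 0x25000 therefore probes the
 * cluster at 0x24000, page index (0x1000 / 0x1000) == 1 within it, and the
 * speculative read is clipped so that a single I/O never covers more than
 * VM_SUPER_CLUSTER bytes or crosses a discontiguous physical segment.
 */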
int	vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
	vstruct_t	vs,
	upl_t		internal_upl,
	upl_offset_t	offset,
	upl_size_t	cnt,
	boolean_t	dp_internal,
	int		flags)
{
	upl_size_t	transfer_size;
	int		error = 0;
	struct clmap	clmap;

	dp_offset_t	actual_offset;	/* Offset within paging segment */
	paging_segment_t ps;
	dp_offset_t	mobj_base_addr;
	dp_offset_t	mobj_target_addr;

	upl_t		upl;
	upl_page_info_t	*pl;
	int		page_index;
	int		base_index;
	unsigned int	seg_index;
	unsigned int	pages_in_cl;
	unsigned int	cl_size;
	unsigned int	list_size;
	unsigned int	seg_size;
	unsigned int	upl_offset_in_object;

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;

	if (!dp_internal) {
		unsigned int	page_list_count;
		int		request_flags;
		unsigned int	super_size;
		int		first_dirty;
		int		num_dirty;
		int		num_of_pages;
		upl_offset_t	upl_offset;
		dp_offset_t	seg_offset;
		dp_offset_t	ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
		paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];

		if (bs_low) {
			super_size = cl_size;

			request_flags = UPL_NOBLOCK |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
		} else {
			super_size = VM_SUPER_CLUSTER;

			request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
		}

		if (!dp_encryption_inited) {
			/*
			 * ENCRYPTED SWAP:
			 * Once we've started using swap, we
			 * can't change our mind on whether
			 * it needs to be encrypted or
			 * not.
			 */
			dp_encryption_inited = TRUE;
		}
		if (dp_encryption) {
			/*
			 * ENCRYPTED SWAP:
			 * request that the UPL be prepared for
			 * encryption.
			 */
			request_flags |= UPL_ENCRYPT;
			flags |= UPL_PAGING_ENCRYPTED;
		}

		page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)offset,
				cnt, super_size,
				&upl, NULL, &page_list_count,
				request_flags | UPL_FOR_PAGEOUT);

		/*
		 * The default pager does not handle objects larger than
		 * 4GB, so it does not deal with offset that don't fit in
		 * 32-bit.  Cast down upl->offset now and make sure we
		 * did not lose any valuable bits.
		 */
		upl_offset_in_object = (unsigned int) upl->offset;
		assert(upl->offset == upl_offset_in_object);

		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

		seg_size = cl_size - (upl_offset_in_object % cl_size);
		upl_offset = upl_offset_in_object & ~(cl_size - 1);

		for (seg_index = 0, transfer_size = upl->size;
		     transfer_size > 0; ) {
			ps_offset[seg_index] =
				ps_clmap(vs,
					 upl_offset,
					 &clmap, CL_ALLOC,
					 cl_size, 0);

			if (ps_offset[seg_index] == (dp_offset_t) -1) {
				upl_abort(upl, 0);
				upl_deallocate(upl);

				return KERN_FAILURE;
			}
			psp[seg_index] = CLMAP_PS(clmap);

			if (transfer_size > seg_size) {
				transfer_size -= seg_size;
				upl_offset += cl_size;
				seg_size    = cl_size;
				seg_index++;
			} else
				transfer_size = 0;
		}
		/*
		 * Ignore any non-present pages at the end of the
		 * UPL.
		 */
		for (page_index = upl->size / vm_page_size; page_index > 0;)
			if (UPL_PAGE_PRESENT(pl, --page_index))
				break;
		num_of_pages = page_index + 1;

		base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE;

		for (page_index = 0; page_index < num_of_pages; ) {
			/*
			 * skip over non-dirty pages
			 */
			for ( ; page_index < num_of_pages; page_index++) {
				if (UPL_DIRTY_PAGE(pl, page_index)
				    || UPL_PRECIOUS_PAGE(pl, page_index))
					/*
					 * this is a page we need to write
					 * go see if we can buddy it up with
					 * others that are contiguous to it
					 */
					break;
				/*
				 * if the page is not-dirty, but present we
				 * need to commit it...  This is an unusual
				 * case since we only asked for dirty pages
				 */
				if (UPL_PAGE_PRESENT(pl, page_index)) {
					boolean_t empty = FALSE;
					upl_commit_range(upl,
							 page_index * vm_page_size,
							 vm_page_size,
							 UPL_COMMIT_NOTIFY_EMPTY,
							 pl,
							 page_list_count,
							 &empty);
					if (empty) {
						assert(page_index ==
						       num_of_pages - 1);
						upl_deallocate(upl);
					}
				}
			}
			if (page_index == num_of_pages)
				/*
				 * no more pages to look at, we're out of here
				 */
				break;

			/*
			 * gather up contiguous dirty pages... we have at
			 * least 1 * otherwise we would have bailed above
			 * make sure that each physical segment that we step
			 * into is contiguous to the one we're currently in
			 * if it's not, we have to stop and write what we have
			 */
			for (first_dirty = page_index;
			     page_index < num_of_pages; ) {
				if ( !UPL_DIRTY_PAGE(pl, page_index)
				    && !UPL_PRECIOUS_PAGE(pl, page_index))
					break;
				page_index++;
				/*
				 * if we just looked at the last page in the UPL
				 * we don't need to check for physical segment
				 * continuity
				 */
				if (page_index < num_of_pages) {
					int cur_seg;
					int nxt_seg;

					cur_seg = (base_index + (page_index - 1))/pages_in_cl;
					nxt_seg = (base_index + page_index)/pages_in_cl;

					if (cur_seg != nxt_seg) {
						if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
							/*
							 * if the segment we're about
							 * to step into is not
							 * contiguous to the one we're
							 * currently in, or it's in a
							 * different paging file....
							 * we stop here and generate
							 * the I/O
							 */
							break;
					}
				}
			}
			num_dirty = page_index - first_dirty;

			if (num_dirty) {
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				while (transfer_size) {

					if ((seg_size = cl_size -
					     ((upl_offset_in_object +
					       upl_offset) % cl_size))
					    > transfer_size)
						seg_size = transfer_size;

					ps_vs_write_complete(
						vs,
						(upl_offset_in_object +
						 upl_offset),
						seg_size, error);

					transfer_size -= seg_size;
					upl_offset += seg_size;
				}
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				seg_index  = (base_index + first_dirty) / pages_in_cl;
				seg_offset = (upl_offset_in_object + upl_offset) % cl_size;

				error = ps_write_file(psp[seg_index],
						      upl, upl_offset,
						      ps_offset[seg_index]
								+ seg_offset,
						      transfer_size, flags);
			} else {
				boolean_t empty = FALSE;
				upl_abort_range(upl,
						first_dirty * vm_page_size,
						num_dirty   * vm_page_size,
						UPL_ABORT_NOTIFY_EMPTY,
						&empty);
				if (empty) {
					assert(page_index == num_of_pages);
					upl_deallocate(upl);
				}
			}
		}
	} else {
		assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift));
		list_size = cnt;

		page_index = 0;
		/* The caller provides a mapped_data which is derived  */
		/* from a temporary object.  The targeted pages are    */
		/* guaranteed to be set at offset 0 in the mapped_data */
		/* The actual offset however must still be derived     */
		/* from the offset in the vs in question               */
		mobj_base_addr = offset;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
						 &clmap, CL_ALLOC,
						 transfer_size < cl_size ?
						 transfer_size : cl_size, 0);
			if (actual_offset == (dp_offset_t) -1) {
				error = 1;
				break;
			}
			cnt = MIN(transfer_size,
				  (unsigned) CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);
			/* Assume that the caller has given us contiguous */
			/* pages */
			if (cnt) {
				ps_vs_write_complete(vs, mobj_target_addr,
						     cnt, error);
				error = ps_write_file(ps, internal_upl,
						      0, actual_offset,
						      cnt, flags);
				if (error)
					break;
			}
			actual_offset += cnt;
			mobj_target_addr += cnt;
			transfer_size -= cnt;
		}
	}
	if (error)
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}
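/*
 * Added note on the contiguity test above (illustrative numbers): with
 * pages_in_cl == 4 and base_index == 0, UPL page indices 0-3 map to cluster
 * cur_seg 0 and indices 4-7 to nxt_seg 1.  The dirty run is only allowed to
 * cross that boundary when ps_offset[1] == ps_offset[0] + cl_size and both
 * clusters live in the same paging segment; otherwise the run is cut and
 * written first.
 */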
vm_size_t
ps_vstruct_allocated_size(
	vstruct_t	vs)
{
	unsigned int	num_pages;
	struct vs_map	*vsmap;
	unsigned int	i, j, k;

	num_pages = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]))
					continue;
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k))
						num_pages++;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]))
				continue;
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k))
					num_pages++;
			}
		}
	}

	return ptoa_32(num_pages);
}
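/*
 * Added note: VSM_BMAP() yields a per-cluster bitmap with one bit per page
 * (VSCLSIZE(vs) pages per cluster), so the loops above simply count the set
 * bits; ptoa_32() then converts that page count back into bytes.
 */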
unsigned int
ps_vstruct_allocated_pages(
	vstruct_t		vs,
	default_pager_page_t	*pages,
	unsigned int		pages_size)
{
	unsigned int	num_pages;
	struct vs_map	*vsmap;
	dp_offset_t	offset;
	unsigned int	i, j, k;

	num_pages = 0;
	offset = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL) {
				offset += (vm_page_size * CLMAP_ENTRIES *
					   VSCLSIZE(vs));
				continue;
			}
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j])) {
					offset += vm_page_size * VSCLSIZE(vs);
					continue;
				}
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
						num_pages++;
						if (num_pages < pages_size)
							pages++->dpp_offset =
								offset;
					}
					offset += vm_page_size;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j])) {
				offset += vm_page_size * VSCLSIZE(vs);
				continue;
			}
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
					num_pages++;
					if (num_pages < pages_size)
						pages++->dpp_offset = offset;
				}
				offset += vm_page_size;
			}
		}
	}

	return num_pages;
}
kern_return_t
ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
	upl_t		 upl)
{
	struct vs_map	*vsmap;
//	struct vs_map	old_vsmap;
//	struct vs_map	new_vsmap;
	unsigned int	i, j;

	VS_LOCK(vs);	/* block all work on this vstruct */
			/* can't allow the normal multiple write */
			/* semantic because writes may conflict */
	vs->vs_xfer_pending = TRUE;
	vs_wait_for_sync_writers(vs);
	vs_start_write(vs);
	vs_wait_for_readers(vs);
	/* we will unlock the vs to allow other writes while transferring */
	/* and will be guaranteed of the persistance of the vs struct */
	/* because the caller of ps_vstruct_transfer_from_segment bumped */
	/* vs_async_pending */
	/* OK we now have guaranteed no other parties are accessing this */
	/* vs.  Now that we are also supporting simple lock versions of */
	/* vs_lock we cannot hold onto VS_LOCK as we may block below.   */
	/* our purpose in holding it before was the multiple write case */
	/* we now use the boolean xfer_pending to do that.  We can use */
	/* a boolean instead of a count because we have guaranteed single */
	/* file access to this code in its caller */
	VS_UNLOCK(vs);
vs_changed:
	if (vs->vs_indirect) {
		unsigned int	vsmap_size;
		int		clmap_off;
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			clmap_off = (vm_page_size * CLMAP_ENTRIES *
				     VSCLSIZE(vs) * i);
			if (i + 1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
			else
				vsmap_size = CLMAP_ENTRIES;
			for (j = 0; j < vsmap_size; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]) ||
				    (VSM_PS(vsmap[j]) != segment))
					continue;
				if (vs_cluster_transfer(vs,
					(vm_page_size * (j << vs->vs_clshift))
						+ clmap_off,
					vm_page_size << vs->vs_clshift,
					upl) != KERN_SUCCESS) {
					VS_LOCK(vs);
					vs->vs_xfer_pending = FALSE;
					VS_UNLOCK(vs);
					vs_finish_write(vs);
					return KERN_FAILURE;
				}
				/* allow other readers/writers during transfer*/
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);

				VS_LOCK(vs);
				vs->vs_xfer_pending = TRUE;
				vs_wait_for_sync_writers(vs);
				vs_start_write(vs);
				vs_wait_for_readers(vs);
				VS_UNLOCK(vs);
				if (!(vs->vs_indirect)) {
					goto vs_changed;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			return KERN_SUCCESS;
		}
		/* loop on clusters in the direct map */
		for (j = 0; j < vs->vs_size; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]) ||
			    (VSM_PS(vsmap[j]) != segment))
				continue;
			if (vs_cluster_transfer(vs,
				vm_page_size * (j << vs->vs_clshift),
				vm_page_size << vs->vs_clshift,
				upl) != KERN_SUCCESS) {
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				return KERN_FAILURE;
			}
			/* allow other readers/writers during transfer*/
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);

			VS_LOCK(vs);
			vs->vs_xfer_pending = TRUE;
			vs_wait_for_sync_writers(vs);
			vs_start_write(vs);
			vs_wait_for_readers(vs);
			VS_UNLOCK(vs);
			if (vs->vs_indirect) {
				goto vs_changed;
			}
		}
	}

	VS_LOCK(vs);
	vs->vs_xfer_pending = FALSE;
	VS_UNLOCK(vs);
	vs_finish_write(vs);
	return KERN_SUCCESS;
}
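/*
 * Added note: vs_xfer_pending is the single-writer gate used while a
 * segment's clusters are migrated.  It is dropped and re-taken around every
 * vs_cluster_transfer() call so normal pagein/pageout traffic can interleave
 * with the transfer; the vs_changed label re-evaluates the map layout in case
 * the vstruct switched between direct and indirect maps while unlocked.
 */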
vs_map_t
vs_get_map_entry(
	vstruct_t	vs,
	dp_offset_t	offset)
{
	struct vs_map	*vsmap;
	dp_offset_t	cluster;

	cluster = atop_32(offset) >> vs->vs_clshift;
	if (vs->vs_indirect) {
		long	ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == (vs_map_t) NULL)
			return vsmap;
	} else
		vsmap = vs->vs_dmap;
	vsmap += cluster%CLMAP_ENTRIES;
	return vsmap;
}
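/*
 * Added worked example (assumes 4 KB pages and vs_clshift == 2): byte offset
 * 0x23000 is page 0x23, and 0x23 >> 2 == cluster 8.  With CLMAP_ENTRIES
 * entries per indirect block, the entry lives in block (8 / CLMAP_ENTRIES)
 * at slot (8 % CLMAP_ENTRIES); a direct map skips the first step.
 */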
kern_return_t
vs_cluster_transfer(
	vstruct_t	vs,
	dp_offset_t	offset,
	dp_size_t	cnt,
	upl_t		upl)
{
	dp_offset_t		actual_offset;
	paging_segment_t	ps;
	struct clmap		clmap;
	kern_return_t		error = KERN_SUCCESS;
	unsigned int		size, size_wanted;
	int			i;
	unsigned int		residual = 0;
	unsigned int		unavail_size;
//	default_pager_thread_t	*dpt;
//	boolean_t		dealloc;
	struct	vs_map		*vsmap_ptr = NULL;
	struct	vs_map		read_vsmap;
	struct	vs_map		original_read_vsmap;
	struct	vs_map		write_vsmap;
//	vm_offset_t		ioaddr;

	/* vs_cluster_transfer reads in the pages of a cluster and
	 * then writes these pages back to new backing store.  The
	 * segment the pages are being read from is assumed to have
	 * been taken off-line and is no longer considered for new
	 * space requests.
	 */

	/*
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, then loop again, hoping to
	 * find valid pages later in the cluster.  This continues until
	 * the entire range has been examined, and read, if present.  The
	 * pages are written as they are read.  If a failure occurs after
	 * some pages are written the unmap call at the bottom of the loop
	 * recovers the backing store and the old backing store remains
	 * in effect.
	 */

	VSM_CLR(write_vsmap);
	VSM_CLR(original_read_vsmap);
	/* grab the actual object's pages to sync with I/O */
	while (cnt && (error == KERN_SUCCESS)) {
		vsmap_ptr = vs_get_map_entry(vs, offset);
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (dp_offset_t) -1) {
			/*
			 * Nothing left to write in this cluster at least
			 * set write cluster information for any previous
			 * write, clear for next cluster, if there is one
			 */
			unsigned int local_size, clmask, clsize;

			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			ASSERT(local_size);
			local_size = MIN(local_size, cnt);

			/* This cluster has no data in it beyond what may */
			/* have been found on a previous iteration through */
			/* the loop "write_vsmap" */
			*vsmap_ptr = write_vsmap;
			VSM_CLR(write_vsmap);
			VSM_CLR(original_read_vsmap);

			cnt    -= local_size;
			offset += local_size;
			continue;
		}

		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		ASSERT(ps);
		size = 0;
		unavail_size = 0;
		for (i = 0;
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				if (size != 0)
					break;
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			ASSERT(unavail_size);
			ps_clunmap(vs, offset, unavail_size);
			cnt -= unavail_size;
			offset += unavail_size;
			if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
			    == 0) {
				/* There is no more to transfer in this
				   cluster
				 */
				*vsmap_ptr = write_vsmap;
				VSM_CLR(write_vsmap);
				VSM_CLR(original_read_vsmap);
			}
			continue;
		}

		if (VSM_ISCLR(original_read_vsmap))
			original_read_vsmap = *vsmap_ptr;

		if (ps->ps_segtype == PS_PARTITION) {
			panic("swap partition not supported\n");
			/*NOTREACHED*/
			error = KERN_FAILURE;
			residual = size;
/*
			NEED TO ISSUE WITH SYNC & NO COMMIT
			error = ps_read_device(ps, actual_offset, &buffer,
					       size, &residual, flags);
*/
		} else {
			/* NEED TO ISSUE WITH SYNC & NO COMMIT */
			error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
					     size, &residual,
					     (UPL_IOSYNC | UPL_NOCOMMIT));
		}

		read_vsmap = *vsmap_ptr;

		/*
		 * Adjust counts and put data in new BS.  Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			/*
			 * Got everything we asked for, supply the data to
			 * the new BS.  Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space unless
			 * the write is unsuccessful.
			 */

			/* note buffer will be cleaned up in all cases by */
			/* internal_cluster_write or if an error on write */
			/* the vm_map_copy_page_discard call */
			*vsmap_ptr = write_vsmap;

			if (vs_cluster_write(vs, upl, offset,
					     size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
				error = KERN_FAILURE;
				if (!(VSM_ISCLR(*vsmap_ptr))) {
					/* unmap the new backing store object */
					ps_clunmap(vs, offset, size);
				}
				/* original vsmap */
				*vsmap_ptr = original_read_vsmap;
				VSM_CLR(write_vsmap);
			} else {
				if ((offset + size) &
				    ((vm_page_size << vs->vs_clshift)
				     - 1)) {
					/* There is more to transfer in this
					   cluster
					 */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
				} else {
					/* discard the old backing object */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
					*vsmap_ptr = write_vsmap;
					VSM_CLR(write_vsmap);
					VSM_CLR(original_read_vsmap);
				}
			}
		} else {
			size_wanted = size;
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond the end of the object.
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				} else {
					/*
					 * Otherwise, we have partial read.
					 * This is also considered an error
					 * for the purposes of cluster transfer
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				}
			}
		}
		cnt -= size;
		offset += size;

	} /* END while (cnt && (error == 0)) */
	if (!VSM_ISCLR(write_vsmap))
		*vsmap_ptr = write_vsmap;

	return error;
}
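/*
 * Added note: the dance with read_vsmap / write_vsmap / original_read_vsmap
 * above lets the cluster map entry be restored if the re-write to the new
 * backing store fails: the entry is temporarily swapped back to the old
 * (read) mapping while ps_clunmap() releases whichever copy is being
 * discarded, so a failed transfer leaves the original backing store intact.
 */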
kern_return_t
default_pager_add_file(
	MACH_PORT_FACE	backing_store,
	vnode_ptr_t	vp,
	int		record_size,
	vm_size_t	size)
{
	backing_store_t		bs;
	paging_segment_t	ps;
	int			i;
	unsigned int		j;
	int			error;

	if ((bs = backing_store_lookup(backing_store))
	    == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	PSL_LOCK();
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		if (ps->ps_segtype != PS_FILE)
			continue;

		/*
		 * Check for overlap on same device.
		 */
		if (ps->ps_vnode == (struct vnode *)vp) {
			PSL_UNLOCK();
			BS_UNLOCK(bs);
			return KERN_INVALID_ARGUMENT;
		}
	}
	PSL_UNLOCK();

	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}

	ps->ps_segtype = PS_FILE;
	ps->ps_vnode = (struct vnode *)vp;
	ps->ps_offset = 0;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	assert((dp_size_t) size == size);
	ps->ps_recnum = (dp_size_t) size;
	ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
	ps->ps_special_clusters = 0;
	ps->ps_hint = 0;

	PS_LOCK_INIT(ps);
	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
	if (!ps->ps_bmap) {
		kfree(ps, sizeof *ps);
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}
	for (j = 0; j < ps->ps_ncls; j++) {
		clrbit(ps->ps_bmap, j);
	}

	if (paging_segment_count == 0) {
		ps->ps_state = PS_EMERGENCY_SEGMENT;
		if (use_emergency_swap_file_first) {
			ps->ps_state |= PS_CAN_USE;
		}
		emergency_segment_backing_store = backing_store;
	} else {
		ps->ps_state = PS_CAN_USE;
	}

	ps->ps_bs = bs;

	if ((error = ps_enter(ps)) != 0) {
		kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree(ps, sizeof *ps);
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
	PSL_LOCK();
	if (IS_PS_OK_TO_USE(ps)) {
		dp_pages_free += ps->ps_pgcount;
	} else {
		dp_pages_reserve += ps->ps_pgcount;
	}
	PSL_UNLOCK();
	BS_UNLOCK(bs);

	bs_more_space(ps->ps_clcount);

	/*
	 * If the paging segment being activated is not the emergency
	 * segment and we notice that the emergency segment is being
	 * used then we help recover it. If all goes well, the
	 * emergency segment will be back to its original state of
	 * online but not activated (till it's needed the next time).
	 */
	ps = paging_segments[EMERGENCY_PSEG_INDEX];
	if (IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) {
		if (default_pager_backing_store_delete(emergency_segment_backing_store)) {
			dprintf(("Failed to recover emergency paging segment\n"));
		} else {
			dprintf(("Recovered emergency paging segment\n"));
		}
	}

	DP_DEBUG(DEBUG_BS_INTERNAL,
		 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
		  device, offset, (dp_size_t) size, record_size,
		  ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
}
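/*
 * Added worked example (assumes 4 KB pages, 512-byte file-system records and
 * a backing-store cluster size of 4 pages): ps_record_shift becomes
 * local_log2(4096 / 512) == 3, so a file of `size` records holds
 * size >> 3 pages, ps_clshift == 2, and ps_clcount == ps_pgcount >> 2
 * clusters, each of which contributes (1 << 2) pages back to
 * bs_pages_free / dp_pages_free.
 */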
kern_return_t
ps_read_file(
	paging_segment_t	ps,
	upl_t			upl,
	upl_offset_t		upl_offset,
	dp_offset_t		offset,
	upl_size_t		size,
	unsigned int		*residualp,
	int			flags)
{
	vm_object_offset_t	f_offset;
	int			error = 0;
	int			result;

	assert(dp_encryption_inited);

	clustered_reads[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	/*
	 * for transfer case we need to pass uploffset and flags
	 */
	assert((upl_size_t) size == size);
	error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL);

	/* The vnode_pagein semantic is somewhat at odds with the existing   */
	/* device_read semantic.  Partial reads are not experienced at this  */
	/* level.  It is up to the bit map code and cluster read code to     */
	/* check that requested data locations are actually backed, and the  */
	/* pagein code to either read all of the requested data or return an */
	/* error. */

	if (error)
		result = KERN_FAILURE;
	else {
		*residualp = 0;
		result = KERN_SUCCESS;
	}
	return result;
}
kern_return_t
ps_write_file(
	paging_segment_t	ps,
	upl_t			upl,
	upl_offset_t		upl_offset,
	dp_offset_t		offset,
	unsigned int		size,
	int			flags)
{
	vm_object_offset_t	f_offset;
	kern_return_t		result;

	assert(dp_encryption_inited);

	clustered_writes[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	if (flags & UPL_PAGING_ENCRYPTED) {
		/*
		 * ENCRYPTED SWAP:
		 * encrypt all the pages that we're going
		 * to pageout.
		 */
		upl_encrypt(upl, upl_offset, size);
	}
	assert((upl_size_t) size == size);
	if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL))
		result = KERN_FAILURE;
	else
		result = KERN_SUCCESS;

	return result;
}
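/*
 * Added note: encryption is applied here, just before vnode_pageout(), so
 * pages land on disk already ciphered; the matching pagein path instead
 * requests UPL_ENCRYPT on the UPL so the data can be decrypted after
 * ps_read_file() completes.
 */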
kern_return_t
default_pager_triggers( __unused MACH_PORT_FACE default_pager,
	int		hi_wat,
	int		lo_wat,
	int		flags,
	MACH_PORT_FACE	trigger_port)
{
	MACH_PORT_FACE	release;
	kern_return_t	kr;
	clock_sec_t	now;
	clock_nsec_t	nanoseconds_dummy;
	static clock_sec_t	error_notify = 0;

	PSL_LOCK();
	if (flags == SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: turn encryption on */
		release = trigger_port;
		if (!dp_encryption_inited) {
			dp_encryption_inited = TRUE;
			dp_encryption = TRUE;
			kr = KERN_SUCCESS;
		} else {
			kr = KERN_FAILURE;
		}
	} else if (flags == SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: turn encryption off */
		release = trigger_port;
		if (!dp_encryption_inited) {
			dp_encryption_inited = TRUE;
			dp_encryption = FALSE;
			kr = KERN_SUCCESS;
		} else {
			kr = KERN_FAILURE;
		}
	} else if (flags == HI_WAT_ALERT) {
		release = min_pages_trigger_port;
		min_pages_trigger_port = trigger_port;
		minimum_pages_remaining = hi_wat/vm_page_size;
		bs_low = FALSE;
		kr = KERN_SUCCESS;
	} else if (flags == LO_WAT_ALERT) {
		release = max_pages_trigger_port;
		max_pages_trigger_port = trigger_port;
		maximum_pages_free = lo_wat/vm_page_size;
		kr = KERN_SUCCESS;
	} else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) {
		use_emergency_swap_file_first = TRUE;
		release = trigger_port;
		kr = KERN_SUCCESS;
	} else if (flags == SWAP_FILE_CREATION_ERROR) {
		release = trigger_port;
		kr = KERN_SUCCESS;
		if (paging_segment_count == 1) {
			use_emergency_swap_file_first = TRUE;
		}
		no_paging_space_action();
		clock_get_system_nanotime(&now, &nanoseconds_dummy);
		if (now > error_notify + 5) {
			dprintf(("Swap File Error.\n"));
			error_notify = now;
		}
	} else {
		release = trigger_port;
		kr = KERN_INVALID_ARGUMENT;
	}
	PSL_UNLOCK();

	if (IP_VALID(release))
		ipc_port_release_send(release);

	return kr;
}
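/*
 * Added note: hi_wat and lo_wat arrive as byte counts and are converted to
 * page counts here (hi_wat/vm_page_size, lo_wat/vm_page_size).
 * minimum_pages_remaining drives the "need more swap" alert, while
 * maximum_pages_free gates the backing-store release notification sent by
 * default_pager_backing_store_monitor() below.
 */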
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL	3	/* time between free level checks */
#define PF_LATENCY	10	/* number of intervals before release */

static int dp_pages_free_low_count = 0;
thread_call_t default_pager_backing_store_monitor_callout;

void
default_pager_backing_store_monitor(__unused thread_call_param_t p1,
				    __unused thread_call_param_t p2)
{
//	unsigned long long	average;
	ipc_port_t		trigger;
	uint64_t		deadline;

	/*
	 * We determine whether it will be safe to release some
	 * backing store by watching the free page level.  If
	 * it remains below the maximum_pages_free threshold for
	 * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
	 * then we deem it safe.
	 *
	 * Note that this establishes a maximum rate at which backing
	 * store will be released, as each notification (currently)
	 * only results in a single backing store object being
	 * released.
	 */
	if (dp_pages_free > maximum_pages_free) {
		dp_pages_free_low_count++;
	} else {
		dp_pages_free_low_count = 0;
	}

	/* decide whether to send notification */
	trigger = IP_NULL;
	if (max_pages_trigger_port &&
	    (backing_store_release_trigger_disable == 0) &&
	    (dp_pages_free_low_count > PF_LATENCY)) {
		trigger = max_pages_trigger_port;
		max_pages_trigger_port = NULL;
	}

	/* send notification */
	if (trigger != IP_NULL) {
		VSL_LOCK();
		if (backing_store_release_trigger_disable != 0) {
			assert_wait((event_t)
				    &backing_store_release_trigger_disable,
				    THREAD_UNINT);
			VSL_UNLOCK();
			thread_block(THREAD_CONTINUE_NULL);
		} else {
			VSL_UNLOCK();
		}
		default_pager_space_alert(trigger, LO_WAT_ALERT);
		ipc_port_release_send(trigger);
		dp_pages_free_low_count = 0;
	}

	clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
	thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
}
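/*
 * Added note: with PF_INTERVAL == 3 seconds and PF_LATENCY == 10 intervals,
 * the free-page level must stay above maximum_pages_free for more than
 * roughly 30 consecutive seconds before a single LO_WAT_ALERT is sent, which
 * is the throttling behavior the comment at the top of this block describes.
 */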