/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Paging File Management.
 */
#include <mach/host_priv.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>

#include <default_pager/default_pager_internal.h>
#include <default_pager/default_pager_alerts.h>
#include <default_pager/default_pager_object_server.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <kern/kern_types.h>
#include <kern/host.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_protos.h>
/* LP64todo - need large internal object support */

/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future
 */
#define	ALLOC_STRIDE	(1024 * 1024 * 1024)
int	physical_transfer_cluster_count = 0;

#define	VM_SUPER_CLUSTER	0x40000
#define	VM_SUPER_PAGES		64
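/*
 * Illustrative note (not in the original source): assuming 4 KB pages,
 * VM_SUPER_CLUSTER (0x40000 bytes = 256 KB) corresponds to
 * VM_SUPER_PAGES (64) pages, which is why the clustered I/O statistics
 * arrays below are sized VM_SUPER_PAGES+1.
 */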
/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define	VSTRUCT_DEF_CLSHIFT	2
int	vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int	default_pager_clsize = 0;

unsigned int	clustered_writes[VM_SUPER_PAGES+1];
unsigned int	clustered_reads[VM_SUPER_PAGES+1];
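/*
 * Illustrative note (not in the original source): with
 * VSTRUCT_DEF_CLSHIFT == 2, a cluster is 1 << 2 = 4 pages, i.e. 16 KB
 * with 4 KB pages; a shift of 0 would mean one page per cluster.
 */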
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list:	head of list of to-be-completed I/O ops
 *	async_num_queued:	number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out:	number of pages of requests not completed.
 */
struct vs_async	*vs_async_list;
int		async_num_queued;
int		async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async	*vs_async_free_list;

mutex_t	default_pager_async_lock;	/* Protects globals above */

int	vs_alloc_async_failed = 0;	/* statistics */
int	vs_alloc_async_count = 0;	/* statistics */
struct vs_async *vs_alloc_async(void);	/* forward */
void vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	mutex_init(&default_pager_async_lock, 0)
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;

/* Have we decided if swap needs to be encrypted yet ? */
boolean_t	dp_encryption_inited = FALSE;
/* Should we encrypt swap ? */
boolean_t	dp_encryption = FALSE;
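/*
 * Descriptive note (not in the original source): min_pages_trigger_port
 * is consumed further down (see ps_select_segment / ps_allocate_cluster):
 * when dp_pages_free drops below minimum_pages_remaining the port is
 * taken, a HI_WAT_ALERT space alert is sent on it, and the send right
 * is released.
 */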
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
struct backing_store_list_head	backing_store_list;
paging_segment_t	paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t			paging_segments_lock;
int			paging_segment_max = 0;
int			paging_segment_count = 0;
int	ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };

/*
 * Total pages free in system
 * This differs from clusters committed/avail which is a measure of the
 * over commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	cluster_transfer_minimum = 100;
/* forward declarations */
kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *get_read_buffer( void );
kern_return_t ps_vstruct_transfer_from_segment(
	paging_segment_t segment,
kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int);	/* forward */
kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *);	/* forward */
kern_return_t vs_cluster_transfer(
vs_map_t vs_get_map_entry(
default_pager_thread_t *
get_read_buffer( void )
	for (i=0; i<default_pager_internal_count; i++) {
		if(dpt_array[i]->checked_out == FALSE) {
			dpt_array[i]->checked_out = TRUE;
			DPT_UNLOCK(dpt_lock);
	DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
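/*
 * Descriptive note (not in the original source): get_read_buffer()
 * scans dpt_array[] for a default-pager thread buffer that is not
 * checked out, marks it checked out and returns it; if none is free
 * it sleeps on dpt_array until a buffer is handed back.
 */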
	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

	boolean_t out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");

void bs_more_space(int);	/* forward */
void bs_commit(int);	/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
	/*
	 * Account for new paging space.
	 */
	clusters_available += nclusters;

	if (clusters_available >= clusters_committed) {
		if (verbose && user_warned) {
			printf("%s%s - %d excess clusters now.\n",
			       "paging space is OK now",
			       clusters_available - clusters_committed);
			clusters_committed_peak = 0;
		if (verbose && user_warned) {
			printf("%s%s - still short of %d clusters.\n",
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
			clusters_committed_peak -= nclusters;

	clusters_committed += nclusters;
	if (clusters_committed > clusters_available) {
		if (verbose && !user_warned) {
			printf("%s%s - short of %d clusters.\n",
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
		if (clusters_committed > clusters_committed_peak) {
			clusters_committed_peak = clusters_committed;
		if (verbose && user_warned) {
			printf("%s%s - was short of up to %d clusters.\n",
			       "paging space is OK now",
			       clusters_committed_peak - clusters_available);
			clusters_committed_peak = 0;
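/*
 * Descriptive note (not in the original source): bs_more_space() and
 * bs_commit() keep clusters_available and clusters_committed in step;
 * a warning is printed once when committed exceeds available, and
 * clusters_committed_peak records the worst over-commitment seen so the
 * recovery message can report it.
 */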
int default_pager_info_verbose = 1;

	vm_size_t	pages_total, pages_free;

	pages_total = pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
		/*
		 * no need to lock: by the time this data
		 * gets back to any remote requestor it
		 * will be obsolete anyways
		 */
		pages_total += ps->ps_pgnum;
		pages_free += ps->ps_clcount << ps->ps_clshift;
		DP_DEBUG(DEBUG_BS_INTERNAL,
			 ("segment #%d: %d total, %d free\n",
			  i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
	*totalp = pages_total;

	if (verbose && user_warned && default_pager_info_verbose) {
		if (clusters_available < clusters_committed) {
			printf("%s %d clusters committed, %d available.\n",
backing_store_t backing_store_alloc(void);	/* forward */

backing_store_alloc(void)
	bs = (backing_store_t) kalloc(sizeof (struct backing_store));
	if (bs == BACKING_STORE_NULL)
		panic("backing_store_alloc: no memory");

	bs->bs_port = MACH_PORT_NULL;
	bs->bs_pages_total = 0;
	bs->bs_pages_in_fail = 0;
	bs->bs_pages_out = 0;
	bs->bs_pages_out_fail = 0;
backing_store_t backing_store_lookup(MACH_PORT_FACE);	/* forward */

/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_lookup(
	/*
	   port is currently backed with a vs structure in the alias field
	   we could create an ISBS alias and a port_is_bs call but frankly
	   I see no reason for the test, the bs->port == port check below
	   will work properly on junk entries.

	   if ((port == MACH_PORT_NULL) || port_is_vs(port))
	*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;

	queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
		if (bs->bs_port == port) {
			/* Success, return it locked. */

	return BACKING_STORE_NULL;
void backing_store_add(backing_store_t);	/* forward */

	__unused backing_store_t bs)
//	MACH_PORT_FACE		port = bs->bs_port;
//	MACH_PORT_FACE		pset = default_pager_default_set;
	kern_return_t		kr = KERN_SUCCESS;

	if (kr != KERN_SUCCESS)
		panic("backing_store_add: add to set");
/*
 * Set up default page shift, but only if not already
 * set and argument is within range.
 */
bs_set_default_clsize(unsigned int npages)
		if (default_pager_clsize == 0)	/* if not yet set */
			vstruct_def_clshift = local_log2(npages);

int bs_get_global_clsize(int clsize);	/* forward */
bs_get_global_clsize(
	memory_object_default_t	dmm;

	/*
	 * Only allow setting of cluster size once. If called
	 * with no cluster size (default), we use the compiled-in default
	 * for the duration. The same cluster size is used for all
	 */
	if (default_pager_clsize == 0) {
		/*
		 * Keep cluster size in bit shift because it's quicker
		 * arithmetic, and easier to keep at a power of 2.
		 */
		if (clsize != NO_CLSIZE) {
			for (i = 0; (1 << i) < clsize; i++);
			if (i > MAX_CLUSTER_SHIFT)
				i = MAX_CLUSTER_SHIFT;
			vstruct_def_clshift = i;
		default_pager_clsize = (1 << vstruct_def_clshift);

		/*
		 * Let the user know the new (and definitive) cluster size.
		 */
			printf("%scluster size = %d page%s\n",
			       my_name, default_pager_clsize,
			       (default_pager_clsize == 1) ? "" : "s");

		/*
		 * Let the kernel know too, in case it hasn't used the
		 * default value provided in main() yet.
		 */
		dmm = default_pager_object;
		clsize = default_pager_clsize * vm_page_size;	/* in bytes */
		kr = host_default_memory_manager(host_priv_self(),
		memory_object_default_deallocate(dmm);

		if (kr != KERN_SUCCESS) {
			panic("bs_get_global_cl_size:host_default_memory_manager");
		if (dmm != default_pager_object) {
			panic("bs_get_global_cl_size:there is another default pager");

	ASSERT(default_pager_clsize > 0 &&
	       (default_pager_clsize & (default_pager_clsize - 1)) == 0);

	return default_pager_clsize;
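/*
 * Illustrative note (not in the original source): the loop above finds
 * the smallest shift i with (1 << i) >= clsize, capped at
 * MAX_CLUSTER_SHIFT; e.g. a requested clsize of 4 pages yields
 * vstruct_def_clshift == 2 and default_pager_clsize == 4.
 */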
default_pager_backing_store_create(
	memory_object_default_t	pager,
	int			clsize,		/* in bytes */
	MACH_PORT_FACE		*backing_store)
	struct vstruct_alias	*alias_struct;

	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	bs = backing_store_alloc();
	port = ipc_port_alloc_kernel();
	ipc_port_make_send(port);
	assert (port != IP_NULL);

	DP_DEBUG(DEBUG_BS_EXTERNAL,
		 ("priority=%d clsize=%d bs_port=0x%x\n",
		  priority, clsize, (int) backing_store));

	alias_struct = (struct vstruct_alias *)
		kalloc(sizeof (struct vstruct_alias));
	if(alias_struct != NULL) {
		alias_struct->vs = (struct vstruct *)bs;
		alias_struct->name = ISVS;
		port->alias = (int) alias_struct;
		ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
		kfree(bs, sizeof (struct backing_store));
		return KERN_RESOURCE_SHORTAGE;

	if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
		priority = BS_MAXPRI;
	else if (priority == BS_NOPRI)
		priority = BS_MAXPRI;
		priority = BS_MINPRI;
	bs->bs_priority = priority;

	bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));

	queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
	backing_store_add(bs);

	*backing_store = port;
default_pager_backing_store_info(
	MACH_PORT_FACE		backing_store,
	backing_store_flavor_t	flavour,
	backing_store_info_t	info,
	mach_msg_type_number_t	*size)
	backing_store_basic_info_t	basic;

	if (flavour != BACKING_STORE_BASIC_INFO ||
	    *size < BACKING_STORE_BASIC_INFO_COUNT)
		return KERN_INVALID_ARGUMENT;

	basic = (backing_store_basic_info_t)info;
	*size = BACKING_STORE_BASIC_INFO_COUNT;

	VSTATS_LOCK(&global_stats.gs_lock);
	basic->pageout_calls = global_stats.gs_pageout_calls;
	basic->pagein_calls = global_stats.gs_pagein_calls;
	basic->pages_in = global_stats.gs_pages_in;
	basic->pages_out = global_stats.gs_pages_out;
	basic->pages_unavail = global_stats.gs_pages_unavail;
	basic->pages_init = global_stats.gs_pages_init;
	basic->pages_init_writes = global_stats.gs_pages_init_writes;
	VSTATS_UNLOCK(&global_stats.gs_lock);

	if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	basic->bs_pages_total = bs->bs_pages_total;
	bs->bs_pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
			bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	basic->bs_pages_free = bs->bs_pages_free;
	basic->bs_pages_in = bs->bs_pages_in;
	basic->bs_pages_in_fail = bs->bs_pages_in_fail;
	basic->bs_pages_out = bs->bs_pages_out;
	basic->bs_pages_out_fail = bs->bs_pages_out_fail;
	basic->bs_priority = bs->bs_priority;
	basic->bs_clsize = ptoa_32(bs->bs_clsize);	/* in bytes */
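/*
 * Illustrative note (not in the original source): ps_clcount counts free
 * clusters, so ps_clcount << ps_clshift converts it to free pages; the
 * sum over all segments owned by this backing store gives bs_pages_free.
 */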
int ps_delete(paging_segment_t);	/* forward */

	kern_return_t	error = KERN_SUCCESS;

	VSL_LOCK();	/* get the lock on the list of vs's */

	/* The lock relationship and sequence is fairly complicated */
	/* this code looks at a live list, locking and unlocking the list */
	/* as it traverses it.  It depends on the locking behavior of */
	/* default_pager_no_senders.  no_senders always locks the vstruct */
	/* targeted for removal before locking the vstruct list.  However */
	/* it will remove that member of the list without locking its */
	/* neighbors.  We can be sure when we hold a lock on a vstruct */
	/* it cannot be removed from the list but we must hold the list */
	/* lock to be sure that its pointers to its neighbors are valid. */
	/* Also, we can hold off destruction of a vstruct when the list */
	/* lock and the vs locks are not being held by bumping the */
	/* vs_async_pending count. */

	while(backing_store_release_trigger_disable != 0) {
		VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);

	/* we will choose instead to hold a send right */
	vs_count = vstruct_list.vsl_count;
	vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
	if(vs == (vstruct_t)&vstruct_list) {
	vs_async_wait(vs);	/* wait for any pending async writes */
	if ((vs_count != 0) && (vs != NULL))
		vs->vs_async_pending += 1;  /* hold parties calling */

	while((vs_count != 0) && (vs != NULL)) {
		/* We take the count of AMO's before beginning the */
		/* transfer of the target segment. */
		/* We are guaranteed that the target segment cannot get */
		/* more users.  We also know that queue entries are */
		/* made at the back of the list.  If some of the entries */
		/* we would check disappear while we are traversing the */
		/* list then we will either check new entries which */
		/* do not have any backing store in the target segment */
		/* or re-check old entries.  This might not be optimal */
		/* but it will always be correct.  The alternative is to */
		/* take a snapshot of the list. */

		if(dp_pages_free < cluster_transfer_minimum)
			error = KERN_FAILURE;
			vm_object_t	transfer_object;

			transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
			error = vm_object_upl_request(transfer_object,
				(vm_object_offset_t)0, VM_SUPER_CLUSTER,
				UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
			if(error == KERN_SUCCESS) {
				error = ps_vstruct_transfer_from_segment(
				upl_commit(upl, NULL, 0);
				error = KERN_FAILURE;
			vm_object_deallocate(transfer_object);

		vs->vs_async_pending -= 1;  /* release vs_async_wait */
		if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
			vs->vs_waiting_async = FALSE;
			thread_wakeup(&vs->vs_async_pending);

		while(backing_store_release_trigger_disable != 0) {
			VSL_SLEEP(&backing_store_release_trigger_disable,

		next_vs = (vstruct_t) queue_next(&(vs->vs_links));
		if((next_vs != (vstruct_t)&vstruct_list) &&
		   (vs != next_vs) && (vs_count != 1)) {
			vs_async_wait(next_vs);	/* wait for any */
						/* pending async writes */
			next_vs->vs_async_pending += 1;	/* hold parties */
						/* calling vs_async_wait */

		vs->vs_async_pending -= 1;
		if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
			vs->vs_waiting_async = FALSE;
			thread_wakeup(&vs->vs_async_pending);

		if((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list))
default_pager_backing_store_delete(
	MACH_PORT_FACE backing_store)
	int			interim_pages_removed = 0;

	if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	/* not implemented */

	error = KERN_SUCCESS;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps != PAGING_SEGMENT_NULL &&
		    ! ps->ps_going_away) {
			/* disable access to this segment */
			ps->ps_going_away = TRUE;
			/*
			 * The "ps" segment is "off-line" now,
			 * we can try and delete it...
			 */
			if(dp_pages_free < (cluster_transfer_minimum
				error = KERN_FAILURE;
				/* remove all pages associated with the */
				/* segment from the list of free pages */
				/* when transfer is through, all target */
				/* segment pages will appear to be free */
				dp_pages_free -= ps->ps_pgcount;
				interim_pages_removed += ps->ps_pgcount;
				error = ps_delete(ps);
			if (error != KERN_SUCCESS) {
				/*
				 * We couldn't delete the segment,
				 * probably because there's not enough
				 * virtual memory left.
				 * Re-enable all the segments.
				 */

	if (error != KERN_SUCCESS) {
		for (i = 0; i <= paging_segment_max; i++) {
			ps = paging_segments[i];
			if (ps != PAGING_SEGMENT_NULL &&
				/* re-enable access to this segment */
				ps->ps_going_away = FALSE;
		dp_pages_free += interim_pages_removed;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps != PAGING_SEGMENT_NULL &&
			if(ps->ps_going_away) {
				paging_segments[i] = PAGING_SEGMENT_NULL;
				paging_segment_count--;
				kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
				kfree(ps, sizeof *ps);

	/* Scan the entire ps array separately to make certain we find the */
	/* proper paging_segment_max */
	for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
		if(paging_segments[i] != PAGING_SEGMENT_NULL)
			paging_segment_max = i;

	/*
	 * All the segments have been deleted.
	 * We can remove the backing store.
	 */

	/*
	 * Disable lookups of this backing store.
	 */
	if((void *)bs->bs_port->alias != NULL)
		kfree((void *) bs->bs_port->alias,
		      sizeof (struct vstruct_alias));
	ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
	bs->bs_port = MACH_PORT_NULL;

	/*
	 * Remove backing store from backing_store list.
	 */
	queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t,

	/*
	 * Free the backing store structure.
	 */
	kfree(bs, sizeof *bs);
int ps_enter(paging_segment_t);	/* forward */

	for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
		if (paging_segments[i] == PAGING_SEGMENT_NULL)

	if (i < MAX_NUM_PAGING_SEGMENTS) {
		paging_segments[i] = ps;
		if (i > paging_segment_max)
			paging_segment_max = i;
		paging_segment_count++;
		if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) ||
		    (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI))
			ps_select_array[ps->ps_bs->bs_priority] = 0;
		return KERN_RESOURCE_SHORTAGE;
default_pager_add_segment(
	MACH_PORT_FACE	backing_store,
	MACH_PORT_FACE	device,
	if ((bs = backing_store_lookup(backing_store))
	    == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
		/*
		 * Check for overlap on same device.
		 */
		if (!(ps->ps_device != device
		      || offset >= ps->ps_offset + ps->ps_recnum
		      || offset + count <= ps->ps_offset)) {
			return KERN_INVALID_ARGUMENT;

	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		return KERN_RESOURCE_SHORTAGE;

	ps->ps_segtype = PS_PARTITION;
	ps->ps_device = device;
	ps->ps_offset = offset;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = count;
	ps->ps_pgnum = count >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;

	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
		kfree(ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;
	for (i = 0; i < ps->ps_ncls; i++) {
		clrbit(ps->ps_bmap, i);

	ps->ps_going_away = FALSE;

	if ((error = ps_enter(ps)) != 0) {
		kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree(ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

	dp_pages_free += ps->ps_pgcount;

	bs_more_space(ps->ps_clcount);

	DP_DEBUG(DEBUG_BS_INTERNAL,
		 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
		  device, offset, count, record_size,
		  ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
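/*
 * Illustrative note (not in the original source, example values assumed):
 * with record_size = 512 and a 4 KB page, ps_record_shift =
 * local_log2(4096/512) = 3, so ps_pgnum = count >> 3 pages; with a
 * cluster shift of 2, ps_clcount = ps_pgcount >> 2 clusters of 4 pages.
 */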
	MACH_PORT_FACE master)
	security_token_t	null_security_token = {
	MACH_PORT_FACE	device;
	int		info[DEV_GET_SIZE_COUNT];
	mach_msg_type_number_t	info_count;
	MACH_PORT_FACE	bs = MACH_PORT_NULL;
	unsigned int	rec_size;
	MACH_PORT_FACE	reply_port;

	if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE,
			null_security_token, dev_name, &device))

	info_count = DEV_GET_SIZE_COUNT;
	if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) {
		rec_size = info[DEV_GET_SIZE_RECORD_SIZE];
		count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
		clsize = bs_get_global_clsize(0);
		if (!default_pager_backing_store_create(
			default_pager_object,
			DEFAULT_PAGER_BACKING_STORE_MAXPRI,
			(clsize * vm_page_size),
			if (!default_pager_add_segment(bs, device,
				0, count, rec_size)) {
			ipc_port_release_receive(bs);
	ipc_port_release_send(device);
#endif	/* DEVICE_PAGING */
vs_alloc_async(void)
	struct vs_async	*vsa;
	MACH_PORT_FACE	reply_port;
//	kern_return_t	kr;

	if (vs_async_free_list == NULL) {
		vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
		/*
		 * Try allocating a reply port named after the
		 * address of the vs_async structure.
		 */
		struct vstruct_alias	*alias_struct;

		reply_port = ipc_port_alloc_kernel();
		alias_struct = (struct vstruct_alias *)
			kalloc(sizeof (struct vstruct_alias));
		if(alias_struct != NULL) {
			alias_struct->vs = (struct vstruct *)vsa;
			alias_struct->name = ISVS;
			reply_port->alias = (int) alias_struct;
			vsa->reply_port = reply_port;
			vs_alloc_async_count++;
			vs_alloc_async_failed++;
			ipc_port_dealloc_kernel((MACH_PORT_FACE)
			kfree(vsa, sizeof (struct vs_async));
		vsa = vs_async_free_list;
		vs_async_free_list = vs_async_free_list->vsa_next;

	struct vs_async	*vsa)
	vsa->vsa_next = vs_async_free_list;
	vs_async_free_list = vsa;

#else	/* VS_ASYNC_REUSE */

vs_alloc_async(void)
	struct vs_async	*vsa;
	MACH_PORT_FACE	reply_port;

	vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
	/*
	 * Try allocating a reply port named after the
	 * address of the vs_async structure.
	 */
	reply_port = ipc_port_alloc_kernel();
	alias_struct = (vstruct_alias *)
		kalloc(sizeof (struct vstruct_alias));
	if(alias_struct != NULL) {
		alias_struct->vs = reply_port;
		alias_struct->name = ISVS;
		reply_port->alias = (int) vsa;
		vsa->reply_port = reply_port;
		vs_alloc_async_count++;
		vs_alloc_async_failed++;
		ipc_port_dealloc_kernel((MACH_PORT_FACE)
		kfree(vsa, sizeof (struct vs_async));

	struct vs_async	*vsa)
	MACH_PORT_FACE	reply_port;

	reply_port = vsa->reply_port;
	kfree(reply_port->alias, sizeof (struct vstruct_alias));
	kfree(vsa, sizeof (struct vs_async));
	ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
	vs_alloc_async_count--;

#endif	/* VS_ASYNC_REUSE */
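/*
 * Descriptive note (not in the original source): with VS_ASYNC_REUSE,
 * completed vs_async structures are kept on vs_async_free_list so their
 * kernel reply ports can be reused instead of being reallocated for
 * every asynchronous device operation.
 */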
zone_t	vstruct_zone;

	vs = (vstruct_t) zalloc(vstruct_zone);
	if (vs == VSTRUCT_NULL) {
		return VSTRUCT_NULL;

	/*
	 * The following fields will be provided later.
	 */
	vs->vs_mem_obj = NULL;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
	vs->vs_references = 1;

	vs->vs_waiting_seqno = FALSE;
	vs->vs_waiting_read = FALSE;
	vs->vs_waiting_write = FALSE;
	vs->vs_waiting_async = FALSE;

	mutex_init(&vs->vs_waiting_seqno, 0);
	mutex_init(&vs->vs_waiting_read, 0);
	mutex_init(&vs->vs_waiting_write, 0);
	mutex_init(&vs->vs_waiting_refs, 0);
	mutex_init(&vs->vs_waiting_async, 0);

	vs->vs_clshift = local_log2(bs_get_global_clsize(0));
	vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
	vs->vs_async_pending = 0;

	/*
	 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
	 * depending on the size of the memory object.
	 */
	if (INDIRECT_CLMAP(vs->vs_size)) {
		vs->vs_imap = (struct vs_map **)
			kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size));
		vs->vs_indirect = TRUE;
		vs->vs_dmap = (struct vs_map *)
			kalloc(CLMAP_SIZE(vs->vs_size));
		vs->vs_indirect = FALSE;
	vs->vs_xfer_pending = FALSE;
	DP_DEBUG(DEBUG_VS_INTERNAL,
		 ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));

	/*
	 * Check to see that we got the space.
	 */
		kfree(vs, sizeof *vs);
		return VSTRUCT_NULL;

	/*
	 * Zero the indirect pointers, or clear the direct pointers.
	 */
	if (vs->vs_indirect)
		memset(vs->vs_imap, 0,
		       INDIRECT_CLMAP_SIZE(vs->vs_size));
		for (i = 0; i < vs->vs_size; i++)
			VSM_CLR(vs->vs_dmap[i]);

	VS_MAP_LOCK_INIT(vs);

	bs_commit(vs->vs_size);
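/*
 * Illustrative note (not in the original source): vs_size is the object
 * size in clusters, rounded up; e.g. a 1 MB object with 4 KB pages and
 * vs_clshift == 2 gives atop_32(1 MB) = 256 pages and
 * ((256 - 1) >> 2) + 1 = 64 clusters.
 */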
paging_segment_t ps_select_segment(unsigned int, int *);	/* forward */

	paging_segment_t	ps;

	/*
	 * Optimize case where there's only one segment.
	 * paging_segment_max will index the one and only segment.
	 */
	if (paging_segment_count == 1) {
		paging_segment_t	lps;	/* used to avoid extra PS_UNLOCK */
		ipc_port_t trigger = IP_NULL;

		ps = paging_segments[paging_segment_max];
		*psindex = paging_segment_max;

		if (ps->ps_going_away) {
			/* this segment is being turned off */
			lps = PAGING_SEGMENT_NULL;
			ASSERT(ps->ps_clshift >= shift);
			if (ps->ps_clcount) {
				dp_pages_free -=  1 << ps->ps_clshift;
				if(min_pages_trigger_port &&
				   (dp_pages_free < minimum_pages_remaining)) {
					trigger = min_pages_trigger_port;
					min_pages_trigger_port = NULL;
				lps = PAGING_SEGMENT_NULL;

		if (trigger != IP_NULL) {
			default_pager_space_alert(trigger, HI_WAT_ALERT);
			ipc_port_release_send(trigger);

	if (paging_segment_count == 0) {
		return PAGING_SEGMENT_NULL;

	     i >= BS_MINPRI; i--) {
		if ((ps_select_array[i] == BS_NOPRI) ||
		    (ps_select_array[i] == BS_FULLPRI))
		start_index = ps_select_array[i];

		if(!(paging_segments[start_index])) {
			physical_transfer_cluster_count = 0;
		else if ((physical_transfer_cluster_count+1) == (ALLOC_STRIDE >>
			(((paging_segments[start_index])->ps_clshift)
			+ vm_page_shift))) {
			physical_transfer_cluster_count = 0;
			j = start_index + 1;
			physical_transfer_cluster_count+=1;
			if(start_index == 0)
				start_index = paging_segment_max;
				start_index = start_index - 1;

			if (j > paging_segment_max)
			if ((ps = paging_segments[j]) &&
			    (ps->ps_bs->bs_priority == i)) {
				/*
				 * Force the ps cluster size to be
				 * >= that of the vstruct.
				 */
				if (ps->ps_going_away) {
					/* this segment is being turned off */
				} else if ((ps->ps_clcount) &&
					   (ps->ps_clshift >= shift)) {
					ipc_port_t trigger = IP_NULL;

					dp_pages_free -=  1 << ps->ps_clshift;
					if(min_pages_trigger_port &&
					    minimum_pages_remaining)) {
						trigger = min_pages_trigger_port;
						min_pages_trigger_port = NULL;
					/*
					 * found one, quit looking.
					 */
					ps_select_array[i] = j;
					if (trigger != IP_NULL) {
						default_pager_space_alert(
						ipc_port_release_send(trigger);

			if (j == start_index) {
				/*
				 * none at this priority -- mark it full
				 */
				ps_select_array[i] = BS_FULLPRI;

	return PAGING_SEGMENT_NULL;
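/*
 * Illustrative note (not in the original source): the round-robin stride
 * above is ALLOC_STRIDE >> (ps_clshift + vm_page_shift) clusters; with a
 * 1 GB stride, 4 KB pages and 4-page clusters that is 65536 clusters
 * allocated from one segment before moving to the next.
 */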
vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/

ps_allocate_cluster(
	paging_segment_t	use_ps)
	unsigned int		byte_num;
	paging_segment_t	ps;
	vm_offset_t		cluster;
	ipc_port_t		trigger = IP_NULL;

	/*
	 * Find best paging segment.
	 * ps_select_segment will decrement cluster count on ps.
	 * Must pass cluster shift to find the most appropriate segment.
	 */
	/* NOTE:  The addition of paging segment delete capability threatened
	 * to seriously complicate the treatment of paging segments in this
	 * module and the ones that call it (notably ps_clmap), because of the
	 * difficulty in assuring that the paging segment would continue to
	 * exist between being unlocked and locked.  This was
	 * avoided because all calls to this module are based in either
	 * dp_memory_object calls which rely on the vs lock, or by
	 * the transfer function which is part of the segment delete path.
	 * The transfer function which is part of paging segment delete is
	 * protected from multiple callers by the backing store lock.
	 * The paging segment delete function treats mappings to a paging
	 * segment on a vstruct by vstruct basis, locking the vstruct targeted
	 * while data is transferred to the remaining segments.  This is in
	 * line with the view that incomplete or in-transition mappings between
	 * data, a vstruct, and backing store are protected by the vs lock.
	 * This and the ordering of the paging segment "going_away" bit setting
	 */
	if (use_ps != PAGING_SEGMENT_NULL) {
		ASSERT(ps->ps_clcount != 0);

		dp_pages_free -=  1 << ps->ps_clshift;
		if(min_pages_trigger_port &&
		   (dp_pages_free < minimum_pages_remaining)) {
			trigger = min_pages_trigger_port;
			min_pages_trigger_port = NULL;

		if (trigger != IP_NULL) {
			default_pager_space_alert(trigger, HI_WAT_ALERT);
			ipc_port_release_send(trigger);

	} else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
		   PAGING_SEGMENT_NULL) {
		static uint32_t lastnotify = 0;
		uint32_t now, nanoseconds_dummy;

		/*
		 * Emit a notification of the low-paging resource condition
		 * but don't issue it more than once every five seconds.  This
		 * prevents us from overflowing logs with thousands of
		 * repetitions of the message.
		 */
		clock_get_system_nanotime(&now, &nanoseconds_dummy);
		if (now > lastnotify + 5) {
			dprintf(("no space in available paging segments\n"));

		/* the count got off maybe, reset to zero */
		if(min_pages_trigger_port) {
			trigger = min_pages_trigger_port;
			min_pages_trigger_port = NULL;

		if (trigger != IP_NULL) {
			default_pager_space_alert(trigger, HI_WAT_ALERT);
			ipc_port_release_send(trigger);
		return (vm_offset_t) -1;

	/*
	 * Look for an available cluster.  At the end of the loop,
	 * byte_num is the byte offset and bit_num is the bit offset of the
	 * first zero bit in the paging segment bitmap.
	 */
	byte_num = ps->ps_hint;
	for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
		if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
			for (bit_num = 0; bit_num < NBBY; bit_num++) {
				if (isclr((ps->ps_bmap + byte_num), bit_num))
			ASSERT(bit_num != NBBY);

			ps->ps_hint = byte_num;
			cluster = (byte_num*NBBY) + bit_num;

			/* Space was reserved, so this must be true */
			ASSERT(cluster < ps->ps_ncls);

			setbit(ps->ps_bmap, cluster);
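/*
 * Illustrative note (not in the original source): the bitmap scan starts
 * at the byte hinted by ps_hint, and the allocated cluster number is
 * byte_num * NBBY + bit_num; ps_hint is updated so later searches skip
 * bytes already known to be fully allocated.
 */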
void ps_deallocate_cluster(paging_segment_t, vm_offset_t);	/* forward */

ps_deallocate_cluster(
	paging_segment_t	ps,
	vm_offset_t		cluster)
	if (cluster >= (vm_offset_t) ps->ps_ncls)
		panic("ps_deallocate_cluster: Invalid cluster number");

	/*
	 * Lock the paging segment, clear the cluster's bitmap and increment the
	 * number of free clusters.
	 */
	clrbit(ps->ps_bmap, cluster);
	dp_pages_free +=  1 << ps->ps_clshift;

	/*
	 * Move the hint down to the freed cluster if it is
	 * less than the current hint.
	 */
	if ((cluster/NBBY) < ps->ps_hint) {
		ps->ps_hint = (cluster/NBBY);

	/*
	 * If we're freeing space on a full priority, reset the array.
	 */
	if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
		ps_select_array[ps->ps_bs->bs_priority] = 0;
void ps_dealloc_vsmap(struct vs_map *, vm_size_t);	/* forward */

	struct vs_map	*vsmap,
	for (i = 0; i < size; i++)
		if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
			ps_deallocate_cluster(VSM_PS(vsmap[i]),
					      VSM_CLOFF(vsmap[i]));

	/*
	 * If this is an indirect structure, then we walk through the valid
	 * (non-zero) indirect pointers and deallocate the clusters
	 * associated with each used map entry (via ps_dealloc_vsmap).
	 * When all of the clusters in an indirect block have been
	 * freed, we deallocate the block.  When all of the indirect
	 * blocks have been deallocated we deallocate the memory
	 * holding the indirect pointers.
	 */
	if (vs->vs_indirect) {
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			if (vs->vs_imap[i] != NULL) {
				ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
				kfree(vs->vs_imap[i], CLMAP_THRESHOLD);
		kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
		/*
		 * Direct map.  Free used clusters, then memory.
		 */
		ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
		kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));

	bs_commit(- vs->vs_size);

	zfree(vstruct_zone, vs);

int ps_map_extend(vstruct_t, unsigned int);	/* forward */
	unsigned int	new_size)
	struct vs_map	**new_imap;
	struct vs_map	*new_dmap = NULL;
	void		*old_map = NULL;
	int		old_map_size = 0;

	if (vs->vs_size >= new_size) {
		/*
		 * Someone has already done the work.
		 */

	/*
	 * If the new size extends into the indirect range, then we have one
	 * of two cases: we are going from indirect to indirect, or we are
	 * going from direct to indirect.  If we are going from indirect to
	 * indirect, then it is possible that the new size will fit in the old
	 * indirect map.  If this is the case, then just reset the size of the
	 * vstruct map and we are done.  If the new size will not
	 * fit into the old indirect map, then we have to allocate a new
	 * indirect map and copy the old map pointers into this new map.
	 *
	 * If we are going from direct to indirect, then we have to allocate a
	 * new indirect map and copy the old direct pages into the first
	 * indirect page of the new map.
	 * NOTE: allocating memory here is dangerous, as we're in the
	 */
	if (INDIRECT_CLMAP(new_size)) {
		int new_map_size = INDIRECT_CLMAP_SIZE(new_size);

		/*
		 * Get a new indirect map and zero it.
		 */
		old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size);
		if (vs->vs_indirect &&
		    (new_map_size == old_map_size)) {
			bs_commit(new_size - vs->vs_size);
			vs->vs_size = new_size;

		new_imap = (struct vs_map **)kalloc(new_map_size);
		if (new_imap == NULL) {
		memset(new_imap, 0, new_map_size);

		if (vs->vs_indirect) {
			/* Copy old entries into new map */
			memcpy(new_imap, vs->vs_imap, old_map_size);
			/* Arrange to free the old map */
			old_map = (void *) vs->vs_imap;
		} else {	/* Old map was a direct map */
			/* Allocate an indirect page */
			if ((new_imap[0] = (struct vs_map *)
			     kalloc(CLMAP_THRESHOLD)) == NULL) {
				kfree(new_imap, new_map_size);
			new_dmap = new_imap[0];
			newdsize = CLMAP_ENTRIES;
		newdsize = new_size;
		/*
		 * If the new map is a direct map, then the old map must
		 * also have been a direct map.  All we have to do is
		 * to allocate a new direct map, copy the old entries
		 * into it and free the old map.
		 */
		if ((new_dmap = (struct vs_map *)
		     kalloc(CLMAP_SIZE(new_size))) == NULL) {

		/* Free the old map */
		old_map = (void *) vs->vs_dmap;
		old_map_size = CLMAP_SIZE(vs->vs_size);

		/* Copy info from the old map into the new map */
		memcpy(new_dmap, vs->vs_dmap, old_map_size);

		/* Initialize the rest of the new map */
		for (i = vs->vs_size; i < newdsize; i++)
			VSM_CLR(new_dmap[i]);

		vs->vs_imap = new_imap;
		vs->vs_indirect = TRUE;
		vs->vs_dmap = new_dmap;
	bs_commit(new_size - vs->vs_size);
	vs->vs_size = new_size;
		kfree(old_map, old_map_size);
	struct clmap	*clmap,
	vm_offset_t	cluster;	/* The cluster of offset.	*/
	vm_offset_t	newcl;		/* The new cluster allocated.	*/
	struct vs_map	*vsmap;

	ASSERT(vs->vs_dmap);
	cluster = atop_32(offset) >> vs->vs_clshift;

	/*
	 * Initialize cluster error value
	 */
	clmap->cl_error = 0;

	/*
	 * If the object has grown, extend the page map.
	 */
	if (cluster >= vs->vs_size) {
		if (flag == CL_FIND) {
			/* Do not allocate if just doing a lookup */
			return (vm_offset_t) -1;
		if (ps_map_extend(vs, cluster + 1)) {
			return (vm_offset_t) -1;

	/*
	 * Look for the desired cluster.  If the map is indirect, then we
	 * have a two level lookup.  First find the indirect block, then
	 * find the actual cluster.  If the indirect block has not yet
	 * been allocated, then do so.  If the cluster has not yet been
	 * allocated, then do so.
	 *
	 * If any of the allocations fail, then return an error.
	 * Don't allocate if just doing a lookup.
	 */
	if (vs->vs_indirect) {
		long	ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == NULL) {
			if (flag == CL_FIND) {
				return (vm_offset_t) -1;

			/* Allocate the indirect block */
			vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
			if (vsmap == NULL) {
				return (vm_offset_t) -1;
			/* Initialize the cluster offsets */
			for (i = 0; i < CLMAP_ENTRIES; i++)
			vs->vs_imap[ind_block] = vsmap;
		vsmap = vs->vs_dmap;

	vsmap += cluster%CLMAP_ENTRIES;

	/*
	 * At this point, vsmap points to the struct vs_map desired.
	 *
	 * Look in the map for the cluster, if there was an error on a
	 * previous write, flag it and return.  If it is not yet
	 * allocated, then allocate it, if we're writing; if we're
	 * doing a lookup and the cluster's not allocated, return error.
	 */
	if (VSM_ISERR(*vsmap)) {
		clmap->cl_error = VSM_GETERR(*vsmap);
		return (vm_offset_t) -1;
	} else if (VSM_ISCLR(*vsmap)) {
		if (flag == CL_FIND) {
			/*
			 * If there's an error and the entry is clear, then
			 * we've run out of swap space.  Record the error
			 */
				VSM_SETERR(*vsmap, error);
			return (vm_offset_t) -1;
			/*
			 * Attempt to allocate a cluster from the paging segment
			 */
			newcl = ps_allocate_cluster(vs, &psindex,
						    PAGING_SEGMENT_NULL);
			if (newcl == (vm_offset_t) -1) {
				return (vm_offset_t) -1;
			VSM_SETCLOFF(*vsmap, newcl);
			VSM_SETPS(*vsmap, psindex);
		newcl = VSM_CLOFF(*vsmap);

	/*
	 * Fill in pertinent fields of the clmap
	 */
	clmap->cl_ps = VSM_PS(*vsmap);
	clmap->cl_numpages = VSCLSIZE(vs);
	clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap);

	/*
	 * Byte offset in paging segment is byte offset to cluster plus
	 * byte offset within cluster.  It looks ugly, but should be
	 */
	ASSERT(trunc_page(offset) == offset);
	newcl = ptoa_32(newcl) << vs->vs_clshift;
	newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
	if (flag == CL_ALLOC) {
		/*
		 * set bits in the allocation bitmap according to which
		 * pages were requested. size is in bytes.
		 */
		i = atop_32(newoff);
		while ((size > 0) && (i < VSCLSIZE(vs))) {
			VSM_SETALLOC(*vsmap, i);
			size -= vm_page_size;
	clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap);
		/*
		 * Offset is not cluster aligned, so number of pages
		 * and bitmaps must be adjusted
		 */
		clmap->cl_numpages -= atop_32(newoff);
		CLMAP_SHIFT(clmap, vs);
		CLMAP_SHIFTALLOC(clmap, vs);

	/*
	 * The setting of valid bits and handling of write errors
	 * must be done here, while we hold the lock on the map.
	 * It logically should be done in ps_vs_write_complete().
	 * The size and error information has been passed from
	 * ps_vs_write_complete().  If the size parameter is non-zero,
	 * then there is work to be done.  If error is also non-zero,
	 * then the error number is recorded in the cluster and the
	 * entire cluster is in error.
	 */
	if (size && flag == CL_FIND) {
		vm_offset_t off = (vm_offset_t) 0;

			for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
				VSM_SETPG(*vsmap, i);
				size -= vm_page_size;
			ASSERT(i <= VSCLSIZE(vs));
			BS_STAT(clmap->cl_ps->ps_bs,
				clmap->cl_ps->ps_bs->bs_pages_out_fail +=
			off = VSM_CLOFF(*vsmap);
			VSM_SETERR(*vsmap, error);
		/*
		 * Deallocate cluster if error, and no valid pages
		 */
		if (off != (vm_offset_t) 0)
			ps_deallocate_cluster(clmap->cl_ps, off);
		return (vm_offset_t) 0;

	DP_DEBUG(DEBUG_VS_INTERNAL,
		 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
		  newcl+newoff, (int) vs, (int) vsmap, flag));
	DP_DEBUG(DEBUG_VS_INTERNAL,
		 ("  clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
		  (int) clmap->cl_ps, clmap->cl_numpages,
		  (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));

	return (newcl + newoff);
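/*
 * Illustrative note (not in the original source): the value returned by
 * ps_clmap() is ptoa_32(cluster) << vs_clshift plus
 * (offset & ((1 << (vm_page_shift + vs_clshift)) - 1)), i.e. the byte
 * offset of the cluster within the paging segment plus the byte offset
 * within the cluster; with 4 KB pages and vs_clshift == 2, cluster 3 and
 * an in-cluster offset of 0x1000 yield 3*16 KB + 4 KB = 0xD000.
 */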
void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t);	/* forward */

	vm_offset_t	cluster; /* The cluster number of offset */
	struct vs_map	*vsmap;

	/*
	 * Loop through all clusters in this range, freeing paging segment
	 * clusters and map entries as encountered.
	 */
	while (length > 0) {
		cluster = atop_32(offset) >> vs->vs_clshift;
		if (vs->vs_indirect)	/* indirect map */
			vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
			vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
		vsmap += cluster%CLMAP_ENTRIES;
		if (VSM_ISCLR(*vsmap)) {
			length -= vm_page_size;
			offset += vm_page_size;

		/*
		 * We've got a valid mapping.  Clear it and deallocate
		 * paging segment cluster pages.
		 * Optimize for entire cluster clearing.
		 */
		if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) {
			/*
			 * Not cluster aligned.
			 */
			ASSERT(trunc_page(newoff) == newoff);
			i = atop_32(newoff);
		while ((i < VSCLSIZE(vs)) && (length > 0)) {
			VSM_CLRPG(*vsmap, i);
			VSM_CLRALLOC(*vsmap, i);
			length -= vm_page_size;
			offset += vm_page_size;

		/*
		 * If map entry is empty, clear and deallocate cluster.
		 */
		if (!VSM_ALLOC(*vsmap)) {
			ps_deallocate_cluster(VSM_PS(*vsmap),
void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */

ps_vs_write_complete(
	/*
	 * Get the struct vsmap for this cluster.
	 * Use READ, even though it was written, because the
	 * cluster MUST be present, unless there was an error
	 * in the original ps_clmap (e.g. no space), in which
	 * case, nothing happens.
	 *
	 * Must pass enough information to ps_clmap to allow it
	 * to set the vs_map structure bitmap under lock.
	 */
	(void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int);	/* forward */

vs_cl_write_complete(
	__unused paging_segment_t	ps,
	__unused vm_offset_t		addr,
//	kern_return_t	kr;

		/*
		 * For internal objects, the error is recorded on a
		 * per-cluster basis by ps_clmap() which is called
		 * by ps_vs_write_complete() below.
		 */
		dprintf(("write failed error = 0x%x\n", error));
		/* add upl_abort code here */
		GSTAT(global_stats.gs_pages_out += atop_32(size));
	/*
	 * Notify the vstruct mapping code, so it can do its accounting.
	 */
	ps_vs_write_complete(vs, offset, size, error);

		ASSERT(vs->vs_async_pending > 0);
		vs->vs_async_pending -= size;
		if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
			vs->vs_waiting_async = FALSE;
			/* mutex_unlock(&vs->vs_waiting_async); */
			thread_wakeup(&vs->vs_async_pending);
#ifdef DEVICE_PAGING
kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t);

	MACH_PORT_FACE		reply_port,
	kern_return_t		device_code,
	io_buf_len_t		bytes_written)
	struct vs_async	*vsa;

	vsa = (struct vs_async *)
		((struct vstruct_alias *)(reply_port->alias))->vs;

	if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
		device_code = KERN_FAILURE;

	vsa->vsa_error = device_code;

	ASSERT(vsa->vsa_vs != VSTRUCT_NULL);
	if(vsa->vsa_flags & VSA_TRANSFER) {
		/* revisit when async disk segments redone */
		if(vsa->vsa_error) {
			/* need to consider error condition. re-write data or */
			/* throw it away here. */
			vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr);
		ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
				     vsa->vsa_size, vsa->vsa_error);
		vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset,
				     vsa->vsa_addr, vsa->vsa_size, TRUE,

	return KERN_SUCCESS;

kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
device_write_reply_inband(
	MACH_PORT_FACE		reply_port,
	kern_return_t		return_code,
	io_buf_len_t		bytes_written)
	panic("device_write_reply_inband: illegal");
	return KERN_SUCCESS;

kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t);
	MACH_PORT_FACE		reply_port,
	kern_return_t		return_code,
	mach_msg_type_number_t	dataCnt)
	struct vs_async	*vsa;
	vsa = (struct vs_async *)
		((struct vstruct_alias *)(reply_port->alias))->vs;
	vsa->vsa_addr = (vm_offset_t)data;
	vsa->vsa_size = (vm_size_t)dataCnt;
	vsa->vsa_error = return_code;
	thread_wakeup(&vsa->vsa_lock);
	return KERN_SUCCESS;

kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t);
device_read_reply_inband(
	MACH_PORT_FACE		reply_port,
	kern_return_t		return_code,
	io_buf_ptr_inband_t	data,
	mach_msg_type_number_t	dataCnt)
	panic("device_read_reply_inband: illegal");
	return KERN_SUCCESS;

kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
device_read_reply_overwrite(
	MACH_PORT_FACE		reply_port,
	kern_return_t		return_code,
	io_buf_len_t		bytes_read)
	panic("device_read_reply_overwrite: illegal\n");
	return KERN_SUCCESS;

kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE);
	MACH_PORT_FACE		reply_port,
	kern_return_t		return_code,
	MACH_PORT_FACE		device_port)
	panic("device_open_reply: illegal\n");
	return KERN_SUCCESS;
	paging_segment_t	ps,
	vm_offset_t		*bufferp,
	unsigned int		*residualp,
	recnum_t	dev_offset;
	unsigned int	bytes_wanted;
	unsigned int	bytes_read;
	unsigned int	total_read;
	vm_offset_t	dev_buffer;
	vm_offset_t	buf_ptr;
	unsigned int	records_read;
	struct vs_async	*vsa;
	mutex_t		vs_waiting_read_reply;
	vm_map_copy_t	device_data = NULL;
	default_pager_thread_t *dpt = NULL;

	device = dev_port_lookup(ps->ps_device);
	clustered_reads[atop_32(size)]++;

	dev_offset = (ps->ps_offset +
		      (offset >> (vm_page_shift - ps->ps_record_shift)));
	bytes_wanted = size;
	*bufferp = (vm_offset_t)NULL;

		vsa = VS_ALLOC_ASYNC();
			vsa->vsa_offset = 0;
		mutex_init(&vsa->vsa_lock, 0);
		ip_lock(vsa->reply_port);
		vsa->reply_port->ip_sorights++;
		ip_reference(vsa->reply_port);
		ip_unlock(vsa->reply_port);
		kr = ds_device_read_common(device,
			(mach_msg_type_name_t)
				MACH_MSG_TYPE_MOVE_SEND_ONCE,
			(IO_READ | IO_CALL),
			(io_buf_ptr_t *) &dev_buffer,
			(mach_msg_type_number_t *) &bytes_read);
		if(kr == MIG_NO_REPLY) {
			assert_wait(&vsa->vsa_lock, THREAD_UNINT);
			thread_block(THREAD_CONTINUE_NULL);
			dev_buffer = vsa->vsa_addr;
			bytes_read = (unsigned int)vsa->vsa_size;
			kr = vsa->vsa_error;

		if (kr != KERN_SUCCESS || bytes_read == 0) {
		total_read += bytes_read;

		/*
		 * If we got the entire range, use the returned dev_buffer.
		 */
		if (bytes_read == size) {
			*bufferp = (vm_offset_t)dev_buffer;

		dprintf(("read only %d bytes out of %d\n",
			 bytes_read, bytes_wanted));
			dpt = get_read_buffer();
			buf_ptr = dpt->dpt_buffer;
			*bufferp = (vm_offset_t)buf_ptr;
		/*
		 * Otherwise, copy the data into the provided buffer (*bufferp)
		 * and append the rest of the range as it comes in.
		 */
		memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
		buf_ptr += bytes_read;
		bytes_wanted -= bytes_read;
		records_read = (bytes_read >>
				(vm_page_shift - ps->ps_record_shift));
		dev_offset += records_read;
		DP_DEBUG(DEBUG_VS_INTERNAL,
			 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
			  dev_buffer, bytes_read));
		if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
			Panic("dealloc buf");
	} while (bytes_wanted);

	*residualp = size - total_read;
	if((dev_buffer != *bufferp) && (total_read != 0)) {
		vm_offset_t temp_buffer;
		vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
		memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
		if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
					   VM_MAP_COPYIN_OPT_SRC_DESTROY |
					   VM_MAP_COPYIN_OPT_STEAL_PAGES |
					   VM_MAP_COPYIN_OPT_PMAP_ENTER,
					   (vm_map_copy_t *)&device_data, FALSE))
			panic("ps_read_device: cannot copyin locally provided buffer\n");
	else if((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)){
		if(vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
					   VM_MAP_COPYIN_OPT_SRC_DESTROY |
					   VM_MAP_COPYIN_OPT_STEAL_PAGES |
					   VM_MAP_COPYIN_OPT_PMAP_ENTER,
					   (vm_map_copy_t *)&device_data, FALSE))
			panic("ps_read_device: cannot copyin backing store provided buffer\n");

	*bufferp = (vm_offset_t)device_data;

		/* Free the receive buffer */
		dpt->checked_out = 0;
		thread_wakeup(&dpt_array);

	return KERN_SUCCESS;
	paging_segment_t	ps,
	struct vs_async		*vsa)
	recnum_t	dev_offset;
	io_buf_len_t	bytes_to_write, bytes_written;
	recnum_t	records_written;
	MACH_PORT_FACE	reply_port;

	clustered_writes[atop_32(size)]++;

	dev_offset = (ps->ps_offset +
		      (offset >> (vm_page_shift - ps->ps_record_shift)));
	bytes_to_write = size;

		/*
		 * Asynchronous write.
		 */
		reply_port = vsa->reply_port;
		ip_lock(reply_port);
		reply_port->ip_sorights++;
		ip_reference(reply_port);
		ip_unlock(reply_port);

		device = dev_port_lookup(ps->ps_device);

		vsa->vsa_addr = addr;
		kr=ds_device_write_common(device,
			(mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
			(io_buf_ptr_t) addr,
			(IO_WRITE | IO_CALL),
		if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
			dprintf(("%s0x%x, addr=0x%x,"
				 "size=0x%x,offset=0x%x\n",
				 "device_write_request returned ",
				 kr, addr, size, offset));
				ps->ps_bs->bs_pages_out_fail += atop_32(size));
			/* do the completion notification to free resources */
			device_write_reply(reply_port, kr, 0);

		/*
		 * Synchronous write.
		 */
			device = dev_port_lookup(ps->ps_device);
			kr=ds_device_write_common(device,
				(io_buf_ptr_t) addr,
				(IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
			if (kr != KERN_SUCCESS) {
				dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
					 "device_write returned ",
					 kr, addr, size, offset));
					ps->ps_bs->bs_pages_out_fail += atop_32(size));
			if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
				Panic("fragmented write");
			records_written = (bytes_written >>
					   (vm_page_shift - ps->ps_record_shift));
			dev_offset += records_written;
			if (bytes_written != bytes_to_write) {
				dprintf(("wrote only %d bytes out of %d\n",
					 bytes_written, bytes_to_write));
			bytes_to_write -= bytes_written;
			addr += bytes_written;
		} while (bytes_to_write > 0);

	return PAGER_SUCCESS;
#else /* !DEVICE_PAGING */
	__unused paging_segment_t	ps,
	__unused vm_offset_t		offset,
	__unused vm_offset_t		*bufferp,
	__unused unsigned int		size,
	__unused unsigned int		*residualp,
	panic("ps_read_device not supported");

	__unused paging_segment_t	ps,
	__unused vm_offset_t		offset,
	__unused vm_offset_t		addr,
	__unused unsigned int		size,
	__unused struct vs_async	*vsa)
	panic("ps_write_device not supported");

#endif /* DEVICE_PAGING */
void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t);	/* forward */

void
pvs_object_data_provided(
	__unused vstruct_t	vs,
	__unused upl_t		upl,
	__unused upl_offset_t	offset,
	upl_size_t		size)
{
	DP_DEBUG(DEBUG_VS_INTERNAL,
		 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
		  upl, offset, size));

	GSTAT(global_stats.gs_pages_in += atop_32(size));

#if USE_PRECIOUS
	ps_clunmap(vs, offset, size);
#endif /* USE_PRECIOUS */
}
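
/*
 * Illustrative note (not from the original source): when USE_PRECIOUS is
 * configured, the read path below asks for UPL_PRECIOUS pages and
 * pvs_object_data_provided() releases the backing-store copy immediately
 * via ps_clunmap(); without USE_PRECIOUS the backing copy is retained and
 * only the statistics are updated here.
 */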
kern_return_t
pvs_cluster_read(
	vstruct_t	vs,
	vm_offset_t	vs_offset,
	vm_size_t	cnt)
{
	kern_return_t		error = KERN_SUCCESS;
	unsigned int		request_flags;
	vm_offset_t		ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	paging_segment_t	psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;
	cl_mask = cl_size - 1;

	/*
	 * This loop will be executed multiple times until the entire
	 * request has been satisfied... if the request spans cluster
	 * boundaries, the clusters will be checked for logical continuity;
	 * if contiguous, the I/O request will span multiple clusters, otherwise
	 * it will be broken up into the minimal set of I/Os.
	 *
	 * If there are holes in a request (either unallocated pages in a paging
	 * segment or an unallocated paging segment), we stop
	 * reading at the hole, inform the VM of any data read, inform
	 * the VM of an unavailable range, then loop again, hoping to
	 * find valid pages later in the requested range. This continues until
	 * the entire range has been examined, and read, if present.
	 */
#if USE_PRECIOUS
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif

	assert(dp_encryption_inited);
	if (dp_encryption) {
		/*
		 * ENCRYPTED SWAP:
		 * request that the UPL be prepared for
		 * decryption.
		 */
		request_flags |= UPL_ENCRYPT;
	}

	while (cnt && (error == KERN_SUCCESS)) {
		int page_list_count;

		if ((vs_offset & cl_mask) &&
		    (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
			size = VM_SUPER_CLUSTER;
			size -= vs_offset & cl_mask;
		} else if (cnt > VM_SUPER_CLUSTER) {
			size = VM_SUPER_CLUSTER;
		} else {
			size = cnt;
		}
		cnt -= size;

		ps_info_valid = 0;
		seg_index = 0;

		while (size > 0 && error == KERN_SUCCESS) {
			vm_offset_t cur_offset;

			if ( !ps_info_valid) {
				ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
				psp[seg_index] = CLMAP_PS(clmap);
				ps_info_valid = 1;
			}
			/*
			 * skip over unallocated physical segments
			 */
			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				abort_size = cl_size - (vs_offset & cl_mask);
				abort_size = MIN(abort_size, size);

				page_list_count = 0;
				memory_object_super_upl_request(
					vs->vs_control,
					(memory_object_offset_t)vs_offset,
					abort_size, abort_size,
					&upl, NULL, &page_list_count,
					request_flags);

				if (clmap.cl_error) {
					upl_abort(upl, UPL_ABORT_ERROR);
				} else {
					upl_abort(upl, UPL_ABORT_UNAVAILABLE);
				}
				upl_deallocate(upl);

				size -= abort_size;
				vs_offset += abort_size;

				seg_index++;
				ps_info_valid = 0;
				continue;
			}
			cl_index = (vs_offset & cl_mask) / vm_page_size;

			for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
				/*
				 * skip over unallocated pages
				 */
				if (CLMAP_ISSET(clmap, cl_index))
					break;
				abort_size += vm_page_size;
			}
			if (abort_size) {
				/*
				 * Let VM system know about holes in clusters.
				 */
				GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));

				page_list_count = 0;
				memory_object_super_upl_request(
					vs->vs_control,
					(memory_object_offset_t)vs_offset,
					abort_size, abort_size,
					&upl, NULL, &page_list_count,
					request_flags);

				upl_abort(upl, UPL_ABORT_UNAVAILABLE);
				upl_deallocate(upl);

				size -= abort_size;
				vs_offset += abort_size;

				if (cl_index == pages_in_cl) {
					/*
					 * if we're at the end of this physical cluster
					 * then bump to the next one and continue looking
					 */
					seg_index++;
					ps_info_valid = 0;
					continue;
				}
				if (size == 0)
					break;
			}
			/*
			 * remember the starting point of the first allocated page
			 * for the I/O we're about to issue
			 */
			beg_pseg = seg_index;
			beg_indx = cl_index;
			cur_offset = vs_offset;

			/*
			 * calculate the size of the I/O that we can do...
			 * this may span multiple physical segments if
			 * they are contiguous
			 */
			for (xfer_size = 0; xfer_size < size; ) {

				while (cl_index < pages_in_cl && xfer_size < size) {
					/*
					 * accumulate allocated pages within
					 * a physical segment
					 */
					if (CLMAP_ISSET(clmap, cl_index)) {
						xfer_size += vm_page_size;
						cur_offset += vm_page_size;
						cl_index++;

						BS_STAT(psp[seg_index]->ps_bs,
							psp[seg_index]->ps_bs->bs_pages_in++);
					} else
						break;
				}
				if (cl_index < pages_in_cl || xfer_size >= size) {
					/*
					 * we've hit an unallocated page or
					 * the end of this request... go fire
					 * the I/O
					 */
					break;
				}
				/*
				 * we've hit the end of the current physical
				 * segment and there's more to do, so try
				 * moving to the next one
				 */
				seg_index++;

				ps_offset[seg_index] = ps_clmap(vs,
								cur_offset & ~cl_mask,
								&clmap, CL_FIND, 0, 0);
				psp[seg_index] = CLMAP_PS(clmap);
				ps_info_valid = 1;

				if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) ||
				    (psp[seg_index - 1] != psp[seg_index])) {
					/*
					 * if the physical segment we're about
					 * to step into is not contiguous to
					 * the one we're currently in, or it's
					 * in a different paging file, or
					 * it hasn't been allocated....
					 * we stop here and generate the I/O
					 */
					break;
				}
				/*
				 * start with first page of the next physical
				 * segment
				 */
				cl_index = 0;
			}
			/*
			 * we have a contiguous range of allocated pages
			 * to read from
			 */
			page_list_count = 0;
			memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)vs_offset,
					xfer_size, xfer_size,
					&upl, NULL, &page_list_count,
					request_flags | UPL_SET_INTERNAL);

			error = ps_read_file(psp[beg_pseg],
					upl, (upl_offset_t) 0,
					ps_offset[beg_pseg] + (beg_indx * vm_page_size),
					xfer_size, &residual, 0);

			/*
			 * Adjust counts and send response to VM. Optimize
			 * for the common case, i.e. no error and/or partial
			 * data. If there was an error, then we need to error
			 * the entire range, even if some data was successfully
			 * read. If there was a partial read we may supply some
			 * data and may error some as well. In all cases the
			 * VM must receive some notification for every page
			 * in the range.
			 */
			if ((error == KERN_SUCCESS) && (residual == 0)) {
				/*
				 * Got everything we asked for, supply the data
				 * to the VM. Note that as a side effect of
				 * supplying the data, the buffer holding the
				 * supplied data is deallocated from the pager's
				 * address space.
				 */
				pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
			} else {
				failed_size = xfer_size;

				if (error == KERN_SUCCESS) {
					if (residual == xfer_size) {
						/*
						 * If a read operation returns no error
						 * and no data moved, we turn it into
						 * an error, assuming we're reading at
						 * or beyond the end of the file or device.
						 * Fall through and error the entire
						 * range.
						 */
						error = KERN_FAILURE;
					} else {
						/*
						 * Otherwise, we have a partial read. If
						 * the part read is an integral number
						 * of pages, supply it. Otherwise round
						 * it up to a page boundary, zero fill
						 * the unread part, and supply it.
						 * Fall through and error the remainder
						 * of the range, if any.
						 */
						lsize = (xfer_size - residual);
						pvs_object_data_provided(vs, upl, vs_offset, lsize);

						if (lsize < xfer_size) {
							failed_size = xfer_size - lsize;
							error = KERN_FAILURE;
						}
					}
				}
			}
			/*
			 * If there was an error in any part of the range, tell
			 * the VM. Note that error is explicitly checked again
			 * since it can be modified above.
			 */
			if (error != KERN_SUCCESS) {
				BS_STAT(psp[beg_pseg]->ps_bs,
					psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
			}
			size -= xfer_size;
			vs_offset += xfer_size;
		}

	} /* END while (cnt && (error == 0)) */

	return error;
}
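
/*
 * Illustrative note (not from the original source): the read loop above
 * carves each request into chunks of at most VM_SUPER_CLUSTER bytes and,
 * within a chunk, walks the cluster map one physical segment at a time.
 * A request larger than VM_SUPER_CLUSTER is therefore serviced as several
 * super-cluster sized reads, fewer or smaller whenever holes or
 * non-contiguous segments force the I/O to be split earlier.
 */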
int vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
	vstruct_t	vs,
	upl_t		internal_upl,
	upl_offset_t	offset,
	upl_size_t	cnt,
	boolean_t	dp_internal,
	int		flags)
{
	upl_size_t		transfer_size;
	vm_offset_t		actual_offset;	/* Offset within paging segment */
	paging_segment_t	ps;
	vm_offset_t		mobj_base_addr;
	vm_offset_t		mobj_target_addr;

	upl_page_info_t		*pl;
	unsigned int		cl_size;
	unsigned int		seg_size;

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;

	if (!dp_internal) {
		int		page_list_count;
		unsigned int	super_size;
		upl_offset_t	upl_offset;
		vm_offset_t	seg_offset;
		vm_offset_t	ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
		paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];

		if (bs_low) {
			super_size = cl_size;

			request_flags = UPL_NOBLOCK |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		} else {
			super_size = VM_SUPER_CLUSTER;

			request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		}

		if (!dp_encryption_inited) {
			/*
			 * ENCRYPTED SWAP:
			 * Once we've started using swap, we
			 * can't change our mind on whether
			 * it needs to be encrypted or not.
			 */
			dp_encryption_inited = TRUE;
		}
		if (dp_encryption) {
			/*
			 * ENCRYPTED SWAP:
			 * request that the UPL be prepared for
			 * encryption.
			 */
			request_flags |= UPL_ENCRYPT;
			flags |= UPL_PAGING_ENCRYPTED;
		}

		page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)offset,
				cnt, super_size,
				&upl, NULL, &page_list_count,
				request_flags | UPL_FOR_PAGEOUT);

		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

		seg_size = cl_size - (upl->offset % cl_size);
		upl_offset = upl->offset & ~(cl_size - 1);

		for (seg_index = 0, transfer_size = upl->size;
		     transfer_size > 0; ) {
			ps_offset[seg_index] =
				ps_clmap(vs, upl_offset, &clmap, CL_ALLOC, cl_size, 0);

			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				upl_abort(upl, 0);
				upl_deallocate(upl);

				return KERN_FAILURE;
			}
			psp[seg_index] = CLMAP_PS(clmap);

			if (transfer_size > seg_size) {
				transfer_size -= seg_size;
				upl_offset += cl_size;
				seg_size = cl_size;
				seg_index++;
			} else
				transfer_size = 0;
		}
		/*
		 * Ignore any non-present pages at the end of the
		 * UPL.
		 */
		for (page_index = upl->size / vm_page_size; page_index > 0;)
			if (UPL_PAGE_PRESENT(pl, --page_index))
				break;
		num_of_pages = page_index + 1;

		base_index = (upl->offset % cl_size) / PAGE_SIZE;

		for (page_index = 0; page_index < num_of_pages; ) {
			/*
			 * skip over non-dirty pages
			 */
			for ( ; page_index < num_of_pages; page_index++) {
				if (UPL_DIRTY_PAGE(pl, page_index) ||
				    UPL_PRECIOUS_PAGE(pl, page_index))
					/*
					 * this is a page we need to write
					 * go see if we can buddy it up with
					 * others that are contiguous to it
					 */
					break;
				/*
				 * if the page is not-dirty, but present we
				 * need to commit it... This is an unusual
				 * case since we only asked for dirty pages
				 */
				if (UPL_PAGE_PRESENT(pl, page_index)) {
					boolean_t empty = FALSE;
					upl_commit_range(upl,
							 page_index * vm_page_size,
							 vm_page_size,
							 UPL_COMMIT_NOTIFY_EMPTY,
							 pl,
							 page_list_count,
							 &empty);
					if (empty) {
						assert(page_index == num_of_pages - 1);
						upl_deallocate(upl);
					}
				}
			}
			if (page_index == num_of_pages)
				/*
				 * no more pages to look at, we're out of here
				 */
				break;

			/*
			 * gather up contiguous dirty pages... we have at
			 * least 1, otherwise we would have bailed above
			 * make sure that each physical segment that we step
			 * into is contiguous to the one we're currently in
			 * if it's not, we have to stop and write what we have
			 */
			for (first_dirty = page_index;
			     page_index < num_of_pages; ) {
				if ( !UPL_DIRTY_PAGE(pl, page_index) &&
				     !UPL_PRECIOUS_PAGE(pl, page_index))
					break;
				page_index++;
				/*
				 * if we just looked at the last page in the UPL
				 * we don't need to check for physical segment
				 * continuity
				 */
				if (page_index < num_of_pages) {
					int cur_seg;
					int nxt_seg;

					cur_seg = (base_index + (page_index - 1))/pages_in_cl;
					nxt_seg = (base_index + page_index)/pages_in_cl;

					if (cur_seg != nxt_seg) {
						if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) ||
						    (psp[cur_seg] != psp[nxt_seg]))
							/*
							 * if the segment we're about
							 * to step into is not
							 * contiguous to the one we're
							 * currently in, or it's in a
							 * different paging file....
							 * we stop here and generate
							 * the I/O
							 */
							break;
					}
				}
			}
			num_dirty = page_index - first_dirty;

			if (num_dirty) {
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				while (transfer_size) {

					if ((seg_size = cl_size -
					     ((upl->offset + upl_offset) % cl_size)) > transfer_size)
						seg_size = transfer_size;

					ps_vs_write_complete(vs,
							     upl->offset + upl_offset,
							     seg_size, error);

					transfer_size -= seg_size;
					upl_offset += seg_size;
				}
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				seg_index = (base_index + first_dirty) / pages_in_cl;
				seg_offset = (upl->offset + upl_offset) % cl_size;

				error = ps_write_file(psp[seg_index],
						upl, upl_offset,
						ps_offset[seg_index] + seg_offset,
						transfer_size, flags);
			} else {
				boolean_t empty = FALSE;
				upl_abort_range(upl,
						first_dirty * vm_page_size,
						num_dirty * vm_page_size,
						UPL_ABORT_NOTIFY_EMPTY,
						&empty);
				if (empty) {
					assert(page_index == num_of_pages);
					upl_deallocate(upl);
				}
			}
		}

	} else {

		assert(cnt <= (vm_page_size << vs->vs_clshift));
		list_size = cnt;

		/* The caller provides a mapped_data which is derived  */
		/* from a temporary object. The targeted pages are     */
		/* guaranteed to be set at offset 0 in the mapped_data */
		/* The actual offset however must still be derived     */
		/* from the offset in the vs in question               */
		mobj_base_addr = offset;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
					&clmap, CL_ALLOC,
					transfer_size < cl_size ?
					transfer_size : cl_size, 0);
			if (actual_offset == (vm_offset_t) -1) {
				error = KERN_FAILURE;
				break;
			}
			cnt = MIN(transfer_size,
				  CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);
			/* Assume that the caller has given us contiguous */
			/* pages */
			if (cnt) {
				ps_vs_write_complete(vs, mobj_target_addr,
						     cnt, error);
				error = ps_write_file(ps, internal_upl,
						      0, actual_offset,
						      cnt, flags);
				if (error)
					break;
			}
			if (error)
				break;
			actual_offset += cnt;
			mobj_target_addr += cnt;
			transfer_size -= cnt;
			cnt = 0;
		}
	}
	if (error)
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}
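
/*
 * Illustrative note (not from the original source): vs_cluster_write()
 * has two personalities.  For ordinary pageouts (dp_internal == FALSE) it
 * builds its own UPL with memory_object_super_upl_request() and writes
 * runs of contiguous dirty pages, splitting the I/O whenever the next
 * cluster is not physically adjacent or lives in a different paging file.
 * For internal transfers (dp_internal == TRUE) the caller already supplies
 * the UPL, and the data is pushed cluster by cluster through
 * ps_clmap(..., CL_ALLOC, ...) and ps_write_file().
 */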
vm_size_t
ps_vstruct_allocated_size(
	vstruct_t vs)
{
	int		num_pages;
	struct vs_map	*vsmap;
	unsigned int	i, j, k;

	num_pages = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]))
					continue;
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k))
						num_pages++;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]))
				continue;
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k))
					num_pages++;
			}
		}
	}

	return ptoa_32(num_pages);
}
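
/*
 * Illustrative note (not from the original source): VSM_BMAP() is a
 * per-cluster bitmap with one bit per page slot, so counting allocated
 * pages reduces to testing bit k for each page k of the cluster:
 *
 *	if (VSM_BMAP(vsmap[j]) & (1 << k))
 *		num_pages++;
 *
 * ps_vstruct_allocated_size() then converts the page count to bytes with
 * ptoa_32(); ps_vstruct_allocated_pages() below walks the same maps but
 * also records each page's object offset.
 */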
unsigned int
ps_vstruct_allocated_pages(
	vstruct_t		vs,
	default_pager_page_t	*pages,
	unsigned int		pages_size)
{
	unsigned int	num_pages;
	struct vs_map	*vsmap;
	vm_offset_t	offset;
	unsigned int	i, j, k;

	num_pages = 0;
	offset = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL) {
				offset += (vm_page_size * CLMAP_ENTRIES *
					   VSCLSIZE(vs));
				continue;
			}
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j])) {
					offset += vm_page_size * VSCLSIZE(vs);
					continue;
				}
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
						/* a page allocated */
						if (num_pages < pages_size)
							pages++->dpp_offset = offset;
						num_pages++;
					}
					offset += vm_page_size;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j])) {
				offset += vm_page_size * VSCLSIZE(vs);
				continue;
			}
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
					/* a page allocated */
					if (num_pages < pages_size)
						pages++->dpp_offset = offset;
					num_pages++;
				}
				offset += vm_page_size;
			}
		}
	}

	return num_pages;
}
kern_return_t
ps_vstruct_transfer_from_segment(
	vstruct_t		vs,
	paging_segment_t	segment,
	upl_t			upl)
{
	struct vs_map	*vsmap;
//	struct vs_map	old_vsmap;
//	struct vs_map	new_vsmap;
	unsigned int	i, j;

	VS_LOCK(vs);	/* block all work on this vstruct */
			/* can't allow the normal multiple write */
			/* semantic because writes may conflict */
	vs->vs_xfer_pending = TRUE;
	vs_wait_for_sync_writers(vs);
	vs_start_write(vs);
	vs_wait_for_readers(vs);
	/* we will unlock the vs to allow other writes while transferring */
	/* and will be guaranteed of the persistance of the vs struct */
	/* because the caller of ps_vstruct_transfer_from_segment bumped */
	/* vs_async_pending */
	/* OK we now have guaranteed no other parties are accessing this */
	/* vs. Now that we are also supporting simple lock versions of */
	/* vs_lock we cannot hold onto VS_LOCK as we may block below. */
	/* our purpose in holding it before was the multiple write case */
	/* we now use the boolean xfer_pending to do that. We can use */
	/* a boolean instead of a count because we have guaranteed single */
	/* file access to this code in its caller */
	VS_UNLOCK(vs);
vs_changed:
	if (vs->vs_indirect) {
		unsigned int	vsmap_size;
		int		clmap_off;

		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			clmap_off = (vm_page_size * CLMAP_ENTRIES *
				     VSCLSIZE(vs) * i);
			if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
			else
				vsmap_size = CLMAP_ENTRIES;
			for (j = 0; j < vsmap_size; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]) ||
				    (VSM_PS(vsmap[j]) != segment))
					continue;
				if (vs_cluster_transfer(vs,
					(vm_page_size * (j << vs->vs_clshift))
					+ clmap_off,
					vm_page_size << vs->vs_clshift,
					upl) != KERN_SUCCESS) {
					VS_LOCK(vs);
					vs->vs_xfer_pending = FALSE;
					VS_UNLOCK(vs);
					vs_finish_write(vs);
					return KERN_FAILURE;
				}
				/* allow other readers/writers during transfer*/
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				VS_LOCK(vs);
				vs->vs_xfer_pending = TRUE;
				vs_wait_for_sync_writers(vs);
				vs_start_write(vs);
				vs_wait_for_readers(vs);
				VS_UNLOCK(vs);
				if (!(vs->vs_indirect)) {
					goto vs_changed;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			return KERN_SUCCESS;
		}
		/* loop on clusters in the direct map */
		for (j = 0; j < vs->vs_size; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]) ||
			    (VSM_PS(vsmap[j]) != segment))
				continue;
			if (vs_cluster_transfer(vs,
				vm_page_size * (j << vs->vs_clshift),
				vm_page_size << vs->vs_clshift,
				upl) != KERN_SUCCESS) {
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				return KERN_FAILURE;
			}
			/* allow other readers/writers during transfer*/
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			VS_LOCK(vs);
			vs->vs_xfer_pending = TRUE;
			vs_wait_for_sync_writers(vs);
			vs_start_write(vs);
			vs_wait_for_readers(vs);
			VS_UNLOCK(vs);
			if (vs->vs_indirect) {
				goto vs_changed;
			}
		}
	}

	VS_LOCK(vs);
	vs->vs_xfer_pending = FALSE;
	VS_UNLOCK(vs);
	vs_finish_write(vs);
	return KERN_SUCCESS;
}
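
/*
 * Illustrative note (not from the original source): the vs_xfer_pending
 * flag acts as a single-writer gate around each cluster transfer.  The
 * pattern used above is roughly:
 *
 *	vs->vs_xfer_pending = TRUE;
 *	vs_wait_for_sync_writers(vs);
 *	vs_wait_for_readers(vs);
 *	... move one cluster with vs_cluster_transfer() ...
 *	vs->vs_xfer_pending = FALSE;
 *	vs_finish_write(vs);
 *
 * so other readers and writers are only excluded for the duration of a
 * single cluster, not for the whole segment evacuation.
 */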
vs_map_t
vs_get_map_entry(
	vstruct_t	vs,
	vm_offset_t	offset)
{
	struct vs_map	*vsmap;
	vm_offset_t	cluster;

	cluster = atop_32(offset) >> vs->vs_clshift;
	if (vs->vs_indirect) {
		long ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == (vs_map_t) NULL)
			return vsmap;
	} else
		vsmap = vs->vs_dmap;
	vsmap += cluster%CLMAP_ENTRIES;
	return vsmap;
}
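
/*
 * Illustrative example (not from the original source): assuming 4 KB
 * pages and vs_clshift == 2 (four pages per cluster), an object offset of
 * 0x23000 is page 0x23, i.e. cluster 0x23 >> 2 == 8.  For a direct map
 * the entry is simply vs_dmap[8]; for an indirect map it lives at
 * vs_imap[8 / CLMAP_ENTRIES][8 % CLMAP_ENTRIES].
 */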
kern_return_t
vs_cluster_transfer(
	vstruct_t	vs,
	vm_offset_t	offset,
	vm_size_t	cnt,
	upl_t		upl)
{
	vm_offset_t		actual_offset;
	paging_segment_t	ps;
	struct clmap		clmap;
	kern_return_t		error = KERN_SUCCESS;
	unsigned int		size, size_wanted;
	int			i;
	unsigned int		residual;
	unsigned int		unavail_size;
//	default_pager_thread_t	*dpt;
//	boolean_t		dealloc;
	struct vs_map		*vsmap_ptr = NULL;
	struct vs_map		read_vsmap;
	struct vs_map		original_read_vsmap;
	struct vs_map		write_vsmap;
//	vm_offset_t		ioaddr;

	/* vs_cluster_transfer reads in the pages of a cluster and
	 * then writes these pages back to new backing store. The
	 * segment the pages are being read from is assumed to have
	 * been taken off-line and is no longer considered for new
	 * space allocation.
	 *
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, then loop again, hoping to
	 * find valid pages later in the cluster. This continues until
	 * the entire range has been examined, and read, if present. The
	 * pages are written as they are read. If a failure occurs after
	 * some pages are written the unmap call at the bottom of the loop
	 * recovers the backing store and the old backing store remains
	 * in effect.
	 */

	VSM_CLR(write_vsmap);
	VSM_CLR(original_read_vsmap);
	/* grab the actual object's pages to sync with I/O */
	while (cnt && (error == KERN_SUCCESS)) {
		vsmap_ptr = vs_get_map_entry(vs, offset);
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (vm_offset_t) -1) {
			/*
			 * Nothing left to write in this cluster at least;
			 * set write cluster information for any previous
			 * write, clear for next cluster, if there is one
			 */
			unsigned int local_size, clmask, clsize;

			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			ASSERT(local_size);
			local_size = MIN(local_size, cnt);

			/* This cluster has no data in it beyond what may */
			/* have been found on a previous iteration through */
			/* the loop "write_vsmap" */
			*vsmap_ptr = write_vsmap;
			VSM_CLR(write_vsmap);
			VSM_CLR(original_read_vsmap);

			cnt -= local_size;
			offset += local_size;
			continue;
		}

		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		ASSERT(ps);
		size = 0;
		unavail_size = 0;
		for (i = 0;
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				if (size != 0)
					break;
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			ASSERT(unavail_size);
			cnt -= unavail_size;
			offset += unavail_size;
			if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
			    == 0) {
				/* There is no more to transfer in this
				 * cluster
				 */
				*vsmap_ptr = write_vsmap;
				VSM_CLR(write_vsmap);
				VSM_CLR(original_read_vsmap);
			}
			continue;
		}

		if (VSM_ISCLR(original_read_vsmap))
			original_read_vsmap = *vsmap_ptr;

		if (ps->ps_segtype == PS_PARTITION) {
			/*
			NEED TO ISSUE WITH SYNC & NO COMMIT
			error = ps_read_device(ps, actual_offset, &buffer,
				       size, &residual, flags);
			*/
		} else {
			/* NEED TO ISSUE WITH SYNC & NO COMMIT */
			error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
					size, &residual,
					(UPL_IOSYNC | UPL_NOCOMMIT));
		}

		read_vsmap = *vsmap_ptr;

		/*
		 * Adjust counts and put data in new BS. Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			/*
			 * Got everything we asked for, supply the data to
			 * the new BS. Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space unless
			 * the write is unsuccessful.
			 */

			/* note buffer will be cleaned up in all cases by */
			/* internal_cluster_write or if an error on write */
			/* the vm_map_copy_page_discard call */
			*vsmap_ptr = write_vsmap;

			if (vs_cluster_write(vs, upl, offset,
					size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
				error = KERN_FAILURE;
				if (!(VSM_ISCLR(*vsmap_ptr))) {
					/* unmap the new backing store object */
					ps_clunmap(vs, offset, size);
				}
				/* original vsmap */
				*vsmap_ptr = original_read_vsmap;
				VSM_CLR(write_vsmap);
			} else {
				if ((offset + size) &
				    ((vm_page_size << vs->vs_clshift)
				     - 1)) {
					/* There is more to transfer in this
					 * cluster
					 */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
				} else {
					/* discard the old backing object */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
					*vsmap_ptr = write_vsmap;
					VSM_CLR(write_vsmap);
					VSM_CLR(original_read_vsmap);
				}
			}
		} else {
			size_wanted = size;
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond the end of the device.
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				} else {
					/*
					 * Otherwise, we have a partial read.
					 * This is also considered an error
					 * for the purposes of cluster transfer.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				}
			}
		}
		cnt -= size;
		offset += size;

	} /* END while (cnt && (error == 0)) */

	if (!VSM_ISCLR(write_vsmap))
		*vsmap_ptr = write_vsmap;

	return error;
}
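
/*
 * Illustrative note (not from the original source): the three vs_map
 * snapshots above act as a small undo log.  original_read_vsmap remembers
 * the cluster mapping before anything was moved, read_vsmap is the
 * mapping covering the data just read, and write_vsmap accumulates the
 * new mapping as pages land on the new backing store.  On any error the
 * code restores *vsmap_ptr from original_read_vsmap and unmaps whatever
 * was written, so the old backing store remains authoritative.
 */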
kern_return_t
default_pager_add_file(
	MACH_PORT_FACE	backing_store,
	vnode_ptr_t	vp,
	int		record_size,
	vm_size_t	size)
{
	backing_store_t		bs;
	paging_segment_t	ps;
	int			i;
	unsigned int		j;
	int			error;

	if ((bs = backing_store_lookup(backing_store))
	    == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		if (ps->ps_segtype != PS_FILE)
			continue;

		/*
		 * Check for overlap on same device.
		 */
		if (ps->ps_vnode == (struct vnode *)vp) {
			BS_UNLOCK(bs);
			return KERN_INVALID_ARGUMENT;
		}
	}

	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}

	ps->ps_segtype = PS_FILE;
	ps->ps_vnode = (struct vnode *)vp;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = size;
	ps->ps_pgnum = size >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;

	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
	if (!ps->ps_bmap) {
		kfree(ps, sizeof *ps);
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}
	for (j = 0; j < ps->ps_ncls; j++) {
		clrbit(ps->ps_bmap, j);
	}

	ps->ps_going_away = FALSE;
	ps->ps_bs = bs;

	if ((error = ps_enter(ps)) != 0) {
		kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree(ps, sizeof *ps);
		BS_UNLOCK(bs);
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

	dp_pages_free += ps->ps_pgcount;

	BS_UNLOCK(bs);

	bs_more_space(ps->ps_clcount);

	DP_DEBUG(DEBUG_BS_INTERNAL,
		 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
		  device, offset, size, record_size,
		  ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
}
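
/*
 * Illustrative example (not from the original source): assuming 4096-byte
 * pages, a swap file advertised as 262144 records of 4096 bytes each
 * (1 GB) gives ps_record_shift == 0, so ps_pgnum == ps_recnum == 262144.
 * With a backing-store cluster size of 2^2 pages, ps_ncls == 262144 >> 2
 * == 65536 clusters, and bs_pages_free/bs_pages_total grow by
 * ps_clcount << ps_clshift == 262144 pages.
 */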
kern_return_t
ps_read_file(
	paging_segment_t	ps,
	upl_t			upl,
	upl_offset_t		upl_offset,
	vm_offset_t		offset,
	upl_size_t		size,
	unsigned int		*residualp,
	int			flags)
{
	vm_object_offset_t	f_offset;
	int			error = 0;
	int			result;

	assert(dp_encryption_inited);

	clustered_reads[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	/* for transfer case we need to pass uploffset and flags */
	error = vnode_pagein(ps->ps_vnode,
			upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);

	/* The vnode_pagein semantic is somewhat at odds with the existing   */
	/* device_read semantic. Partial reads are not experienced at this   */
	/* level. It is up to the bit map code and cluster read code to      */
	/* check that requested data locations are actually backed, and the  */
	/* pagein code to either read all of the requested data or return an */
	/* error.                                                             */

	if (error)
		result = KERN_FAILURE;
	else {
		*residualp = 0;
		result = KERN_SUCCESS;
	}
	return result;
}
kern_return_t
ps_write_file(
	paging_segment_t	ps,
	upl_t			upl,
	upl_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	int			flags)
{
	vm_object_offset_t	f_offset;
	kern_return_t		result;

	assert(dp_encryption_inited);

	clustered_writes[atop_32(size)]++;
	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	if (flags & UPL_PAGING_ENCRYPTED) {
		/*
		 * ENCRYPTED SWAP:
		 * encrypt all the pages that we're going
		 * to pageout.
		 */
		upl_encrypt(upl, upl_offset, size);
	}

	if (vnode_pageout(ps->ps_vnode,
			upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
		result = KERN_FAILURE;
	else
		result = KERN_SUCCESS;

	return result;
}
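
/*
 * Illustrative note (not from the original source): unlike the
 * DEVICE_PAGING path, the file-backed path delegates the actual I/O to
 * the vnode pager (vnode_pagein/vnode_pageout), so partial transfers are
 * not reported back at this level; when encryption is enabled the UPL is
 * encrypted with upl_encrypt() before vnode_pageout() is called.
 */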
kern_return_t
default_pager_triggers( __unused MACH_PORT_FACE default_pager,
	int		hi_wat,
	int		lo_wat,
	int		flags,
	MACH_PORT_FACE	trigger_port)
{
	MACH_PORT_FACE	release;
	kern_return_t	kr;

	if (flags == SWAP_ENCRYPT_ON) {
		/* ENCRYPTED SWAP: turn encryption on */
		release = trigger_port;
		if (!dp_encryption_inited) {
			dp_encryption_inited = TRUE;
			dp_encryption = TRUE;
			kr = KERN_SUCCESS;
		} else {
			kr = KERN_FAILURE;
		}
	} else if (flags == SWAP_ENCRYPT_OFF) {
		/* ENCRYPTED SWAP: turn encryption off */
		release = trigger_port;
		if (!dp_encryption_inited) {
			dp_encryption_inited = TRUE;
			dp_encryption = FALSE;
			kr = KERN_SUCCESS;
		} else {
			kr = KERN_FAILURE;
		}
	} else if (flags == HI_WAT_ALERT) {
		release = min_pages_trigger_port;
		min_pages_trigger_port = trigger_port;
		minimum_pages_remaining = hi_wat/vm_page_size;
		kr = KERN_SUCCESS;
	} else if (flags == LO_WAT_ALERT) {
		release = max_pages_trigger_port;
		max_pages_trigger_port = trigger_port;
		maximum_pages_free = lo_wat/vm_page_size;
		kr = KERN_SUCCESS;
	} else {
		release = trigger_port;
		kr = KERN_INVALID_ARGUMENT;
	}

	if (IP_VALID(release))
		ipc_port_release_send(release);

	return kr;
}
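
/*
 * Illustrative example (not from the original source): a HI_WAT_ALERT
 * registration supplies hi_wat in bytes; with 4096-byte pages a hi_wat of
 * 16 MB becomes minimum_pages_remaining == 16777216 / 4096 == 4096 pages.
 * The previously registered trigger port, if any, is returned in
 * `release' and its send right is dropped above.
 */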
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL	3	/* time between free level checks */
#define PF_LATENCY	10	/* number of intervals before release */

static int dp_pages_free_low_count = 0;
thread_call_t default_pager_backing_store_monitor_callout;

void
default_pager_backing_store_monitor(__unused thread_call_param_t p1,
				    __unused thread_call_param_t p2)
{
//	unsigned long long	average;
	ipc_port_t	trigger;
	uint64_t	deadline;

	/*
	 * We determine whether it will be safe to release some
	 * backing store by watching the free page level. If
	 * it remains below the maximum_pages_free threshold for
	 * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
	 * then we deem it safe.
	 *
	 * Note that this establishes a maximum rate at which backing
	 * store will be released, as each notification (currently)
	 * only results in a single backing store object being
	 * released.
	 */
	if (dp_pages_free > maximum_pages_free) {
		dp_pages_free_low_count++;
	} else {
		dp_pages_free_low_count = 0;
	}

	/* decide whether to send notification */
	trigger = IP_NULL;
	if (max_pages_trigger_port &&
	    (backing_store_release_trigger_disable == 0) &&
	    (dp_pages_free_low_count > PF_LATENCY)) {
		trigger = max_pages_trigger_port;
		max_pages_trigger_port = NULL;
	}

	/* send notification */
	if (trigger != IP_NULL) {
		if (backing_store_release_trigger_disable != 0) {
			assert_wait((event_t)
				    &backing_store_release_trigger_disable,
				    THREAD_UNINT);
			thread_block(THREAD_CONTINUE_NULL);
		}
		default_pager_space_alert(trigger, LO_WAT_ALERT);
		ipc_port_release_send(trigger);
		dp_pages_free_low_count = 0;
	}

	clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
	thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline