/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Paging File Management.
 */
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>
#include "default_pager_internal.h"
#include <default_pager/default_pager_alerts.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future
 */
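/*
 * Worked example (with the defaults defined below, assuming 4 KB pages):
 * ALLOC_STRIDE is 1 GB and the default cluster is 1 << VSTRUCT_DEF_CLSHIFT
 * = 4 pages, i.e. 16 KB, so roughly 1 GB / 16 KB = 65536 clusters are
 * handed out from one swap file before rotating to the next one.
 */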
#define ALLOC_STRIDE	(1024 * 1024 * 1024)
int	physical_transfer_cluster_count = 0;

#define VM_SUPER_CLUSTER	0x40000
#define VM_SUPER_PAGES		64

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_DEF_CLSHIFT	2
int	vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int	default_pager_clsize = 0;
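/*
 * The cluster shift is kept instead of a byte size: a vstruct with
 * vs_clshift == n groups 1 << n pages into one paging-segment cluster,
 * so the default shift of 2 gives 4-page (16 KB, assuming 4 KB pages)
 * clusters.  default_pager_clsize stays 0 until bs_get_global_clsize()
 * fixes the cluster size for the life of the pager.
 */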
unsigned int	clustered_writes[VM_SUPER_PAGES+1];
unsigned int	clustered_reads[VM_SUPER_PAGES+1];
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list: head of list of to-be-completed I/O ops
 *	async_num_queued: number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out: number of pages of requests not completed.
 */
struct vs_async	*vs_async_list;
int		async_num_queued;
int		async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async	*vs_async_free_list;

mutex_t	default_pager_async_lock;	/* Protects globals above */

int	vs_alloc_async_failed = 0;	/* statistics */
int	vs_alloc_async_count = 0;	/* statistics */
struct vs_async *vs_alloc_async(void);	/* forward */
void	vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	mutex_init(&default_pager_async_lock, \
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
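/*
 * As used later in this file: when dp_pages_free drops below
 * minimum_pages_remaining, a HI_WAT_ALERT is sent on
 * min_pages_trigger_port; when freed clusters push dp_pages_free above
 * maximum_pages_free, a LO_WAT_ALERT is sent on max_pages_trigger_port.
 */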
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */
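/*
 * For example, a 3 MB object is treated as 4 MB, but an object already
 * larger than max_doubled_size (4 MB here) is not rounded up any further.
 */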
/*
 * List of all backing store and segments.
 */
struct backing_store_list_head	backing_store_list;
paging_segment_t		paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t				paging_segments_lock;
int				paging_segment_max = 0;
int				paging_segment_count = 0;
int				ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
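/*
 * ps_select_array keeps one rotor per backing-store priority level
 * (BS_MAXPRI+1 entries).  ps_select_segment() uses it to remember which
 * segment index to try next at each priority; ps_enter() resets an entry
 * to 0 when a new segment arrives at that priority, and an entry is set
 * to BS_FULLPRI when every segment at that priority is full.
 */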
/*
 * Total pages free in system
 * This differs from clusters committed/avail which is a measure of the
 * over commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	cluster_transfer_minimum = 100;
kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int);	/* forward */
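/*
 * get_read_buffer() below hands out one of the pager's internal
 * per-thread read buffers: it scans dpt_array[] for an entry whose
 * checked_out flag is clear, marks it checked out, and returns it;
 * if every buffer is busy it sleeps on dpt_array until a reader
 * releases one (see the wakeup in ps_read_device()).
 */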
default_pager_thread_t *
get_read_buffer(void)
{
	int	i;

	DPT_LOCK(dpt_lock);
	while (TRUE) {
		for (i = 0; i < default_pager_internal_count; i++) {
			if (dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
#if	VS_ASYNC_REUSE
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
	}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t out_of_memory)
{
	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}
void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
void
bs_more_space(
	int	nclusters)
{
	/*
	 * Account for new paging space.
	 */
	clusters_available += nclusters;

	if (clusters_available >= clusters_committed) {
		if (verbose && user_warned) {
			printf("%s%s - %d excess clusters now.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_available - clusters_committed);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - still short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
			clusters_committed_peak -= nclusters;
		}
	}
}
void
bs_commit(
	int	nclusters)
{
	clusters_committed += nclusters;
	if (clusters_committed > clusters_available) {
		if (verbose && !user_warned) {
			user_warned = TRUE;
			printf("%s%s - short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
		}
		if (clusters_committed > clusters_committed_peak) {
			clusters_committed_peak = clusters_committed;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - was short of up to %d clusters.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_committed_peak - clusters_available);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	}
}
int	default_pager_info_verbose = 1;

	vm_size_t	pages_total, pages_free;

	pages_total = pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		/*
		 * no need to lock: by the time this data
		 * gets back to any remote requestor it
		 * will be obsolete anyways
		 */
		pages_total += ps->ps_pgnum;
		pages_free += ps->ps_clcount << ps->ps_clshift;
		DEBUG(DEBUG_BS_INTERNAL,
		      ("segment #%d: %d total, %d free\n",
		       i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
	}
	*totalp = pages_total;

	if (verbose && user_warned && default_pager_info_verbose) {
		if (clusters_available < clusters_committed) {
			printf("%s %d clusters committed, %d available.\n",
backing_store_t backing_store_alloc(void);	/* forward */

backing_store_t
backing_store_alloc(void)
{
	backing_store_t	bs;

	bs = (backing_store_t) kalloc(sizeof (struct backing_store));
	if (bs == BACKING_STORE_NULL)
		panic("backing_store_alloc: no memory");

	bs->bs_port = MACH_PORT_NULL;

	bs->bs_pages_total = 0;
	bs->bs_pages_in = 0;
	bs->bs_pages_in_fail = 0;
	bs->bs_pages_out = 0;
	bs->bs_pages_out_fail = 0;
backing_store_t backing_store_lookup(MACH_PORT_FACE);	/* forward */

/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
	MACH_PORT_FACE port)
{
	backing_store_t	bs;

/*
	port is currently backed with a vs structure in the alias field
	we could create an ISBS alias and a port_is_bs call but frankly
	I see no reason for the test, the bs->port == port check below
	will work properly on junk entries.

	if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;

	queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
		      bs_links) {
		if (bs->bs_port == port) {
			/* Success, return it locked. */
			return bs;
		}
	}
	return BACKING_STORE_NULL;
}
void backing_store_add(backing_store_t);	/* forward */

void
backing_store_add(
	backing_store_t	bs)
{
	MACH_PORT_FACE	port = bs->bs_port;
	MACH_PORT_FACE	pset = default_pager_default_set;
	kern_return_t	kr = KERN_SUCCESS;

	if (kr != KERN_SUCCESS)
		panic("backing_store_add: add to set");
}
/*
 * Set up default page shift, but only if not already
 * set and argument is within range.
 */
boolean_t
bs_set_default_clsize(unsigned int npages)
{
	if (default_pager_clsize == 0)	/* if not yet set */
		vstruct_def_clshift = local_log2(npages);
int bs_get_global_clsize(int clsize);	/* forward */

int
bs_get_global_clsize(
	int	clsize)
{
	memory_object_default_t	dmm;

	/*
	 * Only allow setting of cluster size once. If called
	 * with no cluster size (default), we use the compiled-in default
	 * for the duration. The same cluster size is used for all
	 * paging segments.
	 */
	if (default_pager_clsize == 0) {
		/*
		 * Keep cluster size in bit shift because it's quicker
		 * arithmetic, and easier to keep at a power of 2.
		 */
		if (clsize != NO_CLSIZE) {
			for (i = 0; (1 << i) < clsize; i++);
			if (i > MAX_CLUSTER_SHIFT)
				i = MAX_CLUSTER_SHIFT;
			vstruct_def_clshift = i;
		}
		default_pager_clsize = (1 << vstruct_def_clshift);
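		/*
		 * Example of the shift search above: a requested clsize of
		 * 3 pages stops the loop at i == 2 (1 << 2 == 4 >= 3), so
		 * the cluster size is rounded up to 4 pages; requests larger
		 * than 1 << MAX_CLUSTER_SHIFT pages are clamped to that shift.
		 */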
		/*
		 * Let the user know the new (and definitive) cluster size.
		 */
		printf("%scluster size = %d page%s\n",
		       my_name, default_pager_clsize,
		       (default_pager_clsize == 1) ? "" : "s");

		/*
		 * Let the kernel know too, in case it hasn't used the
		 * default value provided in main() yet.
		 */
		dmm = default_pager_object;
		clsize = default_pager_clsize * vm_page_size;	/* in bytes */
		kr = host_default_memory_manager(host_priv_self(),
						 &dmm,
						 clsize);
		memory_object_default_deallocate(dmm);

		if (kr != KERN_SUCCESS) {
			panic("bs_get_global_cl_size:host_default_memory_manager");
		}
		if (dmm != default_pager_object) {
			panic("bs_get_global_cl_size:there is another default pager");
		}
	}
	ASSERT(default_pager_clsize > 0 &&
	       (default_pager_clsize & (default_pager_clsize - 1)) == 0);

	return default_pager_clsize;
}
510 default_pager_backing_store_create(
511 memory_object_default_t pager
,
513 int clsize
, /* in bytes */
514 MACH_PORT_FACE
*backing_store
)
519 struct vstruct_alias
*alias_struct
;
521 if (pager
!= default_pager_object
)
522 return KERN_INVALID_ARGUMENT
;
524 bs
= backing_store_alloc();
525 port
= ipc_port_alloc_kernel();
526 ipc_port_make_send(port
);
527 assert (port
!= IP_NULL
);
529 DEBUG(DEBUG_BS_EXTERNAL
,
530 ("priority=%d clsize=%d bs_port=0x%x\n",
531 priority
, clsize
, (int) backing_store
));
533 alias_struct
= (struct vstruct_alias
*)
534 kalloc(sizeof (struct vstruct_alias
));
535 if(alias_struct
!= NULL
) {
536 alias_struct
->vs
= (struct vstruct
*)bs
;
537 alias_struct
->name
= ISVS
;
538 port
->alias
= (int) alias_struct
;
541 ipc_port_dealloc_kernel((MACH_PORT_FACE
)(port
));
542 kfree((vm_offset_t
)bs
, sizeof (struct backing_store
));
543 return KERN_RESOURCE_SHORTAGE
;
547 if (priority
== DEFAULT_PAGER_BACKING_STORE_MAXPRI
)
548 priority
= BS_MAXPRI
;
549 else if (priority
== BS_NOPRI
)
550 priority
= BS_MAXPRI
;
552 priority
= BS_MINPRI
;
553 bs
->bs_priority
= priority
;
555 bs
->bs_clsize
= bs_get_global_clsize(atop_32(clsize
));
558 queue_enter(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
562 backing_store_add(bs
);
564 *backing_store
= port
;
569 default_pager_backing_store_info(
570 MACH_PORT_FACE backing_store
,
571 backing_store_flavor_t flavour
,
572 backing_store_info_t info
,
573 mach_msg_type_number_t
*size
)
576 backing_store_basic_info_t basic
;
580 if (flavour
!= BACKING_STORE_BASIC_INFO
||
581 *size
< BACKING_STORE_BASIC_INFO_COUNT
)
582 return KERN_INVALID_ARGUMENT
;
584 basic
= (backing_store_basic_info_t
)info
;
585 *size
= BACKING_STORE_BASIC_INFO_COUNT
;
587 VSTATS_LOCK(&global_stats
.gs_lock
);
588 basic
->pageout_calls
= global_stats
.gs_pageout_calls
;
589 basic
->pagein_calls
= global_stats
.gs_pagein_calls
;
590 basic
->pages_in
= global_stats
.gs_pages_in
;
591 basic
->pages_out
= global_stats
.gs_pages_out
;
592 basic
->pages_unavail
= global_stats
.gs_pages_unavail
;
593 basic
->pages_init
= global_stats
.gs_pages_init
;
594 basic
->pages_init_writes
= global_stats
.gs_pages_init_writes
;
595 VSTATS_UNLOCK(&global_stats
.gs_lock
);
597 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
598 return KERN_INVALID_ARGUMENT
;
600 basic
->bs_pages_total
= bs
->bs_pages_total
;
602 bs
->bs_pages_free
= 0;
603 for (i
= 0; i
<= paging_segment_max
; i
++) {
604 ps
= paging_segments
[i
];
605 if (ps
!= PAGING_SEGMENT_NULL
&& ps
->ps_bs
== bs
) {
607 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
612 basic
->bs_pages_free
= bs
->bs_pages_free
;
613 basic
->bs_pages_in
= bs
->bs_pages_in
;
614 basic
->bs_pages_in_fail
= bs
->bs_pages_in_fail
;
615 basic
->bs_pages_out
= bs
->bs_pages_out
;
616 basic
->bs_pages_out_fail
= bs
->bs_pages_out_fail
;
618 basic
->bs_priority
= bs
->bs_priority
;
619 basic
->bs_clsize
= ptoa_32(bs
->bs_clsize
); /* in bytes */
626 int ps_delete(paging_segment_t
); /* forward */
633 kern_return_t error
= KERN_SUCCESS
;
636 VSL_LOCK(); /* get the lock on the list of vs's */
	/* The lock relationship and sequence is fairly complicated */
	/* this code looks at a live list, locking and unlocking the list */
	/* as it traverses it.  It depends on the locking behavior of */
	/* default_pager_no_senders.  no_senders always locks the vstruct */
	/* targeted for removal before locking the vstruct list.  However */
	/* it will remove that member of the list without locking its */
	/* neighbors.  We can be sure when we hold a lock on a vstruct */
	/* it cannot be removed from the list but we must hold the list */
	/* lock to be sure that its pointers to its neighbors are valid. */
	/* Also, we can hold off destruction of a vstruct when the list */
	/* lock and the vs locks are not being held by bumping the */
	/* vs_async_pending count. */
652 while(backing_store_release_trigger_disable
!= 0) {
653 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
656 /* we will choose instead to hold a send right */
657 vs_count
= vstruct_list
.vsl_count
;
658 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
659 if(vs
== (vstruct_t
)&vstruct_list
) {
664 vs_async_wait(vs
); /* wait for any pending async writes */
665 if ((vs_count
!= 0) && (vs
!= NULL
))
666 vs
->vs_async_pending
+= 1; /* hold parties calling */
670 while((vs_count
!= 0) && (vs
!= NULL
)) {
		/* We take the count of AMO's before beginning the */
		/* transfer of the target segment. */
		/* We are guaranteed that the target segment cannot get */
		/* more users.  We also know that queue entries are */
		/* made at the back of the list.  If some of the entries */
		/* we would check disappear while we are traversing the */
		/* list then we will either check new entries which */
		/* do not have any backing store in the target segment */
		/* or re-check old entries.  This might not be optimal */
		/* but it will always be correct. The alternative is to */
		/* take a snapshot of the list. */
684 if(dp_pages_free
< cluster_transfer_minimum
)
685 error
= KERN_FAILURE
;
687 vm_object_t transfer_object
;
691 transfer_object
= vm_object_allocate(VM_SUPER_CLUSTER
);
693 error
= vm_object_upl_request(transfer_object
,
694 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
696 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
698 if(error
== KERN_SUCCESS
) {
699 error
= ps_vstruct_transfer_from_segment(
701 upl_commit(upl
, NULL
);
704 error
= KERN_FAILURE
;
706 vm_object_deallocate(transfer_object
);
710 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
711 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
712 vs
->vs_waiting_async
= FALSE
;
714 thread_wakeup(&vs
->vs_async_pending
);
723 while(backing_store_release_trigger_disable
!= 0) {
724 VSL_SLEEP(&backing_store_release_trigger_disable
,
728 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
729 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
730 (vs
!= next_vs
) && (vs_count
!= 1)) {
732 vs_async_wait(next_vs
); /* wait for any */
733 /* pending async writes */
734 next_vs
->vs_async_pending
+= 1; /* hold parties */
735 /* calling vs_async_wait */
740 vs
->vs_async_pending
-= 1;
741 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
742 vs
->vs_waiting_async
= FALSE
;
744 thread_wakeup(&vs
->vs_async_pending
);
748 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
759 default_pager_backing_store_delete(
760 MACH_PORT_FACE backing_store
)
766 int interim_pages_removed
= 0;
769 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
770 return KERN_INVALID_ARGUMENT
;
773 /* not implemented */
780 error
= KERN_SUCCESS
;
781 for (i
= 0; i
<= paging_segment_max
; i
++) {
782 ps
= paging_segments
[i
];
783 if (ps
!= PAGING_SEGMENT_NULL
&&
785 ! ps
->ps_going_away
) {
787 /* disable access to this segment */
788 ps
->ps_going_away
= TRUE
;
791 * The "ps" segment is "off-line" now,
792 * we can try and delete it...
794 if(dp_pages_free
< (cluster_transfer_minimum
796 error
= KERN_FAILURE
;
800 /* remove all pages associated with the */
801 /* segment from the list of free pages */
802 /* when transfer is through, all target */
803 /* segment pages will appear to be free */
805 dp_pages_free
-= ps
->ps_pgcount
;
806 interim_pages_removed
+= ps
->ps_pgcount
;
808 error
= ps_delete(ps
);
810 if (error
!= KERN_SUCCESS
) {
812 * We couldn't delete the segment,
813 * probably because there's not enough
814 * virtual memory left.
815 * Re-enable all the segments.
824 if (error
!= KERN_SUCCESS
) {
825 for (i
= 0; i
<= paging_segment_max
; i
++) {
826 ps
= paging_segments
[i
];
827 if (ps
!= PAGING_SEGMENT_NULL
&&
831 /* re-enable access to this segment */
832 ps
->ps_going_away
= FALSE
;
836 dp_pages_free
+= interim_pages_removed
;
842 for (i
= 0; i
<= paging_segment_max
; i
++) {
843 ps
= paging_segments
[i
];
844 if (ps
!= PAGING_SEGMENT_NULL
&&
846 if(ps
->ps_going_away
) {
847 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
848 paging_segment_count
--;
850 kfree((vm_offset_t
)ps
->ps_bmap
,
851 RMAPSIZE(ps
->ps_ncls
));
852 kfree((vm_offset_t
)ps
, sizeof *ps
);
857 /* Scan the entire ps array separately to make certain we find the */
858 /* proper paging_segment_max */
859 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
860 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
861 paging_segment_max
= i
;
867 * All the segments have been deleted.
868 * We can remove the backing store.
872 * Disable lookups of this backing store.
874 if((void *)bs
->bs_port
->alias
!= NULL
)
875 kfree((vm_offset_t
) bs
->bs_port
->alias
,
876 sizeof (struct vstruct_alias
));
877 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
878 bs
->bs_port
= MACH_PORT_NULL
;
882 * Remove backing store from backing_store list.
885 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
890 * Free the backing store structure.
892 kfree((vm_offset_t
)bs
, sizeof *bs
);
897 int ps_enter(paging_segment_t
); /* forward */
907 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
908 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
912 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
913 paging_segments
[i
] = ps
;
914 if (i
> paging_segment_max
)
915 paging_segment_max
= i
;
916 paging_segment_count
++;
917 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
918 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
919 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
923 return KERN_RESOURCE_SHORTAGE
;
932 default_pager_add_segment(
933 MACH_PORT_FACE backing_store
,
934 MACH_PORT_FACE device
,
944 if ((bs
= backing_store_lookup(backing_store
))
945 == BACKING_STORE_NULL
)
946 return KERN_INVALID_ARGUMENT
;
949 for (i
= 0; i
<= paging_segment_max
; i
++) {
950 ps
= paging_segments
[i
];
951 if (ps
== PAGING_SEGMENT_NULL
)
955 * Check for overlap on same device.
957 if (!(ps
->ps_device
!= device
958 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
959 || offset
+ count
<= ps
->ps_offset
)) {
962 return KERN_INVALID_ARGUMENT
;
968 * Set up the paging segment
970 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
971 if (ps
== PAGING_SEGMENT_NULL
) {
973 return KERN_RESOURCE_SHORTAGE
;
976 ps
->ps_segtype
= PS_PARTITION
;
977 ps
->ps_device
= device
;
978 ps
->ps_offset
= offset
;
979 ps
->ps_record_shift
= local_log2(vm_page_size
/ record_size
);
980 ps
->ps_recnum
= count
;
981 ps
->ps_pgnum
= count
>> ps
->ps_record_shift
;
983 ps
->ps_pgcount
= ps
->ps_pgnum
;
984 ps
->ps_clshift
= local_log2(bs
->bs_clsize
);
985 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
989 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
991 kfree((vm_offset_t
)ps
, sizeof *ps
);
993 return KERN_RESOURCE_SHORTAGE
;
995 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
996 clrbit(ps
->ps_bmap
, i
);
999 ps
->ps_going_away
= FALSE
;
1002 if ((error
= ps_enter(ps
)) != 0) {
1003 kfree((vm_offset_t
)ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1004 kfree((vm_offset_t
)ps
, sizeof *ps
);
1006 return KERN_RESOURCE_SHORTAGE
;
1009 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1010 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1014 dp_pages_free
+= ps
->ps_pgcount
;
1017 bs_more_space(ps
->ps_clcount
);
1019 DEBUG(DEBUG_BS_INTERNAL
,
1020 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1021 device
, offset
, count
, record_size
,
1022 ps
->ps_record_shift
, ps
->ps_pgnum
));
1024 return KERN_SUCCESS
;
1030 MACH_PORT_FACE master
)
1032 security_token_t null_security_token
= {
1035 MACH_PORT_FACE device
;
1036 int info
[DEV_GET_SIZE_COUNT
];
1037 mach_msg_type_number_t info_count
;
1038 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1039 unsigned int rec_size
;
1042 MACH_PORT_FACE reply_port
;
1044 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1045 null_security_token
, dev_name
, &device
))
1048 info_count
= DEV_GET_SIZE_COUNT
;
1049 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1050 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1051 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1052 clsize
= bs_get_global_clsize(0);
1053 if (!default_pager_backing_store_create(
1054 default_pager_object
,
1055 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1056 (clsize
* vm_page_size
),
1058 if (!default_pager_add_segment(bs
, device
,
1059 0, count
, rec_size
)) {
1062 ipc_port_release_receive(bs
);
1066 ipc_port_release_send(device
);
1069 #endif /* DEVICE_PAGING */
1074 vs_alloc_async(void)
1076 struct vs_async
*vsa
;
1077 MACH_PORT_FACE reply_port
;
1081 if (vs_async_free_list
== NULL
) {
1083 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1086 * Try allocating a reply port named after the
1087 * address of the vs_async structure.
1089 struct vstruct_alias
*alias_struct
;
1091 reply_port
= ipc_port_alloc_kernel();
1092 alias_struct
= (struct vstruct_alias
*)
1093 kalloc(sizeof (struct vstruct_alias
));
1094 if(alias_struct
!= NULL
) {
1095 alias_struct
->vs
= (struct vstruct
*)vsa
;
1096 alias_struct
->name
= ISVS
;
1097 reply_port
->alias
= (int) alias_struct
;
1098 vsa
->reply_port
= reply_port
;
1099 vs_alloc_async_count
++;
1102 vs_alloc_async_failed
++;
1103 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1105 kfree((vm_offset_t
)vsa
,
1106 sizeof (struct vs_async
));
1111 vsa
= vs_async_free_list
;
1112 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1121 struct vs_async
*vsa
)
1124 vsa
->vsa_next
= vs_async_free_list
;
1125 vs_async_free_list
= vsa
;
1129 #else /* VS_ASYNC_REUSE */
1132 vs_alloc_async(void)
1134 struct vs_async
*vsa
;
1135 MACH_PORT_FACE reply_port
;
1138 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1141 * Try allocating a reply port named after the
1142 * address of the vs_async structure.
1144 reply_port
= ipc_port_alloc_kernel();
1145 alias_struct
= (vstruct_alias
*)
1146 kalloc(sizeof (struct vstruct_alias
));
1147 if(alias_struct
!= NULL
) {
1148 alias_struct
->vs
= reply_port
;
1149 alias_struct
->name
= ISVS
;
1150 reply_port
->alias
= (int) vsa
;
1151 vsa
->reply_port
= reply_port
;
1152 vs_alloc_async_count
++;
1155 vs_alloc_async_failed
++;
1156 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1158 kfree((vm_offset_t
) vsa
,
1159 sizeof (struct vs_async
));
1169 struct vs_async
*vsa
)
1171 MACH_PORT_FACE reply_port
;
	reply_port = vsa->reply_port;
	kfree((vm_offset_t) reply_port->alias, sizeof (struct vstruct_alias));
1176 kfree((vm_offset_t
) vsa
, sizeof (struct vs_async
));
1177 ipc_port_dealloc_kernel((MACH_PORT_FACE
) (reply_port
));
1180 vs_alloc_async_count
--;
1185 #endif /* VS_ASYNC_REUSE */
1187 zone_t vstruct_zone
;
1196 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1197 if (vs
== VSTRUCT_NULL
) {
1198 return VSTRUCT_NULL
;
1204 * The following fields will be provided later.
1206 vs
->vs_mem_obj
= NULL
;
1207 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1208 vs
->vs_references
= 1;
1212 vs
->vs_waiting_seqno
= FALSE
;
1213 vs
->vs_waiting_read
= FALSE
;
1214 vs
->vs_waiting_write
= FALSE
;
1215 vs
->vs_waiting_async
= FALSE
;
1217 mutex_init(&vs
->vs_waiting_seqno
, ETAP_DPAGE_VSSEQNO
);
1218 mutex_init(&vs
->vs_waiting_read
, ETAP_DPAGE_VSREAD
);
1219 mutex_init(&vs
->vs_waiting_write
, ETAP_DPAGE_VSWRITE
);
1220 mutex_init(&vs
->vs_waiting_refs
, ETAP_DPAGE_VSREFS
);
1221 mutex_init(&vs
->vs_waiting_async
, ETAP_DPAGE_VSASYNC
);
	vs->vs_clshift = local_log2(bs_get_global_clsize(0));
	vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
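	/*
	 * vs_size is the object size in clusters, rounded up: e.g. with
	 * 4-page clusters (vs_clshift == 2), a 9-page object gives
	 * ((9 - 1) >> 2) + 1 = 3 clusters.
	 */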
	vs->vs_async_pending = 0;
1234 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1235 * depending on the size of the memory object.
1237 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1238 vs
->vs_imap
= (struct vs_map
**)
1239 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1240 vs
->vs_indirect
= TRUE
;
1242 vs
->vs_dmap
= (struct vs_map
*)
1243 kalloc(CLMAP_SIZE(vs
->vs_size
));
1244 vs
->vs_indirect
= FALSE
;
1246 vs
->vs_xfer_pending
= FALSE
;
1247 DEBUG(DEBUG_VS_INTERNAL
,
1248 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1251 * Check to see that we got the space.
1254 kfree((vm_offset_t
)vs
, sizeof *vs
);
1255 return VSTRUCT_NULL
;
1259 * Zero the indirect pointers, or clear the direct pointers.
1261 if (vs
->vs_indirect
)
1262 memset(vs
->vs_imap
, 0,
1263 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1265 for (i
= 0; i
< vs
->vs_size
; i
++)
1266 VSM_CLR(vs
->vs_dmap
[i
]);
1268 VS_MAP_LOCK_INIT(vs
);
1270 bs_commit(vs
->vs_size
);
1275 paging_segment_t
ps_select_segment(int, int *); /* forward */
1282 paging_segment_t ps
;
1287 * Optimize case where there's only one segment.
1288 * paging_segment_max will index the one and only segment.
1292 if (paging_segment_count
== 1) {
1293 paging_segment_t lps
; /* used to avoid extra PS_UNLOCK */
1294 ipc_port_t trigger
= IP_NULL
;
1296 ps
= paging_segments
[paging_segment_max
];
1297 *psindex
= paging_segment_max
;
1299 if (ps
->ps_going_away
) {
1300 /* this segment is being turned off */
1301 lps
= PAGING_SEGMENT_NULL
;
1303 ASSERT(ps
->ps_clshift
>= shift
);
1304 if (ps
->ps_clcount
) {
1306 dp_pages_free
-= 1 << ps
->ps_clshift
;
1307 if(min_pages_trigger_port
&&
1308 (dp_pages_free
< minimum_pages_remaining
)) {
1309 trigger
= min_pages_trigger_port
;
1310 min_pages_trigger_port
= NULL
;
1315 lps
= PAGING_SEGMENT_NULL
;
1320 if (trigger
!= IP_NULL
) {
1321 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1322 ipc_port_release_send(trigger
);
1327 if (paging_segment_count
== 0) {
1329 return PAGING_SEGMENT_NULL
;
1333 i
>= BS_MINPRI
; i
--) {
1336 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1337 (ps_select_array
[i
] == BS_FULLPRI
))
1339 start_index
= ps_select_array
[i
];
1341 if(!(paging_segments
[start_index
])) {
1343 physical_transfer_cluster_count
= 0;
1345 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1346 (((paging_segments
[start_index
])->ps_clshift
)
1347 + vm_page_shift
))) {
1348 physical_transfer_cluster_count
= 0;
1349 j
= start_index
+ 1;
1351 physical_transfer_cluster_count
+=1;
1353 if(start_index
== 0)
1354 start_index
= paging_segment_max
;
1356 start_index
= start_index
- 1;
1360 if (j
> paging_segment_max
)
1362 if ((ps
= paging_segments
[j
]) &&
1363 (ps
->ps_bs
->bs_priority
== i
)) {
1365 * Force the ps cluster size to be
1366 * >= that of the vstruct.
1369 if (ps
->ps_going_away
) {
1370 /* this segment is being turned off */
1371 } else if ((ps
->ps_clcount
) &&
1372 (ps
->ps_clshift
>= shift
)) {
1373 ipc_port_t trigger
= IP_NULL
;
1376 dp_pages_free
-= 1 << ps
->ps_clshift
;
1377 if(min_pages_trigger_port
&&
1379 minimum_pages_remaining
)) {
1380 trigger
= min_pages_trigger_port
;
1381 min_pages_trigger_port
= NULL
;
1385 * found one, quit looking.
1387 ps_select_array
[i
] = j
;
1390 if (trigger
!= IP_NULL
) {
1391 default_pager_space_alert(
1394 ipc_port_release_send(trigger
);
1401 if (j
== start_index
) {
1403 * none at this priority -- mark it full
1405 ps_select_array
[i
] = BS_FULLPRI
;
1412 return PAGING_SEGMENT_NULL
;
1415 vm_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1418 ps_allocate_cluster(
1421 paging_segment_t use_ps
)
1425 paging_segment_t ps
;
1426 vm_offset_t cluster
;
1427 ipc_port_t trigger
= IP_NULL
;
1430 * Find best paging segment.
1431 * ps_select_segment will decrement cluster count on ps.
1432 * Must pass cluster shift to find the most appropriate segment.
1434 /* NOTE: The addition of paging segment delete capability threatened
1435 * to seriously complicate the treatment of paging segments in this
1436 * module and the ones that call it (notably ps_clmap), because of the
1437 * difficulty in assuring that the paging segment would continue to
1438 * exist between being unlocked and locked. This was
1439 * avoided because all calls to this module are based in either
1440 * dp_memory_object calls which rely on the vs lock, or by
1441 * the transfer function which is part of the segment delete path.
1442 * The transfer function which is part of paging segment delete is
1443 * protected from multiple callers by the backing store lock.
1444 * The paging segment delete function treats mappings to a paging
1445 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1446 * while data is transferred to the remaining segments. This is in
1447 * line with the view that incomplete or in-transition mappings between
1448 * data, a vstruct, and backing store are protected by the vs lock.
1449 * This and the ordering of the paging segment "going_away" bit setting
1452 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1457 dp_pages_free
-= 1 << ps
->ps_clshift
;
1458 if(min_pages_trigger_port
&&
1459 (dp_pages_free
< minimum_pages_remaining
)) {
1460 trigger
= min_pages_trigger_port
;
1461 min_pages_trigger_port
= NULL
;
1465 if (trigger
!= IP_NULL
) {
1466 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1467 ipc_port_release_send(trigger
);
1470 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1471 PAGING_SEGMENT_NULL
) {
1473 bs_no_paging_space(TRUE
);
1478 dprintf(("no space in available paging segments; "
1479 "swapon suggested\n"));
1480 /* the count got off maybe, reset to zero */
1483 if(min_pages_trigger_port
) {
1484 trigger
= min_pages_trigger_port
;
1485 min_pages_trigger_port
= NULL
;
1489 if (trigger
!= IP_NULL
) {
1490 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1491 ipc_port_release_send(trigger
);
1493 return (vm_offset_t
) -1;
1495 ASSERT(ps
->ps_clcount
!= 0);
	/*
	 * Look for an available cluster.  At the end of the loop,
	 * byte_num is the byte offset and bit_num is the bit offset of the
	 * first zero bit in the paging segment bitmap.
	 */
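	/*
	 * This is a first-fit scan: ps_hint remembers the lowest byte of
	 * ps_bmap that may still contain a free cluster, so the scan skips
	 * bytes that are already BYTEMASK (all eight clusters in use).
	 * For example, if ps_hint is 3 and bit 5 of byte 3 is the first
	 * clear bit, the allocated cluster number is 3 * NBBY + 5 = 29.
	 */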
	byte_num = ps->ps_hint;
	for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
		if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
			for (bit_num = 0; bit_num < NBBY; bit_num++) {
				if (isclr((ps->ps_bmap + byte_num), bit_num))
					break;
			}
			ASSERT(bit_num != NBBY);
			break;
		}
	}
	ps->ps_hint = byte_num;
	cluster = (byte_num*NBBY) + bit_num;

	/* Space was reserved, so this must be true */
	ASSERT(cluster < ps->ps_ncls);

	setbit(ps->ps_bmap, cluster);
1526 void ps_deallocate_cluster(paging_segment_t
, vm_offset_t
); /* forward */
1529 ps_deallocate_cluster(
1530 paging_segment_t ps
,
1531 vm_offset_t cluster
)
1533 ipc_port_t trigger
= IP_NULL
;
1535 if (cluster
>= (vm_offset_t
) ps
->ps_ncls
)
1536 panic("ps_deallocate_cluster: Invalid cluster number");
1539 * Lock the paging segment, clear the cluster's bitmap and increment the
1540 * number of free cluster.
1544 clrbit(ps
->ps_bmap
, cluster
);
1546 dp_pages_free
+= 1 << ps
->ps_clshift
;
1547 if(max_pages_trigger_port
1548 && (backing_store_release_trigger_disable
== 0)
1549 && (dp_pages_free
> maximum_pages_free
)) {
1550 trigger
= max_pages_trigger_port
;
1551 max_pages_trigger_port
= NULL
;
1556 * Move the hint down to the freed cluster if it is
1557 * less than the current hint.
1559 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1560 ps
->ps_hint
= (cluster
/NBBY
);
1566 * If we're freeing space on a full priority, reset the array.
1569 if (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1570 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1573 if (trigger
!= IP_NULL
) {
1575 if(backing_store_release_trigger_disable
!= 0) {
1576 assert_wait((event_t
)
1577 &backing_store_release_trigger_disable
,
1580 thread_block(THREAD_CONTINUE_NULL
);
1584 default_pager_space_alert(trigger
, LO_WAT_ALERT
);
1585 ipc_port_release_send(trigger
);
1591 void ps_dealloc_vsmap(struct vs_map
*, vm_size_t
); /* forward */
1595 struct vs_map
*vsmap
,
1599 for (i
= 0; i
< size
; i
++)
1600 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1601 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1602 VSM_CLOFF(vsmap
[i
]));
1615 * If this is an indirect structure, then we walk through the valid
1616 * (non-zero) indirect pointers and deallocate the clusters
1617 * associated with each used map entry (via ps_dealloc_vsmap).
1618 * When all of the clusters in an indirect block have been
1619 * freed, we deallocate the block. When all of the indirect
1620 * blocks have been deallocated we deallocate the memory
1621 * holding the indirect pointers.
1623 if (vs
->vs_indirect
) {
1624 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1625 if (vs
->vs_imap
[i
] != NULL
) {
1626 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1627 kfree((vm_offset_t
)vs
->vs_imap
[i
],
1631 kfree((vm_offset_t
)vs
->vs_imap
,
1632 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1635 * Direct map. Free used clusters, then memory.
1637 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1638 kfree((vm_offset_t
)vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1642 bs_commit(- vs
->vs_size
);
1644 zfree(vstruct_zone
, (vm_offset_t
)vs
);
1647 int ps_map_extend(vstruct_t
, int); /* forward */
1653 struct vs_map
**new_imap
;
1654 struct vs_map
*new_dmap
= NULL
;
1657 void *old_map
= NULL
;
1658 int old_map_size
= 0;
1660 if (vs
->vs_size
>= new_size
) {
1662 * Someone has already done the work.
	/*
	 * If the new size extends into the indirect range, then we have one
	 * of two cases: we are going from indirect to indirect, or we are
	 * going from direct to indirect.  If we are going from indirect to
	 * indirect, then it is possible that the new size will fit in the old
	 * indirect map.  If this is the case, then just reset the size of the
	 * vstruct map and we are done.  If the new size will not
	 * fit into the old indirect map, then we have to allocate a new
	 * indirect map and copy the old map pointers into this new map.
	 *
	 * If we are going from direct to indirect, then we have to allocate a
	 * new indirect map and copy the old direct pages into the first
	 * indirect page of the new map.
	 * NOTE: allocating memory here is dangerous, as we're in the
	 * pageout path.
	 */
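	/*
	 * Growth example (illustrative): growing a direct map past the
	 * indirect threshold allocates an indirect page, copies the
	 * existing direct entries into new_imap[0], and flips vs_indirect
	 * to TRUE; growing an already-indirect map only reallocates when
	 * INDIRECT_CLMAP_SIZE(new_size) actually changes.
	 */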
1683 if (INDIRECT_CLMAP(new_size
)) {
1684 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1687 * Get a new indirect map and zero it.
1689 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1690 if (vs
->vs_indirect
&&
1691 (new_map_size
== old_map_size
)) {
1692 bs_commit(new_size
- vs
->vs_size
);
1693 vs
->vs_size
= new_size
;
1697 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1698 if (new_imap
== NULL
) {
1701 memset(new_imap
, 0, new_map_size
);
1703 if (vs
->vs_indirect
) {
1704 /* Copy old entries into new map */
1705 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1706 /* Arrange to free the old map */
1707 old_map
= (void *) vs
->vs_imap
;
1709 } else { /* Old map was a direct map */
1710 /* Allocate an indirect page */
1711 if ((new_imap
[0] = (struct vs_map
*)
1712 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1713 kfree((vm_offset_t
)new_imap
, new_map_size
);
1716 new_dmap
= new_imap
[0];
1717 newdsize
= CLMAP_ENTRIES
;
1721 newdsize
= new_size
;
1723 * If the new map is a direct map, then the old map must
1724 * also have been a direct map. All we have to do is
1725 * to allocate a new direct map, copy the old entries
1726 * into it and free the old map.
1728 if ((new_dmap
= (struct vs_map
*)
1729 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1735 /* Free the old map */
1736 old_map
= (void *) vs
->vs_dmap
;
1737 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1739 /* Copy info from the old map into the new map */
1740 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1742 /* Initialize the rest of the new map */
1743 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1744 VSM_CLR(new_dmap
[i
]);
1747 vs
->vs_imap
= new_imap
;
1748 vs
->vs_indirect
= TRUE
;
1750 vs
->vs_dmap
= new_dmap
;
1751 bs_commit(new_size
- vs
->vs_size
);
1752 vs
->vs_size
= new_size
;
1754 kfree((vm_offset_t
)old_map
, old_map_size
);
1762 struct clmap
*clmap
,
1767 vm_offset_t cluster
; /* The cluster of offset. */
1768 vm_offset_t newcl
; /* The new cluster allocated. */
1771 struct vs_map
*vsmap
;
1775 ASSERT(vs
->vs_dmap
);
1776 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1779 * Initialize cluster error value
1781 clmap
->cl_error
= 0;
1784 * If the object has grown, extend the page map.
1786 if (cluster
>= vs
->vs_size
) {
1787 if (flag
== CL_FIND
) {
1788 /* Do not allocate if just doing a lookup */
1790 return (vm_offset_t
) -1;
1792 if (ps_map_extend(vs
, cluster
+ 1)) {
1794 return (vm_offset_t
) -1;
	/*
	 * Look for the desired cluster.  If the map is indirect, then we
	 * have a two level lookup.  First find the indirect block, then
	 * find the actual cluster.  If the indirect block has not yet
	 * been allocated, then do so.  If the cluster has not yet been
	 * allocated, then do so.
	 *
	 * If any of the allocations fail, then return an error.
	 * Don't allocate if just doing a lookup.
	 */
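	/*
	 * Two-level lookup example: for a given cluster number,
	 * ind_block = cluster / CLMAP_ENTRIES picks the indirect block in
	 * vs_imap[], and cluster % CLMAP_ENTRIES picks the vs_map entry
	 * within that block; a direct map skips the first step and indexes
	 * vs_dmap[] with the same modulus.
	 */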
1808 if (vs
->vs_indirect
) {
1809 long ind_block
= cluster
/CLMAP_ENTRIES
;
1811 /* Is the indirect block allocated? */
1812 vsmap
= vs
->vs_imap
[ind_block
];
1813 if (vsmap
== NULL
) {
1814 if (flag
== CL_FIND
) {
1816 return (vm_offset_t
) -1;
1819 /* Allocate the indirect block */
1820 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
1821 if (vsmap
== NULL
) {
1823 return (vm_offset_t
) -1;
1825 /* Initialize the cluster offsets */
1826 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
1828 vs
->vs_imap
[ind_block
] = vsmap
;
1831 vsmap
= vs
->vs_dmap
;
1834 vsmap
+= cluster%CLMAP_ENTRIES
;
1837 * At this point, vsmap points to the struct vs_map desired.
1839 * Look in the map for the cluster, if there was an error on a
1840 * previous write, flag it and return. If it is not yet
1841 * allocated, then allocate it, if we're writing; if we're
1842 * doing a lookup and the cluster's not allocated, return error.
1844 if (VSM_ISERR(*vsmap
)) {
1845 clmap
->cl_error
= VSM_GETERR(*vsmap
);
1847 return (vm_offset_t
) -1;
1848 } else if (VSM_ISCLR(*vsmap
)) {
1851 if (flag
== CL_FIND
) {
1853 * If there's an error and the entry is clear, then
1854 * we've run out of swap space. Record the error
1858 VSM_SETERR(*vsmap
, error
);
1861 return (vm_offset_t
) -1;
1864 * Attempt to allocate a cluster from the paging segment
1866 newcl
= ps_allocate_cluster(vs
, &psindex
,
1867 PAGING_SEGMENT_NULL
);
1870 return (vm_offset_t
) -1;
1873 VSM_SETCLOFF(*vsmap
, newcl
);
1874 VSM_SETPS(*vsmap
, psindex
);
1877 newcl
= VSM_CLOFF(*vsmap
);
1880 * Fill in pertinent fields of the clmap
1882 clmap
->cl_ps
= VSM_PS(*vsmap
);
1883 clmap
->cl_numpages
= VSCLSIZE(vs
);
1884 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
	/*
	 * Byte offset in paging segment is byte offset to cluster plus
	 * byte offset within cluster.  It looks ugly, but should be
	 * quick.
	 */
	ASSERT(trunc_page(offset) == offset);
	newcl = ptoa_32(newcl) << vs->vs_clshift;
	newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
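	/*
	 * Example: with 4 KB pages (vm_page_shift == 12) and a cluster
	 * shift of 2, a cluster covers 16 KB, so newoff = offset & 0x3fff
	 * is the byte offset within the cluster and newcl is the byte
	 * offset of the cluster itself within the paging segment.
	 */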
1894 if (flag
== CL_ALLOC
) {
1896 * set bits in the allocation bitmap according to which
1897 * pages were requested. size is in bytes.
1899 i
= atop_32(newoff
);
1900 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
1901 VSM_SETALLOC(*vsmap
, i
);
1903 size
-= vm_page_size
;
1906 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
1909 * Offset is not cluster aligned, so number of pages
1910 * and bitmaps must be adjusted
1912 clmap
->cl_numpages
-= atop_32(newoff
);
1913 CLMAP_SHIFT(clmap
, vs
);
1914 CLMAP_SHIFTALLOC(clmap
, vs
);
1919 * The setting of valid bits and handling of write errors
1920 * must be done here, while we hold the lock on the map.
1921 * It logically should be done in ps_vs_write_complete().
1922 * The size and error information has been passed from
1923 * ps_vs_write_complete(). If the size parameter is non-zero,
1924 * then there is work to be done. If error is also non-zero,
1925 * then the error number is recorded in the cluster and the
1926 * entire cluster is in error.
1928 if (size
&& flag
== CL_FIND
) {
1929 vm_offset_t off
= (vm_offset_t
) 0;
1932 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
1934 VSM_SETPG(*vsmap
, i
);
1935 size
-= vm_page_size
;
1937 ASSERT(i
<= VSCLSIZE(vs
));
1939 BS_STAT(clmap
->cl_ps
->ps_bs
,
1940 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
1942 off
= VSM_CLOFF(*vsmap
);
1943 VSM_SETERR(*vsmap
, error
);
1946 * Deallocate cluster if error, and no valid pages
1949 if (off
!= (vm_offset_t
) 0)
1950 ps_deallocate_cluster(clmap
->cl_ps
, off
);
1952 return (vm_offset_t
) 0;
1956 DEBUG(DEBUG_VS_INTERNAL
,
1957 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1958 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
1959 DEBUG(DEBUG_VS_INTERNAL
,
1960 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1961 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
1962 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
1964 return (newcl
+ newoff
);
1967 void ps_clunmap(vstruct_t
, vm_offset_t
, vm_size_t
); /* forward */
1975 vm_offset_t cluster
; /* The cluster number of offset */
1976 struct vs_map
*vsmap
;
1981 * Loop through all clusters in this range, freeing paging segment
1982 * clusters and map entries as encountered.
1984 while (length
> 0) {
1988 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1989 if (vs
->vs_indirect
) /* indirect map */
1990 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
1992 vsmap
= vs
->vs_dmap
;
1993 if (vsmap
== NULL
) {
1997 vsmap
+= cluster%CLMAP_ENTRIES
;
1998 if (VSM_ISCLR(*vsmap
)) {
1999 length
-= vm_page_size
;
2000 offset
+= vm_page_size
;
		/*
		 * We've got a valid mapping.  Clear it and deallocate
		 * paging segment cluster pages.
		 * Optimize for entire cluster clearing.
		 */
2008 if (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) {
2010 * Not cluster aligned.
2012 ASSERT(trunc_page(newoff
) == newoff
);
2013 i
= atop_32(newoff
);
2016 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
2017 VSM_CLRPG(*vsmap
, i
);
2018 VSM_CLRALLOC(*vsmap
, i
);
2019 length
-= vm_page_size
;
2020 offset
+= vm_page_size
;
2025 * If map entry is empty, clear and deallocate cluster.
2027 if (!VSM_ALLOC(*vsmap
)) {
2028 ps_deallocate_cluster(VSM_PS(*vsmap
),
2037 void ps_vs_write_complete(vstruct_t
, vm_offset_t
, vm_size_t
, int); /* forward */
2040 ps_vs_write_complete(
2049 * Get the struct vsmap for this cluster.
2050 * Use READ, even though it was written, because the
2051 * cluster MUST be present, unless there was an error
2052 * in the original ps_clmap (e.g. no space), in which
2053 * case, nothing happens.
2055 * Must pass enough information to ps_clmap to allow it
2056 * to set the vs_map structure bitmap under lock.
2058 (void) ps_clmap(vs
, offset
, &clmap
, CL_FIND
, size
, error
);
2061 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, vm_offset_t
, vm_offset_t
, vm_size_t
, boolean_t
, int); /* forward */
2064 vs_cl_write_complete(
2066 paging_segment_t ps
,
2077 * For internal objects, the error is recorded on a
2078 * per-cluster basis by ps_clmap() which is called
2079 * by ps_vs_write_complete() below.
2081 dprintf(("write failed error = 0x%x\n", error
));
2082 /* add upl_abort code here */
2084 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2086 * Notify the vstruct mapping code, so it can do its accounting.
2088 ps_vs_write_complete(vs
, offset
, size
, error
);
2092 ASSERT(vs
->vs_async_pending
> 0);
2093 vs
->vs_async_pending
-= size
;
2094 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2095 vs
->vs_waiting_async
= FALSE
;
2097 /* mutex_unlock(&vs->vs_waiting_async); */
2098 thread_wakeup(&vs
->vs_async_pending
);
2105 #ifdef DEVICE_PAGING
2106 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2110 MACH_PORT_FACE reply_port
,
2111 kern_return_t device_code
,
2112 io_buf_len_t bytes_written
)
2114 struct vs_async
*vsa
;
2116 vsa
= (struct vs_async
*)
2117 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2119 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2120 device_code
= KERN_FAILURE
;
2123 vsa
->vsa_error
= device_code
;
2126 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2127 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2128 /* revisit when async disk segments redone */
2129 if(vsa
->vsa_error
) {
2130 /* need to consider error condition. re-write data or */
2131 /* throw it away here. */
2133 if(vm_map_copyout(kernel_map
, &ioaddr
,
2134 (vm_map_copy_t
)vsa
->vsa_addr
) != KERN_SUCCESS
)
2135 panic("vs_cluster_write: unable to copy source list\n");
2136 vm_deallocate(kernel_map
, ioaddr
, vsa
->vsa_size
);
2138 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2139 vsa
->vsa_size
, vsa
->vsa_error
);
2141 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2142 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2147 return KERN_SUCCESS
;
2150 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2152 device_write_reply_inband(
2153 MACH_PORT_FACE reply_port
,
2154 kern_return_t return_code
,
2155 io_buf_len_t bytes_written
)
2157 panic("device_write_reply_inband: illegal");
2158 return KERN_SUCCESS
;
2161 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2164 MACH_PORT_FACE reply_port
,
2165 kern_return_t return_code
,
2167 mach_msg_type_number_t dataCnt
)
2169 struct vs_async
*vsa
;
2170 vsa
= (struct vs_async
*)
2171 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2172 vsa
->vsa_addr
= (vm_offset_t
)data
;
2173 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2174 vsa
->vsa_error
= return_code
;
2175 thread_wakeup(&vsa
->vsa_lock
);
2176 return KERN_SUCCESS
;
2179 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2181 device_read_reply_inband(
2182 MACH_PORT_FACE reply_port
,
2183 kern_return_t return_code
,
2184 io_buf_ptr_inband_t data
,
2185 mach_msg_type_number_t dataCnt
)
2187 panic("device_read_reply_inband: illegal");
2188 return KERN_SUCCESS
;
2191 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2193 device_read_reply_overwrite(
2194 MACH_PORT_FACE reply_port
,
2195 kern_return_t return_code
,
2196 io_buf_len_t bytes_read
)
2198 panic("device_read_reply_overwrite: illegal\n");
2199 return KERN_SUCCESS
;
2202 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2205 MACH_PORT_FACE reply_port
,
2206 kern_return_t return_code
,
2207 MACH_PORT_FACE device_port
)
2209 panic("device_open_reply: illegal\n");
2210 return KERN_SUCCESS
;
2213 kern_return_t
ps_read_device(paging_segment_t
, vm_offset_t
, vm_offset_t
*, unsigned int, unsigned int *, int); /* forward */
2217 paging_segment_t ps
,
2219 vm_offset_t
*bufferp
,
2221 unsigned int *residualp
,
2225 recnum_t dev_offset
;
2226 unsigned int bytes_wanted
;
2227 unsigned int bytes_read
;
2228 unsigned int total_read
;
2229 vm_offset_t dev_buffer
;
2230 vm_offset_t buf_ptr
;
2231 unsigned int records_read
;
2232 struct vs_async
*vsa
;
2233 mutex_t vs_waiting_read_reply
;
2236 vm_map_copy_t device_data
= NULL
;
2237 default_pager_thread_t
*dpt
= NULL
;
2239 device
= dev_port_lookup(ps
->ps_device
);
2240 clustered_reads
[atop_32(size
)]++;
2242 dev_offset
= (ps
->ps_offset
+
2243 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2244 bytes_wanted
= size
;
2246 *bufferp
= (vm_offset_t
)NULL
;
2249 vsa
= VS_ALLOC_ASYNC();
2253 vsa
->vsa_offset
= 0;
2257 mutex_init(&vsa
->vsa_lock
, ETAP_DPAGE_VSSEQNO
);
2258 ip_lock(vsa
->reply_port
);
2259 vsa
->reply_port
->ip_sorights
++;
2260 ip_reference(vsa
->reply_port
);
2261 ip_unlock(vsa
->reply_port
);
2262 kr
= ds_device_read_common(device
,
2264 (mach_msg_type_name_t
)
2265 MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2269 (IO_READ
| IO_CALL
),
2270 (io_buf_ptr_t
*) &dev_buffer
,
2271 (mach_msg_type_number_t
*) &bytes_read
);
2272 if(kr
== MIG_NO_REPLY
) {
2273 assert_wait(&vsa
->vsa_lock
, THREAD_UNINT
);
2274 thread_block(THREAD_CONTINUE_NULL
);
2276 dev_buffer
= vsa
->vsa_addr
;
2277 bytes_read
= (unsigned int)vsa
->vsa_size
;
2278 kr
= vsa
->vsa_error
;
2281 if (kr
!= KERN_SUCCESS
|| bytes_read
== 0) {
2284 total_read
+= bytes_read
;
2287 * If we got the entire range, use the returned dev_buffer.
2289 if (bytes_read
== size
) {
2290 *bufferp
= (vm_offset_t
)dev_buffer
;
2295 dprintf(("read only %d bytes out of %d\n",
2296 bytes_read
, bytes_wanted
));
2299 dpt
= get_read_buffer();
2300 buf_ptr
= dpt
->dpt_buffer
;
2301 *bufferp
= (vm_offset_t
)buf_ptr
;
2304 * Otherwise, copy the data into the provided buffer (*bufferp)
2305 * and append the rest of the range as it comes in.
2307 memcpy((void *) buf_ptr
, (void *) dev_buffer
, bytes_read
);
2308 buf_ptr
+= bytes_read
;
2309 bytes_wanted
-= bytes_read
;
2310 records_read
= (bytes_read
>>
2311 (vm_page_shift
- ps
->ps_record_shift
));
2312 dev_offset
+= records_read
;
2313 DEBUG(DEBUG_VS_INTERNAL
,
2314 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2315 dev_buffer
, bytes_read
));
2316 if (vm_deallocate(kernel_map
, dev_buffer
, bytes_read
)
2318 Panic("dealloc buf");
2319 } while (bytes_wanted
);
2321 *residualp
= size
- total_read
;
2322 if((dev_buffer
!= *bufferp
) && (total_read
!= 0)) {
2323 vm_offset_t temp_buffer
;
2324 vm_allocate(kernel_map
, &temp_buffer
, total_read
, TRUE
);
2325 memcpy((void *) temp_buffer
, (void *) *bufferp
, total_read
);
2326 if(vm_map_copyin_page_list(kernel_map
, temp_buffer
, total_read
,
2327 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2328 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2329 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2330 (vm_map_copy_t
*)&device_data
, FALSE
))
2331 panic("ps_read_device: cannot copyin locally provided buffer\n");
2333 else if((kr
== KERN_SUCCESS
) && (total_read
!= 0) && (dev_buffer
!= 0)){
2334 if(vm_map_copyin_page_list(kernel_map
, dev_buffer
, bytes_read
,
2335 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2336 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2337 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2338 (vm_map_copy_t
*)&device_data
, FALSE
))
2339 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2344 *bufferp
= (vm_offset_t
)device_data
;
2347 /* Free the receive buffer */
2348 dpt
->checked_out
= 0;
2349 thread_wakeup(&dpt_array
);
2351 return KERN_SUCCESS
;
2354 kern_return_t
ps_write_device(paging_segment_t
, vm_offset_t
, vm_offset_t
, unsigned int, struct vs_async
*); /* forward */
2358 paging_segment_t ps
,
2362 struct vs_async
*vsa
)
2364 recnum_t dev_offset
;
2365 io_buf_len_t bytes_to_write
, bytes_written
;
2366 recnum_t records_written
;
2368 MACH_PORT_FACE reply_port
;
2372 clustered_writes
[atop_32(size
)]++;
2374 dev_offset
= (ps
->ps_offset
+
2375 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2376 bytes_to_write
= size
;
2380 * Asynchronous write.
2382 reply_port
= vsa
->reply_port
;
2383 ip_lock(reply_port
);
2384 reply_port
->ip_sorights
++;
2385 ip_reference(reply_port
);
2386 ip_unlock(reply_port
);
2389 device
= dev_port_lookup(ps
->ps_device
);
2391 vsa
->vsa_addr
= addr
;
2392 kr
=ds_device_write_common(device
,
2394 (mach_msg_type_name_t
) MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2397 (io_buf_ptr_t
) addr
,
2399 (IO_WRITE
| IO_CALL
),
2402 if ((kr
!= KERN_SUCCESS
) && (kr
!= MIG_NO_REPLY
)) {
2404 dprintf(("%s0x%x, addr=0x%x,"
2405 "size=0x%x,offset=0x%x\n",
2406 "device_write_request returned ",
2407 kr
, addr
, size
, offset
));
2409 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2410 /* do the completion notification to free resources */
2411 device_write_reply(reply_port
, kr
, 0);
2416 * Synchronous write.
2420 device
= dev_port_lookup(ps
->ps_device
);
2421 kr
=ds_device_write_common(device
,
2425 (io_buf_ptr_t
) addr
,
2427 (IO_WRITE
| IO_SYNC
| IO_KERNEL_BUF
),
2430 if (kr
!= KERN_SUCCESS
) {
2431 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2432 "device_write returned ",
2433 kr
, addr
, size
, offset
));
2435 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2438 if (bytes_written
& ((vm_page_size
>> ps
->ps_record_shift
) - 1))
2439 Panic("fragmented write");
2440 records_written
= (bytes_written
>>
2441 (vm_page_shift
- ps
->ps_record_shift
));
2442 dev_offset
+= records_written
;
2444 if (bytes_written
!= bytes_to_write
) {
2445 dprintf(("wrote only %d bytes out of %d\n",
2446 bytes_written
, bytes_to_write
));
2449 bytes_to_write
-= bytes_written
;
2450 addr
+= bytes_written
;
2451 } while (bytes_to_write
> 0);
2453 return PAGER_SUCCESS
;
2457 #else /* !DEVICE_PAGING */
2461 paging_segment_t ps
,
2463 vm_offset_t
*bufferp
,
2465 unsigned int *residualp
,
2468 panic("ps_read_device not supported");
2472 paging_segment_t ps
,
2476 struct vs_async
*vsa
)
2478 panic("ps_write_device not supported");
2481 #endif /* DEVICE_PAGING */
void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t);	/* forward */

void
pvs_object_data_provided(
	vstruct_t	vs,
	upl_t		upl,
	vm_offset_t	offset,
	vm_size_t	size)
{
	DEBUG(DEBUG_VS_INTERNAL,
	      ("buffer=0x%x,offset=0x%x,size=0x%x\n",
	       upl, offset, size));

	GSTAT(global_stats.gs_pages_in += atop_32(size));

#if	USE_PRECIOUS
	ps_clunmap(vs, offset, size);
#endif	/* USE_PRECIOUS */
}
kern_return_t
pvs_cluster_read(
	vstruct_t	vs,
	vm_offset_t	vs_offset,
	vm_size_t	cnt)
{
	upl_t			upl;
	kern_return_t		error = KERN_SUCCESS;
	int			size;
	unsigned int		residual;
	unsigned int		request_flags;
	int			seg_index;
	int			pages_in_cl;
	int			cl_size;
	int			cl_mask;
	int			cl_index;
	int			xfer_size;
	int			ps_info_valid;
	struct clmap		clmap;
	vm_offset_t		ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	paging_segment_t	psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;
	cl_mask = cl_size - 1;
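
	/*
	 * Example of the cluster geometry above (illustrative values): with
	 * 4KB pages and vs_clshift == 2, pages_in_cl is 4, cl_size is 16KB
	 * and cl_mask is 0x3fff.  The ps_offset/psp arrays are sized for a
	 * full super-cluster: (VM_SUPER_CLUSTER / PAGE_SIZE) >>
	 * VSTRUCT_DEF_CLSHIFT == (0x40000 / 0x1000) >> 2 == 16 entries.
	 */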
	/*
	 * This loop will be executed multiple times until the entire
	 * request has been satisfied... if the request spans cluster
	 * boundaries, the clusters will be checked for logical continuity,
	 * if contiguous the I/O request will span multiple clusters, otherwise
	 * it will be broken up into the minimal set of I/O's
	 *
	 * If there are holes in a request (either unallocated pages in a paging
	 * segment or an unallocated paging segment), we stop
	 * reading at the hole, inform the VM of any data read, inform
	 * the VM of an unavailable range, then loop again, hoping to
	 * find valid pages later in the requested range. This continues until
	 * the entire range has been examined, and read, if present.
	 */

#if	USE_PRECIOUS
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif
	while (cnt && (error == KERN_SUCCESS)) {
		int	page_list_count;

		if ((vs_offset & cl_mask) &&
		    (cnt > (VM_SUPER_CLUSTER -
			    (vs_offset & cl_mask)))) {
			size = VM_SUPER_CLUSTER;
			size -= vs_offset & cl_mask;
		} else if (cnt > VM_SUPER_CLUSTER) {
			size = VM_SUPER_CLUSTER;
		} else
			size = cnt;
		cnt -= size;

		ps_info_valid = 0;
		seg_index     = 0;
		while (size > 0 && error == KERN_SUCCESS) {
			int		abort_size;
			int		failed_size;
			int		beg_pseg;
			int		beg_indx;
			vm_offset_t	cur_offset;

			if ( !ps_info_valid ) {
				ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
				psp[seg_index]       = CLMAP_PS(clmap);
				ps_info_valid = 1;
			}
			/*
			 * skip over unallocated physical segments
			 */
			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				abort_size = cl_size - (vs_offset & cl_mask);
				abort_size = MIN(abort_size, size);

				page_list_count = 0;
				memory_object_super_upl_request(
					vs->vs_control,
					(memory_object_offset_t)vs_offset,
					abort_size, abort_size,
					&upl, NULL, &page_list_count,
					request_flags);

				if (clmap.cl_error) {
					upl_abort(upl, UPL_ABORT_ERROR);
				} else {
					upl_abort(upl, UPL_ABORT_UNAVAILABLE);
				}
				upl_deallocate(upl);

				size      -= abort_size;
				vs_offset += abort_size;

				ps_info_valid = 0;
				continue;
			}
			cl_index = (vs_offset & cl_mask) / vm_page_size;

			for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
				/*
				 * skip over unallocated pages
				 */
				if (CLMAP_ISSET(clmap, cl_index))
					break;
				abort_size += vm_page_size;
			}
			if (abort_size) {
				/*
				 * Let VM system know about holes in clusters.
				 */
				GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));

				page_list_count = 0;
				memory_object_super_upl_request(
					vs->vs_control,
					(memory_object_offset_t)vs_offset,
					abort_size, abort_size,
					&upl, NULL, &page_list_count,
					request_flags);

				upl_abort(upl, UPL_ABORT_UNAVAILABLE);
				upl_deallocate(upl);

				size      -= abort_size;
				vs_offset += abort_size;

				if (cl_index == pages_in_cl) {
					/*
					 * if we're at the end of this physical cluster
					 * then bump to the next one and continue looking
					 */
					ps_info_valid = 0;
					continue;
				}
				if (size == 0)
					break;
			}
			/*
			 * remember the starting point of the first allocated page
			 * for the I/O we're about to issue
			 */
			beg_pseg   = seg_index;
			beg_indx   = cl_index;
			cur_offset = vs_offset;
			/*
			 * calculate the size of the I/O that we can do...
			 * this may span multiple physical segments if
			 * they are contiguous
			 */
			for (xfer_size = 0; xfer_size < size; ) {

				while (cl_index < pages_in_cl
				       && xfer_size < size) {
					/*
					 * accumulate allocated pages within
					 * a physical segment
					 */
					if (CLMAP_ISSET(clmap, cl_index)) {
						xfer_size  += vm_page_size;
						cur_offset += vm_page_size;
						cl_index++;

						BS_STAT(psp[seg_index]->ps_bs,
							psp[seg_index]->ps_bs->bs_pages_in++);
					} else
						break;
				}
				if (cl_index < pages_in_cl
				    || xfer_size >= size) {
					/*
					 * we've hit an unallocated page or
					 * the end of this request... go fire
					 * the I/O
					 */
					break;
				}
				/*
				 * we've hit the end of the current physical
				 * segment and there's more to do, so try
				 * moving to the next one
				 */
				seg_index++;

				ps_offset[seg_index] =
					ps_clmap(vs,
						 cur_offset & ~cl_mask,
						 &clmap, CL_FIND, 0, 0);
				psp[seg_index] = CLMAP_PS(clmap);
				ps_info_valid = 1;

				if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
					/*
					 * if the physical segment we're about
					 * to step into is not contiguous to
					 * the one we're currently in, or it's
					 * in a different paging file, or
					 * it hasn't been allocated....
					 * we stop here and generate the I/O
					 */
					break;
				}
				/*
				 * start with first page of the next physical
				 * segment
				 */
				cl_index = 0;
			}
			/*
			 * we have a contiguous range of allocated pages
			 * to read from
			 */
			page_list_count = 0;
			memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)vs_offset,
					xfer_size, xfer_size,
					&upl, NULL, &page_list_count,
					request_flags | UPL_SET_INTERNAL);

			error = ps_read_file(psp[beg_pseg],
					upl, (vm_offset_t) 0,
					ps_offset[beg_pseg] +
						(beg_indx * vm_page_size),
					xfer_size, &residual, 0);
			/*
			 * Adjust counts and send response to VM.  Optimize
			 * for the common case, i.e. no error and/or partial
			 * data. If there was an error, then we need to error
			 * the entire range, even if some data was successfully
			 * read. If there was a partial read we may supply some
			 * data and may error some as well.  In all cases the
			 * VM must receive some notification for every page in the
			 * requested range.
			 */
			if ((error == KERN_SUCCESS) && (residual == 0)) {
				/*
				 * Got everything we asked for, supply the data
				 * to the VM.  Note that as a side effect of
				 * supplying the data, the buffer holding the
				 * supplied data is deallocated from the pager's
				 * address space.
				 */
				pvs_object_data_provided(
					vs, upl, vs_offset, xfer_size);
			} else {
				failed_size = xfer_size;

				if (error == KERN_SUCCESS) {
					if (residual == xfer_size) {
						/*
						 * If a read operation returns no error
						 * and no data moved, we turn it into
						 * an error, assuming we're reading at
						 * or beyond the end of the device.
						 *
						 * Fall through and error the entire
						 * range.
						 */
						error = KERN_FAILURE;
					} else {
						/*
						 * Otherwise, we have partial read. If
						 * the part read is an integral number
						 * of pages supply it. Otherwise round
						 * it up to a page boundary, zero fill
						 * the unread part, and supply it.
						 * Fall through and error the remainder
						 * of the range, if any.
						 */
						int	fill, lsize;

						fill  = residual & ~vm_page_size;
						lsize = (xfer_size - residual) + fill;

						pvs_object_data_provided(
							vs, upl, vs_offset, lsize);

						if (lsize < xfer_size) {
							failed_size = xfer_size - lsize;
							error = KERN_FAILURE;
						}
					}
				}
			}
			/*
			 * If there was an error in any part of the range, tell
			 * the VM. Note that error is explicitly checked again
			 * since it can be modified above.
			 */
			if (error != KERN_SUCCESS) {
				BS_STAT(psp[beg_pseg]->ps_bs,
					psp[beg_pseg]->ps_bs->bs_pages_in_fail
						+= atop_32(failed_size));
			}
			size      -= xfer_size;
			vs_offset += xfer_size;
		}

	} /* END while (cnt && (error == 0)) */

	return error;
}
int	vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
	vstruct_t	vs,
	upl_t		internal_upl,
	vm_offset_t	offset,
	vm_size_t	cnt,
	boolean_t	dp_internal,
	int		flags)
{
	vm_offset_t		transfer_size;
	vm_offset_t		actual_offset;	/* Offset within paging segment */
	paging_segment_t	ps;
	vm_offset_t		subx_size;
	vm_offset_t		mobj_base_addr;
	vm_offset_t		mobj_target_addr;
	struct clmap		clmap;

	upl_t			upl;
	struct vs_async		*vsa;
	upl_page_info_t		*pl;
	int			page_list_count;
	int			error = 0;
	int			super_size;
	int			request_flags;
	int			seg_index;
	int			pages_in_cl;
	int			cl_size;
	int			page_index;
	int			num_of_pages;
	int			first_dirty;
	int			num_dirty;
	vm_size_t		list_size;
	vm_offset_t		upl_offset;
	vm_offset_t		seg_offset;
	vm_offset_t		ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	paging_segment_t	psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;
	if (!dp_internal) {
		if (bs_low) {
			super_size = cl_size;

			request_flags = UPL_NOBLOCK |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		} else {
			super_size = VM_SUPER_CLUSTER;

			request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		}

		page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)offset,
				cnt, super_size,
				&upl, NULL, &page_list_count,
				request_flags | UPL_FOR_PAGEOUT);

		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
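
		/*
		 * Note on the request above: for the !dp_internal path the
		 * pager asks the VM for a UPL of dirty/precious pages
		 * covering up to super_size bytes -- either one cluster
		 * (cl_size) or a full VM_SUPER_CLUSTER (0x40000 bytes, i.e.
		 * 64 pages of 4KB) -- and then walks the returned page list
		 * (pl) to find runs of pages it can write out contiguously.
		 */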
		for (seg_index = 0, transfer_size = upl->size;
		     transfer_size > 0; ) {

			ps_offset[seg_index] =
				ps_clmap(vs, upl->offset + (seg_index * cl_size),
					 &clmap, CL_ALLOC,
					 transfer_size < cl_size ?
					 transfer_size : cl_size, 0);

			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				upl_deallocate(upl);

				return KERN_FAILURE;
			}
			psp[seg_index] = CLMAP_PS(clmap);

			if (transfer_size > cl_size) {
				transfer_size -= cl_size;
				seg_index++;
			} else
				transfer_size = 0;
		}
		for (page_index = 0,
		     num_of_pages = upl->size / vm_page_size;
		     page_index < num_of_pages; ) {
			/*
			 * skip over non-dirty pages
			 */
			for ( ; page_index < num_of_pages; page_index++) {
				if (UPL_DIRTY_PAGE(pl, page_index)
				    || UPL_PRECIOUS_PAGE(pl, page_index))
					/*
					 * this is a page we need to write
					 * go see if we can buddy it up with
					 * others that are contiguous to it
					 */
					break;
				/*
				 * if the page is not-dirty, but present we
				 * need to commit it...  This is an unusual
				 * case since we only asked for dirty pages
				 */
				if (UPL_PAGE_PRESENT(pl, page_index)) {
					boolean_t empty = FALSE;
					upl_commit_range(upl,
							 page_index * vm_page_size,
							 vm_page_size,
							 UPL_COMMIT_NOTIFY_EMPTY,
							 pl,
							 page_list_count,
							 &empty);
					if (empty)
						upl_deallocate(upl);
				}
			}
			if (page_index == num_of_pages)
				/*
				 * no more pages to look at, we're out of here
				 */
				break;
			/*
			 * gather up contiguous dirty pages... we have at
			 * least 1 otherwise we would have bailed above
			 * make sure that each physical segment that we step
			 * into is contiguous to the one we're currently in
			 * if it's not, we have to stop and write what we have
			 */
			for (first_dirty = page_index;
			     page_index < num_of_pages; ) {
				if ( !UPL_DIRTY_PAGE(pl, page_index)
				    && !UPL_PRECIOUS_PAGE(pl, page_index))
					break;
				page_index++;
				/*
				 * if we just looked at the last page in the UPL
				 * we don't need to check for physical segment
				 * continuity
				 */
				if (page_index < num_of_pages) {
					int	cur_seg;
					int	nxt_seg;

					cur_seg = (page_index - 1) / pages_in_cl;
					nxt_seg = page_index / pages_in_cl;

					if (cur_seg != nxt_seg) {
						if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
							/*
							 * if the segment we're about
							 * to step into is not
							 * contiguous to the one we're
							 * currently in, or it's in a
							 * different paging file....
							 * we stop here and generate
							 * the I/O
							 */
							break;
					}
				}
			}
			num_dirty = page_index - first_dirty;
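
			/*
			 * Example of the buddying-up above (illustrative
			 * numbers): if the skip loop stopped at page_index 5
			 * and pages 5..8 are dirty while page 9 is clean,
			 * the gather loop leaves page_index at 9, so
			 * first_dirty == 5 and num_dirty == 4, producing one
			 * 4-page (16KB with 4KB pages) write instead of four
			 * single-page writes.
			 */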
			if (num_dirty) {
				upl_offset = first_dirty * vm_page_size;
				seg_index  = first_dirty / pages_in_cl;
				seg_offset = upl_offset - (seg_index * cl_size);
				transfer_size = num_dirty * vm_page_size;

				while (transfer_size) {
					int	seg_size;

					if ((seg_size = cl_size -
						(upl_offset % cl_size)) > transfer_size)
						seg_size = transfer_size;

					ps_vs_write_complete(vs,
						upl->offset + upl_offset,
						seg_size, error);

					transfer_size -= seg_size;
					upl_offset    += seg_size;
				}
				upl_offset    = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				error = ps_write_file(psp[seg_index],
						upl, upl_offset,
						ps_offset[seg_index]
							+ seg_offset,
						transfer_size, flags);

				if (error != KERN_SUCCESS) {
					boolean_t empty = FALSE;
					upl_abort_range(upl,
						first_dirty * vm_page_size,
						num_dirty * vm_page_size,
						UPL_ABORT_NOTIFY_EMPTY,
						&empty);
					if (empty)
						upl_deallocate(upl);
				}
			}
		}
	} else {
		assert(cnt <= (vm_page_size << vs->vs_clshift));
		list_size = cnt;

		/* The caller provides a mapped_data which is derived  */
		/* from a temporary object.  The targeted pages are    */
		/* guaranteed to be set at offset 0 in the mapped_data */
		/* The actual offset however must still be derived     */
		/* from the offset in the vs in question               */
		mobj_base_addr = offset;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
						 &clmap, CL_ALLOC,
						 transfer_size < cl_size ?
						 transfer_size : cl_size, 0);
			if (actual_offset == (vm_offset_t) -1) {
				error = KERN_FAILURE;
				break;
			}
			cnt = MIN(transfer_size,
				  CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);
			/* Assume that the caller has given us contiguous */
			/* pages */
			if (cnt) {
				ps_vs_write_complete(vs, mobj_target_addr,
						     cnt, error);
				error = ps_write_file(ps, internal_upl,
						      0, actual_offset,
						      cnt, flags);
				if (error)
					break;
			}
			actual_offset += cnt;
			mobj_target_addr += cnt;
			transfer_size -= cnt;
		}
	}
	if (error)
		return KERN_FAILURE;
	else
		return KERN_SUCCESS;
}
vm_size_t
ps_vstruct_allocated_size(
	vstruct_t	vs)
{
	int		num_pages;
	struct vs_map	*vsmap;
	int		i, j, k;

	num_pages = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]))
					continue;
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k))
						num_pages++;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]))
				continue;
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k))
					num_pages++;
			}
		}
	}

	return ptoa_32(num_pages);
}
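
/*
 * Example of the bitmap counting above (illustrative values): with
 * VSCLSIZE(vs) == 4 pages per cluster and a cluster whose VSM_BMAP()
 * bitmap is 0b1011, three bits are set, so that cluster contributes
 * 3 pages to num_pages; ptoa_32() then converts the page total into
 * bytes (3 * 4096 == 12288 with 4KB pages).
 */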
unsigned int
ps_vstruct_allocated_pages(
	vstruct_t		vs,
	default_pager_page_t	*pages,
	size_t			pages_size)
{
	unsigned int	num_pages;
	struct vs_map	*vsmap;
	vm_offset_t	offset;
	int		i, j, k;

	num_pages = 0;
	offset = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL) {
				offset += (vm_page_size * CLMAP_ENTRIES *
					   VSCLSIZE(vs));
				continue;
			}
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j])) {
					offset += vm_page_size * VSCLSIZE(vs);
					continue;
				}
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
						num_pages++;
						if (num_pages < pages_size)
							pages++->dpp_offset =
								offset;
					}
					offset += vm_page_size;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j])) {
				offset += vm_page_size * VSCLSIZE(vs);
				continue;
			}
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
					num_pages++;
					if (num_pages < pages_size)
						pages++->dpp_offset = offset;
				}
				offset += vm_page_size;
			}
		}
	}

	return num_pages;
}
kern_return_t
ps_vstruct_transfer_from_segment(
	vstruct_t		vs,
	paging_segment_t	segment,
	upl_t			upl)
{
	struct vs_map	*vsmap;
	struct vs_map	old_vsmap;
	struct vs_map	new_vsmap;
	int		i, j;
	int		vsmap_size;
	vm_offset_t	clmap_off;

	VS_LOCK(vs);	/* block all work on this vstruct */
			/* can't allow the normal multiple write */
			/* semantic because writes may conflict */
	vs->vs_xfer_pending = TRUE;
	vs_wait_for_sync_writers(vs);
	vs_start_write(vs);
	vs_wait_for_readers(vs);
	/* we will unlock the vs to allow other writes while transferring */
	/* and will be guaranteed of the persistence of the vs struct */
	/* because the caller of ps_vstruct_transfer_from_segment bumped */
	/* vs_async_pending */
	/* OK we now have guaranteed no other parties are accessing this */
	/* vs.  Now that we are also supporting simple lock versions of */
	/* vs_lock we cannot hold onto VS_LOCK as we may block below. */
	/* our purpose in holding it before was the multiple write case */
	/* we now use the boolean xfer_pending to do that.  We can use */
	/* a boolean instead of a count because we have guaranteed single */
	/* file access to this code in its caller */
	VS_UNLOCK(vs);

vs_changed:
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			clmap_off = (vm_page_size * CLMAP_ENTRIES *
				     VSCLSIZE(vs) * i);
			if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
			else
				vsmap_size = CLMAP_ENTRIES;
			for (j = 0; j < vsmap_size; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]) ||
				    (VSM_PS(vsmap[j]) != segment))
					continue;
				if (vs_cluster_transfer(vs,
					(vm_page_size * (j << vs->vs_clshift))
						+ clmap_off,
					vm_page_size << vs->vs_clshift,
					upl) != KERN_SUCCESS) {
					VS_LOCK(vs);
					vs->vs_xfer_pending = FALSE;
					VS_UNLOCK(vs);
					vs_finish_write(vs);
					return KERN_FAILURE;
				}
				/* allow other readers/writers during transfer*/
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);

				VS_LOCK(vs);
				vs->vs_xfer_pending = TRUE;
				vs_wait_for_sync_writers(vs);
				vs_start_write(vs);
				vs_wait_for_readers(vs);
				VS_UNLOCK(vs);
			}
		}
	}

	if (!(vs->vs_indirect)) {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			return KERN_SUCCESS;
		}
		/* loop on clusters in the direct map */
		for (j = 0; j < vs->vs_size; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]) ||
			    (VSM_PS(vsmap[j]) != segment))
				continue;
			if (vs_cluster_transfer(vs,
				vm_page_size * (j << vs->vs_clshift),
				vm_page_size << vs->vs_clshift,
				upl) != KERN_SUCCESS) {
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				return KERN_FAILURE;
			}
			/* allow other readers/writers during transfer*/
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);

			VS_LOCK(vs);
			vs->vs_xfer_pending = TRUE;
			vs_wait_for_sync_writers(vs);
			vs_start_write(vs);
			vs_wait_for_readers(vs);
			VS_UNLOCK(vs);
			if (vs->vs_indirect) {
				goto vs_changed;
			}
		}
	}

	VS_LOCK(vs);
	vs->vs_xfer_pending = FALSE;
	VS_UNLOCK(vs);
	vs_finish_write(vs);
	return KERN_SUCCESS;
}
vs_map_t
vs_get_map_entry(
	vstruct_t	vs,
	vm_offset_t	offset)
{
	struct vs_map	*vsmap;
	vm_offset_t	cluster;

	cluster = atop_32(offset) >> vs->vs_clshift;
	if (vs->vs_indirect) {
		long	ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == (vs_map_t) NULL)
			return vsmap;
	} else
		vsmap = vs->vs_dmap;
	vsmap += cluster%CLMAP_ENTRIES;
	return vsmap;
}
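
/*
 * Example of the cluster-index math above (illustrative values): with 4KB
 * pages and vs_clshift == 2 (4 pages per cluster), offset 0x24000 is page
 * 0x24, which lives in cluster 0x24 >> 2 == 9; for an indirect vstruct the
 * entry is found in indirect block 9 / CLMAP_ENTRIES at index
 * 9 % CLMAP_ENTRIES.
 */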
kern_return_t
vs_cluster_transfer(
	vstruct_t	vs,
	vm_offset_t	offset,
	vm_size_t	cnt,
	upl_t		upl)
{
	vm_offset_t		actual_offset;
	paging_segment_t	ps;
	struct clmap		clmap;
	kern_return_t		error = KERN_SUCCESS;
	int			size, size_wanted, i;
	unsigned int		residual;
	unsigned int		unavail_size;
	default_pager_thread_t	*dpt;
	struct vs_map		*vsmap_ptr;
	struct vs_map		read_vsmap;
	struct vs_map		original_read_vsmap;
	struct vs_map		write_vsmap;

	/* vs_cluster_transfer reads in the pages of a cluster and
	 * then writes these pages back to new backing store.  The
	 * segment the pages are being read from is assumed to have
	 * been taken off-line and is no longer considered for new
	 * space requests.
	 */

	/*
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, then loop again, hoping to
	 * find valid pages later in the cluster.  This continues until
	 * the entire range has been examined, and read, if present.  The
	 * pages are written as they are read.  If a failure occurs after
	 * some pages are written the unmap call at the bottom of the loop
	 * recovers the backing store and the old backing store remains
	 * in effect.
	 */

	VSM_CLR(write_vsmap);
	VSM_CLR(original_read_vsmap);
	/* grab the actual object's pages to sync with I/O */
	while (cnt && (error == KERN_SUCCESS)) {
		vsmap_ptr = vs_get_map_entry(vs, offset);
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (vm_offset_t) -1) {

			/*
			 * Nothing left to write in this cluster at least
			 * set write cluster information for any previous
			 * write, clear for next cluster, if there is one
			 */
			unsigned int local_size, clmask, clsize;

			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			ASSERT(local_size);
			local_size = MIN(local_size, cnt);

			/* This cluster has no data in it beyond what may   */
			/* have been found on a previous iteration through  */
			/* the loop "write_vsmap"                           */
			*vsmap_ptr = write_vsmap;
			VSM_CLR(write_vsmap);
			VSM_CLR(original_read_vsmap);

			cnt    -= local_size;
			offset += local_size;
			continue;
		}
		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		ASSERT(ps);
		size = 0;
		unavail_size = 0;
		for (i = 0;
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				if (size != 0)
					break;
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			ASSERT(unavail_size);
			cnt    -= unavail_size;
			offset += unavail_size;
			if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
							== 0) {
				/* There is no more to transfer in this
				   cluster
				 */
				*vsmap_ptr = write_vsmap;
				VSM_CLR(write_vsmap);
				VSM_CLR(original_read_vsmap);
			}
			continue;
		}

		if (VSM_ISCLR(original_read_vsmap))
			original_read_vsmap = *vsmap_ptr;
		if (ps->ps_segtype == PS_PARTITION) {
/*
			NEED TO ISSUE WITH SYNC & NO COMMIT
			error = ps_read_device(ps, actual_offset, &buffer,
				       size, &residual, flags);
*/
		} else {
			/* NEED TO ISSUE WITH SYNC & NO COMMIT */
			error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
					     size, &residual,
					     (UPL_IOSYNC | UPL_NOCOMMIT));
		}

		read_vsmap = *vsmap_ptr;
		/*
		 * Adjust counts and put data in new BS.  Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			int page_list_count = 0;

			/*
			 * Got everything we asked for, supply the data to
			 * the new BS.  Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space unless
			 * the write is unsuccessful.
			 */

			/* note buffer will be cleaned up in all cases by */
			/* internal_cluster_write or if an error on write */
			/* the vm_map_copy_page_discard call              */
			*vsmap_ptr = write_vsmap;

			if (vs_cluster_write(vs, upl, offset,
					size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
				error = KERN_FAILURE;
				if (!(VSM_ISCLR(*vsmap_ptr))) {
					/* unmap the new backing store object */
					ps_clunmap(vs, offset, size);
				}
				/* original vsmap */
				*vsmap_ptr = original_read_vsmap;
				VSM_CLR(write_vsmap);
			} else {
				if ((offset + size) &
				    ((vm_page_size << vs->vs_clshift)
				     - 1)) {
					/* There is more to transfer in this
					   cluster
					 */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
				} else {
					/* discard the old backing object */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
					*vsmap_ptr = write_vsmap;
					VSM_CLR(write_vsmap);
					VSM_CLR(original_read_vsmap);
				}
			}
		} else {
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond the end of the device.
					 *
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				} else {
					/*
					 * Otherwise, we have partial read.
					 * This is also considered an error
					 * for the purposes of cluster transfer
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				}
			}
		}
		cnt    -= size;
		offset += size;

	} /* END while (cnt && (error == 0)) */
	if (!VSM_ISCLR(write_vsmap))
		*vsmap_ptr = write_vsmap;

	return error;
}
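
/*
 * Note on the vsmap bookkeeping above: read_vsmap snapshots the cluster's
 * map entry after the read but before the re-write, write_vsmap accumulates
 * the entry that describes the newly written backing store, and
 * original_read_vsmap preserves the entry as it was on entry so the old
 * mapping can be restored if the write to the new backing store fails
 * partway through a cluster.
 */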
kern_return_t
default_pager_add_file(MACH_PORT_FACE backing_store,
	int		device,
	vm_offset_t	offset,
	int		record_size,
	vm_size_t	size,
	char		*vp)
{
	backing_store_t		bs;
	paging_segment_t	ps;
	int			i;
	int			error;

	if ((bs = backing_store_lookup(backing_store))
			== BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		if (ps->ps_segtype != PS_FILE)
			continue;

		/*
		 * Check for overlap on same device.
		 */
		if (ps->ps_vnode == (struct vnode *)vp) {
			return KERN_INVALID_ARGUMENT;
		}
	}
	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	ps->ps_segtype = PS_FILE;
	ps->ps_vnode = (struct vnode *)vp;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = size;
	ps->ps_pgnum = size >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;

	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
	if (!ps->ps_bmap) {
		kfree((vm_offset_t)ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;
	}
	for (i = 0; i < ps->ps_ncls; i++) {
		clrbit(ps->ps_bmap, i);
	}

	ps->ps_going_away = FALSE;
	ps->ps_bs = bs;

	if ((error = ps_enter(ps)) != 0) {
		kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree((vm_offset_t)ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

	dp_pages_free += ps->ps_pgcount;

	bs_more_space(ps->ps_clcount);

	DEBUG(DEBUG_BS_INTERNAL,
	      ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
	       device, offset, size, record_size,
	       ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
}
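
/*
 * Worked example for the sizing math above (illustrative values only):
 * with vm_page_size == 4096 and record_size == 512, ps_record_shift is
 * local_log2(4096 / 512) == 3, so a file of `size' 512-byte records holds
 * size >> 3 pages.  With a backing-store cluster size bs_clsize of 4 pages,
 * ps_clshift == 2 and the segment contributes ps_pgcount >> 2 clusters,
 * i.e. ps_clcount << ps_clshift pages, to bs_pages_free/bs_pages_total.
 */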
kern_return_t
ps_read_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	unsigned int		*residualp,
	int			flags)
{
	vm_object_offset_t	f_offset;
	int			error = 0;
	int			result;

	clustered_reads[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	/* for transfer case we need to pass uploffset and flags */
	error = vnode_pagein(ps->ps_vnode,
			     upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);

	/* The vnode_pagein semantic is somewhat at odds with the existing   */
	/* device_read semantic.  Partial reads are not experienced at this  */
	/* level.  It is up to the bit map code and cluster read code to     */
	/* check that requested data locations are actually backed, and the  */
	/* pagein code to either read all of the requested data or return an */
	/* error.                                                            */
	if (error)
		result = KERN_FAILURE;
	else {
		*residualp = 0;
		result = KERN_SUCCESS;
	}
	return result;
}
kern_return_t
ps_write_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	int			flags)
{
	vm_object_offset_t	f_offset;
	kern_return_t		result;

	clustered_writes[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	if (vnode_pageout(ps->ps_vnode,
			  upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
		result = KERN_FAILURE;
	else
		result = KERN_SUCCESS;

	return result;
}
kern_return_t
default_pager_triggers(MACH_PORT_FACE default_pager,
	int		hi_wat,
	int		lo_wat,
	int		flags,
	MACH_PORT_FACE	trigger_port)
{
	MACH_PORT_FACE	release;
	kern_return_t	kr;

	if (flags == HI_WAT_ALERT) {
		release = min_pages_trigger_port;
		min_pages_trigger_port = trigger_port;
		minimum_pages_remaining = hi_wat/vm_page_size;
		kr = KERN_SUCCESS;
	} else if (flags == LO_WAT_ALERT) {
		release = max_pages_trigger_port;
		max_pages_trigger_port = trigger_port;
		maximum_pages_free = lo_wat/vm_page_size;
		kr = KERN_SUCCESS;
	} else {
		release = trigger_port;
		kr = KERN_INVALID_ARGUMENT;
	}

	if (IP_VALID(release))
		ipc_port_release_send(release);

	return kr;
}
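
/*
 * Example of the watermark conversion above (illustrative values): a
 * HI_WAT_ALERT registration with hi_wat == 32MB and 4KB pages sets
 * minimum_pages_remaining to 32MB / 4KB == 8192 pages, the page-count
 * threshold the pager later compares against its free paging-space pages
 * when deciding whether to notify min_pages_trigger_port.
 */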