/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	Paging File Management.
 */
#include <mach/memory_object_server.h>
#include "default_pager_internal.h"
#include <default_pager/default_pager_alerts.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
/*
 * MAXPHYS is derived from bsd/bsd/ppc/param.h; we need a universal
 * value originating in the kernel, or a formal means of exporting it
 * from the bsd component.
 */
#define MAXPHYS (64 * 1024)
int physical_transfer_cluster_count = 0;

#define VM_SUPER_CLUSTER    0x10000

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_DEF_CLSHIFT 2
int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;

unsigned int clustered_writes[MAX_CLUSTER_SIZE+1];
unsigned int clustered_reads[MAX_CLUSTER_SIZE+1];
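/*
 * Example of the cluster-shift arithmetic above: with
 * VSTRUCT_DEF_CLSHIFT == 2, a cluster spans 1 << 2 = 4 pages;
 * assuming a 4K vm_page_size that is 16K of backing store per cluster.
 */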
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list:		head of list of to-be-completed I/O ops
 *	async_num_queued:	number of pages completed, but not yet
 *				processed by async thread.
 *	async_requests_out:	number of pages of requests not completed.
 */
struct vs_async *vs_async_list;
int async_num_queued;
int async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async *vs_async_free_list;

mutex_t default_pager_async_lock;       /* Protects globals above */

int vs_alloc_async_failed = 0;          /* statistics */
int vs_alloc_async_count = 0;           /* statistics */
struct vs_async *vs_alloc_async(void);          /* forward */
void vs_free_async(struct vs_async *vsa);       /* forward */

#define VS_ALLOC_ASYNC()    vs_alloc_async()
#define VS_FREE_ASYNC(vsa)  vs_free_async(vsa)

#define VS_ASYNC_LOCK()     mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()   mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()    mutex_init(&default_pager_async_lock, \
                                           ETAP_IO_DEV_PAGEH)
#define VS_ASYNC_LOCK_ADDR()    (&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int minimum_pages_remaining = 0;
unsigned int maximum_pages_free = 0;
ipc_port_t   min_pages_trigger_port = NULL;
ipc_port_t   max_pages_trigger_port = NULL;

boolean_t    bs_low = FALSE;

/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t    max_doubled_size = 4 * 1024 * 1024;    /* 4 meg */
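/*
 * The trigger ports above drive the swap-space hysteresis: when
 * dp_pages_free drops below minimum_pages_remaining a HI_WAT_ALERT is
 * sent on min_pages_trigger_port, and when it rises above
 * maximum_pages_free a LO_WAT_ALERT is sent on max_pages_trigger_port
 * (see ps_select_segment, ps_allocate_cluster and
 * ps_deallocate_cluster below).
 */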
/*
 * List of all backing store and segments.
 */
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t paging_segments_lock;
int paging_segment_max = 0;
int paging_segment_count = 0;
int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };

/*
 * Total pages free in system.
 * This differs from clusters committed/available, which is a measure of
 * the over-commitment of paging segments to backing store.  An idea which
 * is likely to be deprecated.
 */
unsigned int dp_pages_free = 0;
unsigned int cluster_transfer_minimum = 100;
kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t,
                            vm_offset_t, unsigned int, int);  /* forward */
default_pager_thread_t *
get_read_buffer(void)
{
    int i;

    while (TRUE) {
        DPT_LOCK(dpt_lock);
        for (i = 0; i < default_pager_internal_count; i++) {
            if (dpt_array[i]->checked_out == FALSE) {
                dpt_array[i]->checked_out = TRUE;
                DPT_UNLOCK(dpt_lock);
                return dpt_array[i];
            }
        }
        assert_wait(&dpt_array, THREAD_UNINT);
        DPT_UNLOCK(dpt_lock);
        thread_block((void (*)(void)) 0);
    }
}
void
bs_initialize(void)
{
    int i;

    /*
     * List of all backing store.
     */
    queue_init(&backing_store_list.bsl_queue);

    VS_ASYNC_LOCK_INIT();
#if VS_ASYNC_REUSE
    vs_async_free_list = NULL;
#endif  /* VS_ASYNC_REUSE */

    for (i = 0; i < MAX_CLUSTER_SIZE+1; i++) {
        clustered_writes[i] = 0;
        clustered_reads[i] = 0;
    }
}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);  /* forward */

void
bs_no_paging_space(
    boolean_t out_of_memory)
{
    static char here[] = "bs_no_paging_space";

    if (out_of_memory)
        dprintf(("*** OUT OF MEMORY ***\n"));
    panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}
void bs_more_space(int);  /* forward */
void bs_commit(int);      /* forward */

boolean_t user_warned = FALSE;
unsigned int clusters_committed = 0;
unsigned int clusters_available = 0;
unsigned int clusters_committed_peak = 0;
void
bs_more_space(
    int nclusters)
{
    /*
     * Account for new paging space.
     */
    clusters_available += nclusters;

    if (clusters_available >= clusters_committed) {
        if (verbose && user_warned) {
            printf("%s%s - %d excess clusters now.\n",
                   my_name,
                   "paging space is OK now",
                   clusters_available - clusters_committed);
            clusters_committed_peak = 0;
        }
    } else {
        if (verbose && user_warned) {
            printf("%s%s - still short of %d clusters.\n",
                   my_name,
                   "WARNING: paging space over-committed",
                   clusters_committed - clusters_available);
            clusters_committed_peak -= nclusters;
        }
    }
}
void
bs_commit(
    int nclusters)
{
    clusters_committed += nclusters;
    if (clusters_committed > clusters_available) {
        if (verbose && !user_warned) {
            printf("%s%s - short of %d clusters.\n",
                   my_name,
                   "WARNING: paging space over-committed",
                   clusters_committed - clusters_available);
        }
        if (clusters_committed > clusters_committed_peak) {
            clusters_committed_peak = clusters_committed;
        }
    } else {
        if (verbose && user_warned) {
            printf("%s%s - was short of up to %d clusters.\n",
                   my_name,
                   "paging space is OK now",
                   clusters_committed_peak - clusters_available);
            clusters_committed_peak = 0;
        }
    }
}
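/*
 * Example of the accounting above: with clusters_available == 100 and
 * clusters_committed == 120, bs_commit() considers the store
 * over-committed by 20 clusters; a later bs_more_space(30) raises
 * clusters_available to 130 and the "paging space is OK now" path is
 * taken with 10 excess clusters.
 */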
int default_pager_info_verbose = 1;

void
bs_global_info(
    vm_size_t *totalp,
    vm_size_t *freep)
{
    vm_size_t        pages_total, pages_free;
    paging_segment_t ps;
    int              i;
    static char here[] = "bs_global_info";

    pages_total = pages_free = 0;
    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps == PAGING_SEGMENT_NULL)
            continue;

        /*
         * no need to lock: by the time this data
         * gets back to any remote requestor it
         * will be obsolete anyways
         */
        pages_total += ps->ps_pgnum;
        pages_free += ps->ps_clcount << ps->ps_clshift;
        DEBUG(DEBUG_BS_INTERNAL,
              ("segment #%d: %d total, %d free\n",
               i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
    }
    *totalp = pages_total;
    *freep = pages_free;

    if (verbose && user_warned && default_pager_info_verbose) {
        if (clusters_available < clusters_committed) {
            printf("%s %d clusters committed, %d available.\n",
                   my_name,
                   clusters_committed,
                   clusters_available);
        }
    }
}
backing_store_t backing_store_alloc(void);  /* forward */

backing_store_t
backing_store_alloc(void)
{
    backing_store_t bs;
    static char here[] = "backing_store_alloc";

    bs = (backing_store_t) kalloc(sizeof (struct backing_store));
    if (bs == BACKING_STORE_NULL)
        panic("backing_store_alloc: no memory");

    bs->bs_port = MACH_PORT_NULL;
    bs->bs_pages_total = 0;
    bs->bs_pages_in_fail = 0;
    bs->bs_pages_out = 0;
    bs->bs_pages_out_fail = 0;

    return bs;
}
backing_store_t backing_store_lookup(MACH_PORT_FACE);  /* forward */

/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
    MACH_PORT_FACE port)
{
    backing_store_t bs;

/*
    port is currently backed with a vs structure in the alias field;
    we could create an ISBS alias and a port_is_bs call, but frankly
    I see no reason for the test: the bs->port == port check below
    will work properly on junk entries.

    if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
    if ((port == MACH_PORT_NULL))
        return BACKING_STORE_NULL;

    BSL_LOCK();
    queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
                  bs_links) {
        BS_LOCK(bs);
        if (bs->bs_port == port) {
            BSL_UNLOCK();
            /* Success, return it locked. */
            return bs;
        }
        BS_UNLOCK(bs);
    }
    BSL_UNLOCK();
    return BACKING_STORE_NULL;
}
void backing_store_add(backing_store_t);  /* forward */

void
backing_store_add(
    backing_store_t bs)
{
    MACH_PORT_FACE  port = bs->bs_port;
    MACH_PORT_FACE  pset = default_pager_default_set;
    kern_return_t   kr = KERN_SUCCESS;
    static char     here[] = "backing_store_add";

    if (kr != KERN_SUCCESS)
        panic("backing_store_add: add to set");
}
/*
 * Set up the default page shift, but only if it is not already
 * set and the argument is within range.
 */
bs_set_default_clsize(unsigned int npages)
{
    if (default_pager_clsize == 0)  /* if not yet set */
        vstruct_def_clshift = local_log2(npages);
}
int bs_get_global_clsize(int clsize);  /* forward */

int
bs_get_global_clsize(
    int clsize)
{
    int             i;
    MACH_PORT_FACE  DMM;
    kern_return_t   kr;
    static char     here[] = "bs_get_global_clsize";

    /*
     * Only allow setting of cluster size once.  If called
     * with no cluster size (default), we use the compiled-in default
     * for the duration.  The same cluster size is used for all
     * paging segments.
     */
    if (default_pager_clsize == 0) {
#ifdef NORMA_VM
        /*
         * On NORMA, don't use clustered paging because
         * XMM can't handle it.
         */
        vstruct_def_clshift = 0;
#endif  /* NORMA_VM */
        /*
         * Keep cluster size in bit shift because it's quicker
         * arithmetic, and easier to keep at a power of 2.
         */
        if (clsize != NO_CLSIZE) {
            for (i = 0; (1 << i) < clsize; i++);
            if (i > MAX_CLUSTER_SHIFT)
                i = MAX_CLUSTER_SHIFT;
            vstruct_def_clshift = i;
        }
        default_pager_clsize = (1 << vstruct_def_clshift);

        /*
         * Let the user know the new (and definitive) cluster size.
         */
        printf("%scluster size = %d page%s\n",
               my_name, default_pager_clsize,
               (default_pager_clsize == 1) ? "" : "s");
        /*
         * Let the kernel know too, in case it hasn't used the
         * default value provided in main() yet.
         */
        DMM = default_pager_default_port;
        clsize = default_pager_clsize * vm_page_size;  /* in bytes */
        kr = host_default_memory_manager(host_priv_self(),
                                         &DMM,
                                         clsize);
        if (kr != KERN_SUCCESS) {
            panic("bs_get_global_cl_size:host_default_memory_manager");
        }
        if (DMM != default_pager_default_port) {
            panic("bs_get_global_cl_size:there is another default pager");
        }
    }
    ASSERT(default_pager_clsize > 0 &&
           (default_pager_clsize & (default_pager_clsize - 1)) == 0);

    return default_pager_clsize;
}
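/*
 * Example of the rounding above: a requested clsize of 6 pages makes the
 * for loop stop at i == 3 (1 << 3 == 8 >= 6), so the cluster size is
 * rounded up to the next power of two, 8 pages, capped at
 * MAX_CLUSTER_SHIFT.
 */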
kern_return_t
default_pager_backing_store_create(
    MACH_PORT_FACE  pager,
    int             priority,
    int             clsize,      /* in bytes */
    MACH_PORT_FACE  *backing_store)
{
    backing_store_t       bs;
    MACH_PORT_FACE        port;
    struct vstruct_alias  *alias_struct;
    static char here[] = "default_pager_backing_store_create";

    if (pager != default_pager_default_port)
        return KERN_INVALID_ARGUMENT;

    bs = backing_store_alloc();
    port = ipc_port_alloc_kernel();
    ipc_port_make_send(port);
    assert (port != IP_NULL);

    DEBUG(DEBUG_BS_EXTERNAL,
          ("priority=%d clsize=%d bs_port=0x%x\n",
           priority, clsize, (int) backing_store));

    alias_struct = (struct vstruct_alias *)
        kalloc(sizeof (struct vstruct_alias));
    if (alias_struct != NULL) {
        alias_struct->vs = (struct vstruct *)bs;
        alias_struct->name = ISVS;
        port->alias = (int) alias_struct;
    }
    else {
        ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
        kfree((vm_offset_t)bs, sizeof (struct backing_store));
        return KERN_RESOURCE_SHORTAGE;
    }

    if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
        priority = BS_MAXPRI;
    else if (priority == BS_NOPRI)
        priority = BS_MAXPRI;
    else
        priority = BS_MINPRI;
    bs->bs_priority = priority;

    bs->bs_clsize = bs_get_global_clsize(atop(clsize));

    BSL_LOCK();
    queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
                bs_links);
    BSL_UNLOCK();

    backing_store_add(bs);

    *backing_store = port;
    return KERN_SUCCESS;
}
kern_return_t
default_pager_backing_store_info(
    MACH_PORT_FACE          backing_store,
    backing_store_flavor_t  flavour,
    backing_store_info_t    info,
    mach_msg_type_number_t  *size)
{
    backing_store_t             bs;
    backing_store_basic_info_t  basic;
    int                         i;
    paging_segment_t            ps;

    if (flavour != BACKING_STORE_BASIC_INFO ||
        *size < BACKING_STORE_BASIC_INFO_COUNT)
        return KERN_INVALID_ARGUMENT;

    basic = (backing_store_basic_info_t)info;
    *size = BACKING_STORE_BASIC_INFO_COUNT;

    VSTATS_LOCK(&global_stats.gs_lock);
    basic->pageout_calls = global_stats.gs_pageout_calls;
    basic->pagein_calls = global_stats.gs_pagein_calls;
    basic->pages_in = global_stats.gs_pages_in;
    basic->pages_out = global_stats.gs_pages_out;
    basic->pages_unavail = global_stats.gs_pages_unavail;
    basic->pages_init = global_stats.gs_pages_init;
    basic->pages_init_writes = global_stats.gs_pages_init_writes;
    VSTATS_UNLOCK(&global_stats.gs_lock);

    if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    basic->bs_pages_total = bs->bs_pages_total;

    bs->bs_pages_free = 0;
    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
            bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
        }
    }
    basic->bs_pages_free = bs->bs_pages_free;
    basic->bs_pages_in = bs->bs_pages_in;
    basic->bs_pages_in_fail = bs->bs_pages_in_fail;
    basic->bs_pages_out = bs->bs_pages_out;
    basic->bs_pages_out_fail = bs->bs_pages_out_fail;

    basic->bs_priority = bs->bs_priority;
    basic->bs_clsize = ptoa(bs->bs_clsize);  /* in bytes */

    BS_UNLOCK(bs);

    return KERN_SUCCESS;
}
int ps_delete(paging_segment_t);  /* forward */

int
ps_delete(
    paging_segment_t ps)
{
    vstruct_t      vs;
    kern_return_t  error = KERN_SUCCESS;
    int            vs_count;

    VSL_LOCK();  /* get the lock on the list of vs's */

    /* The lock relationship and sequence is fairly complicated */
    /* this code looks at a live list, locking and unlocking the list */
    /* as it traverses it.  It depends on the locking behavior of */
    /* default_pager_no_senders.  no_senders always locks the vstruct */
    /* targeted for removal before locking the vstruct list.  However */
    /* it will remove that member of the list without locking its */
    /* neighbors.  We can be sure when we hold a lock on a vstruct */
    /* it cannot be removed from the list but we must hold the list */
    /* lock to be sure that its pointers to its neighbors are valid. */
    /* Also, we can hold off destruction of a vstruct when the list */
    /* lock and the vs locks are not being held by bumping the */
    /* vs_async_pending count. */

    /* we will choose instead to hold a send right */
    vs_count = vstruct_list.vsl_count;
    vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
    if (vs == (vstruct_t)&vstruct_list) {
        VSL_UNLOCK();
        return KERN_SUCCESS;
    }
    VS_LOCK(vs);
    vs_async_wait(vs);  /* wait for any pending async writes */
    if ((vs_count != 0) && (vs != NULL))
        vs->vs_async_pending += 1;  /* hold parties calling */
                                    /* vs_async_wait below */
    VS_UNLOCK(vs);
    VSL_UNLOCK();
    while ((vs_count != 0) && (vs != NULL)) {
        vstruct_t next_vs;

        /* We take the count of AMO's before beginning the */
        /* transfer of the target segment. */
        /* We are guaranteed that the target segment cannot get */
        /* more users.  We also know that queue entries are */
        /* made at the back of the list.  If some of the entries */
        /* we would check disappear while we are traversing the */
        /* list then we will either check new entries which */
        /* do not have any backing store in the target segment */
        /* or re-check old entries.  This might not be optimal */
        /* but it will always be correct.  The alternative is to */
        /* take a snapshot of the list. */

        if (dp_pages_free < cluster_transfer_minimum)
            error = KERN_FAILURE;
        else {
            vm_object_t transfer_object;
            upl_t       upl;

            transfer_object = vm_object_allocate(VM_SUPER_CLUSTER);
            error = vm_fault_list_request(transfer_object,
                        (vm_object_offset_t)0,
                        VM_SUPER_CLUSTER, &upl, NULL,
                        0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE);
            if (error == KERN_SUCCESS) {
#ifndef ubc_sync_working
                uc_upl_commit(upl, NULL);
                error = ps_vstruct_transfer_from_segment(
                            vs, ps, transfer_object);
#else
                error = ps_vstruct_transfer_from_segment(
                            vs, ps, upl);
                uc_upl_commit(upl, NULL);
#endif
                vm_object_deallocate(transfer_object);
            } else {
                vm_object_deallocate(transfer_object);
                error = KERN_FAILURE;
            }
        }
        if (error) {
            VS_LOCK(vs);
            vs->vs_async_pending -= 1;  /* release vs_async_wait */
            if (vs->vs_async_pending == 0) {
                VS_UNLOCK(vs);
                thread_wakeup(&vs->vs_waiting_async);
            } else {
                VS_UNLOCK(vs);
            }
            return KERN_FAILURE;
        }

        VSL_LOCK();
        next_vs = (vstruct_t) queue_next(&(vs->vs_links));
        if ((next_vs != (vstruct_t)&vstruct_list) &&
            (vs != next_vs) && (vs_count != 1)) {
            VS_LOCK(next_vs);
            vs_async_wait(next_vs);  /* wait for any */
                                     /* pending async writes */
            next_vs->vs_async_pending += 1;  /* hold parties */
                                     /* calling vs_async_wait */
            VS_UNLOCK(next_vs);
        }
        VSL_UNLOCK();
        VS_LOCK(vs);
        vs->vs_async_pending -= 1;
        if (vs->vs_async_pending == 0) {
            VS_UNLOCK(vs);
            thread_wakeup(&vs->vs_waiting_async);
        } else {
            VS_UNLOCK(vs);
        }
        if ((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list))
            vs = NULL;
        else
            vs = next_vs;
        vs_count--;
    }
    return KERN_SUCCESS;
}
kern_return_t
default_pager_backing_store_delete(
    MACH_PORT_FACE backing_store)
{
    backing_store_t    bs;
    int                i;
    paging_segment_t   ps;
    int                error;
    int                interim_pages_removed = 0;
    static char here[] = "default_pager_backing_store_delete";

    if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    /* not implemented */

    error = KERN_SUCCESS;
    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps != PAGING_SEGMENT_NULL &&
            ps->ps_bs == bs &&
            ! ps->ps_going_away) {
            /* disable access to this segment */
            ps->ps_going_away = TRUE;
            /*
             * The "ps" segment is "off-line" now,
             * we can try and delete it...
             */
            if (dp_pages_free < (cluster_transfer_minimum
                                 + ps->ps_pgcount)) {
                error = KERN_FAILURE;
            }
            else {
                /* remove all pages associated with the */
                /* segment from the list of free pages */
                /* when transfer is through, all target */
                /* segment pages will appear to be free */
                dp_pages_free -= ps->ps_pgcount;
                interim_pages_removed += ps->ps_pgcount;
                error = ps_delete(ps);
            }
            if (error != KERN_SUCCESS) {
                /*
                 * We couldn't delete the segment,
                 * probably because there's not enough
                 * virtual memory left.
                 * Re-enable all the segments.
                 */
                break;
            }
        }
    }

    if (error != KERN_SUCCESS) {
        for (i = 0; i <= paging_segment_max; i++) {
            ps = paging_segments[i];
            if (ps != PAGING_SEGMENT_NULL &&
                ps->ps_bs == bs &&
                ps->ps_going_away) {
                /* re-enable access to this segment */
                ps->ps_going_away = FALSE;
            }
        }
        dp_pages_free += interim_pages_removed;
        BS_UNLOCK(bs);
        return error;
    }

    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps != PAGING_SEGMENT_NULL &&
            ps->ps_bs == bs) {
            if (ps->ps_going_away) {
                paging_segments[i] = PAGING_SEGMENT_NULL;
                paging_segment_count--;
                kfree((vm_offset_t)ps->ps_bmap,
                      RMAPSIZE(ps->ps_ncls));
                kfree((vm_offset_t)ps, sizeof *ps);
            }
        }
    }

    /* Scan the entire ps array separately to make certain we find the */
    /* proper paging_segment_max */
    for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
        if (paging_segments[i] != PAGING_SEGMENT_NULL)
            paging_segment_max = i;
    }

    /*
     * All the segments have been deleted.
     * We can remove the backing store.
     */

    /*
     * Disable lookups of this backing store.
     */
    if ((void *)bs->bs_port->alias != NULL)
        kfree((vm_offset_t) bs->bs_port->alias,
              sizeof (struct vstruct_alias));
    pager_mux_hash_delete((ipc_port_t) (bs->bs_port));
    ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
    bs->bs_port = MACH_PORT_NULL;
    BS_UNLOCK(bs);

    /*
     * Remove backing store from backing_store list.
     */
    BSL_LOCK();
    queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t,
                 bs_links);
    BSL_UNLOCK();

    /*
     * Free the backing store structure.
     */
    kfree((vm_offset_t)bs, sizeof *bs);

    return KERN_SUCCESS;
}
int ps_enter(paging_segment_t);  /* forward */

int
ps_enter(
    paging_segment_t ps)
{
    int i;

    for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
        if (paging_segments[i] == PAGING_SEGMENT_NULL)
            break;
    }

    if (i < MAX_NUM_PAGING_SEGMENTS) {
        paging_segments[i] = ps;
        if (i > paging_segment_max)
            paging_segment_max = i;
        paging_segment_count++;
        if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) ||
            (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI))
            ps_select_array[ps->ps_bs->bs_priority] = 0;
        i = 0;
    } else {
        return KERN_RESOURCE_SHORTAGE;
    }

    return i;
}
kern_return_t
default_pager_add_segment(
    MACH_PORT_FACE  backing_store,
    MACH_PORT_FACE  device,
    recnum_t        offset,
    recnum_t        count,
    int             record_size)
{
    backing_store_t    bs;
    paging_segment_t   ps;
    int                i;
    int                error;
    static char here[] = "default_pager_add_segment";

    if ((bs = backing_store_lookup(backing_store))
        == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps == PAGING_SEGMENT_NULL)
            continue;

        /*
         * Check for overlap on same device.
         */
        if (!(ps->ps_device != device
              || offset >= ps->ps_offset + ps->ps_recnum
              || offset + count <= ps->ps_offset)) {
            BS_UNLOCK(bs);
            return KERN_INVALID_ARGUMENT;
        }
    }

    /*
     * Set up the paging segment
     */
    ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
    if (ps == PAGING_SEGMENT_NULL) {
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }

    ps->ps_segtype = PS_PARTITION;
    ps->ps_device = device;
    ps->ps_offset = offset;
    ps->ps_record_shift = local_log2(vm_page_size / record_size);
    ps->ps_recnum = count;
    ps->ps_pgnum = count >> ps->ps_record_shift;

    ps->ps_pgcount = ps->ps_pgnum;
    ps->ps_clshift = local_log2(bs->bs_clsize);
    ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;

    ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
    if (!ps->ps_bmap) {
        kfree((vm_offset_t)ps, sizeof *ps);
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }
    for (i = 0; i < ps->ps_ncls; i++) {
        clrbit(ps->ps_bmap, i);
    }

    ps->ps_going_away = FALSE;
    ps->ps_bs = bs;

    if ((error = ps_enter(ps)) != 0) {
        kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
        kfree((vm_offset_t)ps, sizeof *ps);
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }

    bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
    bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
    BS_UNLOCK(bs);

    dp_pages_free += ps->ps_pgcount;

    bs_more_space(ps->ps_clcount);

    DEBUG(DEBUG_BS_INTERNAL,
          ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
           device, offset, count, record_size,
           ps->ps_record_shift, ps->ps_pgnum));

    return KERN_SUCCESS;
}
    MACH_PORT_FACE master)
{
    security_token_t null_security_token = {
        { 0, 0 }
    };
    MACH_PORT_FACE  device;
    int             info[DEV_GET_SIZE_COUNT];
    mach_msg_type_number_t info_count;
    MACH_PORT_FACE  bs = MACH_PORT_NULL;
    unsigned int    rec_size;
    recnum_t        count;
    int             clsize;
    MACH_PORT_FACE  reply_port;

    if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE,
            null_security_token, dev_name, &device))
        return FALSE;

    info_count = DEV_GET_SIZE_COUNT;
    if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) {
        rec_size = info[DEV_GET_SIZE_RECORD_SIZE];
        count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
        clsize = bs_get_global_clsize(0);
        if (!default_pager_backing_store_create(
                    default_pager_default_port,
                    DEFAULT_PAGER_BACKING_STORE_MAXPRI,
                    (clsize * vm_page_size),
                    &bs)) {
            if (!default_pager_add_segment(bs, device,
                    0, count, rec_size)) {
                return TRUE;
            }
            ipc_port_release_receive(bs);
        }
    }

    ipc_port_release_send(device);
    return FALSE;
}

#endif  /* DEVICE_PAGING */
#if VS_ASYNC_REUSE

struct vs_async *
vs_alloc_async(void)
{
    struct vs_async *vsa;
    MACH_PORT_FACE  reply_port;

    VS_ASYNC_LOCK();
    if (vs_async_free_list == NULL) {
        VS_ASYNC_UNLOCK();
        vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
        if (vsa != NULL) {
            /*
             * Try allocating a reply port named after the
             * address of the vs_async structure.
             */
            struct vstruct_alias *alias_struct;

            reply_port = ipc_port_alloc_kernel();
            alias_struct = (struct vstruct_alias *)
                kalloc(sizeof (struct vstruct_alias));
            if (alias_struct != NULL) {
                alias_struct->vs = (struct vstruct *)vsa;
                alias_struct->name = ISVS;
                reply_port->alias = (int) alias_struct;
                vsa->reply_port = reply_port;
                vs_alloc_async_count++;
            }
            else {
                vs_alloc_async_failed++;
                ipc_port_dealloc_kernel((MACH_PORT_FACE)
                        (reply_port));
                kfree((vm_offset_t)vsa,
                      sizeof (struct vs_async));
                vsa = NULL;
            }
        }
    } else {
        vsa = vs_async_free_list;
        vs_async_free_list = vs_async_free_list->vsa_next;
        VS_ASYNC_UNLOCK();
    }

    return vsa;
}

void
vs_free_async(
    struct vs_async *vsa)
{
    VS_ASYNC_LOCK();
    vsa->vsa_next = vs_async_free_list;
    vs_async_free_list = vsa;
    VS_ASYNC_UNLOCK();
}
#else   /* VS_ASYNC_REUSE */

struct vs_async *
vs_alloc_async(void)
{
    struct vs_async *vsa;
    MACH_PORT_FACE  reply_port;

    vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
    if (vsa != NULL) {
        /*
         * Try allocating a reply port named after the
         * address of the vs_async structure.
         */
        struct vstruct_alias *alias_struct;

        reply_port = ipc_port_alloc_kernel();
        alias_struct = (struct vstruct_alias *)
            kalloc(sizeof (struct vstruct_alias));
        if (alias_struct != NULL) {
            alias_struct->vs = reply_port;
            alias_struct->name = ISVS;
            reply_port->alias = (int) vsa;
            vsa->reply_port = reply_port;
            vs_alloc_async_count++;
        }
        else {
            vs_alloc_async_failed++;
            ipc_port_dealloc_kernel((MACH_PORT_FACE)
                    (reply_port));
            kfree((vm_offset_t) vsa,
                  sizeof (struct vs_async));
            vsa = NULL;
        }
    }

    return vsa;
}

void
vs_free_async(
    struct vs_async *vsa)
{
    static char here[] = "vs_free_async";
    MACH_PORT_FACE reply_port;

    reply_port = vsa->reply_port;
    kfree((vm_offset_t) reply_port->alias, sizeof (struct vstruct_alias));
    kfree((vm_offset_t) vsa, sizeof (struct vs_async));
    pager_mux_hash_delete(reply_port);
    ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));

    VS_ASYNC_LOCK();
    vs_alloc_async_count--;
    VS_ASYNC_UNLOCK();
}

#endif  /* VS_ASYNC_REUSE */
vstruct_t
ps_vstruct_create(
    vm_size_t size)
{
    vstruct_t   vs;
    int         i;
    static char here[] = "ps_vstruct_create";

    vs = (vstruct_t) kalloc(sizeof (struct vstruct));
    if (vs == VSTRUCT_NULL) {
        return VSTRUCT_NULL;
    }

    /*
     * The following fields will be provided later.
     */
    vs->vs_mem_obj_port = MACH_PORT_NULL;
    vs->vs_control_port = MACH_PORT_NULL;
    vs->vs_control_refs = 0;
    vs->vs_object_name = MACH_PORT_NULL;
    vs->vs_name_refs = 0;

    vs->vs_waiting_seqno = FALSE;
    vs->vs_waiting_read = FALSE;
    vs->vs_waiting_write = FALSE;
    vs->vs_waiting_refs = FALSE;
    vs->vs_waiting_async = FALSE;

    mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO);
    mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD);
    mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE);
    mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS);
    mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC);

    vs->vs_clshift = local_log2(bs_get_global_clsize(0));
    vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
    vs->vs_async_pending = 0;

    /*
     * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
     * depending on the size of the memory object.
     */
    if (INDIRECT_CLMAP(vs->vs_size)) {
        vs->vs_imap = (struct vs_map **)
            kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size));
        vs->vs_indirect = TRUE;
    } else {
        vs->vs_dmap = (struct vs_map *)
            kalloc(CLMAP_SIZE(vs->vs_size));
        vs->vs_indirect = FALSE;
    }
    vs->vs_xfer_pending = FALSE;
    DEBUG(DEBUG_VS_INTERNAL,
          ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));

    /*
     * Check to see that we got the space.
     */
    if (!vs->vs_dmap) {
        kfree((vm_offset_t)vs, sizeof *vs);
        return VSTRUCT_NULL;
    }

    /*
     * Zero the indirect pointers, or clear the direct pointers.
     */
    if (vs->vs_indirect)
        memset(vs->vs_imap, 0,
               INDIRECT_CLMAP_SIZE(vs->vs_size));
    else
        for (i = 0; i < vs->vs_size; i++)
            VSM_CLR(vs->vs_dmap[i]);

    VS_MAP_LOCK_INIT(vs);

    bs_commit(vs->vs_size);

    return vs;
}
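/*
 * Example of the vs_size computation above: assuming 4K pages and a
 * cluster shift of 2 (4 pages per cluster), a 1MB object has
 * atop(1MB) == 256 pages, so vs_size = ((256 - 1) >> 2) + 1 = 64 clusters.
 */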
paging_segment_t ps_select_segment(int, int *);  /* forward */

paging_segment_t
ps_select_segment(
    int shift,
    int *psindex)
{
    paging_segment_t ps;
    int              i;
    int              j;
    static char here[] = "ps_select_segment";

    /*
     * Optimize case where there's only one segment.
     * paging_segment_max will index the one and only segment.
     */
    PSL_LOCK();
    if (paging_segment_count == 1) {
        paging_segment_t lps;  /* used to avoid extra PS_UNLOCK */

        ps = paging_segments[paging_segment_max];
        *psindex = paging_segment_max;

        if (ps->ps_going_away) {
            /* this segment is being turned off */
            lps = PAGING_SEGMENT_NULL;
        } else {
            ASSERT(ps->ps_clshift >= shift);
            if (ps->ps_clcount) {
                ps->ps_clcount--;
                dp_pages_free -= 1 << ps->ps_clshift;
                if (min_pages_trigger_port &&
                    (dp_pages_free < minimum_pages_remaining)) {
                    default_pager_space_alert(
                        min_pages_trigger_port,
                        HI_WAT_ALERT);
                    min_pages_trigger_port = NULL;
                }
                lps = ps;
            } else
                lps = PAGING_SEGMENT_NULL;
        }
        PSL_UNLOCK();
        return lps;
    }

    if (paging_segment_count == 0) {
        PSL_UNLOCK();
        return PAGING_SEGMENT_NULL;
    }

    for (i = BS_MAXPRI;
         i >= BS_MINPRI; i--) {
        int start_index;

        if ((ps_select_array[i] == BS_NOPRI) ||
            (ps_select_array[i] == BS_FULLPRI))
            continue;
        start_index = ps_select_array[i];

        if (!(paging_segments[start_index])) {
            j = start_index + 1;
            physical_transfer_cluster_count = 0;
        }
        else if ((physical_transfer_cluster_count+1) == (MAXPHYS >>
                    (((paging_segments[start_index])->ps_clshift)
                     + vm_page_shift))) {
            physical_transfer_cluster_count = 0;
            j = start_index + 1;
        } else {
            physical_transfer_cluster_count += 1;
            j = start_index;
            if (start_index == 0)
                start_index = paging_segment_max;
            else
                start_index = start_index - 1;
        }

        while (1) {
            if (j > paging_segment_max)
                j = 0;
            if ((ps = paging_segments[j]) &&
                (ps->ps_bs->bs_priority == i)) {
                /*
                 * Force the ps cluster size to be
                 * >= that of the vstruct.
                 */
                if (ps->ps_going_away) {
                    /* this segment is being turned off */
                } else if ((ps->ps_clcount) &&
                           (ps->ps_clshift >= shift)) {
                    ps->ps_clcount--;
                    dp_pages_free -= 1 << ps->ps_clshift;
                    if (min_pages_trigger_port &&
                        (dp_pages_free <
                            minimum_pages_remaining)) {
                        default_pager_space_alert(
                            min_pages_trigger_port,
                            HI_WAT_ALERT);
                        min_pages_trigger_port = NULL;
                    }
                    /*
                     * found one, quit looking.
                     */
                    ps_select_array[i] = j;
                    PSL_UNLOCK();
                    *psindex = j;
                    return ps;
                }
            }
            if (j == start_index) {
                /*
                 * none at this priority -- mark it full
                 */
                ps_select_array[i] = BS_FULLPRI;
                break;
            }
            j++;
        }
    }
    PSL_UNLOCK();
    return PAGING_SEGMENT_NULL;
}
vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/

vm_offset_t
ps_allocate_cluster(
    vstruct_t        vs,
    int              *psindex,
    paging_segment_t use_ps)
{
    int              byte_num;
    int              bit_num = 0;
    paging_segment_t ps;
    vm_offset_t      cluster;
    static char here[] = "ps_allocate_cluster";

    /*
     * Find best paging segment.
     * ps_select_segment will decrement cluster count on ps.
     * Must pass cluster shift to find the most appropriate segment.
     */
    /* NOTE:  The addition of paging segment delete capability threatened
     * to seriously complicate the treatment of paging segments in this
     * module and the ones that call it (notably ps_clmap), because of the
     * difficulty in assuring that the paging segment would continue to
     * exist between being unlocked and locked.  This was
     * avoided because all calls to this module are based in either
     * dp_memory_object calls which rely on the vs lock, or by
     * the transfer function which is part of the segment delete path.
     * The transfer function which is part of paging segment delete is
     * protected from multiple callers by the backing store lock.
     * The paging segment delete function treats mappings to a paging
     * segment on a vstruct by vstruct basis, locking the vstruct targeted
     * while data is transferred to the remaining segments.  This is in
     * line with the view that incomplete or in-transition mappings between
     * data, a vstruct, and backing store are protected by the vs lock.
     * This and the ordering of the paging segment "going_away" bit setting
     * protects us.
     */
    if (use_ps != PAGING_SEGMENT_NULL) {
        ps = use_ps;
        ps->ps_clcount--;
        dp_pages_free -= 1 << ps->ps_clshift;
        if (min_pages_trigger_port &&
            (dp_pages_free < minimum_pages_remaining)) {
            default_pager_space_alert(
                min_pages_trigger_port,
                HI_WAT_ALERT);
            min_pages_trigger_port = NULL;
        }
    } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
               PAGING_SEGMENT_NULL) {
#if 0
        bs_no_paging_space(TRUE);
#endif
        dprintf(("no space in available paging segments; "
                 "swapon suggested\n"));
        /* the count got off maybe, reset to zero */
        dp_pages_free = 0;
        if (min_pages_trigger_port) {
            default_pager_space_alert(
                min_pages_trigger_port, HI_WAT_ALERT);
            min_pages_trigger_port = NULL;
        }
        return (vm_offset_t) -1;
    }
    ASSERT(ps->ps_clcount != 0);

    /*
     * Look for an available cluster.  At the end of the loop,
     * byte_num is the byte offset and bit_num is the bit offset of the
     * first zero bit in the paging segment bitmap.
     */
    byte_num = ps->ps_hint;
    for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
        if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
            for (bit_num = 0; bit_num < NBBY; bit_num++) {
                if (isclr((ps->ps_bmap + byte_num), bit_num))
                    break;
            }
            ASSERT(bit_num != NBBY);
            break;
        }
    }

    ps->ps_hint = byte_num;
    cluster = (byte_num*NBBY) + bit_num;

    /* Space was reserved, so this must be true */
    ASSERT(cluster < ps->ps_ncls);

    setbit(ps->ps_bmap, cluster);

    return cluster;
}
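/*
 * Example of the bitmap scan above: if the first byte of ps_bmap with a
 * clear bit is byte_num == 5 and the clear bit within it is bit_num == 3,
 * the allocated cluster number is 5 * NBBY + 3 == 43 (NBBY == 8 bits per
 * byte).
 */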
void ps_deallocate_cluster(paging_segment_t, vm_offset_t);  /* forward */

void
ps_deallocate_cluster(
    paging_segment_t ps,
    vm_offset_t      cluster)
{
    if (cluster >= (vm_offset_t) ps->ps_ncls)
        panic("ps_deallocate_cluster: Invalid cluster number");

    /*
     * Lock the paging segment, clear the cluster's bitmap and increment
     * the number of free clusters.
     */
    PS_LOCK(ps);
    clrbit(ps->ps_bmap, cluster);
    ps->ps_clcount++;
    dp_pages_free += 1 << ps->ps_clshift;
    if (max_pages_trigger_port && (dp_pages_free > maximum_pages_free)) {
        default_pager_space_alert(max_pages_trigger_port, LO_WAT_ALERT);
        max_pages_trigger_port = NULL;
    }

    /*
     * Move the hint down to the freed cluster if it is
     * less than the current hint.
     */
    if ((cluster/NBBY) < ps->ps_hint) {
        ps->ps_hint = (cluster/NBBY);
    }
    PS_UNLOCK(ps);

    /*
     * If we're freeing space on a full priority, reset the array.
     */
    if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
        ps_select_array[ps->ps_bs->bs_priority] = 0;
}
void ps_dealloc_vsmap(struct vs_map *, vm_size_t);  /* forward */

void
ps_dealloc_vsmap(
    struct vs_map *vsmap,
    vm_size_t     size)
{
    int i;

    for (i = 0; i < size; i++)
        if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
            ps_deallocate_cluster(VSM_PS(vsmap[i]),
                                  VSM_CLOFF(vsmap[i]));
}

void
ps_vstruct_dealloc(
    vstruct_t vs)
{
    int         i;
    static char here[] = "ps_vstruct_dealloc";

    /*
     * If this is an indirect structure, then we walk through the valid
     * (non-zero) indirect pointers and deallocate the clusters
     * associated with each used map entry (via ps_dealloc_vsmap).
     * When all of the clusters in an indirect block have been
     * freed, we deallocate the block.  When all of the indirect
     * blocks have been deallocated we deallocate the memory
     * holding the indirect pointers.
     */
    if (vs->vs_indirect) {
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            if (vs->vs_imap[i] != NULL) {
                ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
                kfree((vm_offset_t)vs->vs_imap[i],
                      CLMAP_THRESHOLD);
            }
        }
        kfree((vm_offset_t)vs->vs_imap,
              INDIRECT_CLMAP_SIZE(vs->vs_size));
    } else {
        /*
         * Direct map.  Free used clusters, then memory.
         */
        ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
        kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
    }

    bs_commit(- vs->vs_size);

    ip_lock(vs_to_port(vs));
    (vs_to_port(vs))->ip_destination = 0;
    (vs_to_port(vs))->ip_receiver_name = MACH_PORT_NULL;

    imq_lock(&vs_to_port(vs)->ip_messages);
    (vs_to_port(vs))->ip_mscount = 0;
    (vs_to_port(vs))->ip_messages.imq_seqno = 0;
    imq_unlock(&vs_to_port(vs)->ip_messages);

    ip_unlock(vs_to_port(vs));
    pager_mux_hash_delete((ipc_port_t) vs_to_port(vs));
    ipc_port_release_receive(vs_to_port(vs));

    /*
     * Do this *after* deallocating the port name.
     */
    kfree((vm_offset_t)vs, sizeof *vs);
}
int ps_map_extend(vstruct_t, int);  /* forward */

int
ps_map_extend(
    vstruct_t vs,
    int       new_size)
{
    struct vs_map **new_imap;
    struct vs_map *new_dmap = NULL;
    int           newdsize;
    int           i;
    void          *old_map = NULL;
    int           old_map_size = 0;

    if (vs->vs_size >= new_size) {
        /*
         * Someone has already done the work.
         */
        return 0;
    }

    /*
     * If the new size extends into the indirect range, then we have one
     * of two cases: we are going from indirect to indirect, or we are
     * going from direct to indirect.  If we are going from indirect to
     * indirect, then it is possible that the new size will fit in the old
     * indirect map.  If this is the case, then just reset the size of the
     * vstruct map and we are done.  If the new size will not
     * fit into the old indirect map, then we have to allocate a new
     * indirect map and copy the old map pointers into this new map.
     *
     * If we are going from direct to indirect, then we have to allocate a
     * new indirect map and copy the old direct pages into the first
     * indirect page of the new map.
     * NOTE: allocating memory here is dangerous, as we're in the
     * pageout path.
     */
    if (INDIRECT_CLMAP(new_size)) {
        int new_map_size = INDIRECT_CLMAP_SIZE(new_size);

        /*
         * Get a new indirect map and zero it.
         */
        old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size);
        if (vs->vs_indirect &&
            (new_map_size == old_map_size)) {
            bs_commit(new_size - vs->vs_size);
            vs->vs_size = new_size;
            return 0;
        }

        new_imap = (struct vs_map **)kalloc(new_map_size);
        if (new_imap == NULL) {
            return -1;
        }
        memset(new_imap, 0, new_map_size);

        if (vs->vs_indirect) {
            /* Copy old entries into new map */
            memcpy(new_imap, vs->vs_imap, old_map_size);
            /* Arrange to free the old map */
            old_map = (void *) vs->vs_imap;
            newdsize = 0;
        } else {    /* Old map was a direct map */
            /* Allocate an indirect page */
            if ((new_imap[0] = (struct vs_map *)
                 kalloc(CLMAP_THRESHOLD)) == NULL) {
                kfree((vm_offset_t)new_imap, new_map_size);
                return -1;
            }
            new_dmap = new_imap[0];
            newdsize = CLMAP_ENTRIES;
        }
    } else {
        new_imap = NULL;
        newdsize = new_size;
        /*
         * If the new map is a direct map, then the old map must
         * also have been a direct map.  All we have to do is
         * to allocate a new direct map, copy the old entries
         * into it and free the old map.
         */
        if ((new_dmap = (struct vs_map *)
             kalloc(CLMAP_SIZE(new_size))) == NULL) {
            return -1;
        }
    }
    if (newdsize) {
        /* Free the old map */
        old_map = (void *) vs->vs_dmap;
        old_map_size = CLMAP_SIZE(vs->vs_size);

        /* Copy info from the old map into the new map */
        memcpy(new_dmap, vs->vs_dmap, old_map_size);

        /* Initialize the rest of the new map */
        for (i = vs->vs_size; i < newdsize; i++)
            VSM_CLR(new_dmap[i]);
    }
    if (new_imap) {
        vs->vs_imap = new_imap;
        vs->vs_indirect = TRUE;
    } else
        vs->vs_dmap = new_dmap;
    bs_commit(new_size - vs->vs_size);
    vs->vs_size = new_size;
    if (old_map)
        kfree((vm_offset_t)old_map, old_map_size);

    return 0;
}
vm_offset_t
ps_clmap(
    vstruct_t    vs,
    vm_offset_t  offset,
    struct clmap *clmap,
    int          flag,
    vm_size_t    size,
    int          error)
{
    vm_offset_t   cluster;   /* The cluster of offset.       */
    vm_offset_t   newcl;     /* The new cluster allocated.   */
    vm_offset_t   newoff;
    int           i;
    struct vs_map *vsmap;
    static char here[] = "ps_clmap";

    VS_MAP_LOCK(vs);

    ASSERT(vs->vs_dmap);
    cluster = atop(offset) >> vs->vs_clshift;

    /*
     * Initialize cluster error value
     */
    clmap->cl_error = 0;

    /*
     * If the object has grown, extend the page map.
     */
    if (cluster >= vs->vs_size) {
        if (flag == CL_FIND) {
            /* Do not allocate if just doing a lookup */
            VS_MAP_UNLOCK(vs);
            return (vm_offset_t) -1;
        }
        if (ps_map_extend(vs, cluster + 1)) {
            VS_MAP_UNLOCK(vs);
            return (vm_offset_t) -1;
        }
    }

    /*
     * Look for the desired cluster.  If the map is indirect, then we
     * have a two level lookup.  First find the indirect block, then
     * find the actual cluster.  If the indirect block has not yet
     * been allocated, then do so.  If the cluster has not yet been
     * allocated, then do so.
     *
     * If any of the allocations fail, then return an error.
     * Don't allocate if just doing a lookup.
     */
    if (vs->vs_indirect) {
        long ind_block = cluster/CLMAP_ENTRIES;

        /* Is the indirect block allocated? */
        vsmap = vs->vs_imap[ind_block];
        if (vsmap == NULL) {
            if (flag == CL_FIND) {
                VS_MAP_UNLOCK(vs);
                return (vm_offset_t) -1;
            }

            /* Allocate the indirect block */
            vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
            if (vsmap == NULL) {
                VS_MAP_UNLOCK(vs);
                return (vm_offset_t) -1;
            }
            /* Initialize the cluster offsets */
            for (i = 0; i < CLMAP_ENTRIES; i++)
                VSM_CLR(vsmap[i]);
            vs->vs_imap[ind_block] = vsmap;
        }
    } else
        vsmap = vs->vs_dmap;

    vsmap += cluster%CLMAP_ENTRIES;

    /*
     * At this point, vsmap points to the struct vs_map desired.
     *
     * Look in the map for the cluster, if there was an error on a
     * previous write, flag it and return.  If it is not yet
     * allocated, then allocate it, if we're writing; if we're
     * doing a lookup and the cluster's not allocated, return error.
     */
    if (VSM_ISERR(*vsmap)) {
        clmap->cl_error = VSM_GETERR(*vsmap);
        VS_MAP_UNLOCK(vs);
        return (vm_offset_t) -1;
    } else if (VSM_ISCLR(*vsmap)) {
        int psindex;

        if (flag == CL_FIND) {
            /*
             * If there's an error and the entry is clear, then
             * we've run out of swap space.  Record the error
             * here and return.
             */
            if (error)
                VSM_SETERR(*vsmap, error);
            VS_MAP_UNLOCK(vs);
            return (vm_offset_t) -1;
        } else {
            /*
             * Attempt to allocate a cluster from the paging segment
             */
            newcl = ps_allocate_cluster(vs, &psindex,
                                        PAGING_SEGMENT_NULL);
            if (newcl == (vm_offset_t) -1) {
                VS_MAP_UNLOCK(vs);
                return (vm_offset_t) -1;
            }
            VSM_CLR(*vsmap);
            VSM_SETCLOFF(*vsmap, newcl);
            VSM_SETPS(*vsmap, psindex);
        }
    } else
        newcl = VSM_CLOFF(*vsmap);

    /*
     * Fill in pertinent fields of the clmap
     */
    clmap->cl_ps = VSM_PS(*vsmap);
    clmap->cl_numpages = VSCLSIZE(vs);
    clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap);

    /*
     * Byte offset in paging segment is byte offset to cluster plus
     * byte offset within cluster.  It looks ugly, but should be
     * relatively quick.
     */
    ASSERT(trunc_page(offset) == offset);
    newcl = ptoa(newcl) << vs->vs_clshift;
    newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
    if (flag == CL_ALLOC) {
        /*
         * set bits in the allocation bitmap according to which
         * pages were requested.  size is in bytes.
         */
        i = atop(newoff);
        while ((size > 0) && (i < VSCLSIZE(vs))) {
            VSM_SETALLOC(*vsmap, i);
            i++;
            size -= vm_page_size;
        }
    }
    clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap);
    if (newoff) {
        /*
         * Offset is not cluster aligned, so number of pages
         * and bitmaps must be adjusted
         */
        clmap->cl_numpages -= atop(newoff);
        CLMAP_SHIFT(clmap, vs);
        CLMAP_SHIFTALLOC(clmap, vs);
    }

    /*
     * The setting of valid bits and handling of write errors
     * must be done here, while we hold the lock on the map.
     * It logically should be done in ps_vs_write_complete().
     * The size and error information has been passed from
     * ps_vs_write_complete().  If the size parameter is non-zero,
     * then there is work to be done.  If error is also non-zero,
     * then the error number is recorded in the cluster and the
     * entire cluster is in error.
     */
    if (size && flag == CL_FIND) {
        vm_offset_t off = (vm_offset_t) 0;

        if (!error) {
            for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
                 i++) {
                VSM_SETPG(*vsmap, i);
                size -= vm_page_size;
            }
            ASSERT(i <= VSCLSIZE(vs));
        } else {
            BS_STAT(clmap->cl_ps->ps_bs,
                    clmap->cl_ps->ps_bs->bs_pages_out_fail +=
                        atop(size));
            off = VSM_CLOFF(*vsmap);
            VSM_SETERR(*vsmap, error);
        }
        /*
         * Deallocate cluster if error, and no valid pages
         * already present.
         */
        if (off != (vm_offset_t) 0)
            ps_deallocate_cluster(clmap->cl_ps, off);
        VS_MAP_UNLOCK(vs);
        return (vm_offset_t) 0;
    } else
        VS_MAP_UNLOCK(vs);

    DEBUG(DEBUG_VS_INTERNAL,
          ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
           newcl+newoff, (int) vs, (int) vsmap, flag));
    DEBUG(DEBUG_VS_INTERNAL,
          ("  clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
           (int) clmap->cl_ps, clmap->cl_numpages,
           (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));

    return (newcl + newoff);
}
void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t);  /* forward */

void
ps_clunmap(
    vstruct_t   vs,
    vm_offset_t offset,
    vm_size_t   length)
{
    vm_offset_t   cluster;  /* The cluster number of offset */
    struct vs_map *vsmap;
    static char here[] = "ps_clunmap";

    VS_MAP_LOCK(vs);

    /*
     * Loop through all clusters in this range, freeing paging segment
     * clusters and map entries as encountered.
     */
    while (length > 0) {
        vm_offset_t newoff;
        int         i;

        cluster = atop(offset) >> vs->vs_clshift;
        if (vs->vs_indirect)    /* indirect map */
            vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
        else
            vsmap = vs->vs_dmap;
        if (vsmap == NULL) {
            VS_MAP_UNLOCK(vs);
            return;
        }
        vsmap += cluster%CLMAP_ENTRIES;
        if (VSM_ISCLR(*vsmap)) {
            length -= vm_page_size;
            offset += vm_page_size;
            continue;
        }
        /*
         * We've got a valid mapping.  Clear it and deallocate
         * paging segment cluster pages.
         * Optimize for entire cluster clearing.
         */
        if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) {
            /*
             * Not cluster aligned.
             */
            ASSERT(trunc_page(newoff) == newoff);
            i = atop(newoff);
        } else
            i = 0;
        while ((i < VSCLSIZE(vs)) && (length > 0)) {
            VSM_CLRPG(*vsmap, i);
            VSM_CLRALLOC(*vsmap, i);
            length -= vm_page_size;
            offset += vm_page_size;
            i++;
        }

        /*
         * If map entry is empty, clear and deallocate cluster.
         */
        if (!VSM_ALLOC(*vsmap)) {
            ps_deallocate_cluster(VSM_PS(*vsmap),
                                  VSM_CLOFF(*vsmap));
            VSM_CLR(*vsmap);
        }
    }

    VS_MAP_UNLOCK(vs);
}
void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */

void
ps_vs_write_complete(
    vstruct_t   vs,
    vm_offset_t offset,
    vm_size_t   size,
    int         error)
{
    struct clmap clmap;

    /*
     * Get the struct vsmap for this cluster.
     * Use READ, even though it was written, because the
     * cluster MUST be present, unless there was an error
     * in the original ps_clmap (e.g. no space), in which
     * case, nothing happens.
     *
     * Must pass enough information to ps_clmap to allow it
     * to set the vs_map structure bitmap under lock.
     */
    (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
}
void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t,
                          vm_offset_t, vm_size_t, boolean_t, int); /* forward */

void
vs_cl_write_complete(
    vstruct_t        vs,
    paging_segment_t ps,
    vm_offset_t      offset,
    vm_offset_t      addr,
    vm_size_t        size,
    boolean_t        async,
    int              error)
{
    static char here[] = "vs_cl_write_complete";

    if (error) {
        /*
         * For internal objects, the error is recorded on a
         * per-cluster basis by ps_clmap() which is called
         * by ps_vs_write_complete() below.
         */
        dprintf(("write failed error = 0x%x\n", error));
        /* add upl_abort code here */
    } else
        GSTAT(global_stats.gs_pages_out += atop(size));
    /*
     * Notify the vstruct mapping code, so it can do its accounting.
     */
    ps_vs_write_complete(vs, offset, size, error);

    if (async) {
        VS_LOCK(vs);
        ASSERT(vs->vs_async_pending > 0);
        vs->vs_async_pending -= size;
        if (vs->vs_async_pending == 0) {
            VS_UNLOCK(vs);
            /* mutex_unlock(&vs->vs_waiting_async); */
            thread_wakeup(&vs->vs_waiting_async);
        } else {
            VS_UNLOCK(vs);
        }
    }
}
#ifdef DEVICE_PAGING
kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t);

kern_return_t
device_write_reply(
    MACH_PORT_FACE reply_port,
    kern_return_t  device_code,
    io_buf_len_t   bytes_written)
{
    struct vs_async *vsa;
    static char here[] = "device_write_reply";

    vsa = (struct vs_async *)
        ((struct vstruct_alias *)(reply_port->alias))->vs;

    if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
        device_code = KERN_FAILURE;
    }

    vsa->vsa_error = device_code;

    ASSERT(vsa->vsa_vs != VSTRUCT_NULL);
    if (vsa->vsa_flags & VSA_TRANSFER) {
        /* revisit when async disk segments redone */
        if (vsa->vsa_error) {
            /* need to consider error condition.  re-write data or */
            /* throw it away here. */
            vm_offset_t ioaddr;

            if (vm_map_copyout(kernel_map, &ioaddr,
                (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS)
                panic("vs_cluster_write: unable to copy source list\n");
            vm_deallocate(kernel_map, ioaddr, vsa->vsa_size);
        }
        ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
                             vsa->vsa_size, vsa->vsa_error);
    } else {
        vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset,
                             vsa->vsa_addr, vsa->vsa_size, TRUE,
                             vsa->vsa_error);
    }

    return KERN_SUCCESS;
}
kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t);

kern_return_t
device_write_reply_inband(
    MACH_PORT_FACE reply_port,
    kern_return_t  return_code,
    io_buf_len_t   bytes_written)
{
    panic("device_write_reply_inband: illegal");
    return KERN_SUCCESS;
}
kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t);

kern_return_t
device_read_reply(
    MACH_PORT_FACE         reply_port,
    kern_return_t          return_code,
    io_buf_ptr_t           data,
    mach_msg_type_number_t dataCnt)
{
    struct vs_async *vsa;

    vsa = (struct vs_async *)
        ((struct vstruct_alias *)(reply_port->alias))->vs;
    vsa->vsa_addr = (vm_offset_t)data;
    vsa->vsa_size = (vm_size_t)dataCnt;
    vsa->vsa_error = return_code;
    thread_wakeup(&vsa->vsa_lock);
    return KERN_SUCCESS;
}
kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t);

kern_return_t
device_read_reply_inband(
    MACH_PORT_FACE         reply_port,
    kern_return_t          return_code,
    io_buf_ptr_inband_t    data,
    mach_msg_type_number_t dataCnt)
{
    panic("device_read_reply_inband: illegal");
    return KERN_SUCCESS;
}
kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t);

kern_return_t
device_read_reply_overwrite(
    MACH_PORT_FACE reply_port,
    kern_return_t  return_code,
    io_buf_len_t   bytes_read)
{
    panic("device_read_reply_overwrite: illegal\n");
    return KERN_SUCCESS;
}
kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE);

kern_return_t
device_open_reply(
    MACH_PORT_FACE reply_port,
    kern_return_t  return_code,
    MACH_PORT_FACE device_port)
{
    panic("device_open_reply: illegal\n");
    return KERN_SUCCESS;
}
kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *,
                             unsigned int, unsigned int *, int); /* forward */

kern_return_t
ps_read_device(
    paging_segment_t ps,
    vm_offset_t      offset,
    vm_offset_t      *bufferp,
    unsigned int     size,
    unsigned int     *residualp,
    int              flags)
{
    kern_return_t kr;
    recnum_t      dev_offset;
    unsigned int  bytes_wanted;
    unsigned int  bytes_read;
    unsigned int  total_read;
    vm_offset_t   dev_buffer;
    vm_offset_t   buf_ptr;
    unsigned int  records_read;
    static char here[] = "ps_read_device";
    struct vs_async *vsa;
    mutex_t       vs_waiting_read_reply;

    device_t      device;
    vm_map_copy_t device_data = NULL;
    default_pager_thread_t *dpt = NULL;

    device = dev_port_lookup(ps->ps_device);
    clustered_reads[atop(size)]++;

    dev_offset = (ps->ps_offset +
                  (offset >> (vm_page_shift - ps->ps_record_shift)));
    bytes_wanted = size;
    total_read = 0;
    *bufferp = (vm_offset_t)NULL;

    do {
        vsa = VS_ALLOC_ASYNC();
        vsa->vsa_offset = 0;
        mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO);
        ip_lock(vsa->reply_port);
        vsa->reply_port->ip_sorights++;
        ip_reference(vsa->reply_port);
        ip_unlock(vsa->reply_port);
        kr = ds_device_read_common(device,
                                   vsa->reply_port,
                                   (mach_msg_type_name_t)
                                       MACH_MSG_TYPE_MOVE_SEND_ONCE,
                                   (dev_mode_t) 0,
                                   dev_offset,
                                   bytes_wanted,
                                   (IO_READ | IO_CALL),
                                   (io_buf_ptr_t *) &dev_buffer,
                                   (mach_msg_type_number_t *) &bytes_read);
        if (kr == MIG_NO_REPLY) {
            assert_wait(&vsa->vsa_lock, THREAD_UNINT);
            thread_block((void(*)(void))0);

            dev_buffer = vsa->vsa_addr;
            bytes_read = (unsigned int)vsa->vsa_size;
            kr = vsa->vsa_error;
        }
        if (kr != KERN_SUCCESS || bytes_read == 0) {
            break;
        }
        total_read += bytes_read;

        /*
         * If we got the entire range, use the returned dev_buffer.
         */
        if (bytes_read == size) {
            *bufferp = (vm_offset_t)dev_buffer;
            break;
        }

        dprintf(("read only %d bytes out of %d\n",
                 bytes_read, bytes_wanted));
        if (dpt == NULL) {
            dpt = get_read_buffer();
            buf_ptr = dpt->dpt_buffer;
            *bufferp = (vm_offset_t)buf_ptr;
        }
        /*
         * Otherwise, copy the data into the provided buffer (*bufferp)
         * and append the rest of the range as it comes in.
         */
        memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
        buf_ptr += bytes_read;
        bytes_wanted -= bytes_read;
        records_read = (bytes_read >>
                        (vm_page_shift - ps->ps_record_shift));
        dev_offset += records_read;
        DEBUG(DEBUG_VS_INTERNAL,
              ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
               dev_buffer, bytes_read));
        if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
            != KERN_SUCCESS)
            Panic("dealloc buf");
    } while (bytes_wanted);

    *residualp = size - total_read;
    if ((dev_buffer != *bufferp) && (total_read != 0)) {
        vm_offset_t temp_buffer;
        vm_allocate(kernel_map, &temp_buffer, total_read, TRUE);
        memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
        if (vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
                VM_MAP_COPYIN_OPT_SRC_DESTROY |
                VM_MAP_COPYIN_OPT_STEAL_PAGES |
                VM_MAP_COPYIN_OPT_PMAP_ENTER,
                (vm_map_copy_t *)&device_data, FALSE))
            panic("ps_read_device: cannot copyin locally provided buffer\n");
    }
    else if ((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)) {
        if (vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
                VM_MAP_COPYIN_OPT_SRC_DESTROY |
                VM_MAP_COPYIN_OPT_STEAL_PAGES |
                VM_MAP_COPYIN_OPT_PMAP_ENTER,
                (vm_map_copy_t *)&device_data, FALSE))
            panic("ps_read_device: cannot copyin backing store provided buffer\n");
    }
    else {
        device_data = NULL;
    }
    *bufferp = (vm_offset_t)device_data;

    if (dpt != NULL) {
        /* Free the receive buffer */
        dpt->checked_out = 0;
        thread_wakeup(&dpt_array);
    }

    return KERN_SUCCESS;
}
kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t,
                              unsigned int, struct vs_async *); /* forward */

kern_return_t
ps_write_device(
    paging_segment_t ps,
    vm_offset_t      offset,
    vm_offset_t      addr,
    unsigned int     size,
    struct vs_async  *vsa)
{
    recnum_t       dev_offset;
    io_buf_len_t   bytes_to_write, bytes_written;
    recnum_t       records_written;
    kern_return_t  kr;
    MACH_PORT_FACE reply_port;
    static char here[] = "ps_write_device";

    clustered_writes[atop(size)]++;

    dev_offset = (ps->ps_offset +
                  (offset >> (vm_page_shift - ps->ps_record_shift)));
    bytes_to_write = size;

    if (vsa) {
        /*
         * Asynchronous write.
         */
        reply_port = vsa->reply_port;
        ip_lock(reply_port);
        reply_port->ip_sorights++;
        ip_reference(reply_port);
        ip_unlock(reply_port);
        {
            device_t device;

            device = dev_port_lookup(ps->ps_device);

            vsa->vsa_addr = addr;
            kr = ds_device_write_common(device,
                    reply_port,
                    (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
                    (dev_mode_t) 0,
                    dev_offset,
                    (io_buf_ptr_t) addr,
                    size,
                    (IO_WRITE | IO_CALL),
                    &bytes_written);
        }
        if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
            dprintf(("%s0x%x, addr=0x%x,"
                     "size=0x%x,offset=0x%x\n",
                     "device_write_request returned ",
                     kr, addr, size, offset));
            BS_STAT(ps->ps_bs,
                    ps->ps_bs->bs_pages_out_fail += atop(size));
            /* do the completion notification to free resources */
            device_write_reply(reply_port, kr, 0);
            return PAGER_ERROR;
        }
    } else do {
        /*
         * Synchronous write.
         */
        {
            device_t device;

            device = dev_port_lookup(ps->ps_device);
            kr = ds_device_write_common(device,
                    IP_NULL, 0,
                    (dev_mode_t) 0,
                    dev_offset,
                    (io_buf_ptr_t) addr,
                    size,
                    (IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
                    &bytes_written);
        }
        if (kr != KERN_SUCCESS) {
            dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
                     "device_write returned ",
                     kr, addr, size, offset));
            BS_STAT(ps->ps_bs,
                    ps->ps_bs->bs_pages_out_fail += atop(size));
            return PAGER_ERROR;
        }
        if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
            Panic("fragmented write");
        records_written = (bytes_written >>
                           (vm_page_shift - ps->ps_record_shift));
        dev_offset += records_written;
        if (bytes_written != bytes_to_write) {
            dprintf(("wrote only %d bytes out of %d\n",
                     bytes_written, bytes_to_write));
        }
        bytes_to_write -= bytes_written;
        addr += bytes_written;
    } while (bytes_to_write > 0);

    return PAGER_SUCCESS;
}
#else   /* !DEVICE_PAGING */

kern_return_t
ps_read_device(
    paging_segment_t ps,
    vm_offset_t      offset,
    vm_offset_t      *bufferp,
    unsigned int     size,
    unsigned int     *residualp,
    int              flags)
{
    panic("ps_read_device not supported");
}

kern_return_t
ps_write_device(
    paging_segment_t ps,
    vm_offset_t      offset,
    vm_offset_t      addr,
    unsigned int     size,
    struct vs_async  *vsa)
{
    panic("ps_write_device not supported");
}

#endif  /* DEVICE_PAGING */
2464 void pvs_object_data_provided(vstruct_t
, upl_t
, vm_offset_t
, vm_size_t
); /* forward */
2467 pvs_object_data_provided(
2473 static char here
[] = "pvs_object_data_provided";
2475 DEBUG(DEBUG_VS_INTERNAL
,
2476 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2477 upl
, offset
, size
));
2480 GSTAT(global_stats
.gs_pages_in
+= atop(size
));
2484 ps_clunmap(vs
, offset
, size
);
2485 #endif /* USE_PRECIOUS */
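
/*
 * Illustrative sketch (under #if 0, not compiled): the byte/page
 * conversions used by the statistics above.  atop() converts a byte count
 * to a page count and ptoa() converts pages back to bytes; for a
 * page-aligned size the two compose to the identity, which is what
 * GSTAT(global_stats.gs_pages_in += atop(size)) relies on.  The helper
 * name is made up.
 */
#if 0
static vm_size_t
example_page_round_trip(vm_size_t size)
{
	/* page-aligned sizes satisfy ptoa(atop(size)) == size */
	return ptoa(atop(size));
}
#endif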
kern_return_t
pvs_cluster_read(
	vstruct_t	vs,
	vm_offset_t	offset,
	vm_size_t	cnt)
{
	upl_t			upl;
	vm_offset_t		actual_offset;
	vm_offset_t		fill;
	vm_size_t		lsize;
	paging_segment_t	ps;
	struct clmap		clmap;
	kern_return_t		error = KERN_SUCCESS;
	int			size, size_wanted, i;
	int			unavail_size;
	unsigned int		residual;
	unsigned int		request_flags;
	default_pager_thread_t	*dpt;
	static char		here[] = "pvs_cluster_read";

	/*
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, inform the VM of any data read, inform
	 * the VM of an unavailable range, then loop again, hoping to
	 * find valid pages later in the cluster.  This continues until
	 * the entire range has been examined, and read, if present.
	 */

#if	USE_PRECIOUS
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS;
#else
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE;
#endif
	while (cnt && (error == KERN_SUCCESS)) {
		size = 0;
		unavail_size = 0;
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (vm_offset_t) -1) {
			/*
			 * Either a failure due to an error on a previous
			 * write or a zero fill on demand page.  In either case,
			 * optimize to do one reply for all pages up to next
			 * cluster boundary.
			 */
			unsigned int local_size, clmask, clsize;

			clmask = (vm_page_size << vs->vs_clshift) - 1;
			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			local_size = MIN(local_size, cnt);

			upl_system_list_request((vm_object_t)
					vs->vs_control_port->ip_kobject,
					offset, local_size, local_size,
					&upl, NULL, 0, request_flags);
			if (clmap.cl_error) {
				uc_upl_abort(upl, UPL_ABORT_ERROR);
			} else {
				uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
			}

			cnt -= local_size;
			offset += local_size;
			continue;
		}

		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		for (i = 0;
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				if (size != 0)
					break;
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			/*
			 * Let VM system know about holes in clusters.
			 */
			ASSERT(unavail_size);
			GSTAT(global_stats.gs_pages_unavail +=
			      atop(unavail_size));
			upl_system_list_request((vm_object_t)
					vs->vs_control_port->ip_kobject,
					offset, unavail_size,
					unavail_size, &upl, NULL, 0,
					request_flags);
			uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
			cnt -= unavail_size;
			offset += unavail_size;
			continue;
		}

		upl_system_list_request((vm_object_t)
				vs->vs_control_port->ip_kobject,
				offset, size, size, &upl,
				NULL, 0, request_flags | UPL_SET_INTERNAL);
		if (ps->ps_segtype == PS_PARTITION) {
/*
			error = ps_read_device(ps, actual_offset, upl,
					       size, &residual, 0);
*/
		} else {
			error = ps_read_file(ps, upl, actual_offset,
					     size, &residual, 0);
		}

		/*
		 * Adjust counts and send response to VM.  Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 * If there was a partial read we may supply some
		 * data and may error some as well.  In all cases the
		 * VM must receive some notification for every page in the
		 * range.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			/*
			 * Got everything we asked for, supply the data to
			 * the VM.  Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space.
			 */
			pvs_object_data_provided(vs, upl, offset, size);
		} else {
			size_wanted = size;
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond EOF.
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
				} else {
					/*
					 * Otherwise, we have partial read. If
					 * the part read is a integral number
					 * of pages supply it. Otherwise round
					 * it up to a page boundary, zero fill
					 * the unread part, and supply it.
					 * Fall through and error the remainder
					 * of the range, if any.
					 */
					fill = residual & ~vm_page_size;
					lsize = (size - residual) + fill;
					pvs_object_data_provided(vs, upl,
								 offset, lsize);

					if (size -= lsize) {
						error = KERN_FAILURE;
					}
				}
			}

			/*
			 * If there was an error in any part of the range, tell
			 * the VM.  Deallocate the remainder of the buffer.
			 * Note that error is explicitly checked again since
			 * it can be modified above.
			 */
			if (error != KERN_SUCCESS) {
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in_fail +=
						atop(size));
			}
		}
		cnt -= size;
		offset += size;
	} /* END while (cnt && (error == 0)) */

	return error;
}
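
/*
 * Illustrative sketch (under #if 0, not compiled): the cluster arithmetic
 * pvs_cluster_read uses when it hits an unallocated cluster.  Given an
 * offset, it replies for everything up to the next cluster boundary in one
 * shot, never going past the requested count.  The helper name is made up;
 * clsize and clmask match the locals computed above.
 */
#if 0
static unsigned int
example_bytes_to_cluster_end(vstruct_t vs, vm_offset_t offset, vm_size_t cnt)
{
	unsigned int clsize, clmask, local_size;

	clsize = vm_page_size << vs->vs_clshift;	/* bytes per cluster   */
	clmask = clsize - 1;
	local_size = clsize - (offset & clmask);	/* to next boundary    */
	return MIN(local_size, cnt);			/* never past request  */
}
#endif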
int	vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
	vstruct_t	vs,
	upl_t		internal_upl,
	vm_offset_t	offset,
	vm_size_t	cnt,
	boolean_t	dp_internal,
	int		flags)
{
	vm_offset_t	actual_offset;	/* Offset within paging segment */
	vm_offset_t	transfer_size;
	vm_offset_t	subx_size;
	struct clmap	clmap;
	paging_segment_t ps;
	struct vs_async	*vsa;
	kern_return_t	error = KERN_SUCCESS;
	static char	here[] = "vs_cluster_write";

	upl_t		upl;
	upl_page_info_t	*page_list;
	upl_page_info_t	pl[20];
	vm_offset_t	mobj_base_addr;
	vm_offset_t	mobj_target_addr;
	int		page_index;
	int		list_size;
	int		cl_size;
	int		super_size;
	int		request_flags;

	ps = PAGING_SEGMENT_NULL;

	if (!dp_internal) {
		vm_offset_t	upl_offset;

		cl_size = (1 << vs->vs_clshift) * vm_page_size;

		if (bs_low) {
			super_size = cl_size;
			request_flags = UPL_NOBLOCK |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		} else {
			super_size = VM_SUPER_CLUSTER;
			request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		}

		upl_system_list_request((vm_object_t)
				vs->vs_control_port->ip_kobject,
				offset, cnt, super_size,
				&upl, NULL, 0, request_flags);

		mobj_base_addr = upl->offset;
		list_size = upl->size;

		page_list = UPL_GET_INTERNAL_PAGE_LIST(upl);
		memcpy(pl, page_list,
		       sizeof(upl_page_info_t) * (list_size/page_size));

		/* Now parcel up the 64k transfer, do at most cluster size */
		/* at a time.						    */
		upl_offset = 0;
		page_index = 0;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
					&clmap, CL_ALLOC,
					transfer_size < cl_size ?
					transfer_size : cl_size, 0);

			if (actual_offset == (vm_offset_t) -1) {
				/* no backing store; abort every remaining page */
				for (; transfer_size != 0;) {
					if (UPL_PAGE_PRESENT(pl, page_index)) {
						uc_upl_abort_range(upl,
							upl_offset,
							vm_page_size,
							UPL_ABORT_FREE_ON_EMPTY);
					}
					page_index++;
					transfer_size -= page_size;
					upl_offset += vm_page_size;
				}
				error = KERN_FAILURE;
				break;
			}
			cnt = MIN(transfer_size,
				  CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);

			while (cnt > 0) {
				/* attempt to send entire cluster */

				/* do the biggest contiguous transfer of dirty */
				/* and precious pages			       */
				subx_size = 0;
				while (cnt > 0 &&
				       (UPL_DIRTY_PAGE(pl, page_index) ||
					UPL_PRECIOUS_PAGE(pl, page_index))) {
					page_index++;
					subx_size += vm_page_size;
					cnt -= vm_page_size;
				}
				if (subx_size == 0) {
					/* clean page: commit it and move on */
					actual_offset += vm_page_size;
					mobj_target_addr += vm_page_size;

					if (UPL_PAGE_PRESENT(pl, page_index)) {
						uc_upl_commit_range(upl,
							upl_offset,
							vm_page_size,
							TRUE, pl);
					}
					page_index++;
					upl_offset += vm_page_size;
					transfer_size -= vm_page_size;
					cnt -= vm_page_size;
					continue;
				}

				error = ps_write_file(ps, upl, upl_offset,
						actual_offset, subx_size, flags);
				if (error != KERN_SUCCESS) {
					actual_offset += subx_size;
					mobj_target_addr += subx_size;
					upl_offset += subx_size;
					transfer_size -= subx_size;

					/* abort whatever is left in the UPL */
					for (; transfer_size != 0;) {
						if (UPL_PAGE_PRESENT(pl, page_index)) {
							uc_upl_abort_range(upl,
								upl_offset,
								vm_page_size,
								UPL_ABORT_FREE_ON_EMPTY);
						}
						page_index++;
						transfer_size -= page_size;
						upl_offset += vm_page_size;
					}
					break;
				}

				ps_vs_write_complete(vs, mobj_target_addr,
						     subx_size, error);

				actual_offset += subx_size;
				mobj_target_addr += subx_size;
				upl_offset += subx_size;

				transfer_size -= subx_size;
			}
			if (error != KERN_SUCCESS)
				break;
		}
	} else {
		assert(cnt <= (vm_page_size << vs->vs_clshift));

		cl_size = (1 << vs->vs_clshift) * vm_page_size;
		list_size = cnt;

		/* The caller provides a mapped_data which is derived  */
		/* from a temporary object.  The targeted pages are    */
		/* guaranteed to be set at offset 0 in the mapped_data */
		/* The actual offset however must still be derived     */
		/* from the offset in the vs in question               */
		mobj_base_addr = offset;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
					&clmap, CL_ALLOC,
					transfer_size < cl_size ?
					transfer_size : cl_size, 0);
			if (actual_offset == (vm_offset_t) -1) {
				error = KERN_FAILURE;
				break;
			}
			cnt = MIN(transfer_size,
				  CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);
			/* Assume that the caller has given us contiguous */
			/* pages					  */
			error = ps_write_file(ps, internal_upl,
					      0, actual_offset,
					      cnt, flags);
			if (error != KERN_SUCCESS)
				break;
			ps_vs_write_complete(vs, mobj_target_addr,
					     cnt, error);

			actual_offset += cnt;
			mobj_target_addr += cnt;
			transfer_size -= cnt;
		}
	}

	if (error != KERN_SUCCESS)
		return KERN_FAILURE;
	return KERN_SUCCESS;
}
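
/*
 * Illustrative sketch (under #if 0, not compiled): how vs_cluster_write
 * parcels a transfer of up to VM_SUPER_CLUSTER bytes into chunks no larger
 * than one cluster, matching the
 * "transfer_size < cl_size ? transfer_size : cl_size" argument passed to
 * ps_clmap above.  The helper name is made up.
 */
#if 0
static vm_size_t
example_next_chunk(vstruct_t vs, vm_size_t transfer_size)
{
	vm_size_t cl_size;

	cl_size = (1 << vs->vs_clshift) * vm_page_size;	/* bytes per cluster */
	return transfer_size < cl_size ? transfer_size : cl_size;
}
#endif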
vm_size_t
ps_vstruct_allocated_size(
	vstruct_t	vs)
{
	int		num_pages;
	struct vs_map	*vsmap;
	int		i, j, k;

	num_pages = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]))
					continue;
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k))
						num_pages++;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]))
				continue;
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k))
					num_pages++;
			}
		}
	}

	return ptoa(num_pages);
}
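
/*
 * Illustrative sketch (under #if 0, not compiled): the per-cluster page
 * count that ps_vstruct_allocated_size accumulates, i.e. the number of
 * bits set in one cluster's VSM_BMAP.  The helper name is made up.
 */
#if 0
static int
example_pages_in_cluster(vstruct_t vs, struct vs_map *vsmap, int j)
{
	int k, num_pages = 0;

	for (k = 0; k < VSCLSIZE(vs); k++) {
		if ((VSM_BMAP(vsmap[j])) & (1 << k))
			num_pages++;
	}
	return num_pages;
}
#endif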
size_t
ps_vstruct_allocated_pages(
	vstruct_t		vs,
	default_pager_page_t	*pages,
	size_t			pages_size)
{
	int		num_pages;
	struct vs_map	*vsmap;
	vm_offset_t	offset;
	int		i, j, k;

	num_pages = 0;
	offset = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL) {
				offset += (vm_page_size * CLMAP_ENTRIES *
					   VSCLSIZE(vs));
				continue;
			}
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j])) {
					offset += vm_page_size * VSCLSIZE(vs);
					continue;
				}
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
						num_pages++;
						if (num_pages < pages_size)
							pages++->dpp_offset =
								offset;
					}
					offset += vm_page_size;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL)
			return 0;
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j])) {
				offset += vm_page_size * VSCLSIZE(vs);
				continue;
			}
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
					num_pages++;
					if (num_pages < pages_size)
						pages++->dpp_offset = offset;
				}
				offset += vm_page_size;
			}
		}
	}

	return num_pages;
}
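
/*
 * Illustrative sketch (under #if 0, not compiled): the bounded recording
 * pattern ps_vstruct_allocated_pages uses.  Every allocated page is
 * counted, but offsets are only stored while there is room in the
 * caller's array, so the return value still says how many entries would
 * have been needed.  Names are made up.
 */
#if 0
static int
example_record_page(default_pager_page_t *pages, size_t pages_size,
		    int num_pages, vm_offset_t offset)
{
	num_pages++;				/* always count the page  */
	if (num_pages < pages_size)		/* store only if it fits  */
		pages[num_pages - 1].dpp_offset = offset;
	return num_pages;
}
#endif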
kern_return_t
ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
#ifndef ubc_sync_working
	vm_object_t	transfer_object)
#else
	upl_t		upl)
#endif
{
	struct vs_map	*vsmap;
	struct vs_map	old_vsmap;
	struct vs_map	new_vsmap;
	int		i, j;

	VS_LOCK(vs);	/* block all work on this vstruct */
			/* can't allow the normal multiple write */
			/* semantic because writes may conflict */
	vs->vs_xfer_pending = TRUE;
	vs_wait_for_sync_writers(vs);
	vs_wait_for_readers(vs);
	/* we will unlock the vs to allow other writes while transferring */
	/* and will be guaranteed of the persistance of the vs struct */
	/* because the caller of ps_vstruct_transfer_from_segment bumped */
	/* vs_async_pending */
	/* OK we now have guaranteed no other parties are accessing this */
	/* vs.  Now that we are also supporting simple lock versions of */
	/* vs_lock we cannot hold onto VS_LOCK as we may block below. */
	/* our purpose in holding it before was the multiple write case */
	/* we now use the boolean xfer_pending to do that.  We can use */
	/* a boolean instead of a count because we have guaranteed single */
	/* file access to this code in its caller */
	VS_UNLOCK(vs);
vs_changed:
	if (vs->vs_indirect) {
		int		vsmap_size;
		vm_offset_t	clmap_off;

		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			clmap_off = (vm_page_size * CLMAP_ENTRIES *
				     VSCLSIZE(vs) * i);
			if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
			else
				vsmap_size = CLMAP_ENTRIES;
			for (j = 0; j < vsmap_size; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]) ||
				    (VSM_PS(vsmap[j]) != segment))
					continue;
				if (vs_cluster_transfer(vs,
					(vm_page_size * (j << vs->vs_clshift))
						+ clmap_off,
					vm_page_size << vs->vs_clshift,
#ifndef ubc_sync_working
					transfer_object)
#else
					upl)
#endif
						!= KERN_SUCCESS) {
					VS_LOCK(vs);
					vs->vs_xfer_pending = FALSE;
					VS_UNLOCK(vs);
					vs_finish_write(vs);
					return KERN_FAILURE;
				}
				/* allow other readers/writers during transfer*/
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				VS_LOCK(vs);
				vs->vs_xfer_pending = TRUE;
				VS_UNLOCK(vs);
				vs_wait_for_sync_writers(vs);
				vs_wait_for_readers(vs);
				if (!(vs->vs_indirect)) {
					goto vs_changed;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			return KERN_SUCCESS;
		}
		/* loop on clusters in the direct map */
		for (j = 0; j < vs->vs_size; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]) ||
			    (VSM_PS(vsmap[j]) != segment))
				continue;
			if (vs_cluster_transfer(vs,
				vm_page_size * (j << vs->vs_clshift),
				vm_page_size << vs->vs_clshift,
#ifndef ubc_sync_working
				transfer_object) != KERN_SUCCESS) {
#else
				upl) != KERN_SUCCESS) {
#endif
				VS_LOCK(vs);
				vs->vs_xfer_pending = FALSE;
				VS_UNLOCK(vs);
				vs_finish_write(vs);
				return KERN_FAILURE;
			}
			/* allow other readers/writers during transfer*/
			VS_LOCK(vs);
			vs->vs_xfer_pending = FALSE;
			VS_UNLOCK(vs);
			vs_finish_write(vs);
			VS_LOCK(vs);
			vs->vs_xfer_pending = TRUE;
			VS_UNLOCK(vs);
			vs_wait_for_sync_writers(vs);
			vs_wait_for_readers(vs);
			if (vs->vs_indirect) {
				goto vs_changed;
			}
		}
	}

	VS_LOCK(vs);
	vs->vs_xfer_pending = FALSE;
	VS_UNLOCK(vs);
	vs_finish_write(vs);
	return KERN_SUCCESS;
}
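
/*
 * Illustrative sketch (under #if 0, not compiled): the address arithmetic
 * used when walking the maps above.  Cluster j within indirect block i
 * starts at that block's base byte offset plus j clusters' worth of pages,
 * which is how the vs_cluster_transfer() calls above compute their
 * offsets.  The helper name is made up.
 */
#if 0
static vm_offset_t
example_cluster_byte_offset(vstruct_t vs, int i, int j)
{
	vm_offset_t clmap_off;

	/* bytes covered by the maps preceding indirect map i */
	clmap_off = vm_page_size * CLMAP_ENTRIES * VSCLSIZE(vs) * i;
	/* plus j clusters' worth of pages within that map */
	return clmap_off + (vm_page_size * (j << vs->vs_clshift));
}
#endif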
vs_map_t
vs_get_map_entry(
	vstruct_t	vs,
	vm_offset_t	offset)
{
	struct vs_map	*vsmap;
	vm_offset_t	cluster;

	cluster = atop(offset) >> vs->vs_clshift;
	if (vs->vs_indirect) {
		long	ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == (vs_map_t) NULL)
			return vsmap;
	} else
		vsmap = vs->vs_dmap;
	vsmap += cluster%CLMAP_ENTRIES;
	return vsmap;
}
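
/*
 * Illustrative sketch (under #if 0, not compiled): the index arithmetic
 * behind vs_get_map_entry.  A byte offset becomes a cluster number, which
 * then selects an indirect block and an entry within it.  The struct and
 * helper names are made up.
 */
#if 0
struct example_map_index {
	long	ind_block;	/* which indirect map (vs_imap[])  */
	long	entry;		/* which vs_map entry within it    */
};

static struct example_map_index
example_offset_to_index(vstruct_t vs, vm_offset_t offset)
{
	struct example_map_index idx;
	vm_offset_t cluster;

	cluster = atop(offset) >> vs->vs_clshift;	/* page -> cluster */
	idx.ind_block = cluster / CLMAP_ENTRIES;
	idx.entry = cluster % CLMAP_ENTRIES;
	return idx;
}
#endif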
kern_return_t
vs_cluster_transfer(
	vstruct_t	vs,
	vm_offset_t	offset,
	vm_size_t	cnt,
#ifndef ubc_sync_working
	vm_object_t	transfer_object)
#else
	upl_t		upl)
#endif
{
	vm_offset_t		actual_offset;
	paging_segment_t	ps;
	struct clmap		clmap;
	kern_return_t		error = KERN_SUCCESS;
	int			size, size_wanted, i;
	int			unavail_size;
	unsigned int		residual;
	default_pager_thread_t	*dpt;
	struct	vs_map		*vsmap_ptr;
	struct	vs_map		read_vsmap;
	struct	vs_map		original_read_vsmap;
	struct	vs_map		write_vsmap;
#ifndef ubc_sync_working
	upl_t			upl;
#endif
	static char		here[] = "vs_cluster_transfer";

	/* vs_cluster_transfer reads in the pages of a cluster and
	 * then writes these pages back to new backing store.  The
	 * segment the pages are being read from is assumed to have
	 * been taken off-line and is no longer considered for new
	 * space requests.
	 */

	/*
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, then loop again, hoping to
	 * find valid pages later in the cluster.  This continues until
	 * the entire range has been examined, and read, if present.  The
	 * pages are written as they are read.  If a failure occurs after
	 * some pages are written the unmap call at the bottom of the loop
	 * recovers the backing store and the old backing store remains
	 * in effect.
	 */

	/* uc_upl_map(kernel_map, upl, &ioaddr); */

	VSM_CLR(write_vsmap);
	VSM_CLR(original_read_vsmap);
	/* grab the actual object's pages to sync with I/O */
	while (cnt && (error == KERN_SUCCESS)) {
		vsmap_ptr = vs_get_map_entry(vs, offset);
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (vm_offset_t) -1) {
			/*
			 * Nothing left to write in this cluster at least
			 * set write cluster information for any previous
			 * write, clear for next cluster, if there is one
			 */
			unsigned int local_size, clmask, clsize;

			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			local_size = MIN(local_size, cnt);

			/* This cluster has no data in it beyond what may */
			/* have been found on a previous iteration through */
			/* the loop "write_vsmap" */
			*vsmap_ptr = write_vsmap;
			VSM_CLR(write_vsmap);
			VSM_CLR(original_read_vsmap);

			cnt -= local_size;
			offset += local_size;
			continue;
		}

		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		size = 0;
		unavail_size = 0;
		for (i = 0;
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				if (size != 0)
					break;
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			ASSERT(unavail_size);
			cnt -= unavail_size;
			offset += unavail_size;
			if((offset & ((vm_page_size << vs->vs_clshift) - 1))
			   == 0) {
				/* There is no more to transfer in this
				   cluster
				 */
				*vsmap_ptr = write_vsmap;
				VSM_CLR(write_vsmap);
				VSM_CLR(original_read_vsmap);
			}
			continue;
		}

		if(VSM_ISCLR(original_read_vsmap))
			original_read_vsmap = *vsmap_ptr;

		if(ps->ps_segtype == PS_PARTITION) {
/*
			NEED TO BE WITH SYNC & NO COMMIT
			error = ps_read_device(ps, actual_offset, &buffer,
					       size, &residual, flags);
*/
		} else {
#ifndef ubc_sync_working
			error = vm_fault_list_request(transfer_object,
				(vm_object_offset_t) (actual_offset &
					((vm_page_size << vs->vs_clshift) - 1)),
				size, &upl, NULL,
				0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
						| UPL_SET_INTERNAL);
			if (error == KERN_SUCCESS) {
				error = ps_read_file(ps, upl, actual_offset,
						     size, &residual, 0);
				if(error)
					uc_upl_commit(upl, NULL);
			}
#else
			/* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/
			error = ps_read_file(ps, upl, actual_offset,
					     size, &residual,
					     (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD));
#endif
		}

		read_vsmap = *vsmap_ptr;

		/*
		 * Adjust counts and put data in new BS.  Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			/*
			 * Got everything we asked for, supply the data to
			 * the new BS.  Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space unless
			 * the write is unsuccessful.
			 */

			/* note buffer will be cleaned up in all cases by */
			/* internal_cluster_write or if an error on write */
			/* the vm_map_copy_page_discard call              */
			*vsmap_ptr = write_vsmap;

#ifndef ubc_sync_working
			error = vm_fault_list_request(transfer_object,
				(vm_object_offset_t) (actual_offset &
					((vm_page_size << vs->vs_clshift) - 1)),
				size, &upl, NULL,
				0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
						| UPL_SET_INTERNAL);
			if(vs_cluster_write(vs, upl, offset,
					size, TRUE, 0) != KERN_SUCCESS) {
				uc_upl_commit(upl, NULL);
#else
			if(vs_cluster_write(vs, upl, offset,
					size, TRUE,
					UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
#endif
				error = KERN_FAILURE;
				if(!(VSM_ISCLR(*vsmap_ptr))) {
					/* unmap the new backing store object */
					ps_clunmap(vs, offset, size);
				}
				/* original vsmap */
				*vsmap_ptr = original_read_vsmap;
				VSM_CLR(write_vsmap);
			} else {
				if((offset + size) &
				   ((vm_page_size << vs->vs_clshift)
				    - 1)) {
					/* There is more to transfer in this
					   cluster
					 */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
				} else {
					/* discard the old backing object */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
					*vsmap_ptr = write_vsmap;
					VSM_CLR(write_vsmap);
					VSM_CLR(original_read_vsmap);
				}
			}
		} else {
			size_wanted = size;
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond EOF.
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if(!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				} else {
					/*
					 * Otherwise, we have partial read.
					 * This is also considered an error
					 * for the purposes of cluster transfer
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if(!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
				}
			}
		}
		cnt -= size;
		offset += size;

	} /* END while (cnt && (error == 0)) */
	if(!VSM_ISCLR(write_vsmap))
		*vsmap_ptr = write_vsmap;

	/* uc_upl_un_map(kernel_map, upl); */
	return error;
}
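
/*
 * Illustrative sketch (under #if 0, not compiled): the save/swap/restore
 * bookkeeping vs_cluster_transfer applies to the cluster map entry.  The
 * original mapping is remembered before the read, the freshly written
 * mapping is kept on success, and the original is put back if the rewrite
 * fails.  The helper is made up and shows only the bookkeeping, not the
 * I/O.
 */
#if 0
static void
example_settle_map_entry(struct vs_map *vsmap_ptr,
			 struct vs_map original_read_vsmap,
			 struct vs_map write_vsmap,
			 boolean_t write_failed)
{
	if (write_failed) {
		/* fall back to the mapping that was in place before */
		*vsmap_ptr = original_read_vsmap;
	} else {
		/* keep whatever the new write established */
		*vsmap_ptr = write_vsmap;
	}
}
#endif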
kern_return_t
default_pager_add_file(MACH_PORT_FACE backing_store,
	int		*vp,
	int		record_size,
	vm_size_t	size)
{
	backing_store_t		bs;
	paging_segment_t	ps;
	int			i;
	int			error;
	static char here[] = "default_pager_add_file";

	if ((bs = backing_store_lookup(backing_store))
	    == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		if (ps->ps_segtype != PS_FILE)
			continue;

		/*
		 * Check for overlap on same device.
		 */
		if (ps->ps_vnode == (struct vnode *)vp) {
			return KERN_INVALID_ARGUMENT;
		}
	}

	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	ps->ps_segtype = PS_FILE;
	ps->ps_vnode = (struct vnode *)vp;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = size;
	ps->ps_pgnum = size >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;

	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
	if (!ps->ps_bmap) {
		kfree((vm_offset_t)ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;
	}
	for (i = 0; i < ps->ps_ncls; i++) {
		clrbit(ps->ps_bmap, i);
	}

	ps->ps_going_away = FALSE;
	ps->ps_bs = bs;

	if ((error = ps_enter(ps)) != 0) {
		kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree((vm_offset_t)ps, sizeof *ps);
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

	dp_pages_free += ps->ps_pgcount;

	bs_more_space(ps->ps_clcount);

	DEBUG(DEBUG_BS_INTERNAL,
	      ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
	       device, offset, size, record_size,
	       ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
}
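
/*
 * Illustrative sketch (under #if 0, not compiled): the sizing arithmetic
 * default_pager_add_file performs when a file of `size` records, each
 * `record_size` bytes, is added.  The field names mirror the assignments
 * above; only the helper name is made up.
 */
#if 0
static void
example_size_paging_file(paging_segment_t ps, backing_store_t bs,
			 int record_size, vm_size_t size)
{
	/* records per page, expressed as a shift */
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = size;				/* records        */
	ps->ps_pgnum = size >> ps->ps_record_shift;	/* pages          */
	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);	/* pages/cluster  */
	ps->ps_clcount = ps->ps_ncls =
		ps->ps_pgcount >> ps->ps_clshift;	/* clusters       */
}
#endif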
kern_return_t ps_read_file(paging_segment_t, upl_t, vm_offset_t,
			   unsigned int, unsigned int *, int);	/* forward */

kern_return_t
ps_read_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		offset,
	unsigned int		size,
	unsigned int		*residualp,
	int			flags)
{
	vm_object_offset_t	f_offset;
	int			error;
	kern_return_t		result;
	static char		here[] = "ps_read_file";

	clustered_reads[atop(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	/* for transfer case we need to pass uploffset and flags */
	error = vnode_pagein(ps->ps_vnode,
			     upl, (vm_offset_t)0, f_offset, (vm_size_t)size, flags, NULL);

	/* The vnode_pagein semantic is somewhat at odds with the existing   */
	/* device_read semantic.  Partial reads are not experienced at this  */
	/* level.  It is up to the bit map code and cluster read code to     */
	/* check that requested data locations are actually backed, and the  */
	/* pagein code to either read all of the requested data or return an */
	/* error.							      */

	if (error)
		result = KERN_FAILURE;
	else {
		*residualp = 0;
		result = KERN_SUCCESS;
	}
	return result;
}
kern_return_t
ps_write_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	int			flags)
{
	vm_object_offset_t	f_offset;
	kern_return_t		result;
	static char		here[] = "ps_write_file";

	clustered_writes[atop(size)]++;
	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	if (vnode_pageout(ps->ps_vnode,
			  upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
		result = KERN_FAILURE;
	else
		result = KERN_SUCCESS;

	return result;
}
kern_return_t
default_pager_triggers(MACH_PORT_FACE default_pager,
	int		hi_wat,
	int		lo_wat,
	int		flags,
	MACH_PORT_FACE	trigger_port)
{
	if (flags & HI_WAT_ALERT) {
		if (min_pages_trigger_port)
			ipc_port_release_send(min_pages_trigger_port);
		min_pages_trigger_port = trigger_port;
		minimum_pages_remaining = hi_wat/vm_page_size;
	}

	if (flags & LO_WAT_ALERT) {
		if (max_pages_trigger_port)
			ipc_port_release_send(max_pages_trigger_port);
		max_pages_trigger_port = trigger_port;
		maximum_pages_free = lo_wat/vm_page_size;