/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 * Paging File Management.
 */
#include <mach/host_priv.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>

#include <default_pager/default_pager_internal.h>
#include <default_pager/default_pager_alerts.h>
#include <default_pager/default_pager_object_server.h>

#include <ipc/ipc_types.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>

#include <kern/kern_types.h>
#include <kern/host.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>

#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_protos.h>
/* LP64todo - need large internal object support */

/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future
 */
#define ALLOC_STRIDE  (1024 * 1024 * 1024)
int physical_transfer_cluster_count = 0;
#define VM_SUPER_CLUSTER	0x40000
#define VM_SUPER_PAGES		64

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_DEF_CLSHIFT	2
int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;
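
/*
 * Illustrative note (not part of the original source): the cluster
 * shift is the log2 of the pages-per-cluster count, so the cluster
 * arithmetic used throughout this file reduces to shifts.  A minimal
 * sketch of that relationship, assuming 4 KB pages:
 *
 *     pages_per_cluster = 1 << vstruct_def_clshift;        (1 << 2 == 4)
 *     cluster_bytes     = pages_per_cluster * vm_page_size; (4 * 4096 == 16384)
 *
 * bs_get_global_clsize() below derives the shift from a requested
 * cluster size in pages and publishes the result in default_pager_clsize.
 */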
unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list: head of list of to-be-completed I/O ops
 *	async_num_queued: number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out: number of pages of requests not completed.
 */
struct vs_async *vs_async_list;
int	async_num_queued;
int	async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async *vs_async_free_list;

mutex_t default_pager_async_lock;	/* Protects globals above */

int	vs_alloc_async_failed = 0;		/* statistics */
int	vs_alloc_async_count = 0;		/* statistics */
struct vs_async *vs_alloc_async(void);		/* forward */
void	vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	mutex_init(&default_pager_async_lock, 0)
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
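
/*
 * Illustrative note (not part of the original source): the cluster
 * allocation paths below compare dp_pages_free against
 * minimum_pages_remaining and, when the pool drops below that floor,
 * take min_pages_trigger_port and send a HI_WAT_ALERT through
 * default_pager_space_alert() so that the paging-space client can react.
 */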
/* Have we decided if swap needs to be encrypted yet ? */
boolean_t	dp_encryption_inited = FALSE;
/* Should we encrypt swap ? */
boolean_t	dp_encryption = FALSE;

/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t		 paging_segments_lock;
int		 paging_segment_max = 0;
int		 paging_segment_count = 0;
int		 ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };

/*
 * Total pages free in system
 * This differs from clusters committed/avail which is a measure of the
 * over commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	cluster_transfer_minimum = 100;
/* forward declarations */
kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *get_read_buffer( void );
kern_return_t ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
	upl_t		 upl);		/* forward */
kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int);	/* forward */
kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *);	/* forward */
kern_return_t vs_cluster_transfer(
	vstruct_t	vs,
	upl_offset_t	offset,
	upl_size_t	cnt,
	upl_t		upl);		/* forward */
vs_map_t vs_get_map_entry(
	vstruct_t	vs,
	vm_offset_t	offset);	/* forward */
default_pager_thread_t *
get_read_buffer( void )
{
	int	i;

	DPT_LOCK(dpt_lock);
	while (TRUE) {
		for (i = 0; i < default_pager_internal_count; i++) {
			if (dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
#if	VS_ASYNC_REUSE
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
	}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t	out_of_memory)
{
	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}
void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
void
bs_more_space(
	int	nclusters)
{
	BSL_LOCK();
	/*
	 * Account for new paging space.
	 */
	clusters_available += nclusters;

	if (clusters_available >= clusters_committed) {
		if (verbose && user_warned) {
			printf("%s%s - %d excess clusters now.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_available - clusters_committed);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - still short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
			clusters_committed_peak -= nclusters;
		}
	}
	BSL_UNLOCK();

	return;
}

void
bs_commit(
	int	nclusters)
{
	BSL_LOCK();
	clusters_committed += nclusters;
	if (clusters_committed > clusters_available) {
		if (verbose && !user_warned) {
			user_warned = TRUE;
			printf("%s%s - short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
		}
		if (clusters_committed > clusters_committed_peak) {
			clusters_committed_peak = clusters_committed;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - was short of up to %d clusters.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_committed_peak - clusters_available);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	}
	BSL_UNLOCK();

	return;
}
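
/*
 * Illustrative note (not part of the original source): bs_commit() and
 * bs_more_space() maintain a simple over-commit check.  For example,
 * with clusters_available == 100 and clusters_committed == 130 the
 * pager is 30 clusters over-committed and the warning path above
 * fires; clusters_committed_peak records the worst shortfall seen.
 */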
int	default_pager_info_verbose = 1;

	vm_size_t	pages_total, pages_free;

	pages_total = pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;

		/*
		 * no need to lock: by the time this data
		 * gets back to any remote requestor it
		 * will be obsolete anyways
		 */
		pages_total += ps->ps_pgnum;
		pages_free += ps->ps_clcount << ps->ps_clshift;
		DP_DEBUG(DEBUG_BS_INTERNAL,
			 ("segment #%d: %d total, %d free\n",
			  i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
	}
	*totalp = pages_total;

	if (verbose && user_warned && default_pager_info_verbose) {
		if (clusters_available < clusters_committed) {
			printf("%s %d clusters committed, %d available.\n",
			       my_name,
			       clusters_committed,
			       clusters_available);
		}
	}
backing_store_t backing_store_alloc(void);	/* forward */

backing_store_t
backing_store_alloc(void)
{
	backing_store_t	bs;

	bs = (backing_store_t) kalloc(sizeof (struct backing_store));
	if (bs == BACKING_STORE_NULL)
		panic("backing_store_alloc: no memory");

	bs->bs_port = MACH_PORT_NULL;

	bs->bs_pages_total = 0;
	bs->bs_pages_in = 0;
	bs->bs_pages_in_fail = 0;
	bs->bs_pages_out = 0;
	bs->bs_pages_out_fail = 0;

	return bs;
}
backing_store_t backing_store_lookup(MACH_PORT_FACE);	/* forward */

/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
	MACH_PORT_FACE	port)
{
	backing_store_t	bs;

/*
	port is currently backed with a vs structure in the alias field
	we could create an ISBS alias and a port_is_bs call but frankly
	I see no reason for the test, the bs->port == port check below
	will work properly on junk entries

	if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;

	BSL_LOCK();
	queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
		      bs_links) {
		BS_LOCK(bs);
		if (bs->bs_port == port) {
			BSL_UNLOCK();
			/* Success, return it locked. */
			return(bs);
		}
		BS_UNLOCK(bs);
	}
	BSL_UNLOCK();
	return BACKING_STORE_NULL;
}
void backing_store_add(backing_store_t);	/* forward */

void
backing_store_add(
	__unused backing_store_t bs)
{
//	MACH_PORT_FACE		port = bs->bs_port;
//	MACH_PORT_FACE		pset = default_pager_default_set;
	kern_return_t		kr = KERN_SUCCESS;

	if (kr != KERN_SUCCESS)
		panic("backing_store_add: add to set");
}
/*
 * Set up default page shift, but only if not already
 * set and argument is within range.
 */
boolean_t
bs_set_default_clsize(unsigned int npages)
{
	switch(npages) {
	case 1:
	case 2:
	case 4:
	case 8:
		if (default_pager_clsize == 0)	/* if not yet set */
			vstruct_def_clshift = local_log2(npages);
		return(TRUE);
	}
	return(FALSE);
}
int bs_get_global_clsize(int clsize);	/* forward */

int
bs_get_global_clsize(
	int	clsize)
{
	int			i;
	memory_object_default_t	dmm;
	kern_return_t		kr;

	/*
	 * Only allow setting of cluster size once. If called
	 * with no cluster size (default), we use the compiled-in default
	 * for the duration. The same cluster size is used for all
	 * paging segments.
	 */
	if (default_pager_clsize == 0) {
		/*
		 * Keep cluster size in bit shift because it's quicker
		 * arithmetic, and easier to keep at a power of 2.
		 */
		if (clsize != NO_CLSIZE) {
			for (i = 0; (1 << i) < clsize; i++);
			if (i > MAX_CLUSTER_SHIFT)
				i = MAX_CLUSTER_SHIFT;
			vstruct_def_clshift = i;
		}
		default_pager_clsize = (1 << vstruct_def_clshift);

		/*
		 * Let the user know the new (and definitive) cluster size.
		 */
		printf("%scluster size = %d page%s\n",
		       my_name, default_pager_clsize,
		       (default_pager_clsize == 1) ? "" : "s");

		/*
		 * Let the kernel know too, in case it hasn't used the
		 * default value provided in main() yet.
		 */
		dmm = default_pager_object;
		clsize = default_pager_clsize * vm_page_size;	/* in bytes */
		kr = host_default_memory_manager(host_priv_self(),
						 &dmm,
						 clsize);
		memory_object_default_deallocate(dmm);

		if (kr != KERN_SUCCESS) {
			panic("bs_get_global_cl_size:host_default_memory_manager");
		}
		if (dmm != default_pager_object) {
			panic("bs_get_global_cl_size:there is another default pager");
		}
	}
	ASSERT(default_pager_clsize > 0 &&
	       (default_pager_clsize & (default_pager_clsize - 1)) == 0);

	return default_pager_clsize;
}
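
/*
 * Illustrative note (not part of the original source): for a caller
 * passing clsize == 8 pages, the loop above stops at i == 3, so
 * vstruct_def_clshift becomes 3 and default_pager_clsize becomes
 * 1 << 3 == 8 pages (32 KB with 4 KB pages); passing NO_CLSIZE keeps
 * the compiled-in VSTRUCT_DEF_CLSHIFT.
 */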
kern_return_t
default_pager_backing_store_create(
	memory_object_default_t	pager,
	int			priority,
	int			clsize,		/* in bytes */
	MACH_PORT_FACE		*backing_store)
{
	backing_store_t		bs;
	MACH_PORT_FACE		port;
	struct vstruct_alias	*alias_struct;

	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	bs = backing_store_alloc();
	port = ipc_port_alloc_kernel();
	ipc_port_make_send(port);
	assert (port != IP_NULL);

	DP_DEBUG(DEBUG_BS_EXTERNAL,
		 ("priority=%d clsize=%d bs_port=0x%x\n",
		  priority, clsize, (int) backing_store));

	alias_struct = (struct vstruct_alias *)
		kalloc(sizeof (struct vstruct_alias));
	if(alias_struct != NULL) {
		alias_struct->vs = (struct vstruct *)bs;
		alias_struct->name = &default_pager_ops;
		port->alias = (int) alias_struct;
	} else {
		ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
		kfree(bs, sizeof (struct backing_store));
		return KERN_RESOURCE_SHORTAGE;
	}

	if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
		priority = BS_MAXPRI;
	else if (priority == BS_NOPRI)
		priority = BS_MAXPRI;
	else
		priority = BS_MINPRI;
	bs->bs_priority = priority;

	bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));

	BSL_LOCK();
	queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
		    bs_links);
	BSL_UNLOCK();

	backing_store_add(bs);

	*backing_store = port;
	return KERN_SUCCESS;
}
kern_return_t
default_pager_backing_store_info(
	MACH_PORT_FACE		backing_store,
	backing_store_flavor_t	flavour,
	backing_store_info_t	info,
	mach_msg_type_number_t	*size)
{
	backing_store_t			bs;
	backing_store_basic_info_t	basic;
	int				i;
	paging_segment_t		ps;

	if (flavour != BACKING_STORE_BASIC_INFO ||
	    *size < BACKING_STORE_BASIC_INFO_COUNT)
		return KERN_INVALID_ARGUMENT;

	basic = (backing_store_basic_info_t)info;
	*size = BACKING_STORE_BASIC_INFO_COUNT;

	VSTATS_LOCK(&global_stats.gs_lock);
	basic->pageout_calls = global_stats.gs_pageout_calls;
	basic->pagein_calls = global_stats.gs_pagein_calls;
	basic->pages_in = global_stats.gs_pages_in;
	basic->pages_out = global_stats.gs_pages_out;
	basic->pages_unavail = global_stats.gs_pages_unavail;
	basic->pages_init = global_stats.gs_pages_init;
	basic->pages_init_writes = global_stats.gs_pages_init_writes;
	VSTATS_UNLOCK(&global_stats.gs_lock);

	if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	basic->bs_pages_total = bs->bs_pages_total;

	bs->bs_pages_free = 0;
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
			bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
		}
	}
	basic->bs_pages_free = bs->bs_pages_free;
	basic->bs_pages_in = bs->bs_pages_in;
	basic->bs_pages_in_fail = bs->bs_pages_in_fail;
	basic->bs_pages_out = bs->bs_pages_out;
	basic->bs_pages_out_fail = bs->bs_pages_out_fail;

	basic->bs_priority = bs->bs_priority;
	basic->bs_clsize = ptoa_32(bs->bs_clsize);	/* in bytes */

	BS_UNLOCK(bs);

	return KERN_SUCCESS;
}
int ps_delete(paging_segment_t);	/* forward */
	kern_return_t	error = KERN_SUCCESS;

	VSL_LOCK();	/* get the lock on the list of vs's */

	/* The lock relationship and sequence is fairly complicated */
	/* this code looks at a live list, locking and unlocking the list */
	/* as it traverses it.  It depends on the locking behavior of */
	/* default_pager_no_senders.  no_senders always locks the vstruct */
	/* targeted for removal before locking the vstruct list.  However */
	/* it will remove that member of the list without locking its */
	/* neighbors.  We can be sure when we hold a lock on a vstruct */
	/* it cannot be removed from the list but we must hold the list */
	/* lock to be sure that its pointers to its neighbors are valid. */
	/* Also, we can hold off destruction of a vstruct when the list */
	/* lock and the vs locks are not being held by bumping the */
	/* vs_async_pending count. */
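
	/*
	 * Illustrative note (not part of the original source): the
	 * hold/release protocol described above appears below as
	 * "vs->vs_async_pending += 1" taken before the vs and list locks
	 * are dropped, and "vs->vs_async_pending -= 1" followed by a
	 * thread_wakeup() once the transfer for that vstruct is finished.
	 */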
681 while(backing_store_release_trigger_disable
!= 0) {
682 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
685 /* we will choose instead to hold a send right */
686 vs_count
= vstruct_list
.vsl_count
;
687 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
688 if(vs
== (vstruct_t
)&vstruct_list
) {
693 vs_async_wait(vs
); /* wait for any pending async writes */
694 if ((vs_count
!= 0) && (vs
!= NULL
))
695 vs
->vs_async_pending
+= 1; /* hold parties calling */
699 while((vs_count
!= 0) && (vs
!= NULL
)) {
		/* We take the count of AMO's before beginning the */
		/* transfer of the target segment.  We are guaranteed */
		/* that the target segment cannot get more users.  We */
		/* also know that queue entries are made at the back of */
		/* the list.  If some of the entries we would check */
		/* disappear while we are traversing the list then we */
		/* will either check new entries which do not have any */
		/* backing store in the target segment or re-check old */
		/* entries.  This might not be optimal but it will */
		/* always be correct.  The alternative is to take a */
		/* snapshot of the list. */
713 if(dp_pages_free
< cluster_transfer_minimum
)
714 error
= KERN_FAILURE
;
716 vm_object_t transfer_object
;
720 transfer_object
= vm_object_allocate((vm_object_size_t
)VM_SUPER_CLUSTER
);
722 error
= vm_object_upl_request(transfer_object
,
723 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
725 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
727 if(error
== KERN_SUCCESS
) {
728 error
= ps_vstruct_transfer_from_segment(
730 upl_commit(upl
, NULL
, 0);
733 error
= KERN_FAILURE
;
735 vm_object_deallocate(transfer_object
);
739 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
740 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
741 vs
->vs_waiting_async
= FALSE
;
743 thread_wakeup(&vs
->vs_async_pending
);
752 while(backing_store_release_trigger_disable
!= 0) {
753 VSL_SLEEP(&backing_store_release_trigger_disable
,
757 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
758 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
759 (vs
!= next_vs
) && (vs_count
!= 1)) {
761 vs_async_wait(next_vs
); /* wait for any */
762 /* pending async writes */
763 next_vs
->vs_async_pending
+= 1; /* hold parties */
764 /* calling vs_async_wait */
769 vs
->vs_async_pending
-= 1;
770 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
771 vs
->vs_waiting_async
= FALSE
;
773 thread_wakeup(&vs
->vs_async_pending
);
777 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
788 default_pager_backing_store_delete(
789 MACH_PORT_FACE backing_store
)
795 int interim_pages_removed
= 0;
798 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
799 return KERN_INVALID_ARGUMENT
;
802 /* not implemented */
809 error
= KERN_SUCCESS
;
810 for (i
= 0; i
<= paging_segment_max
; i
++) {
811 ps
= paging_segments
[i
];
812 if (ps
!= PAGING_SEGMENT_NULL
&&
814 ! ps
->ps_going_away
) {
816 /* disable access to this segment */
817 ps
->ps_going_away
= TRUE
;
820 * The "ps" segment is "off-line" now,
821 * we can try and delete it...
823 if(dp_pages_free
< (cluster_transfer_minimum
825 error
= KERN_FAILURE
;
829 /* remove all pages associated with the */
830 /* segment from the list of free pages */
831 /* when transfer is through, all target */
832 /* segment pages will appear to be free */
834 dp_pages_free
-= ps
->ps_pgcount
;
835 interim_pages_removed
+= ps
->ps_pgcount
;
837 error
= ps_delete(ps
);
839 if (error
!= KERN_SUCCESS
) {
841 * We couldn't delete the segment,
842 * probably because there's not enough
843 * virtual memory left.
844 * Re-enable all the segments.
853 if (error
!= KERN_SUCCESS
) {
854 for (i
= 0; i
<= paging_segment_max
; i
++) {
855 ps
= paging_segments
[i
];
856 if (ps
!= PAGING_SEGMENT_NULL
&&
860 /* re-enable access to this segment */
861 ps
->ps_going_away
= FALSE
;
865 dp_pages_free
+= interim_pages_removed
;
871 for (i
= 0; i
<= paging_segment_max
; i
++) {
872 ps
= paging_segments
[i
];
873 if (ps
!= PAGING_SEGMENT_NULL
&&
875 if(ps
->ps_going_away
) {
876 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
877 paging_segment_count
--;
879 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
880 kfree(ps
, sizeof *ps
);
	/* Scan the entire ps array separately to make certain we find the */
	/* proper paging_segment_max */
887 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
888 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
889 paging_segment_max
= i
;
	/*
	 * All the segments have been deleted.
	 * We can remove the backing store.
	 */

	/*
	 * Disable lookups of this backing store.
	 */
902 if((void *)bs
->bs_port
->alias
!= NULL
)
903 kfree((void *) bs
->bs_port
->alias
,
904 sizeof (struct vstruct_alias
));
905 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
906 bs
->bs_port
= MACH_PORT_NULL
;
910 * Remove backing store from backing_store list.
913 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
918 * Free the backing store structure.
920 kfree(bs
, sizeof *bs
);
925 int ps_enter(paging_segment_t
); /* forward */
935 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
936 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
940 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
941 paging_segments
[i
] = ps
;
942 if (i
> paging_segment_max
)
943 paging_segment_max
= i
;
944 paging_segment_count
++;
945 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
946 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
947 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
951 return KERN_RESOURCE_SHORTAGE
;
960 default_pager_add_segment(
961 MACH_PORT_FACE backing_store
,
962 MACH_PORT_FACE device
,
972 if ((bs
= backing_store_lookup(backing_store
))
973 == BACKING_STORE_NULL
)
974 return KERN_INVALID_ARGUMENT
;
977 for (i
= 0; i
<= paging_segment_max
; i
++) {
978 ps
= paging_segments
[i
];
979 if (ps
== PAGING_SEGMENT_NULL
)
983 * Check for overlap on same device.
985 if (!(ps
->ps_device
!= device
986 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
987 || offset
+ count
<= ps
->ps_offset
)) {
990 return KERN_INVALID_ARGUMENT
;
996 * Set up the paging segment
998 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
999 if (ps
== PAGING_SEGMENT_NULL
) {
1001 return KERN_RESOURCE_SHORTAGE
;
1004 ps
	ps->ps_segtype = PS_PARTITION;
	ps->ps_device = device;
	ps->ps_offset = offset;
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = count;
	ps->ps_pgnum = count >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
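
	/*
	 * Illustrative note (not part of the original source), assuming
	 * 4 KB pages and 512-byte device records: ps_record_shift is
	 * log2(4096/512) == 3, so a partition of count == 2097152 records
	 * holds 2097152 >> 3 == 262144 pages, and with 4-page clusters
	 * (ps_clshift == 2) that is 262144 >> 2 == 65536 clusters.
	 */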
1017 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
1019 kfree(ps
, sizeof *ps
);
1021 return KERN_RESOURCE_SHORTAGE
;
1023 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
1024 clrbit(ps
->ps_bmap
, i
);
1027 ps
->ps_going_away
= FALSE
;
1030 if ((error
= ps_enter(ps
)) != 0) {
1031 kfree(ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1032 kfree(ps
, sizeof *ps
);
1034 return KERN_RESOURCE_SHORTAGE
;
1037 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1038 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1042 dp_pages_free
+= ps
->ps_pgcount
;
1045 bs_more_space(ps
->ps_clcount
);
1047 DP_DEBUG(DEBUG_BS_INTERNAL
,
1048 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1049 device
, offset
, count
, record_size
,
1050 ps
->ps_record_shift
, ps
->ps_pgnum
));
1052 return KERN_SUCCESS
;
1058 MACH_PORT_FACE master
)
1060 security_token_t null_security_token
= {
1063 MACH_PORT_FACE device
;
1064 int info
[DEV_GET_SIZE_COUNT
];
1065 mach_msg_type_number_t info_count
;
1066 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1067 unsigned int rec_size
;
1070 MACH_PORT_FACE reply_port
;
1072 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1073 null_security_token
, dev_name
, &device
))
1076 info_count
= DEV_GET_SIZE_COUNT
;
1077 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1078 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1079 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1080 clsize
= bs_get_global_clsize(0);
1081 if (!default_pager_backing_store_create(
1082 default_pager_object
,
1083 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1084 (clsize
* vm_page_size
),
1086 if (!default_pager_add_segment(bs
, device
,
1087 0, count
, rec_size
)) {
1090 ipc_port_release_receive(bs
);
1094 ipc_port_release_send(device
);
1097 #endif /* DEVICE_PAGING */
1102 vs_alloc_async(void)
1104 struct vs_async
*vsa
;
1105 MACH_PORT_FACE reply_port
;
1106 // kern_return_t kr;
1109 if (vs_async_free_list
== NULL
) {
1111 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
		/*
		 * Try allocating a reply port named after the
		 * address of the vs_async structure.
		 */
1117 struct vstruct_alias
*alias_struct
;
1119 reply_port
= ipc_port_alloc_kernel();
1120 alias_struct
= (struct vstruct_alias
*)
1121 kalloc(sizeof (struct vstruct_alias
));
1122 if(alias_struct
!= NULL
) {
1123 alias_struct
->vs
= (struct vstruct
*)vsa
;
1124 alias_struct
->name
= &default_pager_ops
;
1125 reply_port
->alias
= (int) alias_struct
;
1126 vsa
->reply_port
= reply_port
;
1127 vs_alloc_async_count
++;
1130 vs_alloc_async_failed
++;
1131 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1133 kfree(vsa
, sizeof (struct vs_async
));
1138 vsa
= vs_async_free_list
;
1139 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1148 struct vs_async
*vsa
)
1151 vsa
->vsa_next
= vs_async_free_list
;
1152 vs_async_free_list
= vsa
;
1156 #else /* VS_ASYNC_REUSE */
1159 vs_alloc_async(void)
1161 struct vs_async
*vsa
;
1162 MACH_PORT_FACE reply_port
;
1165 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
	/*
	 * Try allocating a reply port named after the
	 * address of the vs_async structure.
	 */
1171 reply_port
= ipc_port_alloc_kernel();
1172 alias_struct
= (vstruct_alias
*)
1173 kalloc(sizeof (struct vstruct_alias
));
1174 if(alias_struct
!= NULL
) {
1175 alias_struct
->vs
= reply_port
;
1176 alias_struct
->name
= &default_pager_ops
;
1177 reply_port
->alias
= (int) vsa
;
1178 vsa
->reply_port
= reply_port
;
1179 vs_alloc_async_count
++;
1182 vs_alloc_async_failed
++;
1183 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1185 kfree(vsa
, sizeof (struct vs_async
));
1195 struct vs_async
*vsa
)
	MACH_PORT_FACE	reply_port;

	reply_port = vsa->reply_port;
	kfree(reply_port->alias, sizeof (struct vstruct_alias));
	kfree(vsa, sizeof (struct vs_async));
	ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
	vs_alloc_async_count--;
1211 #endif /* VS_ASYNC_REUSE */
1213 zone_t vstruct_zone
;
1222 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1223 if (vs
== VSTRUCT_NULL
) {
1224 return VSTRUCT_NULL
;
1230 * The following fields will be provided later.
1232 vs
->vs_pager_ops
= NULL
;
1233 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1234 vs
->vs_references
= 1;
1238 vs
->vs_waiting_seqno
= FALSE
;
1239 vs
->vs_waiting_read
= FALSE
;
1240 vs
->vs_waiting_write
= FALSE
;
1241 vs
->vs_waiting_async
= FALSE
;
1243 mutex_init(&vs
->vs_waiting_seqno
, 0);
1244 mutex_init(&vs
->vs_waiting_read
, 0);
1245 mutex_init(&vs
->vs_waiting_write
, 0);
1246 mutex_init(&vs
->vs_waiting_refs
, 0);
1247 mutex_init(&vs
->vs_waiting_async
, 0);
1255 vs
->vs_clshift
= local_log2(bs_get_global_clsize(0));
1256 vs
->vs_size
= ((atop_32(round_page_32(size
)) - 1) >> vs
->vs_clshift
) + 1;
1257 vs
->vs_async_pending
= 0;
1260 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1261 * depending on the size of the memory object.
1263 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1264 vs
->vs_imap
= (struct vs_map
**)
1265 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1266 vs
->vs_indirect
= TRUE
;
1268 vs
->vs_dmap
= (struct vs_map
*)
1269 kalloc(CLMAP_SIZE(vs
->vs_size
));
1270 vs
->vs_indirect
= FALSE
;
1272 vs
->vs_xfer_pending
= FALSE
;
1273 DP_DEBUG(DEBUG_VS_INTERNAL
,
1274 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1277 * Check to see that we got the space.
1280 kfree(vs
, sizeof *vs
);
1281 return VSTRUCT_NULL
;
1285 * Zero the indirect pointers, or clear the direct pointers.
1287 if (vs
->vs_indirect
)
1288 memset(vs
->vs_imap
, 0,
1289 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1291 for (i
= 0; i
< vs
->vs_size
; i
++)
1292 VSM_CLR(vs
->vs_dmap
[i
]);
1294 VS_MAP_LOCK_INIT(vs
);
1296 bs_commit(vs
->vs_size
);
1301 paging_segment_t
ps_select_segment(unsigned int, int *); /* forward */
1308 paging_segment_t ps
;
1313 * Optimize case where there's only one segment.
1314 * paging_segment_max will index the one and only segment.
1318 if (paging_segment_count
== 1) {
1319 paging_segment_t lps
; /* used to avoid extra PS_UNLOCK */
1320 ipc_port_t trigger
= IP_NULL
;
1322 ps
= paging_segments
[paging_segment_max
];
1323 *psindex
= paging_segment_max
;
1325 if (ps
->ps_going_away
) {
1326 /* this segment is being turned off */
1327 lps
= PAGING_SEGMENT_NULL
;
1329 ASSERT(ps
->ps_clshift
>= shift
);
1330 if (ps
->ps_clcount
) {
1332 dp_pages_free
-= 1 << ps
->ps_clshift
;
1333 if(min_pages_trigger_port
&&
1334 (dp_pages_free
< minimum_pages_remaining
)) {
1335 trigger
= min_pages_trigger_port
;
1336 min_pages_trigger_port
= NULL
;
1341 lps
= PAGING_SEGMENT_NULL
;
1346 if (trigger
!= IP_NULL
) {
1347 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1348 ipc_port_release_send(trigger
);
1353 if (paging_segment_count
== 0) {
1355 return PAGING_SEGMENT_NULL
;
1359 i
>= BS_MINPRI
; i
--) {
1362 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1363 (ps_select_array
[i
] == BS_FULLPRI
))
1365 start_index
= ps_select_array
[i
];
1367 if(!(paging_segments
[start_index
])) {
1369 physical_transfer_cluster_count
= 0;
1371 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1372 (((paging_segments
[start_index
])->ps_clshift
)
1373 + vm_page_shift
))) {
1374 physical_transfer_cluster_count
= 0;
1375 j
= start_index
+ 1;
1377 physical_transfer_cluster_count
+=1;
1379 if(start_index
== 0)
1380 start_index
= paging_segment_max
;
1382 start_index
= start_index
- 1;
1386 if (j
> paging_segment_max
)
1388 if ((ps
= paging_segments
[j
]) &&
1389 (ps
->ps_bs
->bs_priority
== i
)) {
1391 * Force the ps cluster size to be
1392 * >= that of the vstruct.
1395 if (ps
->ps_going_away
) {
1396 /* this segment is being turned off */
1397 } else if ((ps
->ps_clcount
) &&
1398 (ps
->ps_clshift
>= shift
)) {
1399 ipc_port_t trigger
= IP_NULL
;
1402 dp_pages_free
-= 1 << ps
->ps_clshift
;
1403 if(min_pages_trigger_port
&&
1405 minimum_pages_remaining
)) {
1406 trigger
= min_pages_trigger_port
;
1407 min_pages_trigger_port
= NULL
;
1411 * found one, quit looking.
1413 ps_select_array
[i
] = j
;
1416 if (trigger
!= IP_NULL
) {
1417 default_pager_space_alert(
1420 ipc_port_release_send(trigger
);
1427 if (j
== start_index
) {
1429 * none at this priority -- mark it full
1431 ps_select_array
[i
] = BS_FULLPRI
;
1438 return PAGING_SEGMENT_NULL
;
1441 vm_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1444 ps_allocate_cluster(
1447 paging_segment_t use_ps
)
1449 unsigned int byte_num
;
1451 paging_segment_t ps
;
1452 vm_offset_t cluster
;
1453 ipc_port_t trigger
= IP_NULL
;
	/*
	 * Find best paging segment.
	 * ps_select_segment will decrement cluster count on ps.
	 * Must pass cluster shift to find the most appropriate segment.
	 */
	/* NOTE: The addition of paging segment delete capability threatened
	 * to seriously complicate the treatment of paging segments in this
	 * module and the ones that call it (notably ps_clmap), because of the
	 * difficulty in assuring that the paging segment would continue to
	 * exist between being unlocked and locked.  This was
	 * avoided because all calls to this module are based in either
	 * dp_memory_object calls which rely on the vs lock, or by
	 * the transfer function which is part of the segment delete path.
	 * The transfer function which is part of paging segment delete is
	 * protected from multiple callers by the backing store lock.
	 * The paging segment delete function treats mappings to a paging
	 * segment on a vstruct by vstruct basis, locking the vstruct targeted
	 * while data is transferred to the remaining segments.  This is in
	 * line with the view that incomplete or in-transition mappings between
	 * data, a vstruct, and backing store are protected by the vs lock.
	 * This and the ordering of the paging segment "going_away" bit setting
	 * protects the paging segment.
	 */
1478 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1483 ASSERT(ps
->ps_clcount
!= 0);
1486 dp_pages_free
-= 1 << ps
->ps_clshift
;
1487 if(min_pages_trigger_port
&&
1488 (dp_pages_free
< minimum_pages_remaining
)) {
1489 trigger
= min_pages_trigger_port
;
1490 min_pages_trigger_port
= NULL
;
1494 if (trigger
!= IP_NULL
) {
1495 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1496 ipc_port_release_send(trigger
);
1499 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1500 PAGING_SEGMENT_NULL
) {
1501 static uint32_t lastnotify
= 0;
1502 uint32_t now
, nanoseconds_dummy
;
1505 * Emit a notification of the low-paging resource condition
1506 * but don't issue it more than once every five seconds. This
1507 * prevents us from overflowing logs with thousands of
1508 * repetitions of the message.
1510 clock_get_system_nanotime(&now
, &nanoseconds_dummy
);
1511 if (now
> lastnotify
+ 5) {
1512 dprintf(("no space in available paging segments\n"));
1516 /* the count got off maybe, reset to zero */
1519 if(min_pages_trigger_port
) {
1520 trigger
= min_pages_trigger_port
;
1521 min_pages_trigger_port
= NULL
;
1525 if (trigger
!= IP_NULL
) {
1526 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1527 ipc_port_release_send(trigger
);
1529 return (vm_offset_t
) -1;
	/*
	 * Look for an available cluster.  At the end of the loop,
	 * byte_num is the byte offset and bit_num is the bit offset of the
	 * first zero bit in the paging segment bitmap.
	 */
	byte_num = ps->ps_hint;
	for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
		if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
			for (bit_num = 0; bit_num < NBBY; bit_num++) {
				if (isclr((ps->ps_bmap + byte_num), bit_num))
					break;
			}
			ASSERT(bit_num != NBBY);
			break;
		}
	}
	ps->ps_hint = byte_num;
	cluster = (byte_num*NBBY) + bit_num;

	/* Space was reserved, so this must be true */
	ASSERT(cluster < ps->ps_ncls);

	setbit(ps->ps_bmap, cluster);
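
	/*
	 * Illustrative note (not part of the original source): the bitmap
	 * is scanned a byte at a time, so a byte value of 0xEF has its
	 * first clear bit at bit_num == 4, yielding cluster
	 * byte_num * NBBY + 4; ps_hint remembers where to resume the
	 * next search.
	 */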
1561 void ps_deallocate_cluster(paging_segment_t
, vm_offset_t
); /* forward */
1564 ps_deallocate_cluster(
1565 paging_segment_t ps
,
1566 vm_offset_t cluster
)
1569 if (cluster
>= (vm_offset_t
) ps
->ps_ncls
)
1570 panic("ps_deallocate_cluster: Invalid cluster number");
1573 * Lock the paging segment, clear the cluster's bitmap and increment the
1574 * number of free cluster.
1578 clrbit(ps
->ps_bmap
, cluster
);
1580 dp_pages_free
+= 1 << ps
->ps_clshift
;
1584 * Move the hint down to the freed cluster if it is
1585 * less than the current hint.
1587 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1588 ps
->ps_hint
= (cluster
/NBBY
);
1594 * If we're freeing space on a full priority, reset the array.
1597 if (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1598 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1604 void ps_dealloc_vsmap(struct vs_map
*, vm_size_t
); /* forward */
1608 struct vs_map
*vsmap
,
1612 for (i
= 0; i
< size
; i
++)
1613 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1614 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1615 VSM_CLOFF(vsmap
[i
]));
	/*
	 * If this is an indirect structure, then we walk through the valid
	 * (non-zero) indirect pointers and deallocate the clusters
	 * associated with each used map entry (via ps_dealloc_vsmap).
	 * When all of the clusters in an indirect block have been
	 * freed, we deallocate the block.  When all of the indirect
	 * blocks have been deallocated we deallocate the memory
	 * holding the indirect pointers.
	 */
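
	/*
	 * Illustrative note (not part of the original source): the
	 * two-level layout being torn down here is the one ps_map_extend()
	 * below builds up -- vs_imap[] holds INDIRECT_CLMAP_ENTRIES(vs_size)
	 * pointers, each to a block of CLMAP_ENTRIES struct vs_map entries,
	 * and each entry names at most one paging-segment cluster.
	 */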
1636 if (vs
->vs_indirect
) {
1637 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1638 if (vs
->vs_imap
[i
] != NULL
) {
1639 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1640 kfree(vs
->vs_imap
[i
], CLMAP_THRESHOLD
);
1643 kfree(vs
->vs_imap
, INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1646 * Direct map. Free used clusters, then memory.
1648 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1649 kfree(vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1653 bs_commit(- vs
->vs_size
);
1655 zfree(vstruct_zone
, vs
);
1658 int ps_map_extend(vstruct_t
, unsigned int); /* forward */
1662 unsigned int new_size
)
1664 struct vs_map
**new_imap
;
1665 struct vs_map
*new_dmap
= NULL
;
1668 void *old_map
= NULL
;
1669 int old_map_size
= 0;
1671 if (vs
->vs_size
>= new_size
) {
1673 * Someone has already done the work.
	/*
	 * If the new size extends into the indirect range, then we have one
	 * of two cases: we are going from indirect to indirect, or we are
	 * going from direct to indirect.  If we are going from indirect to
	 * indirect, then it is possible that the new size will fit in the old
	 * indirect map.  If this is the case, then just reset the size of the
	 * vstruct map and we are done.  If the new size will not
	 * fit into the old indirect map, then we have to allocate a new
	 * indirect map and copy the old map pointers into this new map.
	 *
	 * If we are going from direct to indirect, then we have to allocate a
	 * new indirect map and copy the old direct pages into the first
	 * indirect page of the new map.
	 * NOTE: allocating memory here is dangerous, as we're in the
	 * pageout path of the default pager.
	 */
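
	/*
	 * Illustrative note (not part of the original source): for a map
	 * that is still direct, INDIRECT_CLMAP(new_size) turning true is
	 * what forces the allocation of new_imap[0] below; the old direct
	 * entries are copied into that first indirect block and the old
	 * direct map is freed at the end of the routine.
	 */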
1694 if (INDIRECT_CLMAP(new_size
)) {
1695 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1698 * Get a new indirect map and zero it.
1700 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1701 if (vs
->vs_indirect
&&
1702 (new_map_size
== old_map_size
)) {
1703 bs_commit(new_size
- vs
->vs_size
);
1704 vs
->vs_size
= new_size
;
1708 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1709 if (new_imap
== NULL
) {
1712 memset(new_imap
, 0, new_map_size
);
1714 if (vs
->vs_indirect
) {
1715 /* Copy old entries into new map */
1716 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1717 /* Arrange to free the old map */
1718 old_map
= (void *) vs
->vs_imap
;
1720 } else { /* Old map was a direct map */
1721 /* Allocate an indirect page */
1722 if ((new_imap
[0] = (struct vs_map
*)
1723 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1724 kfree(new_imap
, new_map_size
);
1727 new_dmap
= new_imap
[0];
1728 newdsize
= CLMAP_ENTRIES
;
1732 newdsize
= new_size
;
1734 * If the new map is a direct map, then the old map must
1735 * also have been a direct map. All we have to do is
1736 * to allocate a new direct map, copy the old entries
1737 * into it and free the old map.
1739 if ((new_dmap
= (struct vs_map
*)
1740 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1746 /* Free the old map */
1747 old_map
= (void *) vs
->vs_dmap
;
1748 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1750 /* Copy info from the old map into the new map */
1751 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1753 /* Initialize the rest of the new map */
1754 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1755 VSM_CLR(new_dmap
[i
]);
1758 vs
->vs_imap
= new_imap
;
1759 vs
->vs_indirect
= TRUE
;
1761 vs
->vs_dmap
= new_dmap
;
1762 bs_commit(new_size
- vs
->vs_size
);
1763 vs
->vs_size
= new_size
;
1765 kfree(old_map
, old_map_size
);
1773 struct clmap
*clmap
,
1778 vm_offset_t cluster
; /* The cluster of offset. */
1779 vm_offset_t newcl
; /* The new cluster allocated. */
1782 struct vs_map
*vsmap
;
1786 ASSERT(vs
->vs_dmap
);
1787 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1790 * Initialize cluster error value
1792 clmap
->cl_error
= 0;
1795 * If the object has grown, extend the page map.
1797 if (cluster
>= vs
->vs_size
) {
1798 if (flag
== CL_FIND
) {
1799 /* Do not allocate if just doing a lookup */
1801 return (vm_offset_t
) -1;
1803 if (ps_map_extend(vs
, cluster
+ 1)) {
1805 return (vm_offset_t
) -1;
	/*
	 * Look for the desired cluster.  If the map is indirect, then we
	 * have a two level lookup.  First find the indirect block, then
	 * find the actual cluster.  If the indirect block has not yet
	 * been allocated, then do so.  If the cluster has not yet been
	 * allocated, then do so.
	 *
	 * If any of the allocations fail, then return an error.
	 * Don't allocate if just doing a lookup.
	 */
1819 if (vs
->vs_indirect
) {
1820 long ind_block
= cluster
/CLMAP_ENTRIES
;
1822 /* Is the indirect block allocated? */
1823 vsmap
= vs
->vs_imap
[ind_block
];
1824 if (vsmap
== NULL
) {
1825 if (flag
== CL_FIND
) {
1827 return (vm_offset_t
) -1;
1830 /* Allocate the indirect block */
1831 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
1832 if (vsmap
== NULL
) {
1834 return (vm_offset_t
) -1;
1836 /* Initialize the cluster offsets */
1837 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
1839 vs
->vs_imap
[ind_block
] = vsmap
;
1842 vsmap
= vs
->vs_dmap
;
1845 vsmap
+= cluster%CLMAP_ENTRIES
;
1848 * At this point, vsmap points to the struct vs_map desired.
1850 * Look in the map for the cluster, if there was an error on a
1851 * previous write, flag it and return. If it is not yet
1852 * allocated, then allocate it, if we're writing; if we're
1853 * doing a lookup and the cluster's not allocated, return error.
1855 if (VSM_ISERR(*vsmap
)) {
1856 clmap
->cl_error
= VSM_GETERR(*vsmap
);
1858 return (vm_offset_t
) -1;
1859 } else if (VSM_ISCLR(*vsmap
)) {
1862 if (flag
== CL_FIND
) {
1864 * If there's an error and the entry is clear, then
1865 * we've run out of swap space. Record the error
1869 VSM_SETERR(*vsmap
, error
);
1872 return (vm_offset_t
) -1;
1875 * Attempt to allocate a cluster from the paging segment
1877 newcl
= ps_allocate_cluster(vs
, &psindex
,
1878 PAGING_SEGMENT_NULL
);
1879 if (newcl
== (vm_offset_t
) -1) {
1881 return (vm_offset_t
) -1;
1884 VSM_SETCLOFF(*vsmap
, newcl
);
1885 VSM_SETPS(*vsmap
, psindex
);
1888 newcl
= VSM_CLOFF(*vsmap
);
1891 * Fill in pertinent fields of the clmap
1893 clmap
->cl_ps
= VSM_PS(*vsmap
);
1894 clmap
->cl_numpages
= VSCLSIZE(vs
);
1895 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
	/*
	 * Byte offset in paging segment is byte offset to cluster plus
	 * byte offset within cluster.  It looks ugly, but should be
	 * relatively quick.
	 */
	ASSERT(trunc_page(offset) == offset);
	newcl = ptoa_32(newcl) << vs->vs_clshift;
	newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
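
	/*
	 * Illustrative note (not part of the original source), assuming
	 * vm_page_shift == 12 and vs_clshift == 2: a cluster covers
	 * 1 << (12 + 2) == 16 KB, so newoff = offset & 0x3fff is the
	 * offset within the cluster, and ptoa_32(newcl) << 2 converts the
	 * cluster index into a byte offset within the paging segment.
	 */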
1905 if (flag
== CL_ALLOC
) {
1907 * set bits in the allocation bitmap according to which
1908 * pages were requested. size is in bytes.
1910 i
= atop_32(newoff
);
1911 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
1912 VSM_SETALLOC(*vsmap
, i
);
1914 size
-= vm_page_size
;
1917 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
1920 * Offset is not cluster aligned, so number of pages
1921 * and bitmaps must be adjusted
1923 clmap
->cl_numpages
-= atop_32(newoff
);
1924 CLMAP_SHIFT(clmap
, vs
);
1925 CLMAP_SHIFTALLOC(clmap
, vs
);
	/*
	 * The setting of valid bits and handling of write errors
	 * must be done here, while we hold the lock on the map.
	 * It logically should be done in ps_vs_write_complete().
	 * The size and error information has been passed from
	 * ps_vs_write_complete().  If the size parameter is non-zero,
	 * then there is work to be done.  If error is also non-zero,
	 * then the error number is recorded in the cluster and the
	 * entire cluster is in error.
	 */
1939 if (size
&& flag
== CL_FIND
) {
1940 vm_offset_t off
= (vm_offset_t
) 0;
1943 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
1945 VSM_SETPG(*vsmap
, i
);
1946 size
-= vm_page_size
;
1948 ASSERT(i
<= VSCLSIZE(vs
));
1950 BS_STAT(clmap
->cl_ps
->ps_bs
,
1951 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
1953 off
= VSM_CLOFF(*vsmap
);
1954 VSM_SETERR(*vsmap
, error
);
1957 * Deallocate cluster if error, and no valid pages
1960 if (off
!= (vm_offset_t
) 0)
1961 ps_deallocate_cluster(clmap
->cl_ps
, off
);
1963 return (vm_offset_t
) 0;
1967 DP_DEBUG(DEBUG_VS_INTERNAL
,
1968 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1969 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
1970 DP_DEBUG(DEBUG_VS_INTERNAL
,
1971 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1972 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
1973 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
1975 return (newcl
+ newoff
);
1978 void ps_clunmap(vstruct_t
, vm_offset_t
, vm_size_t
); /* forward */
1986 vm_offset_t cluster
; /* The cluster number of offset */
1987 struct vs_map
*vsmap
;
1992 * Loop through all clusters in this range, freeing paging segment
1993 * clusters and map entries as encountered.
1995 while (length
> 0) {
1999 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
2000 if (vs
->vs_indirect
) /* indirect map */
2001 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
2003 vsmap
= vs
->vs_dmap
;
2004 if (vsmap
== NULL
) {
2008 vsmap
+= cluster%CLMAP_ENTRIES
;
2009 if (VSM_ISCLR(*vsmap
)) {
2010 length
-= vm_page_size
;
2011 offset
+= vm_page_size
;
		/*
		 * We've got a valid mapping.  Clear it and deallocate
		 * paging segment cluster pages.
		 * Optimize for entire cluster clearing.
		 */
2019 if ( (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) ) {
2021 * Not cluster aligned.
2023 ASSERT(trunc_page(newoff
) == newoff
);
2024 i
= atop_32(newoff
);
2027 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
2028 VSM_CLRPG(*vsmap
, i
);
2029 VSM_CLRALLOC(*vsmap
, i
);
2030 length
-= vm_page_size
;
2031 offset
+= vm_page_size
;
2036 * If map entry is empty, clear and deallocate cluster.
2038 if (!VSM_ALLOC(*vsmap
)) {
2039 ps_deallocate_cluster(VSM_PS(*vsmap
),
2048 void ps_vs_write_complete(vstruct_t
, vm_offset_t
, vm_size_t
, int); /* forward */
2051 ps_vs_write_complete(
	/*
	 * Get the struct vsmap for this cluster.
	 * Use READ, even though it was written, because the
	 * cluster MUST be present, unless there was an error
	 * in the original ps_clmap (e.g. no space), in which
	 * case, nothing happens.
	 *
	 * Must pass enough information to ps_clmap to allow it
	 * to set the vs_map structure bitmap under lock.
	 */
	(void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
2072 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, vm_offset_t
, vm_offset_t
, vm_size_t
, boolean_t
, int); /* forward */
2075 vs_cl_write_complete(
2077 __unused paging_segment_t ps
,
2079 __unused vm_offset_t addr
,
2084 // kern_return_t kr;
2088 * For internal objects, the error is recorded on a
2089 * per-cluster basis by ps_clmap() which is called
2090 * by ps_vs_write_complete() below.
2092 dprintf(("write failed error = 0x%x\n", error
));
2093 /* add upl_abort code here */
2095 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2097 * Notify the vstruct mapping code, so it can do its accounting.
2099 ps_vs_write_complete(vs
, offset
, size
, error
);
2103 ASSERT(vs
->vs_async_pending
> 0);
2104 vs
->vs_async_pending
-= size
;
2105 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2106 vs
->vs_waiting_async
= FALSE
;
2108 /* mutex_unlock(&vs->vs_waiting_async); */
2109 thread_wakeup(&vs
->vs_async_pending
);
2116 #ifdef DEVICE_PAGING
2117 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2121 MACH_PORT_FACE reply_port
,
2122 kern_return_t device_code
,
2123 io_buf_len_t bytes_written
)
2125 struct vs_async
*vsa
;
2127 vsa
= (struct vs_async
*)
2128 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2130 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2131 device_code
= KERN_FAILURE
;
2134 vsa
->vsa_error
= device_code
;
2137 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2138 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2139 /* revisit when async disk segments redone */
2140 if(vsa
->vsa_error
) {
2141 /* need to consider error condition. re-write data or */
2142 /* throw it away here. */
2143 vm_map_copy_discard((vm_map_copy_t
)vsa
->vsa_addr
);
2145 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2146 vsa
->vsa_size
, vsa
->vsa_error
);
2148 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2149 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2154 return KERN_SUCCESS
;
2157 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2159 device_write_reply_inband(
2160 MACH_PORT_FACE reply_port
,
2161 kern_return_t return_code
,
2162 io_buf_len_t bytes_written
)
2164 panic("device_write_reply_inband: illegal");
2165 return KERN_SUCCESS
;
2168 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2171 MACH_PORT_FACE reply_port
,
2172 kern_return_t return_code
,
2174 mach_msg_type_number_t dataCnt
)
2176 struct vs_async
*vsa
;
2177 vsa
= (struct vs_async
*)
2178 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2179 vsa
->vsa_addr
= (vm_offset_t
)data
;
2180 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2181 vsa
->vsa_error
= return_code
;
2182 thread_wakeup(&vsa
->vsa_lock
);
2183 return KERN_SUCCESS
;
2186 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2188 device_read_reply_inband(
2189 MACH_PORT_FACE reply_port
,
2190 kern_return_t return_code
,
2191 io_buf_ptr_inband_t data
,
2192 mach_msg_type_number_t dataCnt
)
2194 panic("device_read_reply_inband: illegal");
2195 return KERN_SUCCESS
;
2198 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2200 device_read_reply_overwrite(
2201 MACH_PORT_FACE reply_port
,
2202 kern_return_t return_code
,
2203 io_buf_len_t bytes_read
)
2205 panic("device_read_reply_overwrite: illegal\n");
2206 return KERN_SUCCESS
;
2209 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2212 MACH_PORT_FACE reply_port
,
2213 kern_return_t return_code
,
2214 MACH_PORT_FACE device_port
)
2216 panic("device_open_reply: illegal\n");
2217 return KERN_SUCCESS
;
2222 paging_segment_t ps
,
2224 vm_offset_t
*bufferp
,
2226 unsigned int *residualp
,
2230 recnum_t dev_offset
;
2231 unsigned int bytes_wanted
;
2232 unsigned int bytes_read
;
2233 unsigned int total_read
;
2234 vm_offset_t dev_buffer
;
2235 vm_offset_t buf_ptr
;
2236 unsigned int records_read
;
2237 struct vs_async
*vsa
;
2238 mutex_t vs_waiting_read_reply
;
2241 vm_map_copy_t device_data
= NULL
;
2242 default_pager_thread_t
*dpt
= NULL
;
2244 device
= dev_port_lookup(ps
->ps_device
);
2245 clustered_reads
[atop_32(size
)]++;
2247 dev_offset
= (ps
->ps_offset
+
2248 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2249 bytes_wanted
= size
;
2251 *bufferp
= (vm_offset_t
)NULL
;
2254 vsa
= VS_ALLOC_ASYNC();
2258 vsa
->vsa_offset
= 0;
2262 mutex_init(&vsa
->vsa_lock
, 0);
2263 ip_lock(vsa
->reply_port
);
2264 vsa
->reply_port
->ip_sorights
++;
2265 ip_reference(vsa
->reply_port
);
2266 ip_unlock(vsa
->reply_port
);
2267 kr
= ds_device_read_common(device
,
2269 (mach_msg_type_name_t
)
2270 MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2274 (IO_READ
| IO_CALL
),
2275 (io_buf_ptr_t
*) &dev_buffer
,
2276 (mach_msg_type_number_t
*) &bytes_read
);
2277 if(kr
== MIG_NO_REPLY
) {
2278 assert_wait(&vsa
->vsa_lock
, THREAD_UNINT
);
2279 thread_block(THREAD_CONTINUE_NULL
);
2281 dev_buffer
= vsa
->vsa_addr
;
2282 bytes_read
= (unsigned int)vsa
->vsa_size
;
2283 kr
= vsa
->vsa_error
;
2286 if (kr
!= KERN_SUCCESS
|| bytes_read
== 0) {
2289 total_read
+= bytes_read
;
2292 * If we got the entire range, use the returned dev_buffer.
2294 if (bytes_read
== size
) {
2295 *bufferp
= (vm_offset_t
)dev_buffer
;
2300 dprintf(("read only %d bytes out of %d\n",
2301 bytes_read
, bytes_wanted
));
2304 dpt
= get_read_buffer();
2305 buf_ptr
= dpt
->dpt_buffer
;
2306 *bufferp
= (vm_offset_t
)buf_ptr
;
2309 * Otherwise, copy the data into the provided buffer (*bufferp)
2310 * and append the rest of the range as it comes in.
2312 memcpy((void *) buf_ptr
, (void *) dev_buffer
, bytes_read
);
2313 buf_ptr
+= bytes_read
;
2314 bytes_wanted
-= bytes_read
;
2315 records_read
= (bytes_read
>>
2316 (vm_page_shift
- ps
->ps_record_shift
));
2317 dev_offset
+= records_read
;
2318 DP_DEBUG(DEBUG_VS_INTERNAL
,
2319 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2320 dev_buffer
, bytes_read
));
2321 if (vm_deallocate(kernel_map
, dev_buffer
, bytes_read
)
2323 Panic("dealloc buf");
2324 } while (bytes_wanted
);
2326 *residualp
= size
- total_read
;
2327 if((dev_buffer
!= *bufferp
) && (total_read
!= 0)) {
2328 vm_offset_t temp_buffer
;
2329 vm_allocate(kernel_map
, &temp_buffer
, total_read
, VM_FLAGS_ANYWHERE
);
2330 memcpy((void *) temp_buffer
, (void *) *bufferp
, total_read
);
2331 if(vm_map_copyin_page_list(kernel_map
, temp_buffer
, total_read
,
2332 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2333 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2334 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2335 (vm_map_copy_t
*)&device_data
, FALSE
))
2336 panic("ps_read_device: cannot copyin locally provided buffer\n");
2338 else if((kr
== KERN_SUCCESS
) && (total_read
!= 0) && (dev_buffer
!= 0)){
2339 if(vm_map_copyin_page_list(kernel_map
, dev_buffer
, bytes_read
,
2340 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2341 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2342 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2343 (vm_map_copy_t
*)&device_data
, FALSE
))
2344 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2349 *bufferp
= (vm_offset_t
)device_data
;
2352 /* Free the receive buffer */
2353 dpt
->checked_out
= 0;
2354 thread_wakeup(&dpt_array
);
2356 return KERN_SUCCESS
;
2361 paging_segment_t ps
,
2365 struct vs_async
*vsa
)
2367 recnum_t dev_offset
;
2368 io_buf_len_t bytes_to_write
, bytes_written
;
2369 recnum_t records_written
;
2371 MACH_PORT_FACE reply_port
;
2375 clustered_writes
[atop_32(size
)]++;
2377 dev_offset
= (ps
->ps_offset
+
2378 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2379 bytes_to_write
= size
;
2383 * Asynchronous write.
2385 reply_port
= vsa
->reply_port
;
2386 ip_lock(reply_port
);
2387 reply_port
->ip_sorights
++;
2388 ip_reference(reply_port
);
2389 ip_unlock(reply_port
);
2392 device
= dev_port_lookup(ps
->ps_device
);
2394 vsa
->vsa_addr
= addr
;
2395 kr
=ds_device_write_common(device
,
2397 (mach_msg_type_name_t
) MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2400 (io_buf_ptr_t
) addr
,
2402 (IO_WRITE
| IO_CALL
),
2405 if ((kr
!= KERN_SUCCESS
) && (kr
!= MIG_NO_REPLY
)) {
2407 dprintf(("%s0x%x, addr=0x%x,"
2408 "size=0x%x,offset=0x%x\n",
2409 "device_write_request returned ",
2410 kr
, addr
, size
, offset
));
2412 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2413 /* do the completion notification to free resources */
2414 device_write_reply(reply_port
, kr
, 0);
2419 * Synchronous write.
2423 device
= dev_port_lookup(ps
->ps_device
);
2424 kr
=ds_device_write_common(device
,
2428 (io_buf_ptr_t
) addr
,
2430 (IO_WRITE
| IO_SYNC
| IO_KERNEL_BUF
),
2433 if (kr
!= KERN_SUCCESS
) {
2434 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2435 "device_write returned ",
2436 kr
, addr
, size
, offset
));
2438 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2441 if (bytes_written
& ((vm_page_size
>> ps
->ps_record_shift
) - 1))
2442 Panic("fragmented write");
2443 records_written
= (bytes_written
>>
2444 (vm_page_shift
- ps
->ps_record_shift
));
2445 dev_offset
+= records_written
;
2447 if (bytes_written
!= bytes_to_write
) {
2448 dprintf(("wrote only %d bytes out of %d\n",
2449 bytes_written
, bytes_to_write
));
2452 bytes_to_write
-= bytes_written
;
2453 addr
+= bytes_written
;
2454 } while (bytes_to_write
> 0);
2456 return PAGER_SUCCESS
;
#else /* !DEVICE_PAGING */

kern_return_t
ps_read_device(
    __unused paging_segment_t   ps,
    __unused vm_offset_t        offset,
    __unused vm_offset_t        *bufferp,
    __unused unsigned int       size,
    __unused unsigned int       *residualp,
    __unused int                flags)
{
    panic("ps_read_device not supported");
    return KERN_FAILURE;
}

kern_return_t
ps_write_device(
    __unused paging_segment_t   ps,
    __unused vm_offset_t        offset,
    __unused vm_offset_t        addr,
    __unused unsigned int       size,
    __unused struct vs_async    *vsa)
{
    panic("ps_write_device not supported");
    return KERN_FAILURE;
}

#endif /* DEVICE_PAGING */
void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t);  /* forward */

void
pvs_object_data_provided(
    __unused vstruct_t      vs,
    __unused upl_t          upl,
    __unused upl_offset_t   offset,
    upl_size_t              size)
{
    DP_DEBUG(DEBUG_VS_INTERNAL,
         ("buffer=0x%x,offset=0x%x,size=0x%x\n",
          upl, offset, size));

    GSTAT(global_stats.gs_pages_in += atop_32(size));

#if USE_PRECIOUS
    ps_clunmap(vs, offset, size);
#endif  /* USE_PRECIOUS */
}
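/*
 * When USE_PRECIOUS is configured, the cluster that backed the data just
 * supplied is unmapped immediately (ps_clunmap above), presumably because the
 * VM now holds the authoritative copy and the backing store space can be
 * reused; without USE_PRECIOUS the mapping is left in place.
 */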
kern_return_t
pvs_cluster_read(
    vstruct_t   vs,
    vm_offset_t vs_offset,
    vm_size_t   cnt)
{
    upl_t           upl;
    kern_return_t   error = KERN_SUCCESS;
    int             size;
    unsigned int    residual;
    unsigned int    request_flags;
    int             seg_index;
    int             pages_in_cl;
    int             cl_size;
    int             cl_mask;
    int             cl_index;
    int             xfer_size;
    vm_offset_t     ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
    paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
    struct clmap    clmap;

    pages_in_cl = 1 << vs->vs_clshift;
    cl_size = pages_in_cl * vm_page_size;
    cl_mask = cl_size - 1;

    /*
     * This loop will be executed multiple times until the entire
     * request has been satisfied... if the request spans cluster
     * boundaries, the clusters will be checked for logical continuity,
     * if contiguous the I/O request will span multiple clusters, otherwise
     * it will be broken up into the minimal set of I/O's
     *
     * If there are holes in a request (either unallocated pages in a paging
     * segment or an unallocated paging segment), we stop
     * reading at the hole, inform the VM of any data read, inform
     * the VM of an unavailable range, then loop again, hoping to
     * find valid pages later in the requested range. This continues until
     * the entire range has been examined, and read, if present.
     */

#if USE_PRECIOUS
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif

    assert(dp_encryption_inited);
    if (dp_encryption) {
        /*
         * ENCRYPTED SWAP:
         * request that the UPL be prepared for
         * decryption.
         */
        request_flags |= UPL_ENCRYPT;
    }

    while (cnt && (error == KERN_SUCCESS)) {
        int             ps_info_valid;
        unsigned int    page_list_count;

        if ((vs_offset & cl_mask) &&
            (cnt > (VM_SUPER_CLUSTER -
                (vs_offset & cl_mask)))) {
            size = VM_SUPER_CLUSTER;
            size -= vs_offset & cl_mask;
        } else if (cnt > VM_SUPER_CLUSTER) {
            size = VM_SUPER_CLUSTER;
        } else {
            size = cnt;
        }
        cnt -= size;

        ps_info_valid = 0;
        seg_index     = 0;

        while (size > 0 && error == KERN_SUCCESS) {
            int         abort_size;
            int         failed_size;
            int         beg_pseg;
            int         beg_indx;
            vm_offset_t cur_offset;

            if ( !ps_info_valid) {
                ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
                psp[seg_index]       = CLMAP_PS(clmap);
                ps_info_valid = 1;
            }
            /*
             * skip over unallocated physical segments
             */
            if (ps_offset[seg_index] == (vm_offset_t) -1) {
                abort_size = cl_size - (vs_offset & cl_mask);
                abort_size = MIN(abort_size, size);

                page_list_count = 0;
                memory_object_super_upl_request(
                    vs->vs_control,
                    (memory_object_offset_t)vs_offset,
                    abort_size, abort_size,
                    &upl, NULL, &page_list_count,
                    request_flags);

                if (clmap.cl_error) {
                    upl_abort(upl, UPL_ABORT_ERROR);
                } else {
                    upl_abort(upl, UPL_ABORT_UNAVAILABLE);
                }
                upl_deallocate(upl);

                size      -= abort_size;
                vs_offset += abort_size;

                seg_index++;
                ps_info_valid = 0;
                continue;
            }
            cl_index = (vs_offset & cl_mask) / vm_page_size;

            for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
                /*
                 * skip over unallocated pages
                 */
                if (CLMAP_ISSET(clmap, cl_index))
                    break;
                abort_size += vm_page_size;
            }
            if (abort_size) {
                /*
                 * Let VM system know about holes in clusters.
                 */
                GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));

                page_list_count = 0;
                memory_object_super_upl_request(
                    vs->vs_control,
                    (memory_object_offset_t)vs_offset,
                    abort_size, abort_size,
                    &upl, NULL, &page_list_count,
                    request_flags);

                upl_abort(upl, UPL_ABORT_UNAVAILABLE);
                upl_deallocate(upl);

                size      -= abort_size;
                vs_offset += abort_size;

                if (cl_index == pages_in_cl) {
                    /*
                     * if we're at the end of this physical cluster
                     * then bump to the next one and continue looking
                     */
                    seg_index++;
                    ps_info_valid = 0;
                    continue;
                }
                if (size == 0)
                    break;
            }
            /*
             * remember the starting point of the first allocated page
             * for the I/O we're about to issue
             */
            beg_pseg   = seg_index;
            beg_indx   = cl_index;
            cur_offset = vs_offset;

            /*
             * calculate the size of the I/O that we can do...
             * this may span multiple physical segments if
             * they are contiguous
             */
            for (xfer_size = 0; xfer_size < size; ) {

                while (cl_index < pages_in_cl
                    && xfer_size < size) {
                    /*
                     * accumulate allocated pages within
                     * a physical segment
                     */
                    if (CLMAP_ISSET(clmap, cl_index)) {
                        xfer_size  += vm_page_size;
                        cur_offset += vm_page_size;
                        cl_index++;

                        BS_STAT(psp[seg_index]->ps_bs,
                            psp[seg_index]->ps_bs->bs_pages_in++);
                    } else
                        break;
                }
                if (cl_index < pages_in_cl
                    || xfer_size >= size) {
                    /*
                     * we've hit an unallocated page or
                     * the end of this request... go fire
                     * the I/O
                     */
                    break;
                }
                /*
                 * we've hit the end of the current physical
                 * segment and there's more to do, so try
                 * moving to the next one
                 */
                seg_index++;

                ps_offset[seg_index] =
                    ps_clmap(vs,
                        cur_offset & ~cl_mask,
                        &clmap, CL_FIND, 0, 0);
                psp[seg_index] = CLMAP_PS(clmap);
                ps_info_valid = 1;

                if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
                    /*
                     * if the physical segment we're about
                     * to step into is not contiguous to
                     * the one we're currently in, or it's
                     * in a different paging file, or
                     * it hasn't been allocated....
                     * we stop here and generate the I/O
                     */
                    break;
                }
                /*
                 * start with first page of the next physical
                 * segment
                 */
                cl_index = 0;
            }
            if (xfer_size) {
                /*
                 * we have a contiguous range of allocated pages
                 * to read from
                 */
                page_list_count = 0;
                memory_object_super_upl_request(vs->vs_control,
                        (memory_object_offset_t)vs_offset,
                        xfer_size, xfer_size,
                        &upl, NULL, &page_list_count,
                        request_flags | UPL_SET_INTERNAL);

                error = ps_read_file(psp[beg_pseg],
                        upl, (upl_offset_t) 0,
                        ps_offset[beg_pseg] +
                        (beg_indx * vm_page_size),
                        xfer_size, &residual, 0);
            } else
                continue;

            failed_size = 0;

            /*
             * Adjust counts and send response to VM. Optimize
             * for the common case, i.e. no error and/or partial
             * data. If there was an error, then we need to error
             * the entire range, even if some data was successfully
             * read. If there was a partial read we may supply some
             * data and may error some as well. In all cases the
             * VM must receive some notification for every page
             * in the range.
             */
            if ((error == KERN_SUCCESS) && (residual == 0)) {
                /*
                 * Got everything we asked for, supply the data
                 * to the VM. Note that as a side effect of
                 * supplying the data, the buffer holding the
                 * supplied data is deallocated from the pager's
                 * address space.
                 */
                pvs_object_data_provided(
                    vs, upl, vs_offset, xfer_size);
            } else {
                failed_size = xfer_size;

                if (error == KERN_SUCCESS) {
                    if ((signed) residual == xfer_size) {
                        /*
                         * If a read operation returns no error
                         * and no data moved, we turn it into
                         * an error, assuming we're reading at
                         * or beyond the end of the device.
                         *
                         * Fall through and error the entire
                         * range.
                         */
                        error = KERN_FAILURE;
                    } else {
                        /*
                         * Otherwise, we have partial read. If
                         * the part read is a integral number
                         * of pages supply it. Otherwise round
                         * it up to a page boundary, zero fill
                         * the unread part, and supply it.
                         * Fall through and error the remainder
                         * of the range, if any.
                         */
                        int fill, lsize;

                        fill = residual
                            & ~vm_page_size;
                        lsize = (xfer_size - residual)
                            + fill;
                        pvs_object_data_provided(
                            vs, upl,
                            vs_offset, lsize);

                        if (lsize < xfer_size) {
                            failed_size =
                                xfer_size - lsize;
                            error = KERN_FAILURE;
                        }
                    }
                }
            }
            /*
             * If there was an error in any part of the range, tell
             * the VM. Note that error is explicitly checked again
             * since it can be modified above.
             */
            if (error != KERN_SUCCESS) {
                BS_STAT(psp[beg_pseg]->ps_bs,
                    psp[beg_pseg]->ps_bs->bs_pages_in_fail
                        += atop_32(failed_size));
            }
            size      -= xfer_size;
            vs_offset += xfer_size;
        }

    } /* END while (cnt && (error == 0)) */
    return error;
}
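/*
 * Cluster geometry used throughout pvs_cluster_read, worked through for an
 * assumed configuration of vs->vs_clshift == 2 and 4 KB pages:
 * pages_in_cl = 1 << 2 = 4, cl_size = 4 * 4096 = 0x4000, cl_mask = 0x3fff,
 * so (vs_offset & cl_mask) is the byte offset within the current cluster and
 * (vs_offset & ~cl_mask) is the cluster-aligned base handed to ps_clmap().
 */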
int vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
    vstruct_t       vs,
    upl_t           internal_upl,
    upl_offset_t    offset,
    upl_size_t      cnt,
    boolean_t       dp_internal,
    int             flags)
{
    upl_size_t      transfer_size;
    int             error = 0;
    struct clmap    clmap;

    vm_offset_t     actual_offset;  /* Offset within paging segment */
    paging_segment_t ps;
    vm_offset_t     mobj_base_addr;
    vm_offset_t     mobj_target_addr;

    upl_t           upl;
    upl_page_info_t *pl;
    int             page_index;
    int             list_size;
    int             pages_in_cl;
    unsigned int    cl_size;
    int             base_index;
    unsigned int    seg_size;

    pages_in_cl = 1 << vs->vs_clshift;
    cl_size = pages_in_cl * vm_page_size;

    if (!dp_internal) {
        unsigned int    page_list_count;
        int             request_flags;
        unsigned int    super_size;
        int             first_dirty;
        int             num_dirty;
        int             num_of_pages;
        int             seg_index;
        upl_offset_t    upl_offset;
        vm_offset_t     seg_offset;
        vm_offset_t     ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
        paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];

        if (bs_low) {
            super_size = cl_size;

            request_flags = UPL_NOBLOCK |
                UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
                UPL_NO_SYNC | UPL_SET_INTERNAL;
        } else {
            super_size = VM_SUPER_CLUSTER;

            request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
                UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
                UPL_NO_SYNC | UPL_SET_INTERNAL;
        }

        if (!dp_encryption_inited) {
            /*
             * ENCRYPTED SWAP:
             * Once we've started using swap, we
             * can't change our mind on whether
             * it needs to be encrypted or
             * not.
             */
            dp_encryption_inited = TRUE;
        }
        if (dp_encryption) {
            /*
             * ENCRYPTED SWAP:
             * request that the UPL be prepared for
             * encryption.
             */
            request_flags |= UPL_ENCRYPT;
            flags |= UPL_PAGING_ENCRYPTED;
        }

        page_list_count = 0;
        memory_object_super_upl_request(vs->vs_control,
                (memory_object_offset_t)offset,
                cnt, super_size,
                &upl, NULL, &page_list_count,
                request_flags | UPL_FOR_PAGEOUT);

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        seg_size = cl_size - (upl->offset % cl_size);
        upl_offset = upl->offset & ~(cl_size - 1);

        for (seg_index = 0, transfer_size = upl->size;
                        transfer_size > 0; ) {
            ps_offset[seg_index] =
                ps_clmap(vs,
                    upl_offset,
                    &clmap, CL_ALLOC,
                    cl_size, 0);

            if (ps_offset[seg_index] == (vm_offset_t) -1) {
                upl_abort(upl, 0);
                upl_deallocate(upl);

                return KERN_FAILURE;
            }
            psp[seg_index] = CLMAP_PS(clmap);

            if (transfer_size > seg_size) {
                transfer_size -= seg_size;
                upl_offset += cl_size;
                seg_size    = cl_size;
                seg_index++;
            } else
                transfer_size = 0;
        }
        /*
         * Ignore any non-present pages at the end of the
         * UPL.
         */
        for (page_index = upl->size / vm_page_size; page_index > 0;)
            if (UPL_PAGE_PRESENT(pl, --page_index))
                break;
        num_of_pages = page_index + 1;

        base_index = (upl->offset % cl_size) / PAGE_SIZE;

        for (page_index = 0; page_index < num_of_pages; ) {
            /*
             * skip over non-dirty pages
             */
            for ( ; page_index < num_of_pages; page_index++) {
                if (UPL_DIRTY_PAGE(pl, page_index)
                    || UPL_PRECIOUS_PAGE(pl, page_index))
                    /*
                     * this is a page we need to write
                     * go see if we can buddy it up with
                     * others that are contiguous to it
                     */
                    break;
                /*
                 * if the page is not-dirty, but present we
                 * need to commit it... This is an unusual
                 * case since we only asked for dirty pages
                 */
                if (UPL_PAGE_PRESENT(pl, page_index)) {
                    boolean_t empty = FALSE;
                    upl_commit_range(upl,
                        page_index * vm_page_size,
                        vm_page_size,
                        UPL_COMMIT_NOTIFY_EMPTY,
                        pl,
                        page_list_count,
                        &empty);
                    if (empty) {
                        assert(page_index ==
                               num_of_pages - 1);
                        upl_deallocate(upl);
                    }
                }
            }
            if (page_index == num_of_pages)
                /*
                 * no more pages to look at, we're out of here
                 */
                break;

            /*
             * gather up contiguous dirty pages... we have at
             * least 1 * otherwise we would have bailed above
             * make sure that each physical segment that we step
             * into is contiguous to the one we're currently in
             * if it's not, we have to stop and write what we have
             */
            for (first_dirty = page_index;
                    page_index < num_of_pages; ) {
                if ( !UPL_DIRTY_PAGE(pl, page_index)
                    && !UPL_PRECIOUS_PAGE(pl, page_index))
                    break;
                page_index++;
                /*
                 * if we just looked at the last page in the UPL
                 * we don't need to check for physical segment
                 * continuity
                 */
                if (page_index < num_of_pages) {
                    int cur_seg;
                    int nxt_seg;

                    cur_seg = (base_index + (page_index - 1))/pages_in_cl;
                    nxt_seg = (base_index + page_index)/pages_in_cl;

                    if (cur_seg != nxt_seg) {
                        if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
                            /*
                             * if the segment we're about
                             * to step into is not
                             * contiguous to the one we're
                             * currently in, or it's in a
                             * different paging file....
                             * we stop here and generate
                             * the I/O
                             */
                            break;
                    }
                }
            }
            num_dirty = page_index - first_dirty;

            if (num_dirty) {
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                while (transfer_size) {

                    if ((seg_size = cl_size -
                        ((upl->offset + upl_offset) % cl_size))
                            > transfer_size)
                        seg_size = transfer_size;

                    ps_vs_write_complete(vs,
                        upl->offset + upl_offset,
                        seg_size, error);

                    transfer_size -= seg_size;
                    upl_offset += seg_size;
                }
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                seg_index  = (base_index + first_dirty) / pages_in_cl;
                seg_offset = (upl->offset + upl_offset) % cl_size;

                error = ps_write_file(psp[seg_index],
                        upl, upl_offset,
                        ps_offset[seg_index]
                                + seg_offset,
                        transfer_size, flags);
            } else {
                boolean_t empty = FALSE;
                upl_abort_range(upl,
                        first_dirty * vm_page_size,
                        num_dirty * vm_page_size,
                        UPL_ABORT_NOTIFY_EMPTY,
                        &empty);
                if (empty) {
                    assert(page_index == num_of_pages);
                    upl_deallocate(upl);
                }
            }
        }

    } else {
        assert(cnt <= (vm_page_size << vs->vs_clshift));
        list_size = cnt;

        /* The caller provides a mapped_data which is derived  */
        /* from a temporary object. The targeted pages are     */
        /* guaranteed to be set at offset 0 in the mapped_data */
        /* The actual offset however must still be derived     */
        /* from the offset in the vs in question               */
        mobj_base_addr = offset;
        mobj_target_addr = mobj_base_addr;

        for (transfer_size = list_size; transfer_size != 0;) {
            actual_offset = ps_clmap(vs, mobj_target_addr,
                    &clmap, CL_ALLOC,
                    transfer_size < cl_size ?
                    transfer_size : cl_size, 0);
            if (actual_offset == (vm_offset_t) -1) {
                error = 1;
                break;
            }
            cnt = MIN(transfer_size,
                CLMAP_NPGS(clmap) * vm_page_size);
            ps = CLMAP_PS(clmap);
            /* Assume that the caller has given us contiguous */
            /* pages */
            if (cnt) {
                ps_vs_write_complete(vs, mobj_target_addr,
                        cnt, error);
                error = ps_write_file(ps, internal_upl,
                        0, actual_offset,
                        cnt, flags);
                if (error)
                    break;
            }
            actual_offset += cnt;
            mobj_target_addr += cnt;
            transfer_size -= cnt;
            cnt = 0;
        }
    }
    if (error)
        return KERN_FAILURE;
    else
        return KERN_SUCCESS;
}
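/*
 * Example of the dirty-page coalescing in vs_cluster_write above: if pages
 * 3..6 of the UPL are dirty and page 7 is clean, first_dirty ends up as 3 and
 * num_dirty as 4, so a single ps_write_file() covers 4 * vm_page_size bytes
 * starting at upl_offset = 3 * vm_page_size, provided the underlying physical
 * segments are contiguous and belong to the same paging file.
 */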
vm_size_t
ps_vstruct_allocated_size(
    vstruct_t   vs)
{
    int             num_pages;
    struct vs_map   *vsmap;
    unsigned int    i, j, k;

    num_pages = 0;
    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL)
                continue;
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]))
                    continue;
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k))
                        num_pages++;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL)
            return 0;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]))
                continue;
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k))
                    num_pages++;
            }
        }
    }

    return ptoa_32(num_pages);
}
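/*
 * Each vs_map entry carries a per-page allocation bitmap, so the walk above
 * simply counts set bits; e.g. a cluster whose VSM_BMAP value is 0x0b (with
 * an assumed VSCLSIZE(vs) of 4) contributes 3 pages to num_pages.
 */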
unsigned int
ps_vstruct_allocated_pages(
    vstruct_t               vs,
    default_pager_page_t    *pages,
    unsigned int            pages_size)
{
    unsigned int    num_pages;
    struct vs_map   *vsmap;
    vm_offset_t     offset;
    unsigned int    i, j, k;

    num_pages = 0;
    offset = 0;
    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL) {
                offset += (vm_page_size * CLMAP_ENTRIES *
                       VSCLSIZE(vs));
                continue;
            }
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j])) {
                    offset += vm_page_size * VSCLSIZE(vs);
                    continue;
                }
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                        num_pages++;
                        if (num_pages < pages_size)
                            pages++->dpp_offset =
                                offset;
                    }
                    offset += vm_page_size;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL)
            return 0;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j])) {
                offset += vm_page_size * VSCLSIZE(vs);
                continue;
            }
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                    num_pages++;
                    if (num_pages < pages_size)
                        pages++->dpp_offset = offset;
                }
                offset += vm_page_size;
            }
        }
    }

    return num_pages;
}
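/*
 * Unlike ps_vstruct_allocated_size(), the walk above also records offsets:
 * the caller's array is only filled while num_pages < pages_size, but
 * num_pages keeps counting, so a return value larger than pages_size
 * presumably tells the caller that the supplied array was too small.
 */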
kern_return_t
ps_vstruct_transfer_from_segment(
    vstruct_t        vs,
    paging_segment_t segment,
    upl_t            upl)
{
    struct vs_map   *vsmap;
//  struct vs_map   old_vsmap;
//  struct vs_map   new_vsmap;
    unsigned int    i, j;

    VS_LOCK(vs);    /* block all work on this vstruct */
            /* can't allow the normal multiple write */
            /* semantic because writes may conflict  */
    vs->vs_xfer_pending = TRUE;
    vs_wait_for_sync_writers(vs);
    vs_start_write(vs);
    vs_wait_for_readers(vs);
    /* we will unlock the vs to allow other writes while transferring */
    /* and will be guaranteed of the persistance of the vs struct     */
    /* because the caller of ps_vstruct_transfer_from_segment bumped  */
    /* vs_async_pending */
    /* OK we now have guaranteed no other parties are accessing this  */
    /* vs.  Now that we are also supporting simple lock versions of   */
    /* vs_lock we cannot hold onto VS_LOCK as we may block below.     */
    /* our purpose in holding it before was the multiple write case   */
    /* we now use the boolean xfer_pending to do that.  We can use    */
    /* a boolean instead of a count because we have guaranteed single */
    /* file access to this code in its caller                         */
    VS_UNLOCK(vs);
vs_changed:
    if (vs->vs_indirect) {
        unsigned int    vsmap_size;
        int             clmap_off;
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL)
                continue;
            /* loop on clusters in this indirect map */
            clmap_off = (vm_page_size * CLMAP_ENTRIES *
                    VSCLSIZE(vs) * i);
            if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
                vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
            else
                vsmap_size = CLMAP_ENTRIES;
            for (j = 0; j < vsmap_size; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]) ||
                    (VSM_PS(vsmap[j]) != segment))
                    continue;
                if (vs_cluster_transfer(vs,
                    (vm_page_size * (j << vs->vs_clshift))
                        + clmap_off,
                    vm_page_size << vs->vs_clshift,
                    upl)
                        != KERN_SUCCESS) {
                    VS_LOCK(vs);
                    vs->vs_xfer_pending = FALSE;
                    VS_UNLOCK(vs);
                    vs_finish_write(vs);
                    return KERN_FAILURE;
                }
                /* allow other readers/writers during transfer*/
                VS_LOCK(vs);
                vs->vs_xfer_pending = FALSE;
                VS_UNLOCK(vs);
                vs_finish_write(vs);
                VS_LOCK(vs);
                vs->vs_xfer_pending = TRUE;
                vs_wait_for_sync_writers(vs);
                vs_start_write(vs);
                vs_wait_for_readers(vs);
                VS_UNLOCK(vs);
                if (!(vs->vs_indirect)) {
                    goto vs_changed;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL) {
            VS_LOCK(vs);
            vs->vs_xfer_pending = FALSE;
            VS_UNLOCK(vs);
            vs_finish_write(vs);
            return KERN_SUCCESS;
        }
        /* loop on clusters in the direct map */
        for (j = 0; j < vs->vs_size; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]) ||
                (VSM_PS(vsmap[j]) != segment))
                continue;
            if (vs_cluster_transfer(vs,
                vm_page_size * (j << vs->vs_clshift),
                vm_page_size << vs->vs_clshift,
                upl) != KERN_SUCCESS) {
                VS_LOCK(vs);
                vs->vs_xfer_pending = FALSE;
                VS_UNLOCK(vs);
                vs_finish_write(vs);
                return KERN_FAILURE;
            }
            /* allow other readers/writers during transfer*/
            VS_LOCK(vs);
            vs->vs_xfer_pending = FALSE;
            VS_UNLOCK(vs);
            vs_finish_write(vs);
            VS_LOCK(vs);
            vs->vs_xfer_pending = TRUE;
            vs_wait_for_sync_writers(vs);
            vs_start_write(vs);
            vs_wait_for_readers(vs);
            VS_UNLOCK(vs);
            if (vs->vs_indirect) {
                goto vs_changed;
            }
        }
    }

    VS_LOCK(vs);
    vs->vs_xfer_pending = FALSE;
    VS_UNLOCK(vs);
    vs_finish_write(vs);
    return KERN_SUCCESS;
}
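/*
 * Locking sketch for the transfer above: vs_xfer_pending acts as a
 * single-writer flag so that VS_LOCK need not be held across the potentially
 * blocking vs_cluster_transfer() calls; the flag is dropped and re-taken
 * around each transfer, and the vs_changed label restarts the walk if the map
 * switched between direct and indirect form in the meantime.
 */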
vs_map_t
vs_get_map_entry(
    vstruct_t   vs,
    vm_offset_t offset)
{
    struct vs_map   *vsmap;
    vm_offset_t     cluster;

    cluster = atop_32(offset) >> vs->vs_clshift;
    if (vs->vs_indirect) {
        long    ind_block = cluster/CLMAP_ENTRIES;

        /* Is the indirect block allocated? */
        vsmap = vs->vs_imap[ind_block];
        if (vsmap == (vs_map_t) NULL)
            return vsmap;
    } else
        vsmap = vs->vs_dmap;
    vsmap += cluster%CLMAP_ENTRIES;
    return vsmap;
}
kern_return_t
vs_cluster_transfer(
    vstruct_t   vs,
    vm_offset_t offset,
    vm_size_t   cnt,
    upl_t       upl)
{
    vm_offset_t             actual_offset;
    paging_segment_t        ps;
    struct clmap            clmap;
    kern_return_t           error = KERN_SUCCESS;
    unsigned int            size, size_wanted;
    int                     i;
    unsigned int            residual = 0;
    unsigned int            unavail_size;
//  default_pager_thread_t  *dpt;
//  boolean_t               dealloc;
    struct  vs_map          *vsmap_ptr = NULL;
    struct  vs_map          read_vsmap;
    struct  vs_map          original_read_vsmap;
    struct  vs_map          write_vsmap;
//  vm_offset_t             ioaddr;

    /* vs_cluster_transfer reads in the pages of a cluster and
     * then writes these pages back to new backing store.  The
     * segment the pages are being read from is assumed to have
     * been taken off-line and is no longer considered for new
     * space requests.
     */

    /*
     * This loop will be executed once per cluster referenced.
     * Typically this means once, since it's unlikely that the
     * VM system will ask for anything spanning cluster boundaries.
     *
     * If there are holes in a cluster (in a paging segment), we stop
     * reading at the hole, then loop again, hoping to
     * find valid pages later in the cluster.  This continues until
     * the entire range has been examined, and read, if present.  The
     * pages are written as they are read.  If a failure occurs after
     * some pages are written the unmap call at the bottom of the loop
     * recovers the backing store and the old backing store remains
     * in effect.
     */

    VSM_CLR(write_vsmap);
    VSM_CLR(original_read_vsmap);
    /* grab the actual object's pages to sync with I/O */
    while (cnt && (error == KERN_SUCCESS)) {
        vsmap_ptr = vs_get_map_entry(vs, offset);
        actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

        if (actual_offset == (vm_offset_t) -1) {
            /*
             * Nothing left to write in this cluster at least
             * set write cluster information for any previous
             * write, clear for next cluster, if there is one
             */
            unsigned int local_size, clmask, clsize;

            clsize = vm_page_size << vs->vs_clshift;
            clmask = clsize - 1;
            local_size = clsize - (offset & clmask);
            ASSERT(local_size);
            local_size = MIN(local_size, cnt);

            /* This cluster has no data in it beyond what may   */
            /* have been found on a previous iteration through  */
            /* the loop "write_vsmap"                           */
            *vsmap_ptr = write_vsmap;
            VSM_CLR(write_vsmap);
            VSM_CLR(original_read_vsmap);

            cnt -= local_size;
            offset += local_size;
            continue;
        }

        /*
         * Count up contiguous available or unavailable
         * pages.
         */
        ps = CLMAP_PS(clmap);
        size = 0;
        unavail_size = 0;
        for (i = 0;
             (size < cnt) && (unavail_size < cnt) &&
             (i < CLMAP_NPGS(clmap)); i++) {
            if (CLMAP_ISSET(clmap, i)) {
                if (unavail_size != 0)
                    break;
                size += vm_page_size;
                BS_STAT(ps->ps_bs,
                    ps->ps_bs->bs_pages_in++);
            } else {
                if (size != 0)
                    break;
                unavail_size += vm_page_size;
            }
        }

        if (size == 0) {
            ASSERT(unavail_size);
            cnt -= unavail_size;
            offset += unavail_size;
            if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
                                == 0) {
                /* There is no more to transfer in this
                    cluster
                */
                *vsmap_ptr = write_vsmap;
                VSM_CLR(write_vsmap);
                VSM_CLR(original_read_vsmap);
            }
            continue;
        }

        if (VSM_ISCLR(original_read_vsmap))
            original_read_vsmap = *vsmap_ptr;

        if (ps->ps_segtype == PS_PARTITION) {
            panic("swap partition not supported\n");
            /*NOTREACHED*/
            error = KERN_FAILURE;
            residual = size;
/*
            NEED TO ISSUE WITH SYNC & NO COMMIT
            error = ps_read_device(ps, actual_offset, &buffer,
                       size, &residual, flags);
*/
        } else {
            /* NEED TO ISSUE WITH SYNC & NO COMMIT */
            error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
                    size, &residual,
                    (UPL_IOSYNC | UPL_NOCOMMIT));
        }

        read_vsmap = *vsmap_ptr;

        /*
         * Adjust counts and put data in new BS.  Optimize for the
         * common case, i.e. no error and/or partial data.
         * If there was an error, then we need to error the entire
         * range, even if some data was successfully read.
         */
        if ((error == KERN_SUCCESS) && (residual == 0)) {
            /*
             * Got everything we asked for, supply the data to
             * the new BS.  Note that as a side effect of supplying
             * the data, the buffer holding the supplied data is
             * deallocated from the pager's address space unless
             * the write is unsuccessful.
             */

            /* note buffer will be cleaned up in all cases by */
            /* internal_cluster_write or if an error on write */
            /* the vm_map_copy_page_discard call              */
            *vsmap_ptr = write_vsmap;

            if (vs_cluster_write(vs, upl, offset,
                    size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
                error = KERN_FAILURE;
                if (!(VSM_ISCLR(*vsmap_ptr))) {
                    /* unmap the new backing store object */
                    ps_clunmap(vs, offset, size);
                }
                /* original vsmap */
                *vsmap_ptr = original_read_vsmap;
                VSM_CLR(write_vsmap);
            } else {
                if ((offset + size) &
                    ((vm_page_size << vs->vs_clshift)
                                - 1)) {
                    /* There is more to transfer in this
                        cluster
                    */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                } else {
                    /* discard the old backing object */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                    ps_clunmap(vs, offset, size);
                    *vsmap_ptr = write_vsmap;
                    VSM_CLR(write_vsmap);
                    VSM_CLR(original_read_vsmap);
                }
            }
        } else {
            size_wanted = size;
            if (error == KERN_SUCCESS) {
                if (residual == size) {
                    /*
                     * If a read operation returns no error
                     * and no data moved, we turn it into
                     * an error, assuming we're reading at
                     * or beyond the end of the device.
                     *
                     * Fall through and error the entire
                     * range.
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                    continue;
                } else {
                    /*
                     * Otherwise, we have partial read.
                     * This is also considered an error
                     * for the purposes of cluster transfer
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                    continue;
                }
            }
        }
        cnt -= size;
        offset += size;

    } /* END while (cnt && (error == 0)) */
    if (!VSM_ISCLR(write_vsmap))
        *vsmap_ptr = write_vsmap;

    return error;
}
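/*
 * The three vs_map snapshots above behave like a small transaction:
 * original_read_vsmap remembers the cluster as it was before the transfer,
 * read_vsmap is the mapping the data was read from, and write_vsmap
 * accumulates the new mapping; on failure the entry is rolled back to
 * original_read_vsmap and the freshly written backing store is unmapped.
 */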
kern_return_t
default_pager_add_file(
    MACH_PORT_FACE  backing_store,
    vnode_ptr_t     vp,
    int             record_size,
    vm_size_t       size)
{
    backing_store_t     bs;
    paging_segment_t    ps;
    int                 i;
    unsigned int        j;
    int                 error;

    if ((bs = backing_store_lookup(backing_store))
        == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    PSL_LOCK();
    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps == PAGING_SEGMENT_NULL)
            continue;
        if (ps->ps_segtype != PS_FILE)
            continue;

        /*
         * Check for overlap on same device.
         */
        if (ps->ps_vnode == (struct vnode *)vp) {
            PSL_UNLOCK();
            BS_UNLOCK(bs);
            return KERN_INVALID_ARGUMENT;
        }
    }
    PSL_UNLOCK();

    /*
     * Set up the paging segment
     */
    ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
    if (ps == PAGING_SEGMENT_NULL) {
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }

    ps->ps_segtype = PS_FILE;
    ps->ps_vnode = (struct vnode *)vp;
    ps->ps_offset = 0;
    ps->ps_record_shift = local_log2(vm_page_size / record_size);
    ps->ps_recnum = size;
    ps->ps_pgnum = size >> ps->ps_record_shift;

    ps->ps_pgcount = ps->ps_pgnum;
    ps->ps_clshift = local_log2(bs->bs_clsize);
    ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
    ps->ps_hint = 0;

    PS_LOCK_INIT(ps);
    ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
    if (!ps->ps_bmap) {
        kfree(ps, sizeof *ps);
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }
    for (j = 0; j < ps->ps_ncls; j++) {
        clrbit(ps->ps_bmap, j);
    }

    ps->ps_going_away = FALSE;
    ps->ps_bs = bs;

    if ((error = ps_enter(ps)) != 0) {
        kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
        kfree(ps, sizeof *ps);
        BS_UNLOCK(bs);
        return KERN_RESOURCE_SHORTAGE;
    }

    bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
    bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
    PSL_LOCK();
    dp_pages_free += ps->ps_pgcount;
    PSL_UNLOCK();

    BS_UNLOCK(bs);

    bs_more_space(ps->ps_clcount);

    DP_DEBUG(DEBUG_BS_INTERNAL,
         ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
          device, offset, size, record_size,
          ps->ps_record_shift, ps->ps_pgnum));

    return KERN_SUCCESS;
}
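/*
 * Sizing arithmetic above, worked through for an assumed configuration of
 * 4 KB pages, 512-byte records and a 2-page cluster size:
 * ps_record_shift = local_log2(4096 / 512) = 3, so a file of size == 0x10000
 * records yields ps_pgnum = 0x10000 >> 3 = 0x2000 pages and
 * ps_clcount = 0x2000 >> 1 = 0x1000 clusters.
 */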
kern_return_t
ps_read_file(
    paging_segment_t    ps,
    upl_t               upl,
    upl_offset_t        upl_offset,
    vm_offset_t         offset,
    upl_size_t          size,
    unsigned int        *residualp,
    int                 flags)
{
    vm_object_offset_t  f_offset;
    int                 error = 0;
    int                 result;

    assert(dp_encryption_inited);

    clustered_reads[atop_32(size)]++;

    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    /* for transfer case we need to pass uploffset and flags */
    error = vnode_pagein(ps->ps_vnode,
                 upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);

    /* The vnode_pagein semantic is somewhat at odds with the existing   */
    /* device_read semantic.  Partial reads are not experienced at this  */
    /* level.  It is up to the bit map code and cluster read code to     */
    /* check that requested data locations are actually backed, and the  */
    /* pagein code to either read all of the requested data or return an */
    /* error. */

    if (error)
        result = KERN_FAILURE;
    else {
        *residualp = 0;
        result = KERN_SUCCESS;
    }
    return result;
}
kern_return_t
ps_write_file(
    paging_segment_t    ps,
    upl_t               upl,
    upl_offset_t        upl_offset,
    vm_offset_t         offset,
    unsigned int        size,
    int                 flags)
{
    vm_object_offset_t  f_offset;
    kern_return_t       result;

    assert(dp_encryption_inited);

    clustered_writes[atop_32(size)]++;
    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    if (flags & UPL_PAGING_ENCRYPTED) {
        /*
         * ENCRYPTED SWAP:
         * encrypt all the pages that we're going
         * to pageout.
         */
        upl_encrypt(upl, upl_offset, size);
    }

    if (vnode_pageout(ps->ps_vnode,
              upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
        result = KERN_FAILURE;
    else
        result = KERN_SUCCESS;

    return result;
}
kern_return_t
default_pager_triggers( __unused MACH_PORT_FACE default_pager,
    int             hi_wat,
    int             lo_wat,
    int             flags,
    MACH_PORT_FACE  trigger_port)
{
    MACH_PORT_FACE  release;
    kern_return_t   kr;

    if (flags == SWAP_ENCRYPT_ON) {
        /* ENCRYPTED SWAP: turn encryption on */
        release = trigger_port;
        if (!dp_encryption_inited) {
            dp_encryption_inited = TRUE;
            dp_encryption = TRUE;
            kr = KERN_SUCCESS;
        } else {
            kr = KERN_FAILURE;
        }
    } else if (flags == SWAP_ENCRYPT_OFF) {
        /* ENCRYPTED SWAP: turn encryption off */
        release = trigger_port;
        if (!dp_encryption_inited) {
            dp_encryption_inited = TRUE;
            dp_encryption = FALSE;
            kr = KERN_SUCCESS;
        } else {
            kr = KERN_FAILURE;
        }
    } else if (flags == HI_WAT_ALERT) {
        release = min_pages_trigger_port;
        min_pages_trigger_port = trigger_port;
        minimum_pages_remaining = hi_wat/vm_page_size;
        bs_low = FALSE;
        kr = KERN_SUCCESS;
    } else if (flags == LO_WAT_ALERT) {
        release = max_pages_trigger_port;
        max_pages_trigger_port = trigger_port;
        maximum_pages_free = lo_wat/vm_page_size;
        kr = KERN_SUCCESS;
    } else {
        release = trigger_port;
        kr = KERN_INVALID_ARGUMENT;
    }

    if (IP_VALID(release))
        ipc_port_release_send(release);

    return kr;
}
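/*
 * The watermarks above arrive in bytes and are stored in pages: with 4 KB
 * pages (an assumption), a HI_WAT_ALERT of 64 MB sets
 * minimum_pages_remaining to 0x4000000 / 0x1000 = 0x4000 pages.
 */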
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL 3   /* time between free level checks */
#define PF_LATENCY  10  /* number of intervals before release */

static int dp_pages_free_low_count = 0;
thread_call_t default_pager_backing_store_monitor_callout;

void
default_pager_backing_store_monitor(__unused thread_call_param_t p1,
                    __unused thread_call_param_t p2)
{
//  unsigned long long  average;
    ipc_port_t  trigger;
    uint64_t    deadline;

    /*
     * We determine whether it will be safe to release some
     * backing store by watching the free page level.  If
     * it remains below the maximum_pages_free threshold for
     * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
     * then we deem it safe.
     *
     * Note that this establishes a maximum rate at which backing
     * store will be released, as each notification (currently)
     * only results in a single backing store object being
     * released.
     */
    if (dp_pages_free > maximum_pages_free) {
        dp_pages_free_low_count++;
    } else {
        dp_pages_free_low_count = 0;
    }

    /* decide whether to send notification */
    trigger = IP_NULL;
    if (max_pages_trigger_port &&
        (backing_store_release_trigger_disable == 0) &&
        (dp_pages_free_low_count > PF_LATENCY)) {
        trigger = max_pages_trigger_port;
        max_pages_trigger_port = NULL;
    }

    /* send notification */
    if (trigger != IP_NULL) {
        VSL_LOCK();
        if (backing_store_release_trigger_disable != 0) {
            assert_wait((event_t)
                    &backing_store_release_trigger_disable,
                    THREAD_UNINT);
            VSL_UNLOCK();
            thread_block(THREAD_CONTINUE_NULL);
        } else {
            VSL_UNLOCK();
        }
        default_pager_space_alert(trigger, LO_WAT_ALERT);
        ipc_port_release_send(trigger);
        dp_pages_free_low_count = 0;
    }

    clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
    thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
}
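/*
 * Timing note for the monitor above: with PF_INTERVAL == 3 seconds and
 * PF_LATENCY == 10 intervals, dp_pages_free must stay above
 * maximum_pages_free for roughly 30 seconds of consecutive checks before a
 * LO_WAT_ALERT is sent, and each pass releases at most one trigger, which
 * bounds the rate at which backing store is reclaimed.
 */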