]>
Commit | Line | Data |
---|---|---|
1c79356b | 1 | /* |
b0d623f7 | 2 | * Copyright (c) 2000-2008 Apple Inc. All rights reserved. |
1c79356b | 3 | * |
2d21ac55 | 4 | * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ |
1c79356b | 5 | * |
2d21ac55 A |
6 | * This file contains Original Code and/or Modifications of Original Code |
7 | * as defined in and that are subject to the Apple Public Source License | |
8 | * Version 2.0 (the 'License'). You may not use this file except in | |
9 | * compliance with the License. The rights granted to you under the License | |
10 | * may not be used to create, or enable the creation or redistribution of, | |
11 | * unlawful or unlicensed copies of an Apple operating system, or to | |
12 | * circumvent, violate, or enable the circumvention or violation of, any | |
13 | * terms of an Apple operating system software license agreement. | |
8f6c56a5 | 14 | * |
2d21ac55 A |
15 | * Please obtain a copy of the License at |
16 | * http://www.opensource.apple.com/apsl/ and read it before using this file. | |
17 | * | |
18 | * The Original Code and all software distributed under the License are | |
19 | * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
8f6c56a5 A |
20 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, |
21 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
2d21ac55 A |
22 | * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. |
23 | * Please see the License for the specific language governing rights and | |
24 | * limitations under the License. | |
8f6c56a5 | 25 | * |
2d21ac55 | 26 | * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ |
1c79356b A |
27 | */ |
28 | /* | |
29 | * @OSF_COPYRIGHT@ | |
30 | */ | |
31 | /* | |
32 | * Mach Operating System | |
33 | * Copyright (c) 1991,1990,1989 Carnegie Mellon University | |
34 | * All Rights Reserved. | |
35 | * | |
36 | * Permission to use, copy, modify and distribute this software and its | |
37 | * documentation is hereby granted, provided that both the copyright | |
38 | * notice and this permission notice appear in all copies of the | |
39 | * software, derivative works or modified versions, and any portions | |
40 | * thereof, and that both notices appear in supporting documentation. | |
41 | * | |
42 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | |
43 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR | |
44 | * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | |
45 | * | |
46 | * Carnegie Mellon requests users of this software to return to | |
47 | * | |
48 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | |
49 | * School of Computer Science | |
50 | * Carnegie Mellon University | |
51 | * Pittsburgh PA 15213-3890 | |
52 | * | |
53 | * any improvements or extensions that they make and grant Carnegie Mellon | |
54 | * the rights to redistribute these changes. | |
55 | */ | |
56 | ||
57 | /* | |
58 | * Default Pager. | |
59 | * Paging File Management. | |
60 | */ | |
61 | ||
91447636 | 62 | #include <mach/host_priv.h> |
0b4e3aa0 | 63 | #include <mach/memory_object_control.h> |
1c79356b | 64 | #include <mach/memory_object_server.h> |
91447636 A |
65 | #include <mach/upl.h> |
66 | #include <default_pager/default_pager_internal.h> | |
1c79356b | 67 | #include <default_pager/default_pager_alerts.h> |
91447636 A |
68 | #include <default_pager/default_pager_object_server.h> |
69 | ||
70 | #include <ipc/ipc_types.h> | |
1c79356b A |
71 | #include <ipc/ipc_port.h> |
72 | #include <ipc/ipc_space.h> | |
91447636 A |
73 | |
74 | #include <kern/kern_types.h> | |
75 | #include <kern/host.h> | |
1c79356b A |
76 | #include <kern/queue.h> |
77 | #include <kern/counters.h> | |
78 | #include <kern/sched_prim.h> | |
91447636 | 79 | |
1c79356b A |
80 | #include <vm/vm_kern.h> |
81 | #include <vm/vm_pageout.h> | |
1c79356b | 82 | #include <vm/vm_map.h> |
91447636 A |
83 | #include <vm/vm_object.h> |
84 | #include <vm/vm_protos.h> | |
85 | ||
2d21ac55 | 86 | |
b0d623f7 | 87 | /* todo - need large internal object support */ |
1c79356b | 88 | |
0b4e3aa0 A |
89 | /* |
90 | * ALLOC_STRIDE... the maximum number of bytes allocated from | |
91 | * a swap file before moving on to the next swap file... if | |
92 | * all swap files reside on a single disk, this value should | |
93 | * be very large (this is the default assumption)... if the | |
94 | * swap files are spread across multiple disks, than this value | |
95 | * should be small (128 * 1024)... | |
96 | * | |
97 | * This should be determined dynamically in the future | |
98 | */ | |
1c79356b | 99 | |
0b4e3aa0 | 100 | #define ALLOC_STRIDE (1024 * 1024 * 1024) |
1c79356b A |
101 | int physical_transfer_cluster_count = 0; |
102 | ||
9bccf70c | 103 | #define VM_SUPER_CLUSTER 0x40000 |
0b4c1975 | 104 | #define VM_SUPER_PAGES (VM_SUPER_CLUSTER / PAGE_SIZE) |
1c79356b A |
105 | |
106 | /* | |
107 | * 0 means no shift to pages, so == 1 page/cluster. 1 would mean | |
108 | * 2 pages/cluster, 2 means 4 pages/cluster, and so on. | |
109 | */ | |
0b4c1975 A |
110 | #define VSTRUCT_MIN_CLSHIFT 0 |
111 | ||
1c79356b | 112 | #define VSTRUCT_DEF_CLSHIFT 2 |
1c79356b A |
113 | int default_pager_clsize = 0; |
114 | ||
0b4c1975 A |
115 | int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT; |
116 | ||
1c79356b | 117 | /* statistics */ |
0b4e3aa0 A |
118 | unsigned int clustered_writes[VM_SUPER_PAGES+1]; |
119 | unsigned int clustered_reads[VM_SUPER_PAGES+1]; | |
1c79356b A |
120 | |
121 | /* | |
122 | * Globals used for asynchronous paging operations: | |
123 | * vs_async_list: head of list of to-be-completed I/O ops | |
124 | * async_num_queued: number of pages completed, but not yet | |
125 | * processed by async thread. | |
126 | * async_requests_out: number of pages of requests not completed. | |
127 | */ | |
128 | ||
129 | #if 0 | |
130 | struct vs_async *vs_async_list; | |
131 | int async_num_queued; | |
132 | int async_requests_out; | |
133 | #endif | |
134 | ||
135 | ||
136 | #define VS_ASYNC_REUSE 1 | |
137 | struct vs_async *vs_async_free_list; | |
138 | ||
b0d623f7 | 139 | lck_mtx_t default_pager_async_lock; /* Protects globals above */ |
1c79356b A |
140 | |
141 | ||
142 | int vs_alloc_async_failed = 0; /* statistics */ | |
143 | int vs_alloc_async_count = 0; /* statistics */ | |
144 | struct vs_async *vs_alloc_async(void); /* forward */ | |
145 | void vs_free_async(struct vs_async *vsa); /* forward */ | |
146 | ||
147 | ||
148 | #define VS_ALLOC_ASYNC() vs_alloc_async() | |
149 | #define VS_FREE_ASYNC(vsa) vs_free_async(vsa) | |
150 | ||
b0d623f7 A |
151 | #define VS_ASYNC_LOCK() lck_mtx_lock(&default_pager_async_lock) |
152 | #define VS_ASYNC_UNLOCK() lck_mtx_unlock(&default_pager_async_lock) | |
153 | #define VS_ASYNC_LOCK_INIT() lck_mtx_init(&default_pager_async_lock, &default_pager_lck_grp, &default_pager_lck_attr) | |
316670eb | 154 | #define VS_ASYNC_LOCK_DESTROY() lck_mtx_destroy(&default_pager_async_lock, &default_pager_lck_grp) |
1c79356b A |
155 | #define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock) |
156 | /* | |
157 | * Paging Space Hysteresis triggers and the target notification port | |
158 | * | |
159 | */ | |
b0d623f7 A |
160 | unsigned int dp_pages_free_drift_count = 0; |
161 | unsigned int dp_pages_free_drifted_max = 0; | |
1c79356b A |
162 | unsigned int minimum_pages_remaining = 0; |
163 | unsigned int maximum_pages_free = 0; | |
164 | ipc_port_t min_pages_trigger_port = NULL; | |
165 | ipc_port_t max_pages_trigger_port = NULL; | |
166 | ||
6d2010ae A |
167 | #if CONFIG_FREEZE |
168 | boolean_t use_emergency_swap_file_first = TRUE; | |
169 | #else | |
b0d623f7 | 170 | boolean_t use_emergency_swap_file_first = FALSE; |
6d2010ae | 171 | #endif |
1c79356b | 172 | boolean_t bs_low = FALSE; |
0b4e3aa0 | 173 | int backing_store_release_trigger_disable = 0; |
b0d623f7 | 174 | boolean_t backing_store_stop_compaction = FALSE; |
6d2010ae | 175 | boolean_t backing_store_abort_compaction = FALSE; |
91447636 A |
176 | |
177 | /* Have we decided if swap needs to be encrypted yet ? */ | |
178 | boolean_t dp_encryption_inited = FALSE; | |
179 | /* Should we encrypt swap ? */ | |
180 | boolean_t dp_encryption = FALSE; | |
1c79356b | 181 | |
0b4c1975 A |
182 | boolean_t dp_isssd = FALSE; |
183 | ||
1c79356b A |
184 | /* |
185 | * Object sizes are rounded up to the next power of 2, | |
186 | * unless they are bigger than a given maximum size. | |
187 | */ | |
188 | vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */ | |
189 | ||
190 | /* | |
191 | * List of all backing store and segments. | |
192 | */ | |
b0d623f7 | 193 | MACH_PORT_FACE emergency_segment_backing_store; |
1c79356b A |
194 | struct backing_store_list_head backing_store_list; |
195 | paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS]; | |
b0d623f7 | 196 | lck_mtx_t paging_segments_lock; |
1c79356b A |
197 | int paging_segment_max = 0; |
198 | int paging_segment_count = 0; | |
199 | int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 }; | |
200 | ||
201 | ||
202 | /* | |
203 | * Total pages free in system | |
204 | * This differs from clusters committed/avail which is a measure of the | |
205 | * over commitment of paging segments to backing store. An idea which is | |
206 | * likely to be deprecated. | |
207 | */ | |
208 | unsigned int dp_pages_free = 0; | |
b0d623f7 | 209 | unsigned int dp_pages_reserve = 0; |
1c79356b A |
210 | unsigned int cluster_transfer_minimum = 100; |
211 | ||
6d2010ae A |
212 | /* |
213 | * Trim state | |
214 | */ | |
215 | struct ps_vnode_trim_data { | |
216 | struct vnode *vp; | |
217 | dp_offset_t offset; | |
218 | dp_size_t length; | |
219 | }; | |
220 | ||
91447636 | 221 | /* forward declarations */ |
b0d623f7 A |
222 | kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, int); /* forward */ |
223 | kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, dp_offset_t, unsigned int, unsigned int *, int); /* forward */ | |
91447636 A |
224 | default_pager_thread_t *get_read_buffer( void ); |
225 | kern_return_t ps_vstruct_transfer_from_segment( | |
226 | vstruct_t vs, | |
227 | paging_segment_t segment, | |
228 | upl_t upl); | |
b0d623f7 A |
229 | kern_return_t ps_read_device(paging_segment_t, dp_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */ |
230 | kern_return_t ps_write_device(paging_segment_t, dp_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */ | |
91447636 A |
231 | kern_return_t vs_cluster_transfer( |
232 | vstruct_t vs, | |
b0d623f7 A |
233 | dp_offset_t offset, |
234 | dp_size_t cnt, | |
91447636 A |
235 | upl_t upl); |
236 | vs_map_t vs_get_map_entry( | |
237 | vstruct_t vs, | |
b0d623f7 | 238 | dp_offset_t offset); |
0b4e3aa0 | 239 | |
b0d623f7 A |
240 | kern_return_t |
241 | default_pager_backing_store_delete_internal( MACH_PORT_FACE ); | |
1c79356b | 242 | |
6d2010ae A |
243 | static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data); |
244 | static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data); | |
245 | static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length); | |
246 | ||
1c79356b | 247 | default_pager_thread_t * |
91447636 | 248 | get_read_buffer( void ) |
1c79356b A |
249 | { |
250 | int i; | |
251 | ||
252 | DPT_LOCK(dpt_lock); | |
253 | while(TRUE) { | |
254 | for (i=0; i<default_pager_internal_count; i++) { | |
255 | if(dpt_array[i]->checked_out == FALSE) { | |
256 | dpt_array[i]->checked_out = TRUE; | |
257 | DPT_UNLOCK(dpt_lock); | |
258 | return dpt_array[i]; | |
259 | } | |
260 | } | |
9bccf70c | 261 | DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT); |
1c79356b A |
262 | } |
263 | } | |
264 | ||
265 | void | |
266 | bs_initialize(void) | |
267 | { | |
268 | int i; | |
269 | ||
270 | /* | |
271 | * List of all backing store. | |
272 | */ | |
273 | BSL_LOCK_INIT(); | |
274 | queue_init(&backing_store_list.bsl_queue); | |
275 | PSL_LOCK_INIT(); | |
276 | ||
277 | VS_ASYNC_LOCK_INIT(); | |
278 | #if VS_ASYNC_REUSE | |
279 | vs_async_free_list = NULL; | |
280 | #endif /* VS_ASYNC_REUSE */ | |
281 | ||
0b4e3aa0 | 282 | for (i = 0; i < VM_SUPER_PAGES + 1; i++) { |
1c79356b A |
283 | clustered_writes[i] = 0; |
284 | clustered_reads[i] = 0; | |
285 | } | |
286 | ||
287 | } | |
288 | ||
289 | /* | |
290 | * When things do not quite workout... | |
291 | */ | |
292 | void bs_no_paging_space(boolean_t); /* forward */ | |
293 | ||
294 | void | |
295 | bs_no_paging_space( | |
296 | boolean_t out_of_memory) | |
297 | { | |
1c79356b A |
298 | |
299 | if (out_of_memory) | |
300 | dprintf(("*** OUT OF MEMORY ***\n")); | |
301 | panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE"); | |
302 | } | |
303 | ||
304 | void bs_more_space(int); /* forward */ | |
305 | void bs_commit(int); /* forward */ | |
306 | ||
307 | boolean_t user_warned = FALSE; | |
308 | unsigned int clusters_committed = 0; | |
309 | unsigned int clusters_available = 0; | |
310 | unsigned int clusters_committed_peak = 0; | |
311 | ||
312 | void | |
313 | bs_more_space( | |
314 | int nclusters) | |
315 | { | |
316 | BSL_LOCK(); | |
317 | /* | |
318 | * Account for new paging space. | |
319 | */ | |
320 | clusters_available += nclusters; | |
321 | ||
322 | if (clusters_available >= clusters_committed) { | |
323 | if (verbose && user_warned) { | |
324 | printf("%s%s - %d excess clusters now.\n", | |
325 | my_name, | |
326 | "paging space is OK now", | |
327 | clusters_available - clusters_committed); | |
328 | user_warned = FALSE; | |
329 | clusters_committed_peak = 0; | |
330 | } | |
331 | } else { | |
332 | if (verbose && user_warned) { | |
333 | printf("%s%s - still short of %d clusters.\n", | |
334 | my_name, | |
335 | "WARNING: paging space over-committed", | |
336 | clusters_committed - clusters_available); | |
337 | clusters_committed_peak -= nclusters; | |
338 | } | |
339 | } | |
340 | BSL_UNLOCK(); | |
341 | ||
342 | return; | |
343 | } | |
344 | ||
345 | void | |
346 | bs_commit( | |
347 | int nclusters) | |
348 | { | |
349 | BSL_LOCK(); | |
350 | clusters_committed += nclusters; | |
351 | if (clusters_committed > clusters_available) { | |
352 | if (verbose && !user_warned) { | |
353 | user_warned = TRUE; | |
354 | printf("%s%s - short of %d clusters.\n", | |
355 | my_name, | |
356 | "WARNING: paging space over-committed", | |
357 | clusters_committed - clusters_available); | |
358 | } | |
359 | if (clusters_committed > clusters_committed_peak) { | |
360 | clusters_committed_peak = clusters_committed; | |
361 | } | |
362 | } else { | |
363 | if (verbose && user_warned) { | |
364 | printf("%s%s - was short of up to %d clusters.\n", | |
365 | my_name, | |
366 | "paging space is OK now", | |
367 | clusters_committed_peak - clusters_available); | |
368 | user_warned = FALSE; | |
369 | clusters_committed_peak = 0; | |
370 | } | |
371 | } | |
372 | BSL_UNLOCK(); | |
373 | ||
374 | return; | |
375 | } | |
376 | ||
377 | int default_pager_info_verbose = 1; | |
378 | ||
379 | void | |
380 | bs_global_info( | |
b0d623f7 A |
381 | uint64_t *totalp, |
382 | uint64_t *freep) | |
1c79356b | 383 | { |
b0d623f7 | 384 | uint64_t pages_total, pages_free; |
1c79356b A |
385 | paging_segment_t ps; |
386 | int i; | |
1c79356b A |
387 | |
388 | PSL_LOCK(); | |
389 | pages_total = pages_free = 0; | |
390 | for (i = 0; i <= paging_segment_max; i++) { | |
391 | ps = paging_segments[i]; | |
392 | if (ps == PAGING_SEGMENT_NULL) | |
393 | continue; | |
394 | ||
395 | /* | |
396 | * no need to lock: by the time this data | |
397 | * gets back to any remote requestor it | |
398 | * will be obsolete anyways | |
399 | */ | |
400 | pages_total += ps->ps_pgnum; | |
401 | pages_free += ps->ps_clcount << ps->ps_clshift; | |
91447636 A |
402 | DP_DEBUG(DEBUG_BS_INTERNAL, |
403 | ("segment #%d: %d total, %d free\n", | |
404 | i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift)); | |
1c79356b A |
405 | } |
406 | *totalp = pages_total; | |
407 | *freep = pages_free; | |
408 | if (verbose && user_warned && default_pager_info_verbose) { | |
409 | if (clusters_available < clusters_committed) { | |
410 | printf("%s %d clusters committed, %d available.\n", | |
411 | my_name, | |
412 | clusters_committed, | |
413 | clusters_available); | |
414 | } | |
415 | } | |
416 | PSL_UNLOCK(); | |
417 | } | |
418 | ||
419 | backing_store_t backing_store_alloc(void); /* forward */ | |
420 | ||
421 | backing_store_t | |
422 | backing_store_alloc(void) | |
423 | { | |
424 | backing_store_t bs; | |
1c79356b A |
425 | |
426 | bs = (backing_store_t) kalloc(sizeof (struct backing_store)); | |
427 | if (bs == BACKING_STORE_NULL) | |
428 | panic("backing_store_alloc: no memory"); | |
429 | ||
430 | BS_LOCK_INIT(bs); | |
431 | bs->bs_port = MACH_PORT_NULL; | |
432 | bs->bs_priority = 0; | |
433 | bs->bs_clsize = 0; | |
434 | bs->bs_pages_total = 0; | |
435 | bs->bs_pages_in = 0; | |
436 | bs->bs_pages_in_fail = 0; | |
437 | bs->bs_pages_out = 0; | |
438 | bs->bs_pages_out_fail = 0; | |
439 | ||
440 | return bs; | |
441 | } | |
442 | ||
443 | backing_store_t backing_store_lookup(MACH_PORT_FACE); /* forward */ | |
444 | ||
445 | /* Even in both the component space and external versions of this pager, */ | |
446 | /* backing_store_lookup will be called from tasks in the application space */ | |
447 | backing_store_t | |
448 | backing_store_lookup( | |
449 | MACH_PORT_FACE port) | |
450 | { | |
451 | backing_store_t bs; | |
452 | ||
453 | /* | |
454 | port is currently backed with a vs structure in the alias field | |
455 | we could create an ISBS alias and a port_is_bs call but frankly | |
456 | I see no reason for the test, the bs->port == port check below | |
457 | will work properly on junk entries. | |
458 | ||
459 | if ((port == MACH_PORT_NULL) || port_is_vs(port)) | |
460 | */ | |
6d2010ae | 461 | if (port == MACH_PORT_NULL) |
1c79356b A |
462 | return BACKING_STORE_NULL; |
463 | ||
464 | BSL_LOCK(); | |
465 | queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t, | |
466 | bs_links) { | |
467 | BS_LOCK(bs); | |
468 | if (bs->bs_port == port) { | |
469 | BSL_UNLOCK(); | |
470 | /* Success, return it locked. */ | |
471 | return bs; | |
472 | } | |
473 | BS_UNLOCK(bs); | |
474 | } | |
475 | BSL_UNLOCK(); | |
476 | return BACKING_STORE_NULL; | |
477 | } | |
478 | ||
479 | void backing_store_add(backing_store_t); /* forward */ | |
480 | ||
481 | void | |
482 | backing_store_add( | |
91447636 | 483 | __unused backing_store_t bs) |
1c79356b | 484 | { |
91447636 A |
485 | // MACH_PORT_FACE port = bs->bs_port; |
486 | // MACH_PORT_FACE pset = default_pager_default_set; | |
1c79356b | 487 | kern_return_t kr = KERN_SUCCESS; |
1c79356b A |
488 | |
489 | if (kr != KERN_SUCCESS) | |
490 | panic("backing_store_add: add to set"); | |
491 | ||
492 | } | |
493 | ||
494 | /* | |
495 | * Set up default page shift, but only if not already | |
496 | * set and argument is within range. | |
497 | */ | |
498 | boolean_t | |
499 | bs_set_default_clsize(unsigned int npages) | |
500 | { | |
501 | switch(npages){ | |
502 | case 1: | |
503 | case 2: | |
504 | case 4: | |
505 | case 8: | |
506 | if (default_pager_clsize == 0) /* if not yet set */ | |
507 | vstruct_def_clshift = local_log2(npages); | |
508 | return(TRUE); | |
509 | } | |
510 | return(FALSE); | |
511 | } | |
512 | ||
513 | int bs_get_global_clsize(int clsize); /* forward */ | |
514 | ||
515 | int | |
516 | bs_get_global_clsize( | |
517 | int clsize) | |
518 | { | |
519 | int i; | |
0b4e3aa0 | 520 | memory_object_default_t dmm; |
1c79356b | 521 | kern_return_t kr; |
1c79356b A |
522 | |
523 | /* | |
524 | * Only allow setting of cluster size once. If called | |
525 | * with no cluster size (default), we use the compiled-in default | |
526 | * for the duration. The same cluster size is used for all | |
527 | * paging segments. | |
528 | */ | |
529 | if (default_pager_clsize == 0) { | |
1c79356b A |
530 | /* |
531 | * Keep cluster size in bit shift because it's quicker | |
532 | * arithmetic, and easier to keep at a power of 2. | |
533 | */ | |
534 | if (clsize != NO_CLSIZE) { | |
535 | for (i = 0; (1 << i) < clsize; i++); | |
536 | if (i > MAX_CLUSTER_SHIFT) | |
537 | i = MAX_CLUSTER_SHIFT; | |
538 | vstruct_def_clshift = i; | |
539 | } | |
540 | default_pager_clsize = (1 << vstruct_def_clshift); | |
541 | ||
542 | /* | |
543 | * Let the user know the new (and definitive) cluster size. | |
544 | */ | |
545 | if (verbose) | |
546 | printf("%scluster size = %d page%s\n", | |
547 | my_name, default_pager_clsize, | |
548 | (default_pager_clsize == 1) ? "" : "s"); | |
0b4e3aa0 | 549 | |
1c79356b A |
550 | /* |
551 | * Let the kernel know too, in case it hasn't used the | |
552 | * default value provided in main() yet. | |
553 | */ | |
0b4e3aa0 | 554 | dmm = default_pager_object; |
1c79356b A |
555 | clsize = default_pager_clsize * vm_page_size; /* in bytes */ |
556 | kr = host_default_memory_manager(host_priv_self(), | |
0b4e3aa0 | 557 | &dmm, |
1c79356b | 558 | clsize); |
0b4e3aa0 A |
559 | memory_object_default_deallocate(dmm); |
560 | ||
1c79356b A |
561 | if (kr != KERN_SUCCESS) { |
562 | panic("bs_get_global_cl_size:host_default_memory_manager"); | |
563 | } | |
0b4e3aa0 | 564 | if (dmm != default_pager_object) { |
1c79356b A |
565 | panic("bs_get_global_cl_size:there is another default pager"); |
566 | } | |
567 | } | |
568 | ASSERT(default_pager_clsize > 0 && | |
569 | (default_pager_clsize & (default_pager_clsize - 1)) == 0); | |
570 | ||
571 | return default_pager_clsize; | |
572 | } | |
573 | ||
574 | kern_return_t | |
575 | default_pager_backing_store_create( | |
0b4e3aa0 A |
576 | memory_object_default_t pager, |
577 | int priority, | |
578 | int clsize, /* in bytes */ | |
579 | MACH_PORT_FACE *backing_store) | |
1c79356b A |
580 | { |
581 | backing_store_t bs; | |
582 | MACH_PORT_FACE port; | |
91447636 | 583 | // kern_return_t kr; |
1c79356b | 584 | struct vstruct_alias *alias_struct; |
1c79356b | 585 | |
0b4e3aa0 | 586 | if (pager != default_pager_object) |
1c79356b A |
587 | return KERN_INVALID_ARGUMENT; |
588 | ||
589 | bs = backing_store_alloc(); | |
590 | port = ipc_port_alloc_kernel(); | |
591 | ipc_port_make_send(port); | |
592 | assert (port != IP_NULL); | |
593 | ||
91447636 A |
594 | DP_DEBUG(DEBUG_BS_EXTERNAL, |
595 | ("priority=%d clsize=%d bs_port=0x%x\n", | |
596 | priority, clsize, (int) backing_store)); | |
1c79356b A |
597 | |
598 | alias_struct = (struct vstruct_alias *) | |
599 | kalloc(sizeof (struct vstruct_alias)); | |
600 | if(alias_struct != NULL) { | |
601 | alias_struct->vs = (struct vstruct *)bs; | |
0c530ab8 | 602 | alias_struct->name = &default_pager_ops; |
39236c6e | 603 | port->ip_alias = (uintptr_t) alias_struct; |
1c79356b A |
604 | } |
605 | else { | |
606 | ipc_port_dealloc_kernel((MACH_PORT_FACE)(port)); | |
316670eb A |
607 | |
608 | BS_LOCK_DESTROY(bs); | |
91447636 | 609 | kfree(bs, sizeof (struct backing_store)); |
316670eb | 610 | |
1c79356b A |
611 | return KERN_RESOURCE_SHORTAGE; |
612 | } | |
613 | ||
614 | bs->bs_port = port; | |
615 | if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI) | |
616 | priority = BS_MAXPRI; | |
617 | else if (priority == BS_NOPRI) | |
618 | priority = BS_MAXPRI; | |
619 | else | |
620 | priority = BS_MINPRI; | |
621 | bs->bs_priority = priority; | |
622 | ||
55e303ae | 623 | bs->bs_clsize = bs_get_global_clsize(atop_32(clsize)); |
1c79356b A |
624 | |
625 | BSL_LOCK(); | |
626 | queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t, | |
627 | bs_links); | |
628 | BSL_UNLOCK(); | |
629 | ||
630 | backing_store_add(bs); | |
631 | ||
632 | *backing_store = port; | |
633 | return KERN_SUCCESS; | |
634 | } | |
635 | ||
636 | kern_return_t | |
637 | default_pager_backing_store_info( | |
638 | MACH_PORT_FACE backing_store, | |
639 | backing_store_flavor_t flavour, | |
640 | backing_store_info_t info, | |
641 | mach_msg_type_number_t *size) | |
642 | { | |
643 | backing_store_t bs; | |
644 | backing_store_basic_info_t basic; | |
645 | int i; | |
646 | paging_segment_t ps; | |
647 | ||
648 | if (flavour != BACKING_STORE_BASIC_INFO || | |
649 | *size < BACKING_STORE_BASIC_INFO_COUNT) | |
650 | return KERN_INVALID_ARGUMENT; | |
651 | ||
652 | basic = (backing_store_basic_info_t)info; | |
653 | *size = BACKING_STORE_BASIC_INFO_COUNT; | |
654 | ||
655 | VSTATS_LOCK(&global_stats.gs_lock); | |
656 | basic->pageout_calls = global_stats.gs_pageout_calls; | |
657 | basic->pagein_calls = global_stats.gs_pagein_calls; | |
658 | basic->pages_in = global_stats.gs_pages_in; | |
659 | basic->pages_out = global_stats.gs_pages_out; | |
660 | basic->pages_unavail = global_stats.gs_pages_unavail; | |
661 | basic->pages_init = global_stats.gs_pages_init; | |
662 | basic->pages_init_writes= global_stats.gs_pages_init_writes; | |
663 | VSTATS_UNLOCK(&global_stats.gs_lock); | |
664 | ||
665 | if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL) | |
666 | return KERN_INVALID_ARGUMENT; | |
667 | ||
668 | basic->bs_pages_total = bs->bs_pages_total; | |
669 | PSL_LOCK(); | |
670 | bs->bs_pages_free = 0; | |
671 | for (i = 0; i <= paging_segment_max; i++) { | |
672 | ps = paging_segments[i]; | |
673 | if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) { | |
674 | PS_LOCK(ps); | |
675 | bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift; | |
676 | PS_UNLOCK(ps); | |
677 | } | |
678 | } | |
679 | PSL_UNLOCK(); | |
680 | basic->bs_pages_free = bs->bs_pages_free; | |
681 | basic->bs_pages_in = bs->bs_pages_in; | |
682 | basic->bs_pages_in_fail = bs->bs_pages_in_fail; | |
683 | basic->bs_pages_out = bs->bs_pages_out; | |
684 | basic->bs_pages_out_fail= bs->bs_pages_out_fail; | |
685 | ||
686 | basic->bs_priority = bs->bs_priority; | |
55e303ae | 687 | basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */ |
1c79356b A |
688 | |
689 | BS_UNLOCK(bs); | |
690 | ||
691 | return KERN_SUCCESS; | |
692 | } | |
693 | ||
694 | int ps_delete(paging_segment_t); /* forward */ | |
b0d623f7 | 695 | boolean_t current_thread_aborted(void); |
1c79356b A |
696 | |
697 | int | |
698 | ps_delete( | |
699 | paging_segment_t ps) | |
700 | { | |
701 | vstruct_t vs; | |
702 | kern_return_t error = KERN_SUCCESS; | |
703 | int vs_count; | |
704 | ||
705 | VSL_LOCK(); /* get the lock on the list of vs's */ | |
706 | ||
707 | /* The lock relationship and sequence is farily complicated */ | |
708 | /* this code looks at a live list, locking and unlocking the list */ | |
709 | /* as it traverses it. It depends on the locking behavior of */ | |
710 | /* default_pager_no_senders. no_senders always locks the vstruct */ | |
711 | /* targeted for removal before locking the vstruct list. However */ | |
712 | /* it will remove that member of the list without locking its */ | |
713 | /* neighbors. We can be sure when we hold a lock on a vstruct */ | |
714 | /* it cannot be removed from the list but we must hold the list */ | |
715 | /* lock to be sure that its pointers to its neighbors are valid. */ | |
716 | /* Also, we can hold off destruction of a vstruct when the list */ | |
717 | /* lock and the vs locks are not being held by bumping the */ | |
718 | /* vs_async_pending count. */ | |
719 | ||
0b4e3aa0 A |
720 | |
721 | while(backing_store_release_trigger_disable != 0) { | |
9bccf70c | 722 | VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT); |
0b4e3aa0 A |
723 | } |
724 | ||
1c79356b A |
725 | /* we will choose instead to hold a send right */ |
726 | vs_count = vstruct_list.vsl_count; | |
727 | vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue)); | |
728 | if(vs == (vstruct_t)&vstruct_list) { | |
729 | VSL_UNLOCK(); | |
730 | return KERN_SUCCESS; | |
731 | } | |
732 | VS_LOCK(vs); | |
733 | vs_async_wait(vs); /* wait for any pending async writes */ | |
734 | if ((vs_count != 0) && (vs != NULL)) | |
735 | vs->vs_async_pending += 1; /* hold parties calling */ | |
736 | /* vs_async_wait */ | |
6d2010ae A |
737 | |
738 | if (bs_low == FALSE) | |
739 | backing_store_abort_compaction = FALSE; | |
740 | ||
1c79356b A |
741 | VS_UNLOCK(vs); |
742 | VSL_UNLOCK(); | |
743 | while((vs_count != 0) && (vs != NULL)) { | |
744 | /* We take the count of AMO's before beginning the */ | |
745 | /* transfer of of the target segment. */ | |
746 | /* We are guaranteed that the target segment cannot get */ | |
747 | /* more users. We also know that queue entries are */ | |
748 | /* made at the back of the list. If some of the entries */ | |
749 | /* we would check disappear while we are traversing the */ | |
750 | /* list then we will either check new entries which */ | |
751 | /* do not have any backing store in the target segment */ | |
752 | /* or re-check old entries. This might not be optimal */ | |
753 | /* but it will always be correct. The alternative is to */ | |
754 | /* take a snapshot of the list. */ | |
755 | vstruct_t next_vs; | |
756 | ||
757 | if(dp_pages_free < cluster_transfer_minimum) | |
758 | error = KERN_FAILURE; | |
759 | else { | |
760 | vm_object_t transfer_object; | |
0c530ab8 | 761 | unsigned int count; |
1c79356b | 762 | upl_t upl; |
6d2010ae | 763 | int upl_flags; |
1c79356b | 764 | |
91447636 | 765 | transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER); |
0b4e3aa0 | 766 | count = 0; |
6d2010ae A |
767 | upl_flags = (UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | |
768 | UPL_SET_LITE | UPL_SET_INTERNAL); | |
769 | if (dp_encryption) { | |
770 | /* mark the pages as "encrypted" when they come in */ | |
771 | upl_flags |= UPL_ENCRYPT; | |
772 | } | |
0b4e3aa0 A |
773 | error = vm_object_upl_request(transfer_object, |
774 | (vm_object_offset_t)0, VM_SUPER_CLUSTER, | |
6d2010ae | 775 | &upl, NULL, &count, upl_flags); |
2d21ac55 | 776 | |
1c79356b | 777 | if(error == KERN_SUCCESS) { |
1c79356b A |
778 | error = ps_vstruct_transfer_from_segment( |
779 | vs, ps, upl); | |
91447636 | 780 | upl_commit(upl, NULL, 0); |
0b4e3aa0 | 781 | upl_deallocate(upl); |
1c79356b | 782 | } else { |
1c79356b A |
783 | error = KERN_FAILURE; |
784 | } | |
9bccf70c | 785 | vm_object_deallocate(transfer_object); |
1c79356b | 786 | } |
6d2010ae | 787 | if(error || current_thread_aborted()) { |
1c79356b A |
788 | VS_LOCK(vs); |
789 | vs->vs_async_pending -= 1; /* release vs_async_wait */ | |
0b4e3aa0 A |
790 | if (vs->vs_async_pending == 0 && vs->vs_waiting_async) { |
791 | vs->vs_waiting_async = FALSE; | |
1c79356b | 792 | VS_UNLOCK(vs); |
0b4e3aa0 | 793 | thread_wakeup(&vs->vs_async_pending); |
1c79356b A |
794 | } else { |
795 | VS_UNLOCK(vs); | |
796 | } | |
797 | return KERN_FAILURE; | |
798 | } | |
799 | ||
800 | VSL_LOCK(); | |
0b4e3aa0 A |
801 | |
802 | while(backing_store_release_trigger_disable != 0) { | |
9bccf70c A |
803 | VSL_SLEEP(&backing_store_release_trigger_disable, |
804 | THREAD_UNINT); | |
0b4e3aa0 A |
805 | } |
806 | ||
1c79356b A |
807 | next_vs = (vstruct_t) queue_next(&(vs->vs_links)); |
808 | if((next_vs != (vstruct_t)&vstruct_list) && | |
809 | (vs != next_vs) && (vs_count != 1)) { | |
810 | VS_LOCK(next_vs); | |
811 | vs_async_wait(next_vs); /* wait for any */ | |
812 | /* pending async writes */ | |
813 | next_vs->vs_async_pending += 1; /* hold parties */ | |
814 | /* calling vs_async_wait */ | |
815 | VS_UNLOCK(next_vs); | |
816 | } | |
817 | VSL_UNLOCK(); | |
818 | VS_LOCK(vs); | |
819 | vs->vs_async_pending -= 1; | |
0b4e3aa0 A |
820 | if (vs->vs_async_pending == 0 && vs->vs_waiting_async) { |
821 | vs->vs_waiting_async = FALSE; | |
1c79356b | 822 | VS_UNLOCK(vs); |
0b4e3aa0 | 823 | thread_wakeup(&vs->vs_async_pending); |
1c79356b A |
824 | } else { |
825 | VS_UNLOCK(vs); | |
826 | } | |
827 | if((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list)) | |
828 | vs = NULL; | |
829 | else | |
830 | vs = next_vs; | |
831 | vs_count--; | |
832 | } | |
833 | return KERN_SUCCESS; | |
834 | } | |
835 | ||
836 | ||
837 | kern_return_t | |
b0d623f7 | 838 | default_pager_backing_store_delete_internal( |
1c79356b A |
839 | MACH_PORT_FACE backing_store) |
840 | { | |
841 | backing_store_t bs; | |
842 | int i; | |
843 | paging_segment_t ps; | |
844 | int error; | |
845 | int interim_pages_removed = 0; | |
b0d623f7 | 846 | boolean_t dealing_with_emergency_segment = ( backing_store == emergency_segment_backing_store ); |
1c79356b A |
847 | |
848 | if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL) | |
849 | return KERN_INVALID_ARGUMENT; | |
850 | ||
b0d623f7 | 851 | restart: |
1c79356b A |
852 | PSL_LOCK(); |
853 | error = KERN_SUCCESS; | |
854 | for (i = 0; i <= paging_segment_max; i++) { | |
855 | ps = paging_segments[i]; | |
856 | if (ps != PAGING_SEGMENT_NULL && | |
857 | ps->ps_bs == bs && | |
b0d623f7 | 858 | ! IS_PS_GOING_AWAY(ps)) { |
1c79356b | 859 | PS_LOCK(ps); |
b0d623f7 A |
860 | |
861 | if( IS_PS_GOING_AWAY(ps) || !IS_PS_OK_TO_USE(ps)) { | |
862 | /* | |
863 | * Someone is already busy reclamining this paging segment. | |
864 | * If it's the emergency segment we are looking at then check | |
865 | * that someone has not already recovered it and set the right | |
866 | * state i.e. online but not activated. | |
867 | */ | |
868 | PS_UNLOCK(ps); | |
869 | continue; | |
870 | } | |
871 | ||
1c79356b | 872 | /* disable access to this segment */ |
b0d623f7 A |
873 | ps->ps_state &= ~PS_CAN_USE; |
874 | ps->ps_state |= PS_GOING_AWAY; | |
1c79356b A |
875 | PS_UNLOCK(ps); |
876 | /* | |
877 | * The "ps" segment is "off-line" now, | |
878 | * we can try and delete it... | |
879 | */ | |
880 | if(dp_pages_free < (cluster_transfer_minimum | |
881 | + ps->ps_pgcount)) { | |
882 | error = KERN_FAILURE; | |
883 | PSL_UNLOCK(); | |
884 | } | |
885 | else { | |
886 | /* remove all pages associated with the */ | |
887 | /* segment from the list of free pages */ | |
888 | /* when transfer is through, all target */ | |
889 | /* segment pages will appear to be free */ | |
890 | ||
891 | dp_pages_free -= ps->ps_pgcount; | |
892 | interim_pages_removed += ps->ps_pgcount; | |
893 | PSL_UNLOCK(); | |
894 | error = ps_delete(ps); | |
895 | } | |
896 | if (error != KERN_SUCCESS) { | |
897 | /* | |
898 | * We couldn't delete the segment, | |
899 | * probably because there's not enough | |
900 | * virtual memory left. | |
901 | * Re-enable all the segments. | |
902 | */ | |
903 | PSL_LOCK(); | |
904 | break; | |
905 | } | |
906 | goto restart; | |
907 | } | |
908 | } | |
909 | ||
910 | if (error != KERN_SUCCESS) { | |
911 | for (i = 0; i <= paging_segment_max; i++) { | |
912 | ps = paging_segments[i]; | |
913 | if (ps != PAGING_SEGMENT_NULL && | |
914 | ps->ps_bs == bs && | |
b0d623f7 | 915 | IS_PS_GOING_AWAY(ps)) { |
1c79356b | 916 | PS_LOCK(ps); |
b0d623f7 A |
917 | |
918 | if( !IS_PS_GOING_AWAY(ps)) { | |
919 | PS_UNLOCK(ps); | |
920 | continue; | |
921 | } | |
922 | /* Handle the special clusters that came in while we let go the lock*/ | |
923 | if( ps->ps_special_clusters) { | |
924 | dp_pages_free += ps->ps_special_clusters << ps->ps_clshift; | |
925 | ps->ps_pgcount += ps->ps_special_clusters << ps->ps_clshift; | |
926 | ps->ps_clcount += ps->ps_special_clusters; | |
927 | if ( ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) { | |
928 | ps_select_array[ps->ps_bs->bs_priority] = 0; | |
929 | } | |
930 | ps->ps_special_clusters = 0; | |
931 | } | |
1c79356b | 932 | /* re-enable access to this segment */ |
b0d623f7 A |
933 | ps->ps_state &= ~PS_GOING_AWAY; |
934 | ps->ps_state |= PS_CAN_USE; | |
1c79356b A |
935 | PS_UNLOCK(ps); |
936 | } | |
937 | } | |
938 | dp_pages_free += interim_pages_removed; | |
939 | PSL_UNLOCK(); | |
940 | BS_UNLOCK(bs); | |
941 | return error; | |
942 | } | |
943 | ||
944 | for (i = 0; i <= paging_segment_max; i++) { | |
945 | ps = paging_segments[i]; | |
946 | if (ps != PAGING_SEGMENT_NULL && | |
947 | ps->ps_bs == bs) { | |
b0d623f7 A |
948 | if(IS_PS_GOING_AWAY(ps)) { |
949 | if(IS_PS_EMERGENCY_SEGMENT(ps)) { | |
950 | PS_LOCK(ps); | |
951 | ps->ps_state &= ~PS_GOING_AWAY; | |
952 | ps->ps_special_clusters = 0; | |
953 | ps->ps_pgcount = ps->ps_pgnum; | |
954 | ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift; | |
d1ecb069 | 955 | dp_pages_reserve += ps->ps_pgcount; |
b0d623f7 | 956 | PS_UNLOCK(ps); |
b0d623f7 A |
957 | } else { |
958 | paging_segments[i] = PAGING_SEGMENT_NULL; | |
959 | paging_segment_count--; | |
960 | PS_LOCK(ps); | |
961 | kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); | |
962 | kfree(ps, sizeof *ps); | |
963 | } | |
1c79356b A |
964 | } |
965 | } | |
966 | } | |
967 | ||
968 | /* Scan the entire ps array separately to make certain we find the */ | |
969 | /* proper paging_segment_max */ | |
970 | for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) { | |
971 | if(paging_segments[i] != PAGING_SEGMENT_NULL) | |
972 | paging_segment_max = i; | |
973 | } | |
974 | ||
975 | PSL_UNLOCK(); | |
976 | ||
b0d623f7 A |
977 | if( dealing_with_emergency_segment ) { |
978 | BS_UNLOCK(bs); | |
979 | return KERN_SUCCESS; | |
980 | } | |
981 | ||
1c79356b A |
982 | /* |
983 | * All the segments have been deleted. | |
984 | * We can remove the backing store. | |
985 | */ | |
986 | ||
987 | /* | |
988 | * Disable lookups of this backing store. | |
989 | */ | |
39236c6e A |
990 | if((void *)bs->bs_port->ip_alias != NULL) |
991 | kfree((void *) bs->bs_port->ip_alias, | |
91447636 | 992 | sizeof (struct vstruct_alias)); |
1c79356b A |
993 | ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port)); |
994 | bs->bs_port = MACH_PORT_NULL; | |
995 | BS_UNLOCK(bs); | |
996 | ||
997 | /* | |
998 | * Remove backing store from backing_store list. | |
999 | */ | |
1000 | BSL_LOCK(); | |
1001 | queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t, | |
1002 | bs_links); | |
1003 | BSL_UNLOCK(); | |
1004 | ||
1005 | /* | |
1006 | * Free the backing store structure. | |
1007 | */ | |
316670eb | 1008 | BS_LOCK_DESTROY(bs); |
91447636 | 1009 | kfree(bs, sizeof *bs); |
1c79356b A |
1010 | |
1011 | return KERN_SUCCESS; | |
1012 | } | |
1013 | ||
b0d623f7 A |
1014 | kern_return_t |
1015 | default_pager_backing_store_delete( | |
1016 | MACH_PORT_FACE backing_store) | |
1017 | { | |
1018 | if( backing_store != emergency_segment_backing_store ) { | |
1019 | default_pager_backing_store_delete_internal(emergency_segment_backing_store); | |
1020 | } | |
1021 | return(default_pager_backing_store_delete_internal(backing_store)); | |
1022 | } | |
1023 | ||
1c79356b A |
1024 | int ps_enter(paging_segment_t); /* forward */ |
1025 | ||
1026 | int | |
1027 | ps_enter( | |
1028 | paging_segment_t ps) | |
1029 | { | |
1030 | int i; | |
1031 | ||
1032 | PSL_LOCK(); | |
1033 | ||
1034 | for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) { | |
1035 | if (paging_segments[i] == PAGING_SEGMENT_NULL) | |
1036 | break; | |
1037 | } | |
1038 | ||
1039 | if (i < MAX_NUM_PAGING_SEGMENTS) { | |
1040 | paging_segments[i] = ps; | |
1041 | if (i > paging_segment_max) | |
1042 | paging_segment_max = i; | |
1043 | paging_segment_count++; | |
1044 | if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) || | |
1045 | (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)) | |
1046 | ps_select_array[ps->ps_bs->bs_priority] = 0; | |
1047 | i = 0; | |
1048 | } else { | |
1049 | PSL_UNLOCK(); | |
1050 | return KERN_RESOURCE_SHORTAGE; | |
1051 | } | |
1052 | ||
1053 | PSL_UNLOCK(); | |
1054 | return i; | |
1055 | } | |
1056 | ||
1057 | #ifdef DEVICE_PAGING | |
1058 | kern_return_t | |
1059 | default_pager_add_segment( | |
1060 | MACH_PORT_FACE backing_store, | |
1061 | MACH_PORT_FACE device, | |
1062 | recnum_t offset, | |
1063 | recnum_t count, | |
1064 | int record_size) | |
1065 | { | |
1066 | backing_store_t bs; | |
1067 | paging_segment_t ps; | |
1068 | int i; | |
1069 | int error; | |
1c79356b A |
1070 | |
1071 | if ((bs = backing_store_lookup(backing_store)) | |
1072 | == BACKING_STORE_NULL) | |
1073 | return KERN_INVALID_ARGUMENT; | |
1074 | ||
1075 | PSL_LOCK(); | |
1076 | for (i = 0; i <= paging_segment_max; i++) { | |
1077 | ps = paging_segments[i]; | |
1078 | if (ps == PAGING_SEGMENT_NULL) | |
1079 | continue; | |
1080 | ||
1081 | /* | |
1082 | * Check for overlap on same device. | |
1083 | */ | |
1084 | if (!(ps->ps_device != device | |
1085 | || offset >= ps->ps_offset + ps->ps_recnum | |
1086 | || offset + count <= ps->ps_offset)) { | |
1087 | PSL_UNLOCK(); | |
1088 | BS_UNLOCK(bs); | |
1089 | return KERN_INVALID_ARGUMENT; | |
1090 | } | |
1091 | } | |
1092 | PSL_UNLOCK(); | |
1093 | ||
1094 | /* | |
1095 | * Set up the paging segment | |
1096 | */ | |
1097 | ps = (paging_segment_t) kalloc(sizeof (struct paging_segment)); | |
1098 | if (ps == PAGING_SEGMENT_NULL) { | |
1099 | BS_UNLOCK(bs); | |
1100 | return KERN_RESOURCE_SHORTAGE; | |
1101 | } | |
1102 | ||
1103 | ps->ps_segtype = PS_PARTITION; | |
1104 | ps->ps_device = device; | |
1105 | ps->ps_offset = offset; | |
1106 | ps->ps_record_shift = local_log2(vm_page_size / record_size); | |
1107 | ps->ps_recnum = count; | |
1108 | ps->ps_pgnum = count >> ps->ps_record_shift; | |
1109 | ||
1110 | ps->ps_pgcount = ps->ps_pgnum; | |
1111 | ps->ps_clshift = local_log2(bs->bs_clsize); | |
1112 | ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift; | |
1113 | ps->ps_hint = 0; | |
1114 | ||
1115 | PS_LOCK_INIT(ps); | |
1116 | ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); | |
1117 | if (!ps->ps_bmap) { | |
316670eb | 1118 | PS_LOCK_DESTROY(ps); |
91447636 | 1119 | kfree(ps, sizeof *ps); |
1c79356b A |
1120 | BS_UNLOCK(bs); |
1121 | return KERN_RESOURCE_SHORTAGE; | |
1122 | } | |
1123 | for (i = 0; i < ps->ps_ncls; i++) { | |
1124 | clrbit(ps->ps_bmap, i); | |
1125 | } | |
1126 | ||
b0d623f7 A |
1127 | if(paging_segment_count == 0) { |
1128 | ps->ps_state = PS_EMERGENCY_SEGMENT; | |
1129 | if(use_emergency_swap_file_first) { | |
1130 | ps->ps_state |= PS_CAN_USE; | |
1131 | } | |
1132 | } else { | |
1133 | ps->ps_state = PS_CAN_USE; | |
1134 | } | |
1135 | ||
1c79356b A |
1136 | ps->ps_bs = bs; |
1137 | ||
1138 | if ((error = ps_enter(ps)) != 0) { | |
91447636 | 1139 | kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); |
316670eb A |
1140 | |
1141 | PS_LOCK_DESTROY(ps); | |
91447636 | 1142 | kfree(ps, sizeof *ps); |
1c79356b A |
1143 | BS_UNLOCK(bs); |
1144 | return KERN_RESOURCE_SHORTAGE; | |
1145 | } | |
1146 | ||
1147 | bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift; | |
1148 | bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift; | |
1149 | BS_UNLOCK(bs); | |
1150 | ||
1151 | PSL_LOCK(); | |
b0d623f7 A |
1152 | if(IS_PS_OK_TO_USE(ps)) { |
1153 | dp_pages_free += ps->ps_pgcount; | |
1154 | } else { | |
1155 | dp_pages_reserve += ps->ps_pgcount; | |
1156 | } | |
1c79356b A |
1157 | PSL_UNLOCK(); |
1158 | ||
1159 | bs_more_space(ps->ps_clcount); | |
1160 | ||
91447636 A |
1161 | DP_DEBUG(DEBUG_BS_INTERNAL, |
1162 | ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", | |
1163 | device, offset, count, record_size, | |
1164 | ps->ps_record_shift, ps->ps_pgnum)); | |
1c79356b A |
1165 | |
1166 | return KERN_SUCCESS; | |
1167 | } | |
1168 | ||
1169 | boolean_t | |
1170 | bs_add_device( | |
1171 | char *dev_name, | |
1172 | MACH_PORT_FACE master) | |
1173 | { | |
1174 | security_token_t null_security_token = { | |
1175 | { 0, 0 } | |
1176 | }; | |
1177 | MACH_PORT_FACE device; | |
1178 | int info[DEV_GET_SIZE_COUNT]; | |
1179 | mach_msg_type_number_t info_count; | |
1180 | MACH_PORT_FACE bs = MACH_PORT_NULL; | |
1181 | unsigned int rec_size; | |
1182 | recnum_t count; | |
1183 | int clsize; | |
1184 | MACH_PORT_FACE reply_port; | |
1185 | ||
1186 | if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE, | |
1187 | null_security_token, dev_name, &device)) | |
1188 | return FALSE; | |
1189 | ||
1190 | info_count = DEV_GET_SIZE_COUNT; | |
1191 | if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) { | |
1192 | rec_size = info[DEV_GET_SIZE_RECORD_SIZE]; | |
1193 | count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size; | |
1194 | clsize = bs_get_global_clsize(0); | |
1195 | if (!default_pager_backing_store_create( | |
0b4e3aa0 | 1196 | default_pager_object, |
1c79356b A |
1197 | DEFAULT_PAGER_BACKING_STORE_MAXPRI, |
1198 | (clsize * vm_page_size), | |
1199 | &bs)) { | |
1200 | if (!default_pager_add_segment(bs, device, | |
1201 | 0, count, rec_size)) { | |
1202 | return TRUE; | |
1203 | } | |
1204 | ipc_port_release_receive(bs); | |
1205 | } | |
1206 | } | |
1207 | ||
1208 | ipc_port_release_send(device); | |
1209 | return FALSE; | |
1210 | } | |
1211 | #endif /* DEVICE_PAGING */ | |
1212 | ||
1213 | #if VS_ASYNC_REUSE | |
1214 | ||
1215 | struct vs_async * | |
1216 | vs_alloc_async(void) | |
1217 | { | |
1218 | struct vs_async *vsa; | |
1219 | MACH_PORT_FACE reply_port; | |
91447636 | 1220 | // kern_return_t kr; |
1c79356b A |
1221 | |
1222 | VS_ASYNC_LOCK(); | |
1223 | if (vs_async_free_list == NULL) { | |
1224 | VS_ASYNC_UNLOCK(); | |
1225 | vsa = (struct vs_async *) kalloc(sizeof (struct vs_async)); | |
1226 | if (vsa != NULL) { | |
1227 | /* | |
1228 | * Try allocating a reply port named after the | |
1229 | * address of the vs_async structure. | |
1230 | */ | |
1231 | struct vstruct_alias *alias_struct; | |
1232 | ||
1233 | reply_port = ipc_port_alloc_kernel(); | |
1234 | alias_struct = (struct vstruct_alias *) | |
1235 | kalloc(sizeof (struct vstruct_alias)); | |
1236 | if(alias_struct != NULL) { | |
1237 | alias_struct->vs = (struct vstruct *)vsa; | |
0c530ab8 | 1238 | alias_struct->name = &default_pager_ops; |
39236c6e | 1239 | reply_port->ip_alias = (uintptr_t) alias_struct; |
1c79356b A |
1240 | vsa->reply_port = reply_port; |
1241 | vs_alloc_async_count++; | |
1242 | } | |
1243 | else { | |
1244 | vs_alloc_async_failed++; | |
1245 | ipc_port_dealloc_kernel((MACH_PORT_FACE) | |
1246 | (reply_port)); | |
91447636 | 1247 | kfree(vsa, sizeof (struct vs_async)); |
1c79356b A |
1248 | vsa = NULL; |
1249 | } | |
1250 | } | |
1251 | } else { | |
1252 | vsa = vs_async_free_list; | |
1253 | vs_async_free_list = vs_async_free_list->vsa_next; | |
1254 | VS_ASYNC_UNLOCK(); | |
1255 | } | |
1256 | ||
1257 | return vsa; | |
1258 | } | |
1259 | ||
1260 | void | |
1261 | vs_free_async( | |
1262 | struct vs_async *vsa) | |
1263 | { | |
1264 | VS_ASYNC_LOCK(); | |
1265 | vsa->vsa_next = vs_async_free_list; | |
1266 | vs_async_free_list = vsa; | |
1267 | VS_ASYNC_UNLOCK(); | |
1268 | } | |
1269 | ||
1270 | #else /* VS_ASYNC_REUSE */ | |
1271 | ||
1272 | struct vs_async * | |
1273 | vs_alloc_async(void) | |
1274 | { | |
1275 | struct vs_async *vsa; | |
1276 | MACH_PORT_FACE reply_port; | |
1277 | kern_return_t kr; | |
1278 | ||
1279 | vsa = (struct vs_async *) kalloc(sizeof (struct vs_async)); | |
1280 | if (vsa != NULL) { | |
1281 | /* | |
1282 | * Try allocating a reply port named after the | |
1283 | * address of the vs_async structure. | |
1284 | */ | |
1285 | reply_port = ipc_port_alloc_kernel(); | |
1286 | alias_struct = (vstruct_alias *) | |
1287 | kalloc(sizeof (struct vstruct_alias)); | |
1288 | if(alias_struct != NULL) { | |
1289 | alias_struct->vs = reply_port; | |
0c530ab8 | 1290 | alias_struct->name = &default_pager_ops; |
39236c6e | 1291 | reply_port->defpager_importance.alias = (int) vsa; |
1c79356b A |
1292 | vsa->reply_port = reply_port; |
1293 | vs_alloc_async_count++; | |
1294 | } | |
1295 | else { | |
1296 | vs_alloc_async_failed++; | |
1297 | ipc_port_dealloc_kernel((MACH_PORT_FACE) | |
1298 | (reply_port)); | |
91447636 | 1299 | kfree(vsa, sizeof (struct vs_async)); |
1c79356b A |
1300 | vsa = NULL; |
1301 | } | |
1302 | } | |
1303 | ||
1304 | return vsa; | |
1305 | } | |
1306 | ||
1307 | void | |
1308 | vs_free_async( | |
1309 | struct vs_async *vsa) | |
1310 | { | |
1c79356b A |
1311 | MACH_PORT_FACE reply_port; |
1312 | kern_return_t kr; | |
1313 | ||
1314 | reply_port = vsa->reply_port; | |
39236c6e | 1315 | kfree(reply_port->ip_alias, sizeof (struct vstuct_alias)); |
91447636 | 1316 | kfree(vsa, sizeof (struct vs_async)); |
1c79356b A |
1317 | ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port)); |
1318 | #if 0 | |
1319 | VS_ASYNC_LOCK(); | |
1320 | vs_alloc_async_count--; | |
1321 | VS_ASYNC_UNLOCK(); | |
1322 | #endif | |
1323 | } | |
1324 | ||
1325 | #endif /* VS_ASYNC_REUSE */ | |
1326 | ||
0b4e3aa0 A |
1327 | zone_t vstruct_zone; |
1328 | ||
1c79356b A |
1329 | vstruct_t |
1330 | ps_vstruct_create( | |
b0d623f7 | 1331 | dp_size_t size) |
1c79356b A |
1332 | { |
1333 | vstruct_t vs; | |
91447636 | 1334 | unsigned int i; |
1c79356b | 1335 | |
0b4e3aa0 | 1336 | vs = (vstruct_t) zalloc(vstruct_zone); |
1c79356b A |
1337 | if (vs == VSTRUCT_NULL) { |
1338 | return VSTRUCT_NULL; | |
1339 | } | |
1340 | ||
1341 | VS_LOCK_INIT(vs); | |
1342 | ||
1343 | /* | |
1344 | * The following fields will be provided later. | |
1345 | */ | |
0c530ab8 | 1346 | vs->vs_pager_ops = NULL; |
0b4e3aa0 A |
1347 | vs->vs_control = MEMORY_OBJECT_CONTROL_NULL; |
1348 | vs->vs_references = 1; | |
1c79356b | 1349 | vs->vs_seqno = 0; |
1c79356b | 1350 | |
1c79356b A |
1351 | vs->vs_waiting_seqno = FALSE; |
1352 | vs->vs_waiting_read = FALSE; | |
1353 | vs->vs_waiting_write = FALSE; | |
1c79356b | 1354 | vs->vs_waiting_async = FALSE; |
1c79356b A |
1355 | |
1356 | vs->vs_readers = 0; | |
1357 | vs->vs_writers = 0; | |
1358 | ||
1359 | vs->vs_errors = 0; | |
1360 | ||
1361 | vs->vs_clshift = local_log2(bs_get_global_clsize(0)); | |
55e303ae | 1362 | vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1; |
1c79356b A |
1363 | vs->vs_async_pending = 0; |
1364 | ||
1365 | /* | |
1366 | * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE | |
1367 | * depending on the size of the memory object. | |
1368 | */ | |
1369 | if (INDIRECT_CLMAP(vs->vs_size)) { | |
1370 | vs->vs_imap = (struct vs_map **) | |
1371 | kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size)); | |
1372 | vs->vs_indirect = TRUE; | |
1373 | } else { | |
1374 | vs->vs_dmap = (struct vs_map *) | |
1375 | kalloc(CLMAP_SIZE(vs->vs_size)); | |
1376 | vs->vs_indirect = FALSE; | |
1377 | } | |
1378 | vs->vs_xfer_pending = FALSE; | |
91447636 A |
1379 | DP_DEBUG(DEBUG_VS_INTERNAL, |
1380 | ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect)); | |
1c79356b A |
1381 | |
1382 | /* | |
1383 | * Check to see that we got the space. | |
1384 | */ | |
1385 | if (!vs->vs_dmap) { | |
91447636 | 1386 | kfree(vs, sizeof *vs); |
1c79356b A |
1387 | return VSTRUCT_NULL; |
1388 | } | |
1389 | ||
1390 | /* | |
1391 | * Zero the indirect pointers, or clear the direct pointers. | |
1392 | */ | |
1393 | if (vs->vs_indirect) | |
1394 | memset(vs->vs_imap, 0, | |
1395 | INDIRECT_CLMAP_SIZE(vs->vs_size)); | |
1396 | else | |
1397 | for (i = 0; i < vs->vs_size; i++) | |
1398 | VSM_CLR(vs->vs_dmap[i]); | |
1399 | ||
1400 | VS_MAP_LOCK_INIT(vs); | |
1401 | ||
1402 | bs_commit(vs->vs_size); | |
1403 | ||
1404 | return vs; | |
1405 | } | |
1406 | ||
91447636 | 1407 | paging_segment_t ps_select_segment(unsigned int, int *); /* forward */ |
1c79356b A |
1408 | |
1409 | paging_segment_t | |
1410 | ps_select_segment( | |
91447636 A |
1411 | unsigned int shift, |
1412 | int *psindex) | |
1c79356b A |
1413 | { |
1414 | paging_segment_t ps; | |
1415 | int i; | |
1416 | int j; | |
1c79356b A |
1417 | |
1418 | /* | |
1419 | * Optimize case where there's only one segment. | |
1420 | * paging_segment_max will index the one and only segment. | |
1421 | */ | |
1422 | ||
1423 | PSL_LOCK(); | |
1424 | if (paging_segment_count == 1) { | |
b0d623f7 | 1425 | paging_segment_t lps = PAGING_SEGMENT_NULL; /* used to avoid extra PS_UNLOCK */ |
0b4e3aa0 | 1426 | ipc_port_t trigger = IP_NULL; |
1c79356b A |
1427 | |
1428 | ps = paging_segments[paging_segment_max]; | |
1429 | *psindex = paging_segment_max; | |
1430 | PS_LOCK(ps); | |
b0d623f7 A |
1431 | if( !IS_PS_EMERGENCY_SEGMENT(ps) ) { |
1432 | panic("Emergency paging segment missing\n"); | |
1433 | } | |
1434 | ASSERT(ps->ps_clshift >= shift); | |
1435 | if(IS_PS_OK_TO_USE(ps)) { | |
1c79356b A |
1436 | if (ps->ps_clcount) { |
1437 | ps->ps_clcount--; | |
1438 | dp_pages_free -= 1 << ps->ps_clshift; | |
b0d623f7 | 1439 | ps->ps_pgcount -= 1 << ps->ps_clshift; |
1c79356b A |
1440 | if(min_pages_trigger_port && |
1441 | (dp_pages_free < minimum_pages_remaining)) { | |
0b4e3aa0 | 1442 | trigger = min_pages_trigger_port; |
1c79356b A |
1443 | min_pages_trigger_port = NULL; |
1444 | bs_low = TRUE; | |
6d2010ae | 1445 | backing_store_abort_compaction = TRUE; |
1c79356b A |
1446 | } |
1447 | lps = ps; | |
b0d623f7 A |
1448 | } |
1449 | } | |
1c79356b | 1450 | PS_UNLOCK(ps); |
b0d623f7 A |
1451 | |
1452 | if( lps == PAGING_SEGMENT_NULL ) { | |
1453 | if(dp_pages_free) { | |
1454 | dp_pages_free_drift_count++; | |
1455 | if(dp_pages_free > dp_pages_free_drifted_max) { | |
1456 | dp_pages_free_drifted_max = dp_pages_free; | |
1457 | } | |
1458 | dprintf(("Emergency swap segment:dp_pages_free before zeroing out: %d\n",dp_pages_free)); | |
1459 | } | |
1460 | dp_pages_free = 0; | |
1461 | } | |
1462 | ||
1c79356b | 1463 | PSL_UNLOCK(); |
0b4e3aa0 A |
1464 | |
1465 | if (trigger != IP_NULL) { | |
6d2010ae A |
1466 | dprintf(("ps_select_segment - send HI_WAT_ALERT\n")); |
1467 | ||
0b4e3aa0 A |
1468 | default_pager_space_alert(trigger, HI_WAT_ALERT); |
1469 | ipc_port_release_send(trigger); | |
1470 | } | |
1c79356b A |
1471 | return lps; |
1472 | } | |
1473 | ||
1474 | if (paging_segment_count == 0) { | |
b0d623f7 A |
1475 | if(dp_pages_free) { |
1476 | dp_pages_free_drift_count++; | |
1477 | if(dp_pages_free > dp_pages_free_drifted_max) { | |
1478 | dp_pages_free_drifted_max = dp_pages_free; | |
1479 | } | |
1480 | dprintf(("No paging segments:dp_pages_free before zeroing out: %d\n",dp_pages_free)); | |
1481 | } | |
1482 | dp_pages_free = 0; | |
1c79356b A |
1483 | PSL_UNLOCK(); |
1484 | return PAGING_SEGMENT_NULL; | |
1485 | } | |
1486 | ||
1487 | for (i = BS_MAXPRI; | |
1488 | i >= BS_MINPRI; i--) { | |
1489 | int start_index; | |
1490 | ||
1491 | if ((ps_select_array[i] == BS_NOPRI) || | |
1492 | (ps_select_array[i] == BS_FULLPRI)) | |
1493 | continue; | |
1494 | start_index = ps_select_array[i]; | |
1495 | ||
1496 | if(!(paging_segments[start_index])) { | |
1497 | j = start_index+1; | |
1498 | physical_transfer_cluster_count = 0; | |
1499 | } | |
0b4e3aa0 | 1500 | else if ((physical_transfer_cluster_count+1) == (ALLOC_STRIDE >> |
1c79356b | 1501 | (((paging_segments[start_index])->ps_clshift) |
0b4e3aa0 | 1502 | + vm_page_shift))) { |
1c79356b A |
1503 | physical_transfer_cluster_count = 0; |
1504 | j = start_index + 1; | |
1505 | } else { | |
1506 | physical_transfer_cluster_count+=1; | |
1507 | j = start_index; | |
1508 | if(start_index == 0) | |
1509 | start_index = paging_segment_max; | |
1510 | else | |
1511 | start_index = start_index - 1; | |
1512 | } | |
1513 | ||
1514 | while (1) { | |
1515 | if (j > paging_segment_max) | |
1516 | j = 0; | |
1517 | if ((ps = paging_segments[j]) && | |
1518 | (ps->ps_bs->bs_priority == i)) { | |
1519 | /* | |
1520 | * Force the ps cluster size to be | |
1521 | * >= that of the vstruct. | |
1522 | */ | |
1523 | PS_LOCK(ps); | |
b0d623f7 A |
1524 | if (IS_PS_OK_TO_USE(ps)) { |
1525 | if ((ps->ps_clcount) && | |
1526 | (ps->ps_clshift >= shift)) { | |
1527 | ipc_port_t trigger = IP_NULL; | |
1528 | ||
1529 | ps->ps_clcount--; | |
1530 | dp_pages_free -= 1 << ps->ps_clshift; | |
1531 | ps->ps_pgcount -= 1 << ps->ps_clshift; | |
1532 | if(min_pages_trigger_port && | |
1533 | (dp_pages_free < | |
1534 | minimum_pages_remaining)) { | |
1535 | trigger = min_pages_trigger_port; | |
1536 | min_pages_trigger_port = NULL; | |
6d2010ae A |
1537 | bs_low = TRUE; |
1538 | backing_store_abort_compaction = TRUE; | |
b0d623f7 A |
1539 | } |
1540 | PS_UNLOCK(ps); | |
1541 | /* | |
1542 | * found one, quit looking. | |
1543 | */ | |
1544 | ps_select_array[i] = j; | |
1545 | PSL_UNLOCK(); | |
1546 | ||
1547 | if (trigger != IP_NULL) { | |
6d2010ae A |
1548 | dprintf(("ps_select_segment - send HI_WAT_ALERT\n")); |
1549 | ||
b0d623f7 A |
1550 | default_pager_space_alert( |
1551 | trigger, | |
1552 | HI_WAT_ALERT); | |
1553 | ipc_port_release_send(trigger); | |
1554 | } | |
1555 | *psindex = j; | |
1556 | return ps; | |
0b4e3aa0 | 1557 | } |
1c79356b A |
1558 | } |
1559 | PS_UNLOCK(ps); | |
1560 | } | |
1561 | if (j == start_index) { | |
1562 | /* | |
1563 | * none at this priority -- mark it full | |
1564 | */ | |
1565 | ps_select_array[i] = BS_FULLPRI; | |
1566 | break; | |
1567 | } | |
1568 | j++; | |
1569 | } | |
1570 | } | |
b0d623f7 A |
1571 | |
1572 | if(dp_pages_free) { | |
1573 | dp_pages_free_drift_count++; | |
1574 | if(dp_pages_free > dp_pages_free_drifted_max) { | |
1575 | dp_pages_free_drifted_max = dp_pages_free; | |
1576 | } | |
1577 | dprintf(("%d Paging Segments: dp_pages_free before zeroing out: %d\n",paging_segment_count,dp_pages_free)); | |
1578 | } | |
1579 | dp_pages_free = 0; | |
1c79356b A |
1580 | PSL_UNLOCK(); |
1581 | return PAGING_SEGMENT_NULL; | |
1582 | } | |
1583 | ||
b0d623f7 | 1584 | dp_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/ |
1c79356b | 1585 | |
b0d623f7 | 1586 | dp_offset_t |
1c79356b A |
1587 | ps_allocate_cluster( |
1588 | vstruct_t vs, | |
1589 | int *psindex, | |
1590 | paging_segment_t use_ps) | |
1591 | { | |
91447636 | 1592 | unsigned int byte_num; |
1c79356b A |
1593 | int bit_num = 0; |
1594 | paging_segment_t ps; | |
b0d623f7 | 1595 | dp_offset_t cluster; |
0b4e3aa0 | 1596 | ipc_port_t trigger = IP_NULL; |
1c79356b A |
1597 | |
1598 | /* | |
1599 | * Find best paging segment. | |
1600 | * ps_select_segment will decrement cluster count on ps. | |
1601 | * Must pass cluster shift to find the most appropriate segment. | |
1602 | */ | |
1603 | /* NOTE: The addition of paging segment delete capability threatened | |
1604 | * to seriously complicate the treatment of paging segments in this | |
1605 | * module and the ones that call it (notably ps_clmap), because of the | |
1606 | * difficulty in assuring that the paging segment would continue to | |
1607 | * exist between being unlocked and locked. This was | |
1608 | * avoided because all calls to this module are based in either | |
1609 | * dp_memory_object calls which rely on the vs lock, or by | |
1610 | * the transfer function which is part of the segment delete path. | |
1611 | * The transfer function which is part of paging segment delete is | |
1612 | * protected from multiple callers by the backing store lock. | |
1613 | * The paging segment delete function treats mappings to a paging | |
1614 | * segment on a vstruct by vstruct basis, locking the vstruct targeted | |
1615 | * while data is transferred to the remaining segments. This is in | |
1616 | * line with the view that incomplete or in-transition mappings between | |
1617 | * data, a vstruct, and backing store are protected by the vs lock. | |
1618 | * This and the ordering of the paging segment "going_away" bit setting | |
1619 | * protects us. | |
1620 | */ | |
b0d623f7 | 1621 | retry: |
1c79356b A |
1622 | if (use_ps != PAGING_SEGMENT_NULL) { |
1623 | ps = use_ps; | |
1624 | PSL_LOCK(); | |
1625 | PS_LOCK(ps); | |
55e303ae A |
1626 | |
1627 | ASSERT(ps->ps_clcount != 0); | |
1628 | ||
1c79356b A |
1629 | ps->ps_clcount--; |
1630 | dp_pages_free -= 1 << ps->ps_clshift; | |
b0d623f7 | 1631 | ps->ps_pgcount -= 1 << ps->ps_clshift; |
1c79356b A |
1632 | if(min_pages_trigger_port && |
1633 | (dp_pages_free < minimum_pages_remaining)) { | |
0b4e3aa0 | 1634 | trigger = min_pages_trigger_port; |
1c79356b | 1635 | min_pages_trigger_port = NULL; |
6d2010ae A |
1636 | bs_low = TRUE; |
1637 | backing_store_abort_compaction = TRUE; | |
1c79356b | 1638 | } |
0b4e3aa0 | 1639 | PSL_UNLOCK(); |
1c79356b | 1640 | PS_UNLOCK(ps); |
0b4e3aa0 | 1641 | if (trigger != IP_NULL) { |
6d2010ae A |
1642 | dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n")); |
1643 | ||
0b4e3aa0 A |
1644 | default_pager_space_alert(trigger, HI_WAT_ALERT); |
1645 | ipc_port_release_send(trigger); | |
1646 | } | |
1647 | ||
1c79356b A |
1648 | } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) == |
1649 | PAGING_SEGMENT_NULL) { | |
b0d623f7 A |
1650 | static clock_sec_t lastnotify = 0; |
1651 | clock_sec_t now; | |
1652 | clock_nsec_t nanoseconds_dummy; | |
1653 | ||
1654 | /* | |
1655 | * Don't immediately jump to the emergency segment. Give the | |
1656 | * dynamic pager a chance to create it's first normal swap file. | |
1657 | * Unless, of course the very first normal swap file can't be | |
1658 | * created due to some problem and we didn't expect that problem | |
1659 | * i.e. use_emergency_swap_file_first was never set to true initially. | |
1660 | * It then gets set in the swap file creation error handling. | |
1661 | */ | |
1662 | if(paging_segment_count > 1 || use_emergency_swap_file_first == TRUE) { | |
1663 | ||
1664 | ps = paging_segments[EMERGENCY_PSEG_INDEX]; | |
1665 | if(IS_PS_EMERGENCY_SEGMENT(ps) && !IS_PS_GOING_AWAY(ps)) { | |
1666 | PSL_LOCK(); | |
1667 | PS_LOCK(ps); | |
1668 | ||
1669 | if(IS_PS_GOING_AWAY(ps)) { | |
1670 | /* Someone de-activated the emergency paging segment*/ | |
1671 | PS_UNLOCK(ps); | |
1672 | PSL_UNLOCK(); | |
91447636 | 1673 | |
b0d623f7 A |
1674 | } else if(dp_pages_free) { |
1675 | /* | |
1676 | * Someone has already activated the emergency paging segment | |
1677 | * OR | |
1678 | * Between us having rec'd a NULL segment from ps_select_segment | |
1679 | * and reaching here a new normal segment could have been added. | |
1680 | * E.g. we get NULL segment and another thread just added the | |
1681 | * new swap file. Hence check to see if we have more dp_pages_free | |
1682 | * before activating the emergency segment. | |
1683 | */ | |
1684 | PS_UNLOCK(ps); | |
1685 | PSL_UNLOCK(); | |
1686 | goto retry; | |
1687 | ||
1688 | } else if(!IS_PS_OK_TO_USE(ps) && ps->ps_clcount) { | |
1689 | /* | |
1690 | * PS_CAN_USE is only reset from the emergency segment when it's | |
1691 | * been successfully recovered. So it's legal to have an emergency | |
1692 | * segment that has PS_CAN_USE but no clusters because it's recovery | |
1693 | * failed. | |
1694 | */ | |
1695 | backing_store_t bs = ps->ps_bs; | |
1696 | ps->ps_state |= PS_CAN_USE; | |
1697 | if(ps_select_array[bs->bs_priority] == BS_FULLPRI || | |
1698 | ps_select_array[bs->bs_priority] == BS_NOPRI) { | |
1699 | ps_select_array[bs->bs_priority] = 0; | |
1700 | } | |
1701 | dp_pages_free += ps->ps_pgcount; | |
1702 | dp_pages_reserve -= ps->ps_pgcount; | |
1703 | PS_UNLOCK(ps); | |
1704 | PSL_UNLOCK(); | |
1705 | dprintf(("Switching ON Emergency paging segment\n")); | |
1706 | goto retry; | |
1707 | } | |
1708 | ||
1709 | PS_UNLOCK(ps); | |
1710 | PSL_UNLOCK(); | |
1711 | } | |
1712 | } | |
1713 | ||
91447636 A |
1714 | /* |
1715 | * Emit a notification of the low-paging resource condition | |
1716 | * but don't issue it more than once every five seconds. This | |
1717 | * prevents us from overflowing logs with thousands of | |
1718 | * repetitions of the message. | |
1719 | */ | |
1720 | clock_get_system_nanotime(&now, &nanoseconds_dummy); | |
b0d623f7 A |
1721 | if (paging_segment_count > 1 && (now > lastnotify + 5)) { |
1722 | /* With an activated emergency paging segment we still | |
1723 | * didn't get any clusters. This could mean that the | |
1724 | * emergency paging segment is exhausted. | |
1725 | */ | |
1726 | dprintf(("System is out of paging space.\n")); | |
91447636 A |
1727 | lastnotify = now; |
1728 | } | |
1729 | ||
0b4e3aa0 | 1730 | PSL_LOCK(); |
b0d623f7 | 1731 | |
1c79356b | 1732 | if(min_pages_trigger_port) { |
0b4e3aa0 | 1733 | trigger = min_pages_trigger_port; |
1c79356b A |
1734 | min_pages_trigger_port = NULL; |
1735 | bs_low = TRUE; | |
6d2010ae | 1736 | backing_store_abort_compaction = TRUE; |
1c79356b | 1737 | } |
0b4e3aa0 A |
1738 | PSL_UNLOCK(); |
1739 | if (trigger != IP_NULL) { | |
6d2010ae A |
1740 | dprintf(("ps_allocate_cluster - send HI_WAT_ALERT\n")); |
1741 | ||
0b4e3aa0 A |
1742 | default_pager_space_alert(trigger, HI_WAT_ALERT); |
1743 | ipc_port_release_send(trigger); | |
1744 | } | |
b0d623f7 | 1745 | return (dp_offset_t) -1; |
1c79356b | 1746 | } |
1c79356b A |
1747 | |
1748 | /* | |
1749 | * Look for an available cluster. At the end of the loop, | |
1750 | * byte_num is the byte offset and bit_num is the bit offset of the | |
1751 | * first zero bit in the paging segment bitmap. | |
1752 | */ | |
1753 | PS_LOCK(ps); | |
1754 | byte_num = ps->ps_hint; | |
1755 | for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) { | |
1756 | if (*(ps->ps_bmap + byte_num) != BYTEMASK) { | |
1757 | for (bit_num = 0; bit_num < NBBY; bit_num++) { | |
1758 | if (isclr((ps->ps_bmap + byte_num), bit_num)) | |
1759 | break; | |
1760 | } | |
1761 | ASSERT(bit_num != NBBY); | |
1762 | break; | |
1763 | } | |
1764 | } | |
1765 | ps->ps_hint = byte_num; | |
1766 | cluster = (byte_num*NBBY) + bit_num; | |
1767 | ||
1768 | /* Space was reserved, so this must be true */ | |
1769 | ASSERT(cluster < ps->ps_ncls); | |
1770 | ||
1771 | setbit(ps->ps_bmap, cluster); | |
1772 | PS_UNLOCK(ps); | |
1773 | ||
1774 | return cluster; | |
1775 | } | |
1776 | ||
b0d623f7 | 1777 | void ps_deallocate_cluster(paging_segment_t, dp_offset_t); /* forward */ |
1c79356b A |
1778 | |
1779 | void | |
1780 | ps_deallocate_cluster( | |
1781 | paging_segment_t ps, | |
b0d623f7 | 1782 | dp_offset_t cluster) |
1c79356b A |
1783 | { |
1784 | ||
b0d623f7 | 1785 | if (cluster >= ps->ps_ncls) |
1c79356b A |
1786 | panic("ps_deallocate_cluster: Invalid cluster number"); |
1787 | ||
1788 | /* | |
1789 | * Lock the paging segment, clear the cluster's bitmap and increment the | |
1790 | * number of free cluster. | |
1791 | */ | |
1792 | PSL_LOCK(); | |
1793 | PS_LOCK(ps); | |
1794 | clrbit(ps->ps_bmap, cluster); | |
b0d623f7 A |
1795 | if( IS_PS_OK_TO_USE(ps)) { |
1796 | ++ps->ps_clcount; | |
1797 | ps->ps_pgcount += 1 << ps->ps_clshift; | |
1798 | dp_pages_free += 1 << ps->ps_clshift; | |
1799 | } else { | |
1800 | ps->ps_special_clusters += 1; | |
1801 | } | |
1c79356b A |
1802 | |
1803 | /* | |
1804 | * Move the hint down to the freed cluster if it is | |
1805 | * less than the current hint. | |
1806 | */ | |
1807 | if ((cluster/NBBY) < ps->ps_hint) { | |
1808 | ps->ps_hint = (cluster/NBBY); | |
1809 | } | |
1810 | ||
1c79356b A |
1811 | |
1812 | /* | |
1813 | * If we're freeing space on a full priority, reset the array. | |
1814 | */ | |
b0d623f7 | 1815 | if ( IS_PS_OK_TO_USE(ps) && ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI) |
1c79356b | 1816 | ps_select_array[ps->ps_bs->bs_priority] = 0; |
b0d623f7 | 1817 | PS_UNLOCK(ps); |
1c79356b A |
1818 | PSL_UNLOCK(); |
1819 | ||
1820 | return; | |
1821 | } | |
1822 | ||
b0d623f7 | 1823 | void ps_dealloc_vsmap(struct vs_map *, dp_size_t); /* forward */ |
1c79356b A |
1824 | |
1825 | void | |
1826 | ps_dealloc_vsmap( | |
1827 | struct vs_map *vsmap, | |
b0d623f7 | 1828 | dp_size_t size) |
1c79356b | 1829 | { |
91447636 | 1830 | unsigned int i; |
6d2010ae A |
1831 | struct ps_vnode_trim_data trim_data; |
1832 | ||
1833 | ps_vnode_trim_init(&trim_data); | |
1834 | ||
1835 | for (i = 0; i < size; i++) { | |
1836 | if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i])) { | |
1837 | ps_vnode_trim_more(&trim_data, | |
1838 | &vsmap[i], | |
1839 | VSM_PS(vsmap[i])->ps_clshift, | |
1840 | vm_page_size << VSM_PS(vsmap[i])->ps_clshift); | |
1c79356b A |
1841 | ps_deallocate_cluster(VSM_PS(vsmap[i]), |
1842 | VSM_CLOFF(vsmap[i])); | |
6d2010ae A |
1843 | } else { |
1844 | ps_vnode_trim_now(&trim_data); | |
1845 | } | |
1846 | } | |
1847 | ps_vnode_trim_now(&trim_data); | |
1c79356b A |
1848 | } |
1849 | ||
1850 | void | |
1851 | ps_vstruct_dealloc( | |
1852 | vstruct_t vs) | |
1853 | { | |
91447636 A |
1854 | unsigned int i; |
1855 | // spl_t s; | |
1c79356b A |
1856 | |
1857 | VS_MAP_LOCK(vs); | |
1858 | ||
1859 | /* | |
1860 | * If this is an indirect structure, then we walk through the valid | |
1861 | * (non-zero) indirect pointers and deallocate the clusters | |
1862 | * associated with each used map entry (via ps_dealloc_vsmap). | |
1863 | * When all of the clusters in an indirect block have been | |
1864 | * freed, we deallocate the block. When all of the indirect | |
1865 | * blocks have been deallocated we deallocate the memory | |
1866 | * holding the indirect pointers. | |
1867 | */ | |
1868 | if (vs->vs_indirect) { | |
1869 | for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { | |
1870 | if (vs->vs_imap[i] != NULL) { | |
1871 | ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES); | |
91447636 | 1872 | kfree(vs->vs_imap[i], CLMAP_THRESHOLD); |
1c79356b A |
1873 | } |
1874 | } | |
91447636 | 1875 | kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size)); |
1c79356b A |
1876 | } else { |
1877 | /* | |
1878 | * Direct map. Free used clusters, then memory. | |
1879 | */ | |
1880 | ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size); | |
91447636 | 1881 | kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); |
1c79356b A |
1882 | } |
1883 | VS_MAP_UNLOCK(vs); | |
1884 | ||
1885 | bs_commit(- vs->vs_size); | |
1886 | ||
316670eb A |
1887 | VS_MAP_LOCK_DESTROY(vs); |
1888 | ||
91447636 | 1889 | zfree(vstruct_zone, vs); |
1c79356b A |
1890 | } |
1891 | ||
39236c6e | 1892 | kern_return_t |
6d2010ae A |
1893 | ps_vstruct_reclaim( |
1894 | vstruct_t vs, | |
1895 | boolean_t return_to_vm, | |
1896 | boolean_t reclaim_backing_store) | |
1897 | { | |
1898 | unsigned int i, j; | |
6d2010ae A |
1899 | struct vs_map *vsmap; |
1900 | boolean_t vsmap_all_clear, vsimap_all_clear; | |
1901 | struct vm_object_fault_info fault_info; | |
1902 | int clmap_off; | |
1903 | unsigned int vsmap_size; | |
39236c6e | 1904 | kern_return_t kr = KERN_SUCCESS; |
6d2010ae | 1905 | |
6d2010ae A |
1906 | VS_MAP_LOCK(vs); |
1907 | ||
1908 | fault_info.cluster_size = VM_SUPER_CLUSTER; | |
1909 | fault_info.behavior = VM_BEHAVIOR_SEQUENTIAL; | |
1910 | fault_info.user_tag = 0; | |
1911 | fault_info.lo_offset = 0; | |
1912 | fault_info.hi_offset = ptoa_32(vs->vs_size << vs->vs_clshift); | |
1913 | fault_info.io_sync = reclaim_backing_store; | |
316670eb | 1914 | fault_info.batch_pmap_op = FALSE; |
6d2010ae A |
1915 | |
1916 | /* | |
1917 | * If this is an indirect structure, then we walk through the valid | |
1918 | * (non-zero) indirect pointers and deallocate the clusters | |
1919 | * associated with each used map entry (via ps_dealloc_vsmap). | |
1920 | * When all of the clusters in an indirect block have been | |
1921 | * freed, we deallocate the block. When all of the indirect | |
1922 | * blocks have been deallocated we deallocate the memory | |
1923 | * holding the indirect pointers. | |
1924 | */ | |
1925 | if (vs->vs_indirect) { | |
1926 | vsimap_all_clear = TRUE; | |
1927 | for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { | |
1928 | vsmap = vs->vs_imap[i]; | |
1929 | if (vsmap == NULL) | |
1930 | continue; | |
1931 | /* loop on clusters in this indirect map */ | |
1932 | clmap_off = (vm_page_size * CLMAP_ENTRIES * | |
1933 | VSCLSIZE(vs) * i); | |
1934 | if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size)) | |
1935 | vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i); | |
1936 | else | |
1937 | vsmap_size = CLMAP_ENTRIES; | |
1938 | vsmap_all_clear = TRUE; | |
1939 | if (return_to_vm) { | |
1940 | for (j = 0; j < vsmap_size;) { | |
1941 | if (VSM_ISCLR(vsmap[j]) || | |
1942 | VSM_ISERR(vsmap[j])) { | |
1943 | j++; | |
1944 | clmap_off += vm_page_size * VSCLSIZE(vs); | |
1945 | continue; | |
1946 | } | |
1947 | VS_MAP_UNLOCK(vs); | |
1948 | kr = pvs_cluster_read( | |
1949 | vs, | |
1950 | clmap_off, | |
1951 | (dp_size_t) -1, /* read whole cluster */ | |
1952 | &fault_info); | |
39236c6e | 1953 | |
6d2010ae A |
1954 | VS_MAP_LOCK(vs); /* XXX what if it changed ? */ |
1955 | if (kr != KERN_SUCCESS) { | |
1956 | vsmap_all_clear = FALSE; | |
1957 | vsimap_all_clear = FALSE; | |
39236c6e A |
1958 | |
1959 | kr = KERN_MEMORY_ERROR; | |
1960 | goto out; | |
6d2010ae A |
1961 | } |
1962 | } | |
1963 | } | |
1964 | if (vsmap_all_clear) { | |
1965 | ps_dealloc_vsmap(vsmap, CLMAP_ENTRIES); | |
1966 | kfree(vsmap, CLMAP_THRESHOLD); | |
1967 | vs->vs_imap[i] = NULL; | |
1968 | } | |
1969 | } | |
1970 | if (vsimap_all_clear) { | |
1971 | // kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size)); | |
1972 | } | |
1973 | } else { | |
1974 | /* | |
1975 | * Direct map. Free used clusters, then memory. | |
1976 | */ | |
1977 | vsmap = vs->vs_dmap; | |
1978 | if (vsmap == NULL) { | |
1979 | goto out; | |
1980 | } | |
1981 | vsmap_all_clear = TRUE; | |
1982 | /* loop on clusters in the direct map */ | |
1983 | if (return_to_vm) { | |
1984 | for (j = 0; j < vs->vs_size;) { | |
1985 | if (VSM_ISCLR(vsmap[j]) || | |
1986 | VSM_ISERR(vsmap[j])) { | |
1987 | j++; | |
1988 | continue; | |
1989 | } | |
1990 | clmap_off = vm_page_size * (j << vs->vs_clshift); | |
1991 | VS_MAP_UNLOCK(vs); | |
1992 | kr = pvs_cluster_read( | |
1993 | vs, | |
1994 | clmap_off, | |
1995 | (dp_size_t) -1, /* read whole cluster */ | |
1996 | &fault_info); | |
39236c6e | 1997 | |
6d2010ae A |
1998 | VS_MAP_LOCK(vs); /* XXX what if it changed ? */ |
1999 | if (kr != KERN_SUCCESS) { | |
2000 | vsmap_all_clear = FALSE; | |
39236c6e A |
2001 | |
2002 | kr = KERN_MEMORY_ERROR; | |
2003 | goto out; | |
6d2010ae A |
2004 | } else { |
2005 | // VSM_CLR(vsmap[j]); | |
2006 | } | |
2007 | } | |
2008 | } | |
2009 | if (vsmap_all_clear) { | |
2010 | ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size); | |
2011 | // kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size)); | |
2012 | } | |
2013 | } | |
2014 | out: | |
2015 | VS_MAP_UNLOCK(vs); | |
39236c6e A |
2016 | |
2017 | return kr; | |
6d2010ae A |
2018 | } |
2019 | ||
91447636 | 2020 | int ps_map_extend(vstruct_t, unsigned int); /* forward */ |
1c79356b A |
2021 | |
2022 | int ps_map_extend( | |
2023 | vstruct_t vs, | |
91447636 | 2024 | unsigned int new_size) |
1c79356b A |
2025 | { |
2026 | struct vs_map **new_imap; | |
2027 | struct vs_map *new_dmap = NULL; | |
2028 | int newdsize; | |
2029 | int i; | |
2030 | void *old_map = NULL; | |
2031 | int old_map_size = 0; | |
2032 | ||
2033 | if (vs->vs_size >= new_size) { | |
2034 | /* | |
2035 | * Someone has already done the work. | |
2036 | */ | |
2037 | return 0; | |
2038 | } | |
2039 | ||
2040 | /* | |
2041 | * If the new size extends into the indirect range, then we have one | |
2042 | * of two cases: we are going from indirect to indirect, or we are | |
2043 | * going from direct to indirect. If we are going from indirect to | |
2044 | * indirect, then it is possible that the new size will fit in the old | |
2045 | * indirect map. If this is the case, then just reset the size of the | |
2046 | * vstruct map and we are done. If the new size will not | |
2047 | * fit into the old indirect map, then we have to allocate a new | |
2048 | * indirect map and copy the old map pointers into this new map. | |
2049 | * | |
2050 | * If we are going from direct to indirect, then we have to allocate a | |
2051 | * new indirect map and copy the old direct pages into the first | |
2052 | * indirect page of the new map. | |
2053 | * NOTE: allocating memory here is dangerous, as we're in the | |
2054 | * pageout path. | |
2055 | */ | |
2056 | if (INDIRECT_CLMAP(new_size)) { | |
2057 | int new_map_size = INDIRECT_CLMAP_SIZE(new_size); | |
2058 | ||
2059 | /* | |
2060 | * Get a new indirect map and zero it. | |
2061 | */ | |
2062 | old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size); | |
2063 | if (vs->vs_indirect && | |
2064 | (new_map_size == old_map_size)) { | |
2065 | bs_commit(new_size - vs->vs_size); | |
2066 | vs->vs_size = new_size; | |
2067 | return 0; | |
2068 | } | |
2069 | ||
2070 | new_imap = (struct vs_map **)kalloc(new_map_size); | |
2071 | if (new_imap == NULL) { | |
2072 | return -1; | |
2073 | } | |
2074 | memset(new_imap, 0, new_map_size); | |
2075 | ||
2076 | if (vs->vs_indirect) { | |
2077 | /* Copy old entries into new map */ | |
2078 | memcpy(new_imap, vs->vs_imap, old_map_size); | |
2079 | /* Arrange to free the old map */ | |
2080 | old_map = (void *) vs->vs_imap; | |
2081 | newdsize = 0; | |
2082 | } else { /* Old map was a direct map */ | |
2083 | /* Allocate an indirect page */ | |
2084 | if ((new_imap[0] = (struct vs_map *) | |
2085 | kalloc(CLMAP_THRESHOLD)) == NULL) { | |
91447636 | 2086 | kfree(new_imap, new_map_size); |
1c79356b A |
2087 | return -1; |
2088 | } | |
2089 | new_dmap = new_imap[0]; | |
2090 | newdsize = CLMAP_ENTRIES; | |
2091 | } | |
2092 | } else { | |
2093 | new_imap = NULL; | |
2094 | newdsize = new_size; | |
2095 | /* | |
2096 | * If the new map is a direct map, then the old map must | |
2097 | * also have been a direct map. All we have to do is | |
2098 | * to allocate a new direct map, copy the old entries | |
2099 | * into it and free the old map. | |
2100 | */ | |
2101 | if ((new_dmap = (struct vs_map *) | |
2102 | kalloc(CLMAP_SIZE(new_size))) == NULL) { | |
2103 | return -1; | |
2104 | } | |
2105 | } | |
2106 | if (newdsize) { | |
2107 | ||
2108 | /* Free the old map */ | |
2109 | old_map = (void *) vs->vs_dmap; | |
2110 | old_map_size = CLMAP_SIZE(vs->vs_size); | |
2111 | ||
2112 | /* Copy info from the old map into the new map */ | |
2113 | memcpy(new_dmap, vs->vs_dmap, old_map_size); | |
2114 | ||
2115 | /* Initialize the rest of the new map */ | |
2116 | for (i = vs->vs_size; i < newdsize; i++) | |
2117 | VSM_CLR(new_dmap[i]); | |
2118 | } | |
2119 | if (new_imap) { | |
2120 | vs->vs_imap = new_imap; | |
2121 | vs->vs_indirect = TRUE; | |
2122 | } else | |
2123 | vs->vs_dmap = new_dmap; | |
2124 | bs_commit(new_size - vs->vs_size); | |
2125 | vs->vs_size = new_size; | |
2126 | if (old_map) | |
91447636 | 2127 | kfree(old_map, old_map_size); |
1c79356b A |
2128 | return 0; |
2129 | } | |
2130 | ||
b0d623f7 | 2131 | dp_offset_t |
1c79356b A |
2132 | ps_clmap( |
2133 | vstruct_t vs, | |
b0d623f7 | 2134 | dp_offset_t offset, |
1c79356b A |
2135 | struct clmap *clmap, |
2136 | int flag, | |
b0d623f7 | 2137 | dp_size_t size, |
1c79356b A |
2138 | int error) |
2139 | { | |
b0d623f7 A |
2140 | dp_offset_t cluster; /* The cluster of offset. */ |
2141 | dp_offset_t newcl; /* The new cluster allocated. */ | |
2142 | dp_offset_t newoff; | |
91447636 | 2143 | unsigned int i; |
1c79356b | 2144 | struct vs_map *vsmap; |
1c79356b A |
2145 | |
2146 | VS_MAP_LOCK(vs); | |
2147 | ||
2148 | ASSERT(vs->vs_dmap); | |
55e303ae | 2149 | cluster = atop_32(offset) >> vs->vs_clshift; |
1c79356b A |
2150 | |
2151 | /* | |
2152 | * Initialize cluster error value | |
2153 | */ | |
2154 | clmap->cl_error = 0; | |
2155 | ||
2156 | /* | |
2157 | * If the object has grown, extend the page map. | |
2158 | */ | |
2159 | if (cluster >= vs->vs_size) { | |
2160 | if (flag == CL_FIND) { | |
2161 | /* Do not allocate if just doing a lookup */ | |
2162 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2163 | return (dp_offset_t) -1; |
1c79356b A |
2164 | } |
2165 | if (ps_map_extend(vs, cluster + 1)) { | |
2166 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2167 | return (dp_offset_t) -1; |
1c79356b A |
2168 | } |
2169 | } | |
2170 | ||
2171 | /* | |
2172 | * Look for the desired cluster. If the map is indirect, then we | |
2173 | * have a two level lookup. First find the indirect block, then | |
2174 | * find the actual cluster. If the indirect block has not yet | |
2175 | * been allocated, then do so. If the cluster has not yet been | |
2176 | * allocated, then do so. | |
2177 | * | |
2178 | * If any of the allocations fail, then return an error. | |
2179 | * Don't allocate if just doing a lookup. | |
2180 | */ | |
2181 | if (vs->vs_indirect) { | |
2182 | long ind_block = cluster/CLMAP_ENTRIES; | |
2183 | ||
2184 | /* Is the indirect block allocated? */ | |
2185 | vsmap = vs->vs_imap[ind_block]; | |
2186 | if (vsmap == NULL) { | |
2187 | if (flag == CL_FIND) { | |
2188 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2189 | return (dp_offset_t) -1; |
1c79356b A |
2190 | } |
2191 | ||
2192 | /* Allocate the indirect block */ | |
2193 | vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD); | |
2194 | if (vsmap == NULL) { | |
2195 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2196 | return (dp_offset_t) -1; |
1c79356b A |
2197 | } |
2198 | /* Initialize the cluster offsets */ | |
2199 | for (i = 0; i < CLMAP_ENTRIES; i++) | |
2200 | VSM_CLR(vsmap[i]); | |
2201 | vs->vs_imap[ind_block] = vsmap; | |
2202 | } | |
2203 | } else | |
2204 | vsmap = vs->vs_dmap; | |
2205 | ||
2206 | ASSERT(vsmap); | |
2207 | vsmap += cluster%CLMAP_ENTRIES; | |
2208 | ||
2209 | /* | |
2210 | * At this point, vsmap points to the struct vs_map desired. | |
2211 | * | |
2212 | * Look in the map for the cluster, if there was an error on a | |
2213 | * previous write, flag it and return. If it is not yet | |
2214 | * allocated, then allocate it, if we're writing; if we're | |
2215 | * doing a lookup and the cluster's not allocated, return error. | |
2216 | */ | |
2217 | if (VSM_ISERR(*vsmap)) { | |
2218 | clmap->cl_error = VSM_GETERR(*vsmap); | |
2219 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2220 | return (dp_offset_t) -1; |
1c79356b A |
2221 | } else if (VSM_ISCLR(*vsmap)) { |
2222 | int psindex; | |
2223 | ||
2224 | if (flag == CL_FIND) { | |
2225 | /* | |
2226 | * If there's an error and the entry is clear, then | |
2227 | * we've run out of swap space. Record the error | |
2228 | * here and return. | |
2229 | */ | |
2230 | if (error) { | |
2231 | VSM_SETERR(*vsmap, error); | |
2232 | } | |
2233 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2234 | return (dp_offset_t) -1; |
1c79356b A |
2235 | } else { |
2236 | /* | |
2237 | * Attempt to allocate a cluster from the paging segment | |
2238 | */ | |
2239 | newcl = ps_allocate_cluster(vs, &psindex, | |
2240 | PAGING_SEGMENT_NULL); | |
b0d623f7 | 2241 | if (newcl == (dp_offset_t) -1) { |
1c79356b | 2242 | VS_MAP_UNLOCK(vs); |
b0d623f7 | 2243 | return (dp_offset_t) -1; |
1c79356b A |
2244 | } |
2245 | VSM_CLR(*vsmap); | |
2246 | VSM_SETCLOFF(*vsmap, newcl); | |
2247 | VSM_SETPS(*vsmap, psindex); | |
2248 | } | |
2249 | } else | |
2250 | newcl = VSM_CLOFF(*vsmap); | |
2251 | ||
2252 | /* | |
2253 | * Fill in pertinent fields of the clmap | |
2254 | */ | |
2255 | clmap->cl_ps = VSM_PS(*vsmap); | |
2256 | clmap->cl_numpages = VSCLSIZE(vs); | |
2257 | clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap); | |
2258 | ||
2259 | /* | |
2260 | * Byte offset in paging segment is byte offset to cluster plus | |
2261 | * byte offset within cluster. It looks ugly, but should be | |
2262 | * relatively quick. | |
2263 | */ | |
2264 | ASSERT(trunc_page(offset) == offset); | |
55e303ae | 2265 | newcl = ptoa_32(newcl) << vs->vs_clshift; |
1c79356b A |
2266 | newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1); |
2267 | if (flag == CL_ALLOC) { | |
2268 | /* | |
2269 | * set bits in the allocation bitmap according to which | |
2270 | * pages were requested. size is in bytes. | |
2271 | */ | |
55e303ae | 2272 | i = atop_32(newoff); |
1c79356b A |
2273 | while ((size > 0) && (i < VSCLSIZE(vs))) { |
2274 | VSM_SETALLOC(*vsmap, i); | |
2275 | i++; | |
2276 | size -= vm_page_size; | |
2277 | } | |
2278 | } | |
2279 | clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap); | |
2280 | if (newoff) { | |
2281 | /* | |
2282 | * Offset is not cluster aligned, so number of pages | |
2283 | * and bitmaps must be adjusted | |
2284 | */ | |
55e303ae | 2285 | clmap->cl_numpages -= atop_32(newoff); |
1c79356b A |
2286 | CLMAP_SHIFT(clmap, vs); |
2287 | CLMAP_SHIFTALLOC(clmap, vs); | |
2288 | } | |
2289 | ||
2290 | /* | |
2291 | * | |
2292 | * The setting of valid bits and handling of write errors | |
2293 | * must be done here, while we hold the lock on the map. | |
2294 | * It logically should be done in ps_vs_write_complete(). | |
2295 | * The size and error information has been passed from | |
2296 | * ps_vs_write_complete(). If the size parameter is non-zero, | |
2297 | * then there is work to be done. If error is also non-zero, | |
2298 | * then the error number is recorded in the cluster and the | |
2299 | * entire cluster is in error. | |
2300 | */ | |
2301 | if (size && flag == CL_FIND) { | |
b0d623f7 | 2302 | dp_offset_t off = (dp_offset_t) 0; |
1c79356b A |
2303 | |
2304 | if (!error) { | |
2305 | for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0; | |
2306 | i++) { | |
2307 | VSM_SETPG(*vsmap, i); | |
2308 | size -= vm_page_size; | |
2309 | } | |
2310 | ASSERT(i <= VSCLSIZE(vs)); | |
2311 | } else { | |
2312 | BS_STAT(clmap->cl_ps->ps_bs, | |
2313 | clmap->cl_ps->ps_bs->bs_pages_out_fail += | |
55e303ae | 2314 | atop_32(size)); |
1c79356b A |
2315 | off = VSM_CLOFF(*vsmap); |
2316 | VSM_SETERR(*vsmap, error); | |
2317 | } | |
2318 | /* | |
2319 | * Deallocate cluster if error, and no valid pages | |
2320 | * already present. | |
2321 | */ | |
b0d623f7 | 2322 | if (off != (dp_offset_t) 0) |
1c79356b A |
2323 | ps_deallocate_cluster(clmap->cl_ps, off); |
2324 | VS_MAP_UNLOCK(vs); | |
b0d623f7 | 2325 | return (dp_offset_t) 0; |
1c79356b A |
2326 | } else |
2327 | VS_MAP_UNLOCK(vs); | |
2328 | ||
91447636 A |
2329 | DP_DEBUG(DEBUG_VS_INTERNAL, |
2330 | ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n", | |
2331 | newcl+newoff, (int) vs, (int) vsmap, flag)); | |
2332 | DP_DEBUG(DEBUG_VS_INTERNAL, | |
2333 | (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n", | |
2334 | (int) clmap->cl_ps, clmap->cl_numpages, | |
2335 | (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map)); | |
1c79356b A |
2336 | |
2337 | return (newcl + newoff); | |
2338 | } | |
2339 | ||
b0d623f7 | 2340 | void ps_clunmap(vstruct_t, dp_offset_t, dp_size_t); /* forward */ |
1c79356b A |
2341 | |
2342 | void | |
2343 | ps_clunmap( | |
2344 | vstruct_t vs, | |
b0d623f7 A |
2345 | dp_offset_t offset, |
2346 | dp_size_t length) | |
1c79356b | 2347 | { |
b0d623f7 | 2348 | dp_offset_t cluster; /* The cluster number of offset */ |
1c79356b | 2349 | struct vs_map *vsmap; |
6d2010ae A |
2350 | struct ps_vnode_trim_data trim_data; |
2351 | ||
2352 | ps_vnode_trim_init(&trim_data); | |
1c79356b A |
2353 | |
2354 | VS_MAP_LOCK(vs); | |
2355 | ||
2356 | /* | |
2357 | * Loop through all clusters in this range, freeing paging segment | |
2358 | * clusters and map entries as encountered. | |
2359 | */ | |
2360 | while (length > 0) { | |
b0d623f7 | 2361 | dp_offset_t newoff; |
91447636 | 2362 | unsigned int i; |
1c79356b | 2363 | |
55e303ae | 2364 | cluster = atop_32(offset) >> vs->vs_clshift; |
1c79356b A |
2365 | if (vs->vs_indirect) /* indirect map */ |
2366 | vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES]; | |
2367 | else | |
2368 | vsmap = vs->vs_dmap; | |
2369 | if (vsmap == NULL) { | |
6d2010ae | 2370 | ps_vnode_trim_now(&trim_data); |
1c79356b A |
2371 | VS_MAP_UNLOCK(vs); |
2372 | return; | |
2373 | } | |
2374 | vsmap += cluster%CLMAP_ENTRIES; | |
2375 | if (VSM_ISCLR(*vsmap)) { | |
6d2010ae | 2376 | ps_vnode_trim_now(&trim_data); |
1c79356b A |
2377 | length -= vm_page_size; |
2378 | offset += vm_page_size; | |
2379 | continue; | |
2380 | } | |
2381 | /* | |
2382 | * We've got a valid mapping. Clear it and deallocate | |
2383 | * paging segment cluster pages. | |
2384 | * Optimize for entire cluster cleraing. | |
2385 | */ | |
91447636 | 2386 | if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) { |
1c79356b A |
2387 | /* |
2388 | * Not cluster aligned. | |
2389 | */ | |
2390 | ASSERT(trunc_page(newoff) == newoff); | |
55e303ae | 2391 | i = atop_32(newoff); |
1c79356b A |
2392 | } else |
2393 | i = 0; | |
2394 | while ((i < VSCLSIZE(vs)) && (length > 0)) { | |
2395 | VSM_CLRPG(*vsmap, i); | |
2396 | VSM_CLRALLOC(*vsmap, i); | |
2397 | length -= vm_page_size; | |
2398 | offset += vm_page_size; | |
2399 | i++; | |
2400 | } | |
2401 | ||
2402 | /* | |
2403 | * If map entry is empty, clear and deallocate cluster. | |
2404 | */ | |
6d2010ae A |
2405 | if (!VSM_BMAP(*vsmap)) { |
2406 | ps_vnode_trim_more(&trim_data, | |
2407 | vsmap, | |
2408 | vs->vs_clshift, | |
2409 | VSCLSIZE(vs) * vm_page_size); | |
1c79356b A |
2410 | ps_deallocate_cluster(VSM_PS(*vsmap), |
2411 | VSM_CLOFF(*vsmap)); | |
2412 | VSM_CLR(*vsmap); | |
6d2010ae A |
2413 | } else { |
2414 | ps_vnode_trim_now(&trim_data); | |
1c79356b A |
2415 | } |
2416 | } | |
6d2010ae | 2417 | ps_vnode_trim_now(&trim_data); |
1c79356b A |
2418 | |
2419 | VS_MAP_UNLOCK(vs); | |
2420 | } | |
2421 | ||
b0d623f7 | 2422 | void ps_vs_write_complete(vstruct_t, dp_offset_t, dp_size_t, int); /* forward */ |
1c79356b A |
2423 | |
2424 | void | |
2425 | ps_vs_write_complete( | |
2426 | vstruct_t vs, | |
b0d623f7 A |
2427 | dp_offset_t offset, |
2428 | dp_size_t size, | |
1c79356b A |
2429 | int error) |
2430 | { | |
2431 | struct clmap clmap; | |
2432 | ||
2433 | /* | |
2434 | * Get the struct vsmap for this cluster. | |
2435 | * Use READ, even though it was written, because the | |
2436 | * cluster MUST be present, unless there was an error | |
2437 | * in the original ps_clmap (e.g. no space), in which | |
2438 | * case, nothing happens. | |
2439 | * | |
2440 | * Must pass enough information to ps_clmap to allow it | |
2441 | * to set the vs_map structure bitmap under lock. | |
2442 | */ | |
2443 | (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error); | |
2444 | } | |
2445 | ||
b0d623f7 | 2446 | void vs_cl_write_complete(vstruct_t, paging_segment_t, dp_offset_t, vm_offset_t, dp_size_t, boolean_t, int); /* forward */ |
1c79356b A |
2447 | |
2448 | void | |
2449 | vs_cl_write_complete( | |
b0d623f7 | 2450 | vstruct_t vs, |
91447636 | 2451 | __unused paging_segment_t ps, |
b0d623f7 | 2452 | dp_offset_t offset, |
91447636 | 2453 | __unused vm_offset_t addr, |
b0d623f7 A |
2454 | dp_size_t size, |
2455 | boolean_t async, | |
2456 | int error) | |
1c79356b | 2457 | { |
91447636 | 2458 | // kern_return_t kr; |
1c79356b A |
2459 | |
2460 | if (error) { | |
2461 | /* | |
2462 | * For internal objects, the error is recorded on a | |
2463 | * per-cluster basis by ps_clmap() which is called | |
2464 | * by ps_vs_write_complete() below. | |
2465 | */ | |
2466 | dprintf(("write failed error = 0x%x\n", error)); | |
2467 | /* add upl_abort code here */ | |
2468 | } else | |
55e303ae | 2469 | GSTAT(global_stats.gs_pages_out += atop_32(size)); |
1c79356b A |
2470 | /* |
2471 | * Notify the vstruct mapping code, so it can do its accounting. | |
2472 | */ | |
2473 | ps_vs_write_complete(vs, offset, size, error); | |
2474 | ||
2475 | if (async) { | |
2476 | VS_LOCK(vs); | |
2477 | ASSERT(vs->vs_async_pending > 0); | |
2478 | vs->vs_async_pending -= size; | |
0b4e3aa0 A |
2479 | if (vs->vs_async_pending == 0 && vs->vs_waiting_async) { |
2480 | vs->vs_waiting_async = FALSE; | |
1c79356b | 2481 | VS_UNLOCK(vs); |
0b4e3aa0 | 2482 | thread_wakeup(&vs->vs_async_pending); |
1c79356b A |
2483 | } else { |
2484 | VS_UNLOCK(vs); | |
2485 | } | |
2486 | } | |
2487 | } | |
2488 | ||
2489 | #ifdef DEVICE_PAGING | |
2490 | kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t); | |
2491 | ||
2492 | kern_return_t | |
2493 | device_write_reply( | |
2494 | MACH_PORT_FACE reply_port, | |
2495 | kern_return_t device_code, | |
2496 | io_buf_len_t bytes_written) | |
2497 | { | |
2498 | struct vs_async *vsa; | |
1c79356b A |
2499 | |
2500 | vsa = (struct vs_async *) | |
39236c6e | 2501 | ((struct vstruct_alias *)(reply_port->ip_alias))->vs; |
1c79356b A |
2502 | |
2503 | if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) { | |
2504 | device_code = KERN_FAILURE; | |
2505 | } | |
2506 | ||
2507 | vsa->vsa_error = device_code; | |
2508 | ||
2509 | ||
2510 | ASSERT(vsa->vsa_vs != VSTRUCT_NULL); | |
2511 | if(vsa->vsa_flags & VSA_TRANSFER) { | |
2512 | /* revisit when async disk segments redone */ | |
2513 | if(vsa->vsa_error) { | |
2514 | /* need to consider error condition. re-write data or */ | |
2515 | /* throw it away here. */ | |
91447636 | 2516 | vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr); |
1c79356b A |
2517 | } |
2518 | ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset, | |
2519 | vsa->vsa_size, vsa->vsa_error); | |
2520 | } else { | |
2521 | vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset, | |
2522 | vsa->vsa_addr, vsa->vsa_size, TRUE, | |
2523 | vsa->vsa_error); | |
2524 | } | |
2525 | VS_FREE_ASYNC(vsa); | |
2526 | ||
2527 | return KERN_SUCCESS; | |
2528 | } | |
2529 | ||
2530 | kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t); | |
2531 | kern_return_t | |
2532 | device_write_reply_inband( | |
2533 | MACH_PORT_FACE reply_port, | |
2534 | kern_return_t return_code, | |
2535 | io_buf_len_t bytes_written) | |
2536 | { | |
2537 | panic("device_write_reply_inband: illegal"); | |
2538 | return KERN_SUCCESS; | |
2539 | } | |
2540 | ||
2541 | kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t); | |
2542 | kern_return_t | |
2543 | device_read_reply( | |
2544 | MACH_PORT_FACE reply_port, | |
2545 | kern_return_t return_code, | |
2546 | io_buf_ptr_t data, | |
2547 | mach_msg_type_number_t dataCnt) | |
2548 | { | |
2549 | struct vs_async *vsa; | |
2550 | vsa = (struct vs_async *) | |
39236c6e | 2551 | ((struct vstruct_alias *)(reply_port->defpager_importance.alias))->vs; |
1c79356b A |
2552 | vsa->vsa_addr = (vm_offset_t)data; |
2553 | vsa->vsa_size = (vm_size_t)dataCnt; | |
2554 | vsa->vsa_error = return_code; | |
b0d623f7 | 2555 | thread_wakeup(&vsa); |
1c79356b A |
2556 | return KERN_SUCCESS; |
2557 | } | |
2558 | ||
2559 | kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t); | |
2560 | kern_return_t | |
2561 | device_read_reply_inband( | |
2562 | MACH_PORT_FACE reply_port, | |
2563 | kern_return_t return_code, | |
2564 | io_buf_ptr_inband_t data, | |
2565 | mach_msg_type_number_t dataCnt) | |
2566 | { | |
2567 | panic("device_read_reply_inband: illegal"); | |
2568 | return KERN_SUCCESS; | |
2569 | } | |
2570 | ||
2571 | kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t); | |
2572 | kern_return_t | |
2573 | device_read_reply_overwrite( | |
2574 | MACH_PORT_FACE reply_port, | |
2575 | kern_return_t return_code, | |
2576 | io_buf_len_t bytes_read) | |
2577 | { | |
2578 | panic("device_read_reply_overwrite: illegal\n"); | |
2579 | return KERN_SUCCESS; | |
2580 | } | |
2581 | ||
2582 | kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE); | |
2583 | kern_return_t | |
2584 | device_open_reply( | |
2585 | MACH_PORT_FACE reply_port, | |
2586 | kern_return_t return_code, | |
2587 | MACH_PORT_FACE device_port) | |
2588 | { | |
2589 | panic("device_open_reply: illegal\n"); | |
2590 | return KERN_SUCCESS; | |
2591 | } | |
2592 | ||
1c79356b A |
2593 | kern_return_t |
2594 | ps_read_device( | |
2595 | paging_segment_t ps, | |
b0d623f7 | 2596 | dp_offset_t offset, |
1c79356b A |
2597 | vm_offset_t *bufferp, |
2598 | unsigned int size, | |
2599 | unsigned int *residualp, | |
2600 | int flags) | |
2601 | { | |
2602 | kern_return_t kr; | |
2603 | recnum_t dev_offset; | |
2604 | unsigned int bytes_wanted; | |
2605 | unsigned int bytes_read; | |
2606 | unsigned int total_read; | |
2607 | vm_offset_t dev_buffer; | |
2608 | vm_offset_t buf_ptr; | |
2609 | unsigned int records_read; | |
1c79356b | 2610 | struct vs_async *vsa; |
1c79356b A |
2611 | |
2612 | device_t device; | |
2613 | vm_map_copy_t device_data = NULL; | |
2614 | default_pager_thread_t *dpt = NULL; | |
2615 | ||
2616 | device = dev_port_lookup(ps->ps_device); | |
55e303ae | 2617 | clustered_reads[atop_32(size)]++; |
1c79356b A |
2618 | |
2619 | dev_offset = (ps->ps_offset + | |
2620 | (offset >> (vm_page_shift - ps->ps_record_shift))); | |
2621 | bytes_wanted = size; | |
2622 | total_read = 0; | |
2623 | *bufferp = (vm_offset_t)NULL; | |
2624 | ||
2625 | do { | |
2626 | vsa = VS_ALLOC_ASYNC(); | |
2627 | if (vsa) { | |
2628 | vsa->vsa_vs = NULL; | |
2629 | vsa->vsa_addr = 0; | |
2630 | vsa->vsa_offset = 0; | |
2631 | vsa->vsa_size = 0; | |
2632 | vsa->vsa_ps = NULL; | |
2633 | } | |
1c79356b A |
2634 | ip_lock(vsa->reply_port); |
2635 | vsa->reply_port->ip_sorights++; | |
2636 | ip_reference(vsa->reply_port); | |
2637 | ip_unlock(vsa->reply_port); | |
2638 | kr = ds_device_read_common(device, | |
2639 | vsa->reply_port, | |
2640 | (mach_msg_type_name_t) | |
2641 | MACH_MSG_TYPE_MOVE_SEND_ONCE, | |
2642 | (dev_mode_t) 0, | |
2643 | dev_offset, | |
2644 | bytes_wanted, | |
2645 | (IO_READ | IO_CALL), | |
2646 | (io_buf_ptr_t *) &dev_buffer, | |
2647 | (mach_msg_type_number_t *) &bytes_read); | |
2648 | if(kr == MIG_NO_REPLY) { | |
b0d623f7 | 2649 | assert_wait(&vsa, THREAD_UNINT); |
9bccf70c | 2650 | thread_block(THREAD_CONTINUE_NULL); |
1c79356b A |
2651 | |
2652 | dev_buffer = vsa->vsa_addr; | |
2653 | bytes_read = (unsigned int)vsa->vsa_size; | |
2654 | kr = vsa->vsa_error; | |
2655 | } | |
2656 | VS_FREE_ASYNC(vsa); | |
2657 | if (kr != KERN_SUCCESS || bytes_read == 0) { | |
2658 | break; | |
2659 | } | |
2660 | total_read += bytes_read; | |
2661 | ||
2662 | /* | |
2663 | * If we got the entire range, use the returned dev_buffer. | |
2664 | */ | |
2665 | if (bytes_read == size) { | |
2666 | *bufferp = (vm_offset_t)dev_buffer; | |
2667 | break; | |
2668 | } | |
2669 | ||
2670 | #if 1 | |
2671 | dprintf(("read only %d bytes out of %d\n", | |
2672 | bytes_read, bytes_wanted)); | |
2673 | #endif | |
2674 | if(dpt == NULL) { | |
2675 | dpt = get_read_buffer(); | |
2676 | buf_ptr = dpt->dpt_buffer; | |
2677 | *bufferp = (vm_offset_t)buf_ptr; | |
2678 | } | |
2679 | /* | |
2680 | * Otherwise, copy the data into the provided buffer (*bufferp) | |
2681 | * and append the rest of the range as it comes in. | |
2682 | */ | |
2683 | memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read); | |
2684 | buf_ptr += bytes_read; | |
2685 | bytes_wanted -= bytes_read; | |
2686 | records_read = (bytes_read >> | |
2687 | (vm_page_shift - ps->ps_record_shift)); | |
2688 | dev_offset += records_read; | |
91447636 A |
2689 | DP_DEBUG(DEBUG_VS_INTERNAL, |
2690 | ("calling vm_deallocate(addr=0x%X,size=0x%X)\n", | |
2691 | dev_buffer, bytes_read)); | |
1c79356b A |
2692 | if (vm_deallocate(kernel_map, dev_buffer, bytes_read) |
2693 | != KERN_SUCCESS) | |
2694 | Panic("dealloc buf"); | |
2695 | } while (bytes_wanted); | |
2696 | ||
2697 | *residualp = size - total_read; | |
2698 | if((dev_buffer != *bufferp) && (total_read != 0)) { | |
2699 | vm_offset_t temp_buffer; | |
91447636 | 2700 | vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE); |
1c79356b A |
2701 | memcpy((void *) temp_buffer, (void *) *bufferp, total_read); |
2702 | if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read, | |
2703 | VM_MAP_COPYIN_OPT_SRC_DESTROY | | |
2704 | VM_MAP_COPYIN_OPT_STEAL_PAGES | | |
2705 | VM_MAP_COPYIN_OPT_PMAP_ENTER, | |
2706 | (vm_map_copy_t *)&device_data, FALSE)) | |
2707 | panic("ps_read_device: cannot copyin locally provided buffer\n"); | |
2708 | } | |
2709 | else if((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)){ | |
2710 | if(vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read, | |
2711 | VM_MAP_COPYIN_OPT_SRC_DESTROY | | |
2712 | VM_MAP_COPYIN_OPT_STEAL_PAGES | | |
2713 | VM_MAP_COPYIN_OPT_PMAP_ENTER, | |
2714 | (vm_map_copy_t *)&device_data, FALSE)) | |
2715 | panic("ps_read_device: cannot copyin backing store provided buffer\n"); | |
2716 | } | |
2717 | else { | |
2718 | device_data = NULL; | |
2719 | } | |
2720 | *bufferp = (vm_offset_t)device_data; | |
2721 | ||
2722 | if(dpt != NULL) { | |
2723 | /* Free the receive buffer */ | |
2724 | dpt->checked_out = 0; | |
2725 | thread_wakeup(&dpt_array); | |
2726 | } | |
2727 | return KERN_SUCCESS; | |
2728 | } | |
2729 | ||
1c79356b A |
2730 | kern_return_t |
2731 | ps_write_device( | |
2732 | paging_segment_t ps, | |
b0d623f7 | 2733 | dp_offset_t offset, |
1c79356b A |
2734 | vm_offset_t addr, |
2735 | unsigned int size, | |
2736 | struct vs_async *vsa) | |
2737 | { | |
2738 | recnum_t dev_offset; | |
2739 | io_buf_len_t bytes_to_write, bytes_written; | |
2740 | recnum_t records_written; | |
2741 | kern_return_t kr; | |
2742 | MACH_PORT_FACE reply_port; | |
1c79356b A |
2743 | |
2744 | ||
2745 | ||
55e303ae | 2746 | clustered_writes[atop_32(size)]++; |
1c79356b A |
2747 | |
2748 | dev_offset = (ps->ps_offset + | |
2749 | (offset >> (vm_page_shift - ps->ps_record_shift))); | |
2750 | bytes_to_write = size; | |
2751 | ||
2752 | if (vsa) { | |
2753 | /* | |
2754 | * Asynchronous write. | |
2755 | */ | |
2756 | reply_port = vsa->reply_port; | |
2757 | ip_lock(reply_port); | |
2758 | reply_port->ip_sorights++; | |
2759 | ip_reference(reply_port); | |
2760 | ip_unlock(reply_port); | |
2761 | { | |
2762 | device_t device; | |
2763 | device = dev_port_lookup(ps->ps_device); | |
2764 | ||
2765 | vsa->vsa_addr = addr; | |
2766 | kr=ds_device_write_common(device, | |
2767 | reply_port, | |
2768 | (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE, | |
2769 | (dev_mode_t) 0, | |
2770 | dev_offset, | |
2771 | (io_buf_ptr_t) addr, | |
2772 | size, | |
2773 | (IO_WRITE | IO_CALL), | |
2774 | &bytes_written); | |
2775 | } | |
2776 | if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) { | |
2777 | if (verbose) | |
2778 | dprintf(("%s0x%x, addr=0x%x," | |
2779 | "size=0x%x,offset=0x%x\n", | |
2780 | "device_write_request returned ", | |
2781 | kr, addr, size, offset)); | |
2782 | BS_STAT(ps->ps_bs, | |
55e303ae | 2783 | ps->ps_bs->bs_pages_out_fail += atop_32(size)); |
1c79356b A |
2784 | /* do the completion notification to free resources */ |
2785 | device_write_reply(reply_port, kr, 0); | |
2786 | return PAGER_ERROR; | |
2787 | } | |
2788 | } else do { | |
2789 | /* | |
2790 | * Synchronous write. | |
2791 | */ | |
2792 | { | |
2793 | device_t device; | |
2794 | device = dev_port_lookup(ps->ps_device); | |
2795 | kr=ds_device_write_common(device, | |
2796 | IP_NULL, 0, | |
2797 | (dev_mode_t) 0, | |
2798 | dev_offset, | |
2799 | (io_buf_ptr_t) addr, | |
2800 | size, | |
2801 | (IO_WRITE | IO_SYNC | IO_KERNEL_BUF), | |
2802 | &bytes_written); | |
2803 | } | |
2804 | if (kr != KERN_SUCCESS) { | |
2805 | dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n", | |
2806 | "device_write returned ", | |
2807 | kr, addr, size, offset)); | |
2808 | BS_STAT(ps->ps_bs, | |
55e303ae | 2809 | ps->ps_bs->bs_pages_out_fail += atop_32(size)); |
1c79356b A |
2810 | return PAGER_ERROR; |
2811 | } | |
2812 | if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1)) | |
2813 | Panic("fragmented write"); | |
2814 | records_written = (bytes_written >> | |
2815 | (vm_page_shift - ps->ps_record_shift)); | |
2816 | dev_offset += records_written; | |
2817 | #if 1 | |
2818 | if (bytes_written != bytes_to_write) { | |
2819 | dprintf(("wrote only %d bytes out of %d\n", | |
2820 | bytes_written, bytes_to_write)); | |
2821 | } | |
2822 | #endif | |
2823 | bytes_to_write -= bytes_written; | |
2824 | addr += bytes_written; | |
2825 | } while (bytes_to_write > 0); | |
2826 | ||
2827 | return PAGER_SUCCESS; | |
2828 | } | |
2829 | ||
2830 | ||
2831 | #else /* !DEVICE_PAGING */ | |
2832 | ||
2833 | kern_return_t | |
2834 | ps_read_device( | |
91447636 | 2835 | __unused paging_segment_t ps, |
b0d623f7 | 2836 | __unused dp_offset_t offset, |
91447636 A |
2837 | __unused vm_offset_t *bufferp, |
2838 | __unused unsigned int size, | |
2839 | __unused unsigned int *residualp, | |
2840 | __unused int flags) | |
1c79356b A |
2841 | { |
2842 | panic("ps_read_device not supported"); | |
0c530ab8 | 2843 | return KERN_FAILURE; |
1c79356b A |
2844 | } |
2845 | ||
91447636 | 2846 | kern_return_t |
1c79356b | 2847 | ps_write_device( |
91447636 | 2848 | __unused paging_segment_t ps, |
b0d623f7 | 2849 | __unused dp_offset_t offset, |
91447636 A |
2850 | __unused vm_offset_t addr, |
2851 | __unused unsigned int size, | |
2852 | __unused struct vs_async *vsa) | |
1c79356b A |
2853 | { |
2854 | panic("ps_write_device not supported"); | |
0c530ab8 | 2855 | return KERN_FAILURE; |
1c79356b A |
2856 | } |
2857 | ||
2858 | #endif /* DEVICE_PAGING */ | |
91447636 | 2859 | void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */ |
1c79356b A |
2860 | |
2861 | void | |
2862 | pvs_object_data_provided( | |
91447636 A |
2863 | __unused vstruct_t vs, |
2864 | __unused upl_t upl, | |
2865 | __unused upl_offset_t offset, | |
2866 | upl_size_t size) | |
1c79356b | 2867 | { |
39236c6e A |
2868 | #if RECLAIM_SWAP |
2869 | boolean_t empty; | |
2870 | #endif | |
1c79356b | 2871 | |
91447636 A |
2872 | DP_DEBUG(DEBUG_VS_INTERNAL, |
2873 | ("buffer=0x%x,offset=0x%x,size=0x%x\n", | |
2874 | upl, offset, size)); | |
1c79356b A |
2875 | |
2876 | ASSERT(size > 0); | |
55e303ae | 2877 | GSTAT(global_stats.gs_pages_in += atop_32(size)); |
1c79356b | 2878 | |
6d2010ae A |
2879 | /* check upl iosync flag instead of using RECLAIM_SWAP*/ |
2880 | #if RECLAIM_SWAP | |
2881 | if (size != upl->size) { | |
39236c6e A |
2882 | if (size) { |
2883 | ps_clunmap(vs, offset, size); | |
2884 | upl_commit_range(upl, 0, size, 0, NULL, 0, &empty); | |
2885 | } | |
6d2010ae A |
2886 | upl_abort(upl, UPL_ABORT_ERROR); |
2887 | upl_deallocate(upl); | |
2888 | } else { | |
2889 | ps_clunmap(vs, offset, size); | |
2890 | upl_commit(upl, NULL, 0); | |
2891 | upl_deallocate(upl); | |
2892 | } | |
2893 | #endif /* RECLAIM_SWAP */ | |
1c79356b A |
2894 | |
2895 | } | |
2896 | ||
2d21ac55 A |
2897 | static memory_object_offset_t last_start; |
2898 | static vm_size_t last_length; | |
2899 | ||
6d2010ae A |
2900 | /* |
2901 | * A "cnt" of 0 means that the caller just wants to check if the page at | |
2902 | * offset "vs_offset" exists in the backing store. That page hasn't been | |
2903 | * prepared, so no need to release it. | |
2904 | * | |
2905 | * A "cnt" of -1 means that the caller wants to bring back from the backing | |
2906 | * store all existing pages in the cluster containing "vs_offset". | |
2907 | */ | |
1c79356b A |
2908 | kern_return_t |
2909 | pvs_cluster_read( | |
2910 | vstruct_t vs, | |
b0d623f7 A |
2911 | dp_offset_t vs_offset, |
2912 | dp_size_t cnt, | |
2d21ac55 | 2913 | void *fault_info) |
1c79356b | 2914 | { |
1c79356b | 2915 | kern_return_t error = KERN_SUCCESS; |
2d21ac55 | 2916 | unsigned int size; |
0c530ab8 | 2917 | unsigned int residual; |
1c79356b | 2918 | unsigned int request_flags; |
b0d623f7 | 2919 | int io_flags = 0; |
2d21ac55 A |
2920 | int seg_index; |
2921 | int pages_in_cl; | |
0b4e3aa0 A |
2922 | int cl_size; |
2923 | int cl_mask; | |
2d21ac55 A |
2924 | int cl_index; |
2925 | unsigned int xfer_size; | |
b0d623f7 | 2926 | dp_offset_t orig_vs_offset; |
0b4c1975 A |
2927 | dp_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; |
2928 | paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT]; | |
0b4e3aa0 | 2929 | struct clmap clmap; |
2d21ac55 A |
2930 | upl_t upl; |
2931 | unsigned int page_list_count; | |
b0d623f7 A |
2932 | memory_object_offset_t cluster_start; |
2933 | vm_size_t cluster_length; | |
2934 | uint32_t io_streaming; | |
6d2010ae A |
2935 | int i; |
2936 | boolean_t io_sync = FALSE; | |
39236c6e | 2937 | boolean_t reclaim_all = FALSE; |
0b4e3aa0 A |
2938 | |
2939 | pages_in_cl = 1 << vs->vs_clshift; | |
2940 | cl_size = pages_in_cl * vm_page_size; | |
2941 | cl_mask = cl_size - 1; | |
1c79356b | 2942 | |
6d2010ae | 2943 | request_flags = UPL_NO_SYNC | UPL_RET_ONLY_ABSENT | UPL_SET_LITE; |
39236c6e A |
2944 | |
2945 | if (cnt == (dp_size_t) -1) | |
2946 | reclaim_all = TRUE; | |
6d2010ae | 2947 | |
39236c6e | 2948 | if (reclaim_all == TRUE) { |
6d2010ae A |
2949 | /* |
2950 | * We've been called from ps_vstruct_reclaim() to move all | |
2951 | * the object's swapped pages back to VM pages. | |
2952 | * This can put memory pressure on the system, so we do want | |
2953 | * to wait for free pages, to avoid getting in the way of the | |
2954 | * vm_pageout_scan() thread. | |
2955 | * Let's not use UPL_NOBLOCK in this case. | |
2956 | */ | |
2957 | vs_offset &= ~cl_mask; | |
2958 | i = pages_in_cl; | |
2959 | } else { | |
2960 | i = 1; | |
316670eb A |
2961 | |
2962 | /* | |
2963 | * if the I/O cluster size == PAGE_SIZE, we don't want to set | |
2964 | * the UPL_NOBLOCK since we may be trying to recover from a | |
2965 | * previous partial pagein I/O that occurred because we were low | |
2966 | * on memory and bailed early in order to honor the UPL_NOBLOCK... | |
2967 | * since we're only asking for a single page, we can block w/o fear | |
2968 | * of tying up pages while waiting for more to become available | |
2969 | */ | |
2970 | if (fault_info == NULL || ((vm_object_fault_info_t)fault_info)->cluster_size > PAGE_SIZE) | |
2971 | request_flags |= UPL_NOBLOCK; | |
6d2010ae A |
2972 | } |
2973 | ||
2974 | again: | |
2d21ac55 A |
2975 | cl_index = (vs_offset & cl_mask) / vm_page_size; |
2976 | ||
b0d623f7 | 2977 | if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (dp_offset_t)-1) || |
2d21ac55 A |
2978 | !CLMAP_ISSET(clmap, cl_index)) { |
2979 | /* | |
2980 | * the needed page doesn't exist in the backing store... | |
2981 | * we don't want to try to do any I/O, just abort the | |
2982 | * page and let the fault handler provide a zero-fill | |
2983 | */ | |
2984 | if (cnt == 0) { | |
2985 | /* | |
2986 | * The caller was just poking at us to see if | |
2987 | * the page has been paged out. No need to | |
2988 | * mess with the page at all. | |
2989 | * Just let the caller know we don't have that page. | |
2990 | */ | |
2991 | return KERN_FAILURE; | |
2992 | } | |
39236c6e | 2993 | if (reclaim_all == TRUE) { |
6d2010ae A |
2994 | i--; |
2995 | if (i == 0) { | |
2996 | /* no more pages in this cluster */ | |
2997 | return KERN_FAILURE; | |
2998 | } | |
2999 | /* try the next page in this cluster */ | |
3000 | vs_offset += vm_page_size; | |
3001 | goto again; | |
3002 | } | |
2d21ac55 A |
3003 | |
3004 | page_list_count = 0; | |
3005 | ||
3006 | memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset, | |
3007 | PAGE_SIZE, PAGE_SIZE, | |
3008 | &upl, NULL, &page_list_count, | |
316670eb A |
3009 | request_flags | UPL_SET_INTERNAL); |
3010 | upl_range_needed(upl, 0, 1); | |
2d21ac55 A |
3011 | |
3012 | if (clmap.cl_error) | |
3013 | upl_abort(upl, UPL_ABORT_ERROR); | |
3014 | else | |
3015 | upl_abort(upl, UPL_ABORT_UNAVAILABLE); | |
3016 | upl_deallocate(upl); | |
91447636 | 3017 | |
2d21ac55 A |
3018 | return KERN_SUCCESS; |
3019 | } | |
3020 | ||
3021 | if (cnt == 0) { | |
3022 | /* | |
3023 | * The caller was just poking at us to see if | |
3024 | * the page has been paged out. No need to | |
3025 | * mess with the page at all. | |
3026 | * Just let the caller know we do have that page. | |
3027 | */ | |
3028 | return KERN_SUCCESS; | |
3029 | } | |
39236c6e | 3030 | |
6d2010ae A |
3031 | if(((vm_object_fault_info_t)fault_info)->io_sync == TRUE ) { |
3032 | io_sync = TRUE; | |
3033 | } else { | |
3034 | #if RECLAIM_SWAP | |
3035 | io_sync = TRUE; | |
3036 | #endif /* RECLAIM_SWAP */ | |
3037 | } | |
3038 | ||
3039 | if( io_sync == TRUE ) { | |
3040 | ||
3041 | io_flags |= UPL_IOSYNC | UPL_NOCOMMIT; | |
3042 | #if USE_PRECIOUS | |
3043 | request_flags |= UPL_PRECIOUS | UPL_CLEAN_IN_PLACE; | |
3044 | #else /* USE_PRECIOUS */ | |
3045 | request_flags |= UPL_REQUEST_SET_DIRTY; | |
3046 | #endif /* USE_PRECIOUS */ | |
3047 | } | |
3048 | ||
91447636 A |
3049 | assert(dp_encryption_inited); |
3050 | if (dp_encryption) { | |
3051 | /* | |
3052 | * ENCRYPTED SWAP: | |
3053 | * request that the UPL be prepared for | |
3054 | * decryption. | |
3055 | */ | |
3056 | request_flags |= UPL_ENCRYPT; | |
6d2010ae | 3057 | io_flags |= UPL_PAGING_ENCRYPTED; |
91447636 | 3058 | } |
2d21ac55 | 3059 | orig_vs_offset = vs_offset; |
91447636 | 3060 | |
2d21ac55 A |
3061 | assert(cnt != 0); |
3062 | cnt = VM_SUPER_CLUSTER; | |
b0d623f7 A |
3063 | cluster_start = (memory_object_offset_t) vs_offset; |
3064 | cluster_length = (vm_size_t) cnt; | |
3065 | io_streaming = 0; | |
1c79356b | 3066 | |
2d21ac55 A |
3067 | /* |
3068 | * determine how big a speculative I/O we should try for... | |
3069 | */ | |
b0d623f7 A |
3070 | if (memory_object_cluster_size(vs->vs_control, &cluster_start, &cluster_length, &io_streaming, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) { |
3071 | assert(vs_offset >= (dp_offset_t) cluster_start && | |
3072 | vs_offset < (dp_offset_t) (cluster_start + cluster_length)); | |
3073 | vs_offset = (dp_offset_t) cluster_start; | |
3074 | cnt = (dp_size_t) cluster_length; | |
3075 | } else { | |
3076 | cluster_length = PAGE_SIZE; | |
2d21ac55 | 3077 | cnt = PAGE_SIZE; |
b0d623f7 A |
3078 | } |
3079 | ||
3080 | if (io_streaming) | |
3081 | io_flags |= UPL_IOSTREAMING; | |
2d21ac55 | 3082 | |
b0d623f7 A |
3083 | last_start = cluster_start; |
3084 | last_length = cluster_length; | |
2d21ac55 A |
3085 | |
3086 | /* | |
3087 | * This loop will be executed multiple times until the entire | |
3088 | * range has been looked at or we issue an I/O... if the request spans cluster | |
3089 | * boundaries, the clusters will be checked for logical continunity, | |
3090 | * if contiguous the I/O request will span multiple clusters... | |
3091 | * at most only 1 I/O will be issued... it will encompass the original offset | |
3092 | */ | |
3093 | while (cnt && error == KERN_SUCCESS) { | |
3094 | int ps_info_valid; | |
3095 | ||
3096 | if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) { | |
d12e1678 A |
3097 | size = VM_SUPER_CLUSTER; |
3098 | size -= vs_offset & cl_mask; | |
2d21ac55 | 3099 | } else if (cnt > VM_SUPER_CLUSTER) |
0b4e3aa0 | 3100 | size = VM_SUPER_CLUSTER; |
2d21ac55 | 3101 | else |
0b4e3aa0 | 3102 | size = cnt; |
2d21ac55 | 3103 | |
0b4e3aa0 | 3104 | cnt -= size; |
1c79356b | 3105 | |
0b4e3aa0 A |
3106 | ps_info_valid = 0; |
3107 | seg_index = 0; | |
1c79356b | 3108 | |
0b4e3aa0 | 3109 | while (size > 0 && error == KERN_SUCCESS) { |
2d21ac55 | 3110 | unsigned int abort_size; |
39236c6e | 3111 | unsigned int lsize; |
0b4e3aa0 A |
3112 | int failed_size; |
3113 | int beg_pseg; | |
3114 | int beg_indx; | |
b0d623f7 | 3115 | dp_offset_t cur_offset; |
1c79356b | 3116 | |
0b4e3aa0 A |
3117 | if ( !ps_info_valid) { |
3118 | ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0); | |
3119 | psp[seg_index] = CLMAP_PS(clmap); | |
3120 | ps_info_valid = 1; | |
1c79356b | 3121 | } |
0b4e3aa0 A |
3122 | /* |
3123 | * skip over unallocated physical segments | |
3124 | */ | |
b0d623f7 | 3125 | if (ps_offset[seg_index] == (dp_offset_t) -1) { |
0b4e3aa0 A |
3126 | abort_size = cl_size - (vs_offset & cl_mask); |
3127 | abort_size = MIN(abort_size, size); | |
3128 | ||
2d21ac55 A |
3129 | size -= abort_size; |
3130 | vs_offset += abort_size; | |
1c79356b | 3131 | |
0b4e3aa0 A |
3132 | seg_index++; |
3133 | ps_info_valid = 0; | |
2d21ac55 | 3134 | |
0b4e3aa0 | 3135 | continue; |
1c79356b | 3136 | } |
0b4e3aa0 A |
3137 | cl_index = (vs_offset & cl_mask) / vm_page_size; |
3138 | ||
3139 | for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) { | |
3140 | /* | |
3141 | * skip over unallocated pages | |
3142 | */ | |
3143 | if (CLMAP_ISSET(clmap, cl_index)) | |
3144 | break; | |
3145 | abort_size += vm_page_size; | |
3146 | } | |
3147 | if (abort_size) { | |
2d21ac55 A |
3148 | size -= abort_size; |
3149 | vs_offset += abort_size; | |
0b4e3aa0 A |
3150 | |
3151 | if (cl_index == pages_in_cl) { | |
3152 | /* | |
3153 | * if we're at the end of this physical cluster | |
3154 | * then bump to the next one and continue looking | |
3155 | */ | |
3156 | seg_index++; | |
3157 | ps_info_valid = 0; | |
2d21ac55 | 3158 | |
0b4e3aa0 A |
3159 | continue; |
3160 | } | |
3161 | if (size == 0) | |
3162 | break; | |
3163 | } | |
1c79356b | 3164 | /* |
0b4e3aa0 A |
3165 | * remember the starting point of the first allocated page |
3166 | * for the I/O we're about to issue | |
1c79356b | 3167 | */ |
0b4e3aa0 A |
3168 | beg_pseg = seg_index; |
3169 | beg_indx = cl_index; | |
3170 | cur_offset = vs_offset; | |
3171 | ||
3172 | /* | |
3173 | * calculate the size of the I/O that we can do... | |
3174 | * this may span multiple physical segments if | |
3175 | * they are contiguous | |
3176 | */ | |
3177 | for (xfer_size = 0; xfer_size < size; ) { | |
3178 | ||
2d21ac55 | 3179 | while (cl_index < pages_in_cl && xfer_size < size) { |
0b4e3aa0 | 3180 | /* |
55e303ae | 3181 | * accumulate allocated pages within |
d12e1678 | 3182 | * a physical segment |
1c79356b | 3183 | */ |
0b4e3aa0 A |
3184 | if (CLMAP_ISSET(clmap, cl_index)) { |
3185 | xfer_size += vm_page_size; | |
3186 | cur_offset += vm_page_size; | |
3187 | cl_index++; | |
3188 | ||
3189 | BS_STAT(psp[seg_index]->ps_bs, | |
3190 | psp[seg_index]->ps_bs->bs_pages_in++); | |
3191 | } else | |
3192 | break; | |
3193 | } | |
2d21ac55 | 3194 | if (cl_index < pages_in_cl || xfer_size >= size) { |
0b4e3aa0 | 3195 | /* |
55e303ae | 3196 | * we've hit an unallocated page or |
2d21ac55 A |
3197 | * the end of this request... see if |
3198 | * it's time to fire the I/O | |
1c79356b | 3199 | */ |
0b4e3aa0 A |
3200 | break; |
3201 | } | |
3202 | /* | |
d12e1678 | 3203 | * we've hit the end of the current physical |
55e303ae | 3204 | * segment and there's more to do, so try |
d12e1678 | 3205 | * moving to the next one |
0b4e3aa0 A |
3206 | */ |
3207 | seg_index++; | |
3208 | ||
2d21ac55 | 3209 | ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0); |
d12e1678 | 3210 | psp[seg_index] = CLMAP_PS(clmap); |
0b4e3aa0 A |
3211 | ps_info_valid = 1; |
3212 | ||
3213 | if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) { | |
3214 | /* | |
55e303ae A |
3215 | * if the physical segment we're about |
3216 | * to step into is not contiguous to | |
3217 | * the one we're currently in, or it's | |
d12e1678 | 3218 | * in a different paging file, or |
0b4e3aa0 | 3219 | * it hasn't been allocated.... |
2d21ac55 A |
3220 | * we stop this run and go check |
3221 | * to see if it's time to fire the I/O | |
0b4e3aa0 A |
3222 | */ |
3223 | break; | |
1c79356b | 3224 | } |
0b4e3aa0 | 3225 | /* |
d12e1678 | 3226 | * start with first page of the next physical |
2d21ac55 | 3227 | * segment |
0b4e3aa0 A |
3228 | */ |
3229 | cl_index = 0; | |
1c79356b | 3230 | } |
2d21ac55 | 3231 | if (xfer_size == 0) { |
0b4e3aa0 | 3232 | /* |
2d21ac55 | 3233 | * no I/O to generate for this segment |
0b4e3aa0 | 3234 | */ |
0b4e3aa0 | 3235 | continue; |
2d21ac55 A |
3236 | } |
3237 | if (cur_offset <= orig_vs_offset) { | |
3238 | /* | |
3239 | * we've hit a hole in our speculative cluster | |
3240 | * before the offset that we're really after... | |
3241 | * don't issue the I/O since it doesn't encompass | |
3242 | * the original offset and we're looking to only | |
3243 | * pull in the speculative pages if they can be | |
3244 | * made part of a single I/O | |
3245 | */ | |
3246 | size -= xfer_size; | |
3247 | vs_offset += xfer_size; | |
1c79356b | 3248 | |
2d21ac55 A |
3249 | continue; |
3250 | } | |
3251 | /* | |
3252 | * we have a contiguous range of allocated pages | |
3253 | * to read from that encompasses the original offset | |
3254 | */ | |
3255 | page_list_count = 0; | |
3256 | memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset, | |
3257 | xfer_size, xfer_size, | |
3258 | &upl, NULL, &page_list_count, | |
6d2010ae | 3259 | request_flags | UPL_SET_INTERNAL); |
2d21ac55 A |
3260 | |
3261 | error = ps_read_file(psp[beg_pseg], | |
3262 | upl, (upl_offset_t) 0, | |
3263 | ps_offset[beg_pseg] + (beg_indx * vm_page_size), | |
b0d623f7 | 3264 | xfer_size, &residual, io_flags); |
39236c6e | 3265 | |
0b4e3aa0 A |
3266 | |
3267 | /* | |
55e303ae | 3268 | * Adjust counts and send response to VM. Optimize |
d12e1678 | 3269 | * for the common case, i.e. no error and/or partial |
55e303ae | 3270 | * data. If there was an error, then we need to error |
d12e1678 | 3271 | * the entire range, even if some data was successfully |
55e303ae | 3272 | * read. If there was a partial read we may supply some |
0b4e3aa0 | 3273 | * data and may error some as well. In all cases the |
55e303ae A |
3274 | * VM must receive some notification for every page |
3275 | * in the range. | |
0b4e3aa0 A |
3276 | */ |
3277 | if ((error == KERN_SUCCESS) && (residual == 0)) { | |
3278 | /* | |
d12e1678 | 3279 | * Got everything we asked for, supply the data |
55e303ae A |
3280 | * to the VM. Note that as a side effect of |
3281 | * supplying the data, the buffer holding the | |
3282 | * supplied data is deallocated from the pager's | |
3283 | * address space. | |
0b4e3aa0 | 3284 | */ |
39236c6e A |
3285 | lsize = xfer_size; |
3286 | failed_size = 0; | |
0b4e3aa0 | 3287 | } else { |
39236c6e | 3288 | lsize = 0; |
0b4e3aa0 A |
3289 | failed_size = xfer_size; |
3290 | ||
3291 | if (error == KERN_SUCCESS) { | |
2d21ac55 A |
3292 | if (residual == xfer_size) { |
3293 | /* | |
3294 | * If a read operation returns no error | |
3295 | * and no data moved, we turn it into | |
3296 | * an error, assuming we're reading at | |
3297 | * or beyong EOF. | |
3298 | * Fall through and error the entire range. | |
3299 | */ | |
0b4e3aa0 A |
3300 | error = KERN_FAILURE; |
3301 | } else { | |
2d21ac55 A |
3302 | /* |
3303 | * Otherwise, we have partial read. If | |
3304 | * the part read is a integral number | |
3305 | * of pages supply it. Otherwise round | |
3306 | * it up to a page boundary, zero fill | |
3307 | * the unread part, and supply it. | |
3308 | * Fall through and error the remainder | |
3309 | * of the range, if any. | |
3310 | */ | |
3311 | int fill; | |
2d21ac55 | 3312 | |
39236c6e | 3313 | fill = residual & (vm_page_size - 1); |
2d21ac55 | 3314 | lsize = (xfer_size - residual) + fill; |
0b4e3aa0 | 3315 | |
39236c6e | 3316 | if (lsize < xfer_size) |
2d21ac55 | 3317 | failed_size = xfer_size - lsize; |
39236c6e A |
3318 | |
3319 | if (reclaim_all == FALSE) | |
0b4e3aa0 | 3320 | error = KERN_FAILURE; |
0b4e3aa0 A |
3321 | } |
3322 | } | |
3323 | } | |
39236c6e A |
3324 | pvs_object_data_provided(vs, upl, vs_offset, lsize); |
3325 | ||
3326 | if (failed_size) { | |
2d21ac55 A |
3327 | /* |
3328 | * There was an error in some part of the range, tell | |
3329 | * the VM. Note that error is explicitly checked again | |
3330 | * since it can be modified above. | |
3331 | */ | |
0b4e3aa0 | 3332 | BS_STAT(psp[beg_pseg]->ps_bs, |
2d21ac55 | 3333 | psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size)); |
1c79356b | 3334 | } |
2d21ac55 A |
3335 | /* |
3336 | * we've issued a single I/O that encompassed the original offset | |
3337 | * at this point we either met our speculative request length or | |
3338 | * we ran into a 'hole' (i.e. page not present in the cluster, cluster | |
3339 | * not present or not physically contiguous to the previous one), so | |
3340 | * we're done issuing I/O at this point | |
3341 | */ | |
3342 | return (error); | |
1c79356b | 3343 | } |
2d21ac55 | 3344 | } |
1c79356b A |
3345 | return error; |
3346 | } | |
3347 | ||
3348 | int vs_do_async_write = 1; | |
3349 | ||
3350 | kern_return_t | |
3351 | vs_cluster_write( | |
3352 | vstruct_t vs, | |
3353 | upl_t internal_upl, | |
91447636 A |
3354 | upl_offset_t offset, |
3355 | upl_size_t cnt, | |
1c79356b A |
3356 | boolean_t dp_internal, |
3357 | int flags) | |
3358 | { | |
91447636 | 3359 | upl_size_t transfer_size; |
1c79356b A |
3360 | int error = 0; |
3361 | struct clmap clmap; | |
0b4e3aa0 | 3362 | |
b0d623f7 | 3363 | dp_offset_t actual_offset; /* Offset within paging segment */ |
1c79356b | 3364 | paging_segment_t ps; |
b0d623f7 A |
3365 | dp_offset_t mobj_base_addr; |
3366 | dp_offset_t mobj_target_addr; | |
1c79356b A |
3367 | |
3368 | upl_t upl; | |
0b4e3aa0 | 3369 | upl_page_info_t *pl; |
1c79356b | 3370 | int page_index; |
0b4c1975 | 3371 | unsigned int page_max_index; |
1c79356b | 3372 | int list_size; |
55e303ae | 3373 | int pages_in_cl; |
91447636 | 3374 | unsigned int cl_size; |
55e303ae | 3375 | int base_index; |
91447636 | 3376 | unsigned int seg_size; |
b0d623f7 | 3377 | unsigned int upl_offset_in_object; |
0b4c1975 A |
3378 | boolean_t minimal_clustering = FALSE; |
3379 | boolean_t found_dirty; | |
55e303ae | 3380 | |
6d2010ae A |
3381 | if (!dp_encryption_inited) { |
3382 | /* | |
3383 | * ENCRYPTED SWAP: | |
3384 | * Once we've started using swap, we | |
3385 | * can't change our mind on whether | |
3386 | * it needs to be encrypted or | |
3387 | * not. | |
3388 | */ | |
3389 | dp_encryption_inited = TRUE; | |
3390 | } | |
3391 | if (dp_encryption) { | |
3392 | /* | |
3393 | * ENCRYPTED SWAP: | |
3394 | * the UPL will need to be encrypted... | |
3395 | */ | |
3396 | flags |= UPL_PAGING_ENCRYPTED; | |
3397 | } | |
3398 | ||
55e303ae A |
3399 | pages_in_cl = 1 << vs->vs_clshift; |
3400 | cl_size = pages_in_cl * vm_page_size; | |
1c79356b | 3401 | |
0b4c1975 A |
3402 | #if CONFIG_FREEZE |
3403 | minimal_clustering = TRUE; | |
6d2010ae | 3404 | #else |
0b4c1975 A |
3405 | if (dp_isssd == TRUE) |
3406 | minimal_clustering = TRUE; | |
6d2010ae | 3407 | #endif |
1c79356b | 3408 | if (!dp_internal) { |
0c530ab8 | 3409 | unsigned int page_list_count; |
1c79356b | 3410 | int request_flags; |
91447636 | 3411 | unsigned int super_size; |
0b4e3aa0 A |
3412 | int first_dirty; |
3413 | int num_dirty; | |
3414 | int num_of_pages; | |
3415 | int seg_index; | |
91447636 | 3416 | upl_offset_t upl_offset; |
0b4c1975 | 3417 | upl_offset_t upl_offset_aligned; |
b0d623f7 | 3418 | dp_offset_t seg_offset; |
0b4c1975 A |
3419 | dp_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1]; |
3420 | paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_MIN_CLSHIFT) + 1]; | |
0b4e3aa0 | 3421 | |
1c79356b | 3422 | |
0b4c1975 | 3423 | if (bs_low) |
1c79356b | 3424 | super_size = cl_size; |
0b4c1975 | 3425 | else |
1c79356b | 3426 | super_size = VM_SUPER_CLUSTER; |
0b4e3aa0 | 3427 | |
0b4c1975 A |
3428 | request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE | |
3429 | UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM | | |
2d21ac55 | 3430 | UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE; |
1c79356b | 3431 | |
91447636 A |
3432 | if (dp_encryption) { |
3433 | /* | |
3434 | * ENCRYPTED SWAP: | |
3435 | * request that the UPL be prepared for | |
3436 | * encryption. | |
3437 | */ | |
3438 | request_flags |= UPL_ENCRYPT; | |
3439 | flags |= UPL_PAGING_ENCRYPTED; | |
3440 | } | |
6d2010ae | 3441 | |
0b4e3aa0 A |
3442 | page_list_count = 0; |
3443 | memory_object_super_upl_request(vs->vs_control, | |
3444 | (memory_object_offset_t)offset, | |
3445 | cnt, super_size, | |
3446 | &upl, NULL, &page_list_count, | |
55e303ae | 3447 | request_flags | UPL_FOR_PAGEOUT); |
1c79356b | 3448 | |
b0d623f7 A |
3449 | /* |
3450 | * The default pager does not handle objects larger than | |
3451 | * 4GB, so it does not deal with offset that don't fit in | |
3452 | * 32-bit. Cast down upl->offset now and make sure we | |
3453 | * did not lose any valuable bits. | |
3454 | */ | |
3455 | upl_offset_in_object = (unsigned int) upl->offset; | |
3456 | assert(upl->offset == upl_offset_in_object); | |
3457 | ||
0b4e3aa0 | 3458 | pl = UPL_GET_INTERNAL_PAGE_LIST(upl); |
1c79356b | 3459 | |
b0d623f7 | 3460 | seg_size = cl_size - (upl_offset_in_object % cl_size); |
0b4c1975 A |
3461 | upl_offset_aligned = upl_offset_in_object & ~(cl_size - 1); |
3462 | page_index = 0; | |
3463 | page_max_index = upl->size / PAGE_SIZE; | |
3464 | found_dirty = TRUE; | |
55e303ae | 3465 | |
0b4c1975 | 3466 | for (seg_index = 0, transfer_size = upl->size; transfer_size > 0; ) { |
6d2010ae | 3467 | |
0b4c1975 | 3468 | unsigned int seg_pgcnt; |
1c79356b | 3469 | |
0b4c1975 | 3470 | seg_pgcnt = seg_size / PAGE_SIZE; |
1c79356b | 3471 | |
0b4c1975 A |
3472 | if (minimal_clustering == TRUE) { |
3473 | unsigned int non_dirty; | |
1c79356b | 3474 | |
0b4c1975 A |
3475 | non_dirty = 0; |
3476 | found_dirty = FALSE; | |
3477 | ||
3478 | for (; non_dirty < seg_pgcnt; non_dirty++) { | |
3479 | if ((page_index + non_dirty) >= page_max_index) | |
3480 | break; | |
3481 | ||
3482 | if (UPL_DIRTY_PAGE(pl, page_index + non_dirty) || | |
3483 | UPL_PRECIOUS_PAGE(pl, page_index + non_dirty)) { | |
3484 | found_dirty = TRUE; | |
3485 | break; | |
3486 | } | |
3487 | } | |
3488 | } | |
3489 | if (found_dirty == TRUE) { | |
3490 | ps_offset[seg_index] = | |
3491 | ps_clmap(vs, | |
3492 | upl_offset_aligned, | |
3493 | &clmap, CL_ALLOC, | |
3494 | cl_size, 0); | |
3495 | ||
3496 | if (ps_offset[seg_index] == (dp_offset_t) -1) { | |
3497 | upl_abort(upl, 0); | |
3498 | upl_deallocate(upl); | |
3499 | ||
3500 | return KERN_FAILURE; | |
3501 | } | |
3502 | psp[seg_index] = CLMAP_PS(clmap); | |
3503 | } | |
55e303ae | 3504 | if (transfer_size > seg_size) { |
0b4c1975 | 3505 | page_index += seg_pgcnt; |
55e303ae | 3506 | transfer_size -= seg_size; |
0b4c1975 | 3507 | upl_offset_aligned += cl_size; |
6d2010ae | 3508 | seg_size = cl_size; |
0b4e3aa0 A |
3509 | seg_index++; |
3510 | } else | |
3511 | transfer_size = 0; | |
3512 | } | |
55e303ae A |
3513 | /* |
3514 | * Ignore any non-present pages at the end of the | |
3515 | * UPL. | |
3516 | */ | |
316670eb A |
3517 | for (page_index = upl->size / vm_page_size; page_index > 0;) { |
3518 | if (UPL_PAGE_PRESENT(pl, --page_index)) { | |
3519 | page_index++; | |
55e303ae | 3520 | break; |
316670eb A |
3521 | } |
3522 | } | |
3523 | if (page_index == 0) { | |
3524 | /* | |
3525 | * no pages in the UPL | |
3526 | * abort and return | |
3527 | */ | |
3528 | upl_abort(upl, 0); | |
3529 | upl_deallocate(upl); | |
3530 | ||
3531 | return KERN_SUCCESS; | |
3532 | } | |
3533 | num_of_pages = page_index; | |
55e303ae | 3534 | |
b0d623f7 | 3535 | base_index = (upl_offset_in_object % cl_size) / PAGE_SIZE; |
55e303ae A |
3536 | |
3537 | for (page_index = 0; page_index < num_of_pages; ) { | |
0b4e3aa0 A |
3538 | /* |
3539 | * skip over non-dirty pages | |
3540 | */ | |
3541 | for ( ; page_index < num_of_pages; page_index++) { | |
55e303ae | 3542 | if (UPL_DIRTY_PAGE(pl, page_index) |
d12e1678 | 3543 | || UPL_PRECIOUS_PAGE(pl, page_index)) |
0b4e3aa0 A |
3544 | /* |
3545 | * this is a page we need to write | |
55e303ae | 3546 | * go see if we can buddy it up with |
d12e1678 | 3547 | * others that are contiguous to it |
0b4e3aa0 A |
3548 | */ |
3549 | break; | |
3550 | /* | |
d12e1678 | 3551 | * if the page is not-dirty, but present we |
55e303ae | 3552 | * need to commit it... This is an unusual |
d12e1678 | 3553 | * case since we only asked for dirty pages |
0b4e3aa0 A |
3554 | */ |
3555 | if (UPL_PAGE_PRESENT(pl, page_index)) { | |
3556 | boolean_t empty = FALSE; | |
3557 | upl_commit_range(upl, | |
3558 | page_index * vm_page_size, | |
3559 | vm_page_size, | |
3560 | UPL_COMMIT_NOTIFY_EMPTY, | |
3561 | pl, | |
d52fe63f | 3562 | page_list_count, |
0b4e3aa0 | 3563 | &empty); |
55e303ae A |
3564 | if (empty) { |
3565 | assert(page_index == | |
3566 | num_of_pages - 1); | |
0b4e3aa0 | 3567 | upl_deallocate(upl); |
55e303ae | 3568 | } |
1c79356b | 3569 | } |
1c79356b | 3570 | } |
0b4e3aa0 A |
3571 | if (page_index == num_of_pages) |
3572 | /* | |
3573 | * no more pages to look at, we're out of here | |
3574 | */ | |
3575 | break; | |
1c79356b | 3576 | |
0b4e3aa0 | 3577 | /* |
55e303ae A |
3578 | * gather up contiguous dirty pages... we have at |
3579 | * least 1 * otherwise we would have bailed above | |
0b4e3aa0 A |
3580 | * make sure that each physical segment that we step |
3581 | * into is contiguous to the one we're currently in | |
3582 | * if it's not, we have to stop and write what we have | |
3583 | */ | |
55e303ae | 3584 | for (first_dirty = page_index; |
d12e1678 | 3585 | page_index < num_of_pages; ) { |
55e303ae | 3586 | if ( !UPL_DIRTY_PAGE(pl, page_index) |
d12e1678 | 3587 | && !UPL_PRECIOUS_PAGE(pl, page_index)) |
0b4e3aa0 A |
3588 | break; |
3589 | page_index++; | |
3590 | /* | |
3591 | * if we just looked at the last page in the UPL | |
3592 | * we don't need to check for physical segment | |
3593 | * continuity | |
3594 | */ | |
3595 | if (page_index < num_of_pages) { | |
3596 | int cur_seg; | |
3597 | int nxt_seg; | |
3598 | ||
55e303ae A |
3599 | cur_seg = (base_index + (page_index - 1))/pages_in_cl; |
3600 | nxt_seg = (base_index + page_index)/pages_in_cl; | |
0b4e3aa0 A |
3601 | |
3602 | if (cur_seg != nxt_seg) { | |
3603 | if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg])) | |
55e303ae A |
3604 | /* |
3605 | * if the segment we're about | |
3606 | * to step into is not | |
3607 | * contiguous to the one we're | |
3608 | * currently in, or it's in a | |
d12e1678 | 3609 | * different paging file.... |
55e303ae | 3610 | * we stop here and generate |
d12e1678 A |
3611 | * the I/O |
3612 | */ | |
0b4e3aa0 | 3613 | break; |
1c79356b | 3614 | } |
1c79356b | 3615 | } |
0b4e3aa0 A |
3616 | } |
3617 | num_dirty = page_index - first_dirty; | |
1c79356b | 3618 | |
0b4e3aa0 A |
3619 | if (num_dirty) { |
3620 | upl_offset = first_dirty * vm_page_size; | |
0b4e3aa0 A |
3621 | transfer_size = num_dirty * vm_page_size; |
3622 | ||
d12e1678 | 3623 | while (transfer_size) { |
1c79356b | 3624 | |
d12e1678 | 3625 | if ((seg_size = cl_size - |
b0d623f7 A |
3626 | ((upl_offset_in_object + |
3627 | upl_offset) % cl_size)) | |
d12e1678 A |
3628 | > transfer_size) |
3629 | seg_size = transfer_size; | |
0b4e3aa0 | 3630 | |
b0d623f7 A |
3631 | ps_vs_write_complete( |
3632 | vs, | |
3633 | (upl_offset_in_object + | |
3634 | upl_offset), | |
d12e1678 | 3635 | seg_size, error); |
0b4e3aa0 | 3636 | |
d12e1678 A |
3637 | transfer_size -= seg_size; |
3638 | upl_offset += seg_size; | |
0b4e3aa0 | 3639 | } |
d12e1678 A |
3640 | upl_offset = first_dirty * vm_page_size; |
3641 | transfer_size = num_dirty * vm_page_size; | |
55e303ae A |
3642 | |
3643 | seg_index = (base_index + first_dirty) / pages_in_cl; | |
b0d623f7 | 3644 | seg_offset = (upl_offset_in_object + upl_offset) % cl_size; |
55e303ae | 3645 | |
d12e1678 A |
3646 | error = ps_write_file(psp[seg_index], |
3647 | upl, upl_offset, | |
3648 | ps_offset[seg_index] | |
3649 | + seg_offset, | |
3650 | transfer_size, flags); | |
1c79356b | 3651 | } |
1c79356b | 3652 | } |
0b4e3aa0 | 3653 | |
1c79356b | 3654 | } else { |
b0d623f7 | 3655 | assert(cnt <= (unsigned) (vm_page_size << vs->vs_clshift)); |
1c79356b A |
3656 | list_size = cnt; |
3657 | ||
3658 | page_index = 0; | |
3659 | /* The caller provides a mapped_data which is derived */ | |
3660 | /* from a temporary object. The targeted pages are */ | |
3661 | /* guaranteed to be set at offset 0 in the mapped_data */ | |
3662 | /* The actual offset however must still be derived */ | |
3663 | /* from the offset in the vs in question */ | |
3664 | mobj_base_addr = offset; | |
3665 | mobj_target_addr = mobj_base_addr; | |
3666 | ||
3667 | for (transfer_size = list_size; transfer_size != 0;) { | |
3668 | actual_offset = ps_clmap(vs, mobj_target_addr, | |
3669 | &clmap, CL_ALLOC, | |
3670 | transfer_size < cl_size ? | |
3671 | transfer_size : cl_size, 0); | |
b0d623f7 | 3672 | if(actual_offset == (dp_offset_t) -1) { |
1c79356b A |
3673 | error = 1; |
3674 | break; | |
3675 | } | |
3676 | cnt = MIN(transfer_size, | |
b0d623f7 | 3677 | (unsigned) CLMAP_NPGS(clmap) * vm_page_size); |
1c79356b A |
3678 | ps = CLMAP_PS(clmap); |
3679 | /* Assume that the caller has given us contiguous */ | |
3680 | /* pages */ | |
3681 | if(cnt) { | |
d12e1678 A |
3682 | ps_vs_write_complete(vs, mobj_target_addr, |
3683 | cnt, error); | |
1c79356b A |
3684 | error = ps_write_file(ps, internal_upl, |
3685 | 0, actual_offset, | |
3686 | cnt, flags); | |
3687 | if (error) | |
3688 | break; | |
55e303ae | 3689 | } |
1c79356b A |
3690 | if (error) |
3691 | break; | |
3692 | actual_offset += cnt; | |
3693 | mobj_target_addr += cnt; | |
3694 | transfer_size -= cnt; | |
3695 | cnt = 0; | |
3696 | ||
3697 | if (error) | |
3698 | break; | |
3699 | } | |
3700 | } | |
3701 | if(error) | |
3702 | return KERN_FAILURE; | |
3703 | else | |
3704 | return KERN_SUCCESS; | |
3705 | } | |
3706 | ||
3707 | vm_size_t | |
3708 | ps_vstruct_allocated_size( | |
3709 | vstruct_t vs) | |
3710 | { | |
3711 | int num_pages; | |
3712 | struct vs_map *vsmap; | |
91447636 | 3713 | unsigned int i, j, k; |
1c79356b A |
3714 | |
3715 | num_pages = 0; | |
3716 | if (vs->vs_indirect) { | |
3717 | /* loop on indirect maps */ | |
3718 | for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { | |
3719 | vsmap = vs->vs_imap[i]; | |
3720 | if (vsmap == NULL) | |
3721 | continue; | |
3722 | /* loop on clusters in this indirect map */ | |
3723 | for (j = 0; j < CLMAP_ENTRIES; j++) { | |
3724 | if (VSM_ISCLR(vsmap[j]) || | |
3725 | VSM_ISERR(vsmap[j])) | |
3726 | continue; | |
3727 | /* loop on pages in this cluster */ | |
3728 | for (k = 0; k < VSCLSIZE(vs); k++) { | |
3729 | if ((VSM_BMAP(vsmap[j])) & (1 << k)) | |
3730 | num_pages++; | |
3731 | } | |
3732 | } | |
3733 | } | |
3734 | } else { | |
3735 | vsmap = vs->vs_dmap; | |
3736 | if (vsmap == NULL) | |
3737 | return 0; | |
3738 | /* loop on clusters in the direct map */ | |
3739 | for (j = 0; j < CLMAP_ENTRIES; j++) { | |
3740 | if (VSM_ISCLR(vsmap[j]) || | |
3741 | VSM_ISERR(vsmap[j])) | |
3742 | continue; | |
3743 | /* loop on pages in this cluster */ | |
3744 | for (k = 0; k < VSCLSIZE(vs); k++) { | |
3745 | if ((VSM_BMAP(vsmap[j])) & (1 << k)) | |
3746 | num_pages++; | |
3747 | } | |
3748 | } | |
3749 | } | |
3750 | ||
55e303ae | 3751 | return ptoa_32(num_pages); |
1c79356b A |
3752 | } |
3753 | ||
b0d623f7 | 3754 | unsigned int |
1c79356b A |
3755 | ps_vstruct_allocated_pages( |
3756 | vstruct_t vs, | |
3757 | default_pager_page_t *pages, | |
b0d623f7 | 3758 | unsigned int pages_size) |
1c79356b | 3759 | { |
91447636 | 3760 | unsigned int num_pages; |
1c79356b | 3761 | struct vs_map *vsmap; |
b0d623f7 | 3762 | dp_offset_t offset; |
91447636 | 3763 | unsigned int i, j, k; |
1c79356b A |
3764 | |
3765 | num_pages = 0; | |
3766 | offset = 0; | |
3767 | if (vs->vs_indirect) { | |
3768 | /* loop on indirect maps */ | |
3769 | for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { | |
3770 | vsmap = vs->vs_imap[i]; | |
3771 | if (vsmap == NULL) { | |
3772 | offset += (vm_page_size * CLMAP_ENTRIES * | |
3773 | VSCLSIZE(vs)); | |
3774 | continue; | |
3775 | } | |
3776 | /* loop on clusters in this indirect map */ | |
3777 | for (j = 0; j < CLMAP_ENTRIES; j++) { | |
3778 | if (VSM_ISCLR(vsmap[j]) || | |
3779 | VSM_ISERR(vsmap[j])) { | |
3780 | offset += vm_page_size * VSCLSIZE(vs); | |
3781 | continue; | |
3782 | } | |
3783 | /* loop on pages in this cluster */ | |
3784 | for (k = 0; k < VSCLSIZE(vs); k++) { | |
3785 | if ((VSM_BMAP(vsmap[j])) & (1 << k)) { | |
3786 | num_pages++; | |
3787 | if (num_pages < pages_size) | |
3788 | pages++->dpp_offset = | |
3789 | offset; | |
3790 | } | |
3791 | offset += vm_page_size; | |
3792 | } | |
3793 | } | |
3794 | } | |
3795 | } else { | |
3796 | vsmap = vs->vs_dmap; | |
3797 | if (vsmap == NULL) | |
3798 | return 0; | |
3799 | /* loop on clusters in the direct map */ | |
3800 | for (j = 0; j < CLMAP_ENTRIES; j++) { | |
3801 | if (VSM_ISCLR(vsmap[j]) || | |
3802 | VSM_ISERR(vsmap[j])) { | |
3803 | offset += vm_page_size * VSCLSIZE(vs); | |
3804 | continue; | |
3805 | } | |
3806 | /* loop on pages in this cluster */ | |
3807 | for (k = 0; k < VSCLSIZE(vs); k++) { | |
3808 | if ((VSM_BMAP(vsmap[j])) & (1 << k)) { | |
3809 | num_pages++; | |
3810 | if (num_pages < pages_size) | |
3811 | pages++->dpp_offset = offset; | |
3812 | } | |
3813 | offset += vm_page_size; | |
3814 | } | |
3815 | } | |
3816 | } | |
3817 | ||
3818 | return num_pages; | |
3819 | } | |
3820 | ||
3821 | ||
3822 | kern_return_t | |
3823 | ps_vstruct_transfer_from_segment( | |
3824 | vstruct_t vs, | |
3825 | paging_segment_t segment, | |
1c79356b | 3826 | upl_t upl) |
1c79356b A |
3827 | { |
3828 | struct vs_map *vsmap; | |
91447636 A |
3829 | // struct vs_map old_vsmap; |
3830 | // struct vs_map new_vsmap; | |
3831 | unsigned int i, j; | |
1c79356b A |
3832 | |
3833 | VS_LOCK(vs); /* block all work on this vstruct */ | |
3834 | /* can't allow the normal multiple write */ | |
3835 | /* semantic because writes may conflict */ | |
3836 | vs->vs_xfer_pending = TRUE; | |
3837 | vs_wait_for_sync_writers(vs); | |
3838 | vs_start_write(vs); | |
3839 | vs_wait_for_readers(vs); | |
3840 | /* we will unlock the vs to allow other writes while transferring */ | |
3841 | /* and will be guaranteed of the persistance of the vs struct */ | |
3842 | /* because the caller of ps_vstruct_transfer_from_segment bumped */ | |
3843 | /* vs_async_pending */ | |
3844 | /* OK we now have guaranteed no other parties are accessing this */ | |
3845 | /* vs. Now that we are also supporting simple lock versions of */ | |
3846 | /* vs_lock we cannot hold onto VS_LOCK as we may block below. */ | |
3847 | /* our purpose in holding it before was the multiple write case */ | |
3848 | /* we now use the boolean xfer_pending to do that. We can use */ | |
3849 | /* a boolean instead of a count because we have guaranteed single */ | |
3850 | /* file access to this code in its caller */ | |
3851 | VS_UNLOCK(vs); | |
3852 | vs_changed: | |
3853 | if (vs->vs_indirect) { | |
91447636 A |
3854 | unsigned int vsmap_size; |
3855 | int clmap_off; | |
1c79356b A |
3856 | /* loop on indirect maps */ |
3857 | for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) { | |
3858 | vsmap = vs->vs_imap[i]; | |
3859 | if (vsmap == NULL) | |
3860 | continue; | |
3861 | /* loop on clusters in this indirect map */ | |
3862 | clmap_off = (vm_page_size * CLMAP_ENTRIES * | |
3863 | VSCLSIZE(vs) * i); | |
3864 | if(i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size)) | |
3865 | vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i); | |
3866 | else | |
3867 | vsmap_size = CLMAP_ENTRIES; | |
3868 | for (j = 0; j < vsmap_size; j++) { | |
3869 | if (VSM_ISCLR(vsmap[j]) || | |
3870 | VSM_ISERR(vsmap[j]) || | |
3871 | (VSM_PS(vsmap[j]) != segment)) | |
3872 | continue; | |
3873 | if(vs_cluster_transfer(vs, | |
3874 | (vm_page_size * (j << vs->vs_clshift)) | |
3875 | + clmap_off, | |
3876 | vm_page_size << vs->vs_clshift, | |
1c79356b | 3877 | upl) |
1c79356b A |
3878 | != KERN_SUCCESS) { |
3879 | VS_LOCK(vs); | |
3880 | vs->vs_xfer_pending = FALSE; | |
3881 | VS_UNLOCK(vs); | |
3882 | vs_finish_write(vs); | |
3883 | return KERN_FAILURE; | |
3884 | } | |
3885 | /* allow other readers/writers during transfer*/ | |
3886 | VS_LOCK(vs); | |
3887 | vs->vs_xfer_pending = FALSE; | |
3888 | VS_UNLOCK(vs); | |
3889 | vs_finish_write(vs); | |
6d2010ae A |
3890 | |
3891 | if (backing_store_abort_compaction || backing_store_stop_compaction) { | |
3892 | backing_store_abort_compaction = FALSE; | |
3893 | dprintf(("ps_vstruct_transfer_from_segment - ABORTED\n")); | |
3894 | return KERN_FAILURE; | |
3895 | } | |
3896 | vnode_pager_throttle(); | |
3897 | ||
1c79356b A |
3898 | VS_LOCK(vs); |
3899 | vs->vs_xfer_pending = TRUE; | |
1c79356b A |
3900 | vs_wait_for_sync_writers(vs); |
3901 | vs_start_write(vs); | |
3902 | vs_wait_for_readers(vs); | |
0b4e3aa0 | 3903 | VS_UNLOCK(vs); |
1c79356b A |
3904 | if (!(vs->vs_indirect)) { |
3905 | goto vs_changed; | |
3906 | } | |
3907 | } | |
3908 | } | |
3909 | } else { | |
3910 | vsmap = vs->vs_dmap; | |
3911 | if (vsmap == NULL) { | |
3912 | VS_LOCK(vs); | |
3913 | vs->vs_xfer_pending = FALSE; | |
3914 | VS_UNLOCK(vs); | |
3915 | vs_finish_write(vs); | |
3916 | return KERN_SUCCESS; | |
3917 | } | |
3918 | /* loop on clusters in the direct map */ | |
3919 | for (j = 0; j < vs->vs_size; j++) { | |
3920 | if (VSM_ISCLR(vsmap[j]) || | |
3921 | VSM_ISERR(vsmap[j]) || | |
3922 | (VSM_PS(vsmap[j]) != segment)) | |
3923 | continue; | |
3924 | if(vs_cluster_transfer(vs, | |
3925 | vm_page_size * (j << vs->vs_clshift), | |
3926 | vm_page_size << vs->vs_clshift, | |
1c79356b | 3927 | upl) != KERN_SUCCESS) { |
1c79356b A |
3928 | VS_LOCK(vs); |
3929 | vs->vs_xfer_pending = FALSE; | |
3930 | VS_UNLOCK(vs); | |
3931 | vs_finish_write(vs); | |
3932 | return KERN_FAILURE; | |
3933 | } | |
3934 | /* allow other readers/writers during transfer*/ | |
3935 | VS_LOCK(vs); | |
3936 | vs->vs_xfer_pending = FALSE; | |
3937 | VS_UNLOCK(vs); | |
3938 | vs_finish_write(vs); | |
3939 | VS_LOCK(vs); | |
3940 | vs->vs_xfer_pending = TRUE; | |
1c79356b A |
3941 | vs_wait_for_sync_writers(vs); |
3942 | vs_start_write(vs); | |
3943 | vs_wait_for_readers(vs); | |
b0d623f7 | 3944 | VS_UNLOCK(vs); |
1c79356b A |
3945 | if (vs->vs_indirect) { |
3946 | goto vs_changed; | |
3947 | } | |
3948 | } | |
3949 | } | |
3950 | ||
3951 | VS_LOCK(vs); | |
3952 | vs->vs_xfer_pending = FALSE; | |
3953 | VS_UNLOCK(vs); | |
3954 | vs_finish_write(vs); | |
3955 | return KERN_SUCCESS; | |
3956 | } | |
3957 | ||
3958 | ||
3959 | ||
3960 | vs_map_t | |
3961 | vs_get_map_entry( | |
3962 | vstruct_t vs, | |
b0d623f7 | 3963 | dp_offset_t offset) |
1c79356b A |
3964 | { |
3965 | struct vs_map *vsmap; | |
b0d623f7 | 3966 | dp_offset_t cluster; |
1c79356b | 3967 | |
55e303ae | 3968 | cluster = atop_32(offset) >> vs->vs_clshift; |
1c79356b A |
3969 | if (vs->vs_indirect) { |
3970 | long ind_block = cluster/CLMAP_ENTRIES; | |
3971 | ||
3972 | /* Is the indirect block allocated? */ | |
3973 | vsmap = vs->vs_imap[ind_block]; | |
3974 | if(vsmap == (vs_map_t) NULL) | |
3975 | return vsmap; | |
3976 | } else | |
3977 | vsmap = vs->vs_dmap; | |
3978 | vsmap += cluster%CLMAP_ENTRIES; | |
3979 | return vsmap; | |
3980 | } | |
3981 | ||
3982 | kern_return_t | |
3983 | vs_cluster_transfer( | |
3984 | vstruct_t vs, | |
b0d623f7 A |
3985 | dp_offset_t offset, |
3986 | dp_size_t cnt, | |
1c79356b | 3987 | upl_t upl) |
1c79356b | 3988 | { |
b0d623f7 | 3989 | dp_offset_t actual_offset; |
1c79356b A |
3990 | paging_segment_t ps; |
3991 | struct clmap clmap; | |
3992 | kern_return_t error = KERN_SUCCESS; | |
91447636 A |
3993 | unsigned int size, size_wanted; |
3994 | int i; | |
0c530ab8 | 3995 | unsigned int residual = 0; |
91447636 A |
3996 | unsigned int unavail_size; |
3997 | // default_pager_thread_t *dpt; | |
3998 | // boolean_t dealloc; | |
3999 | struct vs_map *vsmap_ptr = NULL; | |
1c79356b A |
4000 | struct vs_map read_vsmap; |
4001 | struct vs_map original_read_vsmap; | |
4002 | struct vs_map write_vsmap; | |
91447636 A |
4003 | // upl_t sync_upl; |
4004 | // vm_offset_t ioaddr; | |
1c79356b | 4005 | |
1c79356b A |
4006 | /* vs_cluster_transfer reads in the pages of a cluster and |
4007 | * then writes these pages back to new backing store. The | |
4008 | * segment the pages are being read from is assumed to have | |
4009 | * been taken off-line and is no longer considered for new | |
4010 | * space requests. | |
4011 | */ | |
4012 | ||
4013 | /* | |
4014 | * This loop will be executed once per cluster referenced. | |
4015 | * Typically this means once, since it's unlikely that the | |
4016 | * VM system will ask for anything spanning cluster boundaries. | |
4017 | * | |
4018 | * If there are holes in a cluster (in a paging segment), we stop | |
4019 | * reading at the hole, then loop again, hoping to | |
4020 | * find valid pages later in the cluster. This continues until | |
4021 | * the entire range has been examined, and read, if present. The | |
4022 | * pages are written as they are read. If a failure occurs after | |
4023 | * some pages are written the unmap call at the bottom of the loop | |
4024 | * recovers the backing store and the old backing store remains | |
4025 | * in effect. | |
4026 | */ | |
4027 | ||
1c79356b A |
4028 | VSM_CLR(write_vsmap); |
4029 | VSM_CLR(original_read_vsmap); | |
4030 | /* grab the actual object's pages to sync with I/O */ | |
4031 | while (cnt && (error == KERN_SUCCESS)) { | |
4032 | vsmap_ptr = vs_get_map_entry(vs, offset); | |
4033 | actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0); | |
4034 | ||
b0d623f7 | 4035 | if (actual_offset == (dp_offset_t) -1) { |
1c79356b A |
4036 | |
4037 | /* | |
4038 | * Nothing left to write in this cluster at least | |
4039 | * set write cluster information for any previous | |
4040 | * write, clear for next cluster, if there is one | |
4041 | */ | |
4042 | unsigned int local_size, clmask, clsize; | |
4043 | ||
4044 | clsize = vm_page_size << vs->vs_clshift; | |
4045 | clmask = clsize - 1; | |
4046 | local_size = clsize - (offset & clmask); | |
4047 | ASSERT(local_size); | |
4048 | local_size = MIN(local_size, cnt); | |
4049 | ||
4050 | /* This cluster has no data in it beyond what may */ | |
4051 | /* have been found on a previous iteration through */ | |
4052 | /* the loop "write_vsmap" */ | |
4053 | *vsmap_ptr = write_vsmap; | |
4054 | VSM_CLR(write_vsmap); | |
4055 | VSM_CLR(original_read_vsmap); | |
4056 | ||
4057 | cnt -= local_size; | |
4058 | offset += local_size; | |
4059 | continue; | |
4060 | } | |
4061 | ||
4062 | /* | |
4063 | * Count up contiguous available or unavailable | |
4064 | * pages. | |
4065 | */ | |
4066 | ps = CLMAP_PS(clmap); | |
4067 | ASSERT(ps); | |
4068 | size = 0; | |
4069 | unavail_size = 0; | |
4070 | for (i = 0; | |
4071 | (size < cnt) && (unavail_size < cnt) && | |
4072 | (i < CLMAP_NPGS(clmap)); i++) { | |
4073 | if (CLMAP_ISSET(clmap, i)) { | |
4074 | if (unavail_size != 0) | |
4075 | break; | |
4076 | size += vm_page_size; | |
4077 | BS_STAT(ps->ps_bs, | |
4078 | ps->ps_bs->bs_pages_in++); | |
4079 | } else { | |
4080 | if (size != 0) | |
4081 | break; | |
4082 | unavail_size += vm_page_size; | |
4083 | } | |
4084 | } | |
4085 | ||
4086 | if (size == 0) { | |
4087 | ASSERT(unavail_size); | |
593a1d5f | 4088 | ps_clunmap(vs, offset, unavail_size); |
1c79356b A |
4089 | cnt -= unavail_size; |
4090 | offset += unavail_size; | |
4091 | if((offset & ((vm_page_size << vs->vs_clshift) - 1)) | |
4092 | == 0) { | |
4093 | /* There is no more to transfer in this | |
4094 | cluster | |
4095 | */ | |
4096 | *vsmap_ptr = write_vsmap; | |
4097 | VSM_CLR(write_vsmap); | |
4098 | VSM_CLR(original_read_vsmap); | |
4099 | } | |
4100 | continue; | |
4101 | } | |
4102 | ||
4103 | if(VSM_ISCLR(original_read_vsmap)) | |
4104 | original_read_vsmap = *vsmap_ptr; | |
4105 | ||
4106 | if(ps->ps_segtype == PS_PARTITION) { | |
0c530ab8 A |
4107 | panic("swap partition not supported\n"); |
4108 | /*NOTREACHED*/ | |
4109 | error = KERN_FAILURE; | |
4110 | residual = size; | |
1c79356b | 4111 | /* |
9bccf70c | 4112 | NEED TO ISSUE WITH SYNC & NO COMMIT |
1c79356b A |
4113 | error = ps_read_device(ps, actual_offset, &buffer, |
4114 | size, &residual, flags); | |
4115 | */ | |
4116 | } else { | |
9bccf70c | 4117 | /* NEED TO ISSUE WITH SYNC & NO COMMIT */ |
91447636 | 4118 | error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset, |
1c79356b | 4119 | size, &residual, |
6d2010ae | 4120 | (UPL_IOSYNC | UPL_NOCOMMIT | (dp_encryption ? UPL_PAGING_ENCRYPTED : 0))); |
1c79356b A |
4121 | } |
4122 | ||
4123 | read_vsmap = *vsmap_ptr; | |
4124 | ||
4125 | ||
4126 | /* | |
4127 | * Adjust counts and put data in new BS. Optimize for the | |
4128 | * common case, i.e. no error and/or partial data. | |
4129 | * If there was an error, then we need to error the entire | |
4130 | * range, even if some data was successfully read. | |
4131 | * | |
4132 | */ | |
4133 | if ((error == KERN_SUCCESS) && (residual == 0)) { | |
0b4e3aa0 | 4134 | |
1c79356b A |
4135 | /* |
4136 | * Got everything we asked for, supply the data to | |
4137 | * the new BS. Note that as a side effect of supplying | |
4138 | * the data, the buffer holding the supplied data is | |
4139 | * deallocated from the pager's address space unless | |
4140 | * the write is unsuccessful. | |
4141 | */ | |
4142 | ||
4143 | /* note buffer will be cleaned up in all cases by */ | |
4144 | /* internal_cluster_write or if an error on write */ | |
4145 | /* the vm_map_copy_page_discard call */ | |
4146 | *vsmap_ptr = write_vsmap; | |
4147 | ||
1c79356b A |
4148 | if(vs_cluster_write(vs, upl, offset, |
4149 | size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) { | |
1c79356b A |
4150 | error = KERN_FAILURE; |
4151 | if(!(VSM_ISCLR(*vsmap_ptr))) { | |
4152 | /* unmap the new backing store object */ | |
4153 | ps_clunmap(vs, offset, size); | |
4154 | } | |
4155 | /* original vsmap */ | |
4156 | *vsmap_ptr = original_read_vsmap; | |
4157 | VSM_CLR(write_vsmap); | |
4158 | } else { | |
4159 | if((offset + size) & | |
4160 | ((vm_page_size << vs->vs_clshift) | |
4161 | - 1)) { | |
4162 | /* There is more to transfer in this | |
4163 | cluster | |
4164 | */ | |
4165 | write_vsmap = *vsmap_ptr; | |
4166 | *vsmap_ptr = read_vsmap; | |
593a1d5f | 4167 | ps_clunmap(vs, offset, size); |
1c79356b A |
4168 | } else { |
4169 | /* discard the old backing object */ | |
4170 | write_vsmap = *vsmap_ptr; | |
4171 | *vsmap_ptr = read_vsmap; | |
4172 | ps_clunmap(vs, offset, size); | |
4173 | *vsmap_ptr = write_vsmap; | |
4174 | VSM_CLR(write_vsmap); | |
4175 | VSM_CLR(original_read_vsmap); | |
4176 | } | |
4177 | } | |
4178 | } else { | |
4179 | size_wanted = size; | |
4180 | if (error == KERN_SUCCESS) { | |
4181 | if (residual == size) { | |
4182 | /* | |
4183 | * If a read operation returns no error | |
4184 | * and no data moved, we turn it into | |
4185 | * an error, assuming we're reading at | |
4186 | * or beyond EOF. | |
4187 | * Fall through and error the entire | |
4188 | * range. | |
4189 | */ | |
4190 | error = KERN_FAILURE; | |
4191 | *vsmap_ptr = write_vsmap; | |
4192 | if(!(VSM_ISCLR(*vsmap_ptr))) { | |
4193 | /* unmap the new backing store object */ | |
4194 | ps_clunmap(vs, offset, size); | |
4195 | } | |
4196 | *vsmap_ptr = original_read_vsmap; | |
4197 | VSM_CLR(write_vsmap); | |
4198 | continue; | |
4199 | } else { | |
4200 | /* | |
4201 | * Otherwise, we have partial read. | |
4202 | * This is also considered an error | |
4203 | * for the purposes of cluster transfer | |
4204 | */ | |
4205 | error = KERN_FAILURE; | |
4206 | *vsmap_ptr = write_vsmap; | |
4207 | if(!(VSM_ISCLR(*vsmap_ptr))) { | |
4208 | /* unmap the new backing store object */ | |
4209 | ps_clunmap(vs, offset, size); | |
4210 | } | |
4211 | *vsmap_ptr = original_read_vsmap; | |
4212 | VSM_CLR(write_vsmap); | |
4213 | continue; | |
4214 | } | |
4215 | } | |
4216 | ||
4217 | } | |
4218 | cnt -= size; | |
4219 | offset += size; | |
4220 | ||
4221 | } /* END while (cnt && (error == 0)) */ | |
4222 | if(!VSM_ISCLR(write_vsmap)) | |
4223 | *vsmap_ptr = write_vsmap; | |
4224 | ||
1c79356b A |
4225 | return error; |
4226 | } | |
4227 | ||
4228 | kern_return_t | |
91447636 A |
4229 | default_pager_add_file( |
4230 | MACH_PORT_FACE backing_store, | |
4231 | vnode_ptr_t vp, | |
1c79356b | 4232 | int record_size, |
91447636 | 4233 | vm_size_t size) |
1c79356b A |
4234 | { |
4235 | backing_store_t bs; | |
4236 | paging_segment_t ps; | |
4237 | int i; | |
91447636 | 4238 | unsigned int j; |
1c79356b | 4239 | int error; |
1c79356b A |
4240 | |
4241 | if ((bs = backing_store_lookup(backing_store)) | |
4242 | == BACKING_STORE_NULL) | |
4243 | return KERN_INVALID_ARGUMENT; | |
4244 | ||
4245 | PSL_LOCK(); | |
4246 | for (i = 0; i <= paging_segment_max; i++) { | |
4247 | ps = paging_segments[i]; | |
4248 | if (ps == PAGING_SEGMENT_NULL) | |
4249 | continue; | |
4250 | if (ps->ps_segtype != PS_FILE) | |
4251 | continue; | |
4252 | ||
4253 | /* | |
4254 | * Check for overlap on same device. | |
4255 | */ | |
4256 | if (ps->ps_vnode == (struct vnode *)vp) { | |
4257 | PSL_UNLOCK(); | |
4258 | BS_UNLOCK(bs); | |
4259 | return KERN_INVALID_ARGUMENT; | |
4260 | } | |
4261 | } | |
4262 | PSL_UNLOCK(); | |
4263 | ||
4264 | /* | |
4265 | * Set up the paging segment | |
4266 | */ | |
4267 | ps = (paging_segment_t) kalloc(sizeof (struct paging_segment)); | |
4268 | if (ps == PAGING_SEGMENT_NULL) { | |
4269 | BS_UNLOCK(bs); | |
4270 | return KERN_RESOURCE_SHORTAGE; | |
4271 | } | |
4272 | ||
4273 | ps->ps_segtype = PS_FILE; | |
4274 | ps->ps_vnode = (struct vnode *)vp; | |
4275 | ps->ps_offset = 0; | |
4276 | ps->ps_record_shift = local_log2(vm_page_size / record_size); | |
b0d623f7 A |
4277 | assert((dp_size_t) size == size); |
4278 | ps->ps_recnum = (dp_size_t) size; | |
4279 | ps->ps_pgnum = ((dp_size_t) size) >> ps->ps_record_shift; | |
1c79356b A |
4280 | |
4281 | ps->ps_pgcount = ps->ps_pgnum; | |
4282 | ps->ps_clshift = local_log2(bs->bs_clsize); | |
4283 | ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift; | |
b0d623f7 | 4284 | ps->ps_special_clusters = 0; |
1c79356b A |
4285 | ps->ps_hint = 0; |
4286 | ||
4287 | PS_LOCK_INIT(ps); | |
4288 | ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls)); | |
4289 | if (!ps->ps_bmap) { | |
316670eb | 4290 | PS_LOCK_DESTROY(ps); |
91447636 | 4291 | kfree(ps, sizeof *ps); |
1c79356b A |
4292 | BS_UNLOCK(bs); |
4293 | return KERN_RESOURCE_SHORTAGE; | |
4294 | } | |
91447636 A |
4295 | for (j = 0; j < ps->ps_ncls; j++) { |
4296 | clrbit(ps->ps_bmap, j); | |
1c79356b A |
4297 | } |
4298 | ||
b0d623f7 A |
4299 | if(paging_segment_count == 0) { |
4300 | ps->ps_state = PS_EMERGENCY_SEGMENT; | |
4301 | if(use_emergency_swap_file_first) { | |
4302 | ps->ps_state |= PS_CAN_USE; | |
4303 | } | |
4304 | emergency_segment_backing_store = backing_store; | |
4305 | } else { | |
4306 | ps->ps_state = PS_CAN_USE; | |
4307 | } | |
4308 | ||
1c79356b A |
4309 | ps->ps_bs = bs; |
4310 | ||
4311 | if ((error = ps_enter(ps)) != 0) { | |
91447636 | 4312 | kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls)); |
316670eb | 4313 | PS_LOCK_DESTROY(ps); |
91447636 | 4314 | kfree(ps, sizeof *ps); |
1c79356b A |
4315 | BS_UNLOCK(bs); |
4316 | return KERN_RESOURCE_SHORTAGE; | |
4317 | } | |
4318 | ||
4319 | bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift; | |
4320 | bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift; | |
4321 | PSL_LOCK(); | |
b0d623f7 A |
4322 | if(IS_PS_OK_TO_USE(ps)) { |
4323 | dp_pages_free += ps->ps_pgcount; | |
4324 | } else { | |
4325 | dp_pages_reserve += ps->ps_pgcount; | |
4326 | } | |
1c79356b A |
4327 | PSL_UNLOCK(); |
4328 | ||
4329 | BS_UNLOCK(bs); | |
4330 | ||
4331 | bs_more_space(ps->ps_clcount); | |
4332 | ||
b0d623f7 A |
4333 | /* |
4334 | * If the paging segment being activated is not the emergency | |
4335 | * segment and we notice that the emergency segment is being | |
4336 | * used then we help recover it. If all goes well, the | |
4337 | * emergency segment will be back to its original state of | |
4338 | * online but not activated (till it's needed the next time). | |
4339 | */ | |
6d2010ae | 4340 | #if CONFIG_FREEZE |
316670eb | 4341 | if (!memorystatus_freeze_enabled) |
6d2010ae A |
4342 | #endif |
4343 | { | |
4344 | ps = paging_segments[EMERGENCY_PSEG_INDEX]; | |
4345 | if(IS_PS_EMERGENCY_SEGMENT(ps) && IS_PS_OK_TO_USE(ps)) { | |
4346 | if(default_pager_backing_store_delete(emergency_segment_backing_store)) { | |
4347 | dprintf(("Failed to recover emergency paging segment\n")); | |
4348 | } else { | |
4349 | dprintf(("Recovered emergency paging segment\n")); | |
4350 | } | |
b0d623f7 A |
4351 | } |
4352 | } | |
4353 | ||
91447636 A |
4354 | DP_DEBUG(DEBUG_BS_INTERNAL, |
4355 | ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n", | |
b0d623f7 | 4356 | device, offset, (dp_size_t) size, record_size, |
91447636 | 4357 | ps->ps_record_shift, ps->ps_pgnum)); |
1c79356b A |
4358 | |
4359 | return KERN_SUCCESS; | |
4360 | } | |
4361 | ||
4362 | ||
4363 | ||
1c79356b A |
4364 | kern_return_t |
4365 | ps_read_file( | |
4366 | paging_segment_t ps, | |
4367 | upl_t upl, | |
91447636 | 4368 | upl_offset_t upl_offset, |
b0d623f7 | 4369 | dp_offset_t offset, |
91447636 | 4370 | upl_size_t size, |
1c79356b A |
4371 | unsigned int *residualp, |
4372 | int flags) | |
4373 | { | |
4374 | vm_object_offset_t f_offset; | |
4375 | int error = 0; | |
4376 | int result; | |
1c79356b | 4377 | |
91447636 | 4378 | assert(dp_encryption_inited); |
1c79356b | 4379 | |
55e303ae | 4380 | clustered_reads[atop_32(size)]++; |
1c79356b A |
4381 | |
4382 | f_offset = (vm_object_offset_t)(ps->ps_offset + offset); | |
4383 | ||
2d21ac55 A |
4384 | /* |
4385 | * for transfer case we need to pass uploffset and flags | |
4386 | */ | |
b0d623f7 A |
4387 | assert((upl_size_t) size == size); |
4388 | error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL); | |
1c79356b A |
4389 | |
4390 | /* The vnode_pagein semantic is somewhat at odds with the existing */ | |
4391 | /* device_read semantic. Partial reads are not experienced at this */ | |
4392 | /* level. It is up to the bit map code and cluster read code to */ | |
4393 | /* check that requested data locations are actually backed, and the */ | |
4394 | /* pagein code to either read all of the requested data or return an */ | |
4395 | /* error. */ | |
4396 | ||
4397 | if (error) | |
4398 | result = KERN_FAILURE; | |
4399 | else { | |
4400 | *residualp = 0; | |
4401 | result = KERN_SUCCESS; | |
4402 | } | |
4403 | return result; | |
1c79356b A |
4404 | } |
4405 | ||
4406 | kern_return_t | |
4407 | ps_write_file( | |
4408 | paging_segment_t ps, | |
4409 | upl_t upl, | |
91447636 | 4410 | upl_offset_t upl_offset, |
b0d623f7 | 4411 | dp_offset_t offset, |
1c79356b A |
4412 | unsigned int size, |
4413 | int flags) | |
4414 | { | |
4415 | vm_object_offset_t f_offset; | |
4416 | kern_return_t result; | |
1c79356b | 4417 | |
91447636 | 4418 | assert(dp_encryption_inited); |
1c79356b | 4419 | |
55e303ae | 4420 | clustered_writes[atop_32(size)]++; |
1c79356b A |
4421 | f_offset = (vm_object_offset_t)(ps->ps_offset + offset); |
4422 | ||
91447636 A |
4423 | if (flags & UPL_PAGING_ENCRYPTED) { |
4424 | /* | |
4425 | * ENCRYPTED SWAP: | |
4426 | * encrypt all the pages that we're going | |
4427 | * to pageout. | |
4428 | */ | |
4429 | upl_encrypt(upl, upl_offset, size); | |
4430 | } | |
b0d623f7 A |
4431 | assert((upl_size_t) size == size); |
4432 | if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (upl_size_t)size, flags, NULL)) | |
1c79356b A |
4433 | result = KERN_FAILURE; |
4434 | else | |
4435 | result = KERN_SUCCESS; | |
4436 | ||
4437 | return result; | |
4438 | } | |
4439 | ||
6d2010ae A |
4440 | static inline void ps_vnode_trim_init(struct ps_vnode_trim_data *data) |
4441 | { | |
6d2010ae | 4442 | #pragma unused(data) |
6d2010ae A |
4443 | } |
4444 | ||
4445 | static inline void ps_vnode_trim_now(struct ps_vnode_trim_data *data) | |
4446 | { | |
6d2010ae | 4447 | #pragma unused(data) |
6d2010ae A |
4448 | } |
4449 | ||
4450 | static inline void ps_vnode_trim_more(struct ps_vnode_trim_data *data, struct vs_map *map, unsigned int shift, dp_size_t length) | |
4451 | { | |
6d2010ae | 4452 | #pragma unused(data, map, shift, length) |
6d2010ae A |
4453 | } |
4454 | ||
1c79356b | 4455 | kern_return_t |
91447636 | 4456 | default_pager_triggers( __unused MACH_PORT_FACE default_pager, |
1c79356b A |
4457 | int hi_wat, |
4458 | int lo_wat, | |
4459 | int flags, | |
4460 | MACH_PORT_FACE trigger_port) | |
4461 | { | |
6d2010ae | 4462 | MACH_PORT_FACE release = IPC_PORT_NULL; |
0b4e3aa0 | 4463 | kern_return_t kr; |
b0d623f7 A |
4464 | clock_sec_t now; |
4465 | clock_nsec_t nanoseconds_dummy; | |
4466 | static clock_sec_t error_notify = 0; | |
1c79356b | 4467 | |
0b4e3aa0 | 4468 | PSL_LOCK(); |
91447636 A |
4469 | if (flags == SWAP_ENCRYPT_ON) { |
4470 | /* ENCRYPTED SWAP: turn encryption on */ | |
4471 | release = trigger_port; | |
4472 | if (!dp_encryption_inited) { | |
4473 | dp_encryption_inited = TRUE; | |
4474 | dp_encryption = TRUE; | |
4475 | kr = KERN_SUCCESS; | |
4476 | } else { | |
4477 | kr = KERN_FAILURE; | |
4478 | } | |
4479 | } else if (flags == SWAP_ENCRYPT_OFF) { | |
4480 | /* ENCRYPTED SWAP: turn encryption off */ | |
4481 | release = trigger_port; | |
4482 | if (!dp_encryption_inited) { | |
4483 | dp_encryption_inited = TRUE; | |
4484 | dp_encryption = FALSE; | |
4485 | kr = KERN_SUCCESS; | |
4486 | } else { | |
4487 | kr = KERN_FAILURE; | |
4488 | } | |
4489 | } else if (flags == HI_WAT_ALERT) { | |
0b4e3aa0 | 4490 | release = min_pages_trigger_port; |
6d2010ae A |
4491 | #if CONFIG_FREEZE |
4492 | /* High and low water signals aren't applicable when freeze is */ | |
4493 | /* enabled, so release the trigger ports here and return */ | |
4494 | /* KERN_FAILURE. */ | |
316670eb | 4495 | if (memorystatus_freeze_enabled) { |
6d2010ae A |
4496 | if (IP_VALID( trigger_port )){ |
4497 | ipc_port_release_send( trigger_port ); | |
4498 | } | |
4499 | min_pages_trigger_port = IPC_PORT_NULL; | |
4500 | kr = KERN_FAILURE; | |
4501 | } | |
4502 | else | |
4503 | #endif | |
4504 | { | |
4505 | min_pages_trigger_port = trigger_port; | |
4506 | minimum_pages_remaining = hi_wat/vm_page_size; | |
4507 | bs_low = FALSE; | |
4508 | kr = KERN_SUCCESS; | |
4509 | } | |
0b4e3aa0 A |
4510 | } else if (flags == LO_WAT_ALERT) { |
4511 | release = max_pages_trigger_port; | |
6d2010ae | 4512 | #if CONFIG_FREEZE |
316670eb | 4513 | if (memorystatus_freeze_enabled) { |
6d2010ae A |
4514 | if (IP_VALID( trigger_port )){ |
4515 | ipc_port_release_send( trigger_port ); | |
4516 | } | |
4517 | max_pages_trigger_port = IPC_PORT_NULL; | |
4518 | kr = KERN_FAILURE; | |
4519 | } | |
4520 | else | |
4521 | #endif | |
4522 | { | |
4523 | max_pages_trigger_port = trigger_port; | |
4524 | maximum_pages_free = lo_wat/vm_page_size; | |
4525 | kr = KERN_SUCCESS; | |
4526 | } | |
b0d623f7 A |
4527 | } else if (flags == USE_EMERGENCY_SWAP_FILE_FIRST) { |
4528 | use_emergency_swap_file_first = TRUE; | |
4529 | release = trigger_port; | |
4530 | kr = KERN_SUCCESS; | |
4531 | } else if (flags == SWAP_FILE_CREATION_ERROR) { | |
4532 | release = trigger_port; | |
4533 | kr = KERN_SUCCESS; | |
4534 | if( paging_segment_count == 1) { | |
4535 | use_emergency_swap_file_first = TRUE; | |
4536 | } | |
4537 | no_paging_space_action(); | |
4538 | clock_get_system_nanotime(&now, &nanoseconds_dummy); | |
4539 | if (now > error_notify + 5) { | |
4540 | dprintf(("Swap File Error.\n")); | |
4541 | error_notify = now; | |
4542 | } | |
0b4e3aa0 A |
4543 | } else { |
4544 | release = trigger_port; | |
4545 | kr = KERN_INVALID_ARGUMENT; | |
1c79356b | 4546 | } |
0b4e3aa0 A |
4547 | PSL_UNLOCK(); |
4548 | ||
4549 | if (IP_VALID(release)) | |
4550 | ipc_port_release_send(release); | |
4551 | ||
4552 | return kr; | |
1c79356b | 4553 | } |
55e303ae A |
4554 | |
4555 | /* | |
4556 | * Monitor the amount of available backing store vs. the amount of | |
4557 | * required backing store, notify a listener (if present) when | |
4558 | * backing store may safely be removed. | |
4559 | * | |
4560 | * We attempt to avoid the situation where backing store is | |
4561 | * discarded en masse, as this can lead to thrashing as the | |
4562 | * backing store is compacted. | |
4563 | */ | |
4564 | ||
4565 | #define PF_INTERVAL 3 /* time between free level checks */ | |
4566 | #define PF_LATENCY 10 /* number of intervals before release */ | |
4567 | ||
4568 | static int dp_pages_free_low_count = 0; | |
91447636 | 4569 | thread_call_t default_pager_backing_store_monitor_callout; |
55e303ae A |
4570 | |
4571 | void | |
91447636 A |
4572 | default_pager_backing_store_monitor(__unused thread_call_param_t p1, |
4573 | __unused thread_call_param_t p2) | |
55e303ae | 4574 | { |
91447636 | 4575 | // unsigned long long average; |
55e303ae A |
4576 | ipc_port_t trigger; |
4577 | uint64_t deadline; | |
4578 | ||
4579 | /* | |
4580 | * We determine whether it will be safe to release some | |
4581 | * backing store by watching the free page level. If | |
4582 | * it remains below the maximum_pages_free threshold for | |
4583 | * at least PF_LATENCY checks (taken at PF_INTERVAL seconds) | |
4584 | * then we deem it safe. | |
4585 | * | |
4586 | * Note that this establishes a maximum rate at which backing | |
4587 | * store will be released, as each notification (currently) | |
4588 | * only results in a single backing store object being | |
4589 | * released. | |
4590 | */ | |
4591 | if (dp_pages_free > maximum_pages_free) { | |
4592 | dp_pages_free_low_count++; | |
4593 | } else { | |
4594 | dp_pages_free_low_count = 0; | |
4595 | } | |
4596 | ||
4597 | /* decide whether to send notification */ | |
4598 | trigger = IP_NULL; | |
4599 | if (max_pages_trigger_port && | |
4600 | (backing_store_release_trigger_disable == 0) && | |
4601 | (dp_pages_free_low_count > PF_LATENCY)) { | |
4602 | trigger = max_pages_trigger_port; | |
4603 | max_pages_trigger_port = NULL; | |
4604 | } | |
4605 | ||
4606 | /* send notification */ | |
4607 | if (trigger != IP_NULL) { | |
4608 | VSL_LOCK(); | |
4609 | if(backing_store_release_trigger_disable != 0) { | |
4610 | assert_wait((event_t) | |
4611 | &backing_store_release_trigger_disable, | |
4612 | THREAD_UNINT); | |
4613 | VSL_UNLOCK(); | |
4614 | thread_block(THREAD_CONTINUE_NULL); | |
4615 | } else { | |
4616 | VSL_UNLOCK(); | |
4617 | } | |
6d2010ae A |
4618 | dprintf(("default_pager_backing_store_monitor - send LO_WAT_ALERT\n")); |
4619 | ||
55e303ae A |
4620 | default_pager_space_alert(trigger, LO_WAT_ALERT); |
4621 | ipc_port_release_send(trigger); | |
4622 | dp_pages_free_low_count = 0; | |
4623 | } | |
4624 | ||
4625 | clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline); | |
91447636 | 4626 | thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline); |
55e303ae | 4627 | } |
6d2010ae A |
4628 | |
4629 | #if CONFIG_FREEZE | |
4630 | unsigned int default_pager_swap_pages_free() { | |
4631 | return dp_pages_free; | |
4632 | } | |
4633 | #endif |