osfmk/default_pager/dp_backing_store.c (apple/xnu, xnu-124.13)
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51 /*
52 * Default Pager.
53 * Paging File Management.
54 */
55
56 #include <mach/memory_object_server.h>
57 #include "default_pager_internal.h"
58 #include <default_pager/default_pager_alerts.h>
59 #include <ipc/ipc_port.h>
60 #include <ipc/ipc_space.h>
61 #include <kern/queue.h>
62 #include <kern/counters.h>
63 #include <kern/sched_prim.h>
64 #include <vm/vm_kern.h>
65 #include <vm/vm_pageout.h>
66 /* CDY CDY */
67 #include <vm/vm_map.h>
68
69 /* MAXPHYS is derived from bsd/bsd/ppc/param.h; we need either a */
70 /* universal value originating in the kernel, or a formal means of */
71 /* exporting it from the bsd component. */
72
73 #define MAXPHYS (64 * 1024)
74 int physical_transfer_cluster_count = 0;
75
76 #define VM_SUPER_CLUSTER 0x10000
77
78 /*
79 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
80 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
81 */
82 #define VSTRUCT_DEF_CLSHIFT 2
83 int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
84 int default_pager_clsize = 0;
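
/*
 * Illustrative sketch, not part of the original source: because the
 * cluster size is kept as a power-of-two shift, pages-per-cluster is
 * simply 1 << clshift, so VSTRUCT_DEF_CLSHIFT == 2 means 4 pages per
 * cluster (16K with 4K pages).  The hypothetical helper below only
 * restates that arithmetic.
 */
#if 0	/* explanatory sketch only */
static unsigned int
cluster_bytes(int clshift, unsigned int page_size)
{
	return ((unsigned int)1 << clshift) * page_size;
}
#endif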
85
86 /* statistics */
87 unsigned int clustered_writes[MAX_CLUSTER_SIZE+1];
88 unsigned int clustered_reads[MAX_CLUSTER_SIZE+1];
89
90 /*
91 * Globals used for asynchronous paging operations:
92 * vs_async_list: head of list of to-be-completed I/O ops
93 * async_num_queued: number of pages completed, but not yet
94 * processed by async thread.
95 * async_requests_out: number of pages of requests not completed.
96 */
97
98 #if 0
99 struct vs_async *vs_async_list;
100 int async_num_queued;
101 int async_requests_out;
102 #endif
103
104
105 #define VS_ASYNC_REUSE 1
106 struct vs_async *vs_async_free_list;
107
108 mutex_t default_pager_async_lock; /* Protects globals above */
109
110
111 int vs_alloc_async_failed = 0; /* statistics */
112 int vs_alloc_async_count = 0; /* statistics */
113 struct vs_async *vs_alloc_async(void); /* forward */
114 void vs_free_async(struct vs_async *vsa); /* forward */
115
116
117 #define VS_ALLOC_ASYNC() vs_alloc_async()
118 #define VS_FREE_ASYNC(vsa) vs_free_async(vsa)
119
120 #define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock)
121 #define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock)
122 #define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, \
123 ETAP_IO_DEV_PAGEH)
124 #define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock)
125 /*
126 * Paging Space Hysteresis triggers and the target notification port
127 *
128 */
129
130 unsigned int minimum_pages_remaining = 0;
131 unsigned int maximum_pages_free = 0;
132 ipc_port_t min_pages_trigger_port = NULL;
133 ipc_port_t max_pages_trigger_port = NULL;
134
135 boolean_t bs_low = FALSE;
136
137
138
139 /*
140 * Object sizes are rounded up to the next power of 2,
141 * unless they are bigger than a given maximum size.
142 */
143 vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */
144
145 /*
146 * List of all backing store and segments.
147 */
148 struct backing_store_list_head backing_store_list;
149 paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
150 mutex_t paging_segments_lock;
151 int paging_segment_max = 0;
152 int paging_segment_count = 0;
153 int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
154
155
156 /*
157 * Total pages free in system
158 * This differs from clusters committed/avail, which is a measure of the
159 * over-commitment of paging segments to backing store, an idea which is
160 * likely to be deprecated.
161 */
162 unsigned int dp_pages_free = 0;
163 unsigned int cluster_transfer_minimum = 100;
164
165 kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int); /* forward */
166
167 default_pager_thread_t *
168 get_read_buffer()
169 {
170 int i;
171
172 DPT_LOCK(dpt_lock);
173 while(TRUE) {
174 for (i=0; i<default_pager_internal_count; i++) {
175 if(dpt_array[i]->checked_out == FALSE) {
176 dpt_array[i]->checked_out = TRUE;
177 DPT_UNLOCK(dpt_lock);
178 return dpt_array[i];
179 }
180 }
181 assert_wait(&dpt_array, THREAD_UNINT);
182 DPT_UNLOCK(dpt_lock);
183 thread_block((void(*)(void))0);
184 }
185 }
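
/*
 * Sketch of the presumed release path (an assumption, not shown in
 * this section): a buffer obtained from get_read_buffer() would be
 * handed back by clearing checked_out under the same lock and waking
 * any thread blocked on &dpt_array, roughly as below.
 */
#if 0	/* explanatory sketch only */
void
return_read_buffer(default_pager_thread_t *dpt)
{
	DPT_LOCK(dpt_lock);
	dpt->checked_out = FALSE;
	thread_wakeup(&dpt_array);	/* pairs with the assert_wait above */
	DPT_UNLOCK(dpt_lock);
}
#endif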
186
187 void
188 bs_initialize(void)
189 {
190 int i;
191
192 /*
193 * List of all backing store.
194 */
195 BSL_LOCK_INIT();
196 queue_init(&backing_store_list.bsl_queue);
197 PSL_LOCK_INIT();
198
199 VS_ASYNC_LOCK_INIT();
200 #if VS_ASYNC_REUSE
201 vs_async_free_list = NULL;
202 #endif /* VS_ASYNC_REUSE */
203
204 for (i = 0; i < MAX_CLUSTER_SIZE+1; i++) {
205 clustered_writes[i] = 0;
206 clustered_reads[i] = 0;
207 }
208
209 }
210
211 /*
212 * When things do not quite work out...
213 */
214 void bs_no_paging_space(boolean_t); /* forward */
215
216 void
217 bs_no_paging_space(
218 boolean_t out_of_memory)
219 {
220 static char here[] = "bs_no_paging_space";
221
222 if (out_of_memory)
223 dprintf(("*** OUT OF MEMORY ***\n"));
224 panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
225 }
226
227 void bs_more_space(int); /* forward */
228 void bs_commit(int); /* forward */
229
230 boolean_t user_warned = FALSE;
231 unsigned int clusters_committed = 0;
232 unsigned int clusters_available = 0;
233 unsigned int clusters_committed_peak = 0;
234
235 void
236 bs_more_space(
237 int nclusters)
238 {
239 BSL_LOCK();
240 /*
241 * Account for new paging space.
242 */
243 clusters_available += nclusters;
244
245 if (clusters_available >= clusters_committed) {
246 if (verbose && user_warned) {
247 printf("%s%s - %d excess clusters now.\n",
248 my_name,
249 "paging space is OK now",
250 clusters_available - clusters_committed);
251 user_warned = FALSE;
252 clusters_committed_peak = 0;
253 }
254 } else {
255 if (verbose && user_warned) {
256 printf("%s%s - still short of %d clusters.\n",
257 my_name,
258 "WARNING: paging space over-committed",
259 clusters_committed - clusters_available);
260 clusters_committed_peak -= nclusters;
261 }
262 }
263 BSL_UNLOCK();
264
265 return;
266 }
267
268 void
269 bs_commit(
270 int nclusters)
271 {
272 BSL_LOCK();
273 clusters_committed += nclusters;
274 if (clusters_committed > clusters_available) {
275 if (verbose && !user_warned) {
276 user_warned = TRUE;
277 printf("%s%s - short of %d clusters.\n",
278 my_name,
279 "WARNING: paging space over-committed",
280 clusters_committed - clusters_available);
281 }
282 if (clusters_committed > clusters_committed_peak) {
283 clusters_committed_peak = clusters_committed;
284 }
285 } else {
286 if (verbose && user_warned) {
287 printf("%s%s - was short of up to %d clusters.\n",
288 my_name,
289 "paging space is OK now",
290 clusters_committed_peak - clusters_available);
291 user_warned = FALSE;
292 clusters_committed_peak = 0;
293 }
294 }
295 BSL_UNLOCK();
296
297 return;
298 }
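
/*
 * Worked example (illustrative): with clusters_available == 6, a
 * bs_commit(10) raises clusters_committed to 10, prints the
 * over-commit warning ("short of 4 clusters") when verbose, and
 * records a peak of 10.  A later bs_more_space(4) brings availability
 * level with commitment and prints the all-clear, resetting
 * user_warned and the peak.
 */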
299
300 int default_pager_info_verbose = 1;
301
302 void
303 bs_global_info(
304 vm_size_t *totalp,
305 vm_size_t *freep)
306 {
307 vm_size_t pages_total, pages_free;
308 paging_segment_t ps;
309 int i;
310 static char here[] = "bs_global_info";
311
312 PSL_LOCK();
313 pages_total = pages_free = 0;
314 for (i = 0; i <= paging_segment_max; i++) {
315 ps = paging_segments[i];
316 if (ps == PAGING_SEGMENT_NULL)
317 continue;
318
319 /*
320 * no need to lock: by the time this data
321 * gets back to any remote requestor it
322 * will be obsolete anyway
323 */
324 pages_total += ps->ps_pgnum;
325 pages_free += ps->ps_clcount << ps->ps_clshift;
326 DEBUG(DEBUG_BS_INTERNAL,
327 ("segment #%d: %d total, %d free\n",
328 i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
329 }
330 *totalp = pages_total;
331 *freep = pages_free;
332 if (verbose && user_warned && default_pager_info_verbose) {
333 if (clusters_available < clusters_committed) {
334 printf("%s %d clusters committed, %d available.\n",
335 my_name,
336 clusters_committed,
337 clusters_available);
338 }
339 }
340 PSL_UNLOCK();
341 }
342
343 backing_store_t backing_store_alloc(void); /* forward */
344
345 backing_store_t
346 backing_store_alloc(void)
347 {
348 backing_store_t bs;
349 static char here[] = "backing_store_alloc";
350
351 bs = (backing_store_t) kalloc(sizeof (struct backing_store));
352 if (bs == BACKING_STORE_NULL)
353 panic("backing_store_alloc: no memory");
354
355 BS_LOCK_INIT(bs);
356 bs->bs_port = MACH_PORT_NULL;
357 bs->bs_priority = 0;
358 bs->bs_clsize = 0;
359 bs->bs_pages_total = 0;
360 bs->bs_pages_in = 0;
361 bs->bs_pages_in_fail = 0;
362 bs->bs_pages_out = 0;
363 bs->bs_pages_out_fail = 0;
364
365 return bs;
366 }
367
368 backing_store_t backing_store_lookup(MACH_PORT_FACE); /* forward */
369
370 /* In both the component-space and external versions of this pager, */
371 /* backing_store_lookup will be called from tasks in the application space. */
372 backing_store_t
373 backing_store_lookup(
374 MACH_PORT_FACE port)
375 {
376 backing_store_t bs;
377
378 /*
379 The port is currently backed with a vs structure in the alias field.
380 We could create an ISBS alias and a port_is_bs call, but frankly
381 I see no reason for the test: the bs->bs_port == port check below
382 will work properly on junk entries.
383
384 if ((port == MACH_PORT_NULL) || port_is_vs(port))
385 */
386 if ((port == MACH_PORT_NULL))
387 return BACKING_STORE_NULL;
388
389 BSL_LOCK();
390 queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
391 bs_links) {
392 BS_LOCK(bs);
393 if (bs->bs_port == port) {
394 BSL_UNLOCK();
395 /* Success, return it locked. */
396 return bs;
397 }
398 BS_UNLOCK(bs);
399 }
400 BSL_UNLOCK();
401 return BACKING_STORE_NULL;
402 }
403
404 void backing_store_add(backing_store_t); /* forward */
405
406 void
407 backing_store_add(
408 backing_store_t bs)
409 {
410 MACH_PORT_FACE port = bs->bs_port;
411 MACH_PORT_FACE pset = default_pager_default_set;
412 kern_return_t kr = KERN_SUCCESS;
413 static char here[] = "backing_store_add";
414
415 if (kr != KERN_SUCCESS)
416 panic("backing_store_add: add to set");
417
418 }
419
420 /*
421 * Set up default page shift, but only if not already
422 * set and argument is within range.
423 */
424 boolean_t
425 bs_set_default_clsize(unsigned int npages)
426 {
427 switch(npages){
428 case 1:
429 case 2:
430 case 4:
431 case 8:
432 if (default_pager_clsize == 0) /* if not yet set */
433 vstruct_def_clshift = local_log2(npages);
434 return(TRUE);
435 }
436 return(FALSE);
437 }
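
/*
 * Worked example (illustrative): bs_set_default_clsize(4) sets
 * vstruct_def_clshift to local_log2(4) == 2, i.e. 4 pages per cluster,
 * provided the cluster size has not been fixed yet; any npages outside
 * {1, 2, 4, 8} falls through and returns FALSE.
 */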
438
439 int bs_get_global_clsize(int clsize); /* forward */
440
441 int
442 bs_get_global_clsize(
443 int clsize)
444 {
445 int i;
446 MACH_PORT_FACE DMM;
447 kern_return_t kr;
448 static char here[] = "bs_get_global_clsize";
449
450 /*
451 * Only allow setting of cluster size once. If called
452 * with no cluster size (default), we use the compiled-in default
453 * for the duration. The same cluster size is used for all
454 * paging segments.
455 */
456 if (default_pager_clsize == 0) {
457 if (norma_mk) {
458 /*
459 * On NORMA, don't use clustered paging because
460 * XMM can't handle it.
461 */
462 vstruct_def_clshift = 0;
463 }
464 /*
465 * Keep cluster size in bit shift because it's quicker
466 * arithmetic, and easier to keep at a power of 2.
467 */
468 if (clsize != NO_CLSIZE) {
469 for (i = 0; (1 << i) < clsize; i++);
470 if (i > MAX_CLUSTER_SHIFT)
471 i = MAX_CLUSTER_SHIFT;
472 vstruct_def_clshift = i;
473 }
474 default_pager_clsize = (1 << vstruct_def_clshift);
475
476 /*
477 * Let the user know the new (and definitive) cluster size.
478 */
479 if (verbose)
480 printf("%scluster size = %d page%s\n",
481 my_name, default_pager_clsize,
482 (default_pager_clsize == 1) ? "" : "s");
483 /*
484 * Let the kernel know too, in case it hasn't used the
485 * default value provided in main() yet.
486 */
487 DMM = default_pager_default_port;
488 clsize = default_pager_clsize * vm_page_size; /* in bytes */
489 kr = host_default_memory_manager(host_priv_self(),
490 &DMM,
491 clsize);
492 if (kr != KERN_SUCCESS) {
493 panic("bs_get_global_cl_size:host_default_memory_manager");
494 }
495 if (DMM != default_pager_default_port) {
496 panic("bs_get_global_cl_size:there is another default pager");
497 }
498 }
499 ASSERT(default_pager_clsize > 0 &&
500 (default_pager_clsize & (default_pager_clsize - 1)) == 0);
501
502 return default_pager_clsize;
503 }
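
/*
 * Illustrative sketch, not part of the original source: the loop above
 * finds the smallest shift i with (1 << i) >= clsize, i.e. it rounds a
 * requested cluster size up to the next power of two before capping it
 * at MAX_CLUSTER_SHIFT.  The hypothetical helper restates that search.
 */
#if 0	/* explanatory sketch only */
static int
round_clsize_to_shift(int clsize)
{
	int i;

	for (i = 0; (1 << i) < clsize; i++)
		continue;
	return i;	/* e.g. clsize 3 -> 2 (4 pages), clsize 4 -> 2 */
}
#endif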
504
505 kern_return_t
506 default_pager_backing_store_create(
507 MACH_PORT_FACE pager,
508 int priority,
509 int clsize, /* in bytes */
510 MACH_PORT_FACE *backing_store)
511 {
512 backing_store_t bs;
513 MACH_PORT_FACE port;
514 kern_return_t kr;
515 struct vstruct_alias *alias_struct;
516 static char here[] = "default_pager_backing_store_create";
517
518 if (pager != default_pager_default_port)
519 return KERN_INVALID_ARGUMENT;
520
521 bs = backing_store_alloc();
522 port = ipc_port_alloc_kernel();
523 ipc_port_make_send(port);
524 assert (port != IP_NULL);
525
526 DEBUG(DEBUG_BS_EXTERNAL,
527 ("priority=%d clsize=%d bs_port=0x%x\n",
528 priority, clsize, (int) backing_store));
529
530 alias_struct = (struct vstruct_alias *)
531 kalloc(sizeof (struct vstruct_alias));
532 if(alias_struct != NULL) {
533 alias_struct->vs = (struct vstruct *)bs;
534 alias_struct->name = ISVS;
535 port->alias = (int) alias_struct;
536 }
537 else {
538 ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
539 kfree((vm_offset_t)bs, sizeof (struct backing_store));
540 return KERN_RESOURCE_SHORTAGE;
541 }
542
543 bs->bs_port = port;
544 if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
545 priority = BS_MAXPRI;
546 else if (priority == BS_NOPRI)
547 priority = BS_MAXPRI;
548 else
549 priority = BS_MINPRI;
550 bs->bs_priority = priority;
551
552 bs->bs_clsize = bs_get_global_clsize(atop(clsize));
553
554 BSL_LOCK();
555 queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
556 bs_links);
557 BSL_UNLOCK();
558
559 backing_store_add(bs);
560
561 *backing_store = port;
562 return KERN_SUCCESS;
563 }
564
565 kern_return_t
566 default_pager_backing_store_info(
567 MACH_PORT_FACE backing_store,
568 backing_store_flavor_t flavour,
569 backing_store_info_t info,
570 mach_msg_type_number_t *size)
571 {
572 backing_store_t bs;
573 backing_store_basic_info_t basic;
574 int i;
575 paging_segment_t ps;
576
577 if (flavour != BACKING_STORE_BASIC_INFO ||
578 *size < BACKING_STORE_BASIC_INFO_COUNT)
579 return KERN_INVALID_ARGUMENT;
580
581 basic = (backing_store_basic_info_t)info;
582 *size = BACKING_STORE_BASIC_INFO_COUNT;
583
584 VSTATS_LOCK(&global_stats.gs_lock);
585 basic->pageout_calls = global_stats.gs_pageout_calls;
586 basic->pagein_calls = global_stats.gs_pagein_calls;
587 basic->pages_in = global_stats.gs_pages_in;
588 basic->pages_out = global_stats.gs_pages_out;
589 basic->pages_unavail = global_stats.gs_pages_unavail;
590 basic->pages_init = global_stats.gs_pages_init;
591 basic->pages_init_writes= global_stats.gs_pages_init_writes;
592 VSTATS_UNLOCK(&global_stats.gs_lock);
593
594 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
595 return KERN_INVALID_ARGUMENT;
596
597 basic->bs_pages_total = bs->bs_pages_total;
598 PSL_LOCK();
599 bs->bs_pages_free = 0;
600 for (i = 0; i <= paging_segment_max; i++) {
601 ps = paging_segments[i];
602 if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
603 PS_LOCK(ps);
604 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
605 PS_UNLOCK(ps);
606 }
607 }
608 PSL_UNLOCK();
609 basic->bs_pages_free = bs->bs_pages_free;
610 basic->bs_pages_in = bs->bs_pages_in;
611 basic->bs_pages_in_fail = bs->bs_pages_in_fail;
612 basic->bs_pages_out = bs->bs_pages_out;
613 basic->bs_pages_out_fail= bs->bs_pages_out_fail;
614
615 basic->bs_priority = bs->bs_priority;
616 basic->bs_clsize = ptoa(bs->bs_clsize); /* in bytes */
617
618 BS_UNLOCK(bs);
619
620 return KERN_SUCCESS;
621 }
622
623 int ps_delete(paging_segment_t); /* forward */
624
625 int
626 ps_delete(
627 paging_segment_t ps)
628 {
629 vstruct_t vs;
630 kern_return_t error = KERN_SUCCESS;
631 int vs_count;
632
633 VSL_LOCK(); /* get the lock on the list of vs's */
634
635 /* The lock relationship and sequence are fairly complicated: */
636 /* this code looks at a live list, locking and unlocking the list */
637 /* as it traverses it. It depends on the locking behavior of */
638 /* default_pager_no_senders. no_senders always locks the vstruct */
639 /* targeted for removal before locking the vstruct list. However */
640 /* it will remove that member of the list without locking its */
641 /* neighbors. We can be sure when we hold a lock on a vstruct */
642 /* it cannot be removed from the list but we must hold the list */
643 /* lock to be sure that its pointers to its neighbors are valid. */
644 /* Also, we can hold off destruction of a vstruct when the list */
645 /* lock and the vs locks are not being held by bumping the */
646 /* vs_async_pending count. */
647
648 /* we will choose instead to hold a send right */
649 vs_count = vstruct_list.vsl_count;
650 vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
651 if(vs == (vstruct_t)&vstruct_list) {
652 VSL_UNLOCK();
653 return KERN_SUCCESS;
654 }
655 VS_LOCK(vs);
656 vs_async_wait(vs); /* wait for any pending async writes */
657 if ((vs_count != 0) && (vs != NULL))
658 vs->vs_async_pending += 1; /* hold parties calling */
659 /* vs_async_wait */
660 VS_UNLOCK(vs);
661 VSL_UNLOCK();
662 while((vs_count != 0) && (vs != NULL)) {
663 /* We take the count of AMO's before beginning the */
664 /* transfer of the target segment. */
665 /* We are guaranteed that the target segment cannot get */
666 /* more users. We also know that queue entries are */
667 /* made at the back of the list. If some of the entries */
668 /* we would check disappear while we are traversing the */
669 /* list then we will either check new entries which */
670 /* do not have any backing store in the target segment */
671 /* or re-check old entries. This might not be optimal */
672 /* but it will always be correct. The alternative is to */
673 /* take a snapshot of the list. */
674 vstruct_t next_vs;
675
676 if(dp_pages_free < cluster_transfer_minimum)
677 error = KERN_FAILURE;
678 else {
679 vm_object_t transfer_object;
680 upl_t upl;
681
682 transfer_object = vm_object_allocate(VM_SUPER_CLUSTER);
683 error = vm_fault_list_request(transfer_object,
684 (vm_object_offset_t)0,
685 VM_SUPER_CLUSTER, &upl, NULL,
686 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
687 | UPL_SET_INTERNAL);
688 if(error == KERN_SUCCESS) {
689 #ifndef ubc_sync_working
690 uc_upl_commit(upl, NULL);
691 error = ps_vstruct_transfer_from_segment(
692 vs, ps, transfer_object);
693 #else
694 error = ps_vstruct_transfer_from_segment(
695 vs, ps, upl);
696 uc_upl_commit(upl, NULL);
697 #endif
698 vm_object_deallocate(transfer_object);
699 } else {
700 vm_object_deallocate(transfer_object);
701 error = KERN_FAILURE;
702 }
703 }
704 if(error) {
705 VS_LOCK(vs);
706 vs->vs_async_pending -= 1; /* release vs_async_wait */
707 if (vs->vs_async_pending == 0) {
708 VS_UNLOCK(vs);
709 thread_wakeup(&vs->vs_waiting_async);
710 } else {
711 VS_UNLOCK(vs);
712 }
713 return KERN_FAILURE;
714 }
715
716 VSL_LOCK();
717 next_vs = (vstruct_t) queue_next(&(vs->vs_links));
718 if((next_vs != (vstruct_t)&vstruct_list) &&
719 (vs != next_vs) && (vs_count != 1)) {
720 VS_LOCK(next_vs);
721 vs_async_wait(next_vs); /* wait for any */
722 /* pending async writes */
723 next_vs->vs_async_pending += 1; /* hold parties */
724 /* calling vs_async_wait */
725 VS_UNLOCK(next_vs);
726 }
727 VSL_UNLOCK();
728 VS_LOCK(vs);
729 vs->vs_async_pending -= 1;
730 if (vs->vs_async_pending == 0) {
731 VS_UNLOCK(vs);
732 thread_wakeup(&vs->vs_waiting_async);
733 } else {
734 VS_UNLOCK(vs);
735 }
736 if((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list))
737 vs = NULL;
738 else
739 vs = next_vs;
740 vs_count--;
741 }
742 return KERN_SUCCESS;
743 }
744
745
746 kern_return_t
747 default_pager_backing_store_delete(
748 MACH_PORT_FACE backing_store)
749 {
750 backing_store_t bs;
751 int i;
752 paging_segment_t ps;
753 int error;
754 int interim_pages_removed = 0;
755 kern_return_t kr;
756 static char here[] = "default_pager_backing_store_delete";
757
758 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
759 return KERN_INVALID_ARGUMENT;
760
761 #if 0
762 /* not implemented */
763 BS_UNLOCK(bs);
764 return KERN_FAILURE;
765 #endif
766
767 restart:
768 PSL_LOCK();
769 error = KERN_SUCCESS;
770 for (i = 0; i <= paging_segment_max; i++) {
771 ps = paging_segments[i];
772 if (ps != PAGING_SEGMENT_NULL &&
773 ps->ps_bs == bs &&
774 ! ps->ps_going_away) {
775 PS_LOCK(ps);
776 /* disable access to this segment */
777 ps->ps_going_away = TRUE;
778 PS_UNLOCK(ps);
779 /*
780 * The "ps" segment is "off-line" now,
781 * we can try and delete it...
782 */
783 if(dp_pages_free < (cluster_transfer_minimum
784 + ps->ps_pgcount)) {
785 error = KERN_FAILURE;
786 PSL_UNLOCK();
787 }
788 else {
789 /* remove all pages associated with the */
790 /* segment from the list of free pages */
791 /* when transfer is through, all target */
792 /* segment pages will appear to be free */
793
794 dp_pages_free -= ps->ps_pgcount;
795 interim_pages_removed += ps->ps_pgcount;
796 PSL_UNLOCK();
797 error = ps_delete(ps);
798 }
799 if (error != KERN_SUCCESS) {
800 /*
801 * We couldn't delete the segment,
802 * probably because there's not enough
803 * virtual memory left.
804 * Re-enable all the segments.
805 */
806 PSL_LOCK();
807 break;
808 }
809 goto restart;
810 }
811 }
812
813 if (error != KERN_SUCCESS) {
814 for (i = 0; i <= paging_segment_max; i++) {
815 ps = paging_segments[i];
816 if (ps != PAGING_SEGMENT_NULL &&
817 ps->ps_bs == bs &&
818 ps->ps_going_away) {
819 PS_LOCK(ps);
820 /* re-enable access to this segment */
821 ps->ps_going_away = FALSE;
822 PS_UNLOCK(ps);
823 }
824 }
825 dp_pages_free += interim_pages_removed;
826 PSL_UNLOCK();
827 BS_UNLOCK(bs);
828 return error;
829 }
830
831 for (i = 0; i <= paging_segment_max; i++) {
832 ps = paging_segments[i];
833 if (ps != PAGING_SEGMENT_NULL &&
834 ps->ps_bs == bs) {
835 if(ps->ps_going_away) {
836 paging_segments[i] = PAGING_SEGMENT_NULL;
837 paging_segment_count--;
838 PS_LOCK(ps);
839 kfree((vm_offset_t)ps->ps_bmap,
840 RMAPSIZE(ps->ps_ncls));
841 kfree((vm_offset_t)ps, sizeof *ps);
842 }
843 }
844 }
845
846 /* Scan the entire ps array separately to make certain we find the */
847 /* proper paging_segment_max */
848 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
849 if(paging_segments[i] != PAGING_SEGMENT_NULL)
850 paging_segment_max = i;
851 }
852
853 PSL_UNLOCK();
854
855 /*
856 * All the segments have been deleted.
857 * We can remove the backing store.
858 */
859
860 /*
861 * Disable lookups of this backing store.
862 */
863 if((void *)bs->bs_port->alias != NULL)
864 kfree((vm_offset_t) bs->bs_port->alias,
865 sizeof (struct vstruct_alias));
866 pager_mux_hash_delete((ipc_port_t) (bs->bs_port));
867 ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
868 bs->bs_port = MACH_PORT_NULL;
869 BS_UNLOCK(bs);
870
871 /*
872 * Remove backing store from backing_store list.
873 */
874 BSL_LOCK();
875 queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t,
876 bs_links);
877 BSL_UNLOCK();
878
879 /*
880 * Free the backing store structure.
881 */
882 kfree((vm_offset_t)bs, sizeof *bs);
883
884 return KERN_SUCCESS;
885 }
886
887 int ps_enter(paging_segment_t); /* forward */
888
889 int
890 ps_enter(
891 paging_segment_t ps)
892 {
893 int i;
894
895 PSL_LOCK();
896
897 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
898 if (paging_segments[i] == PAGING_SEGMENT_NULL)
899 break;
900 }
901
902 if (i < MAX_NUM_PAGING_SEGMENTS) {
903 paging_segments[i] = ps;
904 if (i > paging_segment_max)
905 paging_segment_max = i;
906 paging_segment_count++;
907 if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) ||
908 (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI))
909 ps_select_array[ps->ps_bs->bs_priority] = 0;
910 i = 0;
911 } else {
912 PSL_UNLOCK();
913 return KERN_RESOURCE_SHORTAGE;
914 }
915
916 PSL_UNLOCK();
917 return i;
918 }
919
920 #ifdef DEVICE_PAGING
921 kern_return_t
922 default_pager_add_segment(
923 MACH_PORT_FACE backing_store,
924 MACH_PORT_FACE device,
925 recnum_t offset,
926 recnum_t count,
927 int record_size)
928 {
929 backing_store_t bs;
930 paging_segment_t ps;
931 int i;
932 int error;
933 static char here[] = "default_pager_add_segment";
934
935 if ((bs = backing_store_lookup(backing_store))
936 == BACKING_STORE_NULL)
937 return KERN_INVALID_ARGUMENT;
938
939 PSL_LOCK();
940 for (i = 0; i <= paging_segment_max; i++) {
941 ps = paging_segments[i];
942 if (ps == PAGING_SEGMENT_NULL)
943 continue;
944
945 /*
946 * Check for overlap on same device.
947 */
948 if (!(ps->ps_device != device
949 || offset >= ps->ps_offset + ps->ps_recnum
950 || offset + count <= ps->ps_offset)) {
951 PSL_UNLOCK();
952 BS_UNLOCK(bs);
953 return KERN_INVALID_ARGUMENT;
954 }
955 }
956 PSL_UNLOCK();
957
958 /*
959 * Set up the paging segment
960 */
961 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
962 if (ps == PAGING_SEGMENT_NULL) {
963 BS_UNLOCK(bs);
964 return KERN_RESOURCE_SHORTAGE;
965 }
966
967 ps->ps_segtype = PS_PARTITION;
968 ps->ps_device = device;
969 ps->ps_offset = offset;
970 ps->ps_record_shift = local_log2(vm_page_size / record_size);
971 ps->ps_recnum = count;
972 ps->ps_pgnum = count >> ps->ps_record_shift;
973
974 ps->ps_pgcount = ps->ps_pgnum;
975 ps->ps_clshift = local_log2(bs->bs_clsize);
976 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
977 ps->ps_hint = 0;
978
979 PS_LOCK_INIT(ps);
980 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
981 if (!ps->ps_bmap) {
982 kfree((vm_offset_t)ps, sizeof *ps);
983 BS_UNLOCK(bs);
984 return KERN_RESOURCE_SHORTAGE;
985 }
986 for (i = 0; i < ps->ps_ncls; i++) {
987 clrbit(ps->ps_bmap, i);
988 }
989
990 ps->ps_going_away = FALSE;
991 ps->ps_bs = bs;
992
993 if ((error = ps_enter(ps)) != 0) {
994 kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
995 kfree((vm_offset_t)ps, sizeof *ps);
996 BS_UNLOCK(bs);
997 return KERN_RESOURCE_SHORTAGE;
998 }
999
1000 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
1001 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
1002 BS_UNLOCK(bs);
1003
1004 PSL_LOCK();
1005 dp_pages_free += ps->ps_pgcount;
1006 PSL_UNLOCK();
1007
1008 bs_more_space(ps->ps_clcount);
1009
1010 DEBUG(DEBUG_BS_INTERNAL,
1011 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1012 device, offset, count, record_size,
1013 ps->ps_record_shift, ps->ps_pgnum));
1014
1015 return KERN_SUCCESS;
1016 }
1017
1018 boolean_t
1019 bs_add_device(
1020 char *dev_name,
1021 MACH_PORT_FACE master)
1022 {
1023 security_token_t null_security_token = {
1024 { 0, 0 }
1025 };
1026 MACH_PORT_FACE device;
1027 int info[DEV_GET_SIZE_COUNT];
1028 mach_msg_type_number_t info_count;
1029 MACH_PORT_FACE bs = MACH_PORT_NULL;
1030 unsigned int rec_size;
1031 recnum_t count;
1032 int clsize;
1033 MACH_PORT_FACE reply_port;
1034
1035 if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE,
1036 null_security_token, dev_name, &device))
1037 return FALSE;
1038
1039 info_count = DEV_GET_SIZE_COUNT;
1040 if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) {
1041 rec_size = info[DEV_GET_SIZE_RECORD_SIZE];
1042 count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
1043 clsize = bs_get_global_clsize(0);
1044 if (!default_pager_backing_store_create(
1045 default_pager_default_port,
1046 DEFAULT_PAGER_BACKING_STORE_MAXPRI,
1047 (clsize * vm_page_size),
1048 &bs)) {
1049 if (!default_pager_add_segment(bs, device,
1050 0, count, rec_size)) {
1051 return TRUE;
1052 }
1053 ipc_port_release_receive(bs);
1054 }
1055 }
1056
1057 ipc_port_release_send(device);
1058 return FALSE;
1059 }
1060 #endif /* DEVICE_PAGING */
1061
1062 #if VS_ASYNC_REUSE
1063
1064 struct vs_async *
1065 vs_alloc_async(void)
1066 {
1067 struct vs_async *vsa;
1068 MACH_PORT_FACE reply_port;
1069 kern_return_t kr;
1070
1071 VS_ASYNC_LOCK();
1072 if (vs_async_free_list == NULL) {
1073 VS_ASYNC_UNLOCK();
1074 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1075 if (vsa != NULL) {
1076 /*
1077 * Try allocating a reply port named after the
1078 * address of the vs_async structure.
1079 */
1080 struct vstruct_alias *alias_struct;
1081
1082 reply_port = ipc_port_alloc_kernel();
1083 alias_struct = (struct vstruct_alias *)
1084 kalloc(sizeof (struct vstruct_alias));
1085 if(alias_struct != NULL) {
1086 alias_struct->vs = (struct vstruct *)vsa;
1087 alias_struct->name = ISVS;
1088 reply_port->alias = (int) alias_struct;
1089 vsa->reply_port = reply_port;
1090 vs_alloc_async_count++;
1091 }
1092 else {
1093 vs_alloc_async_failed++;
1094 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1095 (reply_port));
1096 kfree((vm_offset_t)vsa,
1097 sizeof (struct vs_async));
1098 vsa = NULL;
1099 }
1100 }
1101 } else {
1102 vsa = vs_async_free_list;
1103 vs_async_free_list = vs_async_free_list->vsa_next;
1104 VS_ASYNC_UNLOCK();
1105 }
1106
1107 return vsa;
1108 }
1109
1110 void
1111 vs_free_async(
1112 struct vs_async *vsa)
1113 {
1114 VS_ASYNC_LOCK();
1115 vsa->vsa_next = vs_async_free_list;
1116 vs_async_free_list = vsa;
1117 VS_ASYNC_UNLOCK();
1118 }
1119
1120 #else /* VS_ASYNC_REUSE */
1121
1122 struct vs_async *
1123 vs_alloc_async(void)
1124 {
1125 struct vs_async *vsa;
1126 MACH_PORT_FACE reply_port;
1127 kern_return_t kr;
struct vstruct_alias *alias_struct;
1128
1129 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1130 if (vsa != NULL) {
1131 /*
1132 * Try allocating a reply port named after the
1133 * address of the vs_async structure.
1134 */
1135 reply_port = ipc_port_alloc_kernel();
1136 alias_struct = (struct vstruct_alias *)
1137 kalloc(sizeof (struct vstruct_alias));
1138 if(alias_struct != NULL) {
1139 alias_struct->vs = (struct vstruct *)vsa;
1140 alias_struct->name = ISVS;
1141 reply_port->alias = (int) alias_struct;
1142 vsa->reply_port = reply_port;
1143 vs_alloc_async_count++;
1144 }
1145 else {
1146 vs_alloc_async_failed++;
1147 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1148 (reply_port));
1149 kfree((vm_offset_t) vsa,
1150 sizeof (struct vs_async));
1151 vsa = NULL;
1152 }
1153 }
1154
1155 return vsa;
1156 }
1157
1158 void
1159 vs_free_async(
1160 struct vs_async *vsa)
1161 {
1162 static char here[] = "vs_free_async";
1163 MACH_PORT_FACE reply_port;
1164 kern_return_t kr;
1165
1166 reply_port = vsa->reply_port;
1167 kfree((vm_offset_t) reply_port->alias, sizeof (struct vstruct_alias));
1168 kfree((vm_offset_t) vsa, sizeof (struct vs_async));
1169 pager_mux_hash_delete(reply_port);
1170 ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
1171 #if 0
1172 VS_ASYNC_LOCK();
1173 vs_alloc_async_count--;
1174 VS_ASYNC_UNLOCK();
1175 #endif
1176 }
1177
1178 #endif /* VS_ASYNC_REUSE */
1179
1180 vstruct_t
1181 ps_vstruct_create(
1182 vm_size_t size)
1183 {
1184 vstruct_t vs;
1185 int i;
1186 static char here[] = "ps_vstruct_create";
1187
1188 vs = (vstruct_t) kalloc(sizeof (struct vstruct));
1189 if (vs == VSTRUCT_NULL) {
1190 return VSTRUCT_NULL;
1191 }
1192
1193 VS_LOCK_INIT(vs);
1194
1195 /*
1196 * The following fields will be provided later.
1197 */
1198 vs->vs_mem_obj_port = MACH_PORT_NULL;
1199 vs->vs_seqno = 0;
1200 vs->vs_control_port = MACH_PORT_NULL;
1201 vs->vs_control_refs = 0;
1202 vs->vs_object_name = MACH_PORT_NULL;
1203 vs->vs_name_refs = 0;
1204
1205 #ifdef MACH_KERNEL
1206 vs->vs_waiting_seqno = FALSE;
1207 vs->vs_waiting_read = FALSE;
1208 vs->vs_waiting_write = FALSE;
1209 vs->vs_waiting_refs = FALSE;
1210 vs->vs_waiting_async = FALSE;
1211 #else
1212 mutex_init(&vs->vs_waiting_seqno, ETAP_DPAGE_VSSEQNO);
1213 mutex_init(&vs->vs_waiting_read, ETAP_DPAGE_VSREAD);
1214 mutex_init(&vs->vs_waiting_write, ETAP_DPAGE_VSWRITE);
1215 mutex_init(&vs->vs_waiting_refs, ETAP_DPAGE_VSREFS);
1216 mutex_init(&vs->vs_waiting_async, ETAP_DPAGE_VSASYNC);
1217 #endif
1218
1219 vs->vs_readers = 0;
1220 vs->vs_writers = 0;
1221
1222 vs->vs_errors = 0;
1223
1224 vs->vs_clshift = local_log2(bs_get_global_clsize(0));
1225 vs->vs_size = ((atop(round_page(size)) - 1) >> vs->vs_clshift) + 1;
1226 vs->vs_async_pending = 0;
1227
1228 /*
1229 * Allocate the cluster map, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1230 * bytes depending on the size of the memory object.
1231 */
1232 if (INDIRECT_CLMAP(vs->vs_size)) {
1233 vs->vs_imap = (struct vs_map **)
1234 kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size));
1235 vs->vs_indirect = TRUE;
1236 } else {
1237 vs->vs_dmap = (struct vs_map *)
1238 kalloc(CLMAP_SIZE(vs->vs_size));
1239 vs->vs_indirect = FALSE;
1240 }
1241 vs->vs_xfer_pending = FALSE;
1242 DEBUG(DEBUG_VS_INTERNAL,
1243 ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
1244
1245 /*
1246 * Check to see that we got the space.
1247 */
1248 if (!vs->vs_dmap) {
1249 kfree((vm_offset_t)vs, sizeof *vs);
1250 return VSTRUCT_NULL;
1251 }
1252
1253 /*
1254 * Zero the indirect pointers, or clear the direct pointers.
1255 */
1256 if (vs->vs_indirect)
1257 memset(vs->vs_imap, 0,
1258 INDIRECT_CLMAP_SIZE(vs->vs_size));
1259 else
1260 for (i = 0; i < vs->vs_size; i++)
1261 VSM_CLR(vs->vs_dmap[i]);
1262
1263 VS_MAP_LOCK_INIT(vs);
1264
1265 bs_commit(vs->vs_size);
1266
1267 return vs;
1268 }
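
/*
 * Worked example (illustrative): vs_size above is the number of
 * clusters needed to cover the object, i.e. ceil(pages / 2^clshift).
 * With 4K pages, clshift == 2 (16K clusters) and size == 70K:
 * atop(round_page(70K)) == 18 pages, and ((18 - 1) >> 2) + 1 == 5
 * clusters.  The hypothetical helper below restates the computation.
 */
#if 0	/* explanatory sketch only */
static unsigned int
clusters_for_size(vm_size_t size, int clshift)
{
	unsigned int pages = atop(round_page(size));

	return ((pages - 1) >> clshift) + 1;
}
#endif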
1269
1270 paging_segment_t ps_select_segment(int, int *); /* forward */
1271
1272 paging_segment_t
1273 ps_select_segment(
1274 int shift,
1275 int *psindex)
1276 {
1277 paging_segment_t ps;
1278 int i;
1279 int j;
1280 static char here[] = "ps_select_segment";
1281
1282 /*
1283 * Optimize case where there's only one segment.
1284 * paging_segment_max will index the one and only segment.
1285 */
1286
1287 PSL_LOCK();
1288 if (paging_segment_count == 1) {
1289 paging_segment_t lps; /* used to avoid extra PS_UNLOCK */
1290
1291 ps = paging_segments[paging_segment_max];
1292 *psindex = paging_segment_max;
1293 PS_LOCK(ps);
1294 if (ps->ps_going_away) {
1295 /* this segment is being turned off */
1296 lps = PAGING_SEGMENT_NULL;
1297 } else {
1298 ASSERT(ps->ps_clshift >= shift);
1299 if (ps->ps_clcount) {
1300 ps->ps_clcount--;
1301 dp_pages_free -= 1 << ps->ps_clshift;
1302 if(min_pages_trigger_port &&
1303 (dp_pages_free < minimum_pages_remaining)) {
1304 default_pager_space_alert(
1305 min_pages_trigger_port,
1306 HI_WAT_ALERT);
1307 min_pages_trigger_port = NULL;
1308 bs_low = TRUE;
1309 }
1310 lps = ps;
1311 } else
1312 lps = PAGING_SEGMENT_NULL;
1313 }
1314 PS_UNLOCK(ps);
1315 PSL_UNLOCK();
1316 return lps;
1317 }
1318
1319 if (paging_segment_count == 0) {
1320 PSL_UNLOCK();
1321 return PAGING_SEGMENT_NULL;
1322 }
1323
1324 for (i = BS_MAXPRI;
1325 i >= BS_MINPRI; i--) {
1326 int start_index;
1327
1328 if ((ps_select_array[i] == BS_NOPRI) ||
1329 (ps_select_array[i] == BS_FULLPRI))
1330 continue;
1331 start_index = ps_select_array[i];
1332
1333 if(!(paging_segments[start_index])) {
1334 j = start_index+1;
1335 physical_transfer_cluster_count = 0;
1336 }
1337 else if ((physical_transfer_cluster_count+1) == (MAXPHYS >>
1338 (((paging_segments[start_index])->ps_clshift)
1339 + page_shift))) {
1340 physical_transfer_cluster_count = 0;
1341 j = start_index + 1;
1342 } else {
1343 physical_transfer_cluster_count+=1;
1344 j = start_index;
1345 if(start_index == 0)
1346 start_index = paging_segment_max;
1347 else
1348 start_index = start_index - 1;
1349 }
1350
1351 while (1) {
1352 if (j > paging_segment_max)
1353 j = 0;
1354 if ((ps = paging_segments[j]) &&
1355 (ps->ps_bs->bs_priority == i)) {
1356 /*
1357 * Force the ps cluster size to be
1358 * >= that of the vstruct.
1359 */
1360 PS_LOCK(ps);
1361 if (ps->ps_going_away) {
1362 /* this segment is being turned off */
1363 } else if ((ps->ps_clcount) &&
1364 (ps->ps_clshift >= shift)) {
1365 ps->ps_clcount--;
1366 dp_pages_free -= 1 << ps->ps_clshift;
1367 if(min_pages_trigger_port &&
1368 (dp_pages_free <
1369 minimum_pages_remaining)) {
1370 default_pager_space_alert(
1371 min_pages_trigger_port,
1372 HI_WAT_ALERT);
1373 min_pages_trigger_port = NULL;
1374 }
1375 PS_UNLOCK(ps);
1376 /*
1377 * found one, quit looking.
1378 */
1379 ps_select_array[i] = j;
1380 PSL_UNLOCK();
1381 *psindex = j;
1382 return ps;
1383 }
1384 PS_UNLOCK(ps);
1385 }
1386 if (j == start_index) {
1387 /*
1388 * none at this priority -- mark it full
1389 */
1390 ps_select_array[i] = BS_FULLPRI;
1391 break;
1392 }
1393 j++;
1394 }
1395 }
1396 PSL_UNLOCK();
1397 return PAGING_SEGMENT_NULL;
1398 }
1399
1400 vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
1401
1402 vm_offset_t
1403 ps_allocate_cluster(
1404 vstruct_t vs,
1405 int *psindex,
1406 paging_segment_t use_ps)
1407 {
1408 int byte_num;
1409 int bit_num = 0;
1410 paging_segment_t ps;
1411 vm_offset_t cluster;
1412 static char here[] = "ps_allocate_cluster";
1413
1414 /*
1415 * Find best paging segment.
1416 * ps_select_segment will decrement cluster count on ps.
1417 * Must pass cluster shift to find the most appropriate segment.
1418 */
1419 /* NOTE: The addition of paging segment delete capability threatened
1420 * to seriously complicate the treatment of paging segments in this
1421 * module and the ones that call it (notably ps_clmap), because of the
1422 * difficulty in assuring that the paging segment would continue to
1423 * exist between being unlocked and locked. This was
1424 * avoided because all calls to this module are based either on
1425 * dp_memory_object calls, which rely on the vs lock, or on
1426 * the transfer function, which is part of the segment delete path.
1427 * The transfer function which is part of paging segment delete is
1428 * protected from multiple callers by the backing store lock.
1429 * The paging segment delete function treats mappings to a paging
1430 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1431 * while data is transferred to the remaining segments. This is in
1432 * line with the view that incomplete or in-transition mappings between
1433 * data, a vstruct, and backing store are protected by the vs lock.
1434 * This and the ordering of the paging segment "going_away" bit setting
1435 * protects us.
1436 */
1437 if (use_ps != PAGING_SEGMENT_NULL) {
1438 ps = use_ps;
1439 PSL_LOCK();
1440 PS_LOCK(ps);
1441 ps->ps_clcount--;
1442 dp_pages_free -= 1 << ps->ps_clshift;
1443 PSL_UNLOCK();
1444 if(min_pages_trigger_port &&
1445 (dp_pages_free < minimum_pages_remaining)) {
1446 default_pager_space_alert(
1447 min_pages_trigger_port,
1448 HI_WAT_ALERT);
1449 min_pages_trigger_port = NULL;
1450 }
1451 PS_UNLOCK(ps);
1452 } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
1453 PAGING_SEGMENT_NULL) {
1454 #if 0
1455 bs_no_paging_space(TRUE);
1456 #endif
1457 #if 0
1458 if (verbose)
1459 #endif
1460 dprintf(("no space in available paging segments; "
1461 "swapon suggested\n"));
1462 /* the count may have drifted; reset it to zero */
1463 dp_pages_free = 0;
1464 if(min_pages_trigger_port) {
1465 default_pager_space_alert(
1466 min_pages_trigger_port, HI_WAT_ALERT);
1467 min_pages_trigger_port = NULL;
1468 bs_low = TRUE;
1469 }
1470 return (vm_offset_t) -1;
1471 }
1472 ASSERT(ps->ps_clcount != 0);
1473
1474 /*
1475 * Look for an available cluster. At the end of the loop,
1476 * byte_num is the byte offset and bit_num is the bit offset of the
1477 * first zero bit in the paging segment bitmap.
1478 */
1479 PS_LOCK(ps);
1480 byte_num = ps->ps_hint;
1481 for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
1482 if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
1483 for (bit_num = 0; bit_num < NBBY; bit_num++) {
1484 if (isclr((ps->ps_bmap + byte_num), bit_num))
1485 break;
1486 }
1487 ASSERT(bit_num != NBBY);
1488 break;
1489 }
1490 }
1491 ps->ps_hint = byte_num;
1492 cluster = (byte_num*NBBY) + bit_num;
1493
1494 /* Space was reserved, so this must be true */
1495 ASSERT(cluster < ps->ps_ncls);
1496
1497 setbit(ps->ps_bmap, cluster);
1498 PS_UNLOCK(ps);
1499
1500 return cluster;
1501 }
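
/*
 * Illustrative sketch, not part of the original source: the bitmap scan
 * above starts at the ps_hint byte, skips bytes whose clusters are all
 * allocated (== BYTEMASK), and returns the first clear bit as
 * byte_num * NBBY + bit_num.  The hypothetical helper below redoes the
 * search over a plain byte array.
 */
#if 0	/* explanatory sketch only */
static int
first_free_cluster(unsigned char *bmap, int ncls, int hint_byte)
{
	int byte_num, bit_num;

	for (byte_num = hint_byte; byte_num < howmany(ncls, NBBY); byte_num++) {
		if (bmap[byte_num] == BYTEMASK)
			continue;	/* every cluster in this byte is in use */
		for (bit_num = 0; bit_num < NBBY; bit_num++)
			if (isclr(bmap + byte_num, bit_num))
				return (byte_num * NBBY) + bit_num;
	}
	return -1;	/* no free cluster at or after the hint */
}
#endif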
1502
1503 void ps_deallocate_cluster(paging_segment_t, vm_offset_t); /* forward */
1504
1505 void
1506 ps_deallocate_cluster(
1507 paging_segment_t ps,
1508 vm_offset_t cluster)
1509 {
1510
1511 if (cluster >= (vm_offset_t) ps->ps_ncls)
1512 panic("ps_deallocate_cluster: Invalid cluster number");
1513
1514 /*
1515 * Lock the paging segment, clear the cluster's bit in the bitmap and
1516 * increment the number of free clusters.
1517 */
1518 PSL_LOCK();
1519 PS_LOCK(ps);
1520 clrbit(ps->ps_bmap, cluster);
1521 ++ps->ps_clcount;
1522 dp_pages_free += 1 << ps->ps_clshift;
1523 PSL_UNLOCK();
1524 if(max_pages_trigger_port && (dp_pages_free > maximum_pages_free)) {
1525 default_pager_space_alert(max_pages_trigger_port, LO_WAT_ALERT);
1526 max_pages_trigger_port = NULL;
1527 }
1528
1529 /*
1530 * Move the hint down to the freed cluster if it is
1531 * less than the current hint.
1532 */
1533 if ((cluster/NBBY) < ps->ps_hint) {
1534 ps->ps_hint = (cluster/NBBY);
1535 }
1536
1537 PS_UNLOCK(ps);
1538
1539 /*
1540 * If we're freeing space on a full priority, reset the array.
1541 */
1542 PSL_LOCK();
1543 if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
1544 ps_select_array[ps->ps_bs->bs_priority] = 0;
1545 PSL_UNLOCK();
1546
1547 return;
1548 }
1549
1550 void ps_dealloc_vsmap(struct vs_map *, vm_size_t); /* forward */
1551
1552 void
1553 ps_dealloc_vsmap(
1554 struct vs_map *vsmap,
1555 vm_size_t size)
1556 {
1557 int i;
1558 for (i = 0; i < size; i++)
1559 if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
1560 ps_deallocate_cluster(VSM_PS(vsmap[i]),
1561 VSM_CLOFF(vsmap[i]));
1562 }
1563
1564 void
1565 ps_vstruct_dealloc(
1566 vstruct_t vs)
1567 {
1568 int i;
1569 spl_t s;
1570 static char here[] = "ps_vstruct_dealloc";
1571
1572 VS_MAP_LOCK(vs);
1573
1574 /*
1575 * If this is an indirect structure, then we walk through the valid
1576 * (non-zero) indirect pointers and deallocate the clusters
1577 * associated with each used map entry (via ps_dealloc_vsmap).
1578 * When all of the clusters in an indirect block have been
1579 * freed, we deallocate the block. When all of the indirect
1580 * blocks have been deallocated we deallocate the memory
1581 * holding the indirect pointers.
1582 */
1583 if (vs->vs_indirect) {
1584 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
1585 if (vs->vs_imap[i] != NULL) {
1586 ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
1587 kfree((vm_offset_t)vs->vs_imap[i],
1588 CLMAP_THRESHOLD);
1589 }
1590 }
1591 kfree((vm_offset_t)vs->vs_imap,
1592 INDIRECT_CLMAP_SIZE(vs->vs_size));
1593 } else {
1594 /*
1595 * Direct map. Free used clusters, then memory.
1596 */
1597 ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
1598 kfree((vm_offset_t)vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
1599 }
1600 VS_MAP_UNLOCK(vs);
1601
1602 bs_commit(- vs->vs_size);
1603
1604 ip_lock(vs_to_port(vs));
1605 (vs_to_port(vs))->ip_destination = 0;
1606 (vs_to_port(vs))->ip_receiver_name = MACH_PORT_NULL;
1607
1608 s= splsched();
1609 imq_lock(&vs_to_port(vs)->ip_messages);
1610 (vs_to_port(vs))->ip_mscount = 0;
1611 (vs_to_port(vs))->ip_messages.imq_seqno = 0;
1612 imq_unlock(&vs_to_port(vs)->ip_messages);
1613 splx(s);
1614
1615 ip_unlock(vs_to_port(vs));
1616 pager_mux_hash_delete((ipc_port_t) vs_to_port(vs));
1617 ipc_port_release_receive(vs_to_port(vs));
1618 /*
1619 * Do this *after* deallocating the port name
1620 */
1621 kfree((vm_offset_t)vs, sizeof *vs);
1622 }
1623
1624 int ps_map_extend(vstruct_t, int); /* forward */
1625
1626 int ps_map_extend(
1627 vstruct_t vs,
1628 int new_size)
1629 {
1630 struct vs_map **new_imap;
1631 struct vs_map *new_dmap = NULL;
1632 int newdsize;
1633 int i;
1634 void *old_map = NULL;
1635 int old_map_size = 0;
1636
1637 if (vs->vs_size >= new_size) {
1638 /*
1639 * Someone has already done the work.
1640 */
1641 return 0;
1642 }
1643
1644 /*
1645 * If the new size extends into the indirect range, then we have one
1646 * of two cases: we are going from indirect to indirect, or we are
1647 * going from direct to indirect. If we are going from indirect to
1648 * indirect, then it is possible that the new size will fit in the old
1649 * indirect map. If this is the case, then just reset the size of the
1650 * vstruct map and we are done. If the new size will not
1651 * fit into the old indirect map, then we have to allocate a new
1652 * indirect map and copy the old map pointers into this new map.
1653 *
1654 * If we are going from direct to indirect, then we have to allocate a
1655 * new indirect map and copy the old direct pages into the first
1656 * indirect page of the new map.
1657 * NOTE: allocating memory here is dangerous, as we're in the
1658 * pageout path.
1659 */
1660 if (INDIRECT_CLMAP(new_size)) {
1661 int new_map_size = INDIRECT_CLMAP_SIZE(new_size);
1662
1663 /*
1664 * Get a new indirect map and zero it.
1665 */
1666 old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size);
1667 if (vs->vs_indirect &&
1668 (new_map_size == old_map_size)) {
1669 bs_commit(new_size - vs->vs_size);
1670 vs->vs_size = new_size;
1671 return 0;
1672 }
1673
1674 new_imap = (struct vs_map **)kalloc(new_map_size);
1675 if (new_imap == NULL) {
1676 return -1;
1677 }
1678 memset(new_imap, 0, new_map_size);
1679
1680 if (vs->vs_indirect) {
1681 /* Copy old entries into new map */
1682 memcpy(new_imap, vs->vs_imap, old_map_size);
1683 /* Arrange to free the old map */
1684 old_map = (void *) vs->vs_imap;
1685 newdsize = 0;
1686 } else { /* Old map was a direct map */
1687 /* Allocate an indirect page */
1688 if ((new_imap[0] = (struct vs_map *)
1689 kalloc(CLMAP_THRESHOLD)) == NULL) {
1690 kfree((vm_offset_t)new_imap, new_map_size);
1691 return -1;
1692 }
1693 new_dmap = new_imap[0];
1694 newdsize = CLMAP_ENTRIES;
1695 }
1696 } else {
1697 new_imap = NULL;
1698 newdsize = new_size;
1699 /*
1700 * If the new map is a direct map, then the old map must
1701 * also have been a direct map. All we have to do is
1702 * to allocate a new direct map, copy the old entries
1703 * into it and free the old map.
1704 */
1705 if ((new_dmap = (struct vs_map *)
1706 kalloc(CLMAP_SIZE(new_size))) == NULL) {
1707 return -1;
1708 }
1709 }
1710 if (newdsize) {
1711
1712 /* Free the old map */
1713 old_map = (void *) vs->vs_dmap;
1714 old_map_size = CLMAP_SIZE(vs->vs_size);
1715
1716 /* Copy info from the old map into the new map */
1717 memcpy(new_dmap, vs->vs_dmap, old_map_size);
1718
1719 /* Initialize the rest of the new map */
1720 for (i = vs->vs_size; i < newdsize; i++)
1721 VSM_CLR(new_dmap[i]);
1722 }
1723 if (new_imap) {
1724 vs->vs_imap = new_imap;
1725 vs->vs_indirect = TRUE;
1726 } else
1727 vs->vs_dmap = new_dmap;
1728 bs_commit(new_size - vs->vs_size);
1729 vs->vs_size = new_size;
1730 if (old_map)
1731 kfree((vm_offset_t)old_map, old_map_size);
1732 return 0;
1733 }
1734
1735 vm_offset_t
1736 ps_clmap(
1737 vstruct_t vs,
1738 vm_offset_t offset,
1739 struct clmap *clmap,
1740 int flag,
1741 vm_size_t size,
1742 int error)
1743 {
1744 vm_offset_t cluster; /* The cluster of offset. */
1745 vm_offset_t newcl; /* The new cluster allocated. */
1746 vm_offset_t newoff;
1747 int i;
1748 struct vs_map *vsmap;
1749 static char here[] = "ps_clmap";
1750
1751 VS_MAP_LOCK(vs);
1752
1753 ASSERT(vs->vs_dmap);
1754 cluster = atop(offset) >> vs->vs_clshift;
1755
1756 /*
1757 * Initialize cluster error value
1758 */
1759 clmap->cl_error = 0;
1760
1761 /*
1762 * If the object has grown, extend the page map.
1763 */
1764 if (cluster >= vs->vs_size) {
1765 if (flag == CL_FIND) {
1766 /* Do not allocate if just doing a lookup */
1767 VS_MAP_UNLOCK(vs);
1768 return (vm_offset_t) -1;
1769 }
1770 if (ps_map_extend(vs, cluster + 1)) {
1771 VS_MAP_UNLOCK(vs);
1772 return (vm_offset_t) -1;
1773 }
1774 }
1775
1776 /*
1777 * Look for the desired cluster. If the map is indirect, then we
1778 * have a two level lookup. First find the indirect block, then
1779 * find the actual cluster. If the indirect block has not yet
1780 * been allocated, then do so. If the cluster has not yet been
1781 * allocated, then do so.
1782 *
1783 * If any of the allocations fail, then return an error.
1784 * Don't allocate if just doing a lookup.
1785 */
1786 if (vs->vs_indirect) {
1787 long ind_block = cluster/CLMAP_ENTRIES;
1788
1789 /* Is the indirect block allocated? */
1790 vsmap = vs->vs_imap[ind_block];
1791 if (vsmap == NULL) {
1792 if (flag == CL_FIND) {
1793 VS_MAP_UNLOCK(vs);
1794 return (vm_offset_t) -1;
1795 }
1796
1797 /* Allocate the indirect block */
1798 vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
1799 if (vsmap == NULL) {
1800 VS_MAP_UNLOCK(vs);
1801 return (vm_offset_t) -1;
1802 }
1803 /* Initialize the cluster offsets */
1804 for (i = 0; i < CLMAP_ENTRIES; i++)
1805 VSM_CLR(vsmap[i]);
1806 vs->vs_imap[ind_block] = vsmap;
1807 }
1808 } else
1809 vsmap = vs->vs_dmap;
1810
1811 ASSERT(vsmap);
1812 vsmap += cluster%CLMAP_ENTRIES;
1813
1814 /*
1815 * At this point, vsmap points to the struct vs_map desired.
1816 *
1817 * Look in the map for the cluster, if there was an error on a
1818 * previous write, flag it and return. If it is not yet
1819 * allocated, then allocate it, if we're writing; if we're
1820 * doing a lookup and the cluster's not allocated, return error.
1821 */
1822 if (VSM_ISERR(*vsmap)) {
1823 clmap->cl_error = VSM_GETERR(*vsmap);
1824 VS_MAP_UNLOCK(vs);
1825 return (vm_offset_t) -1;
1826 } else if (VSM_ISCLR(*vsmap)) {
1827 int psindex;
1828
1829 if (flag == CL_FIND) {
1830 /*
1831 * If there's an error and the entry is clear, then
1832 * we've run out of swap space. Record the error
1833 * here and return.
1834 */
1835 if (error) {
1836 VSM_SETERR(*vsmap, error);
1837 }
1838 VS_MAP_UNLOCK(vs);
1839 return (vm_offset_t) -1;
1840 } else {
1841 /*
1842 * Attempt to allocate a cluster from the paging segment
1843 */
1844 newcl = ps_allocate_cluster(vs, &psindex,
1845 PAGING_SEGMENT_NULL);
1846 if (newcl == -1) {
1847 VS_MAP_UNLOCK(vs);
1848 return (vm_offset_t) -1;
1849 }
1850 VSM_CLR(*vsmap);
1851 VSM_SETCLOFF(*vsmap, newcl);
1852 VSM_SETPS(*vsmap, psindex);
1853 }
1854 } else
1855 newcl = VSM_CLOFF(*vsmap);
1856
1857 /*
1858 * Fill in pertinent fields of the clmap
1859 */
1860 clmap->cl_ps = VSM_PS(*vsmap);
1861 clmap->cl_numpages = VSCLSIZE(vs);
1862 clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap);
1863
1864 /*
1865 * Byte offset in paging segment is byte offset to cluster plus
1866 * byte offset within cluster. It looks ugly, but should be
1867 * relatively quick.
1868 */
1869 ASSERT(trunc_page(offset) == offset);
1870 newcl = ptoa(newcl) << vs->vs_clshift;
1871 newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
1872 if (flag == CL_ALLOC) {
1873 /*
1874 * set bits in the allocation bitmap according to which
1875 * pages were requested. size is in bytes.
1876 */
1877 i = atop(newoff);
1878 while ((size > 0) && (i < VSCLSIZE(vs))) {
1879 VSM_SETALLOC(*vsmap, i);
1880 i++;
1881 size -= vm_page_size;
1882 }
1883 }
1884 clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap);
1885 if (newoff) {
1886 /*
1887 * Offset is not cluster aligned, so number of pages
1888 * and bitmaps must be adjusted
1889 */
1890 clmap->cl_numpages -= atop(newoff);
1891 CLMAP_SHIFT(clmap, vs);
1892 CLMAP_SHIFTALLOC(clmap, vs);
1893 }
1894
1895 /*
1896 *
1897 * The setting of valid bits and handling of write errors
1898 * must be done here, while we hold the lock on the map.
1899 * It logically should be done in ps_vs_write_complete().
1900 * The size and error information has been passed from
1901 * ps_vs_write_complete(). If the size parameter is non-zero,
1902 * then there is work to be done. If error is also non-zero,
1903 * then the error number is recorded in the cluster and the
1904 * entire cluster is in error.
1905 */
1906 if (size && flag == CL_FIND) {
1907 vm_offset_t off = (vm_offset_t) 0;
1908
1909 if (!error) {
1910 for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
1911 i++) {
1912 VSM_SETPG(*vsmap, i);
1913 size -= vm_page_size;
1914 }
1915 ASSERT(i <= VSCLSIZE(vs));
1916 } else {
1917 BS_STAT(clmap->cl_ps->ps_bs,
1918 clmap->cl_ps->ps_bs->bs_pages_out_fail +=
1919 atop(size));
1920 off = VSM_CLOFF(*vsmap);
1921 VSM_SETERR(*vsmap, error);
1922 }
1923 /*
1924 * Deallocate cluster if error, and no valid pages
1925 * already present.
1926 */
1927 if (off != (vm_offset_t) 0)
1928 ps_deallocate_cluster(clmap->cl_ps, off);
1929 VS_MAP_UNLOCK(vs);
1930 return (vm_offset_t) 0;
1931 } else
1932 VS_MAP_UNLOCK(vs);
1933
1934 DEBUG(DEBUG_VS_INTERNAL,
1935 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1936 newcl+newoff, (int) vs, (int) vsmap, flag));
1937 DEBUG(DEBUG_VS_INTERNAL,
1938 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1939 (int) clmap->cl_ps, clmap->cl_numpages,
1940 (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
1941
1942 return (newcl + newoff);
1943 }
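
/*
 * Worked example (illustrative): ps_clmap() splits a page-aligned
 * vstruct byte offset into a cluster index and an offset within that
 * cluster.  With 4K pages and clshift == 2 (16K clusters), offset 72K
 * gives cluster atop(72K) >> 2 == 18 >> 2 == 4 and an in-cluster
 * remainder of 72K & (16K - 1) == 8K.  The value returned to the
 * caller is the segment byte offset of the allocated cluster,
 * ptoa(newcl) << clshift, plus that remainder.
 */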
1944
1945 void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t); /* forward */
1946
1947 void
1948 ps_clunmap(
1949 vstruct_t vs,
1950 vm_offset_t offset,
1951 vm_size_t length)
1952 {
1953 vm_offset_t cluster; /* The cluster number of offset */
1954 struct vs_map *vsmap;
1955 static char here[] = "ps_clunmap";
1956
1957 VS_MAP_LOCK(vs);
1958
1959 /*
1960 * Loop through all clusters in this range, freeing paging segment
1961 * clusters and map entries as encountered.
1962 */
1963 while (length > 0) {
1964 vm_offset_t newoff;
1965 int i;
1966
1967 cluster = atop(offset) >> vs->vs_clshift;
1968 if (vs->vs_indirect) /* indirect map */
1969 vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
1970 else
1971 vsmap = vs->vs_dmap;
1972 if (vsmap == NULL) {
1973 VS_MAP_UNLOCK(vs);
1974 return;
1975 }
1976 vsmap += cluster%CLMAP_ENTRIES;
1977 if (VSM_ISCLR(*vsmap)) {
1978 length -= vm_page_size;
1979 offset += vm_page_size;
1980 continue;
1981 }
1982 /*
1983 * We've got a valid mapping. Clear it and deallocate
1984 * paging segment cluster pages.
1985 * Optimize for entire cluster clearing.
1986 */
1987 if (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) {
1988 /*
1989 * Not cluster aligned.
1990 */
1991 ASSERT(trunc_page(newoff) == newoff);
1992 i = atop(newoff);
1993 } else
1994 i = 0;
1995 while ((i < VSCLSIZE(vs)) && (length > 0)) {
1996 VSM_CLRPG(*vsmap, i);
1997 VSM_CLRALLOC(*vsmap, i);
1998 length -= vm_page_size;
1999 offset += vm_page_size;
2000 i++;
2001 }
2002
2003 /*
2004 * If map entry is empty, clear and deallocate cluster.
2005 */
2006 if (!VSM_ALLOC(*vsmap)) {
2007 ps_deallocate_cluster(VSM_PS(*vsmap),
2008 VSM_CLOFF(*vsmap));
2009 VSM_CLR(*vsmap);
2010 }
2011 }
2012
2013 VS_MAP_UNLOCK(vs);
2014 }
2015
2016 void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */
2017
2018 void
2019 ps_vs_write_complete(
2020 vstruct_t vs,
2021 vm_offset_t offset,
2022 vm_size_t size,
2023 int error)
2024 {
2025 struct clmap clmap;
2026
2027 /*
2028 * Get the struct vsmap for this cluster.
2029 * Use CL_FIND, even though the cluster was written, because the
2030 * cluster MUST be present, unless there was an error
2031 * in the original ps_clmap (e.g. no space), in which
2032 * case, nothing happens.
2033 *
2034 * Must pass enough information to ps_clmap to allow it
2035 * to set the vs_map structure bitmap under lock.
2036 */
2037 (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
2038 }
2039
2040 void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int); /* forward */
2041
2042 void
2043 vs_cl_write_complete(
2044 vstruct_t vs,
2045 paging_segment_t ps,
2046 vm_offset_t offset,
2047 vm_offset_t addr,
2048 vm_size_t size,
2049 boolean_t async,
2050 int error)
2051 {
2052 static char here[] = "vs_cl_write_complete";
2053 kern_return_t kr;
2054
2055 if (error) {
2056 /*
2057 * For internal objects, the error is recorded on a
2058 * per-cluster basis by ps_clmap() which is called
2059 * by ps_vs_write_complete() below.
2060 */
2061 dprintf(("write failed error = 0x%x\n", error));
2062 /* add upl_abort code here */
2063 } else
2064 GSTAT(global_stats.gs_pages_out += atop(size));
2065 /*
2066 * Notify the vstruct mapping code, so it can do its accounting.
2067 */
2068 ps_vs_write_complete(vs, offset, size, error);
2069
2070 if (async) {
2071 VS_LOCK(vs);
2072 ASSERT(vs->vs_async_pending > 0);
2073 vs->vs_async_pending -= size;
2074 if (vs->vs_async_pending == 0) {
2075 VS_UNLOCK(vs);
2076 /* mutex_unlock(&vs->vs_waiting_async); */
2077 thread_wakeup(&vs->vs_waiting_async);
2078 } else {
2079 VS_UNLOCK(vs);
2080 }
2081 }
2082 }
2083
2084 #ifdef DEVICE_PAGING
2085 kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2086
2087 kern_return_t
2088 device_write_reply(
2089 MACH_PORT_FACE reply_port,
2090 kern_return_t device_code,
2091 io_buf_len_t bytes_written)
2092 {
2093 struct vs_async *vsa;
2094 static char here[] = "device_write_reply";
2095
2096 vsa = (struct vs_async *)
2097 ((struct vstruct_alias *)(reply_port->alias))->vs;
2098
2099 if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
2100 device_code = KERN_FAILURE;
2101 }
2102
2103 vsa->vsa_error = device_code;
2104
2105
2106 ASSERT(vsa->vsa_vs != VSTRUCT_NULL);
2107 if(vsa->vsa_flags & VSA_TRANSFER) {
2108 /* revisit when async disk segments redone */
2109 if(vsa->vsa_error) {
2110 /* need to consider error condition. re-write data or */
2111 /* throw it away here. */
2112 vm_offset_t ioaddr;
2113 if(vm_map_copyout(kernel_map, &ioaddr,
2114 (vm_map_copy_t)vsa->vsa_addr) != KERN_SUCCESS)
2115 panic("vs_cluster_write: unable to copy source list\n");
2116 vm_deallocate(kernel_map, ioaddr, vsa->vsa_size);
2117 }
2118 ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
2119 vsa->vsa_size, vsa->vsa_error);
2120 } else {
2121 vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset,
2122 vsa->vsa_addr, vsa->vsa_size, TRUE,
2123 vsa->vsa_error);
2124 }
2125 VS_FREE_ASYNC(vsa);
2126
2127 return KERN_SUCCESS;
2128 }
2129
2130 kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2131 kern_return_t
2132 device_write_reply_inband(
2133 MACH_PORT_FACE reply_port,
2134 kern_return_t return_code,
2135 io_buf_len_t bytes_written)
2136 {
2137 panic("device_write_reply_inband: illegal");
2138 return KERN_SUCCESS;
2139 }
2140
2141 kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t);
2142 kern_return_t
2143 device_read_reply(
2144 MACH_PORT_FACE reply_port,
2145 kern_return_t return_code,
2146 io_buf_ptr_t data,
2147 mach_msg_type_number_t dataCnt)
2148 {
2149 struct vs_async *vsa;
2150 vsa = (struct vs_async *)
2151 ((struct vstruct_alias *)(reply_port->alias))->vs;
2152 vsa->vsa_addr = (vm_offset_t)data;
2153 vsa->vsa_size = (vm_size_t)dataCnt;
2154 vsa->vsa_error = return_code;
2155 thread_wakeup(&vsa->vsa_lock);
2156 return KERN_SUCCESS;
2157 }
2158
2159 kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t);
2160 kern_return_t
2161 device_read_reply_inband(
2162 MACH_PORT_FACE reply_port,
2163 kern_return_t return_code,
2164 io_buf_ptr_inband_t data,
2165 mach_msg_type_number_t dataCnt)
2166 {
2167 panic("device_read_reply_inband: illegal");
2168 return KERN_SUCCESS;
2169 }
2170
2171 kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2172 kern_return_t
2173 device_read_reply_overwrite(
2174 MACH_PORT_FACE reply_port,
2175 kern_return_t return_code,
2176 io_buf_len_t bytes_read)
2177 {
2178 panic("device_read_reply_overwrite: illegal\n");
2179 return KERN_SUCCESS;
2180 }
2181
2182 kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE);
2183 kern_return_t
2184 device_open_reply(
2185 MACH_PORT_FACE reply_port,
2186 kern_return_t return_code,
2187 MACH_PORT_FACE device_port)
2188 {
2189 panic("device_open_reply: illegal\n");
2190 return KERN_SUCCESS;
2191 }
2192
2193 kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
2194
2195 kern_return_t
2196 ps_read_device(
2197 paging_segment_t ps,
2198 vm_offset_t offset,
2199 vm_offset_t *bufferp,
2200 unsigned int size,
2201 unsigned int *residualp,
2202 int flags)
2203 {
2204 kern_return_t kr;
2205 recnum_t dev_offset;
2206 unsigned int bytes_wanted;
2207 unsigned int bytes_read;
2208 unsigned int total_read;
2209 vm_offset_t dev_buffer;
2210 vm_offset_t buf_ptr;
2211 unsigned int records_read;
2212 static char here[] = "ps_read_device";
2213 struct vs_async *vsa;
2214 mutex_t vs_waiting_read_reply;
2215
2216 device_t device;
2217 vm_map_copy_t device_data = NULL;
2218 default_pager_thread_t *dpt = NULL;
2219
2220 device = dev_port_lookup(ps->ps_device);
2221 clustered_reads[atop(size)]++;
2222
2223 dev_offset = (ps->ps_offset +
2224 (offset >> (vm_page_shift - ps->ps_record_shift)));
2225 bytes_wanted = size;
2226 total_read = 0;
2227 *bufferp = (vm_offset_t)NULL;
2228
2229 do {
2230 vsa = VS_ALLOC_ASYNC();
2231 if (vsa) {
2232 vsa->vsa_vs = NULL;
2233 vsa->vsa_addr = 0;
2234 vsa->vsa_offset = 0;
2235 vsa->vsa_size = 0;
2236 vsa->vsa_ps = NULL;
2237 }
2238 mutex_init(&vsa->vsa_lock, ETAP_DPAGE_VSSEQNO);
2239 ip_lock(vsa->reply_port);
2240 vsa->reply_port->ip_sorights++;
2241 ip_reference(vsa->reply_port);
2242 ip_unlock(vsa->reply_port);
2243 kr = ds_device_read_common(device,
2244 vsa->reply_port,
2245 (mach_msg_type_name_t)
2246 MACH_MSG_TYPE_MOVE_SEND_ONCE,
2247 (dev_mode_t) 0,
2248 dev_offset,
2249 bytes_wanted,
2250 (IO_READ | IO_CALL),
2251 (io_buf_ptr_t *) &dev_buffer,
2252 (mach_msg_type_number_t *) &bytes_read);
2253 if(kr == MIG_NO_REPLY) {
2254 assert_wait(&vsa->vsa_lock, THREAD_UNINT);
2255 thread_block((void(*)(void))0);
2256
2257 dev_buffer = vsa->vsa_addr;
2258 bytes_read = (unsigned int)vsa->vsa_size;
2259 kr = vsa->vsa_error;
2260 }
2261 VS_FREE_ASYNC(vsa);
2262 if (kr != KERN_SUCCESS || bytes_read == 0) {
2263 break;
2264 }
2265 total_read += bytes_read;
2266
2267 /*
2268 * If we got the entire range, use the returned dev_buffer.
2269 */
2270 if (bytes_read == size) {
2271 *bufferp = (vm_offset_t)dev_buffer;
2272 break;
2273 }
2274
2275 #if 1
2276 dprintf(("read only %d bytes out of %d\n",
2277 bytes_read, bytes_wanted));
2278 #endif
2279 if(dpt == NULL) {
2280 dpt = get_read_buffer();
2281 buf_ptr = dpt->dpt_buffer;
2282 *bufferp = (vm_offset_t)buf_ptr;
2283 }
2284 /*
2285 * Otherwise, copy the data into the provided buffer (*bufferp)
2286 * and append the rest of the range as it comes in.
2287 */
2288 memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
2289 buf_ptr += bytes_read;
2290 bytes_wanted -= bytes_read;
2291 records_read = (bytes_read >>
2292 (vm_page_shift - ps->ps_record_shift));
2293 dev_offset += records_read;
2294 DEBUG(DEBUG_VS_INTERNAL,
2295 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2296 dev_buffer, bytes_read));
2297 if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
2298 != KERN_SUCCESS)
2299 Panic("dealloc buf");
2300 } while (bytes_wanted);
2301
2302 *residualp = size - total_read;
2303 if((dev_buffer != *bufferp) && (total_read != 0)) {
2304 vm_offset_t temp_buffer;
2305 vm_allocate(kernel_map, &temp_buffer, total_read, TRUE);
2306 memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
2307 if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
2308 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2309 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2310 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2311 (vm_map_copy_t *)&device_data, FALSE))
2312 panic("ps_read_device: cannot copyin locally provided buffer\n");
2313 }
2314 else if((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)){
2315 if(vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
2316 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2317 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2318 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2319 (vm_map_copy_t *)&device_data, FALSE))
2320 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2321 }
2322 else {
2323 device_data = NULL;
2324 }
2325 *bufferp = (vm_offset_t)device_data;
2326
2327 if(dpt != NULL) {
2328 /* Free the receive buffer */
2329 dpt->checked_out = 0;
2330 thread_wakeup(&dpt_array);
2331 }
2332 return KERN_SUCCESS;
2333 }
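
/*
 * Editorial sketch (not part of the original source): the record
 * arithmetic used by ps_read_device()/ps_write_device(), worked through
 * with assumed values -- 4 KB pages and 512-byte device records, so
 * ps_record_shift == log2(4096/512) == 3.
 */
static void
example_device_record_math(void)
{
	const unsigned int page_shift = 12;	/* assumed 4 KB pages */
	const unsigned int record_shift = 3;	/* assumed log2(page/record) */
	unsigned long ps_offset = 0;		/* segment starts at record 0 */
	unsigned long offset = 0x8000;		/* byte offset into the segment */
	unsigned long bytes_read = 0x3000;	/* a partial transfer of 12 KB */

	/* byte offset -> record number: divide by the 512-byte record size */
	unsigned long dev_offset = ps_offset +
		(offset >> (page_shift - record_shift));		/* 0x40 */

	/* bytes transferred -> records consumed, used to advance dev_offset */
	unsigned long records_read = bytes_read >> (page_shift - record_shift);	/* 0x18 */

	(void) dev_offset; (void) records_read;
}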
2334
2335 kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */
2336
2337 kern_return_t
2338 ps_write_device(
2339 paging_segment_t ps,
2340 vm_offset_t offset,
2341 vm_offset_t addr,
2342 unsigned int size,
2343 struct vs_async *vsa)
2344 {
2345 recnum_t dev_offset;
2346 io_buf_len_t bytes_to_write, bytes_written;
2347 recnum_t records_written;
2348 kern_return_t kr;
2349 MACH_PORT_FACE reply_port;
2350 static char here[] = "ps_write_device";
2351
2352
2353
2354 clustered_writes[atop(size)]++;
2355
2356 dev_offset = (ps->ps_offset +
2357 (offset >> (vm_page_shift - ps->ps_record_shift)));
2358 bytes_to_write = size;
2359
2360 if (vsa) {
2361 /*
2362 * Asynchronous write.
2363 */
2364 reply_port = vsa->reply_port;
2365 ip_lock(reply_port);
2366 reply_port->ip_sorights++;
2367 ip_reference(reply_port);
2368 ip_unlock(reply_port);
2369 {
2370 device_t device;
2371 device = dev_port_lookup(ps->ps_device);
2372
2373 vsa->vsa_addr = addr;
2374 kr=ds_device_write_common(device,
2375 reply_port,
2376 (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
2377 (dev_mode_t) 0,
2378 dev_offset,
2379 (io_buf_ptr_t) addr,
2380 size,
2381 (IO_WRITE | IO_CALL),
2382 &bytes_written);
2383 }
2384 if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
2385 if (verbose)
2386 dprintf(("%s0x%x, addr=0x%x,"
2387 "size=0x%x,offset=0x%x\n",
2388 "device_write_request returned ",
2389 kr, addr, size, offset));
2390 BS_STAT(ps->ps_bs,
2391 ps->ps_bs->bs_pages_out_fail += atop(size));
2392 /* do the completion notification to free resources */
2393 device_write_reply(reply_port, kr, 0);
2394 return PAGER_ERROR;
2395 }
2396 } else do {
2397 /*
2398 * Synchronous write.
2399 */
2400 {
2401 device_t device;
2402 device = dev_port_lookup(ps->ps_device);
2403 kr=ds_device_write_common(device,
2404 IP_NULL, 0,
2405 (dev_mode_t) 0,
2406 dev_offset,
2407 (io_buf_ptr_t) addr,
2408 size,
2409 (IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
2410 &bytes_written);
2411 }
2412 if (kr != KERN_SUCCESS) {
2413 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2414 "device_write returned ",
2415 kr, addr, size, offset));
2416 BS_STAT(ps->ps_bs,
2417 ps->ps_bs->bs_pages_out_fail += atop(size));
2418 return PAGER_ERROR;
2419 }
2420 if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
2421 Panic("fragmented write");
2422 records_written = (bytes_written >>
2423 (vm_page_shift - ps->ps_record_shift));
2424 dev_offset += records_written;
2425 #if 1
2426 if (bytes_written != bytes_to_write) {
2427 dprintf(("wrote only %d bytes out of %d\n",
2428 bytes_written, bytes_to_write));
2429 }
2430 #endif
2431 bytes_to_write -= bytes_written;
2432 addr += bytes_written;
2433 } while (bytes_to_write > 0);
2434
2435 return PAGER_SUCCESS;
2436 }
2437
2438
2439 #else /* !DEVICE_PAGING */
2440
2441 kern_return_t
2442 ps_read_device(
2443 paging_segment_t ps,
2444 vm_offset_t offset,
2445 vm_offset_t *bufferp,
2446 unsigned int size,
2447 unsigned int *residualp,
2448 int flags)
2449 {
2450 panic("ps_read_device not supported");
2451 }
2452
2453 kern_return_t ps_write_device(
2454 paging_segment_t ps,
2455 vm_offset_t offset,
2456 vm_offset_t addr,
2457 unsigned int size,
2458 struct vs_async *vsa)
2459 {
2460 panic("ps_write_device not supported");
2461 }
2462
2463 #endif /* DEVICE_PAGING */
2464 void pvs_object_data_provided(vstruct_t, upl_t, vm_offset_t, vm_size_t); /* forward */
2465
2466 void
2467 pvs_object_data_provided(
2468 vstruct_t vs,
2469 upl_t upl,
2470 vm_offset_t offset,
2471 vm_size_t size)
2472 {
2473 static char here[] = "pvs_object_data_provided";
2474
2475 DEBUG(DEBUG_VS_INTERNAL,
2476 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2477 upl, offset, size));
2478
2479 ASSERT(size > 0);
2480 GSTAT(global_stats.gs_pages_in += atop(size));
2481
2482
2483 #if USE_PRECIOUS
2484 ps_clunmap(vs, offset, size);
2485 #endif /* USE_PRECIOUS */
2486
2487 }
2488
2489 kern_return_t
2490 pvs_cluster_read(
2491 vstruct_t vs,
2492 vm_offset_t offset,
2493 vm_size_t cnt)
2494 {
2495 vm_offset_t actual_offset;
2496 vm_offset_t buffer;
2497 paging_segment_t ps;
2498 struct clmap clmap;
2499 upl_t upl;
2500 kern_return_t error = KERN_SUCCESS;
2501 int size, size_wanted, i;
2502 unsigned int residual;
2503 unsigned int request_flags;
2504 int unavail_size;
2505 default_pager_thread_t *dpt;
2506 boolean_t dealloc;
2507 static char here[] = "pvs_cluster_read";
2508
2509 /*
2510 * This loop will be executed once per cluster referenced.
2511 * Typically this means once, since it's unlikely that the
2512 * VM system will ask for anything spanning cluster boundaries.
2513 *
2514 * If there are holes in a cluster (in a paging segment), we stop
2515 * reading at the hole, inform the VM of any data read, inform
2516 * the VM of an unavailable range, then loop again, hoping to
2517 * find valid pages later in the cluster. This continues until
2518 * the entire range has been examined, and read, if present.
2519 */
2520
2521 #if USE_PRECIOUS
2522 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS;
2523 #else
2524 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE ;
2525 #endif
2526 while (cnt && (error == KERN_SUCCESS)) {
2527 actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
2528
2529 if (actual_offset == (vm_offset_t) -1) {
2530
2531 /*
2532 * Either a failure due to an error on a previous
2533 * write or a zero fill on demand page. In either case,
2534 * optimize to do one reply for all pages up to next
2535 * cluster boundary.
2536 */
2537 unsigned int local_size, clmask, clsize;
2538
2540 clsize = vm_page_size << vs->vs_clshift;
2541 clmask = clsize - 1;
2542 local_size = clsize - (offset & clmask);
2543 ASSERT(local_size);
2544 local_size = MIN(local_size, cnt);
2545
2546 upl_system_list_request((vm_object_t)
2547 vs->vs_control_port->ip_kobject,
2548 offset, local_size, local_size,
2549 &upl, NULL, 0, request_flags);
2550 if (clmap.cl_error) {
2551 uc_upl_abort(upl, UPL_ABORT_ERROR);
2552 } else {
2553 uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
2554 }
2555
2556 cnt -= local_size;
2557 offset += local_size;
2558 continue;
2559 }
2560
2561 /*
2562 * Count up contiguous available or unavailable
2563 * pages.
2564 */
2565 ps = CLMAP_PS(clmap);
2566 ASSERT(ps);
2567 size = 0;
2568 unavail_size = 0;
2569
2570 for (i = 0;
2571 (size < cnt) && (unavail_size < cnt) &&
2572 (i < CLMAP_NPGS(clmap)); i++) {
2573 if (CLMAP_ISSET(clmap, i)) {
2574 if (unavail_size != 0)
2575 break;
2576 size += vm_page_size;
2577 BS_STAT(ps->ps_bs,
2578 ps->ps_bs->bs_pages_in++);
2579 } else {
2580 if (size != 0)
2581 break;
2582 unavail_size += vm_page_size;
2583 }
2584 }
2585 /*
2586 * Let VM system know about holes in clusters.
2587 */
2588 if (size == 0) {
2589 ASSERT(unavail_size);
2590 GSTAT(global_stats.gs_pages_unavail +=
2591 atop(unavail_size));
2592 upl_system_list_request((vm_object_t)
2593 vs->vs_control_port->ip_kobject,
2594 offset, unavail_size,
2595 unavail_size, &upl, NULL, 0,
2596 request_flags);
2597 uc_upl_abort(upl, UPL_ABORT_UNAVAILABLE);
2598 cnt -= unavail_size;
2599 offset += unavail_size;
2600 continue;
2601 }
2602
2603 upl_system_list_request((vm_object_t)
2604 vs->vs_control_port->ip_kobject,
2605 offset, size, size, &upl,
2606 NULL, 0, request_flags | UPL_SET_INTERNAL);
2607 if(ps->ps_segtype == PS_PARTITION) {
2608 /*
2609 error = ps_read_device(ps, actual_offset, upl,
2610 size, &residual, 0);
2611 */
2612 } else {
2613 error = ps_read_file(ps, upl, actual_offset,
2614 size, &residual, 0);
2615 }
2616
2617 /*
2618 * Adjust counts and send response to VM. Optimize for the
2619 * common case, i.e. no error and/or partial data.
2620 * If there was an error, then we need to error the entire
2621 * range, even if some data was successfully read.
2622 * If there was a partial read we may supply some
2623 * data and may error some as well. In all cases the
2624 * VM must receive some notification for every page in the
2625 * range.
2626 */
2627 if ((error == KERN_SUCCESS) && (residual == 0)) {
2628 /*
2629 * Got everything we asked for, supply the data to
2630 * the VM. Note that as a side effect of supplying
2631 * the data, the buffer holding the supplied data is
2632 * deallocated from the pager's address space.
2633 */
2634 pvs_object_data_provided(vs, upl, offset, size);
2635 } else {
2636 size_wanted = size;
2637 if (error == KERN_SUCCESS) {
2638 if (residual == size) {
2639 /*
2640 * If a read operation returns no error
2641 * and no data moved, we turn it into
2642 * an error, assuming we're reading at
2643 * or beyond EOF.
2644 * Fall through and error the entire
2645 * range.
2646 */
2647 error = KERN_FAILURE;
2648 } else {
2649 /*
2650 * Otherwise, we have a partial read. If
2651 * the part read is an integral number
2652 * of pages supply it. Otherwise round
2653 * it up to a page boundary, zero fill
2654 * the unread part, and supply it.
2655 * Fall through and error the remainder
2656 * of the range, if any.
2657 */
2658 int fill, lsize;
2659
2660 fill = residual & (vm_page_size - 1);
2661 lsize = (size - residual) + fill;
2662 pvs_object_data_provided(vs, upl,
2663 offset, lsize);
2664 cnt -= lsize;
2665 offset += lsize;
2666 if (size -= lsize) {
2667 error = KERN_FAILURE;
2668 }
2669 }
2670 }
2671
2672 /*
2673 * If there was an error in any part of the range, tell
2674 * the VM. Deallocate the remainder of the buffer.
2675 * Note that error is explicitly checked again since
2676 * it can be modified above.
2677 */
2678 if (error != KERN_SUCCESS) {
2679 BS_STAT(ps->ps_bs,
2680 ps->ps_bs->bs_pages_in_fail +=
2681 atop(size));
2682 }
2683 }
2684 cnt -= size;
2685 offset += size;
2686
2687 } /* END while (cnt && (error == 0)) */
2688 return error;
2689 }
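
/*
 * Editorial sketch (not part of the original source): the two pieces of
 * arithmetic pvs_cluster_read() leans on, worked through with assumed
 * values (4 KB pages and a cluster shift of 2, i.e. 16 KB clusters).
 */
static void
example_cluster_read_math(void)
{
	const unsigned long page_size = 4096;	/* assumed vm_page_size */
	const unsigned int clshift = 2;		/* assumed vs->vs_clshift */

	/*
	 * 1. Replying for a hole: round the reply out to the next cluster
	 *    boundary, but never past the caller's remaining count.
	 */
	unsigned long clsize = page_size << clshift;		/* 0x4000 */
	unsigned long clmask = clsize - 1;			/* 0x3fff */
	unsigned long offset = 0x9000, cnt = 0x10000;
	unsigned long local_size = clsize - (offset & clmask);	/* 0x4000 - 0x1000 = 0x3000 */
	if (local_size > cnt)
		local_size = cnt;				/* MIN(local_size, cnt) */

	/*
	 * 2. A partial read: supply what was read, rounded up to a page
	 *    boundary, with the tail of the last page zero filled, as the
	 *    comments above describe.
	 */
	unsigned long size = 0x4000, residual = 0x1800;	/* read 10 KB of 16 KB */
	unsigned long fill = residual & (page_size - 1);	/* 0x800 */
	unsigned long lsize = (size - residual) + fill;		/* 0x2800 + 0x800 = 0x3000, page aligned */

	(void) local_size; (void) lsize;
}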
2690
2691 int vs_do_async_write = 1;
2692
2693 kern_return_t
2694 vs_cluster_write(
2695 vstruct_t vs,
2696 upl_t internal_upl,
2697 vm_offset_t offset,
2698 vm_size_t cnt,
2699 boolean_t dp_internal,
2700 int flags)
2701 {
2702 vm_offset_t actual_offset; /* Offset within paging segment */
2703 vm_offset_t size;
2704 vm_offset_t transfer_size;
2705 vm_offset_t subx_size;
2706 int error = 0;
2707 struct clmap clmap;
2708 paging_segment_t ps;
2709 struct vs_async *vsa;
2710 vm_map_copy_t copy;
2711 static char here[] = "vs_cluster_write";
2712
2713 upl_t upl;
2714 upl_page_info_t *page_list;
2715 upl_page_info_t pl[20];
2716 vm_offset_t mobj_base_addr;
2717 vm_offset_t mobj_target_addr;
2718 int mobj_size;
2719 int page_index;
2720 int list_size;
2721 int cl_size;
2722
2723
2724 ps = PAGING_SEGMENT_NULL;
2725
2726 if (!dp_internal) {
2727 int request_flags;
2728 int super_size;
2729 vm_offset_t upl_offset;
2730
2731 cl_size = (1 << vs->vs_clshift) * vm_page_size;
2732
2733 if (bs_low) {
2734 super_size = cl_size;
2735 request_flags = UPL_NOBLOCK |
2736 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2737 UPL_NO_SYNC | UPL_SET_INTERNAL;
2738 } else {
2739 super_size = VM_SUPER_CLUSTER;
2740 request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
2741 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2742 UPL_NO_SYNC | UPL_SET_INTERNAL;
2743 }
2744
2745
2746 upl_system_list_request((vm_object_t)
2747 vs->vs_control_port->ip_kobject,
2748 offset, cnt, super_size,
2749 &upl, NULL,
2750 0, request_flags);
2751
2752 mobj_base_addr = upl->offset;
2753 list_size = upl->size;
2754
2755 page_list = UPL_GET_INTERNAL_PAGE_LIST(upl);
2756 memcpy(pl, page_list,
2757 sizeof(upl_page_info_t) * (list_size/page_size));
2758
2759 /* Now parcel up the 64k transfer, do at most cluster size */
2760 /* at a time. */
2761 upl_offset = 0;
2762 page_index = 0;
2763 mobj_target_addr = mobj_base_addr;
2764
2765 for (transfer_size = list_size; transfer_size != 0;) {
2766 actual_offset = ps_clmap(vs, mobj_target_addr,
2767 &clmap, CL_ALLOC,
2768 transfer_size < cl_size ?
2769 transfer_size : cl_size, 0);
2770
2771 if (actual_offset == (vm_offset_t) -1) {
2772 for(;transfer_size != 0;) {
2773 if(UPL_PAGE_PRESENT(pl, page_index)) {
2774 uc_upl_abort_range(upl,
2775 upl_offset,
2776 transfer_size,
2777 UPL_ABORT_FREE_ON_EMPTY);
2778 break;
2779 }
2780 transfer_size-=page_size;
2781 upl_offset += vm_page_size;
2782 page_index++;
2783 }
2784 error = 1;
2785 break;
2786 }
2787 cnt = MIN(transfer_size,
2788 CLMAP_NPGS(clmap) * vm_page_size);
2789 ps = CLMAP_PS(clmap);
2790
2791 while (cnt > 0) {
2792 /* attempt to send entire cluster */
2793 subx_size = 0;
2794
2795 while (cnt > 0) {
2796 /* do the biggest contiguous transfer of dirty */
2797 /* pages */
2798 if (UPL_DIRTY_PAGE(pl, page_index) ||
2799 UPL_PRECIOUS_PAGE(pl, page_index)){
2800 page_index++;
2801 subx_size += vm_page_size;
2802 cnt -= vm_page_size;
2803 } else {
2804 if (subx_size == 0) {
2805 actual_offset += vm_page_size;
2806 mobj_target_addr += vm_page_size;
2807
2808 if(UPL_PAGE_PRESENT(pl, page_index)) {
2809 uc_upl_commit_range(upl,
2810 upl_offset,
2811 vm_page_size,
2812 TRUE, pl);
2813 }
2814
2815 upl_offset += vm_page_size;
2816 transfer_size -= vm_page_size;
2817 page_index++;
2818 cnt -= vm_page_size;
2819 } else {
2820 break;
2821 }
2822 }
2823 }
2824 if (subx_size) {
2825
2826 error = ps_write_file(ps, upl, upl_offset,
2827 actual_offset, subx_size, flags);
2828 if (error) {
2829 actual_offset += subx_size;
2830 mobj_target_addr += subx_size;
2831 upl_offset += subx_size;
2832 transfer_size -= subx_size;
2833
2834 for(;transfer_size != 0;) {
2835 if(UPL_PAGE_PRESENT(pl, page_index)) {
2836 uc_upl_abort_range(upl,
2837 upl_offset,
2838 transfer_size,
2839 UPL_ABORT_FREE_ON_EMPTY);
2840 break;
2841 }
2842 transfer_size-=page_size;
2843 upl_offset += vm_page_size;
2844 page_index++;
2845 }
2846 break;
2847 }
2848
2849 ps_vs_write_complete(vs, mobj_target_addr,
2850 subx_size, error);
2851 }
2852 actual_offset += subx_size;
2853 mobj_target_addr += subx_size;
2854 upl_offset += subx_size;
2855
2856 transfer_size -= subx_size;
2857 subx_size = 0;
2858 }
2859 if (error)
2860 break;
2861 }
2862 } else {
2863 assert(cnt <= (vm_page_size << vs->vs_clshift));
cl_size = (1 << vs->vs_clshift) * vm_page_size;
2864 list_size = cnt;
2865
2866 page_index = 0;
2867 /* The caller provides a mapped_data which is derived */
2868 /* from a temporary object. The targeted pages are */
2869 /* guaranteed to be set at offset 0 in the mapped_data */
2870 /* The actual offset however must still be derived */
2871 /* from the offset in the vs in question */
2872 mobj_base_addr = offset;
2873 mobj_target_addr = mobj_base_addr;
2874
2875 for (transfer_size = list_size; transfer_size != 0;) {
2876 actual_offset = ps_clmap(vs, mobj_target_addr,
2877 &clmap, CL_ALLOC,
2878 transfer_size < cl_size ?
2879 transfer_size : cl_size, 0);
2880 if(actual_offset == (vm_offset_t) -1) {
2881 error = 1;
2882 break;
2883 }
2884 cnt = MIN(transfer_size,
2885 CLMAP_NPGS(clmap) * vm_page_size);
2886 ps = CLMAP_PS(clmap);
2887 /* Assume that the caller has given us contiguous */
2888 /* pages */
2889 if(cnt) {
2890 error = ps_write_file(ps, internal_upl,
2891 0, actual_offset,
2892 cnt, flags);
2893 if (error)
2894 break;
2895 ps_vs_write_complete(vs, mobj_target_addr,
2896 cnt, error);
2897 }
2898 if (error)
2899 break;
2900 actual_offset += cnt;
2901 mobj_target_addr += cnt;
2902 transfer_size -= cnt;
2903 cnt = 0;
2904
2905 if (error)
2906 break;
2907 }
2908 }
2909 if(error)
2910 return KERN_FAILURE;
2911 else
2912 return KERN_SUCCESS;
2913 }
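
/*
 * Editorial sketch (not part of the original source): how the 64 KB
 * super-cluster gathered above is parceled into at-most-cluster-size
 * writes, assuming 4 KB pages and a cluster shift of 2 (16 KB clusters).
 * The real loop also skips clean pages and aborts the remainder of the
 * UPL on error; this only shows the size slicing.
 */
static void
example_parcel_super_cluster(void)
{
	const unsigned long page_size = 4096;		/* assumed vm_page_size */
	const unsigned int clshift = 2;			/* assumed vs->vs_clshift */
	unsigned long cl_size = (1UL << clshift) * page_size;	/* 16 KB */
	unsigned long transfer_size = 0x10000;		/* 64 KB super-cluster */
	unsigned int passes = 0;

	while (transfer_size != 0) {
		/* each pass asks ps_clmap()/ps_write_file() for at most one cluster */
		unsigned long chunk = transfer_size < cl_size ? transfer_size : cl_size;

		transfer_size -= chunk;
		passes++;
	}
	/* passes == 4: four 16 KB clusters cover the 64 KB transfer */
	(void) passes;
}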
2914
2915 vm_size_t
2916 ps_vstruct_allocated_size(
2917 vstruct_t vs)
2918 {
2919 int num_pages;
2920 struct vs_map *vsmap;
2921 int i, j, k;
2922
2923 num_pages = 0;
2924 if (vs->vs_indirect) {
2925 /* loop on indirect maps */
2926 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
2927 vsmap = vs->vs_imap[i];
2928 if (vsmap == NULL)
2929 continue;
2930 /* loop on clusters in this indirect map */
2931 for (j = 0; j < CLMAP_ENTRIES; j++) {
2932 if (VSM_ISCLR(vsmap[j]) ||
2933 VSM_ISERR(vsmap[j]))
2934 continue;
2935 /* loop on pages in this cluster */
2936 for (k = 0; k < VSCLSIZE(vs); k++) {
2937 if ((VSM_BMAP(vsmap[j])) & (1 << k))
2938 num_pages++;
2939 }
2940 }
2941 }
2942 } else {
2943 vsmap = vs->vs_dmap;
2944 if (vsmap == NULL)
2945 return 0;
2946 /* loop on clusters in the direct map */
2947 for (j = 0; j < CLMAP_ENTRIES; j++) {
2948 if (VSM_ISCLR(vsmap[j]) ||
2949 VSM_ISERR(vsmap[j]))
2950 continue;
2951 /* loop on pages in this cluster */
2952 for (k = 0; k < VSCLSIZE(vs); k++) {
2953 if ((VSM_BMAP(vsmap[j])) & (1 << k))
2954 num_pages++;
2955 }
2956 }
2957 }
2958
2959 return ptoa(num_pages);
2960 }
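
/*
 * Editorial sketch (not part of the original source): the per-cluster
 * bit test used above, counting allocated pages in one cluster's bitmap.
 * The bitmap word and the pages-per-cluster count are assumed values
 * standing in for VSM_BMAP() and VSCLSIZE().
 */
static int
example_count_cluster_pages(void)
{
	unsigned int bmap = 0xb;		/* pages 0, 1 and 3 written */
	int pages_per_cluster = 4;		/* stand-in for VSCLSIZE(vs) */
	int k, num_pages = 0;

	for (k = 0; k < pages_per_cluster; k++)
		if (bmap & (1 << k))
			num_pages++;
	return num_pages;			/* 3 */
}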
2961
2962 size_t
2963 ps_vstruct_allocated_pages(
2964 vstruct_t vs,
2965 default_pager_page_t *pages,
2966 size_t pages_size)
2967 {
2968 int num_pages;
2969 struct vs_map *vsmap;
2970 vm_offset_t offset;
2971 int i, j, k;
2972
2973 num_pages = 0;
2974 offset = 0;
2975 if (vs->vs_indirect) {
2976 /* loop on indirect maps */
2977 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
2978 vsmap = vs->vs_imap[i];
2979 if (vsmap == NULL) {
2980 offset += (vm_page_size * CLMAP_ENTRIES *
2981 VSCLSIZE(vs));
2982 continue;
2983 }
2984 /* loop on clusters in this indirect map */
2985 for (j = 0; j < CLMAP_ENTRIES; j++) {
2986 if (VSM_ISCLR(vsmap[j]) ||
2987 VSM_ISERR(vsmap[j])) {
2988 offset += vm_page_size * VSCLSIZE(vs);
2989 continue;
2990 }
2991 /* loop on pages in this cluster */
2992 for (k = 0; k < VSCLSIZE(vs); k++) {
2993 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
2994 num_pages++;
2995 if (num_pages < pages_size)
2996 pages++->dpp_offset =
2997 offset;
2998 }
2999 offset += vm_page_size;
3000 }
3001 }
3002 }
3003 } else {
3004 vsmap = vs->vs_dmap;
3005 if (vsmap == NULL)
3006 return 0;
3007 /* loop on clusters in the direct map */
3008 for (j = 0; j < CLMAP_ENTRIES; j++) {
3009 if (VSM_ISCLR(vsmap[j]) ||
3010 VSM_ISERR(vsmap[j])) {
3011 offset += vm_page_size * VSCLSIZE(vs);
3012 continue;
3013 }
3014 /* loop on pages in this cluster */
3015 for (k = 0; k < VSCLSIZE(vs); k++) {
3016 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
3017 num_pages++;
3018 if (num_pages < pages_size)
3019 pages++->dpp_offset = offset;
3020 }
3021 offset += vm_page_size;
3022 }
3023 }
3024 }
3025
3026 return num_pages;
3027 }
3028
3029
3030 kern_return_t
3031 ps_vstruct_transfer_from_segment(
3032 vstruct_t vs,
3033 paging_segment_t segment,
3034 #ifndef ubc_sync_working
3035 vm_object_t transfer_object)
3036 #else
3037 upl_t upl)
3038 #endif
3039 {
3040 struct vs_map *vsmap;
3041 struct vs_map old_vsmap;
3042 struct vs_map new_vsmap;
3043 int i, j, k;
3044
3045 VS_LOCK(vs); /* block all work on this vstruct */
3046 /* can't allow the normal multiple write */
3047 /* semantic because writes may conflict */
3048 vs->vs_xfer_pending = TRUE;
3049 vs_wait_for_sync_writers(vs);
3050 vs_start_write(vs);
3051 vs_wait_for_readers(vs);
3052 /* we will unlock the vs to allow other writes while transferring */
3053 /* and will be guaranteed of the persistence of the vs struct */
3054 /* because the caller of ps_vstruct_transfer_from_segment bumped */
3055 /* vs_async_pending */
3056 /* OK we now have guaranteed no other parties are accessing this */
3057 /* vs. Now that we are also supporting simple lock versions of */
3058 /* vs_lock we cannot hold onto VS_LOCK as we may block below. */
3059 /* our purpose in holding it before was the multiple write case */
3060 /* we now use the boolean xfer_pending to do that. We can use */
3061 /* a boolean instead of a count because we have guaranteed single */
3062 /* file access to this code in its caller */
3063 VS_UNLOCK(vs);
3064 vs_changed:
3065 if (vs->vs_indirect) {
3066 int vsmap_size;
3067 int clmap_off;
3068 /* loop on indirect maps */
3069 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3070 vsmap = vs->vs_imap[i];
3071 if (vsmap == NULL)
3072 continue;
3073 /* loop on clusters in this indirect map */
3074 clmap_off = (vm_page_size * CLMAP_ENTRIES *
3075 VSCLSIZE(vs) * i);
3076 if(i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
3077 vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
3078 else
3079 vsmap_size = CLMAP_ENTRIES;
3080 for (j = 0; j < vsmap_size; j++) {
3081 if (VSM_ISCLR(vsmap[j]) ||
3082 VSM_ISERR(vsmap[j]) ||
3083 (VSM_PS(vsmap[j]) != segment))
3084 continue;
3085 if(vs_cluster_transfer(vs,
3086 (vm_page_size * (j << vs->vs_clshift))
3087 + clmap_off,
3088 vm_page_size << vs->vs_clshift,
3089 #ifndef ubc_sync_working
3090 transfer_object)
3091 #else
3092 upl)
3093 #endif
3094 != KERN_SUCCESS) {
3095 VS_LOCK(vs);
3096 vs->vs_xfer_pending = FALSE;
3097 VS_UNLOCK(vs);
3098 vs_finish_write(vs);
3099 return KERN_FAILURE;
3100 }
3101 /* allow other readers/writers during transfer*/
3102 VS_LOCK(vs);
3103 vs->vs_xfer_pending = FALSE;
3104 VS_UNLOCK(vs);
3105 vs_finish_write(vs);
3106 VS_LOCK(vs);
3107 vs->vs_xfer_pending = TRUE;
3108 VS_UNLOCK(vs);
3109 vs_wait_for_sync_writers(vs);
3110 vs_start_write(vs);
3111 vs_wait_for_readers(vs);
3112 if (!(vs->vs_indirect)) {
3113 goto vs_changed;
3114 }
3115 }
3116 }
3117 } else {
3118 vsmap = vs->vs_dmap;
3119 if (vsmap == NULL) {
3120 VS_LOCK(vs);
3121 vs->vs_xfer_pending = FALSE;
3122 VS_UNLOCK(vs);
3123 vs_finish_write(vs);
3124 return KERN_SUCCESS;
3125 }
3126 /* loop on clusters in the direct map */
3127 for (j = 0; j < vs->vs_size; j++) {
3128 if (VSM_ISCLR(vsmap[j]) ||
3129 VSM_ISERR(vsmap[j]) ||
3130 (VSM_PS(vsmap[j]) != segment))
3131 continue;
3132 if(vs_cluster_transfer(vs,
3133 vm_page_size * (j << vs->vs_clshift),
3134 vm_page_size << vs->vs_clshift,
3135 #ifndef ubc_sync_working
3136 transfer_object) != KERN_SUCCESS) {
3137 #else
3138 upl) != KERN_SUCCESS) {
3139 #endif
3140 VS_LOCK(vs);
3141 vs->vs_xfer_pending = FALSE;
3142 VS_UNLOCK(vs);
3143 vs_finish_write(vs);
3144 return KERN_FAILURE;
3145 }
3146 /* allow other readers/writers during transfer*/
3147 VS_LOCK(vs);
3148 vs->vs_xfer_pending = FALSE;
3149 VS_UNLOCK(vs);
3150 vs_finish_write(vs);
3151 VS_LOCK(vs);
3152 vs->vs_xfer_pending = TRUE;
3153 VS_UNLOCK(vs);
3154 vs_wait_for_sync_writers(vs);
3155 vs_start_write(vs);
3156 vs_wait_for_readers(vs);
3157 if (vs->vs_indirect) {
3158 goto vs_changed;
3159 }
3160 }
3161 }
3162
3163 VS_LOCK(vs);
3164 vs->vs_xfer_pending = FALSE;
3165 VS_UNLOCK(vs);
3166 vs_finish_write(vs);
3167 return KERN_SUCCESS;
3168 }
3169
3170
3171
3172 vs_map_t
3173 vs_get_map_entry(
3174 vstruct_t vs,
3175 vm_offset_t offset)
3176 {
3177 struct vs_map *vsmap;
3178 vm_offset_t cluster;
3179
3180 cluster = atop(offset) >> vs->vs_clshift;
3181 if (vs->vs_indirect) {
3182 long ind_block = cluster/CLMAP_ENTRIES;
3183
3184 /* Is the indirect block allocated? */
3185 vsmap = vs->vs_imap[ind_block];
3186 if(vsmap == (vs_map_t) NULL)
3187 return vsmap;
3188 } else
3189 vsmap = vs->vs_dmap;
3190 vsmap += cluster%CLMAP_ENTRIES;
3191 return vsmap;
3192 }
3193
3194 kern_return_t
3195 vs_cluster_transfer(
3196 vstruct_t vs,
3197 vm_offset_t offset,
3198 vm_size_t cnt,
3199 #ifndef ubc_sync_working
3200 vm_object_t transfer_object)
3201 #else
3202 upl_t upl)
3203 #endif
3204 {
3205 vm_offset_t actual_offset;
3206 paging_segment_t ps;
3207 struct clmap clmap;
3208 kern_return_t error = KERN_SUCCESS;
3209 int size, size_wanted, i;
3210 unsigned int residual;
3211 int unavail_size;
3212 default_pager_thread_t *dpt;
3213 boolean_t dealloc;
3214 struct vs_map *vsmap_ptr;
3215 struct vs_map read_vsmap;
3216 struct vs_map original_read_vsmap;
3217 struct vs_map write_vsmap;
3218 upl_t sync_upl;
3219 #ifndef ubc_sync_working
3220 upl_t upl;
3221 #endif
3222
3223 vm_offset_t ioaddr;
3224
3225 static char here[] = "vs_cluster_transfer";
3226
3227 /* vs_cluster_transfer reads in the pages of a cluster and
3228 * then writes these pages back to new backing store. The
3229 * segment the pages are being read from is assumed to have
3230 * been taken off-line and is no longer considered for new
3231 * space requests.
3232 */
3233
3234 /*
3235 * This loop will be executed once per cluster referenced.
3236 * Typically this means once, since it's unlikely that the
3237 * VM system will ask for anything spanning cluster boundaries.
3238 *
3239 * If there are holes in a cluster (in a paging segment), we stop
3240 * reading at the hole, then loop again, hoping to
3241 * find valid pages later in the cluster. This continues until
3242 * the entire range has been examined, and read, if present. The
3243 * pages are written as they are read. If a failure occurs after
3244 * some pages are written the unmap call at the bottom of the loop
3245 * recovers the backing store and the old backing store remains
3246 * in effect.
3247 */
3248
3249 /* uc_upl_map(kernel_map, upl, &ioaddr); */
3250
3251 VSM_CLR(write_vsmap);
3252 VSM_CLR(original_read_vsmap);
3253 /* grab the actual object's pages to sync with I/O */
3254 while (cnt && (error == KERN_SUCCESS)) {
3255 vsmap_ptr = vs_get_map_entry(vs, offset);
3256 actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
3257
3258 if (actual_offset == (vm_offset_t) -1) {
3259
3260 /*
3261 * Nothing left to write in this cluster. At least
3262 * set the write cluster information for any previous
3263 * write, and clear it for the next cluster, if there is one.
3264 */
3265 unsigned int local_size, clmask, clsize;
3266
3267 clsize = vm_page_size << vs->vs_clshift;
3268 clmask = clsize - 1;
3269 local_size = clsize - (offset & clmask);
3270 ASSERT(local_size);
3271 local_size = MIN(local_size, cnt);
3272
3273 /* This cluster has no data in it beyond what may */
3274 /* have been found on a previous iteration through */
3275 /* the loop "write_vsmap" */
3276 *vsmap_ptr = write_vsmap;
3277 VSM_CLR(write_vsmap);
3278 VSM_CLR(original_read_vsmap);
3279
3280 cnt -= local_size;
3281 offset += local_size;
3282 continue;
3283 }
3284
3285 /*
3286 * Count up contiguous available or unavailable
3287 * pages.
3288 */
3289 ps = CLMAP_PS(clmap);
3290 ASSERT(ps);
3291 size = 0;
3292 unavail_size = 0;
3293 for (i = 0;
3294 (size < cnt) && (unavail_size < cnt) &&
3295 (i < CLMAP_NPGS(clmap)); i++) {
3296 if (CLMAP_ISSET(clmap, i)) {
3297 if (unavail_size != 0)
3298 break;
3299 size += vm_page_size;
3300 BS_STAT(ps->ps_bs,
3301 ps->ps_bs->bs_pages_in++);
3302 } else {
3303 if (size != 0)
3304 break;
3305 unavail_size += vm_page_size;
3306 }
3307 }
3308
3309 if (size == 0) {
3310 ASSERT(unavail_size);
3311 cnt -= unavail_size;
3312 offset += unavail_size;
3313 if((offset & ((vm_page_size << vs->vs_clshift) - 1))
3314 == 0) {
3315 /* There is no more to transfer in this
3316 cluster
3317 */
3318 *vsmap_ptr = write_vsmap;
3319 VSM_CLR(write_vsmap);
3320 VSM_CLR(original_read_vsmap);
3321 }
3322 continue;
3323 }
3324
3325 if(VSM_ISCLR(original_read_vsmap))
3326 original_read_vsmap = *vsmap_ptr;
3327
3328 if(ps->ps_segtype == PS_PARTITION) {
3329 /*
3330 NEED TO BE WITH SYNC & NO COMMIT
3331 error = ps_read_device(ps, actual_offset, &buffer,
3332 size, &residual, flags);
3333 */
3334 } else {
3335 #ifndef ubc_sync_working
3336 error = vm_fault_list_request(transfer_object,
3337 (vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
3338 size, &upl, NULL,
3339 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
3340 | UPL_SET_INTERNAL);
3341 if (error == KERN_SUCCESS) {
3342 error = ps_read_file(ps, upl, actual_offset,
3343 size, &residual, 0);
3344 if(error)
3345 uc_upl_commit(upl, NULL);
3346 }
3347
3348 #else
3349 /* NEED TO BE WITH SYNC & NO COMMIT & NO RDAHEAD*/
3350 error = ps_read_file(ps, upl, actual_offset,
3351 size, &residual,
3352 (UPL_IOSYNC | UPL_NOCOMMIT | UPL_NORDAHEAD));
3353 #endif
3354 }
3355
3356 read_vsmap = *vsmap_ptr;
3357
3358
3359 /*
3360 * Adjust counts and put data in new BS. Optimize for the
3361 * common case, i.e. no error and/or partial data.
3362 * If there was an error, then we need to error the entire
3363 * range, even if some data was successfully read.
3364 *
3365 */
3366 if ((error == KERN_SUCCESS) && (residual == 0)) {
3367 /*
3368 * Got everything we asked for, supply the data to
3369 * the new BS. Note that as a side effect of supplying
3370 * the data, the buffer holding the supplied data is
3371 * deallocated from the pager's address space unless
3372 * the write is unsuccessful.
3373 */
3374
3375 /* note buffer will be cleaned up in all cases by */
3376 /* internal_cluster_write or if an error on write */
3377 /* the vm_map_copy_page_discard call */
3378 *vsmap_ptr = write_vsmap;
3379
3380 #ifndef ubc_sync_working
3381 error = vm_fault_list_request(transfer_object,
3382 (vm_object_offset_t) (actual_offset & ((vm_page_size << vs->vs_clshift) - 1)),
3383 size, &upl, NULL,
3384 0, UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
3385 | UPL_SET_INTERNAL);
3386 if(vs_cluster_write(vs, upl, offset,
3387 size, TRUE, 0) != KERN_SUCCESS) {
3388 uc_upl_commit(upl, NULL);
3389 #else
3390 if(vs_cluster_write(vs, upl, offset,
3391 size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
3392 #endif
3393 error = KERN_FAILURE;
3394 if(!(VSM_ISCLR(*vsmap_ptr))) {
3395 /* unmap the new backing store object */
3396 ps_clunmap(vs, offset, size);
3397 }
3398 /* original vsmap */
3399 *vsmap_ptr = original_read_vsmap;
3400 VSM_CLR(write_vsmap);
3401 } else {
3402 if((offset + size) &
3403 ((vm_page_size << vs->vs_clshift)
3404 - 1)) {
3405 /* There is more to transfer in this
3406 cluster
3407 */
3408 write_vsmap = *vsmap_ptr;
3409 *vsmap_ptr = read_vsmap;
3410 } else {
3411 /* discard the old backing object */
3412 write_vsmap = *vsmap_ptr;
3413 *vsmap_ptr = read_vsmap;
3414 ps_clunmap(vs, offset, size);
3415 *vsmap_ptr = write_vsmap;
3416 VSM_CLR(write_vsmap);
3417 VSM_CLR(original_read_vsmap);
3418 }
3419 }
3420 } else {
3421 size_wanted = size;
3422 if (error == KERN_SUCCESS) {
3423 if (residual == size) {
3424 /*
3425 * If a read operation returns no error
3426 * and no data moved, we turn it into
3427 * an error, assuming we're reading at
3428 * or beyond EOF.
3429 * Fall through and error the entire
3430 * range.
3431 */
3432 error = KERN_FAILURE;
3433 *vsmap_ptr = write_vsmap;
3434 if(!(VSM_ISCLR(*vsmap_ptr))) {
3435 /* unmap the new backing store object */
3436 ps_clunmap(vs, offset, size);
3437 }
3438 *vsmap_ptr = original_read_vsmap;
3439 VSM_CLR(write_vsmap);
3440 continue;
3441 } else {
3442 /*
3443 * Otherwise, we have a partial read.
3444 * This is also considered an error
3445 * for the purposes of cluster transfer
3446 */
3447 error = KERN_FAILURE;
3448 *vsmap_ptr = write_vsmap;
3449 if(!(VSM_ISCLR(*vsmap_ptr))) {
3450 /* unmap the new backing store object */
3451 ps_clunmap(vs, offset, size);
3452 }
3453 *vsmap_ptr = original_read_vsmap;
3454 VSM_CLR(write_vsmap);
3455 continue;
3456 }
3457 }
3458
3459 }
3460 cnt -= size;
3461 offset += size;
3462
3463 } /* END while (cnt && (error == 0)) */
3464 if(!VSM_ISCLR(write_vsmap))
3465 *vsmap_ptr = write_vsmap;
3466
3467 /* uc_upl_un_map(kernel_map, upl); */
3468 return error;
3469 }
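
/*
 * Editorial sketch (not part of the original source): the save/restore
 * ordering vs_cluster_transfer() relies on so that a failed rewrite
 * leaves the old backing store mapping in effect.  The map type and the
 * outcome flag here are stand-ins; the real function also carries a
 * separate write_vsmap across the pieces of a partially transferred
 * cluster.  Only the commit/back-out ordering is illustrated.
 */
struct example_map { int segment; };		/* stand-in for struct vs_map */

static void
example_transfer_recovery(struct example_map *entry, int write_ok)
{
	struct example_map original = *entry;	/* old backing store mapping */
	struct example_map rewritten;		/* produced by the rewrite */

	rewritten.segment = original.segment + 1;	/* pretend the pages moved */

	if (write_ok)
		*entry = rewritten;	/* commit: the new segment now backs the cluster */
	else
		*entry = original;	/* back out: the old segment still backs it */
}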
3470
3471 kern_return_t
3472 default_pager_add_file(MACH_PORT_FACE backing_store,
3473 int *vp,
3474 int record_size,
3475 long size)
3476 {
3477 backing_store_t bs;
3478 paging_segment_t ps;
3479 int i;
3480 int error;
3481 static char here[] = "default_pager_add_file";
3482
3483 if ((bs = backing_store_lookup(backing_store))
3484 == BACKING_STORE_NULL)
3485 return KERN_INVALID_ARGUMENT;
3486
3487 PSL_LOCK();
3488 for (i = 0; i <= paging_segment_max; i++) {
3489 ps = paging_segments[i];
3490 if (ps == PAGING_SEGMENT_NULL)
3491 continue;
3492 if (ps->ps_segtype != PS_FILE)
3493 continue;
3494
3495 /*
3496 * Check for overlap on same device.
3497 */
3498 if (ps->ps_vnode == (struct vnode *)vp) {
3499 PSL_UNLOCK();
3500 BS_UNLOCK(bs);
3501 return KERN_INVALID_ARGUMENT;
3502 }
3503 }
3504 PSL_UNLOCK();
3505
3506 /*
3507 * Set up the paging segment
3508 */
3509 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
3510 if (ps == PAGING_SEGMENT_NULL) {
3511 BS_UNLOCK(bs);
3512 return KERN_RESOURCE_SHORTAGE;
3513 }
3514
3515 ps->ps_segtype = PS_FILE;
3516 ps->ps_vnode = (struct vnode *)vp;
3517 ps->ps_offset = 0;
3518 ps->ps_record_shift = local_log2(vm_page_size / record_size);
3519 ps->ps_recnum = size;
3520 ps->ps_pgnum = size >> ps->ps_record_shift;
3521
3522 ps->ps_pgcount = ps->ps_pgnum;
3523 ps->ps_clshift = local_log2(bs->bs_clsize);
3524 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
3525 ps->ps_hint = 0;
3526
3527 PS_LOCK_INIT(ps);
3528 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
3529 if (!ps->ps_bmap) {
3530 kfree((vm_offset_t)ps, sizeof *ps);
3531 BS_UNLOCK(bs);
3532 return KERN_RESOURCE_SHORTAGE;
3533 }
3534 for (i = 0; i < ps->ps_ncls; i++) {
3535 clrbit(ps->ps_bmap, i);
3536 }
3537
3538 ps->ps_going_away = FALSE;
3539 ps->ps_bs = bs;
3540
3541 if ((error = ps_enter(ps)) != 0) {
3542 kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
3543 kfree((vm_offset_t)ps, sizeof *ps);
3544 BS_UNLOCK(bs);
3545 return KERN_RESOURCE_SHORTAGE;
3546 }
3547
3548 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
3549 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
3550 PSL_LOCK();
3551 dp_pages_free += ps->ps_pgcount;
3552 PSL_UNLOCK();
3553
3554 BS_UNLOCK(bs);
3555
3556 bs_more_space(ps->ps_clcount);
3557
3558 DEBUG(DEBUG_BS_INTERNAL,
3559 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
3560 (int) vp, ps->ps_offset, size, record_size,
3561 ps->ps_record_shift, ps->ps_pgnum));
3562
3563 return KERN_SUCCESS;
3564 }
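
/*
 * Editorial sketch (not part of the original source): the sizing
 * arithmetic default_pager_add_file() performs, worked through with
 * assumed values -- 4 KB pages, 512-byte file records, a cluster size
 * of 4 pages, and a 64 MB paging file.
 */
static void
example_paging_file_sizing(void)
{
	const unsigned long page_size = 4096;	/* assumed vm_page_size */
	const unsigned long record_size = 512;	/* assumed file record size */
	const unsigned int clshift = 2;		/* assumed log2(bs_clsize) */
	unsigned long size = 131072;		/* file length in records (64 MB / 512) */

	unsigned int record_shift = 3;		/* local_log2(page_size / record_size) */
	unsigned long pgnum = size >> record_shift;	/* 16384 pages backed by the file */
	unsigned long clcount = pgnum >> clshift;	/* 4096 clusters of 4 pages */
	unsigned long pages_added = clcount << clshift;	/* 16384 pages credited to the backing store */

	(void) page_size; (void) record_size;
	(void) pages_added;
}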
3565
3566
3567
3568 kern_return_t ps_read_file(paging_segment_t, upl_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
3569
3570 kern_return_t
3571 ps_read_file(
3572 paging_segment_t ps,
3573 upl_t upl,
3574 vm_offset_t offset,
3575 unsigned int size,
3576 unsigned int *residualp,
3577 int flags)
3578 {
3579 vm_object_offset_t f_offset;
3580 int error = 0;
3581 int result;
3582 static char here[] = "ps_read_file";
3583
3584
3585 clustered_reads[atop(size)]++;
3586
3587 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3588
3589 /* for transfer case we need to pass uploffset and flags */
3590 error = vnode_pagein(ps->ps_vnode,
3591 upl, (vm_offset_t)0, f_offset, (vm_size_t)size, flags, NULL);
3592
3593 /* The vnode_pagein semantic is somewhat at odds with the existing */
3594 /* device_read semantic. Partial reads are not experienced at this */
3595 /* level. It is up to the bit map code and cluster read code to */
3596 /* check that requested data locations are actually backed, and the */
3597 /* pagein code to either read all of the requested data or return an */
3598 /* error. */
3599
3600 if (error)
3601 result = KERN_FAILURE;
3602 else {
3603 *residualp = 0;
3604 result = KERN_SUCCESS;
3605 }
3606 return result;
3607
3608 }
3609
3610 kern_return_t
3611 ps_write_file(
3612 paging_segment_t ps,
3613 upl_t upl,
3614 vm_offset_t upl_offset,
3615 vm_offset_t offset,
3616 unsigned int size,
3617 int flags)
3618 {
3619 vm_object_offset_t f_offset;
3620 kern_return_t result;
3621 static char here[] = "ps_write_file";
3622
3623 int error = 0;
3624
3625 clustered_writes[atop(size)]++;
3626 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3627
3628 if (vnode_pageout(ps->ps_vnode,
3629 upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
3630 result = KERN_FAILURE;
3631 else
3632 result = KERN_SUCCESS;
3633
3634 return result;
3635 }
3636
3637 kern_return_t
3638 default_pager_triggers(MACH_PORT_FACE default_pager,
3639 int hi_wat,
3640 int lo_wat,
3641 int flags,
3642 MACH_PORT_FACE trigger_port)
3643 {
3644
3645 if(flags & HI_WAT_ALERT) {
3646 if(min_pages_trigger_port)
3647 ipc_port_release_send(min_pages_trigger_port);
3648 min_pages_trigger_port = trigger_port;
3649 minimum_pages_remaining = hi_wat/vm_page_size;
3650 bs_low = FALSE;
3651 }
3652 if(flags & LO_WAT_ALERT) {
3653 if(max_pages_trigger_port)
3654 ipc_port_release_send(max_pages_trigger_port);
3655 max_pages_trigger_port = trigger_port;
3656 maximum_pages_free = lo_wat/vm_page_size;
3657 }
return KERN_SUCCESS;
3658 }