[apple/xnu.git] / osfmk / default_pager / dp_backing_store.c (git blame, xnu-792.18.15)
1c79356b 1/*
91447636 2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
8f6c56a5 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
8f6c56a5
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
8ad349bb 24 * limitations under the License.
8f6c56a5
A
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57/*
58 * Default Pager.
59 * Paging File Management.
60 */
61
91447636 62#include <mach/host_priv.h>
0b4e3aa0 63#include <mach/memory_object_control.h>
1c79356b 64#include <mach/memory_object_server.h>
91447636
A
65#include <mach/upl.h>
66#include <default_pager/default_pager_internal.h>
1c79356b 67#include <default_pager/default_pager_alerts.h>
91447636
A
68#include <default_pager/default_pager_object_server.h>
69
70#include <ipc/ipc_types.h>
1c79356b
A
71#include <ipc/ipc_port.h>
72#include <ipc/ipc_space.h>
91447636
A
73
74#include <kern/kern_types.h>
75#include <kern/host.h>
1c79356b
A
76#include <kern/queue.h>
77#include <kern/counters.h>
78#include <kern/sched_prim.h>
91447636 79
1c79356b
A
80#include <vm/vm_kern.h>
81#include <vm/vm_pageout.h>
1c79356b 82#include <vm/vm_map.h>
91447636
A
83#include <vm/vm_object.h>
84#include <vm/vm_protos.h>
85
86/* LP64todo - need large internal object support */
1c79356b 87
0b4e3aa0
A
88/*
89 * ALLOC_STRIDE... the maximum number of bytes allocated from
90 * a swap file before moving on to the next swap file... if
91 * all swap files reside on a single disk, this value should
92 * be very large (this is the default assumption)... if the
 93 * swap files are spread across multiple disks, then this value
94 * should be small (128 * 1024)...
95 *
96 * This should be determined dynamically in the future
97 */
1c79356b 98
0b4e3aa0 99#define ALLOC_STRIDE (1024 * 1024 * 1024)
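/*
 * Illustrative numbers, assuming 4 KB pages and a cluster shift of 2
 * (16 KB clusters): ps_select_segment() below rotates to the next
 * segment at the same priority after
 * ALLOC_STRIDE >> (ps_clshift + vm_page_shift) cluster allocations,
 * i.e. 1 GB >> 14 == 65536 clusters from one swap file before moving on.
 */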
1c79356b
A
100int physical_transfer_cluster_count = 0;
101
9bccf70c
A
102#define VM_SUPER_CLUSTER 0x40000
103#define VM_SUPER_PAGES 64
1c79356b
A
104
105/*
106 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
107 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
108 */
109#define VSTRUCT_DEF_CLSHIFT 2
110int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
111int default_pager_clsize = 0;
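/*
 * Example: with VSTRUCT_DEF_CLSHIFT of 2, a cluster covers
 * 1 << 2 == 4 pages, i.e. 16 KB with a 4 KB page size.
 */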
112
113/* statistics */
0b4e3aa0
A
114unsigned int clustered_writes[VM_SUPER_PAGES+1];
115unsigned int clustered_reads[VM_SUPER_PAGES+1];
1c79356b
A
116
117/*
118 * Globals used for asynchronous paging operations:
119 * vs_async_list: head of list of to-be-completed I/O ops
120 * async_num_queued: number of pages completed, but not yet
121 * processed by async thread.
122 * async_requests_out: number of pages of requests not completed.
123 */
124
125#if 0
126struct vs_async *vs_async_list;
127int async_num_queued;
128int async_requests_out;
129#endif
130
131
132#define VS_ASYNC_REUSE 1
133struct vs_async *vs_async_free_list;
134
135mutex_t default_pager_async_lock; /* Protects globals above */
136
137
138int vs_alloc_async_failed = 0; /* statistics */
139int vs_alloc_async_count = 0; /* statistics */
140struct vs_async *vs_alloc_async(void); /* forward */
141void vs_free_async(struct vs_async *vsa); /* forward */
142
143
144#define VS_ALLOC_ASYNC() vs_alloc_async()
145#define VS_FREE_ASYNC(vsa) vs_free_async(vsa)
146
147#define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock)
148#define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock)
91447636 149#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, 0)
1c79356b
A
150#define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock)
151/*
152 * Paging Space Hysteresis triggers and the target notification port
153 *
154 */
155
156unsigned int minimum_pages_remaining = 0;
157unsigned int maximum_pages_free = 0;
158ipc_port_t min_pages_trigger_port = NULL;
159ipc_port_t max_pages_trigger_port = NULL;
160
161boolean_t bs_low = FALSE;
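/*
 * Usage note: when dp_pages_free drops below minimum_pages_remaining,
 * the allocation paths below (ps_select_segment, ps_allocate_cluster)
 * send a HI_WAT_ALERT on min_pages_trigger_port and set bs_low.
 */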
0b4e3aa0 162int backing_store_release_trigger_disable = 0;
91447636
A
163
164
165/* Have we decided if swap needs to be encrypted yet ? */
166boolean_t dp_encryption_inited = FALSE;
167/* Should we encrypt swap ? */
168boolean_t dp_encryption = FALSE;
1c79356b
A
169
170
171/*
172 * Object sizes are rounded up to the next power of 2,
173 * unless they are bigger than a given maximum size.
174 */
175vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */
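/*
 * For example, under this policy a 3 MB object would be rounded up to
 * 4 MB, while an object already larger than max_doubled_size is left
 * at its requested size.
 */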
176
177/*
178 * List of all backing store and segments.
179 */
180struct backing_store_list_head backing_store_list;
181paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
182mutex_t paging_segments_lock;
183int paging_segment_max = 0;
184int paging_segment_count = 0;
185int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
186
187
188/*
189 * Total pages free in system
 190 * This differs from clusters committed/avail, which is a measure of the
 191 * over-commitment of paging segments to backing store (an idea that is
 192 * likely to be deprecated).
193 */
194unsigned int dp_pages_free = 0;
195unsigned int cluster_transfer_minimum = 100;
196
91447636
A
197/* forward declarations */
198kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int); /* forward */
199kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
200default_pager_thread_t *get_read_buffer( void );
201kern_return_t ps_vstruct_transfer_from_segment(
202 vstruct_t vs,
203 paging_segment_t segment,
204 upl_t upl);
205kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
206kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */
207kern_return_t vs_cluster_transfer(
208 vstruct_t vs,
209 upl_offset_t offset,
210 upl_size_t cnt,
211 upl_t upl);
212vs_map_t vs_get_map_entry(
213 vstruct_t vs,
214 vm_offset_t offset);
0b4e3aa0 215
1c79356b
A
216
217default_pager_thread_t *
91447636 218get_read_buffer( void )
1c79356b
A
219{
220 int i;
221
222 DPT_LOCK(dpt_lock);
223 while(TRUE) {
224 for (i=0; i<default_pager_internal_count; i++) {
225 if(dpt_array[i]->checked_out == FALSE) {
226 dpt_array[i]->checked_out = TRUE;
227 DPT_UNLOCK(dpt_lock);
228 return dpt_array[i];
229 }
230 }
9bccf70c 231 DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
1c79356b
A
232 }
233}
234
235void
236bs_initialize(void)
237{
238 int i;
239
240 /*
241 * List of all backing store.
242 */
243 BSL_LOCK_INIT();
244 queue_init(&backing_store_list.bsl_queue);
245 PSL_LOCK_INIT();
246
247 VS_ASYNC_LOCK_INIT();
248#if VS_ASYNC_REUSE
249 vs_async_free_list = NULL;
250#endif /* VS_ASYNC_REUSE */
251
0b4e3aa0 252 for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
1c79356b
A
253 clustered_writes[i] = 0;
254 clustered_reads[i] = 0;
255 }
256
257}
258
259/*
 260 * When things do not quite work out...
261 */
262void bs_no_paging_space(boolean_t); /* forward */
263
264void
265bs_no_paging_space(
266 boolean_t out_of_memory)
267{
1c79356b
A
268
269 if (out_of_memory)
270 dprintf(("*** OUT OF MEMORY ***\n"));
271 panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
272}
273
274void bs_more_space(int); /* forward */
275void bs_commit(int); /* forward */
276
277boolean_t user_warned = FALSE;
278unsigned int clusters_committed = 0;
279unsigned int clusters_available = 0;
280unsigned int clusters_committed_peak = 0;
281
282void
283bs_more_space(
284 int nclusters)
285{
286 BSL_LOCK();
287 /*
288 * Account for new paging space.
289 */
290 clusters_available += nclusters;
291
292 if (clusters_available >= clusters_committed) {
293 if (verbose && user_warned) {
294 printf("%s%s - %d excess clusters now.\n",
295 my_name,
296 "paging space is OK now",
297 clusters_available - clusters_committed);
298 user_warned = FALSE;
299 clusters_committed_peak = 0;
300 }
301 } else {
302 if (verbose && user_warned) {
303 printf("%s%s - still short of %d clusters.\n",
304 my_name,
305 "WARNING: paging space over-committed",
306 clusters_committed - clusters_available);
307 clusters_committed_peak -= nclusters;
308 }
309 }
310 BSL_UNLOCK();
311
312 return;
313}
314
315void
316bs_commit(
317 int nclusters)
318{
319 BSL_LOCK();
320 clusters_committed += nclusters;
321 if (clusters_committed > clusters_available) {
322 if (verbose && !user_warned) {
323 user_warned = TRUE;
324 printf("%s%s - short of %d clusters.\n",
325 my_name,
326 "WARNING: paging space over-committed",
327 clusters_committed - clusters_available);
328 }
329 if (clusters_committed > clusters_committed_peak) {
330 clusters_committed_peak = clusters_committed;
331 }
332 } else {
333 if (verbose && user_warned) {
334 printf("%s%s - was short of up to %d clusters.\n",
335 my_name,
336 "paging space is OK now",
337 clusters_committed_peak - clusters_available);
338 user_warned = FALSE;
339 clusters_committed_peak = 0;
340 }
341 }
342 BSL_UNLOCK();
343
344 return;
345}
346
347int default_pager_info_verbose = 1;
348
349void
350bs_global_info(
351 vm_size_t *totalp,
352 vm_size_t *freep)
353{
354 vm_size_t pages_total, pages_free;
355 paging_segment_t ps;
356 int i;
1c79356b
A
357
358 PSL_LOCK();
359 pages_total = pages_free = 0;
360 for (i = 0; i <= paging_segment_max; i++) {
361 ps = paging_segments[i];
362 if (ps == PAGING_SEGMENT_NULL)
363 continue;
364
365 /*
366 * no need to lock: by the time this data
367 * gets back to any remote requestor it
 368 * will be obsolete anyway
369 */
370 pages_total += ps->ps_pgnum;
371 pages_free += ps->ps_clcount << ps->ps_clshift;
91447636
A
372 DP_DEBUG(DEBUG_BS_INTERNAL,
373 ("segment #%d: %d total, %d free\n",
374 i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
1c79356b
A
375 }
376 *totalp = pages_total;
377 *freep = pages_free;
378 if (verbose && user_warned && default_pager_info_verbose) {
379 if (clusters_available < clusters_committed) {
380 printf("%s %d clusters committed, %d available.\n",
381 my_name,
382 clusters_committed,
383 clusters_available);
384 }
385 }
386 PSL_UNLOCK();
387}
388
389backing_store_t backing_store_alloc(void); /* forward */
390
391backing_store_t
392backing_store_alloc(void)
393{
394 backing_store_t bs;
1c79356b
A
395
396 bs = (backing_store_t) kalloc(sizeof (struct backing_store));
397 if (bs == BACKING_STORE_NULL)
398 panic("backing_store_alloc: no memory");
399
400 BS_LOCK_INIT(bs);
401 bs->bs_port = MACH_PORT_NULL;
402 bs->bs_priority = 0;
403 bs->bs_clsize = 0;
404 bs->bs_pages_total = 0;
405 bs->bs_pages_in = 0;
406 bs->bs_pages_in_fail = 0;
407 bs->bs_pages_out = 0;
408 bs->bs_pages_out_fail = 0;
409
410 return bs;
411}
412
413backing_store_t backing_store_lookup(MACH_PORT_FACE); /* forward */
414
415/* Even in both the component space and external versions of this pager, */
416/* backing_store_lookup will be called from tasks in the application space */
417backing_store_t
418backing_store_lookup(
419 MACH_PORT_FACE port)
420{
421 backing_store_t bs;
422
423/*
 424 The port is currently backed with a vs structure in the alias field.
 425 We could create an ISBS alias and a port_is_bs call, but frankly
 426 I see no reason for the test: the bs->port == port check below
 427 will work properly on junk entries.
428
429 if ((port == MACH_PORT_NULL) || port_is_vs(port))
430*/
431 if ((port == MACH_PORT_NULL))
432 return BACKING_STORE_NULL;
433
434 BSL_LOCK();
435 queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
436 bs_links) {
437 BS_LOCK(bs);
438 if (bs->bs_port == port) {
439 BSL_UNLOCK();
440 /* Success, return it locked. */
441 return bs;
442 }
443 BS_UNLOCK(bs);
444 }
445 BSL_UNLOCK();
446 return BACKING_STORE_NULL;
447}
448
449void backing_store_add(backing_store_t); /* forward */
450
451void
452backing_store_add(
91447636 453 __unused backing_store_t bs)
1c79356b 454{
91447636
A
455// MACH_PORT_FACE port = bs->bs_port;
456// MACH_PORT_FACE pset = default_pager_default_set;
1c79356b 457 kern_return_t kr = KERN_SUCCESS;
1c79356b
A
458
459 if (kr != KERN_SUCCESS)
460 panic("backing_store_add: add to set");
461
462}
463
464/*
465 * Set up default page shift, but only if not already
466 * set and argument is within range.
467 */
468boolean_t
469bs_set_default_clsize(unsigned int npages)
470{
471 switch(npages){
472 case 1:
473 case 2:
474 case 4:
475 case 8:
476 if (default_pager_clsize == 0) /* if not yet set */
477 vstruct_def_clshift = local_log2(npages);
478 return(TRUE);
479 }
480 return(FALSE);
481}
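/*
 * Usage sketch: bs_set_default_clsize(4) sets vstruct_def_clshift to
 * local_log2(4) == 2 (4 pages per cluster) as long as the cluster size
 * has not already been fixed; any npages other than 1, 2, 4 or 8 is
 * rejected and FALSE is returned.
 */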
482
483int bs_get_global_clsize(int clsize); /* forward */
484
485int
486bs_get_global_clsize(
487 int clsize)
488{
489 int i;
0b4e3aa0 490 memory_object_default_t dmm;
1c79356b 491 kern_return_t kr;
1c79356b
A
492
493 /*
494 * Only allow setting of cluster size once. If called
495 * with no cluster size (default), we use the compiled-in default
496 * for the duration. The same cluster size is used for all
497 * paging segments.
498 */
499 if (default_pager_clsize == 0) {
1c79356b
A
500 /*
501 * Keep cluster size in bit shift because it's quicker
502 * arithmetic, and easier to keep at a power of 2.
503 */
504 if (clsize != NO_CLSIZE) {
505 for (i = 0; (1 << i) < clsize; i++);
506 if (i > MAX_CLUSTER_SHIFT)
507 i = MAX_CLUSTER_SHIFT;
508 vstruct_def_clshift = i;
509 }
510 default_pager_clsize = (1 << vstruct_def_clshift);
511
512 /*
513 * Let the user know the new (and definitive) cluster size.
514 */
515 if (verbose)
516 printf("%scluster size = %d page%s\n",
517 my_name, default_pager_clsize,
518 (default_pager_clsize == 1) ? "" : "s");
0b4e3aa0 519
1c79356b
A
520 /*
521 * Let the kernel know too, in case it hasn't used the
522 * default value provided in main() yet.
523 */
0b4e3aa0 524 dmm = default_pager_object;
1c79356b
A
525 clsize = default_pager_clsize * vm_page_size; /* in bytes */
526 kr = host_default_memory_manager(host_priv_self(),
0b4e3aa0 527 &dmm,
1c79356b 528 clsize);
0b4e3aa0
A
529 memory_object_default_deallocate(dmm);
530
1c79356b
A
531 if (kr != KERN_SUCCESS) {
532 panic("bs_get_global_cl_size:host_default_memory_manager");
533 }
0b4e3aa0 534 if (dmm != default_pager_object) {
1c79356b
A
535 panic("bs_get_global_cl_size:there is another default pager");
536 }
537 }
538 ASSERT(default_pager_clsize > 0 &&
539 (default_pager_clsize & (default_pager_clsize - 1)) == 0);
540
541 return default_pager_clsize;
542}
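/*
 * Rounding example for the loop above: a requested clsize of 5 pages
 * yields i == 3 (the smallest i with (1 << i) >= 5), so the definitive
 * cluster size becomes 8 pages, subject to the MAX_CLUSTER_SHIFT cap.
 */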
543
544kern_return_t
545default_pager_backing_store_create(
0b4e3aa0
A
546 memory_object_default_t pager,
547 int priority,
548 int clsize, /* in bytes */
549 MACH_PORT_FACE *backing_store)
1c79356b
A
550{
551 backing_store_t bs;
552 MACH_PORT_FACE port;
91447636 553// kern_return_t kr;
1c79356b 554 struct vstruct_alias *alias_struct;
1c79356b 555
0b4e3aa0 556 if (pager != default_pager_object)
1c79356b
A
557 return KERN_INVALID_ARGUMENT;
558
559 bs = backing_store_alloc();
560 port = ipc_port_alloc_kernel();
561 ipc_port_make_send(port);
562 assert (port != IP_NULL);
563
91447636
A
564 DP_DEBUG(DEBUG_BS_EXTERNAL,
565 ("priority=%d clsize=%d bs_port=0x%x\n",
566 priority, clsize, (int) backing_store));
1c79356b
A
567
568 alias_struct = (struct vstruct_alias *)
569 kalloc(sizeof (struct vstruct_alias));
570 if(alias_struct != NULL) {
571 alias_struct->vs = (struct vstruct *)bs;
89b3af67 572 alias_struct->name = &default_pager_ops;
1c79356b
A
573 port->alias = (int) alias_struct;
574 }
575 else {
576 ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
91447636 577 kfree(bs, sizeof (struct backing_store));
1c79356b
A
578 return KERN_RESOURCE_SHORTAGE;
579 }
580
581 bs->bs_port = port;
582 if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
583 priority = BS_MAXPRI;
584 else if (priority == BS_NOPRI)
585 priority = BS_MAXPRI;
586 else
587 priority = BS_MINPRI;
588 bs->bs_priority = priority;
589
55e303ae 590 bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
1c79356b
A
591
592 BSL_LOCK();
593 queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
594 bs_links);
595 BSL_UNLOCK();
596
597 backing_store_add(bs);
598
599 *backing_store = port;
600 return KERN_SUCCESS;
601}
602
603kern_return_t
604default_pager_backing_store_info(
605 MACH_PORT_FACE backing_store,
606 backing_store_flavor_t flavour,
607 backing_store_info_t info,
608 mach_msg_type_number_t *size)
609{
610 backing_store_t bs;
611 backing_store_basic_info_t basic;
612 int i;
613 paging_segment_t ps;
614
615 if (flavour != BACKING_STORE_BASIC_INFO ||
616 *size < BACKING_STORE_BASIC_INFO_COUNT)
617 return KERN_INVALID_ARGUMENT;
618
619 basic = (backing_store_basic_info_t)info;
620 *size = BACKING_STORE_BASIC_INFO_COUNT;
621
622 VSTATS_LOCK(&global_stats.gs_lock);
623 basic->pageout_calls = global_stats.gs_pageout_calls;
624 basic->pagein_calls = global_stats.gs_pagein_calls;
625 basic->pages_in = global_stats.gs_pages_in;
626 basic->pages_out = global_stats.gs_pages_out;
627 basic->pages_unavail = global_stats.gs_pages_unavail;
628 basic->pages_init = global_stats.gs_pages_init;
629 basic->pages_init_writes= global_stats.gs_pages_init_writes;
630 VSTATS_UNLOCK(&global_stats.gs_lock);
631
632 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
633 return KERN_INVALID_ARGUMENT;
634
635 basic->bs_pages_total = bs->bs_pages_total;
636 PSL_LOCK();
637 bs->bs_pages_free = 0;
638 for (i = 0; i <= paging_segment_max; i++) {
639 ps = paging_segments[i];
640 if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
641 PS_LOCK(ps);
642 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
643 PS_UNLOCK(ps);
644 }
645 }
646 PSL_UNLOCK();
647 basic->bs_pages_free = bs->bs_pages_free;
648 basic->bs_pages_in = bs->bs_pages_in;
649 basic->bs_pages_in_fail = bs->bs_pages_in_fail;
650 basic->bs_pages_out = bs->bs_pages_out;
651 basic->bs_pages_out_fail= bs->bs_pages_out_fail;
652
653 basic->bs_priority = bs->bs_priority;
55e303ae 654 basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */
1c79356b
A
655
656 BS_UNLOCK(bs);
657
658 return KERN_SUCCESS;
659}
660
661int ps_delete(paging_segment_t); /* forward */
662
663int
664ps_delete(
665 paging_segment_t ps)
666{
667 vstruct_t vs;
668 kern_return_t error = KERN_SUCCESS;
669 int vs_count;
670
671 VSL_LOCK(); /* get the lock on the list of vs's */
672
 673 /* The lock relationship and sequence are fairly complicated; */
674 /* this code looks at a live list, locking and unlocking the list */
675 /* as it traverses it. It depends on the locking behavior of */
676 /* default_pager_no_senders. no_senders always locks the vstruct */
677 /* targeted for removal before locking the vstruct list. However */
678 /* it will remove that member of the list without locking its */
679 /* neighbors. We can be sure when we hold a lock on a vstruct */
680 /* it cannot be removed from the list but we must hold the list */
681 /* lock to be sure that its pointers to its neighbors are valid. */
682 /* Also, we can hold off destruction of a vstruct when the list */
683 /* lock and the vs locks are not being held by bumping the */
684 /* vs_async_pending count. */
685
0b4e3aa0
A
686
687 while(backing_store_release_trigger_disable != 0) {
9bccf70c 688 VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);
0b4e3aa0
A
689 }
690
1c79356b
A
691 /* we will choose instead to hold a send right */
692 vs_count = vstruct_list.vsl_count;
693 vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
694 if(vs == (vstruct_t)&vstruct_list) {
695 VSL_UNLOCK();
696 return KERN_SUCCESS;
697 }
698 VS_LOCK(vs);
699 vs_async_wait(vs); /* wait for any pending async writes */
700 if ((vs_count != 0) && (vs != NULL))
701 vs->vs_async_pending += 1; /* hold parties calling */
702 /* vs_async_wait */
703 VS_UNLOCK(vs);
704 VSL_UNLOCK();
705 while((vs_count != 0) && (vs != NULL)) {
706 /* We take the count of AMO's before beginning the */
 707 /* transfer of the target segment. */
708 /* We are guaranteed that the target segment cannot get */
709 /* more users. We also know that queue entries are */
710 /* made at the back of the list. If some of the entries */
711 /* we would check disappear while we are traversing the */
712 /* list then we will either check new entries which */
713 /* do not have any backing store in the target segment */
714 /* or re-check old entries. This might not be optimal */
715 /* but it will always be correct. The alternative is to */
716 /* take a snapshot of the list. */
717 vstruct_t next_vs;
718
719 if(dp_pages_free < cluster_transfer_minimum)
720 error = KERN_FAILURE;
721 else {
722 vm_object_t transfer_object;
89b3af67 723 unsigned int count;
1c79356b
A
724 upl_t upl;
725
91447636 726 transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
0b4e3aa0
A
727 count = 0;
728 error = vm_object_upl_request(transfer_object,
729 (vm_object_offset_t)0, VM_SUPER_CLUSTER,
730 &upl, NULL, &count,
731 UPL_NO_SYNC | UPL_CLEAN_IN_PLACE
732 | UPL_SET_INTERNAL);
1c79356b 733 if(error == KERN_SUCCESS) {
1c79356b
A
734 error = ps_vstruct_transfer_from_segment(
735 vs, ps, upl);
91447636 736 upl_commit(upl, NULL, 0);
0b4e3aa0 737 upl_deallocate(upl);
1c79356b 738 } else {
1c79356b
A
739 error = KERN_FAILURE;
740 }
9bccf70c 741 vm_object_deallocate(transfer_object);
1c79356b
A
742 }
743 if(error) {
744 VS_LOCK(vs);
745 vs->vs_async_pending -= 1; /* release vs_async_wait */
0b4e3aa0
A
746 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
747 vs->vs_waiting_async = FALSE;
1c79356b 748 VS_UNLOCK(vs);
0b4e3aa0 749 thread_wakeup(&vs->vs_async_pending);
1c79356b
A
750 } else {
751 VS_UNLOCK(vs);
752 }
753 return KERN_FAILURE;
754 }
755
756 VSL_LOCK();
0b4e3aa0
A
757
758 while(backing_store_release_trigger_disable != 0) {
9bccf70c
A
759 VSL_SLEEP(&backing_store_release_trigger_disable,
760 THREAD_UNINT);
0b4e3aa0
A
761 }
762
1c79356b
A
763 next_vs = (vstruct_t) queue_next(&(vs->vs_links));
764 if((next_vs != (vstruct_t)&vstruct_list) &&
765 (vs != next_vs) && (vs_count != 1)) {
766 VS_LOCK(next_vs);
767 vs_async_wait(next_vs); /* wait for any */
768 /* pending async writes */
769 next_vs->vs_async_pending += 1; /* hold parties */
770 /* calling vs_async_wait */
771 VS_UNLOCK(next_vs);
772 }
773 VSL_UNLOCK();
774 VS_LOCK(vs);
775 vs->vs_async_pending -= 1;
0b4e3aa0
A
776 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
777 vs->vs_waiting_async = FALSE;
1c79356b 778 VS_UNLOCK(vs);
0b4e3aa0 779 thread_wakeup(&vs->vs_async_pending);
1c79356b
A
780 } else {
781 VS_UNLOCK(vs);
782 }
783 if((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list))
784 vs = NULL;
785 else
786 vs = next_vs;
787 vs_count--;
788 }
789 return KERN_SUCCESS;
790}
791
792
793kern_return_t
794default_pager_backing_store_delete(
795 MACH_PORT_FACE backing_store)
796{
797 backing_store_t bs;
798 int i;
799 paging_segment_t ps;
800 int error;
801 int interim_pages_removed = 0;
91447636 802// kern_return_t kr;
1c79356b
A
803
804 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
805 return KERN_INVALID_ARGUMENT;
806
807#if 0
808 /* not implemented */
809 BS_UNLOCK(bs);
810 return KERN_FAILURE;
811#endif
812
813 restart:
814 PSL_LOCK();
815 error = KERN_SUCCESS;
816 for (i = 0; i <= paging_segment_max; i++) {
817 ps = paging_segments[i];
818 if (ps != PAGING_SEGMENT_NULL &&
819 ps->ps_bs == bs &&
820 ! ps->ps_going_away) {
821 PS_LOCK(ps);
822 /* disable access to this segment */
823 ps->ps_going_away = TRUE;
824 PS_UNLOCK(ps);
825 /*
826 * The "ps" segment is "off-line" now,
827 * we can try and delete it...
828 */
829 if(dp_pages_free < (cluster_transfer_minimum
830 + ps->ps_pgcount)) {
831 error = KERN_FAILURE;
832 PSL_UNLOCK();
833 }
834 else {
835 /* remove all pages associated with the */
836 /* segment from the list of free pages */
837 /* when transfer is through, all target */
838 /* segment pages will appear to be free */
839
840 dp_pages_free -= ps->ps_pgcount;
841 interim_pages_removed += ps->ps_pgcount;
842 PSL_UNLOCK();
843 error = ps_delete(ps);
844 }
845 if (error != KERN_SUCCESS) {
846 /*
847 * We couldn't delete the segment,
848 * probably because there's not enough
849 * virtual memory left.
850 * Re-enable all the segments.
851 */
852 PSL_LOCK();
853 break;
854 }
855 goto restart;
856 }
857 }
858
859 if (error != KERN_SUCCESS) {
860 for (i = 0; i <= paging_segment_max; i++) {
861 ps = paging_segments[i];
862 if (ps != PAGING_SEGMENT_NULL &&
863 ps->ps_bs == bs &&
864 ps->ps_going_away) {
865 PS_LOCK(ps);
866 /* re-enable access to this segment */
867 ps->ps_going_away = FALSE;
868 PS_UNLOCK(ps);
869 }
870 }
871 dp_pages_free += interim_pages_removed;
872 PSL_UNLOCK();
873 BS_UNLOCK(bs);
874 return error;
875 }
876
877 for (i = 0; i <= paging_segment_max; i++) {
878 ps = paging_segments[i];
879 if (ps != PAGING_SEGMENT_NULL &&
880 ps->ps_bs == bs) {
881 if(ps->ps_going_away) {
882 paging_segments[i] = PAGING_SEGMENT_NULL;
883 paging_segment_count--;
884 PS_LOCK(ps);
91447636
A
885 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
886 kfree(ps, sizeof *ps);
1c79356b
A
887 }
888 }
889 }
890
891 /* Scan the entire ps array separately to make certain we find the */
892 /* proper paging_segment_max */
893 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
894 if(paging_segments[i] != PAGING_SEGMENT_NULL)
895 paging_segment_max = i;
896 }
897
898 PSL_UNLOCK();
899
900 /*
901 * All the segments have been deleted.
902 * We can remove the backing store.
903 */
904
905 /*
906 * Disable lookups of this backing store.
907 */
908 if((void *)bs->bs_port->alias != NULL)
91447636
A
909 kfree((void *) bs->bs_port->alias,
910 sizeof (struct vstruct_alias));
1c79356b
A
911 ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
912 bs->bs_port = MACH_PORT_NULL;
913 BS_UNLOCK(bs);
914
915 /*
916 * Remove backing store from backing_store list.
917 */
918 BSL_LOCK();
919 queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t,
920 bs_links);
921 BSL_UNLOCK();
922
923 /*
924 * Free the backing store structure.
925 */
91447636 926 kfree(bs, sizeof *bs);
1c79356b
A
927
928 return KERN_SUCCESS;
929}
930
931int ps_enter(paging_segment_t); /* forward */
932
933int
934ps_enter(
935 paging_segment_t ps)
936{
937 int i;
938
939 PSL_LOCK();
940
941 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
942 if (paging_segments[i] == PAGING_SEGMENT_NULL)
943 break;
944 }
945
946 if (i < MAX_NUM_PAGING_SEGMENTS) {
947 paging_segments[i] = ps;
948 if (i > paging_segment_max)
949 paging_segment_max = i;
950 paging_segment_count++;
951 if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) ||
952 (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI))
953 ps_select_array[ps->ps_bs->bs_priority] = 0;
954 i = 0;
955 } else {
956 PSL_UNLOCK();
957 return KERN_RESOURCE_SHORTAGE;
958 }
959
960 PSL_UNLOCK();
961 return i;
962}
963
964#ifdef DEVICE_PAGING
965kern_return_t
966default_pager_add_segment(
967 MACH_PORT_FACE backing_store,
968 MACH_PORT_FACE device,
969 recnum_t offset,
970 recnum_t count,
971 int record_size)
972{
973 backing_store_t bs;
974 paging_segment_t ps;
975 int i;
976 int error;
1c79356b
A
977
978 if ((bs = backing_store_lookup(backing_store))
979 == BACKING_STORE_NULL)
980 return KERN_INVALID_ARGUMENT;
981
982 PSL_LOCK();
983 for (i = 0; i <= paging_segment_max; i++) {
984 ps = paging_segments[i];
985 if (ps == PAGING_SEGMENT_NULL)
986 continue;
987
988 /*
989 * Check for overlap on same device.
990 */
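		/*
		 * The negated test below reads: same device AND the new
		 * range [offset, offset + count) intersects the existing
		 * range [ps_offset, ps_offset + ps_recnum); e.g. an
		 * existing segment covering records 0..99 rejects a new
		 * one that starts at record 50.
		 */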
991 if (!(ps->ps_device != device
992 || offset >= ps->ps_offset + ps->ps_recnum
993 || offset + count <= ps->ps_offset)) {
994 PSL_UNLOCK();
995 BS_UNLOCK(bs);
996 return KERN_INVALID_ARGUMENT;
997 }
998 }
999 PSL_UNLOCK();
1000
1001 /*
1002 * Set up the paging segment
1003 */
1004 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
1005 if (ps == PAGING_SEGMENT_NULL) {
1006 BS_UNLOCK(bs);
1007 return KERN_RESOURCE_SHORTAGE;
1008 }
1009
1010 ps->ps_segtype = PS_PARTITION;
1011 ps->ps_device = device;
1012 ps->ps_offset = offset;
1013 ps->ps_record_shift = local_log2(vm_page_size / record_size);
1014 ps->ps_recnum = count;
1015 ps->ps_pgnum = count >> ps->ps_record_shift;
1016
1017 ps->ps_pgcount = ps->ps_pgnum;
1018 ps->ps_clshift = local_log2(bs->bs_clsize);
1019 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
1020 ps->ps_hint = 0;
1021
1022 PS_LOCK_INIT(ps);
1023 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
1024 if (!ps->ps_bmap) {
91447636 1025 kfree(ps, sizeof *ps);
1c79356b
A
1026 BS_UNLOCK(bs);
1027 return KERN_RESOURCE_SHORTAGE;
1028 }
1029 for (i = 0; i < ps->ps_ncls; i++) {
1030 clrbit(ps->ps_bmap, i);
1031 }
1032
1033 ps->ps_going_away = FALSE;
1034 ps->ps_bs = bs;
1035
1036 if ((error = ps_enter(ps)) != 0) {
91447636
A
1037 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
1038 kfree(ps, sizeof *ps);
1c79356b
A
1039 BS_UNLOCK(bs);
1040 return KERN_RESOURCE_SHORTAGE;
1041 }
1042
1043 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
1044 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
1045 BS_UNLOCK(bs);
1046
1047 PSL_LOCK();
1048 dp_pages_free += ps->ps_pgcount;
1049 PSL_UNLOCK();
1050
1051 bs_more_space(ps->ps_clcount);
1052
91447636
A
1053 DP_DEBUG(DEBUG_BS_INTERNAL,
1054 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1055 device, offset, count, record_size,
1056 ps->ps_record_shift, ps->ps_pgnum));
1c79356b
A
1057
1058 return KERN_SUCCESS;
1059}
1060
1061boolean_t
1062bs_add_device(
1063 char *dev_name,
1064 MACH_PORT_FACE master)
1065{
1066 security_token_t null_security_token = {
1067 { 0, 0 }
1068 };
1069 MACH_PORT_FACE device;
1070 int info[DEV_GET_SIZE_COUNT];
1071 mach_msg_type_number_t info_count;
1072 MACH_PORT_FACE bs = MACH_PORT_NULL;
1073 unsigned int rec_size;
1074 recnum_t count;
1075 int clsize;
1076 MACH_PORT_FACE reply_port;
1077
1078 if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE,
1079 null_security_token, dev_name, &device))
1080 return FALSE;
1081
1082 info_count = DEV_GET_SIZE_COUNT;
1083 if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) {
1084 rec_size = info[DEV_GET_SIZE_RECORD_SIZE];
1085 count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
1086 clsize = bs_get_global_clsize(0);
1087 if (!default_pager_backing_store_create(
0b4e3aa0 1088 default_pager_object,
1c79356b
A
1089 DEFAULT_PAGER_BACKING_STORE_MAXPRI,
1090 (clsize * vm_page_size),
1091 &bs)) {
1092 if (!default_pager_add_segment(bs, device,
1093 0, count, rec_size)) {
1094 return TRUE;
1095 }
1096 ipc_port_release_receive(bs);
1097 }
1098 }
1099
1100 ipc_port_release_send(device);
1101 return FALSE;
1102}
1103#endif /* DEVICE_PAGING */
1104
1105#if VS_ASYNC_REUSE
1106
1107struct vs_async *
1108vs_alloc_async(void)
1109{
1110 struct vs_async *vsa;
1111 MACH_PORT_FACE reply_port;
91447636 1112// kern_return_t kr;
1c79356b
A
1113
1114 VS_ASYNC_LOCK();
1115 if (vs_async_free_list == NULL) {
1116 VS_ASYNC_UNLOCK();
1117 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1118 if (vsa != NULL) {
1119 /*
1120 * Try allocating a reply port named after the
1121 * address of the vs_async structure.
1122 */
1123 struct vstruct_alias *alias_struct;
1124
1125 reply_port = ipc_port_alloc_kernel();
1126 alias_struct = (struct vstruct_alias *)
1127 kalloc(sizeof (struct vstruct_alias));
1128 if(alias_struct != NULL) {
1129 alias_struct->vs = (struct vstruct *)vsa;
89b3af67 1130 alias_struct->name = &default_pager_ops;
1c79356b
A
1131 reply_port->alias = (int) alias_struct;
1132 vsa->reply_port = reply_port;
1133 vs_alloc_async_count++;
1134 }
1135 else {
1136 vs_alloc_async_failed++;
1137 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1138 (reply_port));
91447636 1139 kfree(vsa, sizeof (struct vs_async));
1c79356b
A
1140 vsa = NULL;
1141 }
1142 }
1143 } else {
1144 vsa = vs_async_free_list;
1145 vs_async_free_list = vs_async_free_list->vsa_next;
1146 VS_ASYNC_UNLOCK();
1147 }
1148
1149 return vsa;
1150}
1151
1152void
1153vs_free_async(
1154 struct vs_async *vsa)
1155{
1156 VS_ASYNC_LOCK();
1157 vsa->vsa_next = vs_async_free_list;
1158 vs_async_free_list = vsa;
1159 VS_ASYNC_UNLOCK();
1160}
1161
1162#else /* VS_ASYNC_REUSE */
1163
1164struct vs_async *
1165vs_alloc_async(void)
1166{
1167 struct vs_async *vsa;
1168 MACH_PORT_FACE reply_port;
 1169 kern_return_t kr;
	struct vstruct_alias *alias_struct;
1170
1171 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1172 if (vsa != NULL) {
1173 /*
1174 * Try allocating a reply port named after the
1175 * address of the vs_async structure.
1176 */
1177 reply_port = ipc_port_alloc_kernel();
 1178 alias_struct = (struct vstruct_alias *)
1179 kalloc(sizeof (struct vstruct_alias));
1180 if(alias_struct != NULL) {
 1181 alias_struct->vs = (struct vstruct *)vsa;
89b3af67 1182 alias_struct->name = &default_pager_ops;
1c79356b
A
 1183 reply_port->alias = (int) alias_struct;
1184 vsa->reply_port = reply_port;
1185 vs_alloc_async_count++;
1186 }
1187 else {
1188 vs_alloc_async_failed++;
1189 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1190 (reply_port));
91447636 1191 kfree(vsa, sizeof (struct vs_async));
1c79356b
A
1192 vsa = NULL;
1193 }
1194 }
1195
1196 return vsa;
1197}
1198
1199void
1200vs_free_async(
1201 struct vs_async *vsa)
1202{
1c79356b
A
1203 MACH_PORT_FACE reply_port;
1204 kern_return_t kr;
1205
1206 reply_port = vsa->reply_port;
91447636
A
 1207 kfree((void *) reply_port->alias, sizeof (struct vstruct_alias));
1208 kfree(vsa, sizeof (struct vs_async));
1c79356b
A
1209 ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
1210#if 0
1211 VS_ASYNC_LOCK();
1212 vs_alloc_async_count--;
1213 VS_ASYNC_UNLOCK();
1214#endif
1215}
1216
1217#endif /* VS_ASYNC_REUSE */
1218
0b4e3aa0
A
1219zone_t vstruct_zone;
1220
1c79356b
A
1221vstruct_t
1222ps_vstruct_create(
1223 vm_size_t size)
1224{
1225 vstruct_t vs;
91447636 1226 unsigned int i;
1c79356b 1227
0b4e3aa0 1228 vs = (vstruct_t) zalloc(vstruct_zone);
1c79356b
A
1229 if (vs == VSTRUCT_NULL) {
1230 return VSTRUCT_NULL;
1231 }
1232
1233 VS_LOCK_INIT(vs);
1234
1235 /*
1236 * The following fields will be provided later.
1237 */
89b3af67 1238 vs->vs_pager_ops = NULL;
0b4e3aa0
A
1239 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
1240 vs->vs_references = 1;
1c79356b 1241 vs->vs_seqno = 0;
1c79356b
A
1242
1243#ifdef MACH_KERNEL
1244 vs->vs_waiting_seqno = FALSE;
1245 vs->vs_waiting_read = FALSE;
1246 vs->vs_waiting_write = FALSE;
1c79356b
A
1247 vs->vs_waiting_async = FALSE;
1248#else
91447636
A
1249 mutex_init(&vs->vs_waiting_seqno, 0);
1250 mutex_init(&vs->vs_waiting_read, 0);
1251 mutex_init(&vs->vs_waiting_write, 0);
1252 mutex_init(&vs->vs_waiting_refs, 0);
1253 mutex_init(&vs->vs_waiting_async, 0);
1c79356b
A
1254#endif
1255
1256 vs->vs_readers = 0;
1257 vs->vs_writers = 0;
1258
1259 vs->vs_errors = 0;
1260
1261 vs->vs_clshift = local_log2(bs_get_global_clsize(0));
55e303ae 1262 vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
1c79356b
A
1263 vs->vs_async_pending = 0;
1264
1265 /*
1266 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1267 * depending on the size of the memory object.
1268 */
1269 if (INDIRECT_CLMAP(vs->vs_size)) {
1270 vs->vs_imap = (struct vs_map **)
1271 kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size));
1272 vs->vs_indirect = TRUE;
1273 } else {
1274 vs->vs_dmap = (struct vs_map *)
1275 kalloc(CLMAP_SIZE(vs->vs_size));
1276 vs->vs_indirect = FALSE;
1277 }
1278 vs->vs_xfer_pending = FALSE;
91447636
A
1279 DP_DEBUG(DEBUG_VS_INTERNAL,
1280 ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
1c79356b
A
1281
1282 /*
1283 * Check to see that we got the space.
1284 */
1285 if (!vs->vs_dmap) {
91447636 1286 kfree(vs, sizeof *vs);
1c79356b
A
1287 return VSTRUCT_NULL;
1288 }
1289
1290 /*
1291 * Zero the indirect pointers, or clear the direct pointers.
1292 */
1293 if (vs->vs_indirect)
1294 memset(vs->vs_imap, 0,
1295 INDIRECT_CLMAP_SIZE(vs->vs_size));
1296 else
1297 for (i = 0; i < vs->vs_size; i++)
1298 VSM_CLR(vs->vs_dmap[i]);
1299
1300 VS_MAP_LOCK_INIT(vs);
1301
1302 bs_commit(vs->vs_size);
1303
1304 return vs;
1305}
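/*
 * Sizing example for vs_size above, assuming 4 KB pages and the default
 * cluster shift of 2: a 70 KB object rounds to 18 pages, so
 * ((18 - 1) >> 2) + 1 == 5 clusters are committed via bs_commit().
 */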
1306
91447636 1307paging_segment_t ps_select_segment(unsigned int, int *); /* forward */
1c79356b
A
1308
1309paging_segment_t
1310ps_select_segment(
91447636
A
1311 unsigned int shift,
1312 int *psindex)
1c79356b
A
1313{
1314 paging_segment_t ps;
1315 int i;
1316 int j;
1c79356b
A
1317
1318 /*
1319 * Optimize case where there's only one segment.
1320 * paging_segment_max will index the one and only segment.
1321 */
1322
1323 PSL_LOCK();
1324 if (paging_segment_count == 1) {
1325 paging_segment_t lps; /* used to avoid extra PS_UNLOCK */
0b4e3aa0 1326 ipc_port_t trigger = IP_NULL;
1c79356b
A
1327
1328 ps = paging_segments[paging_segment_max];
1329 *psindex = paging_segment_max;
1330 PS_LOCK(ps);
1331 if (ps->ps_going_away) {
1332 /* this segment is being turned off */
1333 lps = PAGING_SEGMENT_NULL;
1334 } else {
1335 ASSERT(ps->ps_clshift >= shift);
1336 if (ps->ps_clcount) {
1337 ps->ps_clcount--;
1338 dp_pages_free -= 1 << ps->ps_clshift;
1339 if(min_pages_trigger_port &&
1340 (dp_pages_free < minimum_pages_remaining)) {
0b4e3aa0 1341 trigger = min_pages_trigger_port;
1c79356b
A
1342 min_pages_trigger_port = NULL;
1343 bs_low = TRUE;
1344 }
1345 lps = ps;
1346 } else
1347 lps = PAGING_SEGMENT_NULL;
1348 }
1349 PS_UNLOCK(ps);
1350 PSL_UNLOCK();
0b4e3aa0
A
1351
1352 if (trigger != IP_NULL) {
1353 default_pager_space_alert(trigger, HI_WAT_ALERT);
1354 ipc_port_release_send(trigger);
1355 }
1c79356b
A
1356 return lps;
1357 }
1358
1359 if (paging_segment_count == 0) {
1360 PSL_UNLOCK();
1361 return PAGING_SEGMENT_NULL;
1362 }
1363
1364 for (i = BS_MAXPRI;
1365 i >= BS_MINPRI; i--) {
1366 int start_index;
1367
1368 if ((ps_select_array[i] == BS_NOPRI) ||
1369 (ps_select_array[i] == BS_FULLPRI))
1370 continue;
1371 start_index = ps_select_array[i];
1372
1373 if(!(paging_segments[start_index])) {
1374 j = start_index+1;
1375 physical_transfer_cluster_count = 0;
1376 }
0b4e3aa0 1377 else if ((physical_transfer_cluster_count+1) == (ALLOC_STRIDE >>
1c79356b 1378 (((paging_segments[start_index])->ps_clshift)
0b4e3aa0 1379 + vm_page_shift))) {
1c79356b
A
1380 physical_transfer_cluster_count = 0;
1381 j = start_index + 1;
1382 } else {
1383 physical_transfer_cluster_count+=1;
1384 j = start_index;
1385 if(start_index == 0)
1386 start_index = paging_segment_max;
1387 else
1388 start_index = start_index - 1;
1389 }
1390
1391 while (1) {
1392 if (j > paging_segment_max)
1393 j = 0;
1394 if ((ps = paging_segments[j]) &&
1395 (ps->ps_bs->bs_priority == i)) {
1396 /*
1397 * Force the ps cluster size to be
1398 * >= that of the vstruct.
1399 */
1400 PS_LOCK(ps);
1401 if (ps->ps_going_away) {
1402 /* this segment is being turned off */
1403 } else if ((ps->ps_clcount) &&
1404 (ps->ps_clshift >= shift)) {
0b4e3aa0
A
1405 ipc_port_t trigger = IP_NULL;
1406
1c79356b
A
1407 ps->ps_clcount--;
1408 dp_pages_free -= 1 << ps->ps_clshift;
1409 if(min_pages_trigger_port &&
1410 (dp_pages_free <
1411 minimum_pages_remaining)) {
0b4e3aa0 1412 trigger = min_pages_trigger_port;
1c79356b
A
1413 min_pages_trigger_port = NULL;
1414 }
1415 PS_UNLOCK(ps);
1416 /*
1417 * found one, quit looking.
1418 */
1419 ps_select_array[i] = j;
1420 PSL_UNLOCK();
0b4e3aa0
A
1421
1422 if (trigger != IP_NULL) {
1423 default_pager_space_alert(
1424 trigger,
1425 HI_WAT_ALERT);
1426 ipc_port_release_send(trigger);
1427 }
1c79356b
A
1428 *psindex = j;
1429 return ps;
1430 }
1431 PS_UNLOCK(ps);
1432 }
1433 if (j == start_index) {
1434 /*
1435 * none at this priority -- mark it full
1436 */
1437 ps_select_array[i] = BS_FULLPRI;
1438 break;
1439 }
1440 j++;
1441 }
1442 }
1443 PSL_UNLOCK();
1444 return PAGING_SEGMENT_NULL;
1445}
1446
1447vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
1448
1449vm_offset_t
1450ps_allocate_cluster(
1451 vstruct_t vs,
1452 int *psindex,
1453 paging_segment_t use_ps)
1454{
91447636 1455 unsigned int byte_num;
1c79356b
A
1456 int bit_num = 0;
1457 paging_segment_t ps;
1458 vm_offset_t cluster;
0b4e3aa0 1459 ipc_port_t trigger = IP_NULL;
1c79356b
A
1460
1461 /*
1462 * Find best paging segment.
1463 * ps_select_segment will decrement cluster count on ps.
1464 * Must pass cluster shift to find the most appropriate segment.
1465 */
1466 /* NOTE: The addition of paging segment delete capability threatened
1467 * to seriously complicate the treatment of paging segments in this
1468 * module and the ones that call it (notably ps_clmap), because of the
1469 * difficulty in assuring that the paging segment would continue to
1470 * exist between being unlocked and locked. This was
1471 * avoided because all calls to this module are based in either
1472 * dp_memory_object calls which rely on the vs lock, or by
1473 * the transfer function which is part of the segment delete path.
1474 * The transfer function which is part of paging segment delete is
1475 * protected from multiple callers by the backing store lock.
1476 * The paging segment delete function treats mappings to a paging
1477 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1478 * while data is transferred to the remaining segments. This is in
1479 * line with the view that incomplete or in-transition mappings between
1480 * data, a vstruct, and backing store are protected by the vs lock.
1481 * This and the ordering of the paging segment "going_away" bit setting
1482 * protects us.
1483 */
1484 if (use_ps != PAGING_SEGMENT_NULL) {
1485 ps = use_ps;
1486 PSL_LOCK();
1487 PS_LOCK(ps);
55e303ae
A
1488
1489 ASSERT(ps->ps_clcount != 0);
1490
1c79356b
A
1491 ps->ps_clcount--;
1492 dp_pages_free -= 1 << ps->ps_clshift;
1c79356b
A
1493 if(min_pages_trigger_port &&
1494 (dp_pages_free < minimum_pages_remaining)) {
0b4e3aa0 1495 trigger = min_pages_trigger_port;
1c79356b
A
1496 min_pages_trigger_port = NULL;
1497 }
0b4e3aa0 1498 PSL_UNLOCK();
1c79356b 1499 PS_UNLOCK(ps);
0b4e3aa0
A
1500 if (trigger != IP_NULL) {
1501 default_pager_space_alert(trigger, HI_WAT_ALERT);
1502 ipc_port_release_send(trigger);
1503 }
1504
1c79356b
A
1505 } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
1506 PAGING_SEGMENT_NULL) {
91447636
A
1507 static uint32_t lastnotify = 0;
1508 uint32_t now, nanoseconds_dummy;
1509
1510 /*
1511 * Emit a notification of the low-paging resource condition
1512 * but don't issue it more than once every five seconds. This
1513 * prevents us from overflowing logs with thousands of
1514 * repetitions of the message.
1515 */
1516 clock_get_system_nanotime(&now, &nanoseconds_dummy);
1517 if (now > lastnotify + 5) {
1518 dprintf(("no space in available paging segments\n"));
1519 lastnotify = now;
1520 }
1521
1c79356b 1522 /* the count may have drifted; reset it to zero */
0b4e3aa0 1523 PSL_LOCK();
1c79356b
A
1524 dp_pages_free = 0;
1525 if(min_pages_trigger_port) {
0b4e3aa0 1526 trigger = min_pages_trigger_port;
1c79356b
A
1527 min_pages_trigger_port = NULL;
1528 bs_low = TRUE;
1529 }
0b4e3aa0
A
1530 PSL_UNLOCK();
1531 if (trigger != IP_NULL) {
1532 default_pager_space_alert(trigger, HI_WAT_ALERT);
1533 ipc_port_release_send(trigger);
1534 }
1c79356b
A
1535 return (vm_offset_t) -1;
1536 }
1c79356b
A
1537
1538 /*
1539 * Look for an available cluster. At the end of the loop,
1540 * byte_num is the byte offset and bit_num is the bit offset of the
1541 * first zero bit in the paging segment bitmap.
1542 */
1543 PS_LOCK(ps);
1544 byte_num = ps->ps_hint;
1545 for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
1546 if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
1547 for (bit_num = 0; bit_num < NBBY; bit_num++) {
1548 if (isclr((ps->ps_bmap + byte_num), bit_num))
1549 break;
1550 }
1551 ASSERT(bit_num != NBBY);
1552 break;
1553 }
1554 }
1555 ps->ps_hint = byte_num;
1556 cluster = (byte_num*NBBY) + bit_num;
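	/*
	 * Worked example: if the first not-completely-set bitmap byte is
	 * byte_num 3 and its lowest clear bit is bit_num 5, the chosen
	 * cluster is 3 * NBBY + 5 == 29 (with the usual NBBY of 8).
	 */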
1557
1558 /* Space was reserved, so this must be true */
1559 ASSERT(cluster < ps->ps_ncls);
1560
1561 setbit(ps->ps_bmap, cluster);
1562 PS_UNLOCK(ps);
1563
1564 return cluster;
1565}
1566
1567void ps_deallocate_cluster(paging_segment_t, vm_offset_t); /* forward */
1568
1569void
1570ps_deallocate_cluster(
1571 paging_segment_t ps,
1572 vm_offset_t cluster)
1573{
1574
1575 if (cluster >= (vm_offset_t) ps->ps_ncls)
1576 panic("ps_deallocate_cluster: Invalid cluster number");
1577
1578 /*
 1579 * Lock the paging segment, clear the cluster's bit in the bitmap and
 1580 * increment the number of free clusters.
1581 */
1582 PSL_LOCK();
1583 PS_LOCK(ps);
1584 clrbit(ps->ps_bmap, cluster);
1585 ++ps->ps_clcount;
1586 dp_pages_free += 1 << ps->ps_clshift;
0b4e3aa0 1587 PSL_UNLOCK();
1c79356b
A
1588
1589 /*
1590 * Move the hint down to the freed cluster if it is
1591 * less than the current hint.
1592 */
1593 if ((cluster/NBBY) < ps->ps_hint) {
1594 ps->ps_hint = (cluster/NBBY);
1595 }
1596
1597 PS_UNLOCK(ps);
1598
1599 /*
1600 * If we're freeing space on a full priority, reset the array.
1601 */
1602 PSL_LOCK();
1603 if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
1604 ps_select_array[ps->ps_bs->bs_priority] = 0;
1605 PSL_UNLOCK();
1606
1607 return;
1608}
1609
1610void ps_dealloc_vsmap(struct vs_map *, vm_size_t); /* forward */
1611
1612void
1613ps_dealloc_vsmap(
1614 struct vs_map *vsmap,
1615 vm_size_t size)
1616{
91447636 1617 unsigned int i;
1c79356b
A
1618 for (i = 0; i < size; i++)
1619 if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
1620 ps_deallocate_cluster(VSM_PS(vsmap[i]),
1621 VSM_CLOFF(vsmap[i]));
1622}
1623
1624void
1625ps_vstruct_dealloc(
1626 vstruct_t vs)
1627{
91447636
A
1628 unsigned int i;
1629// spl_t s;
1c79356b
A
1630
1631 VS_MAP_LOCK(vs);
1632
1633 /*
1634 * If this is an indirect structure, then we walk through the valid
1635 * (non-zero) indirect pointers and deallocate the clusters
1636 * associated with each used map entry (via ps_dealloc_vsmap).
1637 * When all of the clusters in an indirect block have been
1638 * freed, we deallocate the block. When all of the indirect
1639 * blocks have been deallocated we deallocate the memory
1640 * holding the indirect pointers.
1641 */
1642 if (vs->vs_indirect) {
1643 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
1644 if (vs->vs_imap[i] != NULL) {
1645 ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
91447636 1646 kfree(vs->vs_imap[i], CLMAP_THRESHOLD);
1c79356b
A
1647 }
1648 }
91447636 1649 kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
1c79356b
A
1650 } else {
1651 /*
1652 * Direct map. Free used clusters, then memory.
1653 */
1654 ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
91447636 1655 kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
1c79356b
A
1656 }
1657 VS_MAP_UNLOCK(vs);
1658
1659 bs_commit(- vs->vs_size);
1660
91447636 1661 zfree(vstruct_zone, vs);
1c79356b
A
1662}
1663
91447636 1664int ps_map_extend(vstruct_t, unsigned int); /* forward */
1c79356b
A
1665
1666int ps_map_extend(
1667 vstruct_t vs,
91447636 1668 unsigned int new_size)
1c79356b
A
1669{
1670 struct vs_map **new_imap;
1671 struct vs_map *new_dmap = NULL;
1672 int newdsize;
1673 int i;
1674 void *old_map = NULL;
1675 int old_map_size = 0;
1676
1677 if (vs->vs_size >= new_size) {
1678 /*
1679 * Someone has already done the work.
1680 */
1681 return 0;
1682 }
1683
1684 /*
1685 * If the new size extends into the indirect range, then we have one
1686 * of two cases: we are going from indirect to indirect, or we are
1687 * going from direct to indirect. If we are going from indirect to
1688 * indirect, then it is possible that the new size will fit in the old
1689 * indirect map. If this is the case, then just reset the size of the
1690 * vstruct map and we are done. If the new size will not
1691 * fit into the old indirect map, then we have to allocate a new
1692 * indirect map and copy the old map pointers into this new map.
1693 *
1694 * If we are going from direct to indirect, then we have to allocate a
1695 * new indirect map and copy the old direct pages into the first
1696 * indirect page of the new map.
1697 * NOTE: allocating memory here is dangerous, as we're in the
1698 * pageout path.
1699 */
1700 if (INDIRECT_CLMAP(new_size)) {
1701 int new_map_size = INDIRECT_CLMAP_SIZE(new_size);
1702
1703 /*
1704 * Get a new indirect map and zero it.
1705 */
1706 old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size);
1707 if (vs->vs_indirect &&
1708 (new_map_size == old_map_size)) {
1709 bs_commit(new_size - vs->vs_size);
1710 vs->vs_size = new_size;
1711 return 0;
1712 }
1713
1714 new_imap = (struct vs_map **)kalloc(new_map_size);
1715 if (new_imap == NULL) {
1716 return -1;
1717 }
1718 memset(new_imap, 0, new_map_size);
1719
1720 if (vs->vs_indirect) {
1721 /* Copy old entries into new map */
1722 memcpy(new_imap, vs->vs_imap, old_map_size);
1723 /* Arrange to free the old map */
1724 old_map = (void *) vs->vs_imap;
1725 newdsize = 0;
1726 } else { /* Old map was a direct map */
1727 /* Allocate an indirect page */
1728 if ((new_imap[0] = (struct vs_map *)
1729 kalloc(CLMAP_THRESHOLD)) == NULL) {
91447636 1730 kfree(new_imap, new_map_size);
1c79356b
A
1731 return -1;
1732 }
1733 new_dmap = new_imap[0];
1734 newdsize = CLMAP_ENTRIES;
1735 }
1736 } else {
1737 new_imap = NULL;
1738 newdsize = new_size;
1739 /*
1740 * If the new map is a direct map, then the old map must
1741 * also have been a direct map. All we have to do is
1742 * to allocate a new direct map, copy the old entries
1743 * into it and free the old map.
1744 */
1745 if ((new_dmap = (struct vs_map *)
1746 kalloc(CLMAP_SIZE(new_size))) == NULL) {
1747 return -1;
1748 }
1749 }
1750 if (newdsize) {
1751
1752 /* Free the old map */
1753 old_map = (void *) vs->vs_dmap;
1754 old_map_size = CLMAP_SIZE(vs->vs_size);
1755
1756 /* Copy info from the old map into the new map */
1757 memcpy(new_dmap, vs->vs_dmap, old_map_size);
1758
1759 /* Initialize the rest of the new map */
1760 for (i = vs->vs_size; i < newdsize; i++)
1761 VSM_CLR(new_dmap[i]);
1762 }
1763 if (new_imap) {
1764 vs->vs_imap = new_imap;
1765 vs->vs_indirect = TRUE;
1766 } else
1767 vs->vs_dmap = new_dmap;
1768 bs_commit(new_size - vs->vs_size);
1769 vs->vs_size = new_size;
1770 if (old_map)
91447636 1771 kfree(old_map, old_map_size);
1c79356b
A
1772 return 0;
1773}
1774
1775vm_offset_t
1776ps_clmap(
1777 vstruct_t vs,
1778 vm_offset_t offset,
1779 struct clmap *clmap,
1780 int flag,
1781 vm_size_t size,
1782 int error)
1783{
1784 vm_offset_t cluster; /* The cluster of offset. */
1785 vm_offset_t newcl; /* The new cluster allocated. */
1786 vm_offset_t newoff;
91447636 1787 unsigned int i;
1c79356b 1788 struct vs_map *vsmap;
1c79356b
A
1789
1790 VS_MAP_LOCK(vs);
1791
1792 ASSERT(vs->vs_dmap);
55e303ae 1793 cluster = atop_32(offset) >> vs->vs_clshift;
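	/*
	 * Example: with 4 KB pages (atop_32 drops 12 bits) and a cluster
	 * shift of 2, an offset of 0x9000 is page 9 and therefore lives in
	 * cluster 9 >> 2 == 2.
	 */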
1c79356b
A
1794
1795 /*
1796 * Initialize cluster error value
1797 */
1798 clmap->cl_error = 0;
1799
1800 /*
1801 * If the object has grown, extend the page map.
1802 */
1803 if (cluster >= vs->vs_size) {
1804 if (flag == CL_FIND) {
1805 /* Do not allocate if just doing a lookup */
1806 VS_MAP_UNLOCK(vs);
1807 return (vm_offset_t) -1;
1808 }
1809 if (ps_map_extend(vs, cluster + 1)) {
1810 VS_MAP_UNLOCK(vs);
1811 return (vm_offset_t) -1;
1812 }
1813 }
1814
1815 /*
1816 * Look for the desired cluster. If the map is indirect, then we
1817 * have a two level lookup. First find the indirect block, then
1818 * find the actual cluster. If the indirect block has not yet
1819 * been allocated, then do so. If the cluster has not yet been
1820 * allocated, then do so.
1821 *
1822 * If any of the allocations fail, then return an error.
1823 * Don't allocate if just doing a lookup.
1824 */
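	/*
	 * Index arithmetic used below: the indirect block is
	 * cluster / CLMAP_ENTRIES and the slot inside it is
	 * cluster % CLMAP_ENTRIES; if CLMAP_ENTRIES were 128, cluster 300
	 * would live at slot 44 of indirect block 2.
	 */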
1825 if (vs->vs_indirect) {
1826 long ind_block = cluster/CLMAP_ENTRIES;
1827
1828 /* Is the indirect block allocated? */
1829 vsmap = vs->vs_imap[ind_block];
1830 if (vsmap == NULL) {
1831 if (flag == CL_FIND) {
1832 VS_MAP_UNLOCK(vs);
1833 return (vm_offset_t) -1;
1834 }
1835
1836 /* Allocate the indirect block */
1837 vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
1838 if (vsmap == NULL) {
1839 VS_MAP_UNLOCK(vs);
1840 return (vm_offset_t) -1;
1841 }
1842 /* Initialize the cluster offsets */
1843 for (i = 0; i < CLMAP_ENTRIES; i++)
1844 VSM_CLR(vsmap[i]);
1845 vs->vs_imap[ind_block] = vsmap;
1846 }
1847 } else
1848 vsmap = vs->vs_dmap;
1849
1850 ASSERT(vsmap);
1851 vsmap += cluster%CLMAP_ENTRIES;
1852
1853 /*
1854 * At this point, vsmap points to the struct vs_map desired.
1855 *
 1856 * Look in the map for the cluster; if there was an error on a
 1857 * previous write, flag it and return. If it is not yet
 1858 * allocated, then allocate it if we're writing; if we're
 1859 * doing a lookup and the cluster's not allocated, return an error.
1860 */
1861 if (VSM_ISERR(*vsmap)) {
1862 clmap->cl_error = VSM_GETERR(*vsmap);
1863 VS_MAP_UNLOCK(vs);
1864 return (vm_offset_t) -1;
1865 } else if (VSM_ISCLR(*vsmap)) {
1866 int psindex;
1867
1868 if (flag == CL_FIND) {
1869 /*
1870 * If there's an error and the entry is clear, then
1871 * we've run out of swap space. Record the error
1872 * here and return.
1873 */
1874 if (error) {
1875 VSM_SETERR(*vsmap, error);
1876 }
1877 VS_MAP_UNLOCK(vs);
1878 return (vm_offset_t) -1;
1879 } else {
1880 /*
1881 * Attempt to allocate a cluster from the paging segment
1882 */
1883 newcl = ps_allocate_cluster(vs, &psindex,
1884 PAGING_SEGMENT_NULL);
91447636 1885 if (newcl == (vm_offset_t) -1) {
1c79356b
A
1886 VS_MAP_UNLOCK(vs);
1887 return (vm_offset_t) -1;
1888 }
1889 VSM_CLR(*vsmap);
1890 VSM_SETCLOFF(*vsmap, newcl);
1891 VSM_SETPS(*vsmap, psindex);
1892 }
1893 } else
1894 newcl = VSM_CLOFF(*vsmap);
1895
1896 /*
1897 * Fill in pertinent fields of the clmap
1898 */
1899 clmap->cl_ps = VSM_PS(*vsmap);
1900 clmap->cl_numpages = VSCLSIZE(vs);
1901 clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap);
1902
1903 /*
1904 * Byte offset in paging segment is byte offset to cluster plus
1905 * byte offset within cluster. It looks ugly, but should be
1906 * relatively quick.
1907 */
1908 ASSERT(trunc_page(offset) == offset);
55e303ae 1909 newcl = ptoa_32(newcl) << vs->vs_clshift;
1c79356b
A
1910 newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
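	/*
	 * Example, assuming vm_page_shift 12 and vs_clshift 2: a cluster
	 * spans 1 << 14 bytes (16 KB), so for offset 0x9000 the byte offset
	 * within the cluster is newoff == 0x9000 & 0x3fff == 0x1000.
	 */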
1911 if (flag == CL_ALLOC) {
1912 /*
1913 * set bits in the allocation bitmap according to which
1914 * pages were requested. size is in bytes.
1915 */
55e303ae 1916 i = atop_32(newoff);
1c79356b
A
1917 while ((size > 0) && (i < VSCLSIZE(vs))) {
1918 VSM_SETALLOC(*vsmap, i);
1919 i++;
1920 size -= vm_page_size;
1921 }
1922 }
1923 clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap);
1924 if (newoff) {
1925 /*
1926 * Offset is not cluster aligned, so number of pages
1927 * and bitmaps must be adjusted
1928 */
55e303ae 1929 clmap->cl_numpages -= atop_32(newoff);
1c79356b
A
1930 CLMAP_SHIFT(clmap, vs);
1931 CLMAP_SHIFTALLOC(clmap, vs);
1932 }
1933
1934 /*
1935 *
1936 * The setting of valid bits and handling of write errors
1937 * must be done here, while we hold the lock on the map.
1938 * It logically should be done in ps_vs_write_complete().
1939 * The size and error information has been passed from
1940 * ps_vs_write_complete(). If the size parameter is non-zero,
1941 * then there is work to be done. If error is also non-zero,
1942 * then the error number is recorded in the cluster and the
1943 * entire cluster is in error.
1944 */
1945 if (size && flag == CL_FIND) {
1946 vm_offset_t off = (vm_offset_t) 0;
1947
1948 if (!error) {
1949 for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
1950 i++) {
1951 VSM_SETPG(*vsmap, i);
1952 size -= vm_page_size;
1953 }
1954 ASSERT(i <= VSCLSIZE(vs));
1955 } else {
1956 BS_STAT(clmap->cl_ps->ps_bs,
1957 clmap->cl_ps->ps_bs->bs_pages_out_fail +=
55e303ae 1958 atop_32(size));
1c79356b
A
1959 off = VSM_CLOFF(*vsmap);
1960 VSM_SETERR(*vsmap, error);
1961 }
1962 /*
1963 * Deallocate cluster if error, and no valid pages
1964 * already present.
1965 */
1966 if (off != (vm_offset_t) 0)
1967 ps_deallocate_cluster(clmap->cl_ps, off);
1968 VS_MAP_UNLOCK(vs);
1969 return (vm_offset_t) 0;
1970 } else
1971 VS_MAP_UNLOCK(vs);
1972
91447636
A
1973 DP_DEBUG(DEBUG_VS_INTERNAL,
1974 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1975 newcl+newoff, (int) vs, (int) vsmap, flag));
1976 DP_DEBUG(DEBUG_VS_INTERNAL,
1977 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1978 (int) clmap->cl_ps, clmap->cl_numpages,
1979 (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
1c79356b
A
1980
1981 return (newcl + newoff);
1982}
1983
1984void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t); /* forward */
1985
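/*
 * ps_clunmap: release the backing store behind the byte range
 * [offset, offset+length) of the vstruct.  Page and allocation bits
 * are cleared for every page in the range; when a cluster map entry
 * ends up with no allocated pages, the paging segment cluster itself
 * is deallocated and the entry is cleared.
 */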
1986void
1987ps_clunmap(
1988 vstruct_t vs,
1989 vm_offset_t offset,
1990 vm_size_t length)
1991{
1992 vm_offset_t cluster; /* The cluster number of offset */
1993 struct vs_map *vsmap;
1c79356b
A
1994
1995 VS_MAP_LOCK(vs);
1996
1997 /*
1998 * Loop through all clusters in this range, freeing paging segment
1999 * clusters and map entries as encountered.
2000 */
2001 while (length > 0) {
2002 vm_offset_t newoff;
91447636 2003 unsigned int i;
1c79356b 2004
55e303ae 2005 cluster = atop_32(offset) >> vs->vs_clshift;
1c79356b
A
2006 if (vs->vs_indirect) /* indirect map */
2007 vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
2008 else
2009 vsmap = vs->vs_dmap;
2010 if (vsmap == NULL) {
2011 VS_MAP_UNLOCK(vs);
2012 return;
2013 }
2014 vsmap += cluster%CLMAP_ENTRIES;
2015 if (VSM_ISCLR(*vsmap)) {
2016 length -= vm_page_size;
2017 offset += vm_page_size;
2018 continue;
2019 }
2020 /*
2021 * We've got a valid mapping. Clear it and deallocate
2022 * paging segment cluster pages.
 2023 * Optimize for entire cluster clearing.
2024 */
91447636 2025 if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) {
1c79356b
A
2026 /*
2027 * Not cluster aligned.
2028 */
2029 ASSERT(trunc_page(newoff) == newoff);
55e303ae 2030 i = atop_32(newoff);
1c79356b
A
2031 } else
2032 i = 0;
2033 while ((i < VSCLSIZE(vs)) && (length > 0)) {
2034 VSM_CLRPG(*vsmap, i);
2035 VSM_CLRALLOC(*vsmap, i);
2036 length -= vm_page_size;
2037 offset += vm_page_size;
2038 i++;
2039 }
2040
2041 /*
2042 * If map entry is empty, clear and deallocate cluster.
2043 */
2044 if (!VSM_ALLOC(*vsmap)) {
2045 ps_deallocate_cluster(VSM_PS(*vsmap),
2046 VSM_CLOFF(*vsmap));
2047 VSM_CLR(*vsmap);
2048 }
2049 }
2050
2051 VS_MAP_UNLOCK(vs);
2052}
2053
2054void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */
2055
2056void
2057ps_vs_write_complete(
2058 vstruct_t vs,
2059 vm_offset_t offset,
2060 vm_size_t size,
2061 int error)
2062{
2063 struct clmap clmap;
2064
2065 /*
2066 * Get the struct vsmap for this cluster.
2067 * Use READ, even though it was written, because the
2068 * cluster MUST be present, unless there was an error
2069 * in the original ps_clmap (e.g. no space), in which
2070 * case, nothing happens.
2071 *
2072 * Must pass enough information to ps_clmap to allow it
2073 * to set the vs_map structure bitmap under lock.
2074 */
2075 (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
2076}
2077
2078void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int); /* forward */
2079
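/*
 * vs_cl_write_complete: completion path for a cluster write.
 * On success the global pages-out count is bumped; on error the
 * failure is recorded per-cluster via ps_vs_write_complete().
 * For asynchronous writes the outstanding-async count is reduced
 * and any thread waiting for it to drain is woken.
 */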
2080void
2081vs_cl_write_complete(
91447636
A
2082 vstruct_t vs,
2083 __unused paging_segment_t ps,
2084 vm_offset_t offset,
2085 __unused vm_offset_t addr,
2086 vm_size_t size,
2087 boolean_t async,
2088 int error)
1c79356b 2089{
91447636 2090// kern_return_t kr;
1c79356b
A
2091
2092 if (error) {
2093 /*
2094 * For internal objects, the error is recorded on a
2095 * per-cluster basis by ps_clmap() which is called
2096 * by ps_vs_write_complete() below.
2097 */
2098 dprintf(("write failed error = 0x%x\n", error));
2099 /* add upl_abort code here */
2100 } else
55e303ae 2101 GSTAT(global_stats.gs_pages_out += atop_32(size));
1c79356b
A
2102 /*
2103 * Notify the vstruct mapping code, so it can do its accounting.
2104 */
2105 ps_vs_write_complete(vs, offset, size, error);
2106
2107 if (async) {
2108 VS_LOCK(vs);
2109 ASSERT(vs->vs_async_pending > 0);
2110 vs->vs_async_pending -= size;
0b4e3aa0
A
2111 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
2112 vs->vs_waiting_async = FALSE;
1c79356b
A
2113 VS_UNLOCK(vs);
2114 /* mutex_unlock(&vs->vs_waiting_async); */
0b4e3aa0 2115 thread_wakeup(&vs->vs_async_pending);
1c79356b
A
2116 } else {
2117 VS_UNLOCK(vs);
2118 }
2119 }
2120}
2121
2122#ifdef DEVICE_PAGING
2123kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2124
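/*
 * device_write_reply: invoked when an asynchronous device write
 * finishes.  A short write is treated as a failure.  Transfer
 * requests (VSA_TRANSFER) discard their map copy on error and do
 * their accounting via ps_vs_write_complete(); ordinary cluster
 * writes complete through vs_cl_write_complete().
 */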
2125kern_return_t
2126device_write_reply(
2127 MACH_PORT_FACE reply_port,
2128 kern_return_t device_code,
2129 io_buf_len_t bytes_written)
2130{
2131 struct vs_async *vsa;
1c79356b
A
2132
2133 vsa = (struct vs_async *)
2134 ((struct vstruct_alias *)(reply_port->alias))->vs;
2135
2136 if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
2137 device_code = KERN_FAILURE;
2138 }
2139
2140 vsa->vsa_error = device_code;
2141
2142
2143 ASSERT(vsa->vsa_vs != VSTRUCT_NULL);
2144 if(vsa->vsa_flags & VSA_TRANSFER) {
2145 /* revisit when async disk segments redone */
2146 if(vsa->vsa_error) {
2147 /* need to consider error condition. re-write data or */
2148 /* throw it away here. */
91447636 2149 vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr);
1c79356b
A
2150 }
2151 ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
2152 vsa->vsa_size, vsa->vsa_error);
2153 } else {
2154 vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset,
2155 vsa->vsa_addr, vsa->vsa_size, TRUE,
2156 vsa->vsa_error);
2157 }
2158 VS_FREE_ASYNC(vsa);
2159
2160 return KERN_SUCCESS;
2161}
2162
2163kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2164kern_return_t
2165device_write_reply_inband(
2166 MACH_PORT_FACE reply_port,
2167 kern_return_t return_code,
2168 io_buf_len_t bytes_written)
2169{
2170 panic("device_write_reply_inband: illegal");
2171 return KERN_SUCCESS;
2172}
2173
2174kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t);
2175kern_return_t
2176device_read_reply(
2177 MACH_PORT_FACE reply_port,
2178 kern_return_t return_code,
2179 io_buf_ptr_t data,
2180 mach_msg_type_number_t dataCnt)
2181{
2182 struct vs_async *vsa;
2183 vsa = (struct vs_async *)
2184 ((struct vstruct_alias *)(reply_port->alias))->vs;
2185 vsa->vsa_addr = (vm_offset_t)data;
2186 vsa->vsa_size = (vm_size_t)dataCnt;
2187 vsa->vsa_error = return_code;
2188 thread_wakeup(&vsa->vsa_lock);
2189 return KERN_SUCCESS;
2190}
2191
2192kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t);
2193kern_return_t
2194device_read_reply_inband(
2195 MACH_PORT_FACE reply_port,
2196 kern_return_t return_code,
2197 io_buf_ptr_inband_t data,
2198 mach_msg_type_number_t dataCnt)
2199{
2200 panic("device_read_reply_inband: illegal");
2201 return KERN_SUCCESS;
2202}
2203
2204kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2205kern_return_t
2206device_read_reply_overwrite(
2207 MACH_PORT_FACE reply_port,
2208 kern_return_t return_code,
2209 io_buf_len_t bytes_read)
2210{
2211 panic("device_read_reply_overwrite: illegal\n");
2212 return KERN_SUCCESS;
2213}
2214
2215kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE);
2216kern_return_t
2217device_open_reply(
2218 MACH_PORT_FACE reply_port,
2219 kern_return_t return_code,
2220 MACH_PORT_FACE device_port)
2221{
2222 panic("device_open_reply: illegal\n");
2223 return KERN_SUCCESS;
2224}
2225
1c79356b
A
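/*
 * ps_read_device: read "size" bytes from a device-backed paging
 * segment starting at "offset".  Reads are issued through
 * ds_device_read_common(); short reads are accumulated into a
 * staging buffer until the request is satisfied or the device fails.
 * The data is returned as a vm_map_copy in *bufferp and any
 * shortfall in *residualp.
 */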
2226kern_return_t
2227ps_read_device(
2228 paging_segment_t ps,
2229 vm_offset_t offset,
2230 vm_offset_t *bufferp,
2231 unsigned int size,
2232 unsigned int *residualp,
2233 int flags)
2234{
2235 kern_return_t kr;
2236 recnum_t dev_offset;
2237 unsigned int bytes_wanted;
2238 unsigned int bytes_read;
2239 unsigned int total_read;
2240 vm_offset_t dev_buffer;
2241 vm_offset_t buf_ptr;
2242 unsigned int records_read;
1c79356b
A
2243 struct vs_async *vsa;
2244 mutex_t vs_waiting_read_reply;
2245
2246 device_t device;
2247 vm_map_copy_t device_data = NULL;
2248 default_pager_thread_t *dpt = NULL;
2249
2250 device = dev_port_lookup(ps->ps_device);
55e303ae 2251 clustered_reads[atop_32(size)]++;
1c79356b
A
2252
2253 dev_offset = (ps->ps_offset +
2254 (offset >> (vm_page_shift - ps->ps_record_shift)));
2255 bytes_wanted = size;
2256 total_read = 0;
2257 *bufferp = (vm_offset_t)NULL;
2258
2259 do {
2260 vsa = VS_ALLOC_ASYNC();
2261 if (vsa) {
2262 vsa->vsa_vs = NULL;
2263 vsa->vsa_addr = 0;
2264 vsa->vsa_offset = 0;
2265 vsa->vsa_size = 0;
2266 vsa->vsa_ps = NULL;
2267 }
91447636 2268 mutex_init(&vsa->vsa_lock, 0);
1c79356b
A
2269 ip_lock(vsa->reply_port);
2270 vsa->reply_port->ip_sorights++;
2271 ip_reference(vsa->reply_port);
2272 ip_unlock(vsa->reply_port);
2273 kr = ds_device_read_common(device,
2274 vsa->reply_port,
2275 (mach_msg_type_name_t)
2276 MACH_MSG_TYPE_MOVE_SEND_ONCE,
2277 (dev_mode_t) 0,
2278 dev_offset,
2279 bytes_wanted,
2280 (IO_READ | IO_CALL),
2281 (io_buf_ptr_t *) &dev_buffer,
2282 (mach_msg_type_number_t *) &bytes_read);
2283 if(kr == MIG_NO_REPLY) {
2284 assert_wait(&vsa->vsa_lock, THREAD_UNINT);
9bccf70c 2285 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
2286
2287 dev_buffer = vsa->vsa_addr;
2288 bytes_read = (unsigned int)vsa->vsa_size;
2289 kr = vsa->vsa_error;
2290 }
2291 VS_FREE_ASYNC(vsa);
2292 if (kr != KERN_SUCCESS || bytes_read == 0) {
2293 break;
2294 }
2295 total_read += bytes_read;
2296
2297 /*
2298 * If we got the entire range, use the returned dev_buffer.
2299 */
2300 if (bytes_read == size) {
2301 *bufferp = (vm_offset_t)dev_buffer;
2302 break;
2303 }
2304
2305#if 1
2306 dprintf(("read only %d bytes out of %d\n",
2307 bytes_read, bytes_wanted));
2308#endif
2309 if(dpt == NULL) {
2310 dpt = get_read_buffer();
2311 buf_ptr = dpt->dpt_buffer;
2312 *bufferp = (vm_offset_t)buf_ptr;
2313 }
2314 /*
2315 * Otherwise, copy the data into the provided buffer (*bufferp)
2316 * and append the rest of the range as it comes in.
2317 */
2318 memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
2319 buf_ptr += bytes_read;
2320 bytes_wanted -= bytes_read;
2321 records_read = (bytes_read >>
2322 (vm_page_shift - ps->ps_record_shift));
2323 dev_offset += records_read;
91447636
A
2324 DP_DEBUG(DEBUG_VS_INTERNAL,
2325 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2326 dev_buffer, bytes_read));
1c79356b
A
2327 if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
2328 != KERN_SUCCESS)
2329 Panic("dealloc buf");
2330 } while (bytes_wanted);
2331
2332 *residualp = size - total_read;
2333 if((dev_buffer != *bufferp) && (total_read != 0)) {
2334 vm_offset_t temp_buffer;
91447636 2335 vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
1c79356b
A
2336 memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
2337 if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
2338 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2339 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2340 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2341 (vm_map_copy_t *)&device_data, FALSE))
2342 panic("ps_read_device: cannot copyin locally provided buffer\n");
2343 }
2344 else if((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)){
2345 if(vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
2346 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2347 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2348 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2349 (vm_map_copy_t *)&device_data, FALSE))
2350 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2351 }
2352 else {
2353 device_data = NULL;
2354 }
2355 *bufferp = (vm_offset_t)device_data;
2356
2357 if(dpt != NULL) {
2358 /* Free the receive buffer */
2359 dpt->checked_out = 0;
2360 thread_wakeup(&dpt_array);
2361 }
2362 return KERN_SUCCESS;
2363}
2364
1c79356b
A
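/*
 * ps_write_device: write "size" bytes at "addr" to a device-backed
 * paging segment.  If a vs_async is supplied, the write is queued
 * asynchronously and completion arrives via device_write_reply();
 * otherwise the routine loops on synchronous writes until the whole
 * range is on the device, returning PAGER_ERROR on failure.
 */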
2365kern_return_t
2366ps_write_device(
2367 paging_segment_t ps,
2368 vm_offset_t offset,
2369 vm_offset_t addr,
2370 unsigned int size,
2371 struct vs_async *vsa)
2372{
2373 recnum_t dev_offset;
2374 io_buf_len_t bytes_to_write, bytes_written;
2375 recnum_t records_written;
2376 kern_return_t kr;
2377 MACH_PORT_FACE reply_port;
1c79356b
A
2378
2379
2380
55e303ae 2381 clustered_writes[atop_32(size)]++;
1c79356b
A
2382
2383 dev_offset = (ps->ps_offset +
2384 (offset >> (vm_page_shift - ps->ps_record_shift)));
2385 bytes_to_write = size;
2386
2387 if (vsa) {
2388 /*
2389 * Asynchronous write.
2390 */
2391 reply_port = vsa->reply_port;
2392 ip_lock(reply_port);
2393 reply_port->ip_sorights++;
2394 ip_reference(reply_port);
2395 ip_unlock(reply_port);
2396 {
2397 device_t device;
2398 device = dev_port_lookup(ps->ps_device);
2399
2400 vsa->vsa_addr = addr;
2401 kr=ds_device_write_common(device,
2402 reply_port,
2403 (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
2404 (dev_mode_t) 0,
2405 dev_offset,
2406 (io_buf_ptr_t) addr,
2407 size,
2408 (IO_WRITE | IO_CALL),
2409 &bytes_written);
2410 }
2411 if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
2412 if (verbose)
2413 dprintf(("%s0x%x, addr=0x%x,"
2414 "size=0x%x,offset=0x%x\n",
2415 "device_write_request returned ",
2416 kr, addr, size, offset));
2417 BS_STAT(ps->ps_bs,
55e303ae 2418 ps->ps_bs->bs_pages_out_fail += atop_32(size));
1c79356b
A
2419 /* do the completion notification to free resources */
2420 device_write_reply(reply_port, kr, 0);
2421 return PAGER_ERROR;
2422 }
2423 } else do {
2424 /*
2425 * Synchronous write.
2426 */
2427 {
2428 device_t device;
2429 device = dev_port_lookup(ps->ps_device);
2430 kr=ds_device_write_common(device,
2431 IP_NULL, 0,
2432 (dev_mode_t) 0,
2433 dev_offset,
2434 (io_buf_ptr_t) addr,
2435 size,
2436 (IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
2437 &bytes_written);
2438 }
2439 if (kr != KERN_SUCCESS) {
2440 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2441 "device_write returned ",
2442 kr, addr, size, offset));
2443 BS_STAT(ps->ps_bs,
55e303ae 2444 ps->ps_bs->bs_pages_out_fail += atop_32(size));
1c79356b
A
2445 return PAGER_ERROR;
2446 }
2447 if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
2448 Panic("fragmented write");
2449 records_written = (bytes_written >>
2450 (vm_page_shift - ps->ps_record_shift));
2451 dev_offset += records_written;
2452#if 1
2453 if (bytes_written != bytes_to_write) {
2454 dprintf(("wrote only %d bytes out of %d\n",
2455 bytes_written, bytes_to_write));
2456 }
2457#endif
2458 bytes_to_write -= bytes_written;
2459 addr += bytes_written;
2460 } while (bytes_to_write > 0);
2461
2462 return PAGER_SUCCESS;
2463}
2464
2465
2466#else /* !DEVICE_PAGING */
2467
2468kern_return_t
2469ps_read_device(
91447636
A
2470 __unused paging_segment_t ps,
2471 __unused vm_offset_t offset,
2472 __unused vm_offset_t *bufferp,
2473 __unused unsigned int size,
2474 __unused unsigned int *residualp,
2475 __unused int flags)
1c79356b
A
2476{
2477 panic("ps_read_device not supported");
89b3af67 2478 return KERN_FAILURE;
1c79356b
A
2479}
2480
91447636 2481kern_return_t
1c79356b 2482ps_write_device(
91447636
A
2483 __unused paging_segment_t ps,
2484 __unused vm_offset_t offset,
2485 __unused vm_offset_t addr,
2486 __unused unsigned int size,
2487 __unused struct vs_async *vsa)
1c79356b
A
2488{
2489 panic("ps_write_device not supported");
89b3af67 2490 return KERN_FAILURE;
1c79356b
A
2491}
2492
2493#endif /* DEVICE_PAGING */
91447636 2494void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */
1c79356b
A
2495
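/*
 * pvs_object_data_provided: accounting hook called once a cluster
 * read has supplied "size" bytes to the kernel.  It bumps the global
 * pages-in counter and, when USE_PRECIOUS is configured, releases
 * the backing store that was just read via ps_clunmap().
 */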
2496void
2497pvs_object_data_provided(
91447636
A
2498 __unused vstruct_t vs,
2499 __unused upl_t upl,
2500 __unused upl_offset_t offset,
2501 upl_size_t size)
1c79356b 2502{
1c79356b 2503
91447636
A
2504 DP_DEBUG(DEBUG_VS_INTERNAL,
2505 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2506 upl, offset, size));
1c79356b
A
2507
2508 ASSERT(size > 0);
55e303ae 2509 GSTAT(global_stats.gs_pages_in += atop_32(size));
1c79356b
A
2510
2511
2512#if USE_PRECIOUS
2513 ps_clunmap(vs, offset, size);
2514#endif /* USE_PRECIOUS */
2515
2516}
2517
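/*
 * pvs_cluster_read: satisfy a page-in request by reading "cnt" bytes
 * of the vstruct, starting at "vs_offset", from its paging segments.
 * See the loop comment below for how holes and non-contiguous
 * clusters are handled.
 */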
2518kern_return_t
2519pvs_cluster_read(
2520 vstruct_t vs,
0b4e3aa0 2521 vm_offset_t vs_offset,
1c79356b
A
2522 vm_size_t cnt)
2523{
1c79356b
A
2524 upl_t upl;
2525 kern_return_t error = KERN_SUCCESS;
91447636 2526 int size;
89b3af67 2527 unsigned int residual;
1c79356b 2528 unsigned int request_flags;
91447636
A
2529 int seg_index;
2530 int pages_in_cl;
0b4e3aa0
A
2531 int cl_size;
2532 int cl_mask;
91447636
A
2533 int cl_index;
2534 int xfer_size;
0b4e3aa0
A
2535 vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
2536 paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
2537 struct clmap clmap;
2538
2539 pages_in_cl = 1 << vs->vs_clshift;
2540 cl_size = pages_in_cl * vm_page_size;
2541 cl_mask = cl_size - 1;
1c79356b
A
2542
2543 /*
0b4e3aa0
A
2544 * This loop will be executed multiple times until the entire
2545 * request has been satisfied... if the request spans cluster
 2546 * boundaries, the clusters will be checked for logical continuity,
2547 * if contiguous the I/O request will span multiple clusters, otherwise
2548 * it will be broken up into the minimal set of I/O's
1c79356b 2549 *
0b4e3aa0
A
2550 * If there are holes in a request (either unallocated pages in a paging
2551 * segment or an unallocated paging segment), we stop
1c79356b
A
2552 * reading at the hole, inform the VM of any data read, inform
2553 * the VM of an unavailable range, then loop again, hoping to
0b4e3aa0 2554 * find valid pages later in the requested range. This continues until
1c79356b
A
2555 * the entire range has been examined, and read, if present.
2556 */
2557
2558#if USE_PRECIOUS
9bccf70c 2559 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
1c79356b 2560#else
9bccf70c 2561 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
1c79356b 2562#endif
91447636
A
2563
2564 assert(dp_encryption_inited);
2565 if (dp_encryption) {
2566 /*
2567 * ENCRYPTED SWAP:
2568 * request that the UPL be prepared for
2569 * decryption.
2570 */
2571 request_flags |= UPL_ENCRYPT;
2572 }
2573
1c79356b 2574 while (cnt && (error == KERN_SUCCESS)) {
89b3af67
A
2575 int ps_info_valid;
2576 unsigned int page_list_count;
1c79356b 2577
d12e1678
A
2578 if((vs_offset & cl_mask) &&
2579 (cnt > (VM_SUPER_CLUSTER -
2580 (vs_offset & cl_mask)))) {
2581 size = VM_SUPER_CLUSTER;
2582 size -= vs_offset & cl_mask;
2583 } else if (cnt > VM_SUPER_CLUSTER) {
0b4e3aa0 2584 size = VM_SUPER_CLUSTER;
d12e1678 2585 } else {
0b4e3aa0 2586 size = cnt;
d12e1678 2587 }
0b4e3aa0 2588 cnt -= size;
1c79356b 2589
0b4e3aa0
A
2590 ps_info_valid = 0;
2591 seg_index = 0;
1c79356b 2592
0b4e3aa0
A
2593 while (size > 0 && error == KERN_SUCCESS) {
2594 int abort_size;
2595 int failed_size;
2596 int beg_pseg;
2597 int beg_indx;
2598 vm_offset_t cur_offset;
1c79356b 2599
0b4e3aa0
A
2600
2601 if ( !ps_info_valid) {
2602 ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
2603 psp[seg_index] = CLMAP_PS(clmap);
2604 ps_info_valid = 1;
1c79356b 2605 }
0b4e3aa0
A
2606 /*
2607 * skip over unallocated physical segments
2608 */
2609 if (ps_offset[seg_index] == (vm_offset_t) -1) {
2610 abort_size = cl_size - (vs_offset & cl_mask);
2611 abort_size = MIN(abort_size, size);
2612
2613 page_list_count = 0;
2614 memory_object_super_upl_request(
2615 vs->vs_control,
2616 (memory_object_offset_t)vs_offset,
2617 abort_size, abort_size,
2618 &upl, NULL, &page_list_count,
2619 request_flags);
1c79356b 2620
0b4e3aa0
A
2621 if (clmap.cl_error) {
2622 upl_abort(upl, UPL_ABORT_ERROR);
2623 } else {
2624 upl_abort(upl, UPL_ABORT_UNAVAILABLE);
2625 }
2626 upl_deallocate(upl);
1c79356b 2627
0b4e3aa0
A
2628 size -= abort_size;
2629 vs_offset += abort_size;
1c79356b 2630
0b4e3aa0
A
2631 seg_index++;
2632 ps_info_valid = 0;
2633 continue;
1c79356b 2634 }
0b4e3aa0
A
2635 cl_index = (vs_offset & cl_mask) / vm_page_size;
2636
2637 for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
2638 /*
2639 * skip over unallocated pages
2640 */
2641 if (CLMAP_ISSET(clmap, cl_index))
2642 break;
2643 abort_size += vm_page_size;
2644 }
2645 if (abort_size) {
2646 /*
2647 * Let VM system know about holes in clusters.
2648 */
55e303ae 2649 GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));
0b4e3aa0
A
2650
2651 page_list_count = 0;
2652 memory_object_super_upl_request(
2653 vs->vs_control,
2654 (memory_object_offset_t)vs_offset,
2655 abort_size, abort_size,
2656 &upl, NULL, &page_list_count,
1c79356b 2657 request_flags);
1c79356b 2658
0b4e3aa0
A
2659 upl_abort(upl, UPL_ABORT_UNAVAILABLE);
2660 upl_deallocate(upl);
1c79356b 2661
0b4e3aa0
A
2662 size -= abort_size;
2663 vs_offset += abort_size;
2664
2665 if (cl_index == pages_in_cl) {
2666 /*
2667 * if we're at the end of this physical cluster
2668 * then bump to the next one and continue looking
2669 */
2670 seg_index++;
2671 ps_info_valid = 0;
2672 continue;
2673 }
2674 if (size == 0)
2675 break;
2676 }
1c79356b 2677 /*
0b4e3aa0
A
2678 * remember the starting point of the first allocated page
2679 * for the I/O we're about to issue
1c79356b 2680 */
0b4e3aa0
A
2681 beg_pseg = seg_index;
2682 beg_indx = cl_index;
2683 cur_offset = vs_offset;
2684
2685 /*
2686 * calculate the size of the I/O that we can do...
2687 * this may span multiple physical segments if
2688 * they are contiguous
2689 */
2690 for (xfer_size = 0; xfer_size < size; ) {
2691
d12e1678
A
2692 while (cl_index < pages_in_cl
2693 && xfer_size < size) {
0b4e3aa0 2694 /*
55e303ae 2695 * accumulate allocated pages within
d12e1678 2696 * a physical segment
1c79356b 2697 */
0b4e3aa0
A
2698 if (CLMAP_ISSET(clmap, cl_index)) {
2699 xfer_size += vm_page_size;
2700 cur_offset += vm_page_size;
2701 cl_index++;
2702
2703 BS_STAT(psp[seg_index]->ps_bs,
2704 psp[seg_index]->ps_bs->bs_pages_in++);
2705 } else
2706 break;
2707 }
d12e1678
A
2708 if (cl_index < pages_in_cl
2709 || xfer_size >= size) {
0b4e3aa0 2710 /*
55e303ae 2711 * we've hit an unallocated page or
d12e1678
A
2712 * the end of this request... go fire
2713 * the I/O
1c79356b 2714 */
0b4e3aa0
A
2715 break;
2716 }
2717 /*
d12e1678 2718 * we've hit the end of the current physical
55e303ae 2719 * segment and there's more to do, so try
d12e1678 2720 * moving to the next one
0b4e3aa0
A
2721 */
2722 seg_index++;
2723
d12e1678 2724 ps_offset[seg_index] =
55e303ae
A
2725 ps_clmap(vs,
2726 cur_offset & ~cl_mask,
d12e1678
A
2727 &clmap, CL_FIND, 0, 0);
2728 psp[seg_index] = CLMAP_PS(clmap);
0b4e3aa0
A
2729 ps_info_valid = 1;
2730
2731 if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
2732 /*
55e303ae
A
2733 * if the physical segment we're about
2734 * to step into is not contiguous to
2735 * the one we're currently in, or it's
d12e1678 2736 * in a different paging file, or
0b4e3aa0
A
2737 * it hasn't been allocated....
2738 * we stop here and generate the I/O
2739 */
2740 break;
1c79356b 2741 }
0b4e3aa0 2742 /*
d12e1678 2743 * start with first page of the next physical
55e303ae 2744 * segment
0b4e3aa0
A
2745 */
2746 cl_index = 0;
1c79356b 2747 }
0b4e3aa0
A
2748 if (xfer_size) {
2749 /*
2750 * we have a contiguous range of allocated pages
2751 * to read from
2752 */
2753 page_list_count = 0;
2754 memory_object_super_upl_request(vs->vs_control,
d12e1678
A
2755 (memory_object_offset_t)vs_offset,
2756 xfer_size, xfer_size,
2757 &upl, NULL, &page_list_count,
2758 request_flags | UPL_SET_INTERNAL);
0b4e3aa0 2759
55e303ae 2760 error = ps_read_file(psp[beg_pseg],
91447636 2761 upl, (upl_offset_t) 0,
55e303ae
A
2762 ps_offset[beg_pseg] +
2763 (beg_indx * vm_page_size),
d12e1678 2764 xfer_size, &residual, 0);
0b4e3aa0
A
2765 } else
2766 continue;
1c79356b 2767
0b4e3aa0
A
2768 failed_size = 0;
2769
2770 /*
55e303ae 2771 * Adjust counts and send response to VM. Optimize
d12e1678 2772 * for the common case, i.e. no error and/or partial
55e303ae 2773 * data. If there was an error, then we need to error
d12e1678 2774 * the entire range, even if some data was successfully
55e303ae 2775 * read. If there was a partial read we may supply some
0b4e3aa0 2776 * data and may error some as well. In all cases the
55e303ae
A
2777 * VM must receive some notification for every page
2778 * in the range.
0b4e3aa0
A
2779 */
2780 if ((error == KERN_SUCCESS) && (residual == 0)) {
2781 /*
d12e1678 2782 * Got everything we asked for, supply the data
55e303ae
A
2783 * to the VM. Note that as a side effect of
2784 * supplying the data, the buffer holding the
2785 * supplied data is deallocated from the pager's
2786 * address space.
0b4e3aa0 2787 */
d12e1678
A
2788 pvs_object_data_provided(
2789 vs, upl, vs_offset, xfer_size);
0b4e3aa0
A
2790 } else {
2791 failed_size = xfer_size;
2792
2793 if (error == KERN_SUCCESS) {
89b3af67 2794 if ((signed) residual == xfer_size) {
d12e1678
A
2795 /*
2796 * If a read operation returns no error
2797 * and no data moved, we turn it into
2798 * an error, assuming we're reading at
 2799 * or beyond EOF.
2800 * Fall through and error the entire
2801 * range.
2802 */
0b4e3aa0
A
2803 error = KERN_FAILURE;
2804 } else {
d12e1678
A
2805 /*
2806 * Otherwise, we have partial read. If
 2807 * the part read is an integral number
2808 * of pages supply it. Otherwise round
2809 * it up to a page boundary, zero fill
2810 * the unread part, and supply it.
2811 * Fall through and error the remainder
2812 * of the range, if any.
2813 */
0b4e3aa0
A
2814 int fill, lsize;
2815
d12e1678
A
2816 fill = residual
2817 & ~vm_page_size;
55e303ae 2818 lsize = (xfer_size - residual)
d12e1678
A
2819 + fill;
2820 pvs_object_data_provided(
55e303ae 2821 vs, upl,
d12e1678 2822 vs_offset, lsize);
0b4e3aa0
A
2823
2824 if (lsize < xfer_size) {
d12e1678
A
2825 failed_size =
2826 xfer_size - lsize;
0b4e3aa0
A
2827 error = KERN_FAILURE;
2828 }
2829 }
2830 }
2831 }
1c79356b
A
2832 /*
2833 * If there was an error in any part of the range, tell
d12e1678 2834 * the VM. Note that error is explicitly checked again
55e303ae 2835 * since it can be modified above.
1c79356b
A
2836 */
2837 if (error != KERN_SUCCESS) {
0b4e3aa0 2838 BS_STAT(psp[beg_pseg]->ps_bs,
d12e1678 2839 psp[beg_pseg]->ps_bs->bs_pages_in_fail
55e303ae 2840 += atop_32(failed_size));
1c79356b 2841 }
0b4e3aa0
A
2842 size -= xfer_size;
2843 vs_offset += xfer_size;
1c79356b 2844 }
1c79356b
A
2845
2846 } /* END while (cnt && (error == 0)) */
2847 return error;
2848}
2849
2850int vs_do_async_write = 1;
2851
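/*
 * vs_cluster_write: push "cnt" bytes of the object, starting at
 * "offset", out to backing store.  In the normal (!dp_internal)
 * case a super-cluster UPL of dirty/precious pages is requested from
 * the VM, clusters are allocated as needed, and each contiguous
 * dirty run is written with ps_write_file().  In the dp_internal
 * case the caller already supplied the pages in internal_upl and
 * they are written cluster by cluster.
 */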
2852kern_return_t
2853vs_cluster_write(
2854 vstruct_t vs,
2855 upl_t internal_upl,
91447636
A
2856 upl_offset_t offset,
2857 upl_size_t cnt,
1c79356b
A
2858 boolean_t dp_internal,
2859 int flags)
2860{
91447636 2861 upl_size_t transfer_size;
1c79356b
A
2862 int error = 0;
2863 struct clmap clmap;
0b4e3aa0
A
2864
2865 vm_offset_t actual_offset; /* Offset within paging segment */
1c79356b 2866 paging_segment_t ps;
0b4e3aa0
A
2867 vm_offset_t mobj_base_addr;
2868 vm_offset_t mobj_target_addr;
1c79356b
A
2869
2870 upl_t upl;
0b4e3aa0 2871 upl_page_info_t *pl;
1c79356b
A
2872 int page_index;
2873 int list_size;
55e303ae 2874 int pages_in_cl;
91447636 2875 unsigned int cl_size;
55e303ae 2876 int base_index;
91447636 2877 unsigned int seg_size;
55e303ae
A
2878
2879 pages_in_cl = 1 << vs->vs_clshift;
2880 cl_size = pages_in_cl * vm_page_size;
1c79356b 2881
1c79356b 2882 if (!dp_internal) {
89b3af67 2883 unsigned int page_list_count;
1c79356b 2884 int request_flags;
91447636 2885 unsigned int super_size;
0b4e3aa0
A
2886 int first_dirty;
2887 int num_dirty;
2888 int num_of_pages;
2889 int seg_index;
91447636 2890 upl_offset_t upl_offset;
0b4e3aa0 2891 vm_offset_t seg_offset;
55e303ae
A
2892 vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
2893 paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
0b4e3aa0 2894
1c79356b 2895
1c79356b
A
2896 if (bs_low) {
2897 super_size = cl_size;
0b4e3aa0 2898
1c79356b
A
2899 request_flags = UPL_NOBLOCK |
2900 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2901 UPL_NO_SYNC | UPL_SET_INTERNAL;
2902 } else {
2903 super_size = VM_SUPER_CLUSTER;
0b4e3aa0 2904
1c79356b
A
2905 request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
2906 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2907 UPL_NO_SYNC | UPL_SET_INTERNAL;
2908 }
2909
91447636
A
2910 if (!dp_encryption_inited) {
2911 /*
2912 * ENCRYPTED SWAP:
2913 * Once we've started using swap, we
2914 * can't change our mind on whether
2915 * it needs to be encrypted or
2916 * not.
2917 */
2918 dp_encryption_inited = TRUE;
2919 }
2920 if (dp_encryption) {
2921 /*
2922 * ENCRYPTED SWAP:
2923 * request that the UPL be prepared for
2924 * encryption.
2925 */
2926 request_flags |= UPL_ENCRYPT;
2927 flags |= UPL_PAGING_ENCRYPTED;
2928 }
2929
0b4e3aa0
A
2930 page_list_count = 0;
2931 memory_object_super_upl_request(vs->vs_control,
2932 (memory_object_offset_t)offset,
2933 cnt, super_size,
2934 &upl, NULL, &page_list_count,
55e303ae 2935 request_flags | UPL_FOR_PAGEOUT);
1c79356b 2936
0b4e3aa0 2937 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1c79356b 2938
55e303ae
A
2939 seg_size = cl_size - (upl->offset % cl_size);
2940 upl_offset = upl->offset & ~(cl_size - 1);
2941
d12e1678
A
2942 for (seg_index = 0, transfer_size = upl->size;
2943 transfer_size > 0; ) {
d12e1678 2944 ps_offset[seg_index] =
55e303ae
A
2945 ps_clmap(vs,
2946 upl_offset,
2947 &clmap, CL_ALLOC,
2948 cl_size, 0);
1c79356b 2949
0b4e3aa0
A
2950 if (ps_offset[seg_index] == (vm_offset_t) -1) {
2951 upl_abort(upl, 0);
2952 upl_deallocate(upl);
2953
2954 return KERN_FAILURE;
1c79356b 2955
0b4e3aa0
A
2956 }
2957 psp[seg_index] = CLMAP_PS(clmap);
1c79356b 2958
55e303ae
A
2959 if (transfer_size > seg_size) {
2960 transfer_size -= seg_size;
2961 upl_offset += cl_size;
2962 seg_size = cl_size;
0b4e3aa0
A
2963 seg_index++;
2964 } else
2965 transfer_size = 0;
2966 }
55e303ae
A
2967 /*
2968 * Ignore any non-present pages at the end of the
2969 * UPL.
2970 */
2971 for (page_index = upl->size / vm_page_size; page_index > 0;)
2972 if (UPL_PAGE_PRESENT(pl, --page_index))
2973 break;
2974 num_of_pages = page_index + 1;
2975
2976 base_index = (upl->offset % cl_size) / PAGE_SIZE;
2977
2978 for (page_index = 0; page_index < num_of_pages; ) {
0b4e3aa0
A
2979 /*
2980 * skip over non-dirty pages
2981 */
2982 for ( ; page_index < num_of_pages; page_index++) {
55e303ae 2983 if (UPL_DIRTY_PAGE(pl, page_index)
d12e1678 2984 || UPL_PRECIOUS_PAGE(pl, page_index))
0b4e3aa0
A
2985 /*
2986 * this is a page we need to write
55e303ae 2987 * go see if we can buddy it up with
d12e1678 2988 * others that are contiguous to it
0b4e3aa0
A
2989 */
2990 break;
2991 /*
d12e1678 2992 * if the page is not dirty, but present, we
55e303ae 2993 * need to commit it... This is an unusual
d12e1678 2994 * case since we only asked for dirty pages
0b4e3aa0
A
2995 */
2996 if (UPL_PAGE_PRESENT(pl, page_index)) {
2997 boolean_t empty = FALSE;
2998 upl_commit_range(upl,
2999 page_index * vm_page_size,
3000 vm_page_size,
3001 UPL_COMMIT_NOTIFY_EMPTY,
3002 pl,
d52fe63f 3003 page_list_count,
0b4e3aa0 3004 &empty);
55e303ae
A
3005 if (empty) {
3006 assert(page_index ==
3007 num_of_pages - 1);
0b4e3aa0 3008 upl_deallocate(upl);
55e303ae 3009 }
1c79356b 3010 }
1c79356b 3011 }
0b4e3aa0
A
3012 if (page_index == num_of_pages)
3013 /*
3014 * no more pages to look at, we're out of here
3015 */
3016 break;
1c79356b 3017
0b4e3aa0 3018 /*
55e303ae
A
3019 * gather up contiguous dirty pages... we have at
 3020 * least 1, otherwise we would have bailed above;
0b4e3aa0
A
3021 * make sure that each physical segment that we step
3022 * into is contiguous to the one we're currently in
3023 * if it's not, we have to stop and write what we have
3024 */
55e303ae 3025 for (first_dirty = page_index;
d12e1678 3026 page_index < num_of_pages; ) {
55e303ae 3027 if ( !UPL_DIRTY_PAGE(pl, page_index)
d12e1678 3028 && !UPL_PRECIOUS_PAGE(pl, page_index))
0b4e3aa0
A
3029 break;
3030 page_index++;
3031 /*
3032 * if we just looked at the last page in the UPL
3033 * we don't need to check for physical segment
3034 * continuity
3035 */
3036 if (page_index < num_of_pages) {
3037 int cur_seg;
3038 int nxt_seg;
3039
55e303ae
A
3040 cur_seg = (base_index + (page_index - 1))/pages_in_cl;
3041 nxt_seg = (base_index + page_index)/pages_in_cl;
0b4e3aa0
A
3042
3043 if (cur_seg != nxt_seg) {
3044 if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
55e303ae
A
3045 /*
3046 * if the segment we're about
3047 * to step into is not
3048 * contiguous to the one we're
3049 * currently in, or it's in a
d12e1678 3050 * different paging file....
55e303ae 3051 * we stop here and generate
d12e1678
A
3052 * the I/O
3053 */
0b4e3aa0 3054 break;
1c79356b 3055 }
1c79356b 3056 }
0b4e3aa0
A
3057 }
3058 num_dirty = page_index - first_dirty;
1c79356b 3059
0b4e3aa0
A
3060 if (num_dirty) {
3061 upl_offset = first_dirty * vm_page_size;
0b4e3aa0
A
3062 transfer_size = num_dirty * vm_page_size;
3063
d12e1678 3064 while (transfer_size) {
1c79356b 3065
d12e1678 3066 if ((seg_size = cl_size -
55e303ae 3067 ((upl->offset + upl_offset) % cl_size))
d12e1678
A
3068 > transfer_size)
3069 seg_size = transfer_size;
0b4e3aa0 3070
d12e1678
A
3071 ps_vs_write_complete(vs,
3072 upl->offset + upl_offset,
3073 seg_size, error);
0b4e3aa0 3074
d12e1678
A
3075 transfer_size -= seg_size;
3076 upl_offset += seg_size;
0b4e3aa0 3077 }
d12e1678
A
3078 upl_offset = first_dirty * vm_page_size;
3079 transfer_size = num_dirty * vm_page_size;
55e303ae
A
3080
3081 seg_index = (base_index + first_dirty) / pages_in_cl;
3082 seg_offset = (upl->offset + upl_offset) % cl_size;
3083
d12e1678
A
3084 error = ps_write_file(psp[seg_index],
3085 upl, upl_offset,
3086 ps_offset[seg_index]
3087 + seg_offset,
3088 transfer_size, flags);
55e303ae 3089 } else {
0b4e3aa0
A
3090 boolean_t empty = FALSE;
3091 upl_abort_range(upl,
3092 first_dirty * vm_page_size,
3093 num_dirty * vm_page_size,
3094 UPL_ABORT_NOTIFY_EMPTY,
3095 &empty);
55e303ae
A
3096 if (empty) {
3097 assert(page_index == num_of_pages);
0b4e3aa0 3098 upl_deallocate(upl);
55e303ae 3099 }
1c79356b 3100 }
1c79356b 3101 }
0b4e3aa0 3102
1c79356b
A
3103 } else {
3104 assert(cnt <= (vm_page_size << vs->vs_clshift));
3105 list_size = cnt;
3106
3107 page_index = 0;
3108 /* The caller provides a mapped_data which is derived */
3109 /* from a temporary object. The targeted pages are */
3110 /* guaranteed to be set at offset 0 in the mapped_data */
3111 /* The actual offset however must still be derived */
3112 /* from the offset in the vs in question */
3113 mobj_base_addr = offset;
3114 mobj_target_addr = mobj_base_addr;
3115
3116 for (transfer_size = list_size; transfer_size != 0;) {
3117 actual_offset = ps_clmap(vs, mobj_target_addr,
3118 &clmap, CL_ALLOC,
3119 transfer_size < cl_size ?
3120 transfer_size : cl_size, 0);
3121 if(actual_offset == (vm_offset_t) -1) {
3122 error = 1;
3123 break;
3124 }
3125 cnt = MIN(transfer_size,
3126 CLMAP_NPGS(clmap) * vm_page_size);
3127 ps = CLMAP_PS(clmap);
3128 /* Assume that the caller has given us contiguous */
3129 /* pages */
3130 if(cnt) {
d12e1678
A
3131 ps_vs_write_complete(vs, mobj_target_addr,
3132 cnt, error);
1c79356b
A
3133 error = ps_write_file(ps, internal_upl,
3134 0, actual_offset,
3135 cnt, flags);
3136 if (error)
3137 break;
55e303ae 3138 }
1c79356b
A
3139 if (error)
3140 break;
3141 actual_offset += cnt;
3142 mobj_target_addr += cnt;
3143 transfer_size -= cnt;
3144 cnt = 0;
3145
3146 if (error)
3147 break;
3148 }
3149 }
3150 if(error)
3151 return KERN_FAILURE;
3152 else
3153 return KERN_SUCCESS;
3154}
3155
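/*
 * ps_vstruct_allocated_size: walk the direct or indirect cluster
 * maps and return, in bytes, the amount of backing store currently
 * allocated to this vstruct.
 */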
3156vm_size_t
3157ps_vstruct_allocated_size(
3158 vstruct_t vs)
3159{
3160 int num_pages;
3161 struct vs_map *vsmap;
91447636 3162 unsigned int i, j, k;
1c79356b
A
3163
3164 num_pages = 0;
3165 if (vs->vs_indirect) {
3166 /* loop on indirect maps */
3167 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3168 vsmap = vs->vs_imap[i];
3169 if (vsmap == NULL)
3170 continue;
3171 /* loop on clusters in this indirect map */
3172 for (j = 0; j < CLMAP_ENTRIES; j++) {
3173 if (VSM_ISCLR(vsmap[j]) ||
3174 VSM_ISERR(vsmap[j]))
3175 continue;
3176 /* loop on pages in this cluster */
3177 for (k = 0; k < VSCLSIZE(vs); k++) {
3178 if ((VSM_BMAP(vsmap[j])) & (1 << k))
3179 num_pages++;
3180 }
3181 }
3182 }
3183 } else {
3184 vsmap = vs->vs_dmap;
3185 if (vsmap == NULL)
3186 return 0;
3187 /* loop on clusters in the direct map */
3188 for (j = 0; j < CLMAP_ENTRIES; j++) {
3189 if (VSM_ISCLR(vsmap[j]) ||
3190 VSM_ISERR(vsmap[j]))
3191 continue;
3192 /* loop on pages in this cluster */
3193 for (k = 0; k < VSCLSIZE(vs); k++) {
3194 if ((VSM_BMAP(vsmap[j])) & (1 << k))
3195 num_pages++;
3196 }
3197 }
3198 }
3199
55e303ae 3200 return ptoa_32(num_pages);
1c79356b
A
3201}
3202
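/*
 * ps_vstruct_allocated_pages: record the offsets of allocated pages
 * in "pages" (up to pages_size entries) and return the total number
 * of allocated pages, which may exceed pages_size.
 */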
3203size_t
3204ps_vstruct_allocated_pages(
3205 vstruct_t vs,
3206 default_pager_page_t *pages,
3207 size_t pages_size)
3208{
91447636 3209 unsigned int num_pages;
1c79356b
A
3210 struct vs_map *vsmap;
3211 vm_offset_t offset;
91447636 3212 unsigned int i, j, k;
1c79356b
A
3213
3214 num_pages = 0;
3215 offset = 0;
3216 if (vs->vs_indirect) {
3217 /* loop on indirect maps */
3218 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3219 vsmap = vs->vs_imap[i];
3220 if (vsmap == NULL) {
3221 offset += (vm_page_size * CLMAP_ENTRIES *
3222 VSCLSIZE(vs));
3223 continue;
3224 }
3225 /* loop on clusters in this indirect map */
3226 for (j = 0; j < CLMAP_ENTRIES; j++) {
3227 if (VSM_ISCLR(vsmap[j]) ||
3228 VSM_ISERR(vsmap[j])) {
3229 offset += vm_page_size * VSCLSIZE(vs);
3230 continue;
3231 }
3232 /* loop on pages in this cluster */
3233 for (k = 0; k < VSCLSIZE(vs); k++) {
3234 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
3235 num_pages++;
3236 if (num_pages < pages_size)
3237 pages++->dpp_offset =
3238 offset;
3239 }
3240 offset += vm_page_size;
3241 }
3242 }
3243 }
3244 } else {
3245 vsmap = vs->vs_dmap;
3246 if (vsmap == NULL)
3247 return 0;
3248 /* loop on clusters in the direct map */
3249 for (j = 0; j < CLMAP_ENTRIES; j++) {
3250 if (VSM_ISCLR(vsmap[j]) ||
3251 VSM_ISERR(vsmap[j])) {
3252 offset += vm_page_size * VSCLSIZE(vs);
3253 continue;
3254 }
3255 /* loop on pages in this cluster */
3256 for (k = 0; k < VSCLSIZE(vs); k++) {
3257 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
3258 num_pages++;
3259 if (num_pages < pages_size)
3260 pages++->dpp_offset = offset;
3261 }
3262 offset += vm_page_size;
3263 }
3264 }
3265 }
3266
3267 return num_pages;
3268}
3269
3270
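/*
 * ps_vstruct_transfer_from_segment: migrate every cluster of this
 * vstruct that lives on "segment" to other paging segments using
 * vs_cluster_transfer().  The vstruct's reader/writer
 * synchronization is dropped and retaken around each transfer, and
 * the walk restarts if the map changes shape in the meantime.
 */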
3271kern_return_t
3272ps_vstruct_transfer_from_segment(
3273 vstruct_t vs,
3274 paging_segment_t segment,
1c79356b 3275 upl_t upl)
1c79356b
A
3276{
3277 struct vs_map *vsmap;
91447636
A
3278// struct vs_map old_vsmap;
3279// struct vs_map new_vsmap;
3280 unsigned int i, j;
1c79356b
A
3281
3282 VS_LOCK(vs); /* block all work on this vstruct */
3283 /* can't allow the normal multiple write */
3284 /* semantic because writes may conflict */
3285 vs->vs_xfer_pending = TRUE;
3286 vs_wait_for_sync_writers(vs);
3287 vs_start_write(vs);
3288 vs_wait_for_readers(vs);
3289 /* we will unlock the vs to allow other writes while transferring */
 3290 /* and will be guaranteed of the persistence of the vs struct */
3291 /* because the caller of ps_vstruct_transfer_from_segment bumped */
3292 /* vs_async_pending */
3293 /* OK we now have guaranteed no other parties are accessing this */
3294 /* vs. Now that we are also supporting simple lock versions of */
3295 /* vs_lock we cannot hold onto VS_LOCK as we may block below. */
3296 /* our purpose in holding it before was the multiple write case */
3297 /* we now use the boolean xfer_pending to do that. We can use */
3298 /* a boolean instead of a count because we have guaranteed single */
3299 /* file access to this code in its caller */
3300 VS_UNLOCK(vs);
3301vs_changed:
3302 if (vs->vs_indirect) {
91447636
A
3303 unsigned int vsmap_size;
3304 int clmap_off;
1c79356b
A
3305 /* loop on indirect maps */
3306 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3307 vsmap = vs->vs_imap[i];
3308 if (vsmap == NULL)
3309 continue;
3310 /* loop on clusters in this indirect map */
3311 clmap_off = (vm_page_size * CLMAP_ENTRIES *
3312 VSCLSIZE(vs) * i);
3313 if(i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
3314 vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
3315 else
3316 vsmap_size = CLMAP_ENTRIES;
3317 for (j = 0; j < vsmap_size; j++) {
3318 if (VSM_ISCLR(vsmap[j]) ||
3319 VSM_ISERR(vsmap[j]) ||
3320 (VSM_PS(vsmap[j]) != segment))
3321 continue;
3322 if(vs_cluster_transfer(vs,
3323 (vm_page_size * (j << vs->vs_clshift))
3324 + clmap_off,
3325 vm_page_size << vs->vs_clshift,
1c79356b 3326 upl)
1c79356b
A
3327 != KERN_SUCCESS) {
3328 VS_LOCK(vs);
3329 vs->vs_xfer_pending = FALSE;
3330 VS_UNLOCK(vs);
3331 vs_finish_write(vs);
3332 return KERN_FAILURE;
3333 }
3334 /* allow other readers/writers during transfer*/
3335 VS_LOCK(vs);
3336 vs->vs_xfer_pending = FALSE;
3337 VS_UNLOCK(vs);
3338 vs_finish_write(vs);
3339 VS_LOCK(vs);
3340 vs->vs_xfer_pending = TRUE;
1c79356b
A
3341 vs_wait_for_sync_writers(vs);
3342 vs_start_write(vs);
3343 vs_wait_for_readers(vs);
0b4e3aa0 3344 VS_UNLOCK(vs);
1c79356b
A
3345 if (!(vs->vs_indirect)) {
3346 goto vs_changed;
3347 }
3348 }
3349 }
3350 } else {
3351 vsmap = vs->vs_dmap;
3352 if (vsmap == NULL) {
3353 VS_LOCK(vs);
3354 vs->vs_xfer_pending = FALSE;
3355 VS_UNLOCK(vs);
3356 vs_finish_write(vs);
3357 return KERN_SUCCESS;
3358 }
3359 /* loop on clusters in the direct map */
3360 for (j = 0; j < vs->vs_size; j++) {
3361 if (VSM_ISCLR(vsmap[j]) ||
3362 VSM_ISERR(vsmap[j]) ||
3363 (VSM_PS(vsmap[j]) != segment))
3364 continue;
3365 if(vs_cluster_transfer(vs,
3366 vm_page_size * (j << vs->vs_clshift),
3367 vm_page_size << vs->vs_clshift,
1c79356b 3368 upl) != KERN_SUCCESS) {
1c79356b
A
3369 VS_LOCK(vs);
3370 vs->vs_xfer_pending = FALSE;
3371 VS_UNLOCK(vs);
3372 vs_finish_write(vs);
3373 return KERN_FAILURE;
3374 }
3375 /* allow other readers/writers during transfer*/
3376 VS_LOCK(vs);
3377 vs->vs_xfer_pending = FALSE;
3378 VS_UNLOCK(vs);
3379 vs_finish_write(vs);
3380 VS_LOCK(vs);
3381 vs->vs_xfer_pending = TRUE;
3382 VS_UNLOCK(vs);
3383 vs_wait_for_sync_writers(vs);
3384 vs_start_write(vs);
3385 vs_wait_for_readers(vs);
3386 if (vs->vs_indirect) {
3387 goto vs_changed;
3388 }
3389 }
3390 }
3391
3392 VS_LOCK(vs);
3393 vs->vs_xfer_pending = FALSE;
3394 VS_UNLOCK(vs);
3395 vs_finish_write(vs);
3396 return KERN_SUCCESS;
3397}
3398
3399
3400
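/*
 * vs_get_map_entry: return a pointer to the vs_map entry covering
 * "offset", or NULL if the offset falls within an indirect block
 * that has never been allocated.
 */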
3401vs_map_t
3402vs_get_map_entry(
3403 vstruct_t vs,
3404 vm_offset_t offset)
3405{
3406 struct vs_map *vsmap;
3407 vm_offset_t cluster;
3408
55e303ae 3409 cluster = atop_32(offset) >> vs->vs_clshift;
1c79356b
A
3410 if (vs->vs_indirect) {
3411 long ind_block = cluster/CLMAP_ENTRIES;
3412
3413 /* Is the indirect block allocated? */
3414 vsmap = vs->vs_imap[ind_block];
3415 if(vsmap == (vs_map_t) NULL)
3416 return vsmap;
3417 } else
3418 vsmap = vs->vs_dmap;
3419 vsmap += cluster%CLMAP_ENTRIES;
3420 return vsmap;
3421}
3422
3423kern_return_t
3424vs_cluster_transfer(
3425 vstruct_t vs,
3426 vm_offset_t offset,
3427 vm_size_t cnt,
1c79356b 3428 upl_t upl)
1c79356b
A
3429{
3430 vm_offset_t actual_offset;
3431 paging_segment_t ps;
3432 struct clmap clmap;
3433 kern_return_t error = KERN_SUCCESS;
91447636
A
3434 unsigned int size, size_wanted;
3435 int i;
89b3af67 3436 unsigned int residual = 0;
91447636
A
3437 unsigned int unavail_size;
3438// default_pager_thread_t *dpt;
3439// boolean_t dealloc;
3440 struct vs_map *vsmap_ptr = NULL;
1c79356b
A
3441 struct vs_map read_vsmap;
3442 struct vs_map original_read_vsmap;
3443 struct vs_map write_vsmap;
91447636
A
3444// upl_t sync_upl;
3445// vm_offset_t ioaddr;
1c79356b 3446
1c79356b
A
3447 /* vs_cluster_transfer reads in the pages of a cluster and
3448 * then writes these pages back to new backing store. The
3449 * segment the pages are being read from is assumed to have
3450 * been taken off-line and is no longer considered for new
3451 * space requests.
3452 */
3453
3454 /*
3455 * This loop will be executed once per cluster referenced.
3456 * Typically this means once, since it's unlikely that the
3457 * VM system will ask for anything spanning cluster boundaries.
3458 *
3459 * If there are holes in a cluster (in a paging segment), we stop
3460 * reading at the hole, then loop again, hoping to
3461 * find valid pages later in the cluster. This continues until
3462 * the entire range has been examined, and read, if present. The
3463 * pages are written as they are read. If a failure occurs after
3464 * some pages are written the unmap call at the bottom of the loop
3465 * recovers the backing store and the old backing store remains
3466 * in effect.
3467 */
3468
1c79356b
A
3469 VSM_CLR(write_vsmap);
3470 VSM_CLR(original_read_vsmap);
3471 /* grab the actual object's pages to sync with I/O */
3472 while (cnt && (error == KERN_SUCCESS)) {
3473 vsmap_ptr = vs_get_map_entry(vs, offset);
3474 actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
3475
3476 if (actual_offset == (vm_offset_t) -1) {
3477
3478 /*
3479 * Nothing left to write in this cluster at least
3480 * set write cluster information for any previous
3481 * write, clear for next cluster, if there is one
3482 */
3483 unsigned int local_size, clmask, clsize;
3484
3485 clsize = vm_page_size << vs->vs_clshift;
3486 clmask = clsize - 1;
3487 local_size = clsize - (offset & clmask);
3488 ASSERT(local_size);
3489 local_size = MIN(local_size, cnt);
3490
3491 /* This cluster has no data in it beyond what may */
3492 /* have been found on a previous iteration through */
3493 /* the loop "write_vsmap" */
3494 *vsmap_ptr = write_vsmap;
3495 VSM_CLR(write_vsmap);
3496 VSM_CLR(original_read_vsmap);
3497
3498 cnt -= local_size;
3499 offset += local_size;
3500 continue;
3501 }
3502
3503 /*
3504 * Count up contiguous available or unavailable
3505 * pages.
3506 */
3507 ps = CLMAP_PS(clmap);
3508 ASSERT(ps);
3509 size = 0;
3510 unavail_size = 0;
3511 for (i = 0;
3512 (size < cnt) && (unavail_size < cnt) &&
3513 (i < CLMAP_NPGS(clmap)); i++) {
3514 if (CLMAP_ISSET(clmap, i)) {
3515 if (unavail_size != 0)
3516 break;
3517 size += vm_page_size;
3518 BS_STAT(ps->ps_bs,
3519 ps->ps_bs->bs_pages_in++);
3520 } else {
3521 if (size != 0)
3522 break;
3523 unavail_size += vm_page_size;
3524 }
3525 }
3526
3527 if (size == 0) {
3528 ASSERT(unavail_size);
3529 cnt -= unavail_size;
3530 offset += unavail_size;
3531 if((offset & ((vm_page_size << vs->vs_clshift) - 1))
3532 == 0) {
3533 /* There is no more to transfer in this
3534 cluster
3535 */
3536 *vsmap_ptr = write_vsmap;
3537 VSM_CLR(write_vsmap);
3538 VSM_CLR(original_read_vsmap);
3539 }
3540 continue;
3541 }
3542
3543 if(VSM_ISCLR(original_read_vsmap))
3544 original_read_vsmap = *vsmap_ptr;
3545
3546 if(ps->ps_segtype == PS_PARTITION) {
89b3af67
A
3547 panic("swap partition not supported\n");
3548 /*NOTREACHED*/
3549 error = KERN_FAILURE;
3550 residual = size;
1c79356b 3551/*
9bccf70c 3552 NEED TO ISSUE WITH SYNC & NO COMMIT
1c79356b
A
3553 error = ps_read_device(ps, actual_offset, &buffer,
3554 size, &residual, flags);
3555*/
3556 } else {
9bccf70c 3557 /* NEED TO ISSUE WITH SYNC & NO COMMIT */
91447636 3558 error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
1c79356b 3559 size, &residual,
9bccf70c 3560 (UPL_IOSYNC | UPL_NOCOMMIT));
1c79356b
A
3561 }
3562
3563 read_vsmap = *vsmap_ptr;
3564
3565
3566 /*
3567 * Adjust counts and put data in new BS. Optimize for the
3568 * common case, i.e. no error and/or partial data.
3569 * If there was an error, then we need to error the entire
3570 * range, even if some data was successfully read.
3571 *
3572 */
3573 if ((error == KERN_SUCCESS) && (residual == 0)) {
0b4e3aa0 3574
1c79356b
A
3575 /*
3576 * Got everything we asked for, supply the data to
3577 * the new BS. Note that as a side effect of supplying
3578 * the data, the buffer holding the supplied data is
3579 * deallocated from the pager's address space unless
3580 * the write is unsuccessful.
3581 */
3582
3583 /* note buffer will be cleaned up in all cases by */
3584 /* internal_cluster_write or if an error on write */
3585 /* the vm_map_copy_page_discard call */
3586 *vsmap_ptr = write_vsmap;
3587
1c79356b
A
3588 if(vs_cluster_write(vs, upl, offset,
3589 size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
1c79356b
A
3590 error = KERN_FAILURE;
3591 if(!(VSM_ISCLR(*vsmap_ptr))) {
3592 /* unmap the new backing store object */
3593 ps_clunmap(vs, offset, size);
3594 }
3595 /* original vsmap */
3596 *vsmap_ptr = original_read_vsmap;
3597 VSM_CLR(write_vsmap);
3598 } else {
3599 if((offset + size) &
3600 ((vm_page_size << vs->vs_clshift)
3601 - 1)) {
3602 /* There is more to transfer in this
3603 cluster
3604 */
3605 write_vsmap = *vsmap_ptr;
3606 *vsmap_ptr = read_vsmap;
3607 } else {
3608 /* discard the old backing object */
3609 write_vsmap = *vsmap_ptr;
3610 *vsmap_ptr = read_vsmap;
3611 ps_clunmap(vs, offset, size);
3612 *vsmap_ptr = write_vsmap;
3613 VSM_CLR(write_vsmap);
3614 VSM_CLR(original_read_vsmap);
3615 }
3616 }
3617 } else {
3618 size_wanted = size;
3619 if (error == KERN_SUCCESS) {
3620 if (residual == size) {
3621 /*
3622 * If a read operation returns no error
3623 * and no data moved, we turn it into
3624 * an error, assuming we're reading at
3625 * or beyond EOF.
3626 * Fall through and error the entire
3627 * range.
3628 */
3629 error = KERN_FAILURE;
3630 *vsmap_ptr = write_vsmap;
3631 if(!(VSM_ISCLR(*vsmap_ptr))) {
3632 /* unmap the new backing store object */
3633 ps_clunmap(vs, offset, size);
3634 }
3635 *vsmap_ptr = original_read_vsmap;
3636 VSM_CLR(write_vsmap);
3637 continue;
3638 } else {
3639 /*
3640 * Otherwise, we have partial read.
3641 * This is also considered an error
3642 * for the purposes of cluster transfer
3643 */
3644 error = KERN_FAILURE;
3645 *vsmap_ptr = write_vsmap;
3646 if(!(VSM_ISCLR(*vsmap_ptr))) {
3647 /* unmap the new backing store object */
3648 ps_clunmap(vs, offset, size);
3649 }
3650 *vsmap_ptr = original_read_vsmap;
3651 VSM_CLR(write_vsmap);
3652 continue;
3653 }
3654 }
3655
3656 }
3657 cnt -= size;
3658 offset += size;
3659
3660 } /* END while (cnt && (error == 0)) */
3661 if(!VSM_ISCLR(write_vsmap))
3662 *vsmap_ptr = write_vsmap;
3663
1c79356b
A
3664 return error;
3665}
3666
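/*
 * default_pager_add_file: register vnode "vp" as a new file-backed
 * paging segment of "size" records (record_size bytes each) for the
 * given backing store.  A vnode already in use is rejected;
 * otherwise the segment's allocation bitmap is created, the segment
 * is entered into the global table, and the free-page accounting is
 * updated.
 */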
3667kern_return_t
91447636
A
3668default_pager_add_file(
3669 MACH_PORT_FACE backing_store,
3670 vnode_ptr_t vp,
1c79356b 3671 int record_size,
91447636 3672 vm_size_t size)
1c79356b
A
3673{
3674 backing_store_t bs;
3675 paging_segment_t ps;
3676 int i;
91447636 3677 unsigned int j;
1c79356b 3678 int error;
1c79356b
A
3679
3680 if ((bs = backing_store_lookup(backing_store))
3681 == BACKING_STORE_NULL)
3682 return KERN_INVALID_ARGUMENT;
3683
3684 PSL_LOCK();
3685 for (i = 0; i <= paging_segment_max; i++) {
3686 ps = paging_segments[i];
3687 if (ps == PAGING_SEGMENT_NULL)
3688 continue;
3689 if (ps->ps_segtype != PS_FILE)
3690 continue;
3691
3692 /*
3693 * Check for overlap on same device.
3694 */
3695 if (ps->ps_vnode == (struct vnode *)vp) {
3696 PSL_UNLOCK();
3697 BS_UNLOCK(bs);
3698 return KERN_INVALID_ARGUMENT;
3699 }
3700 }
3701 PSL_UNLOCK();
3702
3703 /*
3704 * Set up the paging segment
3705 */
3706 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
3707 if (ps == PAGING_SEGMENT_NULL) {
3708 BS_UNLOCK(bs);
3709 return KERN_RESOURCE_SHORTAGE;
3710 }
3711
3712 ps->ps_segtype = PS_FILE;
3713 ps->ps_vnode = (struct vnode *)vp;
3714 ps->ps_offset = 0;
3715 ps->ps_record_shift = local_log2(vm_page_size / record_size);
3716 ps->ps_recnum = size;
3717 ps->ps_pgnum = size >> ps->ps_record_shift;
3718
3719 ps->ps_pgcount = ps->ps_pgnum;
3720 ps->ps_clshift = local_log2(bs->bs_clsize);
3721 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
3722 ps->ps_hint = 0;
3723
3724 PS_LOCK_INIT(ps);
3725 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
3726 if (!ps->ps_bmap) {
91447636 3727 kfree(ps, sizeof *ps);
1c79356b
A
3728 BS_UNLOCK(bs);
3729 return KERN_RESOURCE_SHORTAGE;
3730 }
91447636
A
3731 for (j = 0; j < ps->ps_ncls; j++) {
3732 clrbit(ps->ps_bmap, j);
1c79356b
A
3733 }
3734
3735 ps->ps_going_away = FALSE;
3736 ps->ps_bs = bs;
3737
3738 if ((error = ps_enter(ps)) != 0) {
91447636
A
3739 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
3740 kfree(ps, sizeof *ps);
1c79356b
A
3741 BS_UNLOCK(bs);
3742 return KERN_RESOURCE_SHORTAGE;
3743 }
3744
3745 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
3746 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
3747 PSL_LOCK();
3748 dp_pages_free += ps->ps_pgcount;
3749 PSL_UNLOCK();
3750
3751 BS_UNLOCK(bs);
3752
3753 bs_more_space(ps->ps_clcount);
3754
91447636
A
3755 DP_DEBUG(DEBUG_BS_INTERNAL,
3756 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
3757 device, offset, size, record_size,
3758 ps->ps_record_shift, ps->ps_pgnum));
1c79356b
A
3759
3760 return KERN_SUCCESS;
3761}
3762
3763
3764
1c79356b
A
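/*
 * ps_read_file: page data in from a file-backed segment by handing
 * the UPL to vnode_pagein().  Partial reads are not reported at this
 * level, so *residualp is zero on success and KERN_FAILURE is
 * returned on any error.
 */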
3765kern_return_t
3766ps_read_file(
3767 paging_segment_t ps,
3768 upl_t upl,
91447636 3769 upl_offset_t upl_offset,
1c79356b 3770 vm_offset_t offset,
91447636 3771 upl_size_t size,
1c79356b
A
3772 unsigned int *residualp,
3773 int flags)
3774{
3775 vm_object_offset_t f_offset;
3776 int error = 0;
3777 int result;
1c79356b 3778
91447636 3779 assert(dp_encryption_inited);
1c79356b 3780
55e303ae 3781 clustered_reads[atop_32(size)]++;
1c79356b
A
3782
3783 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3784
3785 /* for transfer case we need to pass uploffset and flags */
3786 error = vnode_pagein(ps->ps_vnode,
0b4e3aa0 3787 upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);
1c79356b
A
3788
3789 /* The vnode_pagein semantic is somewhat at odds with the existing */
3790 /* device_read semantic. Partial reads are not experienced at this */
3791 /* level. It is up to the bit map code and cluster read code to */
3792 /* check that requested data locations are actually backed, and the */
3793 /* pagein code to either read all of the requested data or return an */
3794 /* error. */
3795
3796 if (error)
3797 result = KERN_FAILURE;
3798 else {
3799 *residualp = 0;
3800 result = KERN_SUCCESS;
3801 }
3802 return result;
1c79356b
A
3803}
3804
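/*
 * ps_write_file: page data out to a file-backed segment via
 * vnode_pageout(), first encrypting the UPL's pages when
 * UPL_PAGING_ENCRYPTED is set.
 */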
3805kern_return_t
3806ps_write_file(
3807 paging_segment_t ps,
3808 upl_t upl,
91447636 3809 upl_offset_t upl_offset,
1c79356b
A
3810 vm_offset_t offset,
3811 unsigned int size,
3812 int flags)
3813{
3814 vm_object_offset_t f_offset;
3815 kern_return_t result;
1c79356b 3816
91447636 3817 assert(dp_encryption_inited);
1c79356b 3818
55e303ae 3819 clustered_writes[atop_32(size)]++;
1c79356b
A
3820 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3821
91447636
A
3822 if (flags & UPL_PAGING_ENCRYPTED) {
3823 /*
3824 * ENCRYPTED SWAP:
3825 * encrypt all the pages that we're going
3826 * to pageout.
3827 */
3828 upl_encrypt(upl, upl_offset, size);
3829 }
3830
1c79356b
A
3831 if (vnode_pageout(ps->ps_vnode,
3832 upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
3833 result = KERN_FAILURE;
3834 else
3835 result = KERN_SUCCESS;
3836
3837 return result;
3838}
3839
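/*
 * default_pager_triggers: control interface used to register the
 * high and low water-mark trigger ports and to select swap
 * encryption (only while the encryption setting has not yet been
 * locked in).  Any displaced or rejected port is returned so its
 * send right can be released.
 */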
3840kern_return_t
91447636 3841default_pager_triggers( __unused MACH_PORT_FACE default_pager,
1c79356b
A
3842 int hi_wat,
3843 int lo_wat,
3844 int flags,
3845 MACH_PORT_FACE trigger_port)
3846{
0b4e3aa0
A
3847 MACH_PORT_FACE release;
3848 kern_return_t kr;
1c79356b 3849
0b4e3aa0 3850 PSL_LOCK();
91447636
A
3851 if (flags == SWAP_ENCRYPT_ON) {
3852 /* ENCRYPTED SWAP: turn encryption on */
3853 release = trigger_port;
3854 if (!dp_encryption_inited) {
3855 dp_encryption_inited = TRUE;
3856 dp_encryption = TRUE;
3857 kr = KERN_SUCCESS;
3858 } else {
3859 kr = KERN_FAILURE;
3860 }
3861 } else if (flags == SWAP_ENCRYPT_OFF) {
3862 /* ENCRYPTED SWAP: turn encryption off */
3863 release = trigger_port;
3864 if (!dp_encryption_inited) {
3865 dp_encryption_inited = TRUE;
3866 dp_encryption = FALSE;
3867 kr = KERN_SUCCESS;
3868 } else {
3869 kr = KERN_FAILURE;
3870 }
3871 } else if (flags == HI_WAT_ALERT) {
0b4e3aa0 3872 release = min_pages_trigger_port;
1c79356b
A
3873 min_pages_trigger_port = trigger_port;
3874 minimum_pages_remaining = hi_wat/vm_page_size;
3875 bs_low = FALSE;
0b4e3aa0
A
3876 kr = KERN_SUCCESS;
3877 } else if (flags == LO_WAT_ALERT) {
3878 release = max_pages_trigger_port;
1c79356b
A
3879 max_pages_trigger_port = trigger_port;
3880 maximum_pages_free = lo_wat/vm_page_size;
0b4e3aa0
A
3881 kr = KERN_SUCCESS;
3882 } else {
3883 release = trigger_port;
3884 kr = KERN_INVALID_ARGUMENT;
1c79356b 3885 }
0b4e3aa0
A
3886 PSL_UNLOCK();
3887
3888 if (IP_VALID(release))
3889 ipc_port_release_send(release);
3890
3891 return kr;
1c79356b 3892}
55e303ae
A
3893
3894/*
3895 * Monitor the amount of available backing store vs. the amount of
3896 * required backing store, notify a listener (if present) when
3897 * backing store may safely be removed.
3898 *
3899 * We attempt to avoid the situation where backing store is
3900 * discarded en masse, as this can lead to thrashing as the
3901 * backing store is compacted.
3902 */
3903
3904#define PF_INTERVAL 3 /* time between free level checks */
3905#define PF_LATENCY 10 /* number of intervals before release */
3906
3907static int dp_pages_free_low_count = 0;
91447636 3908thread_call_t default_pager_backing_store_monitor_callout;
55e303ae
A
3909
3910void
91447636
A
3911default_pager_backing_store_monitor(__unused thread_call_param_t p1,
3912 __unused thread_call_param_t p2)
55e303ae 3913{
91447636 3914// unsigned long long average;
55e303ae
A
3915 ipc_port_t trigger;
3916 uint64_t deadline;
3917
3918 /*
3919 * We determine whether it will be safe to release some
3920 * backing store by watching the free page level. If
3921 * it remains below the maximum_pages_free threshold for
3922 * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
3923 * then we deem it safe.
3924 *
3925 * Note that this establishes a maximum rate at which backing
3926 * store will be released, as each notification (currently)
3927 * only results in a single backing store object being
3928 * released.
3929 */
3930 if (dp_pages_free > maximum_pages_free) {
3931 dp_pages_free_low_count++;
3932 } else {
3933 dp_pages_free_low_count = 0;
3934 }
3935
3936 /* decide whether to send notification */
3937 trigger = IP_NULL;
3938 if (max_pages_trigger_port &&
3939 (backing_store_release_trigger_disable == 0) &&
3940 (dp_pages_free_low_count > PF_LATENCY)) {
3941 trigger = max_pages_trigger_port;
3942 max_pages_trigger_port = NULL;
3943 }
3944
3945 /* send notification */
3946 if (trigger != IP_NULL) {
3947 VSL_LOCK();
3948 if(backing_store_release_trigger_disable != 0) {
3949 assert_wait((event_t)
3950 &backing_store_release_trigger_disable,
3951 THREAD_UNINT);
3952 VSL_UNLOCK();
3953 thread_block(THREAD_CONTINUE_NULL);
3954 } else {
3955 VSL_UNLOCK();
3956 }
3957 default_pager_space_alert(trigger, LO_WAT_ALERT);
3958 ipc_port_release_send(trigger);
3959 dp_pages_free_low_count = 0;
3960 }
3961
3962 clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
91447636 3963 thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
55e303ae 3964}