/* apple/xnu (xnu-1228.15.4) - osfmk/default_pager/dp_backing_store.c */
1c79356b 1/*
2d21ac55 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56
57/*
58 * Default Pager.
59 * Paging File Management.
60 */
61
91447636 62#include <mach/host_priv.h>
0b4e3aa0 63#include <mach/memory_object_control.h>
1c79356b 64#include <mach/memory_object_server.h>
65#include <mach/upl.h>
66#include <default_pager/default_pager_internal.h>
1c79356b 67#include <default_pager/default_pager_alerts.h>
68#include <default_pager/default_pager_object_server.h>
69
70#include <ipc/ipc_types.h>
71#include <ipc/ipc_port.h>
72#include <ipc/ipc_space.h>
73
74#include <kern/kern_types.h>
75#include <kern/host.h>
76#include <kern/queue.h>
77#include <kern/counters.h>
78#include <kern/sched_prim.h>
91447636 79
80#include <vm/vm_kern.h>
81#include <vm/vm_pageout.h>
1c79356b 82#include <vm/vm_map.h>
83#include <vm/vm_object.h>
84#include <vm/vm_protos.h>
85
2d21ac55 86
91447636 87/* LP64todo - need large internal object support */
1c79356b 88
89/*
90 * ALLOC_STRIDE... the maximum number of bytes allocated from
91 * a swap file before moving on to the next swap file... if
92 * all swap files reside on a single disk, this value should
93 * be very large (this is the default assumption)... if the
 94 * swap files are spread across multiple disks, then this value
95 * should be small (128 * 1024)...
96 *
97 * This should be determined dynamically in the future
98 */
1c79356b 99
0b4e3aa0 100#define ALLOC_STRIDE (1024 * 1024 * 1024)
101int physical_transfer_cluster_count = 0;
102
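/*
 * Editorial sketch, not part of the original source: ALLOC_STRIDE expressed
 * in clusters, which is the form ps_select_segment() consumes below via
 * ALLOC_STRIDE >> (ps_clshift + vm_page_shift).  Assumes 4 KB pages
 * (vm_page_shift == 12); the helper name is invented for illustration.
 */
#if 0
static unsigned int
example_stride_in_clusters(int clshift)
{
	/* 1 GB stride / (pages per cluster * bytes per page) */
	return ALLOC_STRIDE >> (clshift + 12);	/* clshift == 2 -> 65536 clusters */
}
#endif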
103#define VM_SUPER_CLUSTER 0x40000
104#define VM_SUPER_PAGES 64
105
106/*
107 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
108 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
109 */
110#define VSTRUCT_DEF_CLSHIFT 2
111int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
112int default_pager_clsize = 0;
113
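/*
 * Editorial note, not in the original: with the default VSTRUCT_DEF_CLSHIFT
 * of 2 and 4 KB pages, a cluster is 1 << 2 = 4 pages = 16 KB, and one
 * VM_SUPER_CLUSTER (0x40000 bytes = 256 KB) spans VM_SUPER_PAGES = 64 pages.
 */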
114/* statistics */
115unsigned int clustered_writes[VM_SUPER_PAGES+1];
116unsigned int clustered_reads[VM_SUPER_PAGES+1];
117
118/*
119 * Globals used for asynchronous paging operations:
120 * vs_async_list: head of list of to-be-completed I/O ops
121 * async_num_queued: number of pages completed, but not yet
122 * processed by async thread.
123 * async_requests_out: number of pages of requests not completed.
124 */
125
126#if 0
127struct vs_async *vs_async_list;
128int async_num_queued;
129int async_requests_out;
130#endif
131
132
133#define VS_ASYNC_REUSE 1
134struct vs_async *vs_async_free_list;
135
136mutex_t default_pager_async_lock; /* Protects globals above */
137
138
139int vs_alloc_async_failed = 0; /* statistics */
140int vs_alloc_async_count = 0; /* statistics */
141struct vs_async *vs_alloc_async(void); /* forward */
142void vs_free_async(struct vs_async *vsa); /* forward */
143
144
145#define VS_ALLOC_ASYNC() vs_alloc_async()
146#define VS_FREE_ASYNC(vsa) vs_free_async(vsa)
147
148#define VS_ASYNC_LOCK() mutex_lock(&default_pager_async_lock)
149#define VS_ASYNC_UNLOCK() mutex_unlock(&default_pager_async_lock)
91447636 150#define VS_ASYNC_LOCK_INIT() mutex_init(&default_pager_async_lock, 0)
151#define VS_ASYNC_LOCK_ADDR() (&default_pager_async_lock)
152/*
153 * Paging Space Hysteresis triggers and the target notification port
154 *
155 */
156
157unsigned int minimum_pages_remaining = 0;
158unsigned int maximum_pages_free = 0;
159ipc_port_t min_pages_trigger_port = NULL;
160ipc_port_t max_pages_trigger_port = NULL;
161
162boolean_t bs_low = FALSE;
0b4e3aa0 163int backing_store_release_trigger_disable = 0;
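
/*
 * Editorial sketch, not part of the original source: the low-space check
 * that the allocation paths below (ps_select_segment, ps_allocate_cluster)
 * repeat.  When free pages drop under the minimum, the registered trigger
 * port is consumed and notified exactly once:
 *
 *	if (min_pages_trigger_port &&
 *	    (dp_pages_free < minimum_pages_remaining)) {
 *		trigger = min_pages_trigger_port;
 *		min_pages_trigger_port = NULL;		// fire at most once
 *		bs_low = TRUE;
 *	}
 *	...
 *	if (trigger != IP_NULL) {
 *		default_pager_space_alert(trigger, HI_WAT_ALERT);
 *		ipc_port_release_send(trigger);
 *	}
 */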
164
165
166/* Have we decided if swap needs to be encrypted yet ? */
167boolean_t dp_encryption_inited = FALSE;
168/* Should we encrypt swap ? */
169boolean_t dp_encryption = FALSE;
170
171
172/*
173 * Object sizes are rounded up to the next power of 2,
174 * unless they are bigger than a given maximum size.
175 */
176vm_size_t max_doubled_size = 4 * 1024 * 1024; /* 4 meg */
177
178/*
179 * List of all backing store and segments.
180 */
181struct backing_store_list_head backing_store_list;
182paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
183mutex_t paging_segments_lock;
184int paging_segment_max = 0;
185int paging_segment_count = 0;
186int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
187
188
189/*
190 * Total pages free in system
 191 * This differs from clusters committed/avail, which is a measure of the
 192 * over-commitment of paging segments to backing store (an idea that is
 193 * likely to be deprecated).
194 */
195unsigned int dp_pages_free = 0;
196unsigned int cluster_transfer_minimum = 100;
197
198/* forward declarations */
199kern_return_t ps_write_file(paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, int); /* forward */
200kern_return_t ps_read_file (paging_segment_t, upl_t, upl_offset_t, vm_offset_t, unsigned int, unsigned int *, int); /* forward */
201default_pager_thread_t *get_read_buffer( void );
202kern_return_t ps_vstruct_transfer_from_segment(
203 vstruct_t vs,
204 paging_segment_t segment,
205 upl_t upl);
206kern_return_t ps_read_device(paging_segment_t, vm_offset_t, vm_offset_t *, unsigned int, unsigned int *, int); /* forward */
207kern_return_t ps_write_device(paging_segment_t, vm_offset_t, vm_offset_t, unsigned int, struct vs_async *); /* forward */
208kern_return_t vs_cluster_transfer(
209 vstruct_t vs,
210 upl_offset_t offset,
211 upl_size_t cnt,
212 upl_t upl);
213vs_map_t vs_get_map_entry(
214 vstruct_t vs,
215 vm_offset_t offset);
0b4e3aa0 216
217
218default_pager_thread_t *
91447636 219get_read_buffer( void )
220{
221 int i;
222
223 DPT_LOCK(dpt_lock);
224 while(TRUE) {
225 for (i=0; i<default_pager_internal_count; i++) {
226 if(dpt_array[i]->checked_out == FALSE) {
227 dpt_array[i]->checked_out = TRUE;
228 DPT_UNLOCK(dpt_lock);
229 return dpt_array[i];
230 }
231 }
9bccf70c 232 DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
233 }
234}
235
236void
237bs_initialize(void)
238{
239 int i;
240
241 /*
242 * List of all backing store.
243 */
244 BSL_LOCK_INIT();
245 queue_init(&backing_store_list.bsl_queue);
246 PSL_LOCK_INIT();
247
248 VS_ASYNC_LOCK_INIT();
249#if VS_ASYNC_REUSE
250 vs_async_free_list = NULL;
251#endif /* VS_ASYNC_REUSE */
252
0b4e3aa0 253 for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
254 clustered_writes[i] = 0;
255 clustered_reads[i] = 0;
256 }
257
258}
259
260/*
 261 * When things do not quite work out...
262 */
263void bs_no_paging_space(boolean_t); /* forward */
264
265void
266bs_no_paging_space(
267 boolean_t out_of_memory)
268{
269
270 if (out_of_memory)
271 dprintf(("*** OUT OF MEMORY ***\n"));
272 panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
273}
274
275void bs_more_space(int); /* forward */
276void bs_commit(int); /* forward */
277
278boolean_t user_warned = FALSE;
279unsigned int clusters_committed = 0;
280unsigned int clusters_available = 0;
281unsigned int clusters_committed_peak = 0;
282
283void
284bs_more_space(
285 int nclusters)
286{
287 BSL_LOCK();
288 /*
289 * Account for new paging space.
290 */
291 clusters_available += nclusters;
292
293 if (clusters_available >= clusters_committed) {
294 if (verbose && user_warned) {
295 printf("%s%s - %d excess clusters now.\n",
296 my_name,
297 "paging space is OK now",
298 clusters_available - clusters_committed);
299 user_warned = FALSE;
300 clusters_committed_peak = 0;
301 }
302 } else {
303 if (verbose && user_warned) {
304 printf("%s%s - still short of %d clusters.\n",
305 my_name,
306 "WARNING: paging space over-committed",
307 clusters_committed - clusters_available);
308 clusters_committed_peak -= nclusters;
309 }
310 }
311 BSL_UNLOCK();
312
313 return;
314}
315
316void
317bs_commit(
318 int nclusters)
319{
320 BSL_LOCK();
321 clusters_committed += nclusters;
322 if (clusters_committed > clusters_available) {
323 if (verbose && !user_warned) {
324 user_warned = TRUE;
325 printf("%s%s - short of %d clusters.\n",
326 my_name,
327 "WARNING: paging space over-committed",
328 clusters_committed - clusters_available);
329 }
330 if (clusters_committed > clusters_committed_peak) {
331 clusters_committed_peak = clusters_committed;
332 }
333 } else {
334 if (verbose && user_warned) {
335 printf("%s%s - was short of up to %d clusters.\n",
336 my_name,
337 "paging space is OK now",
338 clusters_committed_peak - clusters_available);
339 user_warned = FALSE;
340 clusters_committed_peak = 0;
341 }
342 }
343 BSL_UNLOCK();
344
345 return;
346}
347
348int default_pager_info_verbose = 1;
349
350void
351bs_global_info(
352 vm_size_t *totalp,
353 vm_size_t *freep)
354{
355 vm_size_t pages_total, pages_free;
356 paging_segment_t ps;
357 int i;
358
359 PSL_LOCK();
360 pages_total = pages_free = 0;
361 for (i = 0; i <= paging_segment_max; i++) {
362 ps = paging_segments[i];
363 if (ps == PAGING_SEGMENT_NULL)
364 continue;
365
366 /*
367 * no need to lock: by the time this data
368 * gets back to any remote requestor it
 369 * will be obsolete anyway
370 */
371 pages_total += ps->ps_pgnum;
372 pages_free += ps->ps_clcount << ps->ps_clshift;
373 DP_DEBUG(DEBUG_BS_INTERNAL,
374 ("segment #%d: %d total, %d free\n",
375 i, ps->ps_pgnum, ps->ps_clcount << ps->ps_clshift));
376 }
377 *totalp = pages_total;
378 *freep = pages_free;
379 if (verbose && user_warned && default_pager_info_verbose) {
380 if (clusters_available < clusters_committed) {
381 printf("%s %d clusters committed, %d available.\n",
382 my_name,
383 clusters_committed,
384 clusters_available);
385 }
386 }
387 PSL_UNLOCK();
388}
389
390backing_store_t backing_store_alloc(void); /* forward */
391
392backing_store_t
393backing_store_alloc(void)
394{
395 backing_store_t bs;
396
397 bs = (backing_store_t) kalloc(sizeof (struct backing_store));
398 if (bs == BACKING_STORE_NULL)
399 panic("backing_store_alloc: no memory");
400
401 BS_LOCK_INIT(bs);
402 bs->bs_port = MACH_PORT_NULL;
403 bs->bs_priority = 0;
404 bs->bs_clsize = 0;
405 bs->bs_pages_total = 0;
406 bs->bs_pages_in = 0;
407 bs->bs_pages_in_fail = 0;
408 bs->bs_pages_out = 0;
409 bs->bs_pages_out_fail = 0;
410
411 return bs;
412}
413
414backing_store_t backing_store_lookup(MACH_PORT_FACE); /* forward */
415
416/* Even in both the component space and external versions of this pager, */
417/* backing_store_lookup will be called from tasks in the application space */
418backing_store_t
419backing_store_lookup(
420 MACH_PORT_FACE port)
421{
422 backing_store_t bs;
423
424/*
 425 port is currently backed with a vs structure in the alias field.
 426 We could create an ISBS alias and a port_is_bs call, but frankly
 427 I see no reason for the test; the bs->port == port check below
 428 will work properly on junk entries.
429
430 if ((port == MACH_PORT_NULL) || port_is_vs(port))
431*/
432 if ((port == MACH_PORT_NULL))
433 return BACKING_STORE_NULL;
434
435 BSL_LOCK();
436 queue_iterate(&backing_store_list.bsl_queue, bs, backing_store_t,
437 bs_links) {
438 BS_LOCK(bs);
439 if (bs->bs_port == port) {
440 BSL_UNLOCK();
441 /* Success, return it locked. */
442 return bs;
443 }
444 BS_UNLOCK(bs);
445 }
446 BSL_UNLOCK();
447 return BACKING_STORE_NULL;
448}
449
450void backing_store_add(backing_store_t); /* forward */
451
452void
453backing_store_add(
91447636 454 __unused backing_store_t bs)
1c79356b 455{
456// MACH_PORT_FACE port = bs->bs_port;
457// MACH_PORT_FACE pset = default_pager_default_set;
1c79356b 458 kern_return_t kr = KERN_SUCCESS;
459
460 if (kr != KERN_SUCCESS)
461 panic("backing_store_add: add to set");
462
463}
464
465/*
466 * Set up default page shift, but only if not already
467 * set and argument is within range.
468 */
469boolean_t
470bs_set_default_clsize(unsigned int npages)
471{
472 switch(npages){
473 case 1:
474 case 2:
475 case 4:
476 case 8:
477 if (default_pager_clsize == 0) /* if not yet set */
478 vstruct_def_clshift = local_log2(npages);
479 return(TRUE);
480 }
481 return(FALSE);
482}
483
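/*
 * Editorial example, not in the original: if the cluster size has not been
 * fixed yet, bs_set_default_clsize(4) sets vstruct_def_clshift to
 * local_log2(4) == 2 (4 pages per cluster) and returns TRUE; any count
 * other than 1, 2, 4 or 8 is rejected and FALSE is returned.
 */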
484int bs_get_global_clsize(int clsize); /* forward */
485
486int
487bs_get_global_clsize(
488 int clsize)
489{
490 int i;
0b4e3aa0 491 memory_object_default_t dmm;
1c79356b 492 kern_return_t kr;
493
494 /*
495 * Only allow setting of cluster size once. If called
496 * with no cluster size (default), we use the compiled-in default
497 * for the duration. The same cluster size is used for all
498 * paging segments.
499 */
500 if (default_pager_clsize == 0) {
501 /*
502 * Keep cluster size in bit shift because it's quicker
503 * arithmetic, and easier to keep at a power of 2.
504 */
505 if (clsize != NO_CLSIZE) {
506 for (i = 0; (1 << i) < clsize; i++);
507 if (i > MAX_CLUSTER_SHIFT)
508 i = MAX_CLUSTER_SHIFT;
509 vstruct_def_clshift = i;
510 }
511 default_pager_clsize = (1 << vstruct_def_clshift);
512
513 /*
514 * Let the user know the new (and definitive) cluster size.
515 */
516 if (verbose)
517 printf("%scluster size = %d page%s\n",
518 my_name, default_pager_clsize,
519 (default_pager_clsize == 1) ? "" : "s");
0b4e3aa0 520
521 /*
522 * Let the kernel know too, in case it hasn't used the
523 * default value provided in main() yet.
524 */
0b4e3aa0 525 dmm = default_pager_object;
526 clsize = default_pager_clsize * vm_page_size; /* in bytes */
527 kr = host_default_memory_manager(host_priv_self(),
0b4e3aa0 528 &dmm,
1c79356b 529 clsize);
530 memory_object_default_deallocate(dmm);
531
532 if (kr != KERN_SUCCESS) {
533 panic("bs_get_global_cl_size:host_default_memory_manager");
534 }
0b4e3aa0 535 if (dmm != default_pager_object) {
536 panic("bs_get_global_cl_size:there is another default pager");
537 }
538 }
539 ASSERT(default_pager_clsize > 0 &&
540 (default_pager_clsize & (default_pager_clsize - 1)) == 0);
541
542 return default_pager_clsize;
543}
544
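/*
 * Editorial sketch, not part of the original source: the rounding performed
 * by bs_get_global_clsize() above picks the smallest shift whose power of
 * two covers the requested cluster size (the real code additionally caps it
 * at MAX_CLUSTER_SHIFT).  The helper name is invented for illustration.
 */
#if 0
static int
example_clsize_to_shift(int clsize)
{
	int i;

	for (i = 0; (1 << i) < clsize; i++)
		continue;
	return i;		/* clsize == 3 -> 2, clsize == 8 -> 3 */
}
#endif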
545kern_return_t
546default_pager_backing_store_create(
547 memory_object_default_t pager,
548 int priority,
549 int clsize, /* in bytes */
550 MACH_PORT_FACE *backing_store)
551{
552 backing_store_t bs;
553 MACH_PORT_FACE port;
91447636 554// kern_return_t kr;
1c79356b 555 struct vstruct_alias *alias_struct;
1c79356b 556
0b4e3aa0 557 if (pager != default_pager_object)
558 return KERN_INVALID_ARGUMENT;
559
560 bs = backing_store_alloc();
561 port = ipc_port_alloc_kernel();
562 ipc_port_make_send(port);
563 assert (port != IP_NULL);
564
565 DP_DEBUG(DEBUG_BS_EXTERNAL,
566 ("priority=%d clsize=%d bs_port=0x%x\n",
567 priority, clsize, (int) backing_store));
568
569 alias_struct = (struct vstruct_alias *)
570 kalloc(sizeof (struct vstruct_alias));
571 if(alias_struct != NULL) {
572 alias_struct->vs = (struct vstruct *)bs;
0c530ab8 573 alias_struct->name = &default_pager_ops;
574 port->alias = (int) alias_struct;
575 }
576 else {
577 ipc_port_dealloc_kernel((MACH_PORT_FACE)(port));
91447636 578 kfree(bs, sizeof (struct backing_store));
579 return KERN_RESOURCE_SHORTAGE;
580 }
581
582 bs->bs_port = port;
583 if (priority == DEFAULT_PAGER_BACKING_STORE_MAXPRI)
584 priority = BS_MAXPRI;
585 else if (priority == BS_NOPRI)
586 priority = BS_MAXPRI;
587 else
588 priority = BS_MINPRI;
589 bs->bs_priority = priority;
590
55e303ae 591 bs->bs_clsize = bs_get_global_clsize(atop_32(clsize));
592
593 BSL_LOCK();
594 queue_enter(&backing_store_list.bsl_queue, bs, backing_store_t,
595 bs_links);
596 BSL_UNLOCK();
597
598 backing_store_add(bs);
599
600 *backing_store = port;
601 return KERN_SUCCESS;
602}
603
604kern_return_t
605default_pager_backing_store_info(
606 MACH_PORT_FACE backing_store,
607 backing_store_flavor_t flavour,
608 backing_store_info_t info,
609 mach_msg_type_number_t *size)
610{
611 backing_store_t bs;
612 backing_store_basic_info_t basic;
613 int i;
614 paging_segment_t ps;
615
616 if (flavour != BACKING_STORE_BASIC_INFO ||
617 *size < BACKING_STORE_BASIC_INFO_COUNT)
618 return KERN_INVALID_ARGUMENT;
619
620 basic = (backing_store_basic_info_t)info;
621 *size = BACKING_STORE_BASIC_INFO_COUNT;
622
623 VSTATS_LOCK(&global_stats.gs_lock);
624 basic->pageout_calls = global_stats.gs_pageout_calls;
625 basic->pagein_calls = global_stats.gs_pagein_calls;
626 basic->pages_in = global_stats.gs_pages_in;
627 basic->pages_out = global_stats.gs_pages_out;
628 basic->pages_unavail = global_stats.gs_pages_unavail;
629 basic->pages_init = global_stats.gs_pages_init;
630 basic->pages_init_writes= global_stats.gs_pages_init_writes;
631 VSTATS_UNLOCK(&global_stats.gs_lock);
632
633 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
634 return KERN_INVALID_ARGUMENT;
635
636 basic->bs_pages_total = bs->bs_pages_total;
637 PSL_LOCK();
638 bs->bs_pages_free = 0;
639 for (i = 0; i <= paging_segment_max; i++) {
640 ps = paging_segments[i];
641 if (ps != PAGING_SEGMENT_NULL && ps->ps_bs == bs) {
642 PS_LOCK(ps);
643 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
644 PS_UNLOCK(ps);
645 }
646 }
647 PSL_UNLOCK();
648 basic->bs_pages_free = bs->bs_pages_free;
649 basic->bs_pages_in = bs->bs_pages_in;
650 basic->bs_pages_in_fail = bs->bs_pages_in_fail;
651 basic->bs_pages_out = bs->bs_pages_out;
652 basic->bs_pages_out_fail= bs->bs_pages_out_fail;
653
654 basic->bs_priority = bs->bs_priority;
55e303ae 655 basic->bs_clsize = ptoa_32(bs->bs_clsize); /* in bytes */
656
657 BS_UNLOCK(bs);
658
659 return KERN_SUCCESS;
660}
661
662int ps_delete(paging_segment_t); /* forward */
663
664int
665ps_delete(
666 paging_segment_t ps)
667{
668 vstruct_t vs;
669 kern_return_t error = KERN_SUCCESS;
670 int vs_count;
671
672 VSL_LOCK(); /* get the lock on the list of vs's */
673
 674 /* The lock relationship and sequence are fairly complicated. */
675 /* this code looks at a live list, locking and unlocking the list */
676 /* as it traverses it. It depends on the locking behavior of */
677 /* default_pager_no_senders. no_senders always locks the vstruct */
678 /* targeted for removal before locking the vstruct list. However */
679 /* it will remove that member of the list without locking its */
680 /* neighbors. We can be sure when we hold a lock on a vstruct */
681 /* it cannot be removed from the list but we must hold the list */
682 /* lock to be sure that its pointers to its neighbors are valid. */
683 /* Also, we can hold off destruction of a vstruct when the list */
684 /* lock and the vs locks are not being held by bumping the */
685 /* vs_async_pending count. */
686
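	/*
	 * Editorial sketch, not in the original source: the pinning pattern
	 * the loop below relies on.  Bumping vs_async_pending keeps the
	 * vstruct alive while both the list lock and the vs lock are dropped;
	 * the decrement side must wake any waiter.  Illustrative only.
	 *
	 *	VS_LOCK(vs);
	 *	vs->vs_async_pending += 1;		// hold off destruction
	 *	VS_UNLOCK(vs);
	 *	VSL_UNLOCK();				// now safe to drop the list lock
	 *	... transfer clusters out of the doomed segment ...
	 *	VS_LOCK(vs);
	 *	vs->vs_async_pending -= 1;
	 *	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
	 *		vs->vs_waiting_async = FALSE;
	 *		thread_wakeup(&vs->vs_async_pending);
	 *	}
	 *	VS_UNLOCK(vs);
	 */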
687
688 while(backing_store_release_trigger_disable != 0) {
9bccf70c 689 VSL_SLEEP(&backing_store_release_trigger_disable, THREAD_UNINT);
690 }
691
692 /* we will choose instead to hold a send right */
693 vs_count = vstruct_list.vsl_count;
694 vs = (vstruct_t) queue_first((queue_entry_t)&(vstruct_list.vsl_queue));
695 if(vs == (vstruct_t)&vstruct_list) {
696 VSL_UNLOCK();
697 return KERN_SUCCESS;
698 }
699 VS_LOCK(vs);
700 vs_async_wait(vs); /* wait for any pending async writes */
701 if ((vs_count != 0) && (vs != NULL))
702 vs->vs_async_pending += 1; /* hold parties calling */
703 /* vs_async_wait */
704 VS_UNLOCK(vs);
705 VSL_UNLOCK();
706 while((vs_count != 0) && (vs != NULL)) {
707 /* We take the count of AMO's before beginning the */
 708 /* transfer of the target segment. */
709 /* We are guaranteed that the target segment cannot get */
710 /* more users. We also know that queue entries are */
711 /* made at the back of the list. If some of the entries */
712 /* we would check disappear while we are traversing the */
713 /* list then we will either check new entries which */
714 /* do not have any backing store in the target segment */
715 /* or re-check old entries. This might not be optimal */
716 /* but it will always be correct. The alternative is to */
717 /* take a snapshot of the list. */
718 vstruct_t next_vs;
719
720 if(dp_pages_free < cluster_transfer_minimum)
721 error = KERN_FAILURE;
722 else {
723 vm_object_t transfer_object;
0c530ab8 724 unsigned int count;
725 upl_t upl;
726
91447636 727 transfer_object = vm_object_allocate((vm_object_size_t)VM_SUPER_CLUSTER);
728 count = 0;
729 error = vm_object_upl_request(transfer_object,
730 (vm_object_offset_t)0, VM_SUPER_CLUSTER,
731 &upl, NULL, &count,
732 UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_SET_LITE | UPL_SET_INTERNAL);
733
1c79356b 734 if(error == KERN_SUCCESS) {
735 error = ps_vstruct_transfer_from_segment(
736 vs, ps, upl);
91447636 737 upl_commit(upl, NULL, 0);
0b4e3aa0 738 upl_deallocate(upl);
1c79356b 739 } else {
740 error = KERN_FAILURE;
741 }
9bccf70c 742 vm_object_deallocate(transfer_object);
743 }
744 if(error) {
745 VS_LOCK(vs);
746 vs->vs_async_pending -= 1; /* release vs_async_wait */
747 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
748 vs->vs_waiting_async = FALSE;
1c79356b 749 VS_UNLOCK(vs);
0b4e3aa0 750 thread_wakeup(&vs->vs_async_pending);
751 } else {
752 VS_UNLOCK(vs);
753 }
754 return KERN_FAILURE;
755 }
756
757 VSL_LOCK();
758
759 while(backing_store_release_trigger_disable != 0) {
760 VSL_SLEEP(&backing_store_release_trigger_disable,
761 THREAD_UNINT);
762 }
763
764 next_vs = (vstruct_t) queue_next(&(vs->vs_links));
765 if((next_vs != (vstruct_t)&vstruct_list) &&
766 (vs != next_vs) && (vs_count != 1)) {
767 VS_LOCK(next_vs);
768 vs_async_wait(next_vs); /* wait for any */
769 /* pending async writes */
770 next_vs->vs_async_pending += 1; /* hold parties */
771 /* calling vs_async_wait */
772 VS_UNLOCK(next_vs);
773 }
774 VSL_UNLOCK();
775 VS_LOCK(vs);
776 vs->vs_async_pending -= 1;
777 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
778 vs->vs_waiting_async = FALSE;
1c79356b 779 VS_UNLOCK(vs);
0b4e3aa0 780 thread_wakeup(&vs->vs_async_pending);
781 } else {
782 VS_UNLOCK(vs);
783 }
784 if((vs == next_vs) || (next_vs == (vstruct_t)&vstruct_list))
785 vs = NULL;
786 else
787 vs = next_vs;
788 vs_count--;
789 }
790 return KERN_SUCCESS;
791}
792
793
794kern_return_t
795default_pager_backing_store_delete(
796 MACH_PORT_FACE backing_store)
797{
798 backing_store_t bs;
799 int i;
800 paging_segment_t ps;
801 int error;
802 int interim_pages_removed = 0;
91447636 803// kern_return_t kr;
804
805 if ((bs = backing_store_lookup(backing_store)) == BACKING_STORE_NULL)
806 return KERN_INVALID_ARGUMENT;
807
808#if 0
809 /* not implemented */
810 BS_UNLOCK(bs);
811 return KERN_FAILURE;
812#endif
813
814 restart:
815 PSL_LOCK();
816 error = KERN_SUCCESS;
817 for (i = 0; i <= paging_segment_max; i++) {
818 ps = paging_segments[i];
819 if (ps != PAGING_SEGMENT_NULL &&
820 ps->ps_bs == bs &&
821 ! ps->ps_going_away) {
822 PS_LOCK(ps);
823 /* disable access to this segment */
824 ps->ps_going_away = TRUE;
825 PS_UNLOCK(ps);
826 /*
827 * The "ps" segment is "off-line" now,
828 * we can try and delete it...
829 */
830 if(dp_pages_free < (cluster_transfer_minimum
831 + ps->ps_pgcount)) {
832 error = KERN_FAILURE;
833 PSL_UNLOCK();
834 }
835 else {
836 /* remove all pages associated with the */
837 /* segment from the list of free pages */
838 /* when transfer is through, all target */
839 /* segment pages will appear to be free */
840
841 dp_pages_free -= ps->ps_pgcount;
842 interim_pages_removed += ps->ps_pgcount;
843 PSL_UNLOCK();
844 error = ps_delete(ps);
845 }
846 if (error != KERN_SUCCESS) {
847 /*
848 * We couldn't delete the segment,
849 * probably because there's not enough
850 * virtual memory left.
851 * Re-enable all the segments.
852 */
853 PSL_LOCK();
854 break;
855 }
856 goto restart;
857 }
858 }
859
860 if (error != KERN_SUCCESS) {
861 for (i = 0; i <= paging_segment_max; i++) {
862 ps = paging_segments[i];
863 if (ps != PAGING_SEGMENT_NULL &&
864 ps->ps_bs == bs &&
865 ps->ps_going_away) {
866 PS_LOCK(ps);
867 /* re-enable access to this segment */
868 ps->ps_going_away = FALSE;
869 PS_UNLOCK(ps);
870 }
871 }
872 dp_pages_free += interim_pages_removed;
873 PSL_UNLOCK();
874 BS_UNLOCK(bs);
875 return error;
876 }
877
878 for (i = 0; i <= paging_segment_max; i++) {
879 ps = paging_segments[i];
880 if (ps != PAGING_SEGMENT_NULL &&
881 ps->ps_bs == bs) {
882 if(ps->ps_going_away) {
883 paging_segments[i] = PAGING_SEGMENT_NULL;
884 paging_segment_count--;
885 PS_LOCK(ps);
886 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
887 kfree(ps, sizeof *ps);
888 }
889 }
890 }
891
892 /* Scan the entire ps array separately to make certain we find the */
893 /* proper paging_segment_max */
894 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
895 if(paging_segments[i] != PAGING_SEGMENT_NULL)
896 paging_segment_max = i;
897 }
898
899 PSL_UNLOCK();
900
901 /*
902 * All the segments have been deleted.
903 * We can remove the backing store.
904 */
905
906 /*
907 * Disable lookups of this backing store.
908 */
909 if((void *)bs->bs_port->alias != NULL)
910 kfree((void *) bs->bs_port->alias,
911 sizeof (struct vstruct_alias));
912 ipc_port_dealloc_kernel((ipc_port_t) (bs->bs_port));
913 bs->bs_port = MACH_PORT_NULL;
914 BS_UNLOCK(bs);
915
916 /*
917 * Remove backing store from backing_store list.
918 */
919 BSL_LOCK();
920 queue_remove(&backing_store_list.bsl_queue, bs, backing_store_t,
921 bs_links);
922 BSL_UNLOCK();
923
924 /*
925 * Free the backing store structure.
926 */
91447636 927 kfree(bs, sizeof *bs);
928
929 return KERN_SUCCESS;
930}
931
932int ps_enter(paging_segment_t); /* forward */
933
934int
935ps_enter(
936 paging_segment_t ps)
937{
938 int i;
939
940 PSL_LOCK();
941
942 for (i = 0; i < MAX_NUM_PAGING_SEGMENTS; i++) {
943 if (paging_segments[i] == PAGING_SEGMENT_NULL)
944 break;
945 }
946
947 if (i < MAX_NUM_PAGING_SEGMENTS) {
948 paging_segments[i] = ps;
949 if (i > paging_segment_max)
950 paging_segment_max = i;
951 paging_segment_count++;
952 if ((ps_select_array[ps->ps_bs->bs_priority] == BS_NOPRI) ||
953 (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI))
954 ps_select_array[ps->ps_bs->bs_priority] = 0;
955 i = 0;
956 } else {
957 PSL_UNLOCK();
958 return KERN_RESOURCE_SHORTAGE;
959 }
960
961 PSL_UNLOCK();
962 return i;
963}
964
965#ifdef DEVICE_PAGING
966kern_return_t
967default_pager_add_segment(
968 MACH_PORT_FACE backing_store,
969 MACH_PORT_FACE device,
970 recnum_t offset,
971 recnum_t count,
972 int record_size)
973{
974 backing_store_t bs;
975 paging_segment_t ps;
976 int i;
977 int error;
978
979 if ((bs = backing_store_lookup(backing_store))
980 == BACKING_STORE_NULL)
981 return KERN_INVALID_ARGUMENT;
982
983 PSL_LOCK();
984 for (i = 0; i <= paging_segment_max; i++) {
985 ps = paging_segments[i];
986 if (ps == PAGING_SEGMENT_NULL)
987 continue;
988
989 /*
990 * Check for overlap on same device.
991 */
992 if (!(ps->ps_device != device
993 || offset >= ps->ps_offset + ps->ps_recnum
994 || offset + count <= ps->ps_offset)) {
995 PSL_UNLOCK();
996 BS_UNLOCK(bs);
997 return KERN_INVALID_ARGUMENT;
998 }
999 }
1000 PSL_UNLOCK();
1001
1002 /*
1003 * Set up the paging segment
1004 */
1005 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
1006 if (ps == PAGING_SEGMENT_NULL) {
1007 BS_UNLOCK(bs);
1008 return KERN_RESOURCE_SHORTAGE;
1009 }
1010
1011 ps->ps_segtype = PS_PARTITION;
1012 ps->ps_device = device;
1013 ps->ps_offset = offset;
1014 ps->ps_record_shift = local_log2(vm_page_size / record_size);
1015 ps->ps_recnum = count;
1016 ps->ps_pgnum = count >> ps->ps_record_shift;
1017
1018 ps->ps_pgcount = ps->ps_pgnum;
1019 ps->ps_clshift = local_log2(bs->bs_clsize);
1020 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
1021 ps->ps_hint = 0;
1022
1023 PS_LOCK_INIT(ps);
1024 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
1025 if (!ps->ps_bmap) {
91447636 1026 kfree(ps, sizeof *ps);
1027 BS_UNLOCK(bs);
1028 return KERN_RESOURCE_SHORTAGE;
1029 }
1030 for (i = 0; i < ps->ps_ncls; i++) {
1031 clrbit(ps->ps_bmap, i);
1032 }
1033
1034 ps->ps_going_away = FALSE;
1035 ps->ps_bs = bs;
1036
1037 if ((error = ps_enter(ps)) != 0) {
1038 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
1039 kfree(ps, sizeof *ps);
1040 BS_UNLOCK(bs);
1041 return KERN_RESOURCE_SHORTAGE;
1042 }
1043
1044 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
1045 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
1046 BS_UNLOCK(bs);
1047
1048 PSL_LOCK();
1049 dp_pages_free += ps->ps_pgcount;
1050 PSL_UNLOCK();
1051
1052 bs_more_space(ps->ps_clcount);
1053
1054 DP_DEBUG(DEBUG_BS_INTERNAL,
1055 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1056 device, offset, count, record_size,
1057 ps->ps_record_shift, ps->ps_pgnum));
1058
1059 return KERN_SUCCESS;
1060}
1061
1062boolean_t
1063bs_add_device(
1064 char *dev_name,
1065 MACH_PORT_FACE master)
1066{
1067 security_token_t null_security_token = {
1068 { 0, 0 }
1069 };
1070 MACH_PORT_FACE device;
1071 int info[DEV_GET_SIZE_COUNT];
1072 mach_msg_type_number_t info_count;
1073 MACH_PORT_FACE bs = MACH_PORT_NULL;
1074 unsigned int rec_size;
1075 recnum_t count;
1076 int clsize;
1077 MACH_PORT_FACE reply_port;
1078
1079 if (ds_device_open_sync(master, MACH_PORT_NULL, D_READ | D_WRITE,
1080 null_security_token, dev_name, &device))
1081 return FALSE;
1082
1083 info_count = DEV_GET_SIZE_COUNT;
1084 if (!ds_device_get_status(device, DEV_GET_SIZE, info, &info_count)) {
1085 rec_size = info[DEV_GET_SIZE_RECORD_SIZE];
1086 count = info[DEV_GET_SIZE_DEVICE_SIZE] / rec_size;
1087 clsize = bs_get_global_clsize(0);
1088 if (!default_pager_backing_store_create(
0b4e3aa0 1089 default_pager_object,
1090 DEFAULT_PAGER_BACKING_STORE_MAXPRI,
1091 (clsize * vm_page_size),
1092 &bs)) {
1093 if (!default_pager_add_segment(bs, device,
1094 0, count, rec_size)) {
1095 return TRUE;
1096 }
1097 ipc_port_release_receive(bs);
1098 }
1099 }
1100
1101 ipc_port_release_send(device);
1102 return FALSE;
1103}
1104#endif /* DEVICE_PAGING */
1105
1106#if VS_ASYNC_REUSE
1107
1108struct vs_async *
1109vs_alloc_async(void)
1110{
1111 struct vs_async *vsa;
1112 MACH_PORT_FACE reply_port;
91447636 1113// kern_return_t kr;
1114
1115 VS_ASYNC_LOCK();
1116 if (vs_async_free_list == NULL) {
1117 VS_ASYNC_UNLOCK();
1118 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1119 if (vsa != NULL) {
1120 /*
1121 * Try allocating a reply port named after the
1122 * address of the vs_async structure.
1123 */
1124 struct vstruct_alias *alias_struct;
1125
1126 reply_port = ipc_port_alloc_kernel();
1127 alias_struct = (struct vstruct_alias *)
1128 kalloc(sizeof (struct vstruct_alias));
1129 if(alias_struct != NULL) {
1130 alias_struct->vs = (struct vstruct *)vsa;
0c530ab8 1131 alias_struct->name = &default_pager_ops;
1132 reply_port->alias = (int) alias_struct;
1133 vsa->reply_port = reply_port;
1134 vs_alloc_async_count++;
1135 }
1136 else {
1137 vs_alloc_async_failed++;
1138 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1139 (reply_port));
91447636 1140 kfree(vsa, sizeof (struct vs_async));
1141 vsa = NULL;
1142 }
1143 }
1144 } else {
1145 vsa = vs_async_free_list;
1146 vs_async_free_list = vs_async_free_list->vsa_next;
1147 VS_ASYNC_UNLOCK();
1148 }
1149
1150 return vsa;
1151}
1152
1153void
1154vs_free_async(
1155 struct vs_async *vsa)
1156{
1157 VS_ASYNC_LOCK();
1158 vsa->vsa_next = vs_async_free_list;
1159 vs_async_free_list = vsa;
1160 VS_ASYNC_UNLOCK();
1161}
1162
1163#else /* VS_ASYNC_REUSE */
1164
1165struct vs_async *
1166vs_alloc_async(void)
1167{
 1168 struct vs_async *vsa;
 1169 MACH_PORT_FACE reply_port;
 1170 kern_return_t kr;
 struct vstruct_alias *alias_struct;
1171
1172 vsa = (struct vs_async *) kalloc(sizeof (struct vs_async));
1173 if (vsa != NULL) {
1174 /*
1175 * Try allocating a reply port named after the
1176 * address of the vs_async structure.
1177 */
1178 reply_port = ipc_port_alloc_kernel();
 1179 alias_struct = (struct vstruct_alias *)
1180 kalloc(sizeof (struct vstruct_alias));
1181 if(alias_struct != NULL) {
1182 alias_struct->vs = reply_port;
0c530ab8 1183 alias_struct->name = &default_pager_ops;
1184 reply_port->alias = (int) vsa;
1185 vsa->reply_port = reply_port;
1186 vs_alloc_async_count++;
1187 }
1188 else {
1189 vs_alloc_async_failed++;
1190 ipc_port_dealloc_kernel((MACH_PORT_FACE)
1191 (reply_port));
91447636 1192 kfree(vsa, sizeof (struct vs_async));
1193 vsa = NULL;
1194 }
1195 }
1196
1197 return vsa;
1198}
1199
1200void
1201vs_free_async(
1202 struct vs_async *vsa)
1203{
1204 MACH_PORT_FACE reply_port;
1205 kern_return_t kr;
1206
1207 reply_port = vsa->reply_port;
 1208 kfree(reply_port->alias, sizeof (struct vstruct_alias));
1209 kfree(vsa, sizeof (struct vs_async));
1210 ipc_port_dealloc_kernel((MACH_PORT_FACE) (reply_port));
1211#if 0
1212 VS_ASYNC_LOCK();
1213 vs_alloc_async_count--;
1214 VS_ASYNC_UNLOCK();
1215#endif
1216}
1217
1218#endif /* VS_ASYNC_REUSE */
1219
1220zone_t vstruct_zone;
1221
1222vstruct_t
1223ps_vstruct_create(
1224 vm_size_t size)
1225{
1226 vstruct_t vs;
91447636 1227 unsigned int i;
1c79356b 1228
0b4e3aa0 1229 vs = (vstruct_t) zalloc(vstruct_zone);
1230 if (vs == VSTRUCT_NULL) {
1231 return VSTRUCT_NULL;
1232 }
1233
1234 VS_LOCK_INIT(vs);
1235
1236 /*
1237 * The following fields will be provided later.
1238 */
0c530ab8 1239 vs->vs_pager_ops = NULL;
1240 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
1241 vs->vs_references = 1;
1c79356b 1242 vs->vs_seqno = 0;
1243
1244#ifdef MACH_KERNEL
1245 vs->vs_waiting_seqno = FALSE;
1246 vs->vs_waiting_read = FALSE;
1247 vs->vs_waiting_write = FALSE;
1248 vs->vs_waiting_async = FALSE;
1249#else
1250 mutex_init(&vs->vs_waiting_seqno, 0);
1251 mutex_init(&vs->vs_waiting_read, 0);
1252 mutex_init(&vs->vs_waiting_write, 0);
1253 mutex_init(&vs->vs_waiting_refs, 0);
1254 mutex_init(&vs->vs_waiting_async, 0);
1255#endif
1256
1257 vs->vs_readers = 0;
1258 vs->vs_writers = 0;
1259
1260 vs->vs_errors = 0;
1261
1262 vs->vs_clshift = local_log2(bs_get_global_clsize(0));
55e303ae 1263 vs->vs_size = ((atop_32(round_page_32(size)) - 1) >> vs->vs_clshift) + 1;
1264 vs->vs_async_pending = 0;
1265
1266 /*
1267 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1268 * depending on the size of the memory object.
1269 */
1270 if (INDIRECT_CLMAP(vs->vs_size)) {
1271 vs->vs_imap = (struct vs_map **)
1272 kalloc(INDIRECT_CLMAP_SIZE(vs->vs_size));
1273 vs->vs_indirect = TRUE;
1274 } else {
1275 vs->vs_dmap = (struct vs_map *)
1276 kalloc(CLMAP_SIZE(vs->vs_size));
1277 vs->vs_indirect = FALSE;
1278 }
1279 vs->vs_xfer_pending = FALSE;
1280 DP_DEBUG(DEBUG_VS_INTERNAL,
1281 ("map=0x%x, indirect=%d\n", (int) vs->vs_dmap, vs->vs_indirect));
1282
1283 /*
1284 * Check to see that we got the space.
1285 */
1286 if (!vs->vs_dmap) {
91447636 1287 kfree(vs, sizeof *vs);
1288 return VSTRUCT_NULL;
1289 }
1290
1291 /*
1292 * Zero the indirect pointers, or clear the direct pointers.
1293 */
1294 if (vs->vs_indirect)
1295 memset(vs->vs_imap, 0,
1296 INDIRECT_CLMAP_SIZE(vs->vs_size));
1297 else
1298 for (i = 0; i < vs->vs_size; i++)
1299 VSM_CLR(vs->vs_dmap[i]);
1300
1301 VS_MAP_LOCK_INIT(vs);
1302
1303 bs_commit(vs->vs_size);
1304
1305 return vs;
1306}
1307
91447636 1308paging_segment_t ps_select_segment(unsigned int, int *); /* forward */
1309
1310paging_segment_t
1311ps_select_segment(
1312 unsigned int shift,
1313 int *psindex)
1314{
1315 paging_segment_t ps;
1316 int i;
1317 int j;
1318
1319 /*
1320 * Optimize case where there's only one segment.
1321 * paging_segment_max will index the one and only segment.
1322 */
1323
1324 PSL_LOCK();
1325 if (paging_segment_count == 1) {
1326 paging_segment_t lps; /* used to avoid extra PS_UNLOCK */
0b4e3aa0 1327 ipc_port_t trigger = IP_NULL;
1328
1329 ps = paging_segments[paging_segment_max];
1330 *psindex = paging_segment_max;
1331 PS_LOCK(ps);
1332 if (ps->ps_going_away) {
1333 /* this segment is being turned off */
1334 lps = PAGING_SEGMENT_NULL;
1335 } else {
1336 ASSERT(ps->ps_clshift >= shift);
1337 if (ps->ps_clcount) {
1338 ps->ps_clcount--;
1339 dp_pages_free -= 1 << ps->ps_clshift;
1340 if(min_pages_trigger_port &&
1341 (dp_pages_free < minimum_pages_remaining)) {
0b4e3aa0 1342 trigger = min_pages_trigger_port;
1343 min_pages_trigger_port = NULL;
1344 bs_low = TRUE;
1345 }
1346 lps = ps;
1347 } else
1348 lps = PAGING_SEGMENT_NULL;
1349 }
1350 PS_UNLOCK(ps);
1351 PSL_UNLOCK();
1352
1353 if (trigger != IP_NULL) {
1354 default_pager_space_alert(trigger, HI_WAT_ALERT);
1355 ipc_port_release_send(trigger);
1356 }
1357 return lps;
1358 }
1359
1360 if (paging_segment_count == 0) {
1361 PSL_UNLOCK();
1362 return PAGING_SEGMENT_NULL;
1363 }
1364
1365 for (i = BS_MAXPRI;
1366 i >= BS_MINPRI; i--) {
1367 int start_index;
1368
1369 if ((ps_select_array[i] == BS_NOPRI) ||
1370 (ps_select_array[i] == BS_FULLPRI))
1371 continue;
1372 start_index = ps_select_array[i];
1373
1374 if(!(paging_segments[start_index])) {
1375 j = start_index+1;
1376 physical_transfer_cluster_count = 0;
1377 }
0b4e3aa0 1378 else if ((physical_transfer_cluster_count+1) == (ALLOC_STRIDE >>
1c79356b 1379 (((paging_segments[start_index])->ps_clshift)
0b4e3aa0 1380 + vm_page_shift))) {
1381 physical_transfer_cluster_count = 0;
1382 j = start_index + 1;
1383 } else {
1384 physical_transfer_cluster_count+=1;
1385 j = start_index;
1386 if(start_index == 0)
1387 start_index = paging_segment_max;
1388 else
1389 start_index = start_index - 1;
1390 }
1391
1392 while (1) {
1393 if (j > paging_segment_max)
1394 j = 0;
1395 if ((ps = paging_segments[j]) &&
1396 (ps->ps_bs->bs_priority == i)) {
1397 /*
1398 * Force the ps cluster size to be
1399 * >= that of the vstruct.
1400 */
1401 PS_LOCK(ps);
1402 if (ps->ps_going_away) {
1403 /* this segment is being turned off */
1404 } else if ((ps->ps_clcount) &&
1405 (ps->ps_clshift >= shift)) {
1406 ipc_port_t trigger = IP_NULL;
1407
1408 ps->ps_clcount--;
1409 dp_pages_free -= 1 << ps->ps_clshift;
1410 if(min_pages_trigger_port &&
1411 (dp_pages_free <
1412 minimum_pages_remaining)) {
0b4e3aa0 1413 trigger = min_pages_trigger_port;
1414 min_pages_trigger_port = NULL;
1415 }
1416 PS_UNLOCK(ps);
1417 /*
1418 * found one, quit looking.
1419 */
1420 ps_select_array[i] = j;
1421 PSL_UNLOCK();
1422
1423 if (trigger != IP_NULL) {
1424 default_pager_space_alert(
1425 trigger,
1426 HI_WAT_ALERT);
1427 ipc_port_release_send(trigger);
1428 }
1429 *psindex = j;
1430 return ps;
1431 }
1432 PS_UNLOCK(ps);
1433 }
1434 if (j == start_index) {
1435 /*
1436 * none at this priority -- mark it full
1437 */
1438 ps_select_array[i] = BS_FULLPRI;
1439 break;
1440 }
1441 j++;
1442 }
1443 }
1444 PSL_UNLOCK();
1445 return PAGING_SEGMENT_NULL;
1446}
1447
1448vm_offset_t ps_allocate_cluster(vstruct_t, int *, paging_segment_t); /*forward*/
1449
1450vm_offset_t
1451ps_allocate_cluster(
1452 vstruct_t vs,
1453 int *psindex,
1454 paging_segment_t use_ps)
1455{
91447636 1456 unsigned int byte_num;
1457 int bit_num = 0;
1458 paging_segment_t ps;
1459 vm_offset_t cluster;
0b4e3aa0 1460 ipc_port_t trigger = IP_NULL;
1461
1462 /*
1463 * Find best paging segment.
1464 * ps_select_segment will decrement cluster count on ps.
1465 * Must pass cluster shift to find the most appropriate segment.
1466 */
1467 /* NOTE: The addition of paging segment delete capability threatened
1468 * to seriously complicate the treatment of paging segments in this
1469 * module and the ones that call it (notably ps_clmap), because of the
1470 * difficulty in assuring that the paging segment would continue to
1471 * exist between being unlocked and locked. This was
1472 * avoided because all calls to this module are based in either
1473 * dp_memory_object calls which rely on the vs lock, or by
1474 * the transfer function which is part of the segment delete path.
1475 * The transfer function which is part of paging segment delete is
1476 * protected from multiple callers by the backing store lock.
1477 * The paging segment delete function treats mappings to a paging
1478 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1479 * while data is transferred to the remaining segments. This is in
1480 * line with the view that incomplete or in-transition mappings between
1481 * data, a vstruct, and backing store are protected by the vs lock.
1482 * This and the ordering of the paging segment "going_away" bit setting
1483 * protects us.
1484 */
1485 if (use_ps != PAGING_SEGMENT_NULL) {
1486 ps = use_ps;
1487 PSL_LOCK();
1488 PS_LOCK(ps);
1489
1490 ASSERT(ps->ps_clcount != 0);
1491
1492 ps->ps_clcount--;
1493 dp_pages_free -= 1 << ps->ps_clshift;
1494 if(min_pages_trigger_port &&
1495 (dp_pages_free < minimum_pages_remaining)) {
0b4e3aa0 1496 trigger = min_pages_trigger_port;
1497 min_pages_trigger_port = NULL;
1498 }
0b4e3aa0 1499 PSL_UNLOCK();
1c79356b 1500 PS_UNLOCK(ps);
1501 if (trigger != IP_NULL) {
1502 default_pager_space_alert(trigger, HI_WAT_ALERT);
1503 ipc_port_release_send(trigger);
1504 }
1505
1506 } else if ((ps = ps_select_segment(vs->vs_clshift, psindex)) ==
1507 PAGING_SEGMENT_NULL) {
1508 static uint32_t lastnotify = 0;
1509 uint32_t now, nanoseconds_dummy;
1510
1511 /*
1512 * Emit a notification of the low-paging resource condition
1513 * but don't issue it more than once every five seconds. This
1514 * prevents us from overflowing logs with thousands of
1515 * repetitions of the message.
1516 */
1517 clock_get_system_nanotime(&now, &nanoseconds_dummy);
1518 if (now > lastnotify + 5) {
1519 dprintf(("no space in available paging segments\n"));
1520 lastnotify = now;
1521 }
1522
1c79356b 1523 /* the count got off maybe, reset to zero */
0b4e3aa0 1524 PSL_LOCK();
1525 dp_pages_free = 0;
1526 if(min_pages_trigger_port) {
0b4e3aa0 1527 trigger = min_pages_trigger_port;
1528 min_pages_trigger_port = NULL;
1529 bs_low = TRUE;
1530 }
1531 PSL_UNLOCK();
1532 if (trigger != IP_NULL) {
1533 default_pager_space_alert(trigger, HI_WAT_ALERT);
1534 ipc_port_release_send(trigger);
1535 }
1536 return (vm_offset_t) -1;
1537 }
1538
1539 /*
1540 * Look for an available cluster. At the end of the loop,
1541 * byte_num is the byte offset and bit_num is the bit offset of the
1542 * first zero bit in the paging segment bitmap.
1543 */
1544 PS_LOCK(ps);
1545 byte_num = ps->ps_hint;
1546 for (; byte_num < howmany(ps->ps_ncls, NBBY); byte_num++) {
1547 if (*(ps->ps_bmap + byte_num) != BYTEMASK) {
1548 for (bit_num = 0; bit_num < NBBY; bit_num++) {
1549 if (isclr((ps->ps_bmap + byte_num), bit_num))
1550 break;
1551 }
1552 ASSERT(bit_num != NBBY);
1553 break;
1554 }
1555 }
1556 ps->ps_hint = byte_num;
1557 cluster = (byte_num*NBBY) + bit_num;
1558
1559 /* Space was reserved, so this must be true */
1560 ASSERT(cluster < ps->ps_ncls);
1561
1562 setbit(ps->ps_bmap, cluster);
1563 PS_UNLOCK(ps);
1564
1565 return cluster;
1566}
1567
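/*
 * Editorial sketch, not part of the original source: the bitmap probe used
 * by ps_allocate_cluster() above, i.e. find the first clear bit at or after
 * the per-segment hint.  The standalone helper name is invented; NBBY is
 * bits per byte and BYTEMASK marks a fully allocated byte.
 */
#if 0
static int
example_first_free_cluster(unsigned char *bmap, int ncls, int hint_byte)
{
	int byte_num, bit_num;

	for (byte_num = hint_byte; byte_num < howmany(ncls, NBBY); byte_num++) {
		if (bmap[byte_num] == BYTEMASK)		/* byte fully allocated */
			continue;
		for (bit_num = 0; bit_num < NBBY; bit_num++)
			if (isclr(bmap + byte_num, bit_num))
				return (byte_num * NBBY) + bit_num;
	}
	return -1;	/* no free cluster; the real code asserts space was reserved */
}
#endif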
1568void ps_deallocate_cluster(paging_segment_t, vm_offset_t); /* forward */
1569
1570void
1571ps_deallocate_cluster(
1572 paging_segment_t ps,
1573 vm_offset_t cluster)
1574{
1575
1576 if (cluster >= (vm_offset_t) ps->ps_ncls)
1577 panic("ps_deallocate_cluster: Invalid cluster number");
1578
1579 /*
1580 * Lock the paging segment, clear the cluster's bitmap and increment the
1581 * number of free cluster.
1582 */
1583 PSL_LOCK();
1584 PS_LOCK(ps);
1585 clrbit(ps->ps_bmap, cluster);
1586 ++ps->ps_clcount;
1587 dp_pages_free += 1 << ps->ps_clshift;
0b4e3aa0 1588 PSL_UNLOCK();
1589
1590 /*
1591 * Move the hint down to the freed cluster if it is
1592 * less than the current hint.
1593 */
1594 if ((cluster/NBBY) < ps->ps_hint) {
1595 ps->ps_hint = (cluster/NBBY);
1596 }
1597
1598 PS_UNLOCK(ps);
1599
1600 /*
1601 * If we're freeing space on a full priority, reset the array.
1602 */
1603 PSL_LOCK();
1604 if (ps_select_array[ps->ps_bs->bs_priority] == BS_FULLPRI)
1605 ps_select_array[ps->ps_bs->bs_priority] = 0;
1606 PSL_UNLOCK();
1607
1608 return;
1609}
1610
1611void ps_dealloc_vsmap(struct vs_map *, vm_size_t); /* forward */
1612
1613void
1614ps_dealloc_vsmap(
1615 struct vs_map *vsmap,
1616 vm_size_t size)
1617{
91447636 1618 unsigned int i;
1619 for (i = 0; i < size; i++)
1620 if (!VSM_ISCLR(vsmap[i]) && !VSM_ISERR(vsmap[i]))
1621 ps_deallocate_cluster(VSM_PS(vsmap[i]),
1622 VSM_CLOFF(vsmap[i]));
1623}
1624
1625void
1626ps_vstruct_dealloc(
1627 vstruct_t vs)
1628{
1629 unsigned int i;
1630// spl_t s;
1631
1632 VS_MAP_LOCK(vs);
1633
1634 /*
1635 * If this is an indirect structure, then we walk through the valid
1636 * (non-zero) indirect pointers and deallocate the clusters
1637 * associated with each used map entry (via ps_dealloc_vsmap).
1638 * When all of the clusters in an indirect block have been
1639 * freed, we deallocate the block. When all of the indirect
1640 * blocks have been deallocated we deallocate the memory
1641 * holding the indirect pointers.
1642 */
1643 if (vs->vs_indirect) {
1644 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
1645 if (vs->vs_imap[i] != NULL) {
1646 ps_dealloc_vsmap(vs->vs_imap[i], CLMAP_ENTRIES);
91447636 1647 kfree(vs->vs_imap[i], CLMAP_THRESHOLD);
1648 }
1649 }
91447636 1650 kfree(vs->vs_imap, INDIRECT_CLMAP_SIZE(vs->vs_size));
1651 } else {
1652 /*
1653 * Direct map. Free used clusters, then memory.
1654 */
1655 ps_dealloc_vsmap(vs->vs_dmap, vs->vs_size);
91447636 1656 kfree(vs->vs_dmap, CLMAP_SIZE(vs->vs_size));
1657 }
1658 VS_MAP_UNLOCK(vs);
1659
1660 bs_commit(- vs->vs_size);
1661
91447636 1662 zfree(vstruct_zone, vs);
1663}
1664
91447636 1665int ps_map_extend(vstruct_t, unsigned int); /* forward */
1666
1667int ps_map_extend(
1668 vstruct_t vs,
91447636 1669 unsigned int new_size)
1670{
1671 struct vs_map **new_imap;
1672 struct vs_map *new_dmap = NULL;
1673 int newdsize;
1674 int i;
1675 void *old_map = NULL;
1676 int old_map_size = 0;
1677
1678 if (vs->vs_size >= new_size) {
1679 /*
1680 * Someone has already done the work.
1681 */
1682 return 0;
1683 }
1684
1685 /*
1686 * If the new size extends into the indirect range, then we have one
1687 * of two cases: we are going from indirect to indirect, or we are
1688 * going from direct to indirect. If we are going from indirect to
1689 * indirect, then it is possible that the new size will fit in the old
1690 * indirect map. If this is the case, then just reset the size of the
1691 * vstruct map and we are done. If the new size will not
1692 * fit into the old indirect map, then we have to allocate a new
1693 * indirect map and copy the old map pointers into this new map.
1694 *
1695 * If we are going from direct to indirect, then we have to allocate a
1696 * new indirect map and copy the old direct pages into the first
1697 * indirect page of the new map.
1698 * NOTE: allocating memory here is dangerous, as we're in the
1699 * pageout path.
1700 */
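	/*
	 * Editorial summary, not in the original, of the cases handled below:
	 *
	 *	indirect -> indirect, same INDIRECT_CLMAP_SIZE: just grow vs_size
	 *	indirect -> larger indirect: allocate new imap, copy old pointers
	 *	direct   -> indirect:        allocate imap, old dmap entries are
	 *	                             copied into imap[0]
	 *	direct   -> larger direct:   allocate new dmap, copy old entries
	 */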
1701 if (INDIRECT_CLMAP(new_size)) {
1702 int new_map_size = INDIRECT_CLMAP_SIZE(new_size);
1703
1704 /*
1705 * Get a new indirect map and zero it.
1706 */
1707 old_map_size = INDIRECT_CLMAP_SIZE(vs->vs_size);
1708 if (vs->vs_indirect &&
1709 (new_map_size == old_map_size)) {
1710 bs_commit(new_size - vs->vs_size);
1711 vs->vs_size = new_size;
1712 return 0;
1713 }
1714
1715 new_imap = (struct vs_map **)kalloc(new_map_size);
1716 if (new_imap == NULL) {
1717 return -1;
1718 }
1719 memset(new_imap, 0, new_map_size);
1720
1721 if (vs->vs_indirect) {
1722 /* Copy old entries into new map */
1723 memcpy(new_imap, vs->vs_imap, old_map_size);
1724 /* Arrange to free the old map */
1725 old_map = (void *) vs->vs_imap;
1726 newdsize = 0;
1727 } else { /* Old map was a direct map */
1728 /* Allocate an indirect page */
1729 if ((new_imap[0] = (struct vs_map *)
1730 kalloc(CLMAP_THRESHOLD)) == NULL) {
91447636 1731 kfree(new_imap, new_map_size);
1732 return -1;
1733 }
1734 new_dmap = new_imap[0];
1735 newdsize = CLMAP_ENTRIES;
1736 }
1737 } else {
1738 new_imap = NULL;
1739 newdsize = new_size;
1740 /*
1741 * If the new map is a direct map, then the old map must
1742 * also have been a direct map. All we have to do is
1743 * to allocate a new direct map, copy the old entries
1744 * into it and free the old map.
1745 */
1746 if ((new_dmap = (struct vs_map *)
1747 kalloc(CLMAP_SIZE(new_size))) == NULL) {
1748 return -1;
1749 }
1750 }
1751 if (newdsize) {
1752
1753 /* Free the old map */
1754 old_map = (void *) vs->vs_dmap;
1755 old_map_size = CLMAP_SIZE(vs->vs_size);
1756
1757 /* Copy info from the old map into the new map */
1758 memcpy(new_dmap, vs->vs_dmap, old_map_size);
1759
1760 /* Initialize the rest of the new map */
1761 for (i = vs->vs_size; i < newdsize; i++)
1762 VSM_CLR(new_dmap[i]);
1763 }
1764 if (new_imap) {
1765 vs->vs_imap = new_imap;
1766 vs->vs_indirect = TRUE;
1767 } else
1768 vs->vs_dmap = new_dmap;
1769 bs_commit(new_size - vs->vs_size);
1770 vs->vs_size = new_size;
1771 if (old_map)
91447636 1772 kfree(old_map, old_map_size);
1773 return 0;
1774}
1775
1776vm_offset_t
1777ps_clmap(
1778 vstruct_t vs,
1779 vm_offset_t offset,
1780 struct clmap *clmap,
1781 int flag,
1782 vm_size_t size,
1783 int error)
1784{
1785 vm_offset_t cluster; /* The cluster of offset. */
1786 vm_offset_t newcl; /* The new cluster allocated. */
1787 vm_offset_t newoff;
91447636 1788 unsigned int i;
1c79356b 1789 struct vs_map *vsmap;
1790
1791 VS_MAP_LOCK(vs);
1792
1793 ASSERT(vs->vs_dmap);
55e303ae 1794 cluster = atop_32(offset) >> vs->vs_clshift;
1795
1796 /*
1797 * Initialize cluster error value
1798 */
1799 clmap->cl_error = 0;
1800
1801 /*
1802 * If the object has grown, extend the page map.
1803 */
1804 if (cluster >= vs->vs_size) {
1805 if (flag == CL_FIND) {
1806 /* Do not allocate if just doing a lookup */
1807 VS_MAP_UNLOCK(vs);
1808 return (vm_offset_t) -1;
1809 }
1810 if (ps_map_extend(vs, cluster + 1)) {
1811 VS_MAP_UNLOCK(vs);
1812 return (vm_offset_t) -1;
1813 }
1814 }
1815
1816 /*
1817 * Look for the desired cluster. If the map is indirect, then we
1818 * have a two level lookup. First find the indirect block, then
1819 * find the actual cluster. If the indirect block has not yet
1820 * been allocated, then do so. If the cluster has not yet been
1821 * allocated, then do so.
1822 *
1823 * If any of the allocations fail, then return an error.
1824 * Don't allocate if just doing a lookup.
1825 */
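	/*
	 * Editorial note, not in the original: cluster was computed above as
	 * atop_32(offset) >> vs_clshift.  For an indirect map, the block is
	 * vs_imap[cluster / CLMAP_ENTRIES] and the entry within the block is
	 * at cluster % CLMAP_ENTRIES; a direct map is indexed the same way,
	 * with vs_dmap acting as the single block.
	 */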
1826 if (vs->vs_indirect) {
1827 long ind_block = cluster/CLMAP_ENTRIES;
1828
1829 /* Is the indirect block allocated? */
1830 vsmap = vs->vs_imap[ind_block];
1831 if (vsmap == NULL) {
1832 if (flag == CL_FIND) {
1833 VS_MAP_UNLOCK(vs);
1834 return (vm_offset_t) -1;
1835 }
1836
1837 /* Allocate the indirect block */
1838 vsmap = (struct vs_map *) kalloc(CLMAP_THRESHOLD);
1839 if (vsmap == NULL) {
1840 VS_MAP_UNLOCK(vs);
1841 return (vm_offset_t) -1;
1842 }
1843 /* Initialize the cluster offsets */
1844 for (i = 0; i < CLMAP_ENTRIES; i++)
1845 VSM_CLR(vsmap[i]);
1846 vs->vs_imap[ind_block] = vsmap;
1847 }
1848 } else
1849 vsmap = vs->vs_dmap;
1850
1851 ASSERT(vsmap);
1852 vsmap += cluster%CLMAP_ENTRIES;
1853
1854 /*
1855 * At this point, vsmap points to the struct vs_map desired.
1856 *
1857 * Look in the map for the cluster, if there was an error on a
1858 * previous write, flag it and return. If it is not yet
1859 * allocated, then allocate it, if we're writing; if we're
1860 * doing a lookup and the cluster's not allocated, return error.
1861 */
1862 if (VSM_ISERR(*vsmap)) {
1863 clmap->cl_error = VSM_GETERR(*vsmap);
1864 VS_MAP_UNLOCK(vs);
1865 return (vm_offset_t) -1;
1866 } else if (VSM_ISCLR(*vsmap)) {
1867 int psindex;
1868
1869 if (flag == CL_FIND) {
1870 /*
1871 * If there's an error and the entry is clear, then
1872 * we've run out of swap space. Record the error
1873 * here and return.
1874 */
1875 if (error) {
1876 VSM_SETERR(*vsmap, error);
1877 }
1878 VS_MAP_UNLOCK(vs);
1879 return (vm_offset_t) -1;
1880 } else {
1881 /*
1882 * Attempt to allocate a cluster from the paging segment
1883 */
1884 newcl = ps_allocate_cluster(vs, &psindex,
1885 PAGING_SEGMENT_NULL);
91447636 1886 if (newcl == (vm_offset_t) -1) {
1887 VS_MAP_UNLOCK(vs);
1888 return (vm_offset_t) -1;
1889 }
1890 VSM_CLR(*vsmap);
1891 VSM_SETCLOFF(*vsmap, newcl);
1892 VSM_SETPS(*vsmap, psindex);
1893 }
1894 } else
1895 newcl = VSM_CLOFF(*vsmap);
1896
1897 /*
1898 * Fill in pertinent fields of the clmap
1899 */
1900 clmap->cl_ps = VSM_PS(*vsmap);
1901 clmap->cl_numpages = VSCLSIZE(vs);
1902 clmap->cl_bmap.clb_map = (unsigned int) VSM_BMAP(*vsmap);
1903
1904 /*
1905 * Byte offset in paging segment is byte offset to cluster plus
1906 * byte offset within cluster. It looks ugly, but should be
1907 * relatively quick.
1908 */
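	/*
	 * Editorial example, not in the original: with 4 KB pages and
	 * vs_clshift == 2 a cluster spans 16 KB, so the mask below reduces to
	 * newoff = offset & 0x3fff.
	 */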
1909 ASSERT(trunc_page(offset) == offset);
55e303ae 1910 newcl = ptoa_32(newcl) << vs->vs_clshift;
1911 newoff = offset & ((1<<(vm_page_shift + vs->vs_clshift)) - 1);
1912 if (flag == CL_ALLOC) {
1913 /*
1914 * set bits in the allocation bitmap according to which
1915 * pages were requested. size is in bytes.
1916 */
55e303ae 1917 i = atop_32(newoff);
1918 while ((size > 0) && (i < VSCLSIZE(vs))) {
1919 VSM_SETALLOC(*vsmap, i);
1920 i++;
1921 size -= vm_page_size;
1922 }
1923 }
1924 clmap->cl_alloc.clb_map = (unsigned int) VSM_ALLOC(*vsmap);
1925 if (newoff) {
1926 /*
1927 * Offset is not cluster aligned, so number of pages
1928 * and bitmaps must be adjusted
1929 */
55e303ae 1930 clmap->cl_numpages -= atop_32(newoff);
1931 CLMAP_SHIFT(clmap, vs);
1932 CLMAP_SHIFTALLOC(clmap, vs);
1933 }
1934
1935 /*
1936 *
1937 * The setting of valid bits and handling of write errors
1938 * must be done here, while we hold the lock on the map.
1939 * It logically should be done in ps_vs_write_complete().
1940 * The size and error information has been passed from
1941 * ps_vs_write_complete(). If the size parameter is non-zero,
1942 * then there is work to be done. If error is also non-zero,
1943 * then the error number is recorded in the cluster and the
1944 * entire cluster is in error.
1945 */
1946 if (size && flag == CL_FIND) {
1947 vm_offset_t off = (vm_offset_t) 0;
1948
1949 if (!error) {
1950 for (i = VSCLSIZE(vs) - clmap->cl_numpages; size > 0;
1951 i++) {
1952 VSM_SETPG(*vsmap, i);
1953 size -= vm_page_size;
1954 }
1955 ASSERT(i <= VSCLSIZE(vs));
1956 } else {
1957 BS_STAT(clmap->cl_ps->ps_bs,
1958 clmap->cl_ps->ps_bs->bs_pages_out_fail +=
55e303ae 1959 atop_32(size));
1960 off = VSM_CLOFF(*vsmap);
1961 VSM_SETERR(*vsmap, error);
1962 }
1963 /*
1964 * Deallocate cluster if error, and no valid pages
1965 * already present.
1966 */
1967 if (off != (vm_offset_t) 0)
1968 ps_deallocate_cluster(clmap->cl_ps, off);
1969 VS_MAP_UNLOCK(vs);
1970 return (vm_offset_t) 0;
1971 } else
1972 VS_MAP_UNLOCK(vs);
1973
1974 DP_DEBUG(DEBUG_VS_INTERNAL,
1975 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1976 newcl+newoff, (int) vs, (int) vsmap, flag));
1977 DP_DEBUG(DEBUG_VS_INTERNAL,
1978 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1979 (int) clmap->cl_ps, clmap->cl_numpages,
1980 (int) clmap->cl_bmap.clb_map, (int) clmap->cl_alloc.clb_map));
1981
1982 return (newcl + newoff);
1983}
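
/*
 * Usage sketch (mirrors the callers later in this file, illustrative only):
 * a read-side lookup that must not allocate passes CL_FIND with a zero size,
 *
 *	if (ps_clmap(vs, offset & ~cl_mask, &clmap, CL_FIND, 0, 0) ==
 *	    (vm_offset_t) -1)
 *		... cluster is not present in the backing store ...
 *
 * while the pageout path passes CL_ALLOC with the cluster size so that a
 * missing indirect block and cluster are allocated on the way down.
 */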
1984
1985void ps_clunmap(vstruct_t, vm_offset_t, vm_size_t); /* forward */
1986
1987void
1988ps_clunmap(
1989 vstruct_t vs,
1990 vm_offset_t offset,
1991 vm_size_t length)
1992{
1993 vm_offset_t cluster; /* The cluster number of offset */
1994 struct vs_map *vsmap;
1995
1996 VS_MAP_LOCK(vs);
1997
1998 /*
1999 * Loop through all clusters in this range, freeing paging segment
2000 * clusters and map entries as encountered.
2001 */
2002 while (length > 0) {
2003 vm_offset_t newoff;
91447636 2004 unsigned int i;
1c79356b 2005
55e303ae 2006 cluster = atop_32(offset) >> vs->vs_clshift;
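		/*
		 * e.g. with 4 KB pages and vs_clshift == 2, an offset of
		 * 0x9000 is page 9 and therefore lives in cluster 9 >> 2 == 2.
		 */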
2007 if (vs->vs_indirect) /* indirect map */
2008 vsmap = vs->vs_imap[cluster/CLMAP_ENTRIES];
2009 else
2010 vsmap = vs->vs_dmap;
2011 if (vsmap == NULL) {
2012 VS_MAP_UNLOCK(vs);
2013 return;
2014 }
2015 vsmap += cluster%CLMAP_ENTRIES;
2016 if (VSM_ISCLR(*vsmap)) {
2017 length -= vm_page_size;
2018 offset += vm_page_size;
2019 continue;
2020 }
2021 /*
2022 * We've got a valid mapping. Clear it and deallocate
2023 * paging segment cluster pages.
 2024 * Optimize for entire cluster clearing.
2025 */
91447636 2026 if ( (newoff = (offset&((1<<(vm_page_shift+vs->vs_clshift))-1))) ) {
2027 /*
2028 * Not cluster aligned.
2029 */
2030 ASSERT(trunc_page(newoff) == newoff);
55e303ae 2031 i = atop_32(newoff);
2032 } else
2033 i = 0;
2034 while ((i < VSCLSIZE(vs)) && (length > 0)) {
2035 VSM_CLRPG(*vsmap, i);
2036 VSM_CLRALLOC(*vsmap, i);
2037 length -= vm_page_size;
2038 offset += vm_page_size;
2039 i++;
2040 }
2041
2042 /*
2043 * If map entry is empty, clear and deallocate cluster.
2044 */
2045 if (!VSM_ALLOC(*vsmap)) {
2046 ps_deallocate_cluster(VSM_PS(*vsmap),
2047 VSM_CLOFF(*vsmap));
2048 VSM_CLR(*vsmap);
2049 }
2050 }
2051
2052 VS_MAP_UNLOCK(vs);
2053}
2054
2055void ps_vs_write_complete(vstruct_t, vm_offset_t, vm_size_t, int); /* forward */
2056
2057void
2058ps_vs_write_complete(
2059 vstruct_t vs,
2060 vm_offset_t offset,
2061 vm_size_t size,
2062 int error)
2063{
2064 struct clmap clmap;
2065
2066 /*
2067 * Get the struct vsmap for this cluster.
2068 * Use READ, even though it was written, because the
2069 * cluster MUST be present, unless there was an error
2070 * in the original ps_clmap (e.g. no space), in which
2071 * case, nothing happens.
2072 *
2073 * Must pass enough information to ps_clmap to allow it
2074 * to set the vs_map structure bitmap under lock.
2075 */
2076 (void) ps_clmap(vs, offset, &clmap, CL_FIND, size, error);
2077}
2078
2079void vs_cl_write_complete(vstruct_t, paging_segment_t, vm_offset_t, vm_offset_t, vm_size_t, boolean_t, int); /* forward */
2080
2081void
2082vs_cl_write_complete(
2083 vstruct_t vs,
2084 __unused paging_segment_t ps,
2085 vm_offset_t offset,
2086 __unused vm_offset_t addr,
2087 vm_size_t size,
2088 boolean_t async,
2089 int error)
1c79356b 2090{
91447636 2091// kern_return_t kr;
2092
2093 if (error) {
2094 /*
2095 * For internal objects, the error is recorded on a
2096 * per-cluster basis by ps_clmap() which is called
2097 * by ps_vs_write_complete() below.
2098 */
2099 dprintf(("write failed error = 0x%x\n", error));
2100 /* add upl_abort code here */
2101 } else
55e303ae 2102 GSTAT(global_stats.gs_pages_out += atop_32(size));
2103 /*
2104 * Notify the vstruct mapping code, so it can do its accounting.
2105 */
2106 ps_vs_write_complete(vs, offset, size, error);
2107
2108 if (async) {
2109 VS_LOCK(vs);
2110 ASSERT(vs->vs_async_pending > 0);
2111 vs->vs_async_pending -= size;
2112 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
2113 vs->vs_waiting_async = FALSE;
2114 VS_UNLOCK(vs);
2115 /* mutex_unlock(&vs->vs_waiting_async); */
0b4e3aa0 2116 thread_wakeup(&vs->vs_async_pending);
2117 } else {
2118 VS_UNLOCK(vs);
2119 }
2120 }
2121}
2122
2123#ifdef DEVICE_PAGING
2124kern_return_t device_write_reply(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2125
2126kern_return_t
2127device_write_reply(
2128 MACH_PORT_FACE reply_port,
2129 kern_return_t device_code,
2130 io_buf_len_t bytes_written)
2131{
2132 struct vs_async *vsa;
2133
2134 vsa = (struct vs_async *)
2135 ((struct vstruct_alias *)(reply_port->alias))->vs;
2136
2137 if (device_code == KERN_SUCCESS && bytes_written != vsa->vsa_size) {
2138 device_code = KERN_FAILURE;
2139 }
2140
2141 vsa->vsa_error = device_code;
2142
2143
2144 ASSERT(vsa->vsa_vs != VSTRUCT_NULL);
2145 if(vsa->vsa_flags & VSA_TRANSFER) {
2146 /* revisit when async disk segments redone */
2147 if(vsa->vsa_error) {
2148 /* need to consider error condition. re-write data or */
2149 /* throw it away here. */
91447636 2150 vm_map_copy_discard((vm_map_copy_t)vsa->vsa_addr);
2151 }
2152 ps_vs_write_complete(vsa->vsa_vs, vsa->vsa_offset,
2153 vsa->vsa_size, vsa->vsa_error);
2154 } else {
2155 vs_cl_write_complete(vsa->vsa_vs, vsa->vsa_ps, vsa->vsa_offset,
2156 vsa->vsa_addr, vsa->vsa_size, TRUE,
2157 vsa->vsa_error);
2158 }
2159 VS_FREE_ASYNC(vsa);
2160
2161 return KERN_SUCCESS;
2162}
2163
2164kern_return_t device_write_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2165kern_return_t
2166device_write_reply_inband(
2167 MACH_PORT_FACE reply_port,
2168 kern_return_t return_code,
2169 io_buf_len_t bytes_written)
2170{
2171 panic("device_write_reply_inband: illegal");
2172 return KERN_SUCCESS;
2173}
2174
2175kern_return_t device_read_reply(MACH_PORT_FACE, kern_return_t, io_buf_ptr_t, mach_msg_type_number_t);
2176kern_return_t
2177device_read_reply(
2178 MACH_PORT_FACE reply_port,
2179 kern_return_t return_code,
2180 io_buf_ptr_t data,
2181 mach_msg_type_number_t dataCnt)
2182{
2183 struct vs_async *vsa;
2184 vsa = (struct vs_async *)
2185 ((struct vstruct_alias *)(reply_port->alias))->vs;
2186 vsa->vsa_addr = (vm_offset_t)data;
2187 vsa->vsa_size = (vm_size_t)dataCnt;
2188 vsa->vsa_error = return_code;
2189 thread_wakeup(&vsa->vsa_lock);
2190 return KERN_SUCCESS;
2191}
2192
2193kern_return_t device_read_reply_inband(MACH_PORT_FACE, kern_return_t, io_buf_ptr_inband_t, mach_msg_type_number_t);
2194kern_return_t
2195device_read_reply_inband(
2196 MACH_PORT_FACE reply_port,
2197 kern_return_t return_code,
2198 io_buf_ptr_inband_t data,
2199 mach_msg_type_number_t dataCnt)
2200{
2201 panic("device_read_reply_inband: illegal");
2202 return KERN_SUCCESS;
2203}
2204
2205kern_return_t device_read_reply_overwrite(MACH_PORT_FACE, kern_return_t, io_buf_len_t);
2206kern_return_t
2207device_read_reply_overwrite(
2208 MACH_PORT_FACE reply_port,
2209 kern_return_t return_code,
2210 io_buf_len_t bytes_read)
2211{
2212 panic("device_read_reply_overwrite: illegal\n");
2213 return KERN_SUCCESS;
2214}
2215
2216kern_return_t device_open_reply(MACH_PORT_FACE, kern_return_t, MACH_PORT_FACE);
2217kern_return_t
2218device_open_reply(
2219 MACH_PORT_FACE reply_port,
2220 kern_return_t return_code,
2221 MACH_PORT_FACE device_port)
2222{
2223 panic("device_open_reply: illegal\n");
2224 return KERN_SUCCESS;
2225}
2226
2227kern_return_t
2228ps_read_device(
2229 paging_segment_t ps,
2230 vm_offset_t offset,
2231 vm_offset_t *bufferp,
2232 unsigned int size,
2233 unsigned int *residualp,
2234 int flags)
2235{
2236 kern_return_t kr;
2237 recnum_t dev_offset;
2238 unsigned int bytes_wanted;
2239 unsigned int bytes_read;
2240 unsigned int total_read;
2241 vm_offset_t dev_buffer;
2242 vm_offset_t buf_ptr;
2243 unsigned int records_read;
2244 struct vs_async *vsa;
2245 mutex_t vs_waiting_read_reply;
2246
2247 device_t device;
2248 vm_map_copy_t device_data = NULL;
2249 default_pager_thread_t *dpt = NULL;
2250
2251 device = dev_port_lookup(ps->ps_device);
55e303ae 2252 clustered_reads[atop_32(size)]++;
2253
2254 dev_offset = (ps->ps_offset +
2255 (offset >> (vm_page_shift - ps->ps_record_shift)));
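	/*
	 * e.g. with 4 KB pages and 512-byte device records
	 * (ps_record_shift == 3) the shift is vm_page_shift - 3 == 9, so a
	 * byte offset of 0x9000 becomes 72 records past ps_offset.
	 */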
2256 bytes_wanted = size;
2257 total_read = 0;
2258 *bufferp = (vm_offset_t)NULL;
2259
2260 do {
2261 vsa = VS_ALLOC_ASYNC();
2262 if (vsa) {
2263 vsa->vsa_vs = NULL;
2264 vsa->vsa_addr = 0;
2265 vsa->vsa_offset = 0;
2266 vsa->vsa_size = 0;
2267 vsa->vsa_ps = NULL;
2268 }
91447636 2269 mutex_init(&vsa->vsa_lock, 0);
2270 ip_lock(vsa->reply_port);
2271 vsa->reply_port->ip_sorights++;
2272 ip_reference(vsa->reply_port);
2273 ip_unlock(vsa->reply_port);
2274 kr = ds_device_read_common(device,
2275 vsa->reply_port,
2276 (mach_msg_type_name_t)
2277 MACH_MSG_TYPE_MOVE_SEND_ONCE,
2278 (dev_mode_t) 0,
2279 dev_offset,
2280 bytes_wanted,
2281 (IO_READ | IO_CALL),
2282 (io_buf_ptr_t *) &dev_buffer,
2283 (mach_msg_type_number_t *) &bytes_read);
2284 if(kr == MIG_NO_REPLY) {
2285 assert_wait(&vsa->vsa_lock, THREAD_UNINT);
9bccf70c 2286 thread_block(THREAD_CONTINUE_NULL);
2287
2288 dev_buffer = vsa->vsa_addr;
2289 bytes_read = (unsigned int)vsa->vsa_size;
2290 kr = vsa->vsa_error;
2291 }
2292 VS_FREE_ASYNC(vsa);
2293 if (kr != KERN_SUCCESS || bytes_read == 0) {
2294 break;
2295 }
2296 total_read += bytes_read;
2297
2298 /*
2299 * If we got the entire range, use the returned dev_buffer.
2300 */
2301 if (bytes_read == size) {
2302 *bufferp = (vm_offset_t)dev_buffer;
2303 break;
2304 }
2305
2306#if 1
2307 dprintf(("read only %d bytes out of %d\n",
2308 bytes_read, bytes_wanted));
2309#endif
2310 if(dpt == NULL) {
2311 dpt = get_read_buffer();
2312 buf_ptr = dpt->dpt_buffer;
2313 *bufferp = (vm_offset_t)buf_ptr;
2314 }
2315 /*
2316 * Otherwise, copy the data into the provided buffer (*bufferp)
2317 * and append the rest of the range as it comes in.
2318 */
2319 memcpy((void *) buf_ptr, (void *) dev_buffer, bytes_read);
2320 buf_ptr += bytes_read;
2321 bytes_wanted -= bytes_read;
2322 records_read = (bytes_read >>
2323 (vm_page_shift - ps->ps_record_shift));
2324 dev_offset += records_read;
2325 DP_DEBUG(DEBUG_VS_INTERNAL,
2326 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2327 dev_buffer, bytes_read));
2328 if (vm_deallocate(kernel_map, dev_buffer, bytes_read)
2329 != KERN_SUCCESS)
2330 Panic("dealloc buf");
2331 } while (bytes_wanted);
2332
2333 *residualp = size - total_read;
2334 if((dev_buffer != *bufferp) && (total_read != 0)) {
2335 vm_offset_t temp_buffer;
91447636 2336 vm_allocate(kernel_map, &temp_buffer, total_read, VM_FLAGS_ANYWHERE);
2337 memcpy((void *) temp_buffer, (void *) *bufferp, total_read);
2338 if(vm_map_copyin_page_list(kernel_map, temp_buffer, total_read,
2339 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2340 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2341 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2342 (vm_map_copy_t *)&device_data, FALSE))
2343 panic("ps_read_device: cannot copyin locally provided buffer\n");
2344 }
2345 else if((kr == KERN_SUCCESS) && (total_read != 0) && (dev_buffer != 0)){
2346 if(vm_map_copyin_page_list(kernel_map, dev_buffer, bytes_read,
2347 VM_MAP_COPYIN_OPT_SRC_DESTROY |
2348 VM_MAP_COPYIN_OPT_STEAL_PAGES |
2349 VM_MAP_COPYIN_OPT_PMAP_ENTER,
2350 (vm_map_copy_t *)&device_data, FALSE))
2351 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2352 }
2353 else {
2354 device_data = NULL;
2355 }
2356 *bufferp = (vm_offset_t)device_data;
2357
2358 if(dpt != NULL) {
2359 /* Free the receive buffer */
2360 dpt->checked_out = 0;
2361 thread_wakeup(&dpt_array);
2362 }
2363 return KERN_SUCCESS;
2364}
2365
2366kern_return_t
2367ps_write_device(
2368 paging_segment_t ps,
2369 vm_offset_t offset,
2370 vm_offset_t addr,
2371 unsigned int size,
2372 struct vs_async *vsa)
2373{
2374 recnum_t dev_offset;
2375 io_buf_len_t bytes_to_write, bytes_written;
2376 recnum_t records_written;
2377 kern_return_t kr;
2378 MACH_PORT_FACE reply_port;
2379
2380
2381
55e303ae 2382 clustered_writes[atop_32(size)]++;
2383
2384 dev_offset = (ps->ps_offset +
2385 (offset >> (vm_page_shift - ps->ps_record_shift)));
2386 bytes_to_write = size;
2387
2388 if (vsa) {
2389 /*
2390 * Asynchronous write.
2391 */
2392 reply_port = vsa->reply_port;
2393 ip_lock(reply_port);
2394 reply_port->ip_sorights++;
2395 ip_reference(reply_port);
2396 ip_unlock(reply_port);
2397 {
2398 device_t device;
2399 device = dev_port_lookup(ps->ps_device);
2400
2401 vsa->vsa_addr = addr;
2402 kr=ds_device_write_common(device,
2403 reply_port,
2404 (mach_msg_type_name_t) MACH_MSG_TYPE_MOVE_SEND_ONCE,
2405 (dev_mode_t) 0,
2406 dev_offset,
2407 (io_buf_ptr_t) addr,
2408 size,
2409 (IO_WRITE | IO_CALL),
2410 &bytes_written);
2411 }
2412 if ((kr != KERN_SUCCESS) && (kr != MIG_NO_REPLY)) {
2413 if (verbose)
2414 dprintf(("%s0x%x, addr=0x%x,"
2415 "size=0x%x,offset=0x%x\n",
2416 "device_write_request returned ",
2417 kr, addr, size, offset));
2418 BS_STAT(ps->ps_bs,
55e303ae 2419 ps->ps_bs->bs_pages_out_fail += atop_32(size));
2420 /* do the completion notification to free resources */
2421 device_write_reply(reply_port, kr, 0);
2422 return PAGER_ERROR;
2423 }
2424 } else do {
2425 /*
2426 * Synchronous write.
2427 */
2428 {
2429 device_t device;
2430 device = dev_port_lookup(ps->ps_device);
2431 kr=ds_device_write_common(device,
2432 IP_NULL, 0,
2433 (dev_mode_t) 0,
2434 dev_offset,
2435 (io_buf_ptr_t) addr,
2436 size,
2437 (IO_WRITE | IO_SYNC | IO_KERNEL_BUF),
2438 &bytes_written);
2439 }
2440 if (kr != KERN_SUCCESS) {
2441 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2442 "device_write returned ",
2443 kr, addr, size, offset));
2444 BS_STAT(ps->ps_bs,
55e303ae 2445 ps->ps_bs->bs_pages_out_fail += atop_32(size));
2446 return PAGER_ERROR;
2447 }
2448 if (bytes_written & ((vm_page_size >> ps->ps_record_shift) - 1))
2449 Panic("fragmented write");
2450 records_written = (bytes_written >>
2451 (vm_page_shift - ps->ps_record_shift));
2452 dev_offset += records_written;
2453#if 1
2454 if (bytes_written != bytes_to_write) {
2455 dprintf(("wrote only %d bytes out of %d\n",
2456 bytes_written, bytes_to_write));
2457 }
2458#endif
2459 bytes_to_write -= bytes_written;
2460 addr += bytes_written;
2461 } while (bytes_to_write > 0);
2462
2463 return PAGER_SUCCESS;
2464}
2465
2466
2467#else /* !DEVICE_PAGING */
2468
2469kern_return_t
2470ps_read_device(
2471 __unused paging_segment_t ps,
2472 __unused vm_offset_t offset,
2473 __unused vm_offset_t *bufferp,
2474 __unused unsigned int size,
2475 __unused unsigned int *residualp,
2476 __unused int flags)
2477{
2478 panic("ps_read_device not supported");
0c530ab8 2479 return KERN_FAILURE;
2480}
2481
91447636 2482kern_return_t
1c79356b 2483ps_write_device(
2484 __unused paging_segment_t ps,
2485 __unused vm_offset_t offset,
2486 __unused vm_offset_t addr,
2487 __unused unsigned int size,
2488 __unused struct vs_async *vsa)
2489{
2490 panic("ps_write_device not supported");
0c530ab8 2491 return KERN_FAILURE;
2492}
2493
2494#endif /* DEVICE_PAGING */
91447636 2495void pvs_object_data_provided(vstruct_t, upl_t, upl_offset_t, upl_size_t); /* forward */
2496
2497void
2498pvs_object_data_provided(
2499 __unused vstruct_t vs,
2500 __unused upl_t upl,
2501 __unused upl_offset_t offset,
2502 upl_size_t size)
1c79356b 2503{
1c79356b 2504
2505 DP_DEBUG(DEBUG_VS_INTERNAL,
2506 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2507 upl, offset, size));
2508
2509 ASSERT(size > 0);
55e303ae 2510 GSTAT(global_stats.gs_pages_in += atop_32(size));
2511
2512
2513#if USE_PRECIOUS
2514 ps_clunmap(vs, offset, size);
2515#endif /* USE_PRECIOUS */
2516
2517}
2518
2519static memory_object_offset_t last_start;
2520static vm_size_t last_length;
2521
2522kern_return_t
2523pvs_cluster_read(
2524 vstruct_t vs,
0b4e3aa0 2525 vm_offset_t vs_offset,
2526 vm_size_t cnt,
2527 void *fault_info)
1c79356b 2528{
1c79356b 2529 kern_return_t error = KERN_SUCCESS;
2d21ac55 2530 unsigned int size;
0c530ab8 2531 unsigned int residual;
1c79356b 2532 unsigned int request_flags;
2533 int seg_index;
2534 int pages_in_cl;
2535 int cl_size;
2536 int cl_mask;
2537 int cl_index;
2538 unsigned int xfer_size;
2539 vm_offset_t orig_vs_offset;
2540 vm_offset_t ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
2541 paging_segment_t psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
2542 struct clmap clmap;
2543 upl_t upl;
2544 unsigned int page_list_count;
2545 memory_object_offset_t start;
2546
2547 pages_in_cl = 1 << vs->vs_clshift;
2548 cl_size = pages_in_cl * vm_page_size;
2549 cl_mask = cl_size - 1;
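	/*
	 * Example (assuming the default VSTRUCT_DEF_CLSHIFT of 2 and 4 KB
	 * pages): pages_in_cl == 4, cl_size == 0x4000 and cl_mask == 0x3fff,
	 * so (vs_offset & cl_mask) / vm_page_size below is the page's index
	 * within its cluster.
	 */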
1c79356b 2550
1c79356b 2551#if USE_PRECIOUS
2d21ac55 2552 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
1c79356b 2553#else
2d21ac55 2554 request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT | UPL_SET_LITE;
1c79356b 2555#endif
2556 cl_index = (vs_offset & cl_mask) / vm_page_size;
2557
2558 if ((ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0) == (vm_offset_t)-1) ||
2559 !CLMAP_ISSET(clmap, cl_index)) {
2560 /*
2561 * the needed page doesn't exist in the backing store...
2562 * we don't want to try to do any I/O, just abort the
2563 * page and let the fault handler provide a zero-fill
2564 */
2565 if (cnt == 0) {
2566 /*
2567 * The caller was just poking at us to see if
2568 * the page has been paged out. No need to
2569 * mess with the page at all.
2570 * Just let the caller know we don't have that page.
2571 */
2572 return KERN_FAILURE;
2573 }
2574
2575 page_list_count = 0;
2576
2577 memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
2578 PAGE_SIZE, PAGE_SIZE,
2579 &upl, NULL, &page_list_count,
2580 request_flags);
2581
2582 if (clmap.cl_error)
2583 upl_abort(upl, UPL_ABORT_ERROR);
2584 else
2585 upl_abort(upl, UPL_ABORT_UNAVAILABLE);
2586 upl_deallocate(upl);
91447636 2587
2588 return KERN_SUCCESS;
2589 }
2590
2591 if (cnt == 0) {
2592 /*
2593 * The caller was just poking at us to see if
2594 * the page has been paged out. No need to
2595 * mess with the page at all.
2596 * Just let the caller know we do have that page.
2597 */
2598 return KERN_SUCCESS;
2599 }
2600
2601 assert(dp_encryption_inited);
2602 if (dp_encryption) {
2603 /*
2604 * ENCRYPTED SWAP:
2605 * request that the UPL be prepared for
2606 * decryption.
2607 */
2608 request_flags |= UPL_ENCRYPT;
2609 }
2d21ac55 2610 orig_vs_offset = vs_offset;
91447636 2611
2612 start = (memory_object_offset_t)vs_offset;
2613 assert(cnt != 0);
2614 cnt = VM_SUPER_CLUSTER;
1c79356b 2615
2616 /*
2617 * determine how big a speculative I/O we should try for...
2618 */
2619 if (memory_object_cluster_size(vs->vs_control, &start, &cnt, (memory_object_fault_info_t)fault_info) == KERN_SUCCESS) {
2620 assert(vs_offset >= (vm_offset_t) start &&
2621 vs_offset < (vm_offset_t) (start + cnt));
2622 vs_offset = (vm_offset_t)start;
2623 } else
2624 cnt = PAGE_SIZE;
2625
2626 last_start = start;
2627 last_length = cnt;
2628
2629 /*
2630 * This loop will be executed multiple times until the entire
2631 * range has been looked at or we issue an I/O... if the request spans cluster
 2632 * boundaries, the clusters will be checked for logical continuity,
2633 * if contiguous the I/O request will span multiple clusters...
2634 * at most only 1 I/O will be issued... it will encompass the original offset
2635 */
2636 while (cnt && error == KERN_SUCCESS) {
2637 int ps_info_valid;
2638
2639 if ((vs_offset & cl_mask) && (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
2640 size = VM_SUPER_CLUSTER;
2641 size -= vs_offset & cl_mask;
2d21ac55 2642 } else if (cnt > VM_SUPER_CLUSTER)
0b4e3aa0 2643 size = VM_SUPER_CLUSTER;
2d21ac55 2644 else
0b4e3aa0 2645 size = cnt;
2d21ac55 2646
0b4e3aa0 2647 cnt -= size;
1c79356b 2648
2649 ps_info_valid = 0;
2650 seg_index = 0;
1c79356b 2651
0b4e3aa0 2652 while (size > 0 && error == KERN_SUCCESS) {
2d21ac55 2653 unsigned int abort_size;
2654 int failed_size;
2655 int beg_pseg;
2656 int beg_indx;
2657 vm_offset_t cur_offset;
1c79356b 2658
2659 if ( !ps_info_valid) {
2660 ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
2661 psp[seg_index] = CLMAP_PS(clmap);
2662 ps_info_valid = 1;
1c79356b 2663 }
2664 /*
2665 * skip over unallocated physical segments
2666 */
2667 if (ps_offset[seg_index] == (vm_offset_t) -1) {
2668 abort_size = cl_size - (vs_offset & cl_mask);
2669 abort_size = MIN(abort_size, size);
2670
2671 size -= abort_size;
2672 vs_offset += abort_size;
1c79356b 2673
2674 seg_index++;
2675 ps_info_valid = 0;
2d21ac55 2676
0b4e3aa0 2677 continue;
1c79356b 2678 }
2679 cl_index = (vs_offset & cl_mask) / vm_page_size;
2680
2681 for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
2682 /*
2683 * skip over unallocated pages
2684 */
2685 if (CLMAP_ISSET(clmap, cl_index))
2686 break;
2687 abort_size += vm_page_size;
2688 }
2689 if (abort_size) {
2690 size -= abort_size;
2691 vs_offset += abort_size;
2692
2693 if (cl_index == pages_in_cl) {
2694 /*
2695 * if we're at the end of this physical cluster
2696 * then bump to the next one and continue looking
2697 */
2698 seg_index++;
2699 ps_info_valid = 0;
2d21ac55 2700
2701 continue;
2702 }
2703 if (size == 0)
2704 break;
2705 }
1c79356b 2706 /*
2707 * remember the starting point of the first allocated page
2708 * for the I/O we're about to issue
1c79356b 2709 */
2710 beg_pseg = seg_index;
2711 beg_indx = cl_index;
2712 cur_offset = vs_offset;
2713
2714 /*
2715 * calculate the size of the I/O that we can do...
2716 * this may span multiple physical segments if
2717 * they are contiguous
2718 */
2719 for (xfer_size = 0; xfer_size < size; ) {
2720
2d21ac55 2721 while (cl_index < pages_in_cl && xfer_size < size) {
0b4e3aa0 2722 /*
55e303ae 2723 * accumulate allocated pages within
d12e1678 2724 * a physical segment
1c79356b 2725 */
2726 if (CLMAP_ISSET(clmap, cl_index)) {
2727 xfer_size += vm_page_size;
2728 cur_offset += vm_page_size;
2729 cl_index++;
2730
2731 BS_STAT(psp[seg_index]->ps_bs,
2732 psp[seg_index]->ps_bs->bs_pages_in++);
2733 } else
2734 break;
2735 }
2d21ac55 2736 if (cl_index < pages_in_cl || xfer_size >= size) {
0b4e3aa0 2737 /*
55e303ae 2738 * we've hit an unallocated page or
2739 * the end of this request... see if
2740 * it's time to fire the I/O
1c79356b 2741 */
2742 break;
2743 }
2744 /*
d12e1678 2745 * we've hit the end of the current physical
55e303ae 2746 * segment and there's more to do, so try
d12e1678 2747 * moving to the next one
2748 */
2749 seg_index++;
2750
2d21ac55 2751 ps_offset[seg_index] = ps_clmap(vs, cur_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
d12e1678 2752 psp[seg_index] = CLMAP_PS(clmap);
2753 ps_info_valid = 1;
2754
2755 if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
2756 /*
2757 * if the physical segment we're about
2758 * to step into is not contiguous to
2759 * the one we're currently in, or it's
d12e1678 2760 * in a different paging file, or
0b4e3aa0 2761 * it hasn't been allocated....
2762 * we stop this run and go check
2763 * to see if it's time to fire the I/O
2764 */
2765 break;
1c79356b 2766 }
0b4e3aa0 2767 /*
d12e1678 2768 * start with first page of the next physical
2d21ac55 2769 * segment
2770 */
2771 cl_index = 0;
1c79356b 2772 }
2d21ac55 2773 if (xfer_size == 0) {
0b4e3aa0 2774 /*
2d21ac55 2775 * no I/O to generate for this segment
0b4e3aa0 2776 */
0b4e3aa0 2777 continue;
2778 }
2779 if (cur_offset <= orig_vs_offset) {
2780 /*
2781 * we've hit a hole in our speculative cluster
2782 * before the offset that we're really after...
2783 * don't issue the I/O since it doesn't encompass
2784 * the original offset and we're looking to only
2785 * pull in the speculative pages if they can be
2786 * made part of a single I/O
2787 */
2788 size -= xfer_size;
2789 vs_offset += xfer_size;
1c79356b 2790
2791 continue;
2792 }
2793 /*
2794 * we have a contiguous range of allocated pages
2795 * to read from that encompasses the original offset
2796 */
2797 page_list_count = 0;
2798 memory_object_super_upl_request(vs->vs_control, (memory_object_offset_t)vs_offset,
2799 xfer_size, xfer_size,
2800 &upl, NULL, &page_list_count,
2801 request_flags | UPL_SET_INTERNAL | UPL_NOBLOCK);
2802
2803 error = ps_read_file(psp[beg_pseg],
2804 upl, (upl_offset_t) 0,
2805 ps_offset[beg_pseg] + (beg_indx * vm_page_size),
2806 xfer_size, &residual, 0);
2807
2808 failed_size = 0;
2809
2810 /*
55e303ae 2811 * Adjust counts and send response to VM. Optimize
d12e1678 2812 * for the common case, i.e. no error and/or partial
55e303ae 2813 * data. If there was an error, then we need to error
d12e1678 2814 * the entire range, even if some data was successfully
55e303ae 2815 * read. If there was a partial read we may supply some
0b4e3aa0 2816 * data and may error some as well. In all cases the
2817 * VM must receive some notification for every page
2818 * in the range.
2819 */
2820 if ((error == KERN_SUCCESS) && (residual == 0)) {
2821 /*
d12e1678 2822 * Got everything we asked for, supply the data
2823 * to the VM. Note that as a side effect of
2824 * supplying the data, the buffer holding the
2825 * supplied data is deallocated from the pager's
2826 * address space.
0b4e3aa0 2827 */
2d21ac55 2828 pvs_object_data_provided(vs, upl, vs_offset, xfer_size);
2829 } else {
2830 failed_size = xfer_size;
2831
2832 if (error == KERN_SUCCESS) {
2833 if (residual == xfer_size) {
2834 /*
2835 * If a read operation returns no error
2836 * and no data moved, we turn it into
2837 * an error, assuming we're reading at
 2838 * or beyond EOF.
2839 * Fall through and error the entire range.
2840 */
2841 error = KERN_FAILURE;
2842 } else {
2843 /*
2844 * Otherwise, we have partial read. If
 2845 * the part read is an integral number
2846 * of pages supply it. Otherwise round
2847 * it up to a page boundary, zero fill
2848 * the unread part, and supply it.
2849 * Fall through and error the remainder
2850 * of the range, if any.
2851 */
2852 int fill;
2853 unsigned int lsize;
2854
2855 fill = residual & ~vm_page_size;
2856 lsize = (xfer_size - residual) + fill;
0b4e3aa0 2857
2d21ac55 2858 pvs_object_data_provided(vs, upl, vs_offset, lsize);
2859
2860 if (lsize < xfer_size) {
2d21ac55 2861 failed_size = xfer_size - lsize;
2862 error = KERN_FAILURE;
2863 }
2864 }
2865 }
2866 }
1c79356b 2867 if (error != KERN_SUCCESS) {
2868 /*
2869 * There was an error in some part of the range, tell
2870 * the VM. Note that error is explicitly checked again
2871 * since it can be modified above.
2872 */
0b4e3aa0 2873 BS_STAT(psp[beg_pseg]->ps_bs,
2d21ac55 2874 psp[beg_pseg]->ps_bs->bs_pages_in_fail += atop_32(failed_size));
1c79356b 2875 }
2876 /*
2877 * we've issued a single I/O that encompassed the original offset
2878 * at this point we either met our speculative request length or
2879 * we ran into a 'hole' (i.e. page not present in the cluster, cluster
2880 * not present or not physically contiguous to the previous one), so
2881 * we're done issuing I/O at this point
2882 */
2883 return (error);
1c79356b 2884 }
2d21ac55 2885 }
2886 return error;
2887}
2888
2889int vs_do_async_write = 1;
2890
2891kern_return_t
2892vs_cluster_write(
2893 vstruct_t vs,
2894 upl_t internal_upl,
2895 upl_offset_t offset,
2896 upl_size_t cnt,
2897 boolean_t dp_internal,
2898 int flags)
2899{
91447636 2900 upl_size_t transfer_size;
2901 int error = 0;
2902 struct clmap clmap;
2903
2904 vm_offset_t actual_offset; /* Offset within paging segment */
1c79356b 2905 paging_segment_t ps;
2906 vm_offset_t mobj_base_addr;
2907 vm_offset_t mobj_target_addr;
2908
2909 upl_t upl;
0b4e3aa0 2910 upl_page_info_t *pl;
2911 int page_index;
2912 int list_size;
55e303ae 2913 int pages_in_cl;
91447636 2914 unsigned int cl_size;
55e303ae 2915 int base_index;
91447636 2916 unsigned int seg_size;
2917
2918 pages_in_cl = 1 << vs->vs_clshift;
2919 cl_size = pages_in_cl * vm_page_size;
1c79356b 2920
1c79356b 2921 if (!dp_internal) {
0c530ab8 2922 unsigned int page_list_count;
1c79356b 2923 int request_flags;
91447636 2924 unsigned int super_size;
2925 int first_dirty;
2926 int num_dirty;
2927 int num_of_pages;
2928 int seg_index;
91447636 2929 upl_offset_t upl_offset;
0b4e3aa0 2930 vm_offset_t seg_offset;
2931 vm_offset_t ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
2932 paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
0b4e3aa0 2933
1c79356b 2934
2935 if (bs_low) {
2936 super_size = cl_size;
0b4e3aa0 2937
2938 request_flags = UPL_NOBLOCK |
2939 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2d21ac55 2940 UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
2941 } else {
2942 super_size = VM_SUPER_CLUSTER;
0b4e3aa0 2943
2944 request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
2945 UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
2d21ac55 2946 UPL_NO_SYNC | UPL_SET_INTERNAL | UPL_SET_LITE;
2947 }
2948
2949 if (!dp_encryption_inited) {
2950 /*
2951 * ENCRYPTED SWAP:
2952 * Once we've started using swap, we
2953 * can't change our mind on whether
2954 * it needs to be encrypted or
2955 * not.
2956 */
2957 dp_encryption_inited = TRUE;
2958 }
2959 if (dp_encryption) {
2960 /*
2961 * ENCRYPTED SWAP:
2962 * request that the UPL be prepared for
2963 * encryption.
2964 */
2965 request_flags |= UPL_ENCRYPT;
2966 flags |= UPL_PAGING_ENCRYPTED;
2967 }
2968
2969 page_list_count = 0;
2970 memory_object_super_upl_request(vs->vs_control,
2971 (memory_object_offset_t)offset,
2972 cnt, super_size,
2973 &upl, NULL, &page_list_count,
55e303ae 2974 request_flags | UPL_FOR_PAGEOUT);
1c79356b 2975
0b4e3aa0 2976 pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
1c79356b 2977
2978 seg_size = cl_size - (upl->offset % cl_size);
2979 upl_offset = upl->offset & ~(cl_size - 1);
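		/*
		 * e.g. with a 0x4000-byte cluster and upl->offset == 0x9000,
		 * seg_size == 0x3000 (what is left of that cluster) and
		 * upl_offset is rounded down to the cluster boundary 0x8000.
		 */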
2980
2981 for (seg_index = 0, transfer_size = upl->size;
2982 transfer_size > 0; ) {
d12e1678 2983 ps_offset[seg_index] =
2984 ps_clmap(vs,
2985 upl_offset,
2986 &clmap, CL_ALLOC,
2987 cl_size, 0);
1c79356b 2988
2989 if (ps_offset[seg_index] == (vm_offset_t) -1) {
2990 upl_abort(upl, 0);
2991 upl_deallocate(upl);
2992
2993 return KERN_FAILURE;
1c79356b 2994
2995 }
2996 psp[seg_index] = CLMAP_PS(clmap);
1c79356b 2997
2998 if (transfer_size > seg_size) {
2999 transfer_size -= seg_size;
3000 upl_offset += cl_size;
3001 seg_size = cl_size;
3002 seg_index++;
3003 } else
3004 transfer_size = 0;
3005 }
3006 /*
3007 * Ignore any non-present pages at the end of the
3008 * UPL.
3009 */
3010 for (page_index = upl->size / vm_page_size; page_index > 0;)
3011 if (UPL_PAGE_PRESENT(pl, --page_index))
3012 break;
3013 num_of_pages = page_index + 1;
3014
3015 base_index = (upl->offset % cl_size) / PAGE_SIZE;
3016
3017 for (page_index = 0; page_index < num_of_pages; ) {
3018 /*
3019 * skip over non-dirty pages
3020 */
3021 for ( ; page_index < num_of_pages; page_index++) {
55e303ae 3022 if (UPL_DIRTY_PAGE(pl, page_index)
d12e1678 3023 || UPL_PRECIOUS_PAGE(pl, page_index))
3024 /*
3025 * this is a page we need to write
55e303ae 3026 * go see if we can buddy it up with
d12e1678 3027 * others that are contiguous to it
0b4e3aa0
A
3028 */
3029 break;
3030 /*
d12e1678 3031 * if the page is not-dirty, but present we
55e303ae 3032 * need to commit it... This is an unusual
d12e1678 3033 * case since we only asked for dirty pages
0b4e3aa0
A
3034 */
3035 if (UPL_PAGE_PRESENT(pl, page_index)) {
3036 boolean_t empty = FALSE;
3037 upl_commit_range(upl,
3038 page_index * vm_page_size,
3039 vm_page_size,
3040 UPL_COMMIT_NOTIFY_EMPTY,
3041 pl,
d52fe63f 3042 page_list_count,
0b4e3aa0 3043 &empty);
55e303ae
A
3044 if (empty) {
3045 assert(page_index ==
3046 num_of_pages - 1);
0b4e3aa0 3047 upl_deallocate(upl);
55e303ae 3048 }
1c79356b 3049 }
1c79356b 3050 }
0b4e3aa0
A
3051 if (page_index == num_of_pages)
3052 /*
3053 * no more pages to look at, we're out of here
3054 */
3055 break;
1c79356b 3056
0b4e3aa0 3057 /*
55e303ae
A
3058 * gather up contiguous dirty pages... we have at
 3059 * least 1, otherwise we would have bailed above
0b4e3aa0
A
3060 * make sure that each physical segment that we step
3061 * into is contiguous to the one we're currently in
3062 * if it's not, we have to stop and write what we have
3063 */
55e303ae 3064 for (first_dirty = page_index;
d12e1678 3065 page_index < num_of_pages; ) {
55e303ae 3066 if ( !UPL_DIRTY_PAGE(pl, page_index)
d12e1678 3067 && !UPL_PRECIOUS_PAGE(pl, page_index))
0b4e3aa0
A
3068 break;
3069 page_index++;
3070 /*
3071 * if we just looked at the last page in the UPL
3072 * we don't need to check for physical segment
3073 * continuity
3074 */
3075 if (page_index < num_of_pages) {
3076 int cur_seg;
3077 int nxt_seg;
3078
55e303ae
A
3079 cur_seg = (base_index + (page_index - 1))/pages_in_cl;
3080 nxt_seg = (base_index + page_index)/pages_in_cl;
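					/*
					 * e.g. with base_index == 1 and
					 * pages_in_cl == 4, page_index == 3
					 * gives cur_seg == 0 and nxt_seg == 1:
					 * the next page starts a new physical
					 * segment, so its contiguity is
					 * checked below.
					 */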
0b4e3aa0
A
3081
3082 if (cur_seg != nxt_seg) {
3083 if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
55e303ae
A
3084 /*
3085 * if the segment we're about
3086 * to step into is not
3087 * contiguous to the one we're
3088 * currently in, or it's in a
d12e1678 3089 * different paging file....
55e303ae 3090 * we stop here and generate
d12e1678
A
3091 * the I/O
3092 */
0b4e3aa0 3093 break;
1c79356b 3094 }
1c79356b 3095 }
0b4e3aa0
A
3096 }
3097 num_dirty = page_index - first_dirty;
1c79356b 3098
0b4e3aa0
A
3099 if (num_dirty) {
3100 upl_offset = first_dirty * vm_page_size;
0b4e3aa0
A
3101 transfer_size = num_dirty * vm_page_size;
3102
d12e1678 3103 while (transfer_size) {
1c79356b 3104
d12e1678 3105 if ((seg_size = cl_size -
55e303ae 3106 ((upl->offset + upl_offset) % cl_size))
d12e1678
A
3107 > transfer_size)
3108 seg_size = transfer_size;
0b4e3aa0 3109
d12e1678
A
3110 ps_vs_write_complete(vs,
3111 upl->offset + upl_offset,
3112 seg_size, error);
0b4e3aa0 3113
d12e1678
A
3114 transfer_size -= seg_size;
3115 upl_offset += seg_size;
0b4e3aa0 3116 }
d12e1678
A
3117 upl_offset = first_dirty * vm_page_size;
3118 transfer_size = num_dirty * vm_page_size;
55e303ae
A
3119
3120 seg_index = (base_index + first_dirty) / pages_in_cl;
3121 seg_offset = (upl->offset + upl_offset) % cl_size;
3122
d12e1678
A
3123 error = ps_write_file(psp[seg_index],
3124 upl, upl_offset,
3125 ps_offset[seg_index]
3126 + seg_offset,
3127 transfer_size, flags);
55e303ae 3128 } else {
0b4e3aa0
A
3129 boolean_t empty = FALSE;
3130 upl_abort_range(upl,
3131 first_dirty * vm_page_size,
3132 num_dirty * vm_page_size,
3133 UPL_ABORT_NOTIFY_EMPTY,
3134 &empty);
55e303ae
A
3135 if (empty) {
3136 assert(page_index == num_of_pages);
0b4e3aa0 3137 upl_deallocate(upl);
55e303ae 3138 }
1c79356b 3139 }
1c79356b 3140 }
0b4e3aa0 3141
1c79356b
A
3142 } else {
3143 assert(cnt <= (vm_page_size << vs->vs_clshift));
3144 list_size = cnt;
3145
3146 page_index = 0;
3147 /* The caller provides a mapped_data which is derived */
3148 /* from a temporary object. The targeted pages are */
3149 /* guaranteed to be set at offset 0 in the mapped_data */
3150 /* The actual offset however must still be derived */
3151 /* from the offset in the vs in question */
3152 mobj_base_addr = offset;
3153 mobj_target_addr = mobj_base_addr;
3154
3155 for (transfer_size = list_size; transfer_size != 0;) {
3156 actual_offset = ps_clmap(vs, mobj_target_addr,
3157 &clmap, CL_ALLOC,
3158 transfer_size < cl_size ?
3159 transfer_size : cl_size, 0);
3160 if(actual_offset == (vm_offset_t) -1) {
3161 error = 1;
3162 break;
3163 }
3164 cnt = MIN(transfer_size,
3165 CLMAP_NPGS(clmap) * vm_page_size);
3166 ps = CLMAP_PS(clmap);
3167 /* Assume that the caller has given us contiguous */
3168 /* pages */
3169 if(cnt) {
d12e1678
A
3170 ps_vs_write_complete(vs, mobj_target_addr,
3171 cnt, error);
1c79356b
A
3172 error = ps_write_file(ps, internal_upl,
3173 0, actual_offset,
3174 cnt, flags);
3175 if (error)
3176 break;
55e303ae 3177 }
1c79356b
A
3178 if (error)
3179 break;
3180 actual_offset += cnt;
3181 mobj_target_addr += cnt;
3182 transfer_size -= cnt;
3183 cnt = 0;
3184
3185 if (error)
3186 break;
3187 }
3188 }
3189 if(error)
3190 return KERN_FAILURE;
3191 else
3192 return KERN_SUCCESS;
3193}
3194
3195vm_size_t
3196ps_vstruct_allocated_size(
3197 vstruct_t vs)
3198{
3199 int num_pages;
3200 struct vs_map *vsmap;
91447636 3201 unsigned int i, j, k;
1c79356b
A
3202
3203 num_pages = 0;
3204 if (vs->vs_indirect) {
3205 /* loop on indirect maps */
3206 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3207 vsmap = vs->vs_imap[i];
3208 if (vsmap == NULL)
3209 continue;
3210 /* loop on clusters in this indirect map */
3211 for (j = 0; j < CLMAP_ENTRIES; j++) {
3212 if (VSM_ISCLR(vsmap[j]) ||
3213 VSM_ISERR(vsmap[j]))
3214 continue;
3215 /* loop on pages in this cluster */
3216 for (k = 0; k < VSCLSIZE(vs); k++) {
3217 if ((VSM_BMAP(vsmap[j])) & (1 << k))
3218 num_pages++;
3219 }
3220 }
3221 }
3222 } else {
3223 vsmap = vs->vs_dmap;
3224 if (vsmap == NULL)
3225 return 0;
3226 /* loop on clusters in the direct map */
3227 for (j = 0; j < CLMAP_ENTRIES; j++) {
3228 if (VSM_ISCLR(vsmap[j]) ||
3229 VSM_ISERR(vsmap[j]))
3230 continue;
3231 /* loop on pages in this cluster */
3232 for (k = 0; k < VSCLSIZE(vs); k++) {
3233 if ((VSM_BMAP(vsmap[j])) & (1 << k))
3234 num_pages++;
3235 }
3236 }
3237 }
3238
55e303ae 3239 return ptoa_32(num_pages);
1c79356b
A
3240}
3241
3242size_t
3243ps_vstruct_allocated_pages(
3244 vstruct_t vs,
3245 default_pager_page_t *pages,
3246 size_t pages_size)
3247{
91447636 3248 unsigned int num_pages;
1c79356b
A
3249 struct vs_map *vsmap;
3250 vm_offset_t offset;
91447636 3251 unsigned int i, j, k;
1c79356b
A
3252
3253 num_pages = 0;
3254 offset = 0;
3255 if (vs->vs_indirect) {
3256 /* loop on indirect maps */
3257 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3258 vsmap = vs->vs_imap[i];
3259 if (vsmap == NULL) {
3260 offset += (vm_page_size * CLMAP_ENTRIES *
3261 VSCLSIZE(vs));
3262 continue;
3263 }
3264 /* loop on clusters in this indirect map */
3265 for (j = 0; j < CLMAP_ENTRIES; j++) {
3266 if (VSM_ISCLR(vsmap[j]) ||
3267 VSM_ISERR(vsmap[j])) {
3268 offset += vm_page_size * VSCLSIZE(vs);
3269 continue;
3270 }
3271 /* loop on pages in this cluster */
3272 for (k = 0; k < VSCLSIZE(vs); k++) {
3273 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
3274 num_pages++;
3275 if (num_pages < pages_size)
3276 pages++->dpp_offset =
3277 offset;
3278 }
3279 offset += vm_page_size;
3280 }
3281 }
3282 }
3283 } else {
3284 vsmap = vs->vs_dmap;
3285 if (vsmap == NULL)
3286 return 0;
3287 /* loop on clusters in the direct map */
3288 for (j = 0; j < CLMAP_ENTRIES; j++) {
3289 if (VSM_ISCLR(vsmap[j]) ||
3290 VSM_ISERR(vsmap[j])) {
3291 offset += vm_page_size * VSCLSIZE(vs);
3292 continue;
3293 }
3294 /* loop on pages in this cluster */
3295 for (k = 0; k < VSCLSIZE(vs); k++) {
3296 if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
3297 num_pages++;
3298 if (num_pages < pages_size)
3299 pages++->dpp_offset = offset;
3300 }
3301 offset += vm_page_size;
3302 }
3303 }
3304 }
3305
3306 return num_pages;
3307}
3308
3309
3310kern_return_t
3311ps_vstruct_transfer_from_segment(
3312 vstruct_t vs,
3313 paging_segment_t segment,
1c79356b 3314 upl_t upl)
1c79356b
A
3315{
3316 struct vs_map *vsmap;
91447636
A
3317// struct vs_map old_vsmap;
3318// struct vs_map new_vsmap;
3319 unsigned int i, j;
1c79356b
A
3320
3321 VS_LOCK(vs); /* block all work on this vstruct */
3322 /* can't allow the normal multiple write */
3323 /* semantic because writes may conflict */
3324 vs->vs_xfer_pending = TRUE;
3325 vs_wait_for_sync_writers(vs);
3326 vs_start_write(vs);
3327 vs_wait_for_readers(vs);
3328 /* we will unlock the vs to allow other writes while transferring */
 3329 /* and will be guaranteed the persistence of the vs struct */
3330 /* because the caller of ps_vstruct_transfer_from_segment bumped */
3331 /* vs_async_pending */
3332 /* OK we now have guaranteed no other parties are accessing this */
3333 /* vs. Now that we are also supporting simple lock versions of */
3334 /* vs_lock we cannot hold onto VS_LOCK as we may block below. */
3335 /* our purpose in holding it before was the multiple write case */
3336 /* we now use the boolean xfer_pending to do that. We can use */
3337 /* a boolean instead of a count because we have guaranteed single */
3338 /* file access to this code in its caller */
3339 VS_UNLOCK(vs);
3340vs_changed:
3341 if (vs->vs_indirect) {
91447636
A
3342 unsigned int vsmap_size;
3343 int clmap_off;
1c79356b
A
3344 /* loop on indirect maps */
3345 for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
3346 vsmap = vs->vs_imap[i];
3347 if (vsmap == NULL)
3348 continue;
3349 /* loop on clusters in this indirect map */
3350 clmap_off = (vm_page_size * CLMAP_ENTRIES *
3351 VSCLSIZE(vs) * i);
3352 if(i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
3353 vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
3354 else
3355 vsmap_size = CLMAP_ENTRIES;
3356 for (j = 0; j < vsmap_size; j++) {
3357 if (VSM_ISCLR(vsmap[j]) ||
3358 VSM_ISERR(vsmap[j]) ||
3359 (VSM_PS(vsmap[j]) != segment))
3360 continue;
3361 if(vs_cluster_transfer(vs,
3362 (vm_page_size * (j << vs->vs_clshift))
3363 + clmap_off,
3364 vm_page_size << vs->vs_clshift,
1c79356b 3365 upl)
1c79356b
A
3366 != KERN_SUCCESS) {
3367 VS_LOCK(vs);
3368 vs->vs_xfer_pending = FALSE;
3369 VS_UNLOCK(vs);
3370 vs_finish_write(vs);
3371 return KERN_FAILURE;
3372 }
3373 /* allow other readers/writers during transfer*/
3374 VS_LOCK(vs);
3375 vs->vs_xfer_pending = FALSE;
3376 VS_UNLOCK(vs);
3377 vs_finish_write(vs);
3378 VS_LOCK(vs);
3379 vs->vs_xfer_pending = TRUE;
1c79356b
A
3380 vs_wait_for_sync_writers(vs);
3381 vs_start_write(vs);
3382 vs_wait_for_readers(vs);
0b4e3aa0 3383 VS_UNLOCK(vs);
1c79356b
A
3384 if (!(vs->vs_indirect)) {
3385 goto vs_changed;
3386 }
3387 }
3388 }
3389 } else {
3390 vsmap = vs->vs_dmap;
3391 if (vsmap == NULL) {
3392 VS_LOCK(vs);
3393 vs->vs_xfer_pending = FALSE;
3394 VS_UNLOCK(vs);
3395 vs_finish_write(vs);
3396 return KERN_SUCCESS;
3397 }
3398 /* loop on clusters in the direct map */
3399 for (j = 0; j < vs->vs_size; j++) {
3400 if (VSM_ISCLR(vsmap[j]) ||
3401 VSM_ISERR(vsmap[j]) ||
3402 (VSM_PS(vsmap[j]) != segment))
3403 continue;
3404 if(vs_cluster_transfer(vs,
3405 vm_page_size * (j << vs->vs_clshift),
3406 vm_page_size << vs->vs_clshift,
1c79356b 3407 upl) != KERN_SUCCESS) {
1c79356b
A
3408 VS_LOCK(vs);
3409 vs->vs_xfer_pending = FALSE;
3410 VS_UNLOCK(vs);
3411 vs_finish_write(vs);
3412 return KERN_FAILURE;
3413 }
3414 /* allow other readers/writers during transfer*/
3415 VS_LOCK(vs);
3416 vs->vs_xfer_pending = FALSE;
3417 VS_UNLOCK(vs);
3418 vs_finish_write(vs);
3419 VS_LOCK(vs);
3420 vs->vs_xfer_pending = TRUE;
3421 VS_UNLOCK(vs);
3422 vs_wait_for_sync_writers(vs);
3423 vs_start_write(vs);
3424 vs_wait_for_readers(vs);
3425 if (vs->vs_indirect) {
3426 goto vs_changed;
3427 }
3428 }
3429 }
3430
3431 VS_LOCK(vs);
3432 vs->vs_xfer_pending = FALSE;
3433 VS_UNLOCK(vs);
3434 vs_finish_write(vs);
3435 return KERN_SUCCESS;
3436}
3437
3438
3439
3440vs_map_t
3441vs_get_map_entry(
3442 vstruct_t vs,
3443 vm_offset_t offset)
3444{
3445 struct vs_map *vsmap;
3446 vm_offset_t cluster;
3447
55e303ae 3448 cluster = atop_32(offset) >> vs->vs_clshift;
1c79356b
A
3449 if (vs->vs_indirect) {
3450 long ind_block = cluster/CLMAP_ENTRIES;
3451
3452 /* Is the indirect block allocated? */
3453 vsmap = vs->vs_imap[ind_block];
3454 if(vsmap == (vs_map_t) NULL)
3455 return vsmap;
3456 } else
3457 vsmap = vs->vs_dmap;
3458 vsmap += cluster%CLMAP_ENTRIES;
3459 return vsmap;
3460}
3461
3462kern_return_t
3463vs_cluster_transfer(
3464 vstruct_t vs,
3465 vm_offset_t offset,
3466 vm_size_t cnt,
1c79356b 3467 upl_t upl)
1c79356b
A
3468{
3469 vm_offset_t actual_offset;
3470 paging_segment_t ps;
3471 struct clmap clmap;
3472 kern_return_t error = KERN_SUCCESS;
91447636
A
3473 unsigned int size, size_wanted;
3474 int i;
0c530ab8 3475 unsigned int residual = 0;
91447636
A
3476 unsigned int unavail_size;
3477// default_pager_thread_t *dpt;
3478// boolean_t dealloc;
3479 struct vs_map *vsmap_ptr = NULL;
1c79356b
A
3480 struct vs_map read_vsmap;
3481 struct vs_map original_read_vsmap;
3482 struct vs_map write_vsmap;
91447636
A
3483// upl_t sync_upl;
3484// vm_offset_t ioaddr;
1c79356b 3485
1c79356b
A
3486 /* vs_cluster_transfer reads in the pages of a cluster and
3487 * then writes these pages back to new backing store. The
3488 * segment the pages are being read from is assumed to have
3489 * been taken off-line and is no longer considered for new
3490 * space requests.
3491 */
3492
3493 /*
3494 * This loop will be executed once per cluster referenced.
3495 * Typically this means once, since it's unlikely that the
3496 * VM system will ask for anything spanning cluster boundaries.
3497 *
3498 * If there are holes in a cluster (in a paging segment), we stop
3499 * reading at the hole, then loop again, hoping to
3500 * find valid pages later in the cluster. This continues until
3501 * the entire range has been examined, and read, if present. The
3502 * pages are written as they are read. If a failure occurs after
3503 * some pages are written the unmap call at the bottom of the loop
3504 * recovers the backing store and the old backing store remains
3505 * in effect.
3506 */
3507
1c79356b
A
3508 VSM_CLR(write_vsmap);
3509 VSM_CLR(original_read_vsmap);
3510 /* grab the actual object's pages to sync with I/O */
3511 while (cnt && (error == KERN_SUCCESS)) {
3512 vsmap_ptr = vs_get_map_entry(vs, offset);
3513 actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);
3514
3515 if (actual_offset == (vm_offset_t) -1) {
3516
3517 /*
3518 * Nothing left to write in this cluster at least
3519 * set write cluster information for any previous
3520 * write, clear for next cluster, if there is one
3521 */
3522 unsigned int local_size, clmask, clsize;
3523
3524 clsize = vm_page_size << vs->vs_clshift;
3525 clmask = clsize - 1;
3526 local_size = clsize - (offset & clmask);
3527 ASSERT(local_size);
3528 local_size = MIN(local_size, cnt);
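		/*
		 * e.g. with a 0x4000-byte cluster, an offset of 0x9000 leaves
		 * local_size == 0x3000 in this cluster, further capped by the
		 * remaining count cnt.
		 */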
3529
3530 /* This cluster has no data in it beyond what may */
3531 /* have been found on a previous iteration through */
3532 /* the loop "write_vsmap" */
3533 *vsmap_ptr = write_vsmap;
3534 VSM_CLR(write_vsmap);
3535 VSM_CLR(original_read_vsmap);
3536
3537 cnt -= local_size;
3538 offset += local_size;
3539 continue;
3540 }
3541
3542 /*
3543 * Count up contiguous available or unavailable
3544 * pages.
3545 */
3546 ps = CLMAP_PS(clmap);
3547 ASSERT(ps);
3548 size = 0;
3549 unavail_size = 0;
3550 for (i = 0;
3551 (size < cnt) && (unavail_size < cnt) &&
3552 (i < CLMAP_NPGS(clmap)); i++) {
3553 if (CLMAP_ISSET(clmap, i)) {
3554 if (unavail_size != 0)
3555 break;
3556 size += vm_page_size;
3557 BS_STAT(ps->ps_bs,
3558 ps->ps_bs->bs_pages_in++);
3559 } else {
3560 if (size != 0)
3561 break;
3562 unavail_size += vm_page_size;
3563 }
3564 }
3565
3566 if (size == 0) {
3567 ASSERT(unavail_size);
593a1d5f 3568 ps_clunmap(vs, offset, unavail_size);
1c79356b
A
3569 cnt -= unavail_size;
3570 offset += unavail_size;
3571 if((offset & ((vm_page_size << vs->vs_clshift) - 1))
3572 == 0) {
3573 /* There is no more to transfer in this
3574 cluster
3575 */
3576 *vsmap_ptr = write_vsmap;
3577 VSM_CLR(write_vsmap);
3578 VSM_CLR(original_read_vsmap);
3579 }
3580 continue;
3581 }
3582
3583 if(VSM_ISCLR(original_read_vsmap))
3584 original_read_vsmap = *vsmap_ptr;
3585
3586 if(ps->ps_segtype == PS_PARTITION) {
0c530ab8
A
3587 panic("swap partition not supported\n");
3588 /*NOTREACHED*/
3589 error = KERN_FAILURE;
3590 residual = size;
1c79356b 3591/*
9bccf70c 3592 NEED TO ISSUE WITH SYNC & NO COMMIT
1c79356b
A
3593 error = ps_read_device(ps, actual_offset, &buffer,
3594 size, &residual, flags);
3595*/
3596 } else {
9bccf70c 3597 /* NEED TO ISSUE WITH SYNC & NO COMMIT */
91447636 3598 error = ps_read_file(ps, upl, (upl_offset_t) 0, actual_offset,
1c79356b 3599 size, &residual,
9bccf70c 3600 (UPL_IOSYNC | UPL_NOCOMMIT));
1c79356b
A
3601 }
3602
3603 read_vsmap = *vsmap_ptr;
3604
3605
3606 /*
3607 * Adjust counts and put data in new BS. Optimize for the
3608 * common case, i.e. no error and/or partial data.
3609 * If there was an error, then we need to error the entire
3610 * range, even if some data was successfully read.
3611 *
3612 */
3613 if ((error == KERN_SUCCESS) && (residual == 0)) {
0b4e3aa0 3614
1c79356b
A
3615 /*
3616 * Got everything we asked for, supply the data to
3617 * the new BS. Note that as a side effect of supplying
3618 * the data, the buffer holding the supplied data is
3619 * deallocated from the pager's address space unless
3620 * the write is unsuccessful.
3621 */
3622
3623 /* note buffer will be cleaned up in all cases by */
3624 /* internal_cluster_write or if an error on write */
3625 /* the vm_map_copy_page_discard call */
3626 *vsmap_ptr = write_vsmap;
3627
1c79356b
A
3628 if(vs_cluster_write(vs, upl, offset,
3629 size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT ) != KERN_SUCCESS) {
1c79356b
A
3630 error = KERN_FAILURE;
3631 if(!(VSM_ISCLR(*vsmap_ptr))) {
3632 /* unmap the new backing store object */
3633 ps_clunmap(vs, offset, size);
3634 }
3635 /* original vsmap */
3636 *vsmap_ptr = original_read_vsmap;
3637 VSM_CLR(write_vsmap);
3638 } else {
3639 if((offset + size) &
3640 ((vm_page_size << vs->vs_clshift)
3641 - 1)) {
3642 /* There is more to transfer in this
3643 cluster
3644 */
3645 write_vsmap = *vsmap_ptr;
3646 *vsmap_ptr = read_vsmap;
593a1d5f 3647 ps_clunmap(vs, offset, size);
1c79356b
A
3648 } else {
3649 /* discard the old backing object */
3650 write_vsmap = *vsmap_ptr;
3651 *vsmap_ptr = read_vsmap;
3652 ps_clunmap(vs, offset, size);
3653 *vsmap_ptr = write_vsmap;
3654 VSM_CLR(write_vsmap);
3655 VSM_CLR(original_read_vsmap);
3656 }
3657 }
3658 } else {
3659 size_wanted = size;
3660 if (error == KERN_SUCCESS) {
3661 if (residual == size) {
3662 /*
3663 * If a read operation returns no error
3664 * and no data moved, we turn it into
3665 * an error, assuming we're reading at
3666 * or beyond EOF.
3667 * Fall through and error the entire
3668 * range.
3669 */
3670 error = KERN_FAILURE;
3671 *vsmap_ptr = write_vsmap;
3672 if(!(VSM_ISCLR(*vsmap_ptr))) {
3673 /* unmap the new backing store object */
3674 ps_clunmap(vs, offset, size);
3675 }
3676 *vsmap_ptr = original_read_vsmap;
3677 VSM_CLR(write_vsmap);
3678 continue;
3679 } else {
3680 /*
3681 * Otherwise, we have partial read.
3682 * This is also considered an error
3683 * for the purposes of cluster transfer
3684 */
3685 error = KERN_FAILURE;
3686 *vsmap_ptr = write_vsmap;
3687 if(!(VSM_ISCLR(*vsmap_ptr))) {
3688 /* unmap the new backing store object */
3689 ps_clunmap(vs, offset, size);
3690 }
3691 *vsmap_ptr = original_read_vsmap;
3692 VSM_CLR(write_vsmap);
3693 continue;
3694 }
3695 }
3696
3697 }
3698 cnt -= size;
3699 offset += size;
3700
3701 } /* END while (cnt && (error == 0)) */
3702 if(!VSM_ISCLR(write_vsmap))
3703 *vsmap_ptr = write_vsmap;
3704
1c79356b
A
3705 return error;
3706}
3707
3708kern_return_t
91447636
A
3709default_pager_add_file(
3710 MACH_PORT_FACE backing_store,
3711 vnode_ptr_t vp,
1c79356b 3712 int record_size,
91447636 3713 vm_size_t size)
1c79356b
A
3714{
3715 backing_store_t bs;
3716 paging_segment_t ps;
3717 int i;
91447636 3718 unsigned int j;
1c79356b 3719 int error;
1c79356b
A
3720
3721 if ((bs = backing_store_lookup(backing_store))
3722 == BACKING_STORE_NULL)
3723 return KERN_INVALID_ARGUMENT;
3724
3725 PSL_LOCK();
3726 for (i = 0; i <= paging_segment_max; i++) {
3727 ps = paging_segments[i];
3728 if (ps == PAGING_SEGMENT_NULL)
3729 continue;
3730 if (ps->ps_segtype != PS_FILE)
3731 continue;
3732
3733 /*
3734 * Check for overlap on same device.
3735 */
3736 if (ps->ps_vnode == (struct vnode *)vp) {
3737 PSL_UNLOCK();
3738 BS_UNLOCK(bs);
3739 return KERN_INVALID_ARGUMENT;
3740 }
3741 }
3742 PSL_UNLOCK();
3743
3744 /*
3745 * Set up the paging segment
3746 */
3747 ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
3748 if (ps == PAGING_SEGMENT_NULL) {
3749 BS_UNLOCK(bs);
3750 return KERN_RESOURCE_SHORTAGE;
3751 }
3752
3753 ps->ps_segtype = PS_FILE;
3754 ps->ps_vnode = (struct vnode *)vp;
3755 ps->ps_offset = 0;
3756 ps->ps_record_shift = local_log2(vm_page_size / record_size);
3757 ps->ps_recnum = size;
3758 ps->ps_pgnum = size >> ps->ps_record_shift;
3759
3760 ps->ps_pgcount = ps->ps_pgnum;
3761 ps->ps_clshift = local_log2(bs->bs_clsize);
3762 ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
3763 ps->ps_hint = 0;
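	/*
	 * Sizing example (values for illustration only): with 4 KB pages and
	 * 512-byte device records, ps_record_shift == 3, so a segment of
	 * "size" records holds size >> 3 pages; with a backing-store cluster
	 * size of 4 pages, ps_clcount == ps_pgcount >> 2.
	 */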
3764
3765 PS_LOCK_INIT(ps);
3766 ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
3767 if (!ps->ps_bmap) {
91447636 3768 kfree(ps, sizeof *ps);
1c79356b
A
3769 BS_UNLOCK(bs);
3770 return KERN_RESOURCE_SHORTAGE;
3771 }
91447636
A
3772 for (j = 0; j < ps->ps_ncls; j++) {
3773 clrbit(ps->ps_bmap, j);
1c79356b
A
3774 }
3775
3776 ps->ps_going_away = FALSE;
3777 ps->ps_bs = bs;
3778
3779 if ((error = ps_enter(ps)) != 0) {
91447636
A
3780 kfree(ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
3781 kfree(ps, sizeof *ps);
1c79356b
A
3782 BS_UNLOCK(bs);
3783 return KERN_RESOURCE_SHORTAGE;
3784 }
3785
3786 bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
3787 bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
3788 PSL_LOCK();
3789 dp_pages_free += ps->ps_pgcount;
3790 PSL_UNLOCK();
3791
3792 BS_UNLOCK(bs);
3793
3794 bs_more_space(ps->ps_clcount);
3795
91447636
A
3796 DP_DEBUG(DEBUG_BS_INTERNAL,
3797 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
3798 device, offset, size, record_size,
3799 ps->ps_record_shift, ps->ps_pgnum));
1c79356b
A
3800
3801 return KERN_SUCCESS;
3802}
3803
3804
3805
1c79356b
A
3806kern_return_t
3807ps_read_file(
3808 paging_segment_t ps,
3809 upl_t upl,
91447636 3810 upl_offset_t upl_offset,
1c79356b 3811 vm_offset_t offset,
91447636 3812 upl_size_t size,
1c79356b
A
3813 unsigned int *residualp,
3814 int flags)
3815{
3816 vm_object_offset_t f_offset;
3817 int error = 0;
3818 int result;
1c79356b 3819
91447636 3820 assert(dp_encryption_inited);
1c79356b 3821
55e303ae 3822 clustered_reads[atop_32(size)]++;
1c79356b
A
3823
3824 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3825
2d21ac55
A
3826 /*
3827 * for transfer case we need to pass uploffset and flags
3828 */
3829 error = vnode_pagein(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL);
1c79356b
A
3830
3831 /* The vnode_pagein semantic is somewhat at odds with the existing */
3832 /* device_read semantic. Partial reads are not experienced at this */
3833 /* level. It is up to the bit map code and cluster read code to */
3834 /* check that requested data locations are actually backed, and the */
3835 /* pagein code to either read all of the requested data or return an */
3836 /* error. */
3837
3838 if (error)
3839 result = KERN_FAILURE;
3840 else {
3841 *residualp = 0;
3842 result = KERN_SUCCESS;
3843 }
3844 return result;
1c79356b 3845}
3846
3847kern_return_t
3848ps_write_file(
3849 paging_segment_t ps,
3850 upl_t upl,
91447636 3851 upl_offset_t upl_offset,
1c79356b 3852 vm_offset_t offset,
3853 unsigned int size,
3854 int flags)
3855{
3856 vm_object_offset_t f_offset;
3857 kern_return_t result;
1c79356b 3858
91447636 3859 assert(dp_encryption_inited);
1c79356b 3860
55e303ae 3861 clustered_writes[atop_32(size)]++;
1c79356b 3862 f_offset = (vm_object_offset_t)(ps->ps_offset + offset);
3863
91447636 3864 if (flags & UPL_PAGING_ENCRYPTED) {
3865 /*
3866 * ENCRYPTED SWAP:
3867 * encrypt all the pages that we're going
3868 * to pageout.
3869 */
3870 upl_encrypt(upl, upl_offset, size);
3871 }
2d21ac55 3872 if (vnode_pageout(ps->ps_vnode, upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
1c79356b 3873 result = KERN_FAILURE;
3874 else
3875 result = KERN_SUCCESS;
3876
3877 return result;
3878}
3879
3880kern_return_t
91447636 3881default_pager_triggers( __unused MACH_PORT_FACE default_pager,
1c79356b 3882 int hi_wat,
3883 int lo_wat,
3884 int flags,
3885 MACH_PORT_FACE trigger_port)
3886{
0b4e3aa0 3887 MACH_PORT_FACE release;
3888 kern_return_t kr;
1c79356b 3889
0b4e3aa0 3890 PSL_LOCK();
91447636 3891 if (flags == SWAP_ENCRYPT_ON) {
3892 /* ENCRYPTED SWAP: turn encryption on */
3893 release = trigger_port;
3894 if (!dp_encryption_inited) {
3895 dp_encryption_inited = TRUE;
3896 dp_encryption = TRUE;
3897 kr = KERN_SUCCESS;
3898 } else {
3899 kr = KERN_FAILURE;
3900 }
3901 } else if (flags == SWAP_ENCRYPT_OFF) {
3902 /* ENCRYPTED SWAP: turn encryption off */
3903 release = trigger_port;
3904 if (!dp_encryption_inited) {
3905 dp_encryption_inited = TRUE;
3906 dp_encryption = FALSE;
3907 kr = KERN_SUCCESS;
3908 } else {
3909 kr = KERN_FAILURE;
3910 }
3911 } else if (flags == HI_WAT_ALERT) {
0b4e3aa0 3912 release = min_pages_trigger_port;
1c79356b 3913 min_pages_trigger_port = trigger_port;
3914 minimum_pages_remaining = hi_wat/vm_page_size;
3915 bs_low = FALSE;
0b4e3aa0 3916 kr = KERN_SUCCESS;
3917 } else if (flags == LO_WAT_ALERT) {
3918 release = max_pages_trigger_port;
1c79356b 3919 max_pages_trigger_port = trigger_port;
3920 maximum_pages_free = lo_wat/vm_page_size;
0b4e3aa0 3921 kr = KERN_SUCCESS;
3922 } else {
3923 release = trigger_port;
3924 kr = KERN_INVALID_ARGUMENT;
1c79356b 3925 }
0b4e3aa0 3926 PSL_UNLOCK();
3927
3928 if (IP_VALID(release))
3929 ipc_port_release_send(release);
3930
3931 return kr;
1c79356b 3932}
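
default_pager_triggers() keeps the two watermarks as page counts: hi_wat and lo_wat are divided by vm_page_size, so they are presumably byte counts on entry, with HI_WAT_ALERT setting the level below which more backing store is needed and LO_WAT_ALERT the level above which backing store may be released. A minimal standalone sketch of that conversion, with hypothetical watermarks of 16 MB and 256 MB and an assumed 4 KB page; the values and variable names are illustrative only.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t page_size = 4096;			/* assumed VM page size */
	uint64_t hi_wat    = 16  * 1024 * 1024;		/* example high-water mark, in bytes */
	uint64_t lo_wat    = 256 * 1024 * 1024;		/* example low-water mark, in bytes */

	/* HI_WAT_ALERT case: alert once fewer than this many swap pages remain free */
	uint64_t minimum_pages_remaining = hi_wat / page_size;

	/* LO_WAT_ALERT case: backing store may be released once this many pages stay free */
	uint64_t maximum_pages_free = lo_wat / page_size;

	printf("minimum_pages_remaining=%llu pages, maximum_pages_free=%llu pages\n",
	    (unsigned long long)minimum_pages_remaining,
	    (unsigned long long)maximum_pages_free);
	return 0;
}
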
55e303ae 3933
3934/*
3935 * Monitor the amount of available backing store vs. the amount of
3936 * required backing store, and notify a listener (if present) when
3937 * backing store may safely be removed.
3938 *
3939 * We attempt to avoid the situation where backing store is
3940 * discarded en masse, as this can lead to thrashing as the
3941 * backing store is compacted.
3942 */
3943
3944#define PF_INTERVAL 3 /* time between free level checks */
3945#define PF_LATENCY 10 /* number of intervals before release */
3946
3947static int dp_pages_free_low_count = 0;
91447636 3948thread_call_t default_pager_backing_store_monitor_callout;
55e303ae 3949
3950void
91447636 3951default_pager_backing_store_monitor(__unused thread_call_param_t p1,
3952 __unused thread_call_param_t p2)
55e303ae 3953{
91447636 3954// unsigned long long average;
55e303ae 3955 ipc_port_t trigger;
3956 uint64_t deadline;
3957
3958 /*
3959 * We determine whether it will be safe to release some
3960 * backing store by watching the free page level. If
3961 * it remains above the maximum_pages_free threshold for
3962 * at least PF_LATENCY checks (taken PF_INTERVAL seconds apart)
3963 * then we deem it safe.
3964 *
3965 * Note that this establishes a maximum rate at which backing
3966 * store will be released, as each notification (currently)
3967 * only results in a single backing store object being
3968 * released.
3969 */
3970 if (dp_pages_free > maximum_pages_free) {
3971 dp_pages_free_low_count++;
3972 } else {
3973 dp_pages_free_low_count = 0;
3974 }
3975
3976 /* decide whether to send notification */
3977 trigger = IP_NULL;
3978 if (max_pages_trigger_port &&
3979 (backing_store_release_trigger_disable == 0) &&
3980 (dp_pages_free_low_count > PF_LATENCY)) {
3981 trigger = max_pages_trigger_port;
3982 max_pages_trigger_port = NULL;
3983 }
3984
3985 /* send notification */
3986 if (trigger != IP_NULL) {
3987 VSL_LOCK();
3988 if(backing_store_release_trigger_disable != 0) {
3989 assert_wait((event_t)
3990 &backing_store_release_trigger_disable,
3991 THREAD_UNINT);
3992 VSL_UNLOCK();
3993 thread_block(THREAD_CONTINUE_NULL);
3994 } else {
3995 VSL_UNLOCK();
3996 }
3997 default_pager_space_alert(trigger, LO_WAT_ALERT);
3998 ipc_port_release_send(trigger);
3999 dp_pages_free_low_count = 0;
4000 }
4001
4002 clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
91447636 4003 thread_call_enter_delayed(default_pager_backing_store_monitor_callout, deadline);
55e303ae 4004}
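
Taken together, PF_INTERVAL and PF_LATENCY give the release path some hysteresis: the free level has to exceed maximum_pages_free on more than PF_LATENCY consecutive checks taken PF_INTERVAL seconds apart (roughly half a minute), before one LO_WAT_ALERT is sent, and any dip resets the count. A minimal standalone sketch of that counter logic, using made-up free-page samples and an example threshold.

#include <stdio.h>

#define PF_INTERVAL	3	/* seconds between checks, as above */
#define PF_LATENCY	10	/* consecutive intervals required, as above */

int
main(void)
{
	/* made-up samples of dp_pages_free, one per PF_INTERVAL seconds */
	unsigned int samples[] = {
		70000, 70000, 50000, 70000, 70000, 70000, 70000, 70000,
		70000, 70000, 70000, 70000, 70000, 70000, 70000, 70000
	};
	unsigned int maximum_pages_free = 65536;	/* example LO_WAT threshold, in pages */
	unsigned int low_count = 0;			/* consecutive checks above the threshold */

	for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++) {
		if (samples[i] > maximum_pages_free)
			low_count++;
		else
			low_count = 0;			/* any dip resets the latency window */

		if (low_count > PF_LATENCY) {
			printf("t=%us: level held for >%u checks, send one LO_WAT_ALERT\n",
			    (i + 1) * PF_INTERVAL, PF_LATENCY);
			low_count = 0;			/* reset after notifying, as above */
		}
	}
	return 0;
}
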