/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the
 * License may not be used to create, or enable the creation or
 * redistribution of, unlawful or unlicensed copies of an Apple operating
 * system, or to circumvent, violate, or enable the circumvention or
 * violation of, any terms of an Apple operating system software license
 * agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(vm_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * used only for monitoring purposes by the
 * default_pager_object* calls and by ps_delete,
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment that
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}

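/*
 * Note that unlike vstruct_list_insert, this routine does not take the
 * vstruct list lock itself; the caller must already hold VSL_LOCK
 * (as dp_memory_object_deallocate does below).
 */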
__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

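/*
 * A sketch of the resulting calling pattern (the dp_memory_object_*
 * routines below are the real callers):
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);		wait for our seqno; leaves vs locked
 *	vs_start_read(vs);	account for one more reader
 *	vs_unlock(vs);		bump seqno, wake any seqno waiters
 *	...perform the read...
 *	vs_finish_read(vs);	drop reader count, wake waiting writers
 */
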
static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */

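/*
 * Wait for pending asynchronous I/O on this vstruct to drain.
 * Assumes the vstruct is locked on entry and returns with it locked;
 * the lock is dropped around each thread_block.
 */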
__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{
	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}

#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. the sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these numbers in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario.  Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t	vs)
{
	mach_port_seqno_t seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/*
 * This is to be used for the transfer-from-segment code ONLY.
 * The transfer code holds off vs destruction by keeping the
 * vs_async_wait count non-zero.  It will not conflict with
 * other writers on an async basis because it only writes on
 * a cluster basis into fresh (as of sync time) cluster locations.
 */
__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */

vstruct_t
vs_object_create(
	vm_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct.  If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t mem_obj = vs->vs_mem_obj;
	mach_port_t pset;
	mach_port_mscount_t sync;
	mach_port_t previous;
	kern_return_t kr;
	static char here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

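/*
 * First contact from the kernel for a given memory object: record the
 * memory_object_control handle (taking a reference on it) so that later
 * page-in and page-out traffic can be pushed back through it.
 */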
kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused vm_size_t	pager_page_size)
{
	vstruct_t	vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

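/*
 * The default pager keeps no dirty state of its own to flush, so a
 * synchronize request is acknowledged immediately; taking and dropping
 * the sequence-number lock simply serializes us behind earlier requests.
 */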
kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_sync_t	flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_unmap");

	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t	mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/*
	 * A bit of special-case ugliness here: wake up any waiting reads.
	 * These data requests had to be removed from the seqno traffic
	 * because of a performance bottleneck with large memory objects.
	 * The problem will right itself with the new component-based
	 * synchronous interface: the new async will be able to return
	 * failure during its sync phase.  In the meantime ...
	 */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

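/*
 * Take an additional reference on the vstruct backing this memory
 * object.  vs_lookup_safe tolerates a stale object port and yields
 * VSTRUCT_NULL, in which case the request is silently ignored.
 */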
void
dp_memory_object_reference(
	memory_object_t	mem_obj)
{
	vstruct_t	vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

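/*
 * Drop one reference on the vstruct.  On the final release, wait our
 * turn in the seqno stream and for pending asynchronous I/O, unhook
 * the vstruct from the global list, and destroy it.
 */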
void
dp_memory_object_deallocate(
	memory_object_t	mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
	while (!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}

	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if (backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	__unused vm_prot_t	protection_required)
{
	vstruct_t	vs;

	GSTAT(global_stats.gs_pagein_calls++);

	/* CDY: at this moment vs_lookup panics when presented with the */
	/* wrong port.  As we are expanding this pager to support user */
	/* interfaces, this should be changed to return KERN_FAILURE. */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance */
	/* reasons.  We can do this because we know that the read and */
	/* write threads are different, and we rely on synchronization */
	/* of read and write requests at the cache memory_object level. */
	/* We break out wait_for_writers; all of this goes away when */
	/* we get real control of seqno with the new component interface. */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);  /* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	pvs_cluster_read(vs, (vm_offset_t)offset, length);

	vs_finish_read(vs);

	return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been */
/* changed to copy back all data sent to it, in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages */
/* to write back.  As of the writing of this note, this is indeed the case: */
/* the kernel writes back one page at a time through this interface. */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused vm_size_t		size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}

/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t			mem_obj,
	memory_object_offset_t		offset,
	vm_size_t			size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int			*io_error,
	__unused boolean_t		dirty,
	__unused boolean_t		kernel_copy,
	__unused int			upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* must not be blocked by read activities, so the grant of the vs */
	/* lock must be done on a try rather than a blocking basis.  The */
	/* code below relies on the fact that the interface is synchronous. */
	/* Should this interface again become asynchronous for some type of */
	/* pager in the future, the pages will have to be returned through */
	/* a separate, asynchronous path. */

	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if (!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t	upl;
		int	page_list_count = 0;

		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t	upl;
		int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);

	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}

	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 *	Handle requests for memory objects from the
 *	kernel.
 * Notes:
 *	Because we only give out the default memory
 *	manager port to the kernel, we don't have to
 *	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t	vs;

	assert(dmm == default_pager_object);

	vs = vs_object_create(new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_mem_obj = ISVS;
	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t	default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	vs = vs_object_create(size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_mem_obj = ISVS;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

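/*
 * Monitoring interface: return a snapshot of every object this pager
 * backs, as an out-of-line array of (object, size) records plus a
 * parallel array holding a reference to each pager.  This is a
 * consumer of vstruct_list, as noted at the top of the file.
 */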
kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	*objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		*pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = round_page(actual * sizeof *pagers);
	ppotential = psize / sizeof *pagers;
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Returned out-of-line data must be allocated out of
	 * the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = round_page(actual * sizeof *objects);
	opotential = osize / sizeof *objects;
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;

	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t		pager;
		vm_size_t		size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		VS_LOCK(entry);
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers[num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
			   vm_map_round_page(oaddr + osize), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

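/*
 * Monitoring interface: return the list of allocated page offsets for
 * a single memory object.  The offset buffer is grown and the scan
 * retried until a consistent snapshot fits.
 */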
kern_return_t
default_pager_object_pages(
	default_pager_t			default_pager,
	mach_port_t			memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t		*countp)
{
	vm_offset_t		addr = 0;	/* memory for page offsets */
	vm_size_t		size = 0;	/* current memory size */
	vm_map_copy_t		copy;
	default_pager_page_t	*pages = 0;
	unsigned int		potential;
	unsigned int		actual;
	kern_return_t		kr;
	memory_object_t		object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = round_page(actual * sizeof *pages);
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = size / sizeof *pages;
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
			   vm_map_round_page(addr + size), FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);

	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}