/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(vm_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * used only for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head        vstruct_list;

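#if 0
/*
 * Illustrative sketch, not part of the original source: how a
 * monitoring caller (in the style of default_pager_objects() below)
 * walks this list under the VSL lock.  example_count_objects is a
 * hypothetical name.
 */
static unsigned int
example_count_objects(void)
{
        vstruct_t       entry;
        unsigned int    count = 0;

        VSL_LOCK();
        queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
                count++;        /* sample each vstruct while the list is stable */
        }
        VSL_UNLOCK();
        return count;
}
#endif
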
__private_extern__ void
vstruct_list_insert(
        vstruct_t vs)
{
        VSL_LOCK();
        queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
        vstruct_list.vsl_count++;
        VSL_UNLOCK();
}


/* NOTE: unlike vstruct_list_insert, this expects the caller to hold */
/* the VSL lock already (see dp_memory_object_deallocate below).     */
__private_extern__ void
vstruct_list_delete(
        vstruct_t vs)
{
        queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
        vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

static unsigned int default_pager_total = 0;            /* debugging */
static unsigned int default_pager_wait_seqno = 0;       /* debugging */
static unsigned int default_pager_wait_read = 0;        /* debugging */
static unsigned int default_pager_wait_write = 0;       /* debugging */

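#if 0
/*
 * Illustrative sketch, not part of the original source: the intended
 * composition of the sequencing primitives defined below, for a
 * hypothetical read-request handler.
 */
static void
example_handle_read(vstruct_t vs)
{
        vs_lock(vs);                    /* wait for our seqno; leaves vs locked */
        vs_wait_for_writers(vs);        /* reads must not pass earlier writes */
        vs_start_read(vs);              /* register one more reader */
        vs_unlock(vs);                  /* bump seqno, admit the next request */

        /* ... service the data request ... */

        vs_finish_read(vs);             /* drop reader count, wake any waiter */
}
#endif
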
__private_extern__ void
vs_async_wait(
        vstruct_t       vs)
{

        ASSERT(vs->vs_async_pending >= 0);
        while (vs->vs_async_pending > 0) {
                vs->vs_waiting_async = TRUE;
                assert_wait(&vs->vs_async_pending, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }
        ASSERT(vs->vs_async_pending == 0);
}


#if     PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *      by a single thread if the receiver is multithreaded and
 *      the interfaces are asynchronous (i.e. sender can generate
 *      more than one request before the first is received in the
 *      pager).  Normally, IPC would generate these numbers in that
 *      case.  But we are trying to avoid using IPC for the in-kernel
 *      scenario.  Since these are actually invoked synchronously
 *      anyway (in-kernel), we can just fake the sequence number
 *      generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
        vstruct_t               vs)
{
        mach_port_seqno_t       seqno;

        default_pager_total++;
        VS_LOCK(vs);

        seqno = vs->vs_next_seqno++;

        while (vs->vs_seqno != seqno) {
                default_pager_wait_seqno++;
                vs->vs_waiting_seqno = TRUE;
                assert_wait(&vs->vs_seqno, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
        vs->vs_seqno++;
        if (vs->vs_waiting_seqno) {
                vs->vs_waiting_seqno = FALSE;
                VS_UNLOCK(vs);
                thread_wakeup(&vs->vs_seqno);
                return;
        }
        VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
        vstruct_t vs)
{
        vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
        vstruct_t vs)
{
        while (vs->vs_readers != 0) {
                default_pager_wait_read++;
                vs->vs_waiting_read = TRUE;
                assert_wait(&vs->vs_readers, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
        vstruct_t vs)
{
        VS_LOCK(vs);
        if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
                vs->vs_waiting_read = FALSE;
                VS_UNLOCK(vs);
                thread_wakeup(&vs->vs_readers);
                return;
        }
        VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
        vstruct_t vs)
{
        vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
        vstruct_t vs)
{
        while (vs->vs_writers != 0) {
                default_pager_wait_write++;
                vs->vs_waiting_write = TRUE;
                assert_wait(&vs->vs_writers, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }
        vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY.  The */
/* transfer code holds off vs destruction by keeping the            */
/* vs_async_wait count non-zero.  It will not conflict with         */
/* other writers on an async basis because it only writes on        */
/* a cluster basis into fresh (as of sync time) cluster locations.  */

__private_extern__ void
vs_wait_for_sync_writers(
        vstruct_t vs)
{
        while (vs->vs_writers != 0) {
                default_pager_wait_write++;
                vs->vs_waiting_write = TRUE;
                assert_wait(&vs->vs_writers, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }
}


/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
        vstruct_t vs)
{
        VS_LOCK(vs);
        if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
                vs->vs_waiting_write = FALSE;
                VS_UNLOCK(vs);
                thread_wakeup(&vs->vs_writers);
                return;
        }
        VS_UNLOCK(vs);
}
#endif  /* PARALLEL */

vstruct_t
vs_object_create(
        vm_size_t size)
{
        vstruct_t       vs;

        /*
         * Allocate a vstruct. If there are any problems, then report them
         * to the console.
         */
        vs = ps_vstruct_create(size);
        if (vs == VSTRUCT_NULL) {
                dprintf(("vs_object_create: unable to allocate %s\n",
                         "-- either run swapon command or reboot"));
                return VSTRUCT_NULL;
        }

        return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);   /* forward */

void
default_pager_add(
        vstruct_t vs,
        boolean_t internal)
{
        memory_object_t         mem_obj = vs->vs_mem_obj;
        mach_port_t             pset;
        mach_port_mscount_t     sync;
        mach_port_t             previous;
        kern_return_t           kr;
        static char             here[] = "default_pager_add";

        /*
         * The port currently has a make-send count of zero,
         * because either we just created the port or we just
         * received the port in a memory_object_create request.
         */

        if (internal) {
                /* possibly generate an immediate no-senders notification */
                sync = 0;
                pset = default_pager_internal_set;
        } else {
                /* delay notification till send right is created */
                sync = 1;
                pset = default_pager_external_set;
        }

        ipc_port_make_sonce(mem_obj);
        ip_lock(mem_obj);  /* unlocked in nsrequest below */
        ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

kern_return_t
dp_memory_object_init(
        memory_object_t         mem_obj,
        memory_object_control_t control,
        __unused vm_size_t      pager_page_size)
{
        vstruct_t               vs;

        assert(pager_page_size == vm_page_size);

        memory_object_control_reference(control);

        vs_lookup(mem_obj, vs);
        vs_lock(vs);

        if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
                Panic("bad request");

        vs->vs_control = control;
        vs_unlock(vs);

        return KERN_SUCCESS;
}
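
#if 0
/*
 * Illustrative summary, not part of the original source: the
 * per-object lifecycle as this pager sees it --
 *
 *      dp_memory_object_init()        kernel attaches; control cached
 *      dp_memory_object_data_*()      paging traffic
 *      dp_memory_object_terminate()   kernel detaches; control released
 *      dp_memory_object_deallocate()  last reference dropped; vs freed
 */
#endif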

kern_return_t
dp_memory_object_synchronize(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
        vm_size_t               length,
        __unused vm_sync_t      flags)
{
        vstruct_t       vs;

        vs_lookup(mem_obj, vs);
        vs_lock(vs);
        vs_unlock(vs);

        memory_object_synchronize_completed(vs->vs_control, offset, length);

        return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_unmap(
        __unused memory_object_t        mem_obj)
{
        panic("dp_memory_object_unmap");

        return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
        memory_object_t         mem_obj)
{
        memory_object_control_t control;
        vstruct_t               vs;

        /*
         * control port is a receive right, not a send right.
         */

        vs_lookup(mem_obj, vs);
        vs_lock(vs);

        /*
         * Wait for read and write requests to terminate.
         */

        vs_wait_for_readers(vs);
        vs_wait_for_writers(vs);

        /*
         * After memory_object_terminate both memory_object_init
         * and a no-senders notification are possible, so we need
         * to clean up our reference to the memory_object_control
         * to prepare for a new init.
         */

        control = vs->vs_control;
        vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

        /* A bit of special-case ugliness here: wake up any waiting reads. */
        /* These data requests had to be removed from the seqno traffic    */
        /* because of a performance bottleneck with large memory objects.  */
        /* The problem will right itself with the new component-based      */
        /* synchronous interface.  The new async will be able to return    */
        /* failure during its sync phase.  In the mean time ...            */

        thread_wakeup(&vs->vs_writers);
        thread_wakeup(&vs->vs_async_pending);

        vs_unlock(vs);

        /*
         * Now we deallocate our reference on the control.
         */
        memory_object_control_deallocate(control);
        return KERN_SUCCESS;
}

void
dp_memory_object_reference(
        memory_object_t         mem_obj)
{
        vstruct_t               vs;

        vs_lookup_safe(mem_obj, vs);
        if (vs == VSTRUCT_NULL)
                return;

        VS_LOCK(vs);
        assert(vs->vs_references > 0);
        vs->vs_references++;
        VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
        memory_object_t         mem_obj)
{
        vstruct_t               vs;
        mach_port_seqno_t       seqno;

        /*
         * Because we don't give out multiple first references
         * for a memory object, there can't be a race
         * between getting a deallocate call and creating
         * a new reference for the object.
         */

        vs_lookup_safe(mem_obj, vs);
        if (vs == VSTRUCT_NULL)
                return;

        VS_LOCK(vs);
        if (--vs->vs_references > 0) {
                VS_UNLOCK(vs);
                return;
        }

        seqno = vs->vs_next_seqno++;
        while (vs->vs_seqno != seqno) {
                default_pager_wait_seqno++;
                vs->vs_waiting_seqno = TRUE;
                assert_wait(&vs->vs_seqno, THREAD_UNINT);
                VS_UNLOCK(vs);
                thread_block(THREAD_CONTINUE_NULL);
                VS_LOCK(vs);
        }

        vs_async_wait(vs);      /* wait for pending async IO */

        /* do not delete the vs structure until the referencing pointers */
        /* in the vstruct list have been expunged */

        /* get VSL_LOCK out of order by using TRY mechanism */
        while(!VSL_LOCK_TRY()) {
                VS_UNLOCK(vs);
                VSL_LOCK();
                VSL_UNLOCK();
                VS_LOCK(vs);
                vs_async_wait(vs);      /* wait for pending async IO */
        }


        /*
         * We shouldn't get a deallocation call
         * when the kernel has the object cached.
         */
        if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
                Panic("bad request");

        /*
         * Unlock the pager (though there should be no one
         * waiting for it).
         */
        VS_UNLOCK(vs);

        /* Lock out paging segment removal for the duration of this  */
        /* call.  We are vulnerable to losing a paging segment we     */
        /* rely on as soon as we remove ourselves from the VSL and    */
        /* unlock.                                                    */

        /* Keep our thread from blocking on attempt to trigger backing */
        /* store release */
        backing_store_release_trigger_disable += 1;

        /*
         * Remove the memory object port association, and then
         * destroy the port itself.  We must remove the object
         * from the port list before deallocating the pager,
         * because of default_pager_objects.
         */
        vstruct_list_delete(vs);
        VSL_UNLOCK();

        ps_vstruct_dealloc(vs);

        VSL_LOCK();
        backing_store_release_trigger_disable -= 1;
        if(backing_store_release_trigger_disable == 0) {
                thread_wakeup((event_t)&backing_store_release_trigger_disable);
        }
        VSL_UNLOCK();
}
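
#if 0
/*
 * Illustrative sketch, not part of the original source.  The normal
 * lock order in this file is VSL (list) before VS (entry); the
 * deallocate path above already holds the VS lock, so it acquires the
 * VSL lock out of order with a try-lock, backing off to the legal
 * order on failure.  example_lock_both is a hypothetical name.
 */
static void
example_lock_both(vstruct_t vs)
{
        VS_LOCK(vs);
        while (!VSL_LOCK_TRY()) {       /* out-of-order acquire */
                VS_UNLOCK(vs);          /* back off to legal order */
                VSL_LOCK();             /* queue behind current holder */
                VSL_UNLOCK();
                VS_LOCK(vs);            /* re-acquire and retry */
        }
        /* ... both locks held here ... */
        VSL_UNLOCK();
        VS_UNLOCK(vs);
}
#endif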

kern_return_t
dp_memory_object_data_request(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
        vm_size_t               length,
        __unused vm_prot_t      protection_required)
{
        vstruct_t               vs;

        GSTAT(global_stats.gs_pagein_calls++);


        /* CDY: at this moment vs_lookup panics when presented with the */
        /* wrong port.  As we are expanding this pager to support user  */
        /* interfaces, this should be changed to return KERN_FAILURE.   */
        vs_lookup(mem_obj, vs);
        vs_lock(vs);

        /* We are going to relax the strict sequencing here for performance */
        /* reasons.  We can do this because we know that the read and       */
        /* write threads are different, and we rely on synchronization of   */
        /* read and write requests at the cache memory_object level.  This  */
        /* breaks out wait_for_writers; all of this goes away when we get   */
        /* real control of seqno with the new component interface.          */

        if (vs->vs_writers != 0) {
                /* you can't hold on to the seqno and go */
                /* to sleep like that */
                vs_unlock(vs); /* bump internal count of seqno */
                VS_LOCK(vs);
                while (vs->vs_writers != 0) {
                        default_pager_wait_write++;
                        vs->vs_waiting_write = TRUE;
                        assert_wait(&vs->vs_writers, THREAD_UNINT);
                        VS_UNLOCK(vs);
                        thread_block(THREAD_CONTINUE_NULL);
                        VS_LOCK(vs);
                        vs_async_wait(vs);
                }
                if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
                        VS_UNLOCK(vs);
                        return KERN_FAILURE;
                }
                vs_start_read(vs);
                VS_UNLOCK(vs);
        } else {
                vs_start_read(vs);
                vs_unlock(vs);
        }

        /*
         * Request must be on a page boundary and a multiple of pages.
         */
        if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
                Panic("bad alignment");

        pvs_cluster_read(vs, (vm_offset_t)offset, length);

        vs_finish_read(vs);

        return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been  */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages  */
/* to write back.  As of the writing of this note, this is indeed the case: */
/* the kernel writes back one page at a time through this interface.        */

kern_return_t
dp_memory_object_data_initialize(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
        vm_size_t               size)
{
        vstruct_t       vs;

        DP_DEBUG(DEBUG_MO_EXTERNAL,
                 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
                  (int)mem_obj, (int)offset, (int)size));
        GSTAT(global_stats.gs_pages_init += atop_32(size));

        vs_lookup(mem_obj, vs);
        vs_lock(vs);
        vs_start_write(vs);
        vs_unlock(vs);

        /*
         * Write the data via clustered writes. vs_cluster_write will
         * loop if the address range specified crosses cluster
         * boundaries.
         */
        vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

        vs_finish_write(vs);

        return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
        __unused memory_object_t        mem_obj,
        __unused memory_object_offset_t offset,
        __unused vm_size_t              size,
        __unused vm_prot_t              desired_access)
{
        Panic("dp_memory_object_data_unlock: illegal");
        return KERN_FAILURE;
}


/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
        memory_object_t         mem_obj,
        memory_object_offset_t  offset,
        vm_size_t               size,
        __unused memory_object_offset_t *resid_offset,
        __unused int            *io_error,
        __unused boolean_t      dirty,
        __unused boolean_t      kernel_copy,
        __unused int            upl_flags)
{
        vstruct_t       vs;

        DP_DEBUG(DEBUG_MO_EXTERNAL,
                 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
                  (int)mem_obj, (int)offset, (int)size));
        GSTAT(global_stats.gs_pageout_calls++);

        /* This routine is called by the pageout thread.  The pageout      */
        /* thread cannot be blocked by read activities, so the grant of    */
        /* the vs lock must be done on a try versus a blocking basis.      */
        /* The code below relies on the fact that the interface is         */
        /* synchronous.  Should this interface be again async for some     */
        /* type of pager in the future, the pages will have to be          */
        /* returned through a separate, asynchronous path.                 */

        vs_lookup(mem_obj, vs);

        default_pager_total++;
        if(!VS_TRY_LOCK(vs)) {
                /* the call below will not be done by caller when we have */
                /* a synchronous interface */
                /* return KERN_LOCK_OWNED; */
                upl_t           upl;
                int             page_list_count = 0;
                memory_object_super_upl_request(vs->vs_control,
                                        (memory_object_offset_t)offset,
                                        size, size,
                                        &upl, NULL, &page_list_count,
                                        UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
                                        | UPL_NO_SYNC | UPL_COPYOUT_FROM);
                upl_abort(upl,0);
                upl_deallocate(upl);
                return KERN_SUCCESS;
        }

        if ((vs->vs_seqno != vs->vs_next_seqno++)
            || (vs->vs_readers)
            || (vs->vs_xfer_pending)) {
                upl_t           upl;
                int             page_list_count = 0;

                vs->vs_next_seqno--;
                VS_UNLOCK(vs);

                /* the call below will not be done by caller when we have */
                /* a synchronous interface */
                /* return KERN_LOCK_OWNED; */
                memory_object_super_upl_request(vs->vs_control,
                                        (memory_object_offset_t)offset,
                                        size, size,
                                        &upl, NULL, &page_list_count,
                                        UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
                                        | UPL_NO_SYNC | UPL_COPYOUT_FROM);
                upl_abort(upl,0);
                upl_deallocate(upl);
                return KERN_SUCCESS;
        }

        if ((size % vm_page_size) != 0)
                Panic("bad alignment");

        vs_start_write(vs);


        vs->vs_async_pending += 1;  /* protect from backing store contraction */
        vs_unlock(vs);

        /*
         * Write the data via clustered writes. vs_cluster_write will
         * loop if the address range specified crosses cluster
         * boundaries.
         */
        vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

        vs_finish_write(vs);

        /* temporary, need a finer lock based on cluster */

        VS_LOCK(vs);
        vs->vs_async_pending -= 1;  /* release vs_async_wait */
        if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
                vs->vs_waiting_async = FALSE;
                VS_UNLOCK(vs);
                thread_wakeup(&vs->vs_async_pending);
        } else {
                VS_UNLOCK(vs);
        }


        return KERN_SUCCESS;
}

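#if 0
/*
 * Illustrative note, not part of the original source: in both busy
 * paths above, the pager pulls the returned range back as a UPL with
 * UPL_NOBLOCK | UPL_COPYOUT_FROM and immediately aborts it.  As the
 * in-line comments indicate, this stands in for returning
 * KERN_LOCK_OWNED under a synchronous interface: the aborted pages
 * appear to go back to the VM unwritten, to be resubmitted by the
 * pageout thread on a later pass.
 */
#endif
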
/*
 * Routine:     default_pager_memory_object_create
 * Purpose:
 *      Handle requests for memory objects from the
 *      kernel.
 * Notes:
 *      Because we only give out the default memory
 *      manager port to the kernel, we don't have to
 *      be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
        __unused memory_object_default_t        dmm,
        vm_size_t               new_size,
        memory_object_t         *new_mem_obj)
{
        vstruct_t               vs;

        assert(dmm == default_pager_object);

        vs = vs_object_create(new_size);
        if (vs == VSTRUCT_NULL)
                return KERN_RESOURCE_SHORTAGE;

        vs->vs_next_seqno = 0;

        /*
         * Set up associations between this memory object
         * and this default_pager structure
         */

        vs->vs_mem_obj = ISVS;
        vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

        /*
         * After this, other threads might receive requests
         * for this memory object or find it in the port list.
         */

        vstruct_list_insert(vs);
        *new_mem_obj = vs_to_mem_obj(vs);
        return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
        default_pager_t default_pager,
        vm_size_t       size,
        memory_object_t *mem_objp)
{
        vstruct_t       vs;

        if (default_pager != default_pager_object)
                return KERN_INVALID_ARGUMENT;

        vs = vs_object_create(size);
        if (vs == VSTRUCT_NULL)
                return KERN_RESOURCE_SHORTAGE;

        /*
         * Set up associations between the default pager
         * and this vstruct structure
         */
        vs->vs_mem_obj = ISVS;
        vstruct_list_insert(vs);
        *mem_objp = vs_to_mem_obj(vs);
        return KERN_SUCCESS;
}
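
#if 0
/*
 * Illustrative sketch, not part of the original source: a caller
 * holding the default pager's own port creates an external object
 * through the path above.  example_make_object is a hypothetical
 * name.
 */
static kern_return_t
example_make_object(vm_size_t size, memory_object_t *objp)
{
        /* the first argument must be the pager's port; it is checked above */
        return default_pager_object_create(default_pager_object,
                                           round_page(size), objp);
}
#endif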

kern_return_t
default_pager_objects(
        default_pager_t default_pager,
        default_pager_object_array_t    *objectsp,
        mach_msg_type_number_t          *ocountp,
        mach_port_array_t               *portsp,
        mach_msg_type_number_t          *pcountp)
{
        vm_offset_t             oaddr = 0;      /* memory for objects */
        vm_size_t               osize = 0;      /* current size */
        default_pager_object_t  * objects;
        unsigned int            opotential = 0;

        vm_map_copy_t           pcopy = 0;      /* copy handle for pagers */
        vm_size_t               psize = 0;      /* current size */
        memory_object_t         * pagers;
        unsigned int            ppotential = 0;

        unsigned int            actual;
        unsigned int            num_objects;
        kern_return_t           kr;
        vstruct_t               entry;

        if (default_pager != default_pager_object)
                return KERN_INVALID_ARGUMENT;

        /*
         * We will send no more than this many
         */
        actual = vstruct_list.vsl_count;

        /*
         * Our out-of-line port arrays are simply kalloc'ed.
         */
        psize = round_page(actual * sizeof * pagers);
        ppotential = psize / sizeof * pagers;
        pagers = (memory_object_t *)kalloc(psize);
        if (0 == pagers)
                return KERN_RESOURCE_SHORTAGE;

        /*
         * Returned out-of-line data must be allocated out of
         * the ipc_kernel_map, wired down, filled in, and
         * then "copied in" as if it had been sent by a
         * user process.
         */
        osize = round_page(actual * sizeof * objects);
        opotential = osize / sizeof * objects;
        kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
        if (KERN_SUCCESS != kr) {
                kfree(pagers, psize);
                return KERN_RESOURCE_SHORTAGE;
        }
        objects = (default_pager_object_t *)oaddr;


        /*
         * Now scan the list.
         */

        VSL_LOCK();

        num_objects = 0;
        queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

                memory_object_t pager;
                vm_size_t       size;

                if ((num_objects >= opotential) ||
                    (num_objects >= ppotential)) {

                        /*
                         * This should be rare.  In any case,
                         * we will only miss recent objects,
                         * because they are added at the end.
                         */
                        break;
                }

                /*
                 * Avoid interfering with normal operations
                 */
                if (!VS_MAP_TRY_LOCK(entry))
                        goto not_this_one;
                size = ps_vstruct_allocated_size(entry);
                VS_MAP_UNLOCK(entry);

                /*
                 * We need a reference for our caller.  Adding this
                 * reference through the linked list could race with
                 * destruction of the object.  If we find the object
                 * has no references, just give up on it.
                 */
                VS_LOCK(entry);
                if (entry->vs_references == 0) {
                        VS_UNLOCK(entry);
                        goto not_this_one;
                }
                pager = vs_to_mem_obj(entry);
                dp_memory_object_reference(pager);
                VS_UNLOCK(entry);

                /* the arrays are wired, so no deadlock worries */

                objects[num_objects].dpo_object = (vm_offset_t) entry;
                objects[num_objects].dpo_size = size;
                pagers [num_objects++] = pager;
                continue;

            not_this_one:
                /*
                 * Do not return garbage
                 */
                objects[num_objects].dpo_object = (vm_offset_t) 0;
                objects[num_objects].dpo_size = 0;
                pagers[num_objects++] = MEMORY_OBJECT_NULL;

        }

        VSL_UNLOCK();

        /* clear out any excess allocation */
        while (num_objects < opotential) {
                objects[--opotential].dpo_object = (vm_offset_t) 0;
                objects[opotential].dpo_size = 0;
        }
        while (num_objects < ppotential) {
                pagers[--ppotential] = MEMORY_OBJECT_NULL;
        }

        kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
                           vm_map_round_page(oaddr + osize), FALSE);
        assert(KERN_SUCCESS == kr);
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
                           (vm_map_size_t)osize, TRUE, &pcopy);
        assert(KERN_SUCCESS == kr);

        *objectsp = (default_pager_object_array_t)objects;
        *ocountp = num_objects;
        *portsp = (mach_port_array_t)pcopy;
        *pcountp = num_objects;

        return KERN_SUCCESS;
}
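
#if 0
/*
 * Illustrative sketch, not part of the original source: the pattern
 * used above (and in default_pager_object_pages below) for returning
 * a variable-length array from a kernel server -- allocate in
 * ipc_kernel_map, fill in while wired, then unwire and vm_map_copyin()
 * so the reply travels as out-of-line data owned by the IPC system.
 * example_reply_array is a hypothetical name.
 */
static kern_return_t
example_reply_array(vm_size_t bytes, vm_map_copy_t *copyp)
{
        vm_offset_t     addr;
        kern_return_t   kr;

        kr = kmem_alloc(ipc_kernel_map, &addr, round_page(bytes));
        if (kr != KERN_SUCCESS)
                return kr;

        /* ... fill in the wired memory at addr ... */

        kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
                           vm_map_round_page(addr + bytes), FALSE);
        assert(kr == KERN_SUCCESS);
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
                           (vm_map_size_t)bytes, TRUE, copyp);
        assert(kr == KERN_SUCCESS);
        return KERN_SUCCESS;
}
#endif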

kern_return_t
default_pager_object_pages(
        default_pager_t         default_pager,
        mach_port_t             memory_object,
        default_pager_page_array_t      *pagesp,
        mach_msg_type_number_t  *countp)
{
        vm_offset_t             addr = 0; /* memory for page offsets */
        vm_size_t               size = 0; /* current memory size */
        vm_map_copy_t           copy;
        default_pager_page_t    * pages = 0;
        unsigned int            potential;
        unsigned int            actual;
        kern_return_t           kr;
        memory_object_t         object;

        if (default_pager != default_pager_object)
                return KERN_INVALID_ARGUMENT;

        object = (memory_object_t) memory_object;

        potential = 0;
        for (;;) {
                vstruct_t       entry;

                VSL_LOCK();
                queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
                              vs_links) {
                        VS_LOCK(entry);
                        if (vs_to_mem_obj(entry) == object) {
                                VSL_UNLOCK();
                                goto found_object;
                        }
                        VS_UNLOCK(entry);
                }
                VSL_UNLOCK();

                /* did not find the object */
                if (0 != addr)
                        kmem_free(ipc_kernel_map, addr, size);

                return KERN_INVALID_ARGUMENT;

            found_object:

                if (!VS_MAP_TRY_LOCK(entry)) {
                        /* oh well bad luck */
                        int wresult;

                        VS_UNLOCK(entry);

                        assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
                        wresult = thread_block(THREAD_CONTINUE_NULL);
                        assert(wresult == THREAD_TIMED_OUT);
                        continue;
                }

                actual = ps_vstruct_allocated_pages(entry, pages, potential);
                VS_MAP_UNLOCK(entry);
                VS_UNLOCK(entry);

                if (actual <= potential)
                        break;

                /* allocate more memory */
                if (0 != addr)
                        kmem_free(ipc_kernel_map, addr, size);

                size = round_page(actual * sizeof * pages);
                kr = kmem_alloc(ipc_kernel_map, &addr, size);
                if (KERN_SUCCESS != kr)
                        return KERN_RESOURCE_SHORTAGE;

                pages = (default_pager_page_t *)addr;
                potential = size / sizeof * pages;
        }

        /*
         * Clear unused memory.
         */
        while (actual < potential)
                pages[--potential].dpp_offset = 0;

        kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
                           vm_map_round_page(addr + size), FALSE);
        assert(KERN_SUCCESS == kr);
        kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
                           (vm_map_size_t)size, TRUE, &copy);
        assert(KERN_SUCCESS == kr);


        *pagesp = (default_pager_page_array_t)copy;
        *countp = actual;
        return KERN_SUCCESS;
}