/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;
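/* Both the list and its count are protected by VSL_LOCK. */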

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}


__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */
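
/*
 * Typical usage of the gating primitives defined below, as in
 * dp_memory_object_data_request() and dp_memory_object_data_initialize():
 * acquire the pager in sequence order with vs_lock(), wait out any
 * conflicting operations (vs_wait_for_writers(), or the inline wait in
 * dp_memory_object_data_request()), register the new operation with
 * vs_start_read()/vs_start_write(), drop the lock with vs_unlock(),
 * perform the cluster I/O, and finally call vs_finish_read()/
 * vs_finish_write() to wake up any waiters.
 */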

static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */
static unsigned int default_pager_wait_refs = 0;	/* debugging */

__private_extern__ void
vs_async_wait(
	vstruct_t vs)
{
	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}


#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these numbers in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario.  Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t vs)
{
	mach_port_seqno_t seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used for the transfer from segment code ONLY.  The  */
/* transfer code holds off vs destruction by keeping the             */
/* vs_async_wait count non-zero.  It will not conflict with          */
/* other writers on an async basis because it only writes on         */
/* a cluster basis into fresh (as of sync time) cluster locations.   */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}


/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */

vstruct_t
vs_object_create(
	vm_size_t size)
{
	vstruct_t vs;

	/*
	 * Allocate a vstruct. If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t mem_obj = vs->vs_mem_obj;
	mach_port_t pset;
	mach_port_mscount_t sync;
	mach_port_t previous;
	kern_return_t kr;
	static char here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

kern_return_t
dp_memory_object_init(
	memory_object_t mem_obj,
	memory_object_control_t control,
	vm_size_t pager_page_size)
{
	vstruct_t vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	vm_size_t length,
	vm_sync_t flags)
{
	vstruct_t vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_unmap(
	memory_object_t mem_obj)
{
	panic("dp_memory_object_unmap");

	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t mem_obj)
{
	memory_object_control_t control;
	vstruct_t vs;
	kern_return_t kr;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* a bit of special case ugliness here.  Wakeup any waiting reads */
	/* these data requests had to be removed from the seqno traffic   */
	/* based on a performance bottleneck with large memory objects    */
	/* the problem will right itself with the new component based     */
	/* synchronous interface.  The new async will be able to return   */
	/* failure during its sync phase.  In the mean time ...           */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t mem_obj)
{
	vstruct_t vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

extern ipc_port_t max_pages_trigger_port;
extern int dp_pages_free;
extern int maximum_pages_free;

void
dp_memory_object_deallocate(
	memory_object_t mem_obj)
{
	vstruct_t vs;
	mach_port_seqno_t seqno;
	ipc_port_t trigger;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
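	/*
	 * We already hold the VS lock, but other paths (e.g.
	 * default_pager_objects) take the vstruct list lock before the
	 * VS lock, so blocking on VSL_LOCK here could deadlock.  Try for
	 * the list lock instead; on failure drop the VS lock, wait for the
	 * list lock to come free, retake the VS lock, re-wait for pending
	 * async IO, and try again.
	 */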
	while (!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}


	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this    */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock   */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if (backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();

	PSL_LOCK();
	if (max_pages_trigger_port
	    && (backing_store_release_trigger_disable == 0)
	    && (dp_pages_free > maximum_pages_free)) {
		trigger = max_pages_trigger_port;
		max_pages_trigger_port = NULL;
	} else
		trigger = IP_NULL;
	PSL_UNLOCK();

	if (trigger != IP_NULL) {
		default_pager_space_alert(trigger, LO_WAT_ALERT);
		ipc_port_release_send(trigger);
	}

}

kern_return_t
dp_memory_object_data_request(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	vm_size_t length,
	vm_prot_t protection_required)
{
	vstruct_t vs;

	GSTAT(global_stats.gs_pagein_calls++);


	/* CDY at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces  */
	/* this should be changed to return KERN_FAILURE.                    */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance */
	/* reasons.  We can do this because we know that the read and       */
	/* write threads are different and we rely on synchronization       */
	/* of read and write requests at the cache memory_object level.     */
	/* This breaks out wait_for_writers; all of this goes away when     */
	/* we get real control of seqno with the new component interface.   */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);	/* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	pvs_cluster_read(vs, (vm_offset_t)offset, length);

	vs_finish_read(vs);

	return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been  */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages  */
/* to write back.  As of the writing of this note, this is indeed the case; */
/* the kernel writes back one page at a time through this interface.        */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	vm_size_t size)
{
	vstruct_t vs;

	DEBUG(DEBUG_MO_EXTERNAL,
	      ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
	       (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_prot_t desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


kern_return_t
dp_memory_object_data_return(
	memory_object_t mem_obj,
	memory_object_offset_t offset,
	vm_size_t size,
	boolean_t dirty,
	boolean_t kernel_copy)
{
	vstruct_t vs;

	DEBUG(DEBUG_MO_EXTERNAL,
	      ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
	       (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* must not be left blocked waiting on read activity, so the grant   */
	/* of the vs lock must be done on a try versus a blocking basis.     */
	/* The code below relies on the fact that the interface is           */
	/* synchronous.  Should this interface be made async again for some  */
	/* type of pager in the future, the pages will have to be returned   */
	/* through a separate, asynchronous path.                             */

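	/*
	 * When the vs lock cannot be taken immediately, or when ordering,
	 * readers, or a pending transfer would force a wait, the pages are
	 * instead captured in a UPL which is immediately aborted, so that
	 * the pageout is, in effect, deferred and retried later rather than
	 * blocking this thread.
	 */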
	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if (!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t upl;
		int page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t upl;
		int page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	memory_object_default_t dmm,
	vm_size_t new_size,
	memory_object_t *new_mem_obj)
{
	vstruct_t vs;

	assert(dmm == default_pager_object);

	vs = vs_object_create(new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_mem_obj = ISVS;
	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t pager,
	vm_size_t size,
	memory_object_t *mem_objp)
{
	vstruct_t vs;
	kern_return_t result;
	struct vstruct_alias *alias_struct;


	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	vs = vs_object_create(size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_mem_obj = ISVS;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

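/*
 * Return a snapshot of the existing vstructs and their pagers for the
 * monitoring interfaces described at the top of this file; the inline
 * buffers are wired and replaced with larger kernel allocations when
 * they are too small.
 */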
kern_return_t
default_pager_objects(
	default_pager_t pager,
	default_pager_object_array_t *objectsp,
	mach_msg_type_number_t *ocountp,
	memory_object_array_t *pagersp,
	mach_msg_type_number_t *pcountp)
{
	vm_offset_t oaddr = 0;	/* memory for objects */
	vm_size_t osize = 0;	/* current size */
	default_pager_object_t * objects;
	unsigned int opotential;

	vm_offset_t paddr = 0;	/* memory for pagers */
	vm_size_t psize = 0;	/* current size */
	memory_object_t * pagers;
	unsigned int ppotential;

	unsigned int actual;
	unsigned int num_objects;
	kern_return_t kr;
	vstruct_t entry;
/*
	if (pager != default_pager_default_port)
		return KERN_INVALID_ARGUMENT;
*/

	/* start with the inline memory */

	kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
			    (vm_map_copy_t) *objectsp);

	if (kr != KERN_SUCCESS)
		return kr;

	osize = round_page(*ocountp * sizeof * objects);
	kr = vm_map_wire(ipc_kernel_map,
			 trunc_page((vm_offset_t)objects),
			 round_page(((vm_offset_t)objects) + osize),
			 VM_PROT_READ|VM_PROT_WRITE, FALSE);
	osize = 0;

	*objectsp = objects;
	/* we start with the inline space */


	num_objects = 0;
	opotential = *ocountp;

	pagers = (memory_object_t *) *pagersp;
	ppotential = *pcountp;

	VSL_LOCK();

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;
	VSL_UNLOCK();

	if (opotential < actual) {
		vm_offset_t newaddr;
		vm_size_t newsize;

		newsize = 2 * round_page(actual * sizeof * objects);

		kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
		if (kr != KERN_SUCCESS)
			goto nomemory;

		oaddr = newaddr;
		osize = newsize;
		opotential = osize / sizeof * objects;
		objects = (default_pager_object_t *)oaddr;
	}

	if (ppotential < actual) {
		vm_offset_t newaddr;
		vm_size_t newsize;

		newsize = 2 * round_page(actual * sizeof * pagers);

		kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
		if (kr != KERN_SUCCESS)
			goto nomemory;

		paddr = newaddr;
		psize = newsize;
		ppotential = psize / sizeof * pagers;
		pagers = (memory_object_t *)paddr;
	}

	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t pager;
		vm_size_t size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		VS_LOCK(entry);
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		dp_memory_object_reference(vs_to_mem_obj(entry));
		pager = vs_to_mem_obj(entry);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers[num_objects++] = pager;
		continue;

	    not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/*
	 * Deallocate and clear unused memory.
	 * (Returned memory will automagically become pageable.)
	 */

	if (objects == *objectsp) {

		/*
		 * Our returned information fit inline.
		 * Nothing to deallocate.
		 */
		*ocountp = num_objects;
	} else if (actual == 0) {
		(void) vm_deallocate(kernel_map, oaddr, osize);

		/* return zero items inline */
		*ocountp = 0;
	} else {
		vm_offset_t used;

		used = round_page(actual * sizeof * objects);

		if (used != osize)
			(void) vm_deallocate(kernel_map,
					     oaddr + used, osize - used);

		*objectsp = objects;
		*ocountp = num_objects;
	}

	if (pagers == (memory_object_t *)*pagersp) {

		/*
		 * Our returned information fit inline.
		 * Nothing to deallocate.
		 */

		*pcountp = num_objects;
	} else if (actual == 0) {
		(void) vm_deallocate(kernel_map, paddr, psize);

		/* return zero items inline */
		*pcountp = 0;
	} else {
		vm_offset_t used;

		used = round_page(actual * sizeof * pagers);

		if (used != psize)
			(void) vm_deallocate(kernel_map,
					     paddr + used, psize - used);

		*pagersp = (memory_object_array_t)pagers;
		*pcountp = num_objects;
	}
	(void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
			     *ocountp + (vm_offset_t)objects, FALSE);
	(void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
			     *ocountp, TRUE, (vm_map_copy_t *)objectsp);

	return KERN_SUCCESS;

    nomemory:
	{
		register int i;
		for (i = 0; i < num_objects; i++)
			if (pagers[i] != MEMORY_OBJECT_NULL)
				memory_object_deallocate(pagers[i]);
	}

	if (objects != *objectsp)
		(void) vm_deallocate(kernel_map, oaddr, osize);

	if (pagers != (memory_object_t *)*pagersp)
		(void) vm_deallocate(kernel_map, paddr, psize);

	return KERN_RESOURCE_SHORTAGE;
}

kern_return_t
default_pager_object_pages(
	default_pager_t pager,
	memory_object_t object,
	default_pager_page_array_t *pagesp,
	mach_msg_type_number_t *countp)
{
	vm_offset_t addr;	/* memory for page offsets */
	vm_size_t size = 0;	/* current memory size */
	default_pager_page_t * pages;
	unsigned int potential, actual;
	kern_return_t kr;


	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
			    (vm_map_copy_t) *pagesp);

	if (kr != KERN_SUCCESS)
		return kr;

	size = round_page(*countp * sizeof * pages);
	kr = vm_map_wire(ipc_kernel_map,
			 trunc_page((vm_offset_t)pages),
			 round_page(((vm_offset_t)pages) + size),
			 VM_PROT_READ|VM_PROT_WRITE, FALSE);
	size = 0;

	*pagesp = pages;
	/* we start with the inline space */

	addr = (vm_offset_t)pages;
	potential = *countp;

	for (;;) {
		vstruct_t entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */

		if (pages != *pagesp)
			(void) vm_deallocate(kernel_map, addr, size);
		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			assert_wait_timeout( 1, THREAD_UNINT );
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */

		if (pages != *pagesp)
			(void) vm_deallocate(kernel_map, addr, size);
		size = round_page(actual * sizeof * pages);
		kr = vm_allocate(kernel_map, &addr, size, TRUE);
		if (kr != KERN_SUCCESS)
			return kr;
		pages = (default_pager_page_t *)addr;
		potential = size / sizeof * pages;
	}

	/*
	 * Deallocate and clear unused memory.
	 * (Returned memory will automagically become pageable.)
	 */

	if (pages == *pagesp) {

		/*
		 * Our returned information fit inline.
		 * Nothing to deallocate.
		 */

		*countp = actual;
	} else if (actual == 0) {
		(void) vm_deallocate(kernel_map, addr, size);

		/* return zero items inline */
		*countp = 0;
	} else {
		vm_offset_t used;

		used = round_page(actual * sizeof * pages);

		if (used != size)
			(void) vm_deallocate(kernel_map,
					     addr + used, size - used);

		*pagesp = pages;
		*countp = actual;
	}
	(void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
			     *countp + (vm_offset_t)pages, FALSE);
	(void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
			     *countp, TRUE, (vm_map_copy_t *)pagesp);
	return KERN_SUCCESS;
}