]> git.saurik.com Git - apple/xnu.git/blame - osfmk/default_pager/dp_memory_object.c
xnu-344.23.tar.gz
[apple/xnu.git] / osfmk / default_pager / dp_memory_object.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
de355530
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
1c79356b 11 *
de355530
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
de355530
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51/*
52 * Default Pager.
53 * Memory Object Management.
54 */
55
56#include "default_pager_internal.h"
0b4e3aa0 57#include <mach/memory_object_types.h>
1c79356b 58#include <mach/memory_object_server.h>
0b4e3aa0
A
59#include <vm/memory_object.h>
60#include <vm/vm_pageout.h>
1c79356b
A
61
62
63/*
64 * List of all vstructs. A specific vstruct is
65 * found directly via its port, this list is
66 * only used for monitoring purposes by the
67 * default_pager_object* calls and by ps_delete
68 * when abstract memory objects must be scanned
69 * to remove any live storage on a segment which
70 * is to be removed.
71 */
72struct vstruct_list_head vstruct_list;
73
0b4e3aa0 74__private_extern__ void
1c79356b
A
75vstruct_list_insert(
76 vstruct_t vs)
77{
78 VSL_LOCK();
79 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
80 vstruct_list.vsl_count++;
81 VSL_UNLOCK();
82}
83
1c79356b 84
0b4e3aa0 85__private_extern__ void
1c79356b
A
86vstruct_list_delete(
87 vstruct_t vs)
88{
89 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
90 vstruct_list.vsl_count--;
91}
92
93/*
94 * We use the sequence numbers on requests to regulate
95 * our parallelism. In general, we allow multiple reads and writes
96 * to proceed in parallel, with the exception that reads must
97 * wait for previous writes to finish. (Because the kernel might
98 * generate a data-request for a page on the heels of a data-write
99 * for the same page, and we must avoid returning stale data.)
100 * Terminate requests wait for preceding reads and writes to finish.
101 */
102
0b4e3aa0
A
/*
 * Event counters kept purely as a debugging aid.  They are bumped
 * without any dedicated synchronization of their own.
 */
static unsigned int	default_pager_total = 0;	/* sequenced lock attempts */
static unsigned int	default_pager_wait_seqno = 0;	/* sleeps waiting for a seqno turn */
static unsigned int	default_pager_wait_read = 0;	/* sleeps waiting for readers */
static unsigned int	default_pager_wait_write = 0;	/* sleeps waiting for writers */
static unsigned int	default_pager_wait_refs = 0;	/* not referenced in this part of the file */
1c79356b 108
0b4e3aa0 109__private_extern__ void
1c79356b
A
110vs_async_wait(
111 vstruct_t vs)
112{
1c79356b
A
113
114 ASSERT(vs->vs_async_pending >= 0);
115 while (vs->vs_async_pending > 0) {
116 vs->vs_waiting_async = TRUE;
0b4e3aa0 117 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
1c79356b 118 VS_UNLOCK(vs);
9bccf70c 119 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
120 VS_LOCK(vs);
121 }
122 ASSERT(vs->vs_async_pending == 0);
123}
124
1c79356b 125
0b4e3aa0 126#if PARALLEL
1c79356b
A
127/*
128 * Waits for correct sequence number. Leaves pager locked.
0b4e3aa0
A
129 *
130 * JMM - Sequence numbers guarantee ordering of requests generated
131 * by a single thread if the receiver is multithreaded and
132 * the interfaces are asynchronous (i.e. sender can generate
133 * more than one request before the first is received in the
134 * pager). Normally, IPC would generate these number in that
135 * case. But we are trying to avoid using IPC for the in-kernel
136 * scenario. Since these are actually invoked synchronously
137 * anyway (in-kernel), we can just fake the sequence number
138 * generation here (thus avoiding the dependence on IPC).
1c79356b 139 */
0b4e3aa0 140__private_extern__ void
1c79356b 141vs_lock(
0b4e3aa0 142 vstruct_t vs)
1c79356b 143{
0b4e3aa0
A
144 mach_port_seqno_t seqno;
145
1c79356b
A
146 default_pager_total++;
147 VS_LOCK(vs);
148
149 seqno = vs->vs_next_seqno++;
150
151 while (vs->vs_seqno != seqno) {
152 default_pager_wait_seqno++;
153 vs->vs_waiting_seqno = TRUE;
0b4e3aa0 154 assert_wait(&vs->vs_seqno, THREAD_UNINT);
1c79356b 155 VS_UNLOCK(vs);
9bccf70c 156 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
157 VS_LOCK(vs);
158 }
159}
160
161/*
162 * Increments sequence number and unlocks pager.
163 */
0b4e3aa0 164__private_extern__ void
1c79356b
A
165vs_unlock(vstruct_t vs)
166{
1c79356b 167 vs->vs_seqno++;
0b4e3aa0
A
168 if (vs->vs_waiting_seqno) {
169 vs->vs_waiting_seqno = FALSE;
170 VS_UNLOCK(vs);
171 thread_wakeup(&vs->vs_seqno);
172 return;
173 }
1c79356b 174 VS_UNLOCK(vs);
1c79356b
A
175}
176
177/*
178 * Start a read - one more reader. Pager must be locked.
179 */
0b4e3aa0 180__private_extern__ void
1c79356b
A
181vs_start_read(
182 vstruct_t vs)
183{
184 vs->vs_readers++;
185}
186
187/*
188 * Wait for readers. Unlocks and relocks pager if wait needed.
189 */
0b4e3aa0 190__private_extern__ void
1c79356b
A
191vs_wait_for_readers(
192 vstruct_t vs)
193{
194 while (vs->vs_readers != 0) {
195 default_pager_wait_read++;
196 vs->vs_waiting_read = TRUE;
0b4e3aa0 197 assert_wait(&vs->vs_readers, THREAD_UNINT);
1c79356b 198 VS_UNLOCK(vs);
9bccf70c 199 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
200 VS_LOCK(vs);
201 }
202}
203
204/*
205 * Finish a read. Pager is unlocked and returns unlocked.
206 */
0b4e3aa0 207__private_extern__ void
1c79356b
A
208vs_finish_read(
209 vstruct_t vs)
210{
211 VS_LOCK(vs);
0b4e3aa0 212 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
1c79356b
A
213 vs->vs_waiting_read = FALSE;
214 VS_UNLOCK(vs);
0b4e3aa0
A
215 thread_wakeup(&vs->vs_readers);
216 return;
217 }
218 VS_UNLOCK(vs);
1c79356b
A
219}
220
221/*
222 * Start a write - one more writer. Pager must be locked.
223 */
0b4e3aa0 224__private_extern__ void
1c79356b
A
225vs_start_write(
226 vstruct_t vs)
227{
228 vs->vs_writers++;
229}
230
231/*
232 * Wait for writers. Unlocks and relocks pager if wait needed.
233 */
0b4e3aa0 234__private_extern__ void
1c79356b
A
235vs_wait_for_writers(
236 vstruct_t vs)
237{
238 while (vs->vs_writers != 0) {
239 default_pager_wait_write++;
240 vs->vs_waiting_write = TRUE;
0b4e3aa0 241 assert_wait(&vs->vs_writers, THREAD_UNINT);
1c79356b 242 VS_UNLOCK(vs);
9bccf70c 243 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
244 VS_LOCK(vs);
245 }
246 vs_async_wait(vs);
247}
248
249/* This is to be used for the transfer from segment code ONLY */
250/* The transfer code holds off vs destruction by keeping the */
251/* vs_async_wait count non-zero. It will not ocnflict with */
252/* other writers on an async basis because it only writes on */
253/* a cluster basis into fresh (as of sync time) cluster locations */
0b4e3aa0
A
254
255__private_extern__ void
1c79356b
A
256vs_wait_for_sync_writers(
257 vstruct_t vs)
258{
259 while (vs->vs_writers != 0) {
260 default_pager_wait_write++;
261 vs->vs_waiting_write = TRUE;
0b4e3aa0 262 assert_wait(&vs->vs_writers, THREAD_UNINT);
1c79356b 263 VS_UNLOCK(vs);
9bccf70c 264 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
265 VS_LOCK(vs);
266 }
267}
268
269
270/*
271 * Finish a write. Pager is unlocked and returns unlocked.
272 */
0b4e3aa0 273__private_extern__ void
1c79356b
A
274vs_finish_write(
275 vstruct_t vs)
276{
277 VS_LOCK(vs);
0b4e3aa0 278 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
1c79356b
A
279 vs->vs_waiting_write = FALSE;
280 VS_UNLOCK(vs);
0b4e3aa0
A
281 thread_wakeup(&vs->vs_writers);
282 return;
1c79356b 283 }
0b4e3aa0 284 VS_UNLOCK(vs);
1c79356b 285}
1c79356b
A
286#endif /* PARALLEL */
287
1c79356b
A
288vstruct_t
289vs_object_create(
290 vm_size_t size)
291{
292 vstruct_t vs;
1c79356b
A
293
294 /*
295 * Allocate a vstruct. If there are any problems, then report them
296 * to the console.
297 */
298 vs = ps_vstruct_create(size);
299 if (vs == VSTRUCT_NULL) {
300 dprintf(("vs_object_create: unable to allocate %s\n",
301 "-- either run swapon command or reboot"));
302 return VSTRUCT_NULL;
303 }
304
305 return vs;
306}
307
0b4e3aa0 308#if 0
1c79356b
A
309void default_pager_add(vstruct_t, boolean_t); /* forward */
310
311void
312default_pager_add(
313 vstruct_t vs,
314 boolean_t internal)
315{
0b4e3aa0
A
316 memory_object_t mem_obj = vs->vs_mem_obj;
317 mach_port_t pset;
1c79356b 318 mach_port_mscount_t sync;
0b4e3aa0 319 mach_port_t previous;
1c79356b
A
320 kern_return_t kr;
321 static char here[] = "default_pager_add";
322
323 /*
324 * The port currently has a make-send count of zero,
325 * because either we just created the port or we just
326 * received the port in a memory_object_create request.
327 */
328
329 if (internal) {
330 /* possibly generate an immediate no-senders notification */
331 sync = 0;
332 pset = default_pager_internal_set;
333 } else {
334 /* delay notification till send right is created */
335 sync = 1;
336 pset = default_pager_external_set;
337 }
338
339 ipc_port_make_sonce(mem_obj);
340 ip_lock(mem_obj); /* unlocked in nsrequest below */
341 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
342}
343
0b4e3aa0 344#endif
1c79356b
A
345
346kern_return_t
347dp_memory_object_init(
0b4e3aa0
A
348 memory_object_t mem_obj,
349 memory_object_control_t control,
1c79356b
A
350 vm_size_t pager_page_size)
351{
1c79356b 352 vstruct_t vs;
1c79356b
A
353
354 assert(pager_page_size == vm_page_size);
355
0b4e3aa0
A
356 memory_object_control_reference(control);
357
1c79356b 358 vs_lookup(mem_obj, vs);
0b4e3aa0 359 vs_lock(vs);
1c79356b 360
0b4e3aa0 361 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
1c79356b
A
362 Panic("bad request");
363
0b4e3aa0 364 vs->vs_control = control;
1c79356b
A
365 vs_unlock(vs);
366
367 return KERN_SUCCESS;
368}
369
370kern_return_t
371dp_memory_object_synchronize(
0b4e3aa0
A
372 memory_object_t mem_obj,
373 memory_object_offset_t offset,
374 vm_size_t length,
1c79356b
A
375 vm_sync_t flags)
376{
1c79356b 377 vstruct_t vs;
1c79356b
A
378
379 vs_lookup(mem_obj, vs);
0b4e3aa0 380 vs_lock(vs);
1c79356b
A
381 vs_unlock(vs);
382
0b4e3aa0 383 memory_object_synchronize_completed(vs->vs_control, offset, length);
1c79356b
A
384
385 return KERN_SUCCESS;
386}
387
0b4e3aa0
A
388kern_return_t
389dp_memory_object_unmap(
390 memory_object_t mem_obj)
391{
392 panic("dp_memory_object_unmap");
393
394 return KERN_FAILURE;
395}
396
1c79356b
A
397kern_return_t
398dp_memory_object_terminate(
0b4e3aa0 399 memory_object_t mem_obj)
1c79356b 400{
0b4e3aa0 401 memory_object_control_t control;
1c79356b 402 vstruct_t vs;
1c79356b 403 kern_return_t kr;
1c79356b
A
404
405 /*
406 * control port is a receive right, not a send right.
407 */
408
409 vs_lookup(mem_obj, vs);
0b4e3aa0 410 vs_lock(vs);
1c79356b
A
411
412 /*
413 * Wait for read and write requests to terminate.
414 */
415
416 vs_wait_for_readers(vs);
417 vs_wait_for_writers(vs);
418
419 /*
420 * After memory_object_terminate both memory_object_init
421 * and a no-senders notification are possible, so we need
0b4e3aa0
A
422 * to clean up our reference to the memory_object_control
423 * to prepare for a new init.
1c79356b
A
424 */
425
0b4e3aa0
A
426 control = vs->vs_control;
427 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
1c79356b
A
428
429 /* a bit of special case ugliness here. Wakeup any waiting reads */
430 /* these data requests had to be removed from the seqno traffic */
431 /* based on a performance bottleneck with large memory objects */
432 /* the problem will right itself with the new component based */
433 /* synchronous interface. The new async will be able to return */
434 /* failure during its sync phase. In the mean time ... */
435
0b4e3aa0
A
436 thread_wakeup(&vs->vs_writers);
437 thread_wakeup(&vs->vs_async_pending);
1c79356b
A
438
439 vs_unlock(vs);
440
441 /*
0b4e3aa0 442 * Now we deallocate our reference on the control.
1c79356b 443 */
0b4e3aa0 444 memory_object_control_deallocate(control);
1c79356b
A
445 return KERN_SUCCESS;
446}
447
448void
0b4e3aa0
A
449dp_memory_object_reference(
450 memory_object_t mem_obj)
451{
452 vstruct_t vs;
453
454 vs_lookup_safe(mem_obj, vs);
455 if (vs == VSTRUCT_NULL)
456 return;
457
458 VS_LOCK(vs);
459 assert(vs->vs_references > 0);
460 vs->vs_references++;
461 VS_UNLOCK(vs);
462}
463
464extern ipc_port_t max_pages_trigger_port;
465extern int dp_pages_free;
466extern int maximum_pages_free;
467void
468dp_memory_object_deallocate(
469 memory_object_t mem_obj)
1c79356b
A
470{
471 vstruct_t vs;
0b4e3aa0
A
472 mach_port_seqno_t seqno;
473 ipc_port_t trigger;
1c79356b
A
474
475 /*
0b4e3aa0 476 * Because we don't give out multiple first references
1c79356b 477 * for a memory object, there can't be a race
0b4e3aa0
A
478 * between getting a deallocate call and creating
479 * a new reference for the object.
1c79356b
A
480 */
481
0b4e3aa0
A
482 vs_lookup_safe(mem_obj, vs);
483 if (vs == VSTRUCT_NULL)
484 return;
485
486 VS_LOCK(vs);
487 if (--vs->vs_references > 0) {
488 VS_UNLOCK(vs);
489 return;
490 }
491
492 seqno = vs->vs_next_seqno++;
493 while (vs->vs_seqno != seqno) {
494 default_pager_wait_seqno++;
495 vs->vs_waiting_seqno = TRUE;
496 assert_wait(&vs->vs_seqno, THREAD_UNINT);
497 VS_UNLOCK(vs);
9bccf70c 498 thread_block(THREAD_CONTINUE_NULL);
0b4e3aa0
A
499 VS_LOCK(vs);
500 }
501
1c79356b
A
502 vs_async_wait(vs); /* wait for pending async IO */
503
504 /* do not delete the vs structure until the referencing pointers */
505 /* in the vstruct list have been expunged */
506
507 /* get VSL_LOCK out of order by using TRY mechanism */
508 while(!VSL_LOCK_TRY()) {
509 VS_UNLOCK(vs);
510 VSL_LOCK();
511 VSL_UNLOCK();
512 VS_LOCK(vs);
513 vs_async_wait(vs); /* wait for pending async IO */
514 }
0b4e3aa0
A
515
516
1c79356b 517 /*
0b4e3aa0 518 * We shouldn't get a deallocation call
1c79356b
A
519 * when the kernel has the object cached.
520 */
0b4e3aa0 521 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
1c79356b
A
522 Panic("bad request");
523
524 /*
525 * Unlock the pager (though there should be no one
526 * waiting for it).
527 */
528 VS_UNLOCK(vs);
529
0b4e3aa0
A
530 /* Lock out paging segment removal for the duration of this */
531 /* call. We are vulnerable to losing a paging segment we rely */
532 /* on as soon as we remove ourselves from the VSL and unlock */
533
534 /* Keep our thread from blocking on attempt to trigger backing */
535 /* store release */
536 backing_store_release_trigger_disable += 1;
537
1c79356b
A
538 /*
539 * Remove the memory object port association, and then
540 * the destroy the port itself. We must remove the object
541 * from the port list before deallocating the pager,
542 * because of default_pager_objects.
543 */
544 vstruct_list_delete(vs);
0b4e3aa0
A
545 VSL_UNLOCK();
546
1c79356b
A
547 ps_vstruct_dealloc(vs);
548
0b4e3aa0
A
549 VSL_LOCK();
550 backing_store_release_trigger_disable -= 1;
551 if(backing_store_release_trigger_disable == 0) {
9bccf70c 552 thread_wakeup((event_t)&backing_store_release_trigger_disable);
1c79356b
A
553 }
554 VSL_UNLOCK();
0b4e3aa0
A
555
556 PSL_LOCK();
557 if(max_pages_trigger_port
558 && (backing_store_release_trigger_disable == 0)
559 && (dp_pages_free > maximum_pages_free)) {
560 trigger = max_pages_trigger_port;
561 max_pages_trigger_port = NULL;
562 } else
563 trigger = IP_NULL;
564 PSL_UNLOCK();
565
566 if (trigger != IP_NULL) {
567 default_pager_space_alert(trigger, LO_WAT_ALERT);
568 ipc_port_release_send(trigger);
569 }
570
1c79356b
A
571}
572
573kern_return_t
574dp_memory_object_data_request(
0b4e3aa0
A
575 memory_object_t mem_obj,
576 memory_object_offset_t offset,
1c79356b
A
577 vm_size_t length,
578 vm_prot_t protection_required)
579{
1c79356b 580 vstruct_t vs;
1c79356b
A
581
582 GSTAT(global_stats.gs_pagein_calls++);
583
584
585 /* CDY at this moment vs_lookup panics when presented with the wrong */
586 /* port. As we are expanding this pager to support user interfaces */
587 /* this should be changed to return kern_failure */
588 vs_lookup(mem_obj, vs);
0b4e3aa0 589 vs_lock(vs);
1c79356b
A
590
591 /* We are going to relax the strict sequencing here for performance */
592 /* reasons. We can do this because we know that the read and */
593 /* write threads are different and we rely on synchronization */
594 /* of read and write requests at the cache memory_object level */
595 /* break out wait_for_writers, all of this goes away when */
596 /* we get real control of seqno with the new component interface */
0b4e3aa0 597
1c79356b
A
598 if (vs->vs_writers != 0) {
599 /* you can't hold on to the seqno and go */
600 /* to sleep like that */
601 vs_unlock(vs); /* bump internal count of seqno */
602 VS_LOCK(vs);
603 while (vs->vs_writers != 0) {
604 default_pager_wait_write++;
605 vs->vs_waiting_write = TRUE;
0b4e3aa0 606 assert_wait(&vs->vs_writers, THREAD_UNINT);
1c79356b 607 VS_UNLOCK(vs);
9bccf70c 608 thread_block(THREAD_CONTINUE_NULL);
1c79356b
A
609 VS_LOCK(vs);
610 vs_async_wait(vs);
611 }
0b4e3aa0 612 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
1c79356b
A
613 VS_UNLOCK(vs);
614 return KERN_FAILURE;
615 }
616 vs_start_read(vs);
617 VS_UNLOCK(vs);
618 } else {
619 vs_start_read(vs);
620 vs_unlock(vs);
621 }
622
623 /*
624 * Request must be on a page boundary and a multiple of pages.
625 */
626 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
627 Panic("bad alignment");
628
629 pvs_cluster_read(vs, (vm_offset_t)offset, length);
630
631 vs_finish_read(vs);
632
633 return KERN_SUCCESS;
634}
635
636/*
637 * memory_object_data_initialize: check whether we already have each page, and
638 * write it if we do not. The implementation is far from optimized, and
639 * also assumes that the default_pager is single-threaded.
640 */
641/* It is questionable whether or not a pager should decide what is relevant */
642/* and what is not in data sent from the kernel. Data initialize has been */
643/* changed to copy back all data sent to it in preparation for its eventual */
644/* merge with data return. It is the kernel that should decide what pages */
645/* to write back. As of the writing of this note, this is indeed the case */
646/* the kernel writes back one page at a time through this interface */
647
648kern_return_t
649dp_memory_object_data_initialize(
0b4e3aa0
A
650 memory_object_t mem_obj,
651 memory_object_offset_t offset,
652 vm_size_t size)
1c79356b 653{
1c79356b 654 vstruct_t vs;
1c79356b
A
655
656 DEBUG(DEBUG_MO_EXTERNAL,
657 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
0b4e3aa0 658 (int)mem_obj, (int)offset, (int)size));
de355530 659 GSTAT(global_stats.gs_pages_init += atop(size));
1c79356b
A
660
661 vs_lookup(mem_obj, vs);
0b4e3aa0 662 vs_lock(vs);
1c79356b
A
663 vs_start_write(vs);
664 vs_unlock(vs);
665
666 /*
667 * Write the data via clustered writes. vs_cluster_write will
668 * loop if the address range specified crosses cluster
669 * boundaries.
670 */
0b4e3aa0 671 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
1c79356b
A
672
673 vs_finish_write(vs);
674
675 return KERN_SUCCESS;
676}
677
1c79356b
A
678kern_return_t
679dp_memory_object_data_unlock(
680 memory_object_t mem_obj,
0b4e3aa0
A
681 memory_object_offset_t offset,
682 vm_size_t size,
1c79356b
A
683 vm_prot_t desired_access)
684{
0b4e3aa0 685 Panic("dp_memory_object_data_unlock: illegal");
1c79356b
A
686 return KERN_FAILURE;
687}
688
689
1c79356b
A
690kern_return_t
691dp_memory_object_data_return(
0b4e3aa0
A
692 memory_object_t mem_obj,
693 memory_object_offset_t offset,
694 vm_size_t size,
1c79356b
A
695 boolean_t dirty,
696 boolean_t kernel_copy)
697{
1c79356b 698 vstruct_t vs;
1c79356b
A
699
700 DEBUG(DEBUG_MO_EXTERNAL,
0b4e3aa0
A
701 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
702 (int)mem_obj, (int)offset, (int)size));
1c79356b
A
703 GSTAT(global_stats.gs_pageout_calls++);
704
705 /* This routine is called by the pageout thread. The pageout thread */
706 /* cannot be blocked by read activities unless the read activities */
707 /* Therefore the grant of vs lock must be done on a try versus a */
708 /* blocking basis. The code below relies on the fact that the */
709 /* interface is synchronous. Should this interface be again async */
710 /* for some type of pager in the future the pages will have to be */
711 /* returned through a separate, asynchronous path. */
712
713 vs_lookup(mem_obj, vs);
714
715 default_pager_total++;
716 if(!VS_TRY_LOCK(vs)) {
717 /* the call below will not be done by caller when we have */
718 /* a synchronous interface */
719 /* return KERN_LOCK_OWNED; */
720 upl_t upl;
0b4e3aa0
A
721 int page_list_count = 0;
722 memory_object_super_upl_request(vs->vs_control,
723 (memory_object_offset_t)offset,
724 size, size,
725 &upl, NULL, &page_list_count,
726 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
1c79356b 727 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
0b4e3aa0
A
728 upl_abort(upl,0);
729 upl_deallocate(upl);
1c79356b
A
730 return KERN_SUCCESS;
731 }
732
de355530 733 if ((vs->vs_seqno != vs->vs_next_seqno++) || (vs->vs_xfer_pending)) {
1c79356b 734 upl_t upl;
0b4e3aa0
A
735 int page_list_count = 0;
736
1c79356b
A
737 vs->vs_next_seqno--;
738 VS_UNLOCK(vs);
0b4e3aa0 739
1c79356b
A
740 /* the call below will not be done by caller when we have */
741 /* a synchronous interface */
742 /* return KERN_LOCK_OWNED; */
0b4e3aa0
A
743 memory_object_super_upl_request(vs->vs_control,
744 (memory_object_offset_t)offset,
745 size, size,
746 &upl, NULL, &page_list_count,
1c79356b
A
747 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
748 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
0b4e3aa0
A
749 upl_abort(upl,0);
750 upl_deallocate(upl);
1c79356b
A
751 return KERN_SUCCESS;
752 }
753
0b4e3aa0 754 if ((size % vm_page_size) != 0)
1c79356b
A
755 Panic("bad alignment");
756
757 vs_start_write(vs);
758
759
760 vs->vs_async_pending += 1; /* protect from backing store contraction */
0b4e3aa0 761 vs_unlock(vs);
1c79356b
A
762
763 /*
764 * Write the data via clustered writes. vs_cluster_write will
765 * loop if the address range specified crosses cluster
766 * boundaries.
767 */
0b4e3aa0 768 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
1c79356b
A
769
770 vs_finish_write(vs);
771
772 /* temporary, need a finer lock based on cluster */
773
774 VS_LOCK(vs);
775 vs->vs_async_pending -= 1; /* release vs_async_wait */
0b4e3aa0
A
776 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
777 vs->vs_waiting_async = FALSE;
1c79356b 778 VS_UNLOCK(vs);
0b4e3aa0 779 thread_wakeup(&vs->vs_async_pending);
1c79356b
A
780 } else {
781 VS_UNLOCK(vs);
782 }
783
784
785 return KERN_SUCCESS;
786}
787
0b4e3aa0
A
788/*
789 * Routine: default_pager_memory_object_create
790 * Purpose:
791 * Handle requests for memory objects from the
792 * kernel.
793 * Notes:
794 * Because we only give out the default memory
795 * manager port to the kernel, we don't have to
796 * be so paranoid about the contents.
797 */
1c79356b 798kern_return_t
0b4e3aa0
A
799default_pager_memory_object_create(
800 memory_object_default_t dmm,
801 vm_size_t new_size,
802 memory_object_t *new_mem_obj)
1c79356b 803{
0b4e3aa0 804 vstruct_t vs;
1c79356b 805
0b4e3aa0
A
806 assert(dmm == default_pager_object);
807
808 vs = vs_object_create(new_size);
809 if (vs == VSTRUCT_NULL)
810 return KERN_RESOURCE_SHORTAGE;
811
812 vs->vs_next_seqno = 0;
813
814 /*
815 * Set up associations between this memory object
816 * and this default_pager structure
817 */
818
819 vs->vs_mem_obj = ISVS;
820 vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
821
822 /*
823 * After this, other threads might receive requests
824 * for this memory object or find it in the port list.
825 */
826
827 vstruct_list_insert(vs);
828 *new_mem_obj = vs_to_mem_obj(vs);
829 return KERN_SUCCESS;
1c79356b
A
830}
831
832/*
833 * Create an external object.
834 */
835kern_return_t
836default_pager_object_create(
0b4e3aa0
A
837 default_pager_t pager,
838 vm_size_t size,
839 memory_object_t *mem_objp)
1c79356b
A
840{
841 vstruct_t vs;
1c79356b
A
842 kern_return_t result;
843 struct vstruct_alias *alias_struct;
1c79356b
A
844
845
0b4e3aa0 846 if (pager != default_pager_object)
1c79356b
A
847 return KERN_INVALID_ARGUMENT;
848
849 vs = vs_object_create(size);
0b4e3aa0
A
850 if (vs == VSTRUCT_NULL)
851 return KERN_RESOURCE_SHORTAGE;
1c79356b 852
1c79356b 853 /*
0b4e3aa0 854 * Set up associations between the default pager
1c79356b
A
855 * and this vstruct structure
856 */
0b4e3aa0 857 vs->vs_mem_obj = ISVS;
1c79356b 858 vstruct_list_insert(vs);
0b4e3aa0 859 *mem_objp = vs_to_mem_obj(vs);
1c79356b
A
860 return KERN_SUCCESS;
861}
862
863kern_return_t
864default_pager_objects(
0b4e3aa0 865 default_pager_t pager,
1c79356b
A
866 default_pager_object_array_t *objectsp,
867 mach_msg_type_number_t *ocountp,
0b4e3aa0 868 memory_object_array_t *pagersp,
1c79356b
A
869 mach_msg_type_number_t *pcountp)
870{
871 vm_offset_t oaddr = 0; /* memory for objects */
872 vm_size_t osize = 0; /* current size */
873 default_pager_object_t * objects;
874 unsigned int opotential;
875
0b4e3aa0 876 vm_offset_t paddr = 0; /* memory for pagers */
1c79356b 877 vm_size_t psize = 0; /* current size */
0b4e3aa0 878 memory_object_t * pagers;
1c79356b
A
879 unsigned int ppotential;
880
881 unsigned int actual;
882 unsigned int num_objects;
883 kern_return_t kr;
884 vstruct_t entry;
1c79356b
A
885/*
886 if (pager != default_pager_default_port)
887 return KERN_INVALID_ARGUMENT;
888*/
889
890 /* start with the inline memory */
891
892 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
893 (vm_map_copy_t) *objectsp);
894
895 if (kr != KERN_SUCCESS)
896 return kr;
897
de355530 898 osize = round_page(*ocountp * sizeof * objects);
1c79356b 899 kr = vm_map_wire(ipc_kernel_map,
de355530
A
900 trunc_page((vm_offset_t)objects),
901 round_page(((vm_offset_t)objects) + osize),
1c79356b
A
902 VM_PROT_READ|VM_PROT_WRITE, FALSE);
903 osize=0;
904
905 *objectsp = objects;
906 /* we start with the inline space */
907
908
909 num_objects = 0;
910 opotential = *ocountp;
911
0b4e3aa0 912 pagers = (memory_object_t *) *pagersp;
1c79356b
A
913 ppotential = *pcountp;
914
915 VSL_LOCK();
916
917 /*
918 * We will send no more than this many
919 */
920 actual = vstruct_list.vsl_count;
921 VSL_UNLOCK();
922
923 if (opotential < actual) {
924 vm_offset_t newaddr;
925 vm_size_t newsize;
926
de355530 927 newsize = 2 * round_page(actual * sizeof * objects);
1c79356b
A
928
929 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
930 if (kr != KERN_SUCCESS)
931 goto nomemory;
932
933 oaddr = newaddr;
934 osize = newsize;
935 opotential = osize / sizeof * objects;
936 objects = (default_pager_object_t *)oaddr;
937 }
938
939 if (ppotential < actual) {
940 vm_offset_t newaddr;
941 vm_size_t newsize;
942
de355530 943 newsize = 2 * round_page(actual * sizeof * pagers);
1c79356b
A
944
945 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
946 if (kr != KERN_SUCCESS)
947 goto nomemory;
948
949 paddr = newaddr;
950 psize = newsize;
0b4e3aa0
A
951 ppotential = psize / sizeof * pagers;
952 pagers = (memory_object_t *)paddr;
1c79356b
A
953 }
954
955 /*
956 * Now scan the list.
957 */
958
959 VSL_LOCK();
960
961 num_objects = 0;
962 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
963
0b4e3aa0 964 memory_object_t pager;
1c79356b
A
965 vm_size_t size;
966
967 if ((num_objects >= opotential) ||
968 (num_objects >= ppotential)) {
969
970 /*
971 * This should be rare. In any case,
972 * we will only miss recent objects,
973 * because they are added at the end.
974 */
975 break;
976 }
977
978 /*
979 * Avoid interfering with normal operations
980 */
981 if (!VS_MAP_TRY_LOCK(entry))
982 goto not_this_one;
983 size = ps_vstruct_allocated_size(entry);
984 VS_MAP_UNLOCK(entry);
985
986 VS_LOCK(entry);
987
1c79356b 988 /*
0b4e3aa0
A
989 * We need a reference for our caller. Adding this
990 * reference through the linked list could race with
991 * destruction of the object. If we find the object
992 * has no references, just give up on it.
1c79356b 993 */
0b4e3aa0
A
994 VS_LOCK(entry);
995 if (entry->vs_references == 0) {
1c79356b 996 VS_UNLOCK(entry);
0b4e3aa0 997 goto not_this_one;
1c79356b 998 }
0b4e3aa0 999 dp_memory_object_reference(vs_to_mem_obj(entry));
1c79356b
A
1000 VS_UNLOCK(entry);
1001
1002 /* the arrays are wired, so no deadlock worries */
1003
1004 objects[num_objects].dpo_object = (vm_offset_t) entry;
1005 objects[num_objects].dpo_size = size;
0b4e3aa0 1006 pagers [num_objects++] = pager;
1c79356b
A
1007 continue;
1008
1009 not_this_one:
1010 /*
1011 * Do not return garbage
1012 */
1013 objects[num_objects].dpo_object = (vm_offset_t) 0;
1014 objects[num_objects].dpo_size = 0;
0b4e3aa0 1015 pagers[num_objects++] = MEMORY_OBJECT_NULL;
1c79356b
A
1016
1017 }
1018
1019 VSL_UNLOCK();
1020
1021 /*
1022 * Deallocate and clear unused memory.
1023 * (Returned memory will automagically become pageable.)
1024 */
1025
1026 if (objects == *objectsp) {
1027
1028 /*
1029 * Our returned information fit inline.
1030 * Nothing to deallocate.
1031 */
1032 *ocountp = num_objects;
1033 } else if (actual == 0) {
1034 (void) vm_deallocate(kernel_map, oaddr, osize);
1035
1036 /* return zero items inline */
1037 *ocountp = 0;
1038 } else {
1039 vm_offset_t used;
1040
de355530 1041 used = round_page(actual * sizeof * objects);
1c79356b
A
1042
1043 if (used != osize)
1044 (void) vm_deallocate(kernel_map,
1045 oaddr + used, osize - used);
1046
1047 *objectsp = objects;
1048 *ocountp = num_objects;
1049 }
1050
0b4e3aa0 1051 if (pagers == (memory_object_t *)*pagersp) {
1c79356b
A
1052
1053 /*
1054 * Our returned information fit inline.
1055 * Nothing to deallocate.
1056 */
1057
1058 *pcountp = num_objects;
1059 } else if (actual == 0) {
1060 (void) vm_deallocate(kernel_map, paddr, psize);
1061
1062 /* return zero items inline */
1063 *pcountp = 0;
1064 } else {
1065 vm_offset_t used;
1066
de355530 1067 used = round_page(actual * sizeof * pagers);
1c79356b
A
1068
1069 if (used != psize)
1070 (void) vm_deallocate(kernel_map,
1071 paddr + used, psize - used);
1072
0b4e3aa0 1073 *pagersp = (memory_object_array_t)pagers;
1c79356b
A
1074 *pcountp = num_objects;
1075 }
1076 (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1077 *ocountp + (vm_offset_t)objects, FALSE);
1078 (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1079 *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1080
1081 return KERN_SUCCESS;
1082
1083 nomemory:
1084 {
1085 register int i;
1086 for (i = 0; i < num_objects; i++)
0b4e3aa0
A
1087 if (pagers[i] != MEMORY_OBJECT_NULL)
1088 memory_object_deallocate(pagers[i]);
1c79356b
A
1089 }
1090
1091 if (objects != *objectsp)
1092 (void) vm_deallocate(kernel_map, oaddr, osize);
1093
0b4e3aa0 1094 if (pagers != (memory_object_t *)*pagersp)
1c79356b
A
1095 (void) vm_deallocate(kernel_map, paddr, psize);
1096
1097 return KERN_RESOURCE_SHORTAGE;
1098}
1099
1100kern_return_t
1101default_pager_object_pages(
0b4e3aa0
A
1102 default_pager_t pager,
1103 memory_object_t object,
1c79356b
A
1104 default_pager_page_array_t *pagesp,
1105 mach_msg_type_number_t *countp)
1106{
1107 vm_offset_t addr; /* memory for page offsets */
1108 vm_size_t size = 0; /* current memory size */
1109 default_pager_page_t * pages;
1110 unsigned int potential, actual;
1111 kern_return_t kr;
1112
0b4e3aa0
A
1113
1114 if (pager != default_pager_object)
1c79356b 1115 return KERN_INVALID_ARGUMENT;
0b4e3aa0 1116
1c79356b
A
1117 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
1118 (vm_map_copy_t) *pagesp);
1119
1120 if (kr != KERN_SUCCESS)
1121 return kr;
1122
de355530 1123 size = round_page(*countp * sizeof * pages);
1c79356b 1124 kr = vm_map_wire(ipc_kernel_map,
de355530
A
1125 trunc_page((vm_offset_t)pages),
1126 round_page(((vm_offset_t)pages) + size),
1c79356b
A
1127 VM_PROT_READ|VM_PROT_WRITE, FALSE);
1128 size=0;
1129
1130 *pagesp = pages;
1131 /* we start with the inline space */
1132
1133 addr = (vm_offset_t)pages;
1134 potential = *countp;
1135
1136 for (;;) {
1137 vstruct_t entry;
1138
1139 VSL_LOCK();
1140 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1141 vs_links) {
1142 VS_LOCK(entry);
0b4e3aa0 1143 if (vs_to_mem_obj(entry) == object) {
1c79356b
A
1144 VSL_UNLOCK();
1145 goto found_object;
1146 }
1147 VS_UNLOCK(entry);
1148 }
1149 VSL_UNLOCK();
1150
1151 /* did not find the object */
1152
1153 if (pages != *pagesp)
1154 (void) vm_deallocate(kernel_map, addr, size);
1155 return KERN_INVALID_ARGUMENT;
1156
1157 found_object:
1158
1159 if (!VS_MAP_TRY_LOCK(entry)) {
1160 /* oh well bad luck */
9bccf70c 1161 int wresult;
1c79356b
A
1162
1163 VS_UNLOCK(entry);
1164
9bccf70c
A
1165 assert_wait_timeout( 1, THREAD_UNINT );
1166 wresult = thread_block(THREAD_CONTINUE_NULL);
1167 assert(wresult == THREAD_TIMED_OUT);
1c79356b
A
1168 continue;
1169 }
1170
1171 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1172 VS_MAP_UNLOCK(entry);
1173 VS_UNLOCK(entry);
1174
1175 if (actual <= potential)
1176 break;
1177
1178 /* allocate more memory */
1179
1180 if (pages != *pagesp)
1181 (void) vm_deallocate(kernel_map, addr, size);
de355530 1182 size = round_page(actual * sizeof * pages);
1c79356b
A
1183 kr = vm_allocate(kernel_map, &addr, size, TRUE);
1184 if (kr != KERN_SUCCESS)
1185 return kr;
1186 pages = (default_pager_page_t *)addr;
1187 potential = size / sizeof * pages;
1188 }
1189
1190 /*
1191 * Deallocate and clear unused memory.
1192 * (Returned memory will automagically become pageable.)
1193 */
1194
1195 if (pages == *pagesp) {
1196
1197 /*
1198 * Our returned information fit inline.
1199 * Nothing to deallocate.
1200 */
1201
1202 *countp = actual;
1203 } else if (actual == 0) {
1204 (void) vm_deallocate(kernel_map, addr, size);
1205
1206 /* return zero items inline */
1207 *countp = 0;
1208 } else {
1209 vm_offset_t used;
1210
de355530 1211 used = round_page(actual * sizeof * pages);
1c79356b
A
1212
1213 if (used != size)
1214 (void) vm_deallocate(kernel_map,
1215 addr + used, size - used);
1216
1217 *pagesp = pages;
1218 *countp = actual;
1219 }
1220 (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
1221 *countp + (vm_offset_t)pages, FALSE);
1222 (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
1223 *countp, TRUE, (vm_map_copy_t *)pagesp);
1224 return KERN_SUCCESS;
1225}