/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>


/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * used only for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
struct vstruct_list_head vstruct_list;

__private_extern__ void
vstruct_list_insert(
    vstruct_t vs)
{
    VSL_LOCK();
    queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
    vstruct_list.vsl_count++;
    VSL_UNLOCK();
}

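/*
 * Remove a vstruct from the list.  Unlike vstruct_list_insert, this
 * assumes the caller already holds the VSL lock (see
 * dp_memory_object_deallocate, which takes VSL_LOCK before calling us).
 */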
__private_extern__ void
vstruct_list_delete(
    vstruct_t vs)
{
    queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
    vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */

static unsigned int default_pager_total = 0;        /* debugging */
static unsigned int default_pager_wait_seqno = 0;   /* debugging */
static unsigned int default_pager_wait_read = 0;    /* debugging */
static unsigned int default_pager_wait_write = 0;   /* debugging */
static unsigned int default_pager_wait_refs = 0;    /* debugging */

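/*
 * Wait for any pending asynchronous I/O on this vstruct to drain.
 * Called and returns with the VS lock held; the lock is dropped
 * around each thread_block.
 */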
__private_extern__ void
vs_async_wait(
    vstruct_t vs)
{

    ASSERT(vs->vs_async_pending >= 0);
    while (vs->vs_async_pending > 0) {
        vs->vs_waiting_async = TRUE;
        assert_wait(&vs->vs_async_pending, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }
    ASSERT(vs->vs_async_pending == 0);
}

#if PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these numbers in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario.  Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
    vstruct_t vs)
{
    mach_port_seqno_t seqno;

    default_pager_total++;
    VS_LOCK(vs);

    seqno = vs->vs_next_seqno++;

    while (vs->vs_seqno != seqno) {
        default_pager_wait_seqno++;
        vs->vs_waiting_seqno = TRUE;
        assert_wait(&vs->vs_seqno, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
    vs->vs_seqno++;
    if (vs->vs_waiting_seqno) {
        vs->vs_waiting_seqno = FALSE;
        VS_UNLOCK(vs);
        thread_wakeup(&vs->vs_seqno);
        return;
    }
    VS_UNLOCK(vs);
}

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
    vstruct_t vs)
{
    vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
    vstruct_t vs)
{
    while (vs->vs_readers != 0) {
        default_pager_wait_read++;
        vs->vs_waiting_read = TRUE;
        assert_wait(&vs->vs_readers, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }
}

/*
 * Finish a read.  Called and returns with the pager unlocked.
 */
__private_extern__ void
vs_finish_read(
    vstruct_t vs)
{
    VS_LOCK(vs);
    if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
        vs->vs_waiting_read = FALSE;
        VS_UNLOCK(vs);
        thread_wakeup(&vs->vs_readers);
        return;
    }
    VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
    vstruct_t vs)
{
    vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
    vstruct_t vs)
{
    while (vs->vs_writers != 0) {
        default_pager_wait_write++;
        vs->vs_waiting_write = TRUE;
        assert_wait(&vs->vs_writers, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }
    vs_async_wait(vs);
}

/*
 * This is to be used by the transfer-from-segment code ONLY.  The
 * transfer code holds off vs destruction by keeping the
 * vs_async_wait count non-zero.  It will not conflict with
 * other writers on an async basis because it only writes on
 * a cluster basis into fresh (as of sync time) cluster locations.
 */
__private_extern__ void
vs_wait_for_sync_writers(
    vstruct_t vs)
{
    while (vs->vs_writers != 0) {
        default_pager_wait_write++;
        vs->vs_waiting_write = TRUE;
        assert_wait(&vs->vs_writers, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }
}

/*
 * Finish a write.  Called and returns with the pager unlocked.
 */
__private_extern__ void
vs_finish_write(
    vstruct_t vs)
{
    VS_LOCK(vs);
    if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
        vs->vs_waiting_write = FALSE;
        VS_UNLOCK(vs);
        thread_wakeup(&vs->vs_writers);
        return;
    }
    VS_UNLOCK(vs);
}
#endif /* PARALLEL */

vstruct_t
vs_object_create(
    vm_size_t size)
{
    vstruct_t vs;

    /*
     * Allocate a vstruct.  If there are any problems, then report them
     * to the console.
     */
    vs = ps_vstruct_create(size);
    if (vs == VSTRUCT_NULL) {
        dprintf(("vs_object_create: unable to allocate %s\n",
                 "-- either run swapon command or reboot"));
        return VSTRUCT_NULL;
    }

    return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);   /* forward */

void
default_pager_add(
    vstruct_t vs,
    boolean_t internal)
{
    memory_object_t mem_obj = vs->vs_mem_obj;
    mach_port_t pset;
    mach_port_mscount_t sync;
    mach_port_t previous;
    kern_return_t kr;
    static char here[] = "default_pager_add";

    /*
     * The port currently has a make-send count of zero,
     * because either we just created the port or we just
     * received the port in a memory_object_create request.
     */

    if (internal) {
        /* possibly generate an immediate no-senders notification */
        sync = 0;
        pset = default_pager_internal_set;
    } else {
        /* delay notification till send right is created */
        sync = 1;
        pset = default_pager_external_set;
    }

    ipc_port_make_sonce(mem_obj);
    ip_lock(mem_obj);   /* unlocked in nsrequest below */
    ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

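/*
 * First contact from the kernel for a given object: remember the
 * memory_object_control reference it hands us so that later cluster
 * reads and writes can be pushed back through it.
 */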
kern_return_t
dp_memory_object_init(
    memory_object_t mem_obj,
    memory_object_control_t control,
    vm_size_t pager_page_size)
{
    vstruct_t vs;

    assert(pager_page_size == vm_page_size);

    memory_object_control_reference(control);

    vs_lookup(mem_obj, vs);
    vs_lock(vs);

    if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
        Panic("bad request");

    vs->vs_control = control;
    vs_unlock(vs);

    return KERN_SUCCESS;
}

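/*
 * The default pager keeps no private cache of its own, so there is
 * nothing to flush here; just acknowledge the range back to the kernel.
 */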
kern_return_t
dp_memory_object_synchronize(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    vm_size_t length,
    vm_sync_t flags)
{
    vstruct_t vs;

    vs_lookup(mem_obj, vs);
    vs_lock(vs);
    vs_unlock(vs);

    memory_object_synchronize_completed(vs->vs_control, offset, length);

    return KERN_SUCCESS;
}

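/*
 * The kernel is not expected to unmap a default-pager object through
 * this entry point, so treat any call here as a hard error.
 */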
kern_return_t
dp_memory_object_unmap(
    memory_object_t mem_obj)
{
    panic("dp_memory_object_unmap");

    return KERN_FAILURE;
}

kern_return_t
dp_memory_object_terminate(
    memory_object_t mem_obj)
{
    memory_object_control_t control;
    vstruct_t vs;
    kern_return_t kr;

    /*
     * control port is a receive right, not a send right.
     */

    vs_lookup(mem_obj, vs);
    vs_lock(vs);

    /*
     * Wait for read and write requests to terminate.
     */

    vs_wait_for_readers(vs);
    vs_wait_for_writers(vs);

    /*
     * After memory_object_terminate both memory_object_init
     * and a no-senders notification are possible, so we need
     * to clean up our reference to the memory_object_control
     * to prepare for a new init.
     */

    control = vs->vs_control;
    vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

    /*
     * A bit of special-case ugliness here: wake up any waiting reads.
     * These data requests had to be removed from the seqno traffic
     * because of a performance bottleneck with large memory objects.
     * The problem will right itself with the new component-based
     * synchronous interface, whose async side will be able to return
     * failure during its sync phase.  In the meantime ...
     */

    thread_wakeup(&vs->vs_writers);
    thread_wakeup(&vs->vs_async_pending);

    vs_unlock(vs);

    /*
     * Now we deallocate our reference on the control.
     */
    memory_object_control_deallocate(control);
    return KERN_SUCCESS;
}

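/*
 * Take an additional reference on the object.  vs_lookup_safe may yield
 * VSTRUCT_NULL (e.g. if the object is already being torn down), in which
 * case we simply bail out.
 */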
void
dp_memory_object_reference(
    memory_object_t mem_obj)
{
    vstruct_t vs;

    vs_lookup_safe(mem_obj, vs);
    if (vs == VSTRUCT_NULL)
        return;

    VS_LOCK(vs);
    assert(vs->vs_references > 0);
    vs->vs_references++;
    VS_UNLOCK(vs);
}

extern ipc_port_t max_pages_trigger_port;
extern int dp_pages_free;
extern int maximum_pages_free;

void
dp_memory_object_deallocate(
    memory_object_t mem_obj)
{
    vstruct_t vs;
    mach_port_seqno_t seqno;
    ipc_port_t trigger;

    /*
     * Because we don't give out multiple first references
     * for a memory object, there can't be a race
     * between getting a deallocate call and creating
     * a new reference for the object.
     */

    vs_lookup_safe(mem_obj, vs);
    if (vs == VSTRUCT_NULL)
        return;

    VS_LOCK(vs);
    if (--vs->vs_references > 0) {
        VS_UNLOCK(vs);
        return;
    }

    seqno = vs->vs_next_seqno++;
    while (vs->vs_seqno != seqno) {
        default_pager_wait_seqno++;
        vs->vs_waiting_seqno = TRUE;
        assert_wait(&vs->vs_seqno, THREAD_UNINT);
        VS_UNLOCK(vs);
        thread_block(THREAD_CONTINUE_NULL);
        VS_LOCK(vs);
    }

    vs_async_wait(vs);  /* wait for pending async IO */

    /* do not delete the vs structure until the referencing pointers */
    /* in the vstruct list have been expunged */

    /* get VSL_LOCK out of order by using TRY mechanism */
    while (!VSL_LOCK_TRY()) {
        VS_UNLOCK(vs);
        VSL_LOCK();
        VSL_UNLOCK();
        VS_LOCK(vs);
        vs_async_wait(vs);  /* wait for pending async IO */
    }

    /*
     * We shouldn't get a deallocation call
     * when the kernel has the object cached.
     */
    if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
        Panic("bad request");

    /*
     * Unlock the pager (though there should be no one
     * waiting for it).
     */
    VS_UNLOCK(vs);

    /* Lock out paging segment removal for the duration of this */
    /* call.  We are vulnerable to losing a paging segment we rely */
    /* on as soon as we remove ourselves from the VSL and unlock. */

    /* Keep our thread from blocking on attempt to trigger backing */
    /* store release */
    backing_store_release_trigger_disable += 1;

    /*
     * Remove the memory object port association, and then
     * destroy the port itself.  We must remove the object
     * from the port list before deallocating the pager,
     * because of default_pager_objects.
     */
    vstruct_list_delete(vs);
    VSL_UNLOCK();

    ps_vstruct_dealloc(vs);

    VSL_LOCK();
    backing_store_release_trigger_disable -= 1;
    if (backing_store_release_trigger_disable == 0) {
        thread_wakeup((event_t)&backing_store_release_trigger_disable);
    }
    VSL_UNLOCK();

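    /*
     * Freeing this object's backing store may have pushed
     * dp_pages_free above maximum_pages_free; if a trigger port is
     * registered, send it a LO_WAT_ALERT and consume the send right.
     */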
    PSL_LOCK();
    if (max_pages_trigger_port
        && (backing_store_release_trigger_disable == 0)
        && (dp_pages_free > maximum_pages_free)) {
        trigger = max_pages_trigger_port;
        max_pages_trigger_port = NULL;
    } else
        trigger = IP_NULL;
    PSL_UNLOCK();

    if (trigger != IP_NULL) {
        default_pager_space_alert(trigger, LO_WAT_ALERT);
        ipc_port_release_send(trigger);
    }

}

kern_return_t
dp_memory_object_data_request(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    vm_size_t length,
    vm_prot_t protection_required)
{
    vstruct_t vs;

    GSTAT(global_stats.gs_pagein_calls++);


    /* CDY: at this moment vs_lookup panics when presented with the */
    /* wrong port.  As we are expanding this pager to support user */
    /* interfaces this should be changed to return KERN_FAILURE. */
    vs_lookup(mem_obj, vs);
    vs_lock(vs);

    /* We are going to relax the strict sequencing here for performance */
    /* reasons.  We can do this because we know that the read and */
    /* write threads are different and we rely on synchronization */
    /* of read and write requests at the cache memory_object level. */
    /* Break out wait_for_writers; all of this goes away when */
    /* we get real control of seqno with the new component interface. */

    if (vs->vs_writers != 0) {
        /* you can't hold on to the seqno and go */
        /* to sleep like that */
        vs_unlock(vs);  /* bump internal count of seqno */
        VS_LOCK(vs);
        while (vs->vs_writers != 0) {
            default_pager_wait_write++;
            vs->vs_waiting_write = TRUE;
            assert_wait(&vs->vs_writers, THREAD_UNINT);
            VS_UNLOCK(vs);
            thread_block(THREAD_CONTINUE_NULL);
            VS_LOCK(vs);
            vs_async_wait(vs);
        }
        if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
            VS_UNLOCK(vs);
            return KERN_FAILURE;
        }
        vs_start_read(vs);
        VS_UNLOCK(vs);
    } else {
        vs_start_read(vs);
        vs_unlock(vs);
    }

    /*
     * Request must be on a page boundary and a multiple of pages.
     */
    if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
        Panic("bad alignment");

    pvs_cluster_read(vs, (vm_offset_t)offset, length);

    vs_finish_read(vs);

    return KERN_SUCCESS;
}

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages */
/* to write back.  As of the writing of this note, this is indeed the case: */
/* the kernel writes back one page at a time through this interface. */

kern_return_t
dp_memory_object_data_initialize(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    vm_size_t size)
{
    vstruct_t vs;

    DEBUG(DEBUG_MO_EXTERNAL,
          ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
           (int)mem_obj, (int)offset, (int)size));
    GSTAT(global_stats.gs_pages_init += atop(size));

    vs_lookup(mem_obj, vs);
    vs_lock(vs);
    vs_start_write(vs);
    vs_unlock(vs);

    /*
     * Write the data via clustered writes.  vs_cluster_write will
     * loop if the address range specified crosses cluster
     * boundaries.
     */
    vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

    vs_finish_write(vs);

    return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    vm_size_t size,
    vm_prot_t desired_access)
{
    Panic("dp_memory_object_data_unlock: illegal");
    return KERN_FAILURE;
}


kern_return_t
dp_memory_object_data_return(
    memory_object_t mem_obj,
    memory_object_offset_t offset,
    vm_size_t size,
    boolean_t dirty,
    boolean_t kernel_copy)
{
    vstruct_t vs;

    DEBUG(DEBUG_MO_EXTERNAL,
          ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
           (int)mem_obj, (int)offset, (int)size));
    GSTAT(global_stats.gs_pageout_calls++);

    /* This routine is called by the pageout thread.  The pageout thread */
    /* must not be blocked waiting on read activity, so the grant of the */
    /* vs lock is done on a try rather than a blocking basis.  The code */
    /* below relies on the fact that the interface is synchronous. */
    /* Should this interface again become async for some type of pager */
    /* in the future, the pages will have to be returned through a */
    /* separate, asynchronous path. */

    vs_lookup(mem_obj, vs);

    default_pager_total++;
    if (!VS_TRY_LOCK(vs)) {
        /* the call below will not be done by caller when we have */
        /* a synchronous interface */
        /* return KERN_LOCK_OWNED; */
        upl_t upl;
        int page_list_count = 0;
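        /*
         * We cannot take the lock right now, so pull the pages into
         * a UPL and immediately abort it; that hands them back to
         * the VM object instead of servicing the pageout here.
         */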
        memory_object_super_upl_request(vs->vs_control,
                    (memory_object_offset_t)offset,
                    size, size,
                    &upl, NULL, &page_list_count,
                    UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
                    | UPL_NO_SYNC | UPL_COPYOUT_FROM);
        upl_abort(upl, 0);
        upl_deallocate(upl);
        return KERN_SUCCESS;
    }

    if ((vs->vs_seqno != vs->vs_next_seqno++)
            || (vs->vs_readers)
            || (vs->vs_xfer_pending)) {
        upl_t upl;
        int page_list_count = 0;

        vs->vs_next_seqno--;
        VS_UNLOCK(vs);

        /* the call below will not be done by caller when we have */
        /* a synchronous interface */
        /* return KERN_LOCK_OWNED; */
        memory_object_super_upl_request(vs->vs_control,
                    (memory_object_offset_t)offset,
                    size, size,
                    &upl, NULL, &page_list_count,
                    UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
                    | UPL_NO_SYNC | UPL_COPYOUT_FROM);
        upl_abort(upl, 0);
        upl_deallocate(upl);
        return KERN_SUCCESS;
    }

    if ((size % vm_page_size) != 0)
        Panic("bad alignment");

    vs_start_write(vs);


    vs->vs_async_pending += 1;  /* protect from backing store contraction */
    vs_unlock(vs);

    /*
     * Write the data via clustered writes.  vs_cluster_write will
     * loop if the address range specified crosses cluster
     * boundaries.
     */
    vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

    vs_finish_write(vs);

    /* temporary, need a finer lock based on cluster */

    VS_LOCK(vs);
    vs->vs_async_pending -= 1;  /* release vs_async_wait */
    if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
        vs->vs_waiting_async = FALSE;
        VS_UNLOCK(vs);
        thread_wakeup(&vs->vs_async_pending);
    } else {
        VS_UNLOCK(vs);
    }


    return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 *	Handle requests for memory objects from the
 *	kernel.
 * Notes:
 *	Because we only give out the default memory
 *	manager port to the kernel, we don't have to
 *	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
    memory_object_default_t dmm,
    vm_size_t new_size,
    memory_object_t *new_mem_obj)
{
    vstruct_t vs;

    assert(dmm == default_pager_object);

    vs = vs_object_create(new_size);
    if (vs == VSTRUCT_NULL)
        return KERN_RESOURCE_SHORTAGE;

    vs->vs_next_seqno = 0;

    /*
     * Set up associations between this memory object
     * and this default_pager structure
     */

    vs->vs_mem_obj = ISVS;
    vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

    /*
     * After this, other threads might receive requests
     * for this memory object or find it in the port list.
     */

    vstruct_list_insert(vs);
    *new_mem_obj = vs_to_mem_obj(vs);
    return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
    default_pager_t pager,
    vm_size_t size,
    memory_object_t *mem_objp)
{
    vstruct_t vs;
    kern_return_t result;
    struct vstruct_alias *alias_struct;


    if (pager != default_pager_object)
        return KERN_INVALID_ARGUMENT;

    vs = vs_object_create(size);
    if (vs == VSTRUCT_NULL)
        return KERN_RESOURCE_SHORTAGE;

    /*
     * Set up associations between the default pager
     * and this vstruct structure
     */
    vs->vs_mem_obj = ISVS;
    vstruct_list_insert(vs);
    *mem_objp = vs_to_mem_obj(vs);
    return KERN_SUCCESS;
}

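/*
 * Return a snapshot of every object the default pager currently
 * manages, together with a pager reference for each, for the
 * default_pager_objects() monitoring interface.  The inline buffers
 * supplied by the caller are used if they are large enough; otherwise
 * larger buffers are allocated from kernel_map.
 */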
kern_return_t
default_pager_objects(
    default_pager_t pager,
    default_pager_object_array_t *objectsp,
    mach_msg_type_number_t *ocountp,
    memory_object_array_t *pagersp,
    mach_msg_type_number_t *pcountp)
{
    vm_offset_t oaddr = 0;      /* memory for objects */
    vm_size_t osize = 0;        /* current size */
    default_pager_object_t *objects;
    unsigned int opotential;

    vm_offset_t paddr = 0;      /* memory for pagers */
    vm_size_t psize = 0;        /* current size */
    memory_object_t *pagers;
    unsigned int ppotential;

    unsigned int actual;
    unsigned int num_objects;
    kern_return_t kr;
    vstruct_t entry;
/*
    if (pager != default_pager_default_port)
        return KERN_INVALID_ARGUMENT;
*/

    /* start with the inline memory */

    kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
                        (vm_map_copy_t)*objectsp);

    if (kr != KERN_SUCCESS)
        return kr;

    osize = round_page(*ocountp * sizeof *objects);
    kr = vm_map_wire(ipc_kernel_map,
                     trunc_page((vm_offset_t)objects),
                     round_page(((vm_offset_t)objects) + osize),
                     VM_PROT_READ|VM_PROT_WRITE, FALSE);
    osize = 0;

    *objectsp = objects;
    /* we start with the inline space */


    num_objects = 0;
    opotential = *ocountp;

    pagers = (memory_object_t *)*pagersp;
    ppotential = *pcountp;

    VSL_LOCK();

    /*
     * We will send no more than this many
     */
    actual = vstruct_list.vsl_count;
    VSL_UNLOCK();

    if (opotential < actual) {
        vm_offset_t newaddr;
        vm_size_t newsize;

        newsize = 2 * round_page(actual * sizeof *objects);

        kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
        if (kr != KERN_SUCCESS)
            goto nomemory;

        oaddr = newaddr;
        osize = newsize;
        opotential = osize / sizeof *objects;
        objects = (default_pager_object_t *)oaddr;
    }

    if (ppotential < actual) {
        vm_offset_t newaddr;
        vm_size_t newsize;

        newsize = 2 * round_page(actual * sizeof *pagers);

        kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
        if (kr != KERN_SUCCESS)
            goto nomemory;

        paddr = newaddr;
        psize = newsize;
        ppotential = psize / sizeof *pagers;
        pagers = (memory_object_t *)paddr;
    }

    /*
     * Now scan the list.
     */

    VSL_LOCK();

    num_objects = 0;
    queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

        memory_object_t pager;
        vm_size_t size;

        if ((num_objects >= opotential) ||
            (num_objects >= ppotential)) {

            /*
             * This should be rare.  In any case,
             * we will only miss recent objects,
             * because they are added at the end.
             */
            break;
        }

        /*
         * Avoid interfering with normal operations
         */
        if (!VS_MAP_TRY_LOCK(entry))
            goto not_this_one;
        size = ps_vstruct_allocated_size(entry);
        VS_MAP_UNLOCK(entry);

        /*
         * We need a reference for our caller.  Adding this
         * reference through the linked list could race with
         * destruction of the object.  If we find the object
         * has no references, just give up on it.
         */
        VS_LOCK(entry);
        if (entry->vs_references == 0) {
            VS_UNLOCK(entry);
            goto not_this_one;
        }
        pager = vs_to_mem_obj(entry);   /* the pager we hand back */
        dp_memory_object_reference(pager);
        VS_UNLOCK(entry);

        /* the arrays are wired, so no deadlock worries */

        objects[num_objects].dpo_object = (vm_offset_t) entry;
        objects[num_objects].dpo_size = size;
        pagers[num_objects++] = pager;
        continue;

      not_this_one:
        /*
         * Do not return garbage
         */
        objects[num_objects].dpo_object = (vm_offset_t) 0;
        objects[num_objects].dpo_size = 0;
        pagers[num_objects++] = MEMORY_OBJECT_NULL;

    }

    VSL_UNLOCK();

    /*
     * Deallocate and clear unused memory.
     * (Returned memory will automagically become pageable.)
     */

    if (objects == *objectsp) {

        /*
         * Our returned information fit inline.
         * Nothing to deallocate.
         */
        *ocountp = num_objects;
    } else if (actual == 0) {
        (void) vm_deallocate(kernel_map, oaddr, osize);

        /* return zero items inline */
        *ocountp = 0;
    } else {
        vm_offset_t used;

        used = round_page(actual * sizeof *objects);

        if (used != osize)
            (void) vm_deallocate(kernel_map,
                                 oaddr + used, osize - used);

        *objectsp = objects;
        *ocountp = num_objects;
    }

    if (pagers == (memory_object_t *)*pagersp) {

        /*
         * Our returned information fit inline.
         * Nothing to deallocate.
         */

        *pcountp = num_objects;
    } else if (actual == 0) {
        (void) vm_deallocate(kernel_map, paddr, psize);

        /* return zero items inline */
        *pcountp = 0;
    } else {
        vm_offset_t used;

        used = round_page(actual * sizeof *pagers);

        if (used != psize)
            (void) vm_deallocate(kernel_map,
                                 paddr + used, psize - used);

        *pagersp = (memory_object_array_t)pagers;
        *pcountp = num_objects;
    }
    (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
                         *ocountp + (vm_offset_t)objects, FALSE);
    (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
                         *ocountp, TRUE, (vm_map_copy_t *)objectsp);

    return KERN_SUCCESS;

  nomemory:
    {
        register int i;
        for (i = 0; i < num_objects; i++)
            if (pagers[i] != MEMORY_OBJECT_NULL)
                memory_object_deallocate(pagers[i]);
    }

    if (objects != *objectsp)
        (void) vm_deallocate(kernel_map, oaddr, osize);

    if (pagers != (memory_object_t *)*pagersp)
        (void) vm_deallocate(kernel_map, paddr, psize);

    return KERN_RESOURCE_SHORTAGE;
}

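/*
 * Return the list of allocated page offsets for a single object, again
 * for the monitoring interface.  If the caller's inline buffer is too
 * small, keep allocating a larger one from kernel_map and retrying
 * until the snapshot fits.
 */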
kern_return_t
default_pager_object_pages(
    default_pager_t pager,
    memory_object_t object,
    default_pager_page_array_t *pagesp,
    mach_msg_type_number_t *countp)
{
    vm_offset_t addr;       /* memory for page offsets */
    vm_size_t size = 0;     /* current memory size */
    default_pager_page_t *pages;
    unsigned int potential, actual;
    kern_return_t kr;


    if (pager != default_pager_object)
        return KERN_INVALID_ARGUMENT;

    kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
                        (vm_map_copy_t)*pagesp);

    if (kr != KERN_SUCCESS)
        return kr;

    size = round_page(*countp * sizeof *pages);
    kr = vm_map_wire(ipc_kernel_map,
                     trunc_page((vm_offset_t)pages),
                     round_page(((vm_offset_t)pages) + size),
                     VM_PROT_READ|VM_PROT_WRITE, FALSE);
    size = 0;

    *pagesp = pages;
    /* we start with the inline space */

    addr = (vm_offset_t)pages;
    potential = *countp;

    for (;;) {
        vstruct_t entry;

        VSL_LOCK();
        queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
                      vs_links) {
            VS_LOCK(entry);
            if (vs_to_mem_obj(entry) == object) {
                VSL_UNLOCK();
                goto found_object;
            }
            VS_UNLOCK(entry);
        }
        VSL_UNLOCK();

        /* did not find the object */

        if (pages != *pagesp)
            (void) vm_deallocate(kernel_map, addr, size);
        return KERN_INVALID_ARGUMENT;

      found_object:

        if (!VS_MAP_TRY_LOCK(entry)) {
            /* oh well bad luck */
            int wresult;

            VS_UNLOCK(entry);

            assert_wait_timeout(1, THREAD_UNINT);
            wresult = thread_block(THREAD_CONTINUE_NULL);
            assert(wresult == THREAD_TIMED_OUT);
            continue;
        }

        actual = ps_vstruct_allocated_pages(entry, pages, potential);
        VS_MAP_UNLOCK(entry);
        VS_UNLOCK(entry);

        if (actual <= potential)
            break;

        /* allocate more memory */

        if (pages != *pagesp)
            (void) vm_deallocate(kernel_map, addr, size);
        size = round_page(actual * sizeof *pages);
        kr = vm_allocate(kernel_map, &addr, size, TRUE);
        if (kr != KERN_SUCCESS)
            return kr;
        pages = (default_pager_page_t *)addr;
        potential = size / sizeof *pages;
    }

    /*
     * Deallocate and clear unused memory.
     * (Returned memory will automagically become pageable.)
     */

    if (pages == *pagesp) {

        /*
         * Our returned information fit inline.
         * Nothing to deallocate.
         */

        *countp = actual;
    } else if (actual == 0) {
        (void) vm_deallocate(kernel_map, addr, size);

        /* return zero items inline */
        *countp = 0;
    } else {
        vm_offset_t used;

        used = round_page(actual * sizeof *pages);

        if (used != size)
            (void) vm_deallocate(kernel_map,
                                 addr + used, size - used);

        *pagesp = pages;
        *countp = actual;
    }
    (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
                         *countp + (vm_offset_t)pages, FALSE);
    (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
                         *countp, TRUE, (vm_map_copy_t *)pagesp);
    return KERN_SUCCESS;
}