]> git.saurik.com Git - apple/xnu.git/blame - osfmk/default_pager/dp_memory_object.c
xnu-517.12.7.tar.gz
[apple/xnu.git] / osfmk / default_pager / dp_memory_object.c
CommitLineData
1c79356b
A
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
e5568f75
A
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
43866e37 11 *
e5568f75
A
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
1c79356b
A
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
e5568f75
A
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
1c79356b
A
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51/*
52 * Default Pager.
53 * Memory Object Management.
54 */
55
56#include "default_pager_internal.h"
0b4e3aa0 57#include <mach/memory_object_types.h>
1c79356b 58#include <mach/memory_object_server.h>
0b4e3aa0
A
59#include <vm/memory_object.h>
60#include <vm/vm_pageout.h>
1c79356b
A
61
62
/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port, this list is
 * only used for monitoring purposes by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment which
 * is to be removed.
 */
/* Global registry of all vstructs; guarded by VSL_LOCK/VSL_UNLOCK. */
struct vstruct_list_head	vstruct_list;

0b4e3aa0 74__private_extern__ void
1c79356b
A
75vstruct_list_insert(
76 vstruct_t vs)
77{
78 VSL_LOCK();
79 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
80 vstruct_list.vsl_count++;
81 VSL_UNLOCK();
82}
83
1c79356b 84
/*
 * Remove a vstruct from the global monitoring list.
 * NOTE: unlike vstruct_list_insert(), this does NOT take VSL_LOCK;
 * the caller must already hold it (see dp_memory_object_deallocate,
 * which calls this while holding the list lock).
 */
__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

92
/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * terminate requests wait for preceding reads and writes to finish.
 */

/* Monitoring counters only — never consulted for control flow. */
static unsigned int default_pager_total = 0;		/* debugging */
static unsigned int default_pager_wait_seqno = 0;	/* debugging */
static unsigned int default_pager_wait_read = 0;	/* debugging */
static unsigned int default_pager_wait_write = 0;	/* debugging */
static unsigned int default_pager_wait_refs = 0;	/* debugging */

/*
 * Block until all asynchronous I/O pending on this vstruct has
 * drained (vs_async_pending reaches zero).  Called and returns
 * with the vs lock held; the lock is dropped around each block.
 */
__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{

	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		/* announce interest so the completing I/O wakes us */
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}

124
1c79356b 125
0b4e3aa0 126#if PARALLEL
1c79356b
A
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	by a single thread if the receiver is multithreaded and
 *	the interfaces are asynchronous (i.e. sender can generate
 *	more than one request before the first is received in the
 *	pager).  Normally, IPC would generate these number in that
 *	case.  But we are trying to avoid using IPC for the in-kernel
 *	scenario. Since these are actually invoked synchronously
 *	anyway (in-kernel), we can just fake the sequence number
 *	generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t		vs)
{
	mach_port_seqno_t	seqno;

	default_pager_total++;
	VS_LOCK(vs);

	/* take a ticket; we may run once all earlier tickets retire */
	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

160
161/*
162 * Increments sequence number and unlocks pager.
163 */
0b4e3aa0 164__private_extern__ void
1c79356b
A
165vs_unlock(vstruct_t vs)
166{
1c79356b 167 vs->vs_seqno++;
0b4e3aa0
A
168 if (vs->vs_waiting_seqno) {
169 vs->vs_waiting_seqno = FALSE;
170 VS_UNLOCK(vs);
171 thread_wakeup(&vs->vs_seqno);
172 return;
173 }
1c79356b 174 VS_UNLOCK(vs);
1c79356b
A
175}
176
177/*
178 * Start a read - one more reader. Pager must be locked.
179 */
0b4e3aa0 180__private_extern__ void
1c79356b
A
181vs_start_read(
182 vstruct_t vs)
183{
184 vs->vs_readers++;
185}
186
/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 * Returns with the pager locked and vs_readers == 0.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		/* vs_finish_read() will wake us when the last reader exits */
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

203
204/*
205 * Finish a read. Pager is unlocked and returns unlocked.
206 */
0b4e3aa0 207__private_extern__ void
1c79356b
A
208vs_finish_read(
209 vstruct_t vs)
210{
211 VS_LOCK(vs);
0b4e3aa0 212 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
1c79356b
A
213 vs->vs_waiting_read = FALSE;
214 VS_UNLOCK(vs);
0b4e3aa0
A
215 thread_wakeup(&vs->vs_readers);
216 return;
217 }
218 VS_UNLOCK(vs);
1c79356b
A
219}
220
221/*
222 * Start a write - one more writer. Pager must be locked.
223 */
0b4e3aa0 224__private_extern__ void
1c79356b
A
225vs_start_write(
226 vstruct_t vs)
227{
228 vs->vs_writers++;
229}
230
/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 * Also drains pending asynchronous I/O before returning, so the
 * caller sees a fully quiesced write side.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

248
/* This is to be used for the transfer from segment code ONLY */
/* The transfer code holds off vs destruction by keeping the */
/* vs_async_wait count non-zero.  It will not conflict with */
/* other writers on an async basis because it only writes on */
/* a cluster basis into fresh (as of sync time) cluster locations */

/*
 * Like vs_wait_for_writers() but without the trailing
 * vs_async_wait() drain — see the rationale above.
 */
__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

268
269
270/*
271 * Finish a write. Pager is unlocked and returns unlocked.
272 */
0b4e3aa0 273__private_extern__ void
1c79356b
A
274vs_finish_write(
275 vstruct_t vs)
276{
277 VS_LOCK(vs);
0b4e3aa0 278 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
1c79356b
A
279 vs->vs_waiting_write = FALSE;
280 VS_UNLOCK(vs);
0b4e3aa0
A
281 thread_wakeup(&vs->vs_writers);
282 return;
1c79356b 283 }
0b4e3aa0 284 VS_UNLOCK(vs);
1c79356b 285}
1c79356b
A
286#endif /* PARALLEL */
287
1c79356b
A
288vstruct_t
289vs_object_create(
290 vm_size_t size)
291{
292 vstruct_t vs;
1c79356b
A
293
294 /*
295 * Allocate a vstruct. If there are any problems, then report them
296 * to the console.
297 */
298 vs = ps_vstruct_create(size);
299 if (vs == VSTRUCT_NULL) {
300 dprintf(("vs_object_create: unable to allocate %s\n",
301 "-- either run swapon command or reboot"));
302 return VSTRUCT_NULL;
303 }
304
305 return vs;
306}
307
#if 0
/*
 * Dead code (compiled out): old port-based registration path that
 * arranged a no-senders notification on the memory object port.
 * Kept for reference only.
 */
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ipc_port_make_sonce(mem_obj);
	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

1c79356b
A
345
/*
 * memory_object interface: the kernel attaches a
 * memory_object_control to this pager's vstruct.  A second init
 * without an intervening terminate is a fatal protocol violation.
 */
kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	vm_size_t		pager_page_size)
{
	vstruct_t		vs;

	assert(pager_page_size == vm_page_size);

	/* take our own reference on the control before publishing it */
	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

369
/*
 * memory_object interface: msync support.  The pager keeps no
 * dirty state of its own, so we only serialize behind earlier
 * requests (vs_lock/vs_unlock) and immediately report completion.
 */
kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	vm_sync_t		flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

387
0b4e3aa0
A
/*
 * memory_object interface: unmap is never expected for the
 * default pager; getting here is a kernel bug.
 */
kern_return_t
dp_memory_object_unmap(
	memory_object_t		mem_obj)
{
	panic("dp_memory_object_unmap");

	return KERN_FAILURE;	/* not reached */
}

396
1c79356b
A
/*
 * memory_object interface: the kernel is detaching from this
 * memory object.  Quiesce outstanding reads/writes, clear the
 * control association (so a future init can succeed), and drop
 * our reference on the control.
 */
kern_return_t
dp_memory_object_terminate(
	memory_object_t		mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;
	kern_return_t		kr;	/* NOTE(review): unused in this routine */

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* a bit of special case ugliness here.  Wakeup any waiting reads */
	/* these data requests had to be removed from the seqno traffic */
	/* based on a performance bottleneck with large memory objects */
	/* the problem will right itself with the new component based */
	/* synchronous interface.  The new async will be able to return */
	/* failure during its sync phase.  In the mean time ... */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

447
448void
0b4e3aa0
A
449dp_memory_object_reference(
450 memory_object_t mem_obj)
451{
452 vstruct_t vs;
453
454 vs_lookup_safe(mem_obj, vs);
455 if (vs == VSTRUCT_NULL)
456 return;
457
458 VS_LOCK(vs);
459 assert(vs->vs_references > 0);
460 vs->vs_references++;
461 VS_UNLOCK(vs);
462}
463
extern ipc_port_t max_pages_trigger_port;
extern int dp_pages_free;
extern int maximum_pages_free;

/*
 * memory_object interface: drop one reference; on the last one,
 * serialize behind all in-flight requests, drain async I/O,
 * unregister from the global vstruct list and destroy the vstruct.
 */
void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	/* last reference: take a seqno ticket (inline vs_lock, since
	 * we already hold VS_LOCK) so we run after earlier requests */
	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged */

	/* get VSL_LOCK out of order by using TRY mechanism */
	while(!VSL_LOCK_TRY()) {
		/* lock order is VSL before VS; drop VS, let the VSL
		 * holder finish, re-take VS, re-drain async I/O */
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}


	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this */
	/* call.  We are vulnerable to losing a paging segment we rely */
	/* on as soon as we remove ourselves from the VSL and unlock */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * the destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects.
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if(backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}

555
/*
 * memory_object interface: page-in.  Waits out any in-flight
 * writers (to avoid returning stale data), then performs a
 * clustered read from backing store.
 */
kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		length,
	vm_prot_t		protection_required)
{
	vstruct_t		vs;

	GSTAT(global_stats.gs_pagein_calls++);


	/* CDY at this moment vs_lookup panics when presented with the wrong */
	/* port.  As we are expanding this pager to support user interfaces */
	/* this should be changed to return kern_failure */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance */
	/* reasons.  We can do this because we know that the read and */
	/* write threads are different and we rely on synchronization */
	/* of read and write requests at the cache memory_object level */
	/* break out wait_for_writers, all of this goes away when */
	/* we get real control of seqno with the new component interface */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);  /* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		/* object may have been terminated while we slept */
		if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	pvs_cluster_read(vs, (vm_offset_t)offset, length);

	vs_finish_read(vs);

	return KERN_SUCCESS;
}

618
/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages */
/* to write back.  As of the writing of this note, this is indeed the case */
/* the kernel writes back one page at a time through this interface */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size)
{
	vstruct_t	vs;

	DEBUG(DEBUG_MO_EXTERNAL,
	      ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
	       (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

660
1c79356b
A
/*
 * memory_object interface: the default pager never issues lock
 * requests, so a data_unlock can never legitimately arrive.
 */
kern_return_t
dp_memory_object_data_unlock(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size,
	vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}


671
672
1c79356b
A
/*
 * memory_object interface: page-out from the pageout thread.
 * Must never block on the vs lock (or on readers/seqno order);
 * when the pager is busy, the pages are instead handed back to
 * the kernel via a UPL abort so they can be re-sent later.
 */
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	vm_size_t		size,
	boolean_t		dirty,
	boolean_t		kernel_copy)
{
	vstruct_t	vs;

	DEBUG(DEBUG_MO_EXTERNAL,
	      ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
	       (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread.  The pageout thread */
	/* cannot be blocked by read activities unless the read activities */
	/* Therefore the grant of vs lock must be done on a try versus a */
	/* blocking basis.  The code below relies on the fact that the  */
	/* interface is synchronous.  Should this interface be again async */
	/* for some type of pager in the future the pages will have to be */
	/* returned through a separate, asynchronous path. */

	vs_lookup(mem_obj, vs);

	default_pager_total++;
	if(!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		int		page_list_count = 0;

		/* grab the pages and abort the UPL: gives them back to
		 * the kernel dirty, to be resent when we are less busy */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	/* out-of-order request, active readers, or a transfer in
	 * progress: also punt the pages back via UPL abort */
	if ((vs->vs_seqno != vs->vs_next_seqno++)
			|| (vs->vs_readers)
			|| (vs->vs_xfer_pending)) {
		upl_t		upl;
		int		page_list_count = 0;

		vs->vs_next_seqno--;	/* undo the ticket we just took */
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl,0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);


	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes. vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}


	return KERN_SUCCESS;
}

772
0b4e3aa0
A
/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t		vs;

	assert(dmm == default_pager_object);

	vs = vs_object_create(new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_mem_obj = ISVS;
	vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

816
817/*
818 * Create an external object.
819 */
820kern_return_t
821default_pager_object_create(
0b4e3aa0
A
822 default_pager_t pager,
823 vm_size_t size,
824 memory_object_t *mem_objp)
1c79356b
A
825{
826 vstruct_t vs;
1c79356b
A
827 kern_return_t result;
828 struct vstruct_alias *alias_struct;
1c79356b
A
829
830
0b4e3aa0 831 if (pager != default_pager_object)
1c79356b
A
832 return KERN_INVALID_ARGUMENT;
833
834 vs = vs_object_create(size);
0b4e3aa0
A
835 if (vs == VSTRUCT_NULL)
836 return KERN_RESOURCE_SHORTAGE;
1c79356b 837
1c79356b 838 /*
0b4e3aa0 839 * Set up associations between the default pager
1c79356b
A
840 * and this vstruct structure
841 */
0b4e3aa0 842 vs->vs_mem_obj = ISVS;
1c79356b 843 vstruct_list_insert(vs);
0b4e3aa0 844 *mem_objp = vs_to_mem_obj(vs);
1c79356b
A
845 return KERN_SUCCESS;
846}
847
848kern_return_t
849default_pager_objects(
0b4e3aa0 850 default_pager_t pager,
1c79356b
A
851 default_pager_object_array_t *objectsp,
852 mach_msg_type_number_t *ocountp,
0b4e3aa0 853 memory_object_array_t *pagersp,
1c79356b
A
854 mach_msg_type_number_t *pcountp)
855{
856 vm_offset_t oaddr = 0; /* memory for objects */
857 vm_size_t osize = 0; /* current size */
858 default_pager_object_t * objects;
859 unsigned int opotential;
860
0b4e3aa0 861 vm_offset_t paddr = 0; /* memory for pagers */
1c79356b 862 vm_size_t psize = 0; /* current size */
0b4e3aa0 863 memory_object_t * pagers;
1c79356b
A
864 unsigned int ppotential;
865
866 unsigned int actual;
867 unsigned int num_objects;
868 kern_return_t kr;
869 vstruct_t entry;
1c79356b
A
870/*
871 if (pager != default_pager_default_port)
872 return KERN_INVALID_ARGUMENT;
873*/
874
875 /* start with the inline memory */
876
877 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
878 (vm_map_copy_t) *objectsp);
879
880 if (kr != KERN_SUCCESS)
881 return kr;
882
55e303ae 883 osize = round_page_32(*ocountp * sizeof * objects);
1c79356b 884 kr = vm_map_wire(ipc_kernel_map,
55e303ae
A
885 trunc_page_32((vm_offset_t)objects),
886 round_page_32(((vm_offset_t)objects) + osize),
1c79356b
A
887 VM_PROT_READ|VM_PROT_WRITE, FALSE);
888 osize=0;
889
890 *objectsp = objects;
891 /* we start with the inline space */
892
893
894 num_objects = 0;
895 opotential = *ocountp;
896
0b4e3aa0 897 pagers = (memory_object_t *) *pagersp;
1c79356b
A
898 ppotential = *pcountp;
899
900 VSL_LOCK();
901
902 /*
903 * We will send no more than this many
904 */
905 actual = vstruct_list.vsl_count;
906 VSL_UNLOCK();
907
908 if (opotential < actual) {
909 vm_offset_t newaddr;
910 vm_size_t newsize;
911
55e303ae 912 newsize = 2 * round_page_32(actual * sizeof * objects);
1c79356b
A
913
914 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
915 if (kr != KERN_SUCCESS)
916 goto nomemory;
917
918 oaddr = newaddr;
919 osize = newsize;
920 opotential = osize / sizeof * objects;
921 objects = (default_pager_object_t *)oaddr;
922 }
923
924 if (ppotential < actual) {
925 vm_offset_t newaddr;
926 vm_size_t newsize;
927
55e303ae 928 newsize = 2 * round_page_32(actual * sizeof * pagers);
1c79356b
A
929
930 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
931 if (kr != KERN_SUCCESS)
932 goto nomemory;
933
934 paddr = newaddr;
935 psize = newsize;
0b4e3aa0
A
936 ppotential = psize / sizeof * pagers;
937 pagers = (memory_object_t *)paddr;
1c79356b
A
938 }
939
940 /*
941 * Now scan the list.
942 */
943
944 VSL_LOCK();
945
946 num_objects = 0;
947 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
948
0b4e3aa0 949 memory_object_t pager;
1c79356b
A
950 vm_size_t size;
951
952 if ((num_objects >= opotential) ||
953 (num_objects >= ppotential)) {
954
955 /*
956 * This should be rare. In any case,
957 * we will only miss recent objects,
958 * because they are added at the end.
959 */
960 break;
961 }
962
963 /*
964 * Avoid interfering with normal operations
965 */
966 if (!VS_MAP_TRY_LOCK(entry))
967 goto not_this_one;
968 size = ps_vstruct_allocated_size(entry);
969 VS_MAP_UNLOCK(entry);
970
971 VS_LOCK(entry);
972
1c79356b 973 /*
0b4e3aa0
A
974 * We need a reference for our caller. Adding this
975 * reference through the linked list could race with
976 * destruction of the object. If we find the object
977 * has no references, just give up on it.
1c79356b 978 */
0b4e3aa0
A
979 VS_LOCK(entry);
980 if (entry->vs_references == 0) {
1c79356b 981 VS_UNLOCK(entry);
0b4e3aa0 982 goto not_this_one;
1c79356b 983 }
0b4e3aa0 984 dp_memory_object_reference(vs_to_mem_obj(entry));
1c79356b
A
985 VS_UNLOCK(entry);
986
987 /* the arrays are wired, so no deadlock worries */
988
989 objects[num_objects].dpo_object = (vm_offset_t) entry;
990 objects[num_objects].dpo_size = size;
0b4e3aa0 991 pagers [num_objects++] = pager;
1c79356b
A
992 continue;
993
994 not_this_one:
995 /*
996 * Do not return garbage
997 */
998 objects[num_objects].dpo_object = (vm_offset_t) 0;
999 objects[num_objects].dpo_size = 0;
0b4e3aa0 1000 pagers[num_objects++] = MEMORY_OBJECT_NULL;
1c79356b
A
1001
1002 }
1003
1004 VSL_UNLOCK();
1005
1006 /*
1007 * Deallocate and clear unused memory.
1008 * (Returned memory will automagically become pageable.)
1009 */
1010
1011 if (objects == *objectsp) {
1012
1013 /*
1014 * Our returned information fit inline.
1015 * Nothing to deallocate.
1016 */
1017 *ocountp = num_objects;
1018 } else if (actual == 0) {
1019 (void) vm_deallocate(kernel_map, oaddr, osize);
1020
1021 /* return zero items inline */
1022 *ocountp = 0;
1023 } else {
1024 vm_offset_t used;
1025
55e303ae 1026 used = round_page_32(actual * sizeof * objects);
1c79356b
A
1027
1028 if (used != osize)
1029 (void) vm_deallocate(kernel_map,
1030 oaddr + used, osize - used);
1031
1032 *objectsp = objects;
1033 *ocountp = num_objects;
1034 }
1035
0b4e3aa0 1036 if (pagers == (memory_object_t *)*pagersp) {
1c79356b
A
1037
1038 /*
1039 * Our returned information fit inline.
1040 * Nothing to deallocate.
1041 */
1042
1043 *pcountp = num_objects;
1044 } else if (actual == 0) {
1045 (void) vm_deallocate(kernel_map, paddr, psize);
1046
1047 /* return zero items inline */
1048 *pcountp = 0;
1049 } else {
1050 vm_offset_t used;
1051
55e303ae 1052 used = round_page_32(actual * sizeof * pagers);
1c79356b
A
1053
1054 if (used != psize)
1055 (void) vm_deallocate(kernel_map,
1056 paddr + used, psize - used);
1057
0b4e3aa0 1058 *pagersp = (memory_object_array_t)pagers;
1c79356b
A
1059 *pcountp = num_objects;
1060 }
1061 (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1062 *ocountp + (vm_offset_t)objects, FALSE);
1063 (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1064 *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1065
1066 return KERN_SUCCESS;
1067
1068 nomemory:
1069 {
1070 register int i;
1071 for (i = 0; i < num_objects; i++)
0b4e3aa0
A
1072 if (pagers[i] != MEMORY_OBJECT_NULL)
1073 memory_object_deallocate(pagers[i]);
1c79356b
A
1074 }
1075
1076 if (objects != *objectsp)
1077 (void) vm_deallocate(kernel_map, oaddr, osize);
1078
0b4e3aa0 1079 if (pagers != (memory_object_t *)*pagersp)
1c79356b
A
1080 (void) vm_deallocate(kernel_map, paddr, psize);
1081
1082 return KERN_RESOURCE_SHORTAGE;
1083}
1084
/*
 * Monitoring RPC: return the list of allocated page offsets for one
 * object.  Retries with a larger buffer (or after a short timed
 * backoff when the map lock is busy) until everything fits.
 */
kern_return_t
default_pager_object_pages(
	default_pager_t		pager,
	memory_object_t		object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t	*countp)
{
	vm_offset_t		addr;	/* memory for page offsets */
	vm_size_t		size = 0; /* current memory size */
	default_pager_page_t	* pages;
	unsigned int		potential, actual;
	kern_return_t		kr;


	if (pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
			    (vm_map_copy_t) *pagesp);

	if (kr != KERN_SUCCESS)
		return kr;

	/* wire the inline buffer so it can be filled under locks */
	size = round_page_32(*countp * sizeof * pages);
	kr = vm_map_wire(ipc_kernel_map,
			 trunc_page_32((vm_offset_t)pages),
			 round_page_32(((vm_offset_t)pages) + size),
			 VM_PROT_READ|VM_PROT_WRITE, FALSE);
	size=0;

	*pagesp = pages;
	/* we start with the inline space */

	addr = (vm_offset_t)pages;
	potential = *countp;

	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */

		if (pages != *pagesp)
			(void) vm_deallocate(kernel_map, addr, size);
		return KERN_INVALID_ARGUMENT;

	    found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			/* sleep ~1 tick, then rescan from the top */
			assert_wait_timeout( 1, THREAD_UNINT );
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */

		if (pages != *pagesp)
			(void) vm_deallocate(kernel_map, addr, size);
		size = round_page_32(actual * sizeof * pages);
		kr = vm_allocate(kernel_map, &addr, size, TRUE);
		if (kr != KERN_SUCCESS)
			return kr;
		pages = (default_pager_page_t *)addr;
		potential = size / sizeof * pages;
	}

	/*
	 * Deallocate and clear unused memory.
	 * (Returned memory will automagically become pageable.)
	 */

	if (pages == *pagesp) {

		/*
		 * Our returned information fit inline.
		 * Nothing to deallocate.
		 */

		*countp = actual;
	} else if (actual == 0) {
		(void) vm_deallocate(kernel_map, addr, size);

		/* return zero items inline */
		*countp = 0;
	} else {
		vm_offset_t used;

		used = round_page_32(actual * sizeof * pages);

		if (used != size)
			(void) vm_deallocate(kernel_map,
					     addr + used, size - used);

		*pagesp = pages;
		*countp = actual;
	}
	/* NOTE(review): the unwire/copyin below pass *countp (an element
	 * count) where a byte length appears intended — confirm against
	 * the MIG-generated caller before changing. */
	(void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
			     *countp + (vm_offset_t)pages, FALSE);
	(void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
			     *countp, TRUE, (vm_map_copy_t *)pagesp);
	return KERN_SUCCESS;
}