osfmk/default_pager/dp_memory_object.c (xnu-124.13)
1/*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22/*
23 * @OSF_COPYRIGHT@
24 */
25/*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51/*
52 * Default Pager.
53 * Memory Object Management.
54 */
55
56#include "default_pager_internal.h"
57#include <mach/memory_object_server.h>
58#include <vm/vm_pageout.h> /* include for upl_t */
59
60
61/*
62 * List of all vstructs. A specific vstruct is
63 * found directly via its port, this list is
64 * only used for monitoring purposes by the
65 * default_pager_object* calls and by ps_delete
66 * when abstract memory objects must be scanned
67 * to remove any live storage on a segment which
68 * is to be removed.
69 */
70struct vstruct_list_head vstruct_list;
71
72void vstruct_list_insert(vstruct_t vs); /* forward */
73
74void
75vstruct_list_insert(
76 vstruct_t vs)
77{
78 VSL_LOCK();
79 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
80 vstruct_list.vsl_count++;
81 VSL_UNLOCK();
82}
83
84void vstruct_list_delete(vstruct_t vs); /* forward */
85
86void
87vstruct_list_delete(
88 vstruct_t vs)
89{
90 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
91 vstruct_list.vsl_count--;
92}
93
94/*
95 * We use the sequence numbers on requests to regulate
96 * our parallelism. In general, we allow multiple reads and writes
97 * to proceed in parallel, with the exception that reads must
98 * wait for previous writes to finish. (Because the kernel might
99 * generate a data-request for a page on the heels of a data-write
100 * for the same page, and we must avoid returning stale data.)
101 * Terminate requests wait for preceding reads and writes to finish.
102 */
103
104unsigned int default_pager_total = 0; /* debugging */
105unsigned int default_pager_wait_seqno = 0; /* debugging */
106unsigned int default_pager_wait_read = 0; /* debugging */
107unsigned int default_pager_wait_write = 0; /* debugging */
108unsigned int default_pager_wait_refs = 0; /* debugging */
109
110void vs_async_wait(vstruct_t); /* forward */
111
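/*
 * Wait for all asynchronous I/O pending on this vstruct to drain.
 * Called with the vstruct locked; the lock is dropped and reacquired
 * around each block.
 */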
112void
113vs_async_wait(
114 vstruct_t vs)
115{
116 static char here[] = "vs_async_wait";
117
118 ASSERT(vs->vs_async_pending >= 0);
119 while (vs->vs_async_pending > 0) {
120 vs->vs_waiting_async = TRUE;
121 assert_wait(&vs->vs_waiting_async, THREAD_UNINT);
122 VS_UNLOCK(vs);
123 thread_block((void (*)(void))0);
124 VS_LOCK(vs);
125 }
126 ASSERT(vs->vs_async_pending == 0);
127}
128
129#if PARALLEL
130void vs_lock(vstruct_t, mach_port_seqno_t);
131void vs_unlock(vstruct_t);
132void vs_start_read(vstruct_t);
133void vs_wait_for_readers(vstruct_t);
134void vs_finish_read(vstruct_t);
135void vs_start_write(vstruct_t);
136void vs_wait_for_writers(vstruct_t);
137void vs_finish_write(vstruct_t);
138void vs_wait_for_refs(vstruct_t);
139void vs_finish_refs(vstruct_t);
140
141/*
142 * Waits for correct sequence number. Leaves pager locked.
143 * JMM - Sequence numbers guarantee ordering, but in a preemptible
144 * kernel, they are generated without locks, and so their
145 * generation order is undefined (and therefore unreliable).
 146 * Since we need to fix this anyway, and I needed to get rid
 147 * of asymmetry in the interface definitions, I have
148 * punted this to here.
149 */
150void
151vs_lock(
152 vstruct_t vs,
153 mach_port_seqno_t seqno)
154{
155 default_pager_total++;
156 VS_LOCK(vs);
157
158 seqno = vs->vs_next_seqno++;
159
160 while (vs->vs_seqno != seqno) {
161 default_pager_wait_seqno++;
162 vs->vs_waiting_seqno = TRUE;
163 assert_wait(&vs->vs_waiting_seqno, THREAD_UNINT);
164 VS_UNLOCK(vs);
165 thread_block((void (*)(void))0);
166 VS_LOCK(vs);
167 }
168}
169
170/*
171 * Increments sequence number and unlocks pager.
172 */
173void
174vs_unlock(vstruct_t vs)
175{
176 boolean_t need_wakeups = vs->vs_waiting_seqno;
177
178 vs->vs_waiting_seqno = FALSE;
179 vs->vs_seqno++;
180 VS_UNLOCK(vs);
181 if (need_wakeups)
182 thread_wakeup(&vs->vs_waiting_seqno);
183}
184
185/*
186 * Start a read - one more reader. Pager must be locked.
187 */
188void
189vs_start_read(
190 vstruct_t vs)
191{
192 vs->vs_readers++;
193}
194
195/*
196 * Wait for readers. Unlocks and relocks pager if wait needed.
197 */
198void
199vs_wait_for_readers(
200 vstruct_t vs)
201{
202 while (vs->vs_readers != 0) {
203 default_pager_wait_read++;
204 vs->vs_waiting_read = TRUE;
205 assert_wait(&vs->vs_waiting_read, THREAD_UNINT);
206 VS_UNLOCK(vs);
207 thread_block((void (*)(void))0);
208 VS_LOCK(vs);
209 }
210}
211
212/*
213 * Finish a read. Pager is unlocked and returns unlocked.
214 */
215void
216vs_finish_read(
217 vstruct_t vs)
218{
219 VS_LOCK(vs);
220 if (--vs->vs_readers == 0) {
221 boolean_t need_wakeups = vs->vs_waiting_read;
222
223 vs->vs_waiting_read = FALSE;
224 VS_UNLOCK(vs);
225 if (need_wakeups)
226 thread_wakeup(&vs->vs_waiting_read);
227 } else
228 VS_UNLOCK(vs);
229}
230
231/*
232 * Start a write - one more writer. Pager must be locked.
233 */
234void
235vs_start_write(
236 vstruct_t vs)
237{
238 vs->vs_writers++;
239}
240
241/*
242 * Wait for writers. Unlocks and relocks pager if wait needed.
243 */
244void
245vs_wait_for_writers(
246 vstruct_t vs)
247{
248 while (vs->vs_writers != 0) {
249 default_pager_wait_write++;
250 vs->vs_waiting_write = TRUE;
251 assert_wait(&vs->vs_waiting_write, THREAD_UNINT);
252 VS_UNLOCK(vs);
253 thread_block((void (*)(void))0);
254 VS_LOCK(vs);
255 }
256 vs_async_wait(vs);
257}
258
259/* This is to be used for the transfer from segment code ONLY */
260/* The transfer code holds off vs destruction by keeping the */
261/* vs_async_pending count non-zero. It will not conflict with */
262/* other writers on an async basis because it only writes on */
263/* a cluster basis into fresh (as of sync time) cluster locations */
264void
265vs_wait_for_sync_writers(
266 vstruct_t vs)
267{
268 while (vs->vs_writers != 0) {
269 default_pager_wait_write++;
270 vs->vs_waiting_write = TRUE;
271 assert_wait(&vs->vs_waiting_write, THREAD_UNINT);
272 VS_UNLOCK(vs);
273 thread_block((void (*)(void))0);
274 VS_LOCK(vs);
275 }
276}
277
278
279/*
280 * Finish a write. Pager is unlocked and returns unlocked.
281 */
282void
283vs_finish_write(
284 vstruct_t vs)
285{
286 VS_LOCK(vs);
287 if (--vs->vs_writers == 0) {
288 boolean_t need_wakeups = vs->vs_waiting_write;
289
290 vs->vs_waiting_write = FALSE;
291 VS_UNLOCK(vs);
292 if (need_wakeups)
293 thread_wakeup(&vs->vs_waiting_write);
294 } else
295 VS_UNLOCK(vs);
296}
297
298/*
299 * Wait for concurrent default_pager_objects.
300 * Unlocks and relocks pager if wait needed.
301 */
302void
303vs_wait_for_refs(
304 vstruct_t vs)
305{
306 while (vs->vs_name_refs == 0) {
307 default_pager_wait_refs++;
308 vs->vs_waiting_refs = TRUE;
309 assert_wait(&vs->vs_waiting_refs, THREAD_UNINT);
310 VS_UNLOCK(vs);
311 thread_block((void (*)(void))0);
312 VS_LOCK(vs);
313 }
314}
315
316/*
317 * Finished creating name refs - wake up waiters.
318 */
319void
320vs_finish_refs(
321 vstruct_t vs)
322{
323 boolean_t need_wakeups = vs->vs_waiting_refs;
324 vs->vs_waiting_refs = FALSE;
325 if (need_wakeups)
326 thread_wakeup(&vs->vs_waiting_refs);
327}
328
329#else /* PARALLEL */
330
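/*
 * Without PARALLEL, the sequencing and reader/writer bookkeeping
 * compiles away entirely.
 */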
331#define vs_lock(vs,seqno)
332#define vs_unlock(vs)
333#define vs_start_read(vs)
334#define vs_wait_for_readers(vs)
335#define vs_finish_read(vs)
336#define vs_start_write(vs)
337#define vs_wait_for_writers(vs)
338#define vs_wait_for_sync_writers(vs)
339#define vs_finish_write(vs)
340#define vs_wait_for_refs(vs)
341#define vs_finish_refs(vs)
342
343#endif /* PARALLEL */
344
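/*
 * Illustrative sketch: a handler typically brackets a page-in with the
 * primitives above, roughly as follows (see dp_memory_object_data_request
 * for the real, more involved path):
 *
 *	vs_lookup(mem_obj, vs);			// port -> vstruct
 *	vs_lock(vs, seqno);			// wait for our turn in the seqno stream
 *	vs_check_request(vs, reply_to);		// trim excess control-port urefs
 *	vs_start_read(vs);			// one more reader
 *	vs_unlock(vs);				// bump seqno, let others proceed
 *	pvs_cluster_read(vs, (vm_offset_t)offset, length);
 *	vs_finish_read(vs);			// wake anyone in vs_wait_for_readers
 */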
345vstruct_t vs_object_create(vm_size_t); /* forward */
346
347vstruct_t
348vs_object_create(
349 vm_size_t size)
350{
351 vstruct_t vs;
352 static char here[] = "vs_object_create";
353
354 /*
355 * Allocate a vstruct. If there are any problems, then report them
356 * to the console.
357 */
358 vs = ps_vstruct_create(size);
359 if (vs == VSTRUCT_NULL) {
360 dprintf(("vs_object_create: unable to allocate %s\n",
361 "-- either run swapon command or reboot"));
362 return VSTRUCT_NULL;
363 }
364
365 return vs;
366}
367
368mach_port_urefs_t default_pager_max_urefs = 10000;
369
370/*
371 * Check user reference count on memory object control port.
372 * Vstruct must be locked.
373 * Unlocks and re-locks vstruct if needs to call kernel.
374 */
375void vs_check_request(vstruct_t, MACH_PORT_FACE); /* forward */
376
377void
378vs_check_request(
379 vstruct_t vs,
380 MACH_PORT_FACE control_port)
381{
382 mach_port_delta_t delta;
383 kern_return_t kr;
384 static char here[] = "vs_check_request";
385
386 if (++vs->vs_control_refs > default_pager_max_urefs) {
387 delta = 1 - vs->vs_control_refs;
388 vs->vs_control_refs = 1;
389
390 VS_UNLOCK(vs);
391
392 /*
393 * Deallocate excess user references.
394 */
395
396 {
397/* find a better interface for this; what will we use as a component? */
398 int i;
399 delta = -delta;
400 for(i=0; i<delta; i++)
401 ipc_port_release_send(control_port);
402 }
403
404 VS_LOCK(vs);
405 }
406}
407
408void default_pager_add(vstruct_t, boolean_t); /* forward */
409
410void
411default_pager_add(
412 vstruct_t vs,
413 boolean_t internal)
414{
415 MACH_PORT_FACE mem_obj = vs->vs_mem_obj_port;
416 MACH_PORT_FACE pset;
417 mach_port_mscount_t sync;
418 MACH_PORT_FACE previous;
419 kern_return_t kr;
420 static char here[] = "default_pager_add";
421
422 /*
423 * The port currently has a make-send count of zero,
424 * because either we just created the port or we just
425 * received the port in a memory_object_create request.
426 */
427
428 if (internal) {
429 /* possibly generate an immediate no-senders notification */
430 sync = 0;
431 pset = default_pager_internal_set;
432 } else {
433 /* delay notification till send right is created */
434 sync = 1;
435 pset = default_pager_external_set;
436 }
437
438 ipc_port_make_sonce(mem_obj);
439 ip_lock(mem_obj); /* unlocked in nsrequest below */
440 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
441}
442
443
444/*
445 * Routine: dp_memory_object_create
446 * Purpose:
447 * Handle requests for memory objects from the
448 * kernel.
449 * Notes:
450 * Because we only give out the default memory
451 * manager port to the kernel, we don't have to
452 * be so paranoid about the contents.
453 */
454kern_return_t
455dp_memory_object_create(
456 MACH_PORT_FACE dmm,
457 MACH_PORT_FACE *new_mem_obj,
458 vm_size_t new_size)
459{
460 mach_port_seqno_t seqno;
461 vstruct_t vs;
462 MACH_PORT_FACE pager;
463 static char here[] = "memory_object_create";
464
465 assert(dmm == default_pager_default_port);
466
467 vs = vs_object_create(new_size);
468 if (vs == VSTRUCT_NULL)
469 return KERN_RESOURCE_SHORTAGE;
470
471 pager = *new_mem_obj = ipc_port_alloc_kernel();
472 assert (pager != IP_NULL);
473 (void) ipc_port_make_send(pager);
474
475 {
476 struct vstruct_alias *alias_struct;
477
478 alias_struct = (struct vstruct_alias *)
479 kalloc(sizeof(struct vstruct_alias));
480 if(alias_struct != NULL) {
481 alias_struct->vs = vs;
482 alias_struct->name = ISVS;
483 pager->alias = (int) alias_struct;
484 }
485 else Panic("Out of kernel memory");
486
487 /* JMM - Add binding to this pager under components */
488 pager_mux_hash_insert(pager, &dp_memory_object_subsystem);
489 vs->vs_next_seqno = 0;
490 pager->ip_receiver = ipc_space_kernel;
491 }
492
493 /*
494 * Set up associations between this port
495 * and this default_pager structure
496 */
497
498 vs->vs_mem_obj_port = pager;
499
500 /*
501 * After this, other threads might receive requests
502 * for this memory object or find it in the port list.
503 */
504
505 vstruct_list_insert(vs);
506 default_pager_add(vs, TRUE);
507
508 return KERN_SUCCESS;
509}
510
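/*
 * Routine:	dp_memory_object_init
 * Purpose:
 *	Accept the kernel's control port for this memory object and
 *	record it in the vstruct.
 */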
511kern_return_t
512dp_memory_object_init(
513 MACH_PORT_FACE mem_obj,
514 MACH_PORT_FACE control_port,
515 vm_size_t pager_page_size)
516{
517 mach_port_seqno_t seqno;
518 vstruct_t vs;
519 static char here[] = "memory_object_init";
520
521 assert(pager_page_size == vm_page_size);
522
523 vs_lookup(mem_obj, vs);
524 vs_lock(vs, seqno);
525
526 if (vs->vs_control_port != MACH_PORT_NULL)
527 Panic("bad request");
528
529 vs->vs_control_port = control_port;
530 vs->vs_control_refs = 1;
531 vs->vs_object_name = MACH_PORT_NULL;
532 vs->vs_name_refs = 1;
533
534 vs_unlock(vs);
535
536 return KERN_SUCCESS;
537}
538
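/*
 * Routine:	dp_memory_object_synchronize
 * Purpose:
 *	Handle a synchronize request from the kernel; completion is
 *	reported back to the kernel immediately.
 */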
539kern_return_t
540dp_memory_object_synchronize(
541 MACH_PORT_FACE mem_obj,
542 MACH_PORT_FACE control_port,
543 vm_object_offset_t offset,
544 vm_offset_t length,
545 vm_sync_t flags)
546{
547 mach_port_seqno_t seqno;
548 vstruct_t vs;
549 static char here[] = "memory_object_synchronize";
550
551 vs_lookup(mem_obj, vs);
552 vs_lock(vs, seqno);
553 vs_check_request(vs, control_port);
554 vs_unlock(vs);
555
556 memory_object_synchronize_completed(
557 vm_object_lookup(control_port),
558 offset, length);
559
560 return KERN_SUCCESS;
561}
562
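/*
 * Routine:	dp_memory_object_terminate
 * Purpose:
 *	Release the control port and name references when the kernel
 *	terminates the object; the abstract memory object port itself
 *	is kept until a no-senders notification arrives.
 */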
563kern_return_t
564dp_memory_object_terminate(
565 MACH_PORT_FACE mem_obj,
566 MACH_PORT_FACE control_port)
567{
568 mach_port_seqno_t seqno;
569 vstruct_t vs;
570 mach_port_urefs_t request_refs;
571 kern_return_t kr;
572 static char here[] = "memory_object_terminate";
573
574 /*
575 * control port is a receive right, not a send right.
576 */
577
578 vs_lookup(mem_obj, vs);
579 vs_lock(vs, seqno);
580
581 /*
582 * Wait for read and write requests to terminate.
583 */
584
585 vs_wait_for_readers(vs);
586 vs_wait_for_writers(vs);
587
588 /*
589 * After memory_object_terminate both memory_object_init
590 * and a no-senders notification are possible, so we need
591 * to clean up the request and name ports but leave
592 * the mem_obj port.
593 *
594 * A concurrent default_pager_objects might be allocating
595 * more references for the name port. In this case,
596 * we must first wait for it to finish.
597 */
598
599 vs_wait_for_refs(vs);
600
601 vs->vs_control_port = MACH_PORT_NULL;
602
 603 /* A bit of special-case ugliness here: wake up any waiting read requests. */
 604 /* These data requests had to be removed from the seqno traffic */
 605 /* because of a performance bottleneck with large memory objects; */
 606 /* the problem will right itself with the new component-based */
 607 /* synchronous interface. The new async interface will be able to return */
 608 /* failure during its sync phase. In the meantime ... */
609
610 thread_wakeup(&vs->vs_waiting_write);
611 thread_wakeup(&vs->vs_waiting_async);
612
613 request_refs = vs->vs_control_refs;
614 vs->vs_control_refs = 0;
615
616 vs->vs_object_name = MACH_PORT_NULL;
617
618 assert(vs->vs_name_refs != 0);
619 vs->vs_name_refs = 0;
620
621 vs_unlock(vs);
622
623 /*
624 * Now we deallocate our various port rights.
625 */
626
627 {
628 int i;
629 for(i=0; i<request_refs; i++)
630 ipc_port_release_send(control_port);
631 }
632 if(control_port->alias != (int)NULL)
633 kfree((vm_offset_t) (control_port->alias),
634 sizeof(struct vstruct_alias));
635 ipc_port_release_receive(control_port);
636 return KERN_SUCCESS;
637}
638
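/*
 * Routine:	default_pager_no_senders
 * Purpose:
 *	Tear down the vstruct backing an abstract memory object once
 *	the last send right for its port has gone away.
 */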
639void
640default_pager_no_senders(
641 MACH_PORT_FACE mem_obj,
642 mach_port_seqno_t seqno,
643 mach_port_mscount_t mscount)
644{
645 vstruct_t vs;
646 static char here[] = "default_pager_no_senders";
647
648 /*
649 * Because we don't give out multiple send rights
650 * for a memory object, there can't be a race
651 * between getting a no-senders notification
652 * and creating a new send right for the object.
653 * Hence we don't keep track of mscount.
654 */
655
656 vs_lookup(mem_obj, vs);
657 vs_lock(vs, seqno);
658 vs_async_wait(vs); /* wait for pending async IO */
659
660 /* do not delete the vs structure until the referencing pointers */
661 /* in the vstruct list have been expunged */
662
663 /* get VSL_LOCK out of order by using TRY mechanism */
664 while(!VSL_LOCK_TRY()) {
665 VS_UNLOCK(vs);
666 VSL_LOCK();
667 VSL_UNLOCK();
668 VS_LOCK(vs);
669 vs_async_wait(vs); /* wait for pending async IO */
670 }
671 /*
672 * We shouldn't get a no-senders notification
673 * when the kernel has the object cached.
674 */
675 if (vs->vs_control_port != MACH_PORT_NULL)
676 Panic("bad request");
677
678 /*
679 * Unlock the pager (though there should be no one
680 * waiting for it).
681 */
682 VS_UNLOCK(vs);
683
684 /*
685 * Remove the memory object port association, and then
 686 * destroy the port itself. We must remove the object
687 * from the port list before deallocating the pager,
688 * because of default_pager_objects.
689 */
690 vstruct_list_delete(vs);
691 ps_vstruct_dealloc(vs);
692
693 /*
694 * Recover memory that we might have wasted because
695 * of name conflicts
696 */
697 while (!queue_empty(&vstruct_list.vsl_leak_queue)) {
698 vs = (vstruct_t) queue_first(&vstruct_list.vsl_leak_queue);
699 queue_remove_first(&vstruct_list.vsl_leak_queue, vs, vstruct_t,
700 vs_links);
701 kfree((vm_offset_t) vs, sizeof *vs);
702 }
703 VSL_UNLOCK();
704}
705
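/*
 * Routine:	dp_memory_object_data_request
 * Purpose:
 *	Page-in: read the requested range from backing store and
 *	supply it to the kernel.
 */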
706kern_return_t
707dp_memory_object_data_request(
708 MACH_PORT_FACE mem_obj,
709 MACH_PORT_FACE reply_to,
710 vm_object_offset_t offset,
711 vm_size_t length,
712 vm_prot_t protection_required)
713{
714 mach_port_seqno_t seqno;
715 vstruct_t vs;
716 static char here[] = "memory_object_data_request";
717
718 GSTAT(global_stats.gs_pagein_calls++);
719
720
721 /* CDY at this moment vs_lookup panics when presented with the wrong */
722 /* port. As we are expanding this pager to support user interfaces */
 723 /* this should be changed to return KERN_FAILURE. */
724 vs_lookup(mem_obj, vs);
725 vs_lock(vs, seqno);
726 vs_check_request(vs, reply_to);
727
728 /* We are going to relax the strict sequencing here for performance */
729 /* reasons. We can do this because we know that the read and */
730 /* write threads are different and we rely on synchronization */
 731 /* of read and write requests at the cache memory_object level. */
 732 /* We break out wait_for_writers here; all of this goes away when */
 733 /* we get real control of seqno with the new component interface. */
734 if (vs->vs_writers != 0) {
735 /* you can't hold on to the seqno and go */
736 /* to sleep like that */
737 vs_unlock(vs); /* bump internal count of seqno */
738 VS_LOCK(vs);
739 while (vs->vs_writers != 0) {
740 default_pager_wait_write++;
741 vs->vs_waiting_write = TRUE;
742 assert_wait(&vs->vs_waiting_write, THREAD_UNINT);
743 VS_UNLOCK(vs);
744 thread_block((void (*)(void))0);
745 VS_LOCK(vs);
746 vs_async_wait(vs);
747 }
748 if(vs->vs_control_port == MACH_PORT_NULL) {
749 VS_UNLOCK(vs);
750 return KERN_FAILURE;
751 }
752 vs_start_read(vs);
753 VS_UNLOCK(vs);
754 } else {
755 vs_start_read(vs);
756 vs_unlock(vs);
757 }
758
759 /*
760 * Request must be on a page boundary and a multiple of pages.
761 */
762 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
763 Panic("bad alignment");
764
765 pvs_cluster_read(vs, (vm_offset_t)offset, length);
766
767 vs_finish_read(vs);
768
769 return KERN_SUCCESS;
770}
771
772/*
773 * memory_object_data_initialize: check whether we already have each page, and
774 * write it if we do not. The implementation is far from optimized, and
775 * also assumes that the default_pager is single-threaded.
776 */
777/* It is questionable whether or not a pager should decide what is relevant */
778/* and what is not in data sent from the kernel. Data initialize has been */
779/* changed to copy back all data sent to it in preparation for its eventual */
780/* merge with data return. It is the kernel that should decide what pages */
 781/* to write back. As of the writing of this note, this is indeed the case: */
 782/* the kernel writes back one page at a time through this interface. */
783
784kern_return_t
785dp_memory_object_data_initialize(
786 MACH_PORT_FACE mem_obj,
787 MACH_PORT_FACE control_port,
788 vm_object_offset_t offset,
789 pointer_t addr,
790 vm_size_t data_cnt)
791{
792 mach_port_seqno_t seqno;
793 vstruct_t vs;
794 static char here[] = "memory_object_data_initialize";
795
796#ifdef lint
797 control_port++;
798#endif /* lint */
799
800 DEBUG(DEBUG_MO_EXTERNAL,
801 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
802 (int)mem_obj, (int)offset, (int)data_cnt));
803 GSTAT(global_stats.gs_pages_init += atop(data_cnt));
804
805 vs_lookup(mem_obj, vs);
806 vs_lock(vs, seqno);
807 vs_check_request(vs, control_port);
808 vs_start_write(vs);
809 vs_unlock(vs);
810
811 /*
812 * Write the data via clustered writes. vs_cluster_write will
813 * loop if the address range specified crosses cluster
814 * boundaries.
815 */
816 vs_cluster_write(vs, 0, (vm_offset_t)offset, data_cnt, FALSE, 0);
817
818 vs_finish_write(vs);
819
820 return KERN_SUCCESS;
821}
822
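/*
 * dp_memory_object_lock_completed, dp_memory_object_data_unlock,
 * dp_memory_object_supply_completed and dp_memory_object_change_completed
 * are upcalls the default pager does not expect to receive; each of
 * these handlers therefore simply panics.
 */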
823kern_return_t
824dp_memory_object_lock_completed(
825 memory_object_t mem_obj,
826 MACH_PORT_FACE control_port,
827 vm_object_offset_t offset,
828 vm_size_t length)
829{
830 mach_port_seqno_t seqno;
831 static char here[] = "memory_object_lock_completed";
832
833#ifdef lint
834 mem_obj++;
835 seqno++;
836 control_port++;
837 offset++;
838 length++;
839#endif /* lint */
840
841 Panic("illegal");
842 return KERN_FAILURE;
843}
844
845kern_return_t
846dp_memory_object_data_unlock(
847 memory_object_t mem_obj,
848 MACH_PORT_FACE control_port,
849 vm_object_offset_t offset,
850 vm_size_t data_cnt,
851 vm_prot_t desired_access)
852{
853 static char here[] = "memory_object_data_unlock";
854
855 Panic("illegal");
856 return KERN_FAILURE;
857}
858
859
860kern_return_t
861dp_memory_object_supply_completed(
862 memory_object_t mem_obj,
863 MACH_PORT_FACE control_port,
864 vm_object_offset_t offset,
865 vm_size_t length,
866 kern_return_t result,
867 vm_offset_t error_offset)
868{
869 static char here[] = "memory_object_supply_completed";
870
871 Panic("illegal");
872 return KERN_FAILURE;
873}
874
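/*
 * Routine:	dp_memory_object_data_return
 * Purpose:
 *	Page-out: write the returned pages to backing store via
 *	clustered writes.
 */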
875kern_return_t
876dp_memory_object_data_return(
877 MACH_PORT_FACE mem_obj,
878 MACH_PORT_FACE control_port,
879 vm_object_offset_t offset,
880 pointer_t addr,
881 vm_size_t data_cnt,
882 boolean_t dirty,
883 boolean_t kernel_copy)
884{
885 mach_port_seqno_t seqno;
886 vstruct_t vs;
887 static char here[] = "memory_object_data_return";
888
889#ifdef lint
890 control_port++;
891 dirty++;
892 kernel_copy++;
893#endif /* lint */
894
895 DEBUG(DEBUG_MO_EXTERNAL,
896 ("mem_obj=0x%x,offset=0x%x,addr=0x%xcnt=0x%x\n",
897 (int)mem_obj, (int)offset, (int)addr, (int)data_cnt));
898 GSTAT(global_stats.gs_pageout_calls++);
899
900 /* This routine is called by the pageout thread. The pageout thread */
 901 /* cannot be blocked by read activities. Therefore the grant of the */
 902 /* vs lock must be done on a try rather than a blocking basis. */
 903 /* The code below relies on the fact that the interface is */
 904 /* synchronous. Should this interface become asynchronous again */
 905 /* for some type of pager in the future, the pages will have to be */
906 /* returned through a separate, asynchronous path. */
907
908 vs_lookup(mem_obj, vs);
909
910 default_pager_total++;
911 if(!VS_TRY_LOCK(vs)) {
912 /* the call below will not be done by caller when we have */
913 /* a synchronous interface */
914 /* return KERN_LOCK_OWNED; */
915 upl_t upl;
916 upl_system_list_request((vm_object_t)
917 vs->vs_control_port->ip_kobject,
918 offset, data_cnt, data_cnt, &upl, NULL, 0,
919 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
920 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
921 uc_upl_abort(upl,0);
922 ipc_port_release_send(control_port);
923 return KERN_SUCCESS;
924 }
925
926
927
928 if ((vs->vs_seqno != vs->vs_next_seqno++) || (vs->vs_xfer_pending)) {
929 upl_t upl;
930 vs->vs_next_seqno--;
931 VS_UNLOCK(vs);
932 /* the call below will not be done by caller when we have */
933 /* a synchronous interface */
934 /* return KERN_LOCK_OWNED; */
935 upl_system_list_request((vm_object_t)
936 vs->vs_control_port->ip_kobject,
937 offset, data_cnt, data_cnt, &upl, NULL, 0,
938 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
939 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
940 uc_upl_abort(upl,0);
941 ipc_port_release_send(control_port);
942 return KERN_SUCCESS;
943 }
944
945 if ((data_cnt % vm_page_size) != 0)
946 Panic("bad alignment");
947
948 vs_start_write(vs);
949
950
951 vs->vs_async_pending += 1; /* protect from backing store contraction */
952
953 /* unroll vs_check_request to avoid re-locking vs */
954
955 if (++vs->vs_control_refs > default_pager_max_urefs) {
956 mach_port_delta_t delta;
957
958 delta = 1 - vs->vs_control_refs;
959 vs->vs_control_refs = 1;
960
961 vs_unlock(vs);
962
963 /*
964 * Deallocate excess user references.
965 */
966
967 {
968 int i;
969 delta = -delta;
970 for(i=0; i<delta; i++)
971 ipc_port_release_send(control_port);
972 }
973
974 } else {
975 vs_unlock(vs);
976 }
977
978 /*
979 * Write the data via clustered writes. vs_cluster_write will
980 * loop if the address range specified crosses cluster
981 * boundaries.
982 */
983 vs_cluster_write(vs, 0, (vm_offset_t)offset, data_cnt, FALSE, 0);
984
985 vs_finish_write(vs);
986
987 /* temporary, need a finer lock based on cluster */
988
989 VS_LOCK(vs);
990 vs->vs_async_pending -= 1; /* release vs_async_wait */
991 if (vs->vs_async_pending == 0) {
992 VS_UNLOCK(vs);
993 thread_wakeup(&vs->vs_waiting_async);
994 } else {
995 VS_UNLOCK(vs);
996 }
997
998
999 return KERN_SUCCESS;
1000}
1001
1002kern_return_t
1003dp_memory_object_change_completed(
1004 memory_object_t mem_obj,
1005 memory_object_control_t memory_control,
1006 memory_object_flavor_t flavor)
1007{
1008 static char here[] = "memory_object_change_completed";
1009
1010 Panic("illegal");
1011 return KERN_FAILURE;
1012}
1013
1014/*
1015 * Create an external object.
1016 */
1017kern_return_t
1018default_pager_object_create(
1019 MACH_PORT_FACE pager,
1020 MACH_PORT_FACE *mem_obj,
1021 vm_size_t size)
1022{
1023 vstruct_t vs;
1024 MACH_PORT_FACE port;
1025 kern_return_t result;
1026 struct vstruct_alias *alias_struct;
1027 static char here[] = "default_pager_object_create";
1028
1029
1030 if (pager != default_pager_default_port)
1031 return KERN_INVALID_ARGUMENT;
1032
1033 vs = vs_object_create(size);
1034
1035 port = ipc_port_alloc_kernel();
1036 ipc_port_make_send(port);
1037 /* register abstract memory object port with pager mux routine */
1038 /* (directs kernel internal calls to the right pager). */
1039 alias_struct = (struct vstruct_alias *)
1040 kalloc(sizeof(struct vstruct_alias));
1041 if(alias_struct != NULL) {
1042 alias_struct->vs = vs;
1043 alias_struct->name = ISVS;
1044 port->alias = (int) alias_struct;
1045 }
1046 else Panic("Out of kernel memory");
1047
1048 /*
1049 * Set up associations between these ports
1050 * and this vstruct structure
1051 */
1052
1053 vs->vs_mem_obj_port = port;
1054 vstruct_list_insert(vs);
1055 default_pager_add(vs, FALSE);
1056
1057 *mem_obj = port;
1058
1059 return KERN_SUCCESS;
1060}
1061
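/*
 * Routine:	default_pager_objects
 * Purpose:
 *	Monitoring interface: return a snapshot of the objects managed
 *	by the default pager, along with their name ports.
 */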
1062kern_return_t
1063default_pager_objects(
1064 MACH_PORT_FACE pager,
1065 default_pager_object_array_t *objectsp,
1066 mach_msg_type_number_t *ocountp,
1067 mach_port_array_t *portsp,
1068 mach_msg_type_number_t *pcountp)
1069{
1070 vm_offset_t oaddr = 0; /* memory for objects */
1071 vm_size_t osize = 0; /* current size */
1072 default_pager_object_t * objects;
1073 unsigned int opotential;
1074
1075 vm_offset_t paddr = 0; /* memory for ports */
1076 vm_size_t psize = 0; /* current size */
1077 MACH_PORT_FACE * ports;
1078 unsigned int ppotential;
1079
1080 unsigned int actual;
1081 unsigned int num_objects;
1082 kern_return_t kr;
1083 vstruct_t entry;
1084 static char here[] = "default_pager_objects";
1085/*
1086 if (pager != default_pager_default_port)
1087 return KERN_INVALID_ARGUMENT;
1088*/
1089
1090 /* start with the inline memory */
1091
1092 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
1093 (vm_map_copy_t) *objectsp);
1094
1095 if (kr != KERN_SUCCESS)
1096 return kr;
1097
1098 osize = round_page(*ocountp * sizeof * objects);
1099 kr = vm_map_wire(ipc_kernel_map,
1100 trunc_page((vm_offset_t)objects),
1101 round_page(((vm_offset_t)objects) + osize),
1102 VM_PROT_READ|VM_PROT_WRITE, FALSE);
1103 osize=0;
1104
1105 *objectsp = objects;
1106 /* we start with the inline space */
1107
1108
1109 num_objects = 0;
1110 opotential = *ocountp;
1111
1112 ports = (MACH_PORT_FACE *) *portsp;
1113 ppotential = *pcountp;
1114
1115 VSL_LOCK();
1116
1117 /*
1118 * We will send no more than this many
1119 */
1120 actual = vstruct_list.vsl_count;
1121 VSL_UNLOCK();
1122
1123 if (opotential < actual) {
1124 vm_offset_t newaddr;
1125 vm_size_t newsize;
1126
1127 newsize = 2 * round_page(actual * sizeof * objects);
1128
1129 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
1130 if (kr != KERN_SUCCESS)
1131 goto nomemory;
1132
1133 oaddr = newaddr;
1134 osize = newsize;
1135 opotential = osize / sizeof * objects;
1136 objects = (default_pager_object_t *)oaddr;
1137 }
1138
1139 if (ppotential < actual) {
1140 vm_offset_t newaddr;
1141 vm_size_t newsize;
1142
1143 newsize = 2 * round_page(actual * sizeof * ports);
1144
1145 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
1146 if (kr != KERN_SUCCESS)
1147 goto nomemory;
1148
1149 paddr = newaddr;
1150 psize = newsize;
1151 ppotential = psize / sizeof * ports;
1152 ports = (MACH_PORT_FACE *)paddr;
1153 }
1154
1155 /*
1156 * Now scan the list.
1157 */
1158
1159 VSL_LOCK();
1160
1161 num_objects = 0;
1162 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
1163
1164 MACH_PORT_FACE port;
1165 vm_size_t size;
1166
1167 if ((num_objects >= opotential) ||
1168 (num_objects >= ppotential)) {
1169
1170 /*
1171 * This should be rare. In any case,
1172 * we will only miss recent objects,
1173 * because they are added at the end.
1174 */
1175 break;
1176 }
1177
1178 /*
1179 * Avoid interfering with normal operations
1180 */
1181 if (!VS_MAP_TRY_LOCK(entry))
1182 goto not_this_one;
1183 size = ps_vstruct_allocated_size(entry);
1184 VS_MAP_UNLOCK(entry);
1185
1186 VS_LOCK(entry);
1187
1188 port = entry->vs_object_name;
1189 if (port == MACH_PORT_NULL) {
1190
1191 /*
1192 * The object is waiting for no-senders
1193 * or memory_object_init.
1194 */
1195 VS_UNLOCK(entry);
1196 goto not_this_one;
1197 }
1198
1199 /*
1200 * We need a reference for the reply message.
1201 * While we are unlocked, the bucket queue
1202 * can change and the object might be terminated.
1203 * memory_object_terminate will wait for us,
1204 * preventing deallocation of the entry.
1205 */
1206
1207 if (--entry->vs_name_refs == 0) {
1208 VS_UNLOCK(entry);
1209
 1210 /* keep the list locked, won't take long */
1211
1212 {
1213 int i;
1214 for(i=0; i<default_pager_max_urefs; i++)
1215 ipc_port_make_send(port);
1216 }
1217 VS_LOCK(entry);
1218
1219 entry->vs_name_refs += default_pager_max_urefs;
1220 vs_finish_refs(entry);
1221 }
1222 VS_UNLOCK(entry);
1223
1224 /* the arrays are wired, so no deadlock worries */
1225
1226 objects[num_objects].dpo_object = (vm_offset_t) entry;
1227 objects[num_objects].dpo_size = size;
1228 ports [num_objects++] = port;
1229 continue;
1230
1231 not_this_one:
1232 /*
1233 * Do not return garbage
1234 */
1235 objects[num_objects].dpo_object = (vm_offset_t) 0;
1236 objects[num_objects].dpo_size = 0;
1237 ports [num_objects++] = MACH_PORT_NULL;
1238
1239 }
1240
1241 VSL_UNLOCK();
1242
1243 /*
1244 * Deallocate and clear unused memory.
1245 * (Returned memory will automagically become pageable.)
1246 */
1247
1248 if (objects == *objectsp) {
1249
1250 /*
1251 * Our returned information fit inline.
1252 * Nothing to deallocate.
1253 */
1254 *ocountp = num_objects;
1255 } else if (actual == 0) {
1256 (void) vm_deallocate(kernel_map, oaddr, osize);
1257
1258 /* return zero items inline */
1259 *ocountp = 0;
1260 } else {
1261 vm_offset_t used;
1262
1263 used = round_page(actual * sizeof * objects);
1264
1265 if (used != osize)
1266 (void) vm_deallocate(kernel_map,
1267 oaddr + used, osize - used);
1268
1269 *objectsp = objects;
1270 *ocountp = num_objects;
1271 }
1272
1273 if (ports == (MACH_PORT_FACE *)*portsp) {
1274
1275 /*
1276 * Our returned information fit inline.
1277 * Nothing to deallocate.
1278 */
1279
1280 *pcountp = num_objects;
1281 } else if (actual == 0) {
1282 (void) vm_deallocate(kernel_map, paddr, psize);
1283
1284 /* return zero items inline */
1285 *pcountp = 0;
1286 } else {
1287 vm_offset_t used;
1288
1289 used = round_page(actual * sizeof * ports);
1290
1291 if (used != psize)
1292 (void) vm_deallocate(kernel_map,
1293 paddr + used, psize - used);
1294
1295 *portsp = (mach_port_array_t)ports;
1296 *pcountp = num_objects;
1297 }
1298 (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1299 *ocountp + (vm_offset_t)objects, FALSE);
1300 (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1301 *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1302
1303 return KERN_SUCCESS;
1304
1305 nomemory:
1306 {
1307 register int i;
1308 for (i = 0; i < num_objects; i++)
1309 ipc_port_dealloc_kernel(ports[i]);
1310 }
1311
1312 if (objects != *objectsp)
1313 (void) vm_deallocate(kernel_map, oaddr, osize);
1314
1315 if (ports != (MACH_PORT_FACE *)*portsp)
1316 (void) vm_deallocate(kernel_map, paddr, psize);
1317
1318 return KERN_RESOURCE_SHORTAGE;
1319}
1320
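/*
 * Routine:	default_pager_object_pages
 * Purpose:
 *	Monitoring interface: return the backing-store pages currently
 *	allocated to the given object.
 */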
1321kern_return_t
1322default_pager_object_pages(
1323 MACH_PORT_FACE pager,
1324 MACH_PORT_FACE object,
1325 default_pager_page_array_t *pagesp,
1326 mach_msg_type_number_t *countp)
1327{
1328 vm_offset_t addr; /* memory for page offsets */
1329 vm_size_t size = 0; /* current memory size */
1330 default_pager_page_t * pages;
1331 unsigned int potential, actual;
1332 kern_return_t kr;
1333
1334/*
1335 if (pager != default_pager_default_port)
1336 return KERN_INVALID_ARGUMENT;
1337*/
1338 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
1339 (vm_map_copy_t) *pagesp);
1340
1341 if (kr != KERN_SUCCESS)
1342 return kr;
1343
1344 size = round_page(*countp * sizeof * pages);
1345 kr = vm_map_wire(ipc_kernel_map,
1346 trunc_page((vm_offset_t)pages),
1347 round_page(((vm_offset_t)pages) + size),
1348 VM_PROT_READ|VM_PROT_WRITE, FALSE);
1349 size=0;
1350
1351 *pagesp = pages;
1352 /* we start with the inline space */
1353
1354 addr = (vm_offset_t)pages;
1355 potential = *countp;
1356
1357 for (;;) {
1358 vstruct_t entry;
1359
1360 VSL_LOCK();
1361 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1362 vs_links) {
1363 VS_LOCK(entry);
1364 if (entry->vs_object_name == object) {
1365 VSL_UNLOCK();
1366 goto found_object;
1367 }
1368 VS_UNLOCK(entry);
1369 }
1370 VSL_UNLOCK();
1371
1372 /* did not find the object */
1373
1374 if (pages != *pagesp)
1375 (void) vm_deallocate(kernel_map, addr, size);
1376 return KERN_INVALID_ARGUMENT;
1377
1378 found_object:
1379
1380 if (!VS_MAP_TRY_LOCK(entry)) {
1381 /* oh well bad luck */
1382 int wait_result;
1383
1384 VS_UNLOCK(entry);
1385
1386 assert_wait_timeout( 1, THREAD_INTERRUPTIBLE);
1387 wait_result = thread_block((void (*)(void)) 0);
1388 if (wait_result != THREAD_TIMED_OUT)
1389 thread_cancel_timer();
1390 continue;
1391 }
1392
1393 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1394 VS_MAP_UNLOCK(entry);
1395 VS_UNLOCK(entry);
1396
1397 if (actual <= potential)
1398 break;
1399
1400 /* allocate more memory */
1401
1402 if (pages != *pagesp)
1403 (void) vm_deallocate(kernel_map, addr, size);
1404 size = round_page(actual * sizeof * pages);
1405 kr = vm_allocate(kernel_map, &addr, size, TRUE);
1406 if (kr != KERN_SUCCESS)
1407 return kr;
1408 pages = (default_pager_page_t *)addr;
1409 potential = size / sizeof * pages;
1410 }
1411
1412 /*
1413 * Deallocate and clear unused memory.
1414 * (Returned memory will automagically become pageable.)
1415 */
1416
1417 if (pages == *pagesp) {
1418
1419 /*
1420 * Our returned information fit inline.
1421 * Nothing to deallocate.
1422 */
1423
1424 *countp = actual;
1425 } else if (actual == 0) {
1426 (void) vm_deallocate(kernel_map, addr, size);
1427
1428 /* return zero items inline */
1429 *countp = 0;
1430 } else {
1431 vm_offset_t used;
1432
1433 used = round_page(actual * sizeof * pages);
1434
1435 if (used != size)
1436 (void) vm_deallocate(kernel_map,
1437 addr + used, size - used);
1438
1439 *pagesp = pages;
1440 *countp = actual;
1441 }
1442 (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
1443 *countp + (vm_offset_t)pages, FALSE);
1444 (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
1445 *countp, TRUE, (vm_map_copy_t *)pagesp);
1446 return KERN_SUCCESS;
1447}