osfmk/default_pager/dp_memory_object.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
   7  *
   8  * This file contains Original Code and/or Modifications of Original Code
   9  * as defined in and that are subject to the Apple Public Source License
  10  * Version 2.0 (the 'License'). You may not use this file except in
  11  * compliance with the License. Please obtain a copy of the License at
  12  * http://www.opensource.apple.com/apsl/ and read it before using this
  13  * file.
  14  *
  15  * The Original Code and all software distributed under the License are
  16  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  17  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  18  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  19  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  20  * Please see the License for the specific language governing rights and
  21  * limitations under the License.
  22  *
  23  * @APPLE_LICENSE_HEADER_END@
  24  */
  25 /*
  26  * @OSF_COPYRIGHT@
  27  */
  28 /*
  29  * Mach Operating System
  30  * Copyright (c) 1991,1990,1989 Carnegie Mellon University
  31  * All Rights Reserved.
  32  *
  33  * Permission to use, copy, modify and distribute this software and its
  34  * documentation is hereby granted, provided that both the copyright
  35  * notice and this permission notice appear in all copies of the
  36  * software, derivative works or modified versions, and any portions
  37  * thereof, and that both notices appear in supporting documentation.
  38  *
  39  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  40  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  41  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  42  *
  43  * Carnegie Mellon requests users of this software to return to
  44  *
  45  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  46  *  School of Computer Science
  47  *  Carnegie Mellon University
  48  *  Pittsburgh PA 15213-3890
  49  *
  50  * any improvements or extensions that they make and grant Carnegie Mellon
  51  * the rights to redistribute these changes.
  52  */
  53
  54 /*
  55  *      Default Pager.
  56  *              Memory Object Management.
  57  */
  58
  59 #include "default_pager_internal.h"
  60 #include <mach/memory_object_types.h>
  61 #include <mach/memory_object_server.h>
  62 #include <vm/memory_object.h>
  63 #include <vm/vm_pageout.h>
  64
  65
  66 /*
  67  * List of all vstructs.  A specific vstruct is
  68  * found directly via its port; this list is
  69  * only used for monitoring purposes by the
  70  * default_pager_object* calls and by ps_delete
  71  * when abstract memory objects must be scanned
  72  * to remove any live storage on a segment which
  73  * is to be removed.
  74  */
  75 struct vstruct_list_head        vstruct_list;  /* vstruct_list_insert updates vsl_queue/vsl_count under VSL_LOCK */
  76
  77 __private_extern__ void
  78 vstruct_list_insert(
  79         vstruct_t vs)
  80 {
  81         VSL_LOCK();
  82         queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
  83         vstruct_list.vsl_count++;
  84         VSL_UNLOCK();
  85 }
  86
  87
  88 __private_extern__ void
  89 vstruct_list_delete(
  90         vstruct_t vs)
  91 {
  92         queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
  93         vstruct_list.vsl_count--;
  94 }
  95
  96 /*
  97  * We use the sequence numbers on requests to regulate
  98  * our parallelism.  In general, we allow multiple reads and writes
  99  * to proceed in parallel, with the exception that reads must
 100  * wait for previous writes to finish.  (Because the kernel might
 101  * generate a data-request for a page on the heels of a data-write
 102  * for the same page, and we must avoid returning stale data.)
 103  * Terminate requests wait for preceding reads and writes to finish.
 104  */
 105
 106 static unsigned int     default_pager_total = 0;                /* debugging: bumped by vs_lock and dp_memory_object_data_return */
 107 static unsigned int     default_pager_wait_seqno = 0;           /* debugging: times a thread had to wait for its sequence number */
 108 static unsigned int     default_pager_wait_read = 0;            /* debugging: times vs_wait_for_readers had to block */
 109 static unsigned int     default_pager_wait_write = 0;           /* debugging: times a thread had to block waiting for writers */
 110 static unsigned int     default_pager_wait_refs = 0;            /* debugging: not updated by the code visible in this part of the file */
 111
 112 __private_extern__ void
 113 vs_async_wait(
 114         vstruct_t       vs)
 115 {
 116
 117         ASSERT(vs->vs_async_pending >= 0);
 118         while (vs->vs_async_pending > 0) {
 119                 vs->vs_waiting_async = TRUE;
 120                 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
 121                 VS_UNLOCK(vs);
 122                 thread_block(THREAD_CONTINUE_NULL);
 123                 VS_LOCK(vs);
 124         }
 125         ASSERT(vs->vs_async_pending == 0);
 126 }
 127
 128
 129 #if     PARALLEL
 130 /*
 131  * Waits for correct sequence number.  Leaves pager locked.
 132  *
 133  * JMM - Sequence numbers guarantee ordering of requests generated
 134  *       by a single thread if the receiver is multithreaded and
 135  *       the interfaces are asynchronous (i.e. sender can generate
 136  *       more than one request before the first is received in the
 137  *       pager).  Normally, IPC would generate these number in that
 138  *       case.  But we are trying to avoid using IPC for the in-kernel
 139  *       scenario. Since these are actually invoked synchronously
 140  *       anyway (in-kernel), we can just fake the sequence number
 141  *       generation here (thus avoiding the dependence on IPC).
 142  */
 143 __private_extern__ void
 144 vs_lock(
 145         vstruct_t               vs)
 146 {
 147         mach_port_seqno_t       seqno;
 148
 149         default_pager_total++;
 150         VS_LOCK(vs);
 151
 152         seqno = vs->vs_next_seqno++;
 153
 154         while (vs->vs_seqno != seqno) {
 155                 default_pager_wait_seqno++;
 156                 vs->vs_waiting_seqno = TRUE;
 157                 assert_wait(&vs->vs_seqno, THREAD_UNINT);
 158                 VS_UNLOCK(vs);
 159                 thread_block(THREAD_CONTINUE_NULL);
 160                 VS_LOCK(vs);
 161         }
 162 }
 163
 164 /*
 165  * Increments sequence number and unlocks pager.
 166  */
 167 __private_extern__ void
 168 vs_unlock(vstruct_t vs)
 169 {
 170         vs->vs_seqno++;
 171         if (vs->vs_waiting_seqno) {
 172                 vs->vs_waiting_seqno = FALSE;
 173                 VS_UNLOCK(vs);
 174                 thread_wakeup(&vs->vs_seqno);
 175                 return;
 176         }
 177         VS_UNLOCK(vs);
 178 }
 179
 180 /*
 181  * Start a read - one more reader.  Pager must be locked.
 182  */
 183 __private_extern__ void
 184 vs_start_read(
 185         vstruct_t vs)
 186 {
 187         vs->vs_readers++;
 188 }
 189
 190 /*
 191  * Wait for readers.  Unlocks and relocks pager if wait needed.
 192  */
 193 __private_extern__ void
 194 vs_wait_for_readers(
 195         vstruct_t vs)
 196 {
 197         while (vs->vs_readers != 0) {
 198                 default_pager_wait_read++;
 199                 vs->vs_waiting_read = TRUE;
 200                 assert_wait(&vs->vs_readers, THREAD_UNINT);
 201                 VS_UNLOCK(vs);
 202                 thread_block(THREAD_CONTINUE_NULL);
 203                 VS_LOCK(vs);
 204         }
 205 }
 206
 207 /*
 208  * Finish a read.  Pager is unlocked and returns unlocked.
 209  */
 210 __private_extern__ void
 211 vs_finish_read(
 212         vstruct_t vs)
 213 {
 214         VS_LOCK(vs);
 215         if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
 216                 vs->vs_waiting_read = FALSE;
 217                 VS_UNLOCK(vs);
 218                 thread_wakeup(&vs->vs_readers);
 219                 return;
 220         }
 221         VS_UNLOCK(vs);
 222 }
 223
 224 /*
 225  * Start a write - one more writer.  Pager must be locked.
 226  */
 227 __private_extern__ void
 228 vs_start_write(
 229         vstruct_t vs)
 230 {
 231         vs->vs_writers++;
 232 }
 233
 234 /*
 235  * Wait for writers.  Unlocks and relocks pager if wait needed.
 236  */
 237 __private_extern__ void
 238 vs_wait_for_writers(
 239         vstruct_t vs)
 240 {
 241         while (vs->vs_writers != 0) {
 242                 default_pager_wait_write++;
 243                 vs->vs_waiting_write = TRUE;
 244                 assert_wait(&vs->vs_writers, THREAD_UNINT);
 245                 VS_UNLOCK(vs);
 246                 thread_block(THREAD_CONTINUE_NULL);
 247                 VS_LOCK(vs);
 248         }
 249         vs_async_wait(vs);
 250 }
 251
 252 /* This is to be used for the transfer from segment code ONLY */
 253 /* The transfer code holds off vs destruction by keeping the  */
 254 /* vs_async_wait count non-zero.  It will not ocnflict with   */
 255 /* other writers on an async basis because it only writes on  */
 256 /* a cluster basis into fresh (as of sync time) cluster locations */
 257
 258 __private_extern__ void
 259 vs_wait_for_sync_writers(
 260         vstruct_t vs)
 261 {
 262         while (vs->vs_writers != 0) {
 263                 default_pager_wait_write++;
 264                 vs->vs_waiting_write = TRUE;
 265                 assert_wait(&vs->vs_writers, THREAD_UNINT);
 266                 VS_UNLOCK(vs);
 267                 thread_block(THREAD_CONTINUE_NULL);
 268                 VS_LOCK(vs);
 269         }
 270 }
 271
 272
 273 /*
 274  * Finish a write.  Pager is unlocked and returns unlocked.
 275  */
 276 __private_extern__ void
 277 vs_finish_write(
 278         vstruct_t vs)
 279 {
 280         VS_LOCK(vs);
 281         if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
 282                 vs->vs_waiting_write = FALSE;
 283                 VS_UNLOCK(vs);
 284                 thread_wakeup(&vs->vs_writers);
 285                 return;
 286         }
 287         VS_UNLOCK(vs);
 288 }
 289 #endif  /* PARALLEL */
 290
 291 vstruct_t
 292 vs_object_create(
 293         vm_size_t size)
 294 {
 295         vstruct_t       vs;
 296
 297         /*
 298          * Allocate a vstruct. If there are any problems, then report them
 299          * to the console.
 300          */
 301         vs = ps_vstruct_create(size);
 302         if (vs == VSTRUCT_NULL) {
 303                 dprintf(("vs_object_create: unable to allocate %s\n",
 304                          "-- either run swapon command or reboot"));
 305                 return VSTRUCT_NULL;
 306         }
 307
 308         return vs;
 309 }
 310
 311 #if 0
 312 void default_pager_add(vstruct_t, boolean_t);   /* forward */
 313
 314 void
 315 default_pager_add(
 316         vstruct_t vs,
 317         boolean_t internal)
 318 {
 319         memory_object_t         mem_obj = vs->vs_mem_obj;
 320         mach_port_t             pset;
 321         mach_port_mscount_t     sync;
 322         mach_port_t             previous;
 323         kern_return_t           kr;
 324         static char             here[] = "default_pager_add";
 325
 326         /*
 327          * The port currently has a make-send count of zero,
 328          * because either we just created the port or we just
 329          * received the port in a memory_object_create request.
 330          */
 331
 332         if (internal) {
 333                 /* possibly generate an immediate no-senders notification */
 334                 sync = 0;
 335                 pset = default_pager_internal_set;
 336         } else {
 337                 /* delay notification till send right is created */
 338                 sync = 1;
 339                 pset = default_pager_external_set;
 340         }
 341
 342         ipc_port_make_sonce(mem_obj);
 343         ip_lock(mem_obj);  /* unlocked in nsrequest below */
 344         ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
 345 }
 346
 347 #endif
 348
 349 kern_return_t
 350 dp_memory_object_init(
 351         memory_object_t         mem_obj,
 352         memory_object_control_t control,
 353         vm_size_t               pager_page_size)
 354 {
 355         vstruct_t               vs;
 356
 357         assert(pager_page_size == vm_page_size);
 358
 359         memory_object_control_reference(control);
 360
 361         vs_lookup(mem_obj, vs);
 362         vs_lock(vs);
 363
 364         if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
 365                 Panic("bad request");
 366
 367         vs->vs_control = control;
 368         vs_unlock(vs);
 369
 370         return KERN_SUCCESS;
 371 }
 372
 373 kern_return_t
 374 dp_memory_object_synchronize(
 375         memory_object_t         mem_obj,
 376         memory_object_offset_t  offset,
 377         vm_size_t               length,
 378         vm_sync_t               flags)
 379 {
 380         vstruct_t       vs;
 381
 382         vs_lookup(mem_obj, vs);
 383         vs_lock(vs);
 384         vs_unlock(vs);
 385
 386         memory_object_synchronize_completed(vs->vs_control, offset, length);
 387
 388         return KERN_SUCCESS;
 389 }
 390
 391 kern_return_t
 392 dp_memory_object_unmap(
 393         memory_object_t         mem_obj)
 394 {
 395         panic("dp_memory_object_unmap");
 396
 397         return KERN_FAILURE;
 398 }
 399
 400 kern_return_t
 401 dp_memory_object_terminate(
 402         memory_object_t         mem_obj)
 403 {
 404         memory_object_control_t control;
 405         vstruct_t               vs;
 406         kern_return_t           kr;
 407
 408         /*
 409          * control port is a receive right, not a send right.
 410          */
 411
 412         vs_lookup(mem_obj, vs);
 413         vs_lock(vs);
 414
 415         /*
 416          * Wait for read and write requests to terminate.
 417          */
 418
 419         vs_wait_for_readers(vs);
 420         vs_wait_for_writers(vs);
 421
 422         /*
 423          * After memory_object_terminate both memory_object_init
 424          * and a no-senders notification are possible, so we need
 425          * to clean up our reference to the memory_object_control
 426          * to prepare for a new init.
 427          */
 428
 429         control = vs->vs_control;
 430         vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
 431
 432         /* a bit of special case ugliness here.  Wakeup any waiting reads */
 433         /* these data requests had to be removed from the seqno traffic   */
 434         /* based on a performance bottleneck with large memory objects    */
 435         /* the problem will right itself with the new component based     */
 436         /* synchronous interface.  The new async will be able to return   */
 437         /* failure during its sync phase.   In the mean time ... */
 438
 439         thread_wakeup(&vs->vs_writers);
 440         thread_wakeup(&vs->vs_async_pending);
 441
 442         vs_unlock(vs);
 443
 444         /*
 445          * Now we deallocate our reference on the control.
 446          */
 447         memory_object_control_deallocate(control);
 448         return KERN_SUCCESS;
 449 }
 450
 451 void
 452 dp_memory_object_reference(
 453         memory_object_t         mem_obj)
 454 {
 455         vstruct_t               vs;
 456
 457         vs_lookup_safe(mem_obj, vs);
 458         if (vs == VSTRUCT_NULL)
 459                 return;
 460
 461         VS_LOCK(vs);
 462         assert(vs->vs_references > 0);
 463         vs->vs_references++;
 464         VS_UNLOCK(vs);
 465 }
 466
 467 extern ipc_port_t       max_pages_trigger_port;
 468 extern int              dp_pages_free;
 469 extern int              maximum_pages_free;
 470 void
 471 dp_memory_object_deallocate(
 472         memory_object_t         mem_obj)
 473 {
 474         vstruct_t               vs;
 475         mach_port_seqno_t       seqno;
 476         ipc_port_t              trigger;
 477
 478         /*
 479          * Because we don't give out multiple first references
 480          * for a memory object, there can't be a race
 481          * between getting a deallocate call and creating
 482          * a new reference for the object.
 483          */
 484
 485         vs_lookup_safe(mem_obj, vs);
 486         if (vs == VSTRUCT_NULL)
 487                 return;
 488
 489         VS_LOCK(vs);
 490         if (--vs->vs_references > 0) {
 491                 VS_UNLOCK(vs);
 492                 return;
 493         }
 494
 495         seqno = vs->vs_next_seqno++;
 496         while (vs->vs_seqno != seqno) {
 497                 default_pager_wait_seqno++;
 498                 vs->vs_waiting_seqno = TRUE;
 499                 assert_wait(&vs->vs_seqno, THREAD_UNINT);
 500                 VS_UNLOCK(vs);
 501                 thread_block(THREAD_CONTINUE_NULL);
 502                 VS_LOCK(vs);
 503         }
 504
 505         vs_async_wait(vs);      /* wait for pending async IO */
 506
 507         /* do not delete the vs structure until the referencing pointers */
 508         /* in the vstruct list have been expunged */
 509
 510         /* get VSL_LOCK out of order by using TRY mechanism */
 511         while(!VSL_LOCK_TRY()) {
 512                 VS_UNLOCK(vs);
 513                 VSL_LOCK();
 514                 VSL_UNLOCK();
 515                 VS_LOCK(vs);
 516                 vs_async_wait(vs);      /* wait for pending async IO */
 517         }
 518
 519
 520         /*
 521          * We shouldn't get a deallocation call
 522          * when the kernel has the object cached.
 523          */
 524         if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
 525                 Panic("bad request");
 526
 527         /*
 528          * Unlock the pager (though there should be no one
 529          * waiting for it).
 530          */
 531         VS_UNLOCK(vs);
 532
 533         /* Lock out paging segment removal for the duration of this */
 534         /* call.  We are vulnerable to losing a paging segment we rely */
 535         /* on as soon as we remove ourselves from the VSL and unlock */
 536
 537         /* Keep our thread from blocking on attempt to trigger backing */
 538         /* store release */
 539         backing_store_release_trigger_disable += 1;
 540
 541         /*
 542          * Remove the memory object port association, and then
 543          * the destroy the port itself.  We must remove the object
 544          * from the port list before deallocating the pager,
 545          * because of default_pager_objects.
 546          */
 547         vstruct_list_delete(vs);
 548         VSL_UNLOCK();
 549
 550         ps_vstruct_dealloc(vs);
 551
 552         VSL_LOCK();
 553         backing_store_release_trigger_disable -= 1;
 554         if(backing_store_release_trigger_disable == 0) {
 555                 thread_wakeup((event_t)&backing_store_release_trigger_disable);
 556         }
 557         VSL_UNLOCK();
 558
 559         PSL_LOCK();
 560         if(max_pages_trigger_port
 561                 && (backing_store_release_trigger_disable == 0)
 562                 && (dp_pages_free > maximum_pages_free)) {
 563                 trigger = max_pages_trigger_port;
 564                 max_pages_trigger_port = NULL;
 565         } else
 566                 trigger = IP_NULL;
 567         PSL_UNLOCK();
 568
 569         if (trigger != IP_NULL) {
 570                 default_pager_space_alert(trigger, LO_WAT_ALERT);
 571                 ipc_port_release_send(trigger);
 572         }
 573
 574 }
 575
 576 kern_return_t
 577 dp_memory_object_data_request(
 578         memory_object_t         mem_obj,
 579         memory_object_offset_t  offset,
 580         vm_size_t               length,
 581         vm_prot_t               protection_required)
 582 {
 583         vstruct_t               vs;
 584
 585         GSTAT(global_stats.gs_pagein_calls++);
 586
 587
 588         /* CDY at this moment vs_lookup panics when presented with the wrong */
 589         /* port.  As we are expanding this pager to support user interfaces */
 590         /* this should be changed to return kern_failure */
 591         vs_lookup(mem_obj, vs);
 592         vs_lock(vs);
 593
 594         /* We are going to relax the strict sequencing here for performance */
 595         /* reasons.  We can do this because we know that the read and */
 596         /* write threads are different and we rely on synchronization */
 597         /* of read and write requests at the cache memory_object level */
 598         /* break out wait_for_writers, all of this goes away when */
 599         /* we get real control of seqno with the new component interface */
 600
 601         if (vs->vs_writers != 0) {
 602                 /* you can't hold on to the seqno and go */
 603                 /* to sleep like that */
 604                 vs_unlock(vs);  /* bump internal count of seqno */
 605                 VS_LOCK(vs);
 606                 while (vs->vs_writers != 0) {
 607                         default_pager_wait_write++;
 608                         vs->vs_waiting_write = TRUE;
 609                         assert_wait(&vs->vs_writers, THREAD_UNINT);
 610                         VS_UNLOCK(vs);
 611                         thread_block(THREAD_CONTINUE_NULL);
 612                         VS_LOCK(vs);
 613                         vs_async_wait(vs);
 614                 }
 615                 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
 616                         VS_UNLOCK(vs);
 617                         return KERN_FAILURE;
 618                 }
 619                 vs_start_read(vs);
 620                 VS_UNLOCK(vs);
 621         } else {
 622                 vs_start_read(vs);
 623                 vs_unlock(vs);
 624         }
 625
 626         /*
 627          * Request must be on a page boundary and a multiple of pages.
 628          */
 629         if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
 630                 Panic("bad alignment");
 631
 632         pvs_cluster_read(vs, (vm_offset_t)offset, length);
 633
 634         vs_finish_read(vs);
 635
 636         return KERN_SUCCESS;
 637 }
 638
 639 /*
 640  * memory_object_data_initialize: check whether we already have each page, and
 641  * write it if we do not.  The implementation is far from optimized, and
 642  * also assumes that the default_pager is single-threaded.
 643  */
 644 /*  It is questionable whether or not a pager should decide what is relevant */
 645 /* and what is not in data sent from the kernel.  Data initialize has been */
 646 /* changed to copy back all data sent to it in preparation for its eventual */
 647 /* merge with data return.  It is the kernel that should decide what pages */
 648 /* to write back.  As of the writing of this note, this is indeed the case */
 649 /* the kernel writes back one page at a time through this interface */
 650
 651 kern_return_t
 652 dp_memory_object_data_initialize(
 653         memory_object_t         mem_obj,
 654         memory_object_offset_t  offset,
 655         vm_size_t               size)
 656 {
 657         vstruct_t       vs;
 658
 659         DEBUG(DEBUG_MO_EXTERNAL,
 660               ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
 661                (int)mem_obj, (int)offset, (int)size));
 662         GSTAT(global_stats.gs_pages_init += atop(size));
 663
 664         vs_lookup(mem_obj, vs);
 665         vs_lock(vs);
 666         vs_start_write(vs);
 667         vs_unlock(vs);
 668
 669         /*
 670          * Write the data via clustered writes. vs_cluster_write will
 671          * loop if the address range specified crosses cluster
 672          * boundaries.
 673          */
 674         vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
 675
 676         vs_finish_write(vs);
 677
 678         return KERN_SUCCESS;
 679 }
 680
 681 kern_return_t
 682 dp_memory_object_data_unlock(
 683         memory_object_t         mem_obj,
 684         memory_object_offset_t  offset,
 685         vm_size_t               size,
 686         vm_prot_t               desired_access)
 687 {
 688         Panic("dp_memory_object_data_unlock: illegal");
 689         return KERN_FAILURE;
 690 }
 691
 692
 693 kern_return_t
 694 dp_memory_object_data_return(
 695         memory_object_t         mem_obj,
 696         memory_object_offset_t  offset,
 697         vm_size_t               size,
 698         boolean_t               dirty,
 699         boolean_t               kernel_copy)
 700 {
 701         vstruct_t       vs;
 702
 703         DEBUG(DEBUG_MO_EXTERNAL,
 704               ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
 705                (int)mem_obj, (int)offset, (int)size));
 706         GSTAT(global_stats.gs_pageout_calls++);
 707
 708         /* This routine is called by the pageout thread.  The pageout thread */
 709         /* cannot be blocked by read activities unless the read activities   */
 710         /* Therefore the grant of vs lock must be done on a try versus a      */
 711         /* blocking basis.  The code below relies on the fact that the       */
 712         /* interface is synchronous.  Should this interface be again async   */
 713         /* for some type  of pager in the future the pages will have to be   */
 714         /* returned through a separate, asynchronous path.                   */
 715
 716         vs_lookup(mem_obj, vs);
 717
 718         default_pager_total++;
 719         if(!VS_TRY_LOCK(vs)) {
 720                 /* the call below will not be done by caller when we have */
 721                 /* a synchronous interface */
 722                 /* return KERN_LOCK_OWNED; */
 723                 upl_t           upl;
 724                 int             page_list_count = 0;
 725                 memory_object_super_upl_request(vs->vs_control,
 726                                         (memory_object_offset_t)offset,
 727                                         size, size,
 728                                         &upl, NULL, &page_list_count,
 729                                         UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
 730                                         | UPL_NO_SYNC | UPL_COPYOUT_FROM);
 731                 upl_abort(upl,0);
 732                 upl_deallocate(upl);
 733                 return KERN_SUCCESS;
 734         }
 735
 736         if ((vs->vs_seqno != vs->vs_next_seqno++)
 737                         || (vs->vs_readers)
 738                         || (vs->vs_xfer_pending)) {
 739                 upl_t   upl;
 740                 int     page_list_count = 0;
 741
 742                 vs->vs_next_seqno--;
 743                 VS_UNLOCK(vs);
 744
 745                 /* the call below will not be done by caller when we have */
 746                 /* a synchronous interface */
 747                 /* return KERN_LOCK_OWNED; */
 748                 memory_object_super_upl_request(vs->vs_control,
 749                                 (memory_object_offset_t)offset,
 750                                 size, size,
 751                                 &upl, NULL, &page_list_count,
 752                                 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
 753                                         | UPL_NO_SYNC | UPL_COPYOUT_FROM);
 754                 upl_abort(upl,0);
 755                 upl_deallocate(upl);
 756                 return KERN_SUCCESS;
 757         }
 758
 759         if ((size % vm_page_size) != 0)
 760                 Panic("bad alignment");
 761
 762         vs_start_write(vs);
 763
 764
 765         vs->vs_async_pending += 1;  /* protect from backing store contraction */
 766         vs_unlock(vs);
 767
 768         /*
 769          * Write the data via clustered writes. vs_cluster_write will
 770          * loop if the address range specified crosses cluster
 771          * boundaries.
 772          */
 773         vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
 774
 775         vs_finish_write(vs);
 776
 777         /* temporary, need a finer lock based on cluster */
 778
 779         VS_LOCK(vs);
 780         vs->vs_async_pending -= 1;  /* release vs_async_wait */
 781         if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
 782                 vs->vs_waiting_async = FALSE;
 783                 VS_UNLOCK(vs);
 784                 thread_wakeup(&vs->vs_async_pending);
 785         } else {
 786                 VS_UNLOCK(vs);
 787         }
 788
 789
 790         return KERN_SUCCESS;
 791 }
 792
 793 /*
 794  * Routine:     default_pager_memory_object_create
 795  * Purpose:
 796  *      Handle requests for memory objects from the
 797  *      kernel.
 798  * Notes:
 799  *      Because we only give out the default memory
 800  *      manager port to the kernel, we don't have to
 801  *      be so paranoid about the contents.
 802  */
 803 kern_return_t
 804 default_pager_memory_object_create(
 805         memory_object_default_t dmm,
 806         vm_size_t               new_size,
 807         memory_object_t         *new_mem_obj)
 808 {
 809         vstruct_t               vs;
 810
 811         assert(dmm == default_pager_object);
 812
 813         vs = vs_object_create(new_size);
 814         if (vs == VSTRUCT_NULL)
 815                 return KERN_RESOURCE_SHORTAGE;
 816
 817         vs->vs_next_seqno = 0;
 818
 819         /*
 820          * Set up associations between this memory object
 821          * and this default_pager structure
 822          */
 823
 824         vs->vs_mem_obj = ISVS;
 825         vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
 826
 827         /*
 828          * After this, other threads might receive requests
 829          * for this memory object or find it in the port list.
 830          */
 831
 832         vstruct_list_insert(vs);
 833         *new_mem_obj = vs_to_mem_obj(vs);
 834         return KERN_SUCCESS;
 835 }
 836
 837 /*
 838  * Create an external object.
 839  */
 840 kern_return_t
 841 default_pager_object_create(
 842         default_pager_t pager,
 843         vm_size_t       size,
 844         memory_object_t *mem_objp)
 845 {
 846         vstruct_t       vs;
 847         kern_return_t   result;
 848         struct vstruct_alias    *alias_struct;
 849
 850
 851         if (pager != default_pager_object)
 852                 return KERN_INVALID_ARGUMENT;
 853
 854         vs = vs_object_create(size);
 855         if (vs == VSTRUCT_NULL)
 856                 return KERN_RESOURCE_SHORTAGE;
 857
 858         /*
 859          * Set up associations between the default pager
 860          * and this vstruct structure
 861          */
 862         vs->vs_mem_obj = ISVS;
 863         vstruct_list_insert(vs);
 864         *mem_objp = vs_to_mem_obj(vs);
 865         return KERN_SUCCESS;
 866 }
 867
 868 kern_return_t
 869 default_pager_objects(
 870         default_pager_t                 pager,
 871         default_pager_object_array_t    *objectsp,
 872         mach_msg_type_number_t          *ocountp,
 873         memory_object_array_t           *pagersp,
 874         mach_msg_type_number_t          *pcountp)
 875 {
 876         vm_offset_t             oaddr = 0;      /* memory for objects */
 877         vm_size_t               osize = 0;      /* current size */
 878         default_pager_object_t  * objects;
 879         unsigned int            opotential;
 880
 881         vm_offset_t             paddr = 0;      /* memory for pagers */
 882         vm_size_t               psize = 0;      /* current size */
 883         memory_object_t         * pagers;
 884         unsigned int            ppotential;
 885
 886         unsigned int            actual;
 887         unsigned int            num_objects;
 888         kern_return_t           kr;
 889         vstruct_t               entry;
 890 /*
 891         if (pager != default_pager_default_port)
 892                 return KERN_INVALID_ARGUMENT;
 893 */
 894
 895         /* start with the inline memory */
 896
 897         kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
 898                                                 (vm_map_copy_t) *objectsp);
 899
 900         if (kr != KERN_SUCCESS)
 901                 return kr;
 902
 903         osize = round_page(*ocountp * sizeof * objects);
 904         kr = vm_map_wire(ipc_kernel_map,
 905                         trunc_page((vm_offset_t)objects),
 906                         round_page(((vm_offset_t)objects) + osize),
 907                         VM_PROT_READ|VM_PROT_WRITE, FALSE);
 908         osize=0;
 909
 910         *objectsp = objects;
 911         /* we start with the inline space */
 912
 913
 914         num_objects = 0;
 915         opotential = *ocountp;
 916
 917         pagers = (memory_object_t *) *pagersp;
 918         ppotential = *pcountp;
 919
 920         VSL_LOCK();
 921
 922         /*
 923          * We will send no more than this many
 924          */
 925         actual = vstruct_list.vsl_count;
 926         VSL_UNLOCK();
 927
 928         if (opotential < actual) {
 929                 vm_offset_t     newaddr;
 930                 vm_size_t       newsize;
 931
 932                 newsize = 2 * round_page(actual * sizeof * objects);
 933
 934                 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
 935                 if (kr != KERN_SUCCESS)
 936                         goto nomemory;
 937
 938                 oaddr = newaddr;
 939                 osize = newsize;
 940                 opotential = osize / sizeof * objects;
 941                 objects = (default_pager_object_t *)oaddr;
 942         }
 943
 944         if (ppotential < actual) {
 945                 vm_offset_t     newaddr;
 946                 vm_size_t       newsize;
 947
 948                 newsize = 2 * round_page(actual * sizeof * pagers);
 949
 950                 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
 951                 if (kr != KERN_SUCCESS)
 952                         goto nomemory;
 953
 954                 paddr = newaddr;
 955                 psize = newsize;
 956                 ppotential = psize / sizeof * pagers;
 957                 pagers = (memory_object_t *)paddr;
 958         }
 959
 960         /*
 961          * Now scan the list.
 962          */
 963
 964         VSL_LOCK();
 965
 966         num_objects = 0;
 967         queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
 968
 969                 memory_object_t         pager;
 970                 vm_size_t               size;
 971
 972                 if ((num_objects >= opotential) ||
 973                     (num_objects >= ppotential)) {
 974
 975                         /*
 976                          * This should be rare.  In any case,
 977                          * we will only miss recent objects,
 978                          * because they are added at the end.
 979                          */
 980                         break;
 981                 }
 982
 983                 /*
 984                  * Avoid interfering with normal operations
 985                  */
 986                 if (!VS_MAP_TRY_LOCK(entry))
 987                         goto not_this_one;
 988                 size = ps_vstruct_allocated_size(entry);
 989                 VS_MAP_UNLOCK(entry);
 990
 991                 VS_LOCK(entry);
 992
 993                 /*
 994                  * We need a reference for our caller.  Adding this
 995                  * reference through the linked list could race with
 996                  * destruction of the object.  If we find the object
 997                  * has no references, just give up on it.
 998                  */
 999                 VS_LOCK(entry);
1000                 if (entry->vs_references == 0) {
1001                         VS_UNLOCK(entry);
1002                         goto not_this_one;
1003                 }
1004                 dp_memory_object_reference(vs_to_mem_obj(entry));
1005                 VS_UNLOCK(entry);
1006
1007                 /* the arrays are wired, so no deadlock worries */
1008
1009                 objects[num_objects].dpo_object = (vm_offset_t) entry;
1010                 objects[num_objects].dpo_size = size;
1011                 pagers [num_objects++] = pager;
1012                 continue;
1013
1014             not_this_one:
1015                 /*
1016                  * Do not return garbage
1017                  */
1018                 objects[num_objects].dpo_object = (vm_offset_t) 0;
1019                 objects[num_objects].dpo_size = 0;
1020                 pagers[num_objects++] = MEMORY_OBJECT_NULL;
1021
1022         }
1023
1024         VSL_UNLOCK();
1025
1026         /*
1027          * Deallocate and clear unused memory.
1028          * (Returned memory will automagically become pageable.)
1029          */
1030
1031         if (objects == *objectsp) {
1032
1033                 /*
1034                  * Our returned information fit inline.
1035                  * Nothing to deallocate.
1036                  */
1037                 *ocountp = num_objects;
1038         } else if (actual == 0) {
1039                 (void) vm_deallocate(kernel_map, oaddr, osize);
1040
1041                 /* return zero items inline */
1042                 *ocountp = 0;
1043         } else {
1044                 vm_offset_t used;
1045
1046                 used = round_page(actual * sizeof * objects);
1047
1048                 if (used != osize)
1049                         (void) vm_deallocate(kernel_map,
1050                                              oaddr + used, osize - used);
1051
1052                 *objectsp = objects;
1053                 *ocountp = num_objects;
1054         }
1055
1056         if (pagers == (memory_object_t *)*pagersp) {
1057
1058                 /*
1059                  * Our returned information fit inline.
1060                  * Nothing to deallocate.
1061                  */
1062
1063                 *pcountp = num_objects;
1064         } else if (actual == 0) {
1065                 (void) vm_deallocate(kernel_map, paddr, psize);
1066
1067                 /* return zero items inline */
1068                 *pcountp = 0;
1069         } else {
1070                 vm_offset_t used;
1071
1072                 used = round_page(actual * sizeof * pagers);
1073
1074                 if (used != psize)
1075                         (void) vm_deallocate(kernel_map,
1076                                              paddr + used, psize - used);
1077
1078                 *pagersp = (memory_object_array_t)pagers;
1079                 *pcountp = num_objects;
1080         }
1081         (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1082                         *ocountp + (vm_offset_t)objects, FALSE);
1083         (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1084                         *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1085
1086         return KERN_SUCCESS;
1087
1088     nomemory:
1089         {
1090                 register int    i;
1091                 for (i = 0; i < num_objects; i++)
1092                         if (pagers[i] != MEMORY_OBJECT_NULL)
1093                                 memory_object_deallocate(pagers[i]);
1094         }
1095
1096         if (objects != *objectsp)
1097                 (void) vm_deallocate(kernel_map, oaddr, osize);
1098
1099         if (pagers != (memory_object_t *)*pagersp)
1100                 (void) vm_deallocate(kernel_map, paddr, psize);
1101
1102         return KERN_RESOURCE_SHORTAGE;
1103 }
1104
1105 kern_return_t
1106 default_pager_object_pages(
1107         default_pager_t                 pager,
1108         memory_object_t                 object,
1109         default_pager_page_array_t      *pagesp,
1110         mach_msg_type_number_t          *countp)
1111 {
1112         vm_offset_t                     addr;   /* memory for page offsets */
1113         vm_size_t                       size = 0; /* current memory size */
1114         default_pager_page_t            * pages;
1115         unsigned int                    potential, actual;
1116         kern_return_t                   kr;
1117
1118
1119         if (pager != default_pager_object)
1120                 return KERN_INVALID_ARGUMENT;
1121
1122         kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
1123                                                 (vm_map_copy_t) *pagesp);
1124
1125         if (kr != KERN_SUCCESS)
1126                 return kr;
1127
1128         size = round_page(*countp * sizeof * pages);
1129         kr = vm_map_wire(ipc_kernel_map,
1130                         trunc_page((vm_offset_t)pages),
1131                         round_page(((vm_offset_t)pages) + size),
1132                         VM_PROT_READ|VM_PROT_WRITE, FALSE);
1133         size=0;
1134
1135         *pagesp = pages;
1136         /* we start with the inline space */
1137
1138         addr = (vm_offset_t)pages;
1139         potential = *countp;
1140
1141         for (;;) {
1142                 vstruct_t       entry;
1143
1144                 VSL_LOCK();
1145                 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1146                               vs_links) {
1147                         VS_LOCK(entry);
1148                         if (vs_to_mem_obj(entry) == object) {
1149                                 VSL_UNLOCK();
1150                                 goto found_object;
1151                         }
1152                         VS_UNLOCK(entry);
1153                 }
1154                 VSL_UNLOCK();
1155
1156                 /* did not find the object */
1157
1158                 if (pages != *pagesp)
1159                         (void) vm_deallocate(kernel_map, addr, size);
1160                 return KERN_INVALID_ARGUMENT;
1161
1162             found_object:
1163
1164                 if (!VS_MAP_TRY_LOCK(entry)) {
1165                         /* oh well bad luck */
1166                         int wresult;
1167
1168                         VS_UNLOCK(entry);
1169
1170                         assert_wait_timeout( 1, THREAD_UNINT );
1171                         wresult = thread_block(THREAD_CONTINUE_NULL);
1172                         assert(wresult == THREAD_TIMED_OUT);
1173                         continue;
1174                 }
1175
1176                 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1177                 VS_MAP_UNLOCK(entry);
1178                 VS_UNLOCK(entry);
1179
1180                 if (actual <= potential)
1181                         break;
1182
1183                 /* allocate more memory */
1184
1185                 if (pages != *pagesp)
1186                         (void) vm_deallocate(kernel_map, addr, size);
1187                 size = round_page(actual * sizeof * pages);
1188                 kr = vm_allocate(kernel_map, &addr, size, TRUE);
1189                 if (kr != KERN_SUCCESS)
1190                         return kr;
1191                 pages = (default_pager_page_t *)addr;
1192                 potential = size / sizeof * pages;
1193         }
1194
1195         /*
1196          * Deallocate and clear unused memory.
1197          * (Returned memory will automagically become pageable.)
1198          */
1199
1200         if (pages == *pagesp) {
1201
1202                 /*
1203                  * Our returned information fit inline.
1204                  * Nothing to deallocate.
1205                  */
1206
1207                 *countp = actual;
1208         } else if (actual == 0) {
1209                 (void) vm_deallocate(kernel_map, addr, size);
1210
1211                 /* return zero items inline */
1212                 *countp = 0;
1213         } else {
1214                 vm_offset_t used;
1215
1216                 used = round_page(actual * sizeof * pages);
1217
1218                 if (used != size)
1219                         (void) vm_deallocate(kernel_map,
1220                                              addr + used, size - used);
1221
1222                 *pagesp = pages;
1223                 *countp = actual;
1224         }
1225         (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
1226                         *countp + (vm_offset_t)pages, FALSE);
1227         (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
1228                         *countp, TRUE, (vm_map_copy_t *)pagesp);
1229         return KERN_SUCCESS;
1230 }