[apple/xnu.git] (xnu-517.3.15) / osfmk / default_pager / dp_memory_object.c
1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * Mach Operating System
30 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
31 * All Rights Reserved.
32 *
33 * Permission to use, copy, modify and distribute this software and its
34 * documentation is hereby granted, provided that both the copyright
35 * notice and this permission notice appear in all copies of the
36 * software, derivative works or modified versions, and any portions
37 * thereof, and that both notices appear in supporting documentation.
38 *
39 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
40 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
41 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
42 *
43 * Carnegie Mellon requests users of this software to return to
44 *
45 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
46 * School of Computer Science
47 * Carnegie Mellon University
48 * Pittsburgh PA 15213-3890
49 *
50 * any improvements or extensions that they make and grant Carnegie Mellon
51 * the rights to redistribute these changes.
52 */
53
54 /*
55 * Default Pager.
56 * Memory Object Management.
57 */
58
59 #include "default_pager_internal.h"
60 #include <mach/memory_object_types.h>
61 #include <mach/memory_object_server.h>
62 #include <vm/memory_object.h>
63 #include <vm/vm_pageout.h>
64
65
66 /*
67 * List of all vstructs. A specific vstruct is
68 * found directly via its port; this list is
69 * only used for monitoring purposes by the
70 * default_pager_object* calls and by ps_delete
71 * when abstract memory objects must be scanned
72 * to remove any live storage on a segment which
73 * is to be removed.
74 */
75 struct vstruct_list_head vstruct_list;
76
77 __private_extern__ void
78 vstruct_list_insert(
79 vstruct_t vs)
80 {
81 VSL_LOCK();
82 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
83 vstruct_list.vsl_count++;
84 VSL_UNLOCK();
85 }
86
87
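/*
 * Note: vstruct_list_insert takes VSL_LOCK itself, while
 * vstruct_list_delete expects the caller to already hold it
 * (see dp_memory_object_deallocate below).
 */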
88 __private_extern__ void
89 vstruct_list_delete(
90 vstruct_t vs)
91 {
92 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
93 vstruct_list.vsl_count--;
94 }
95
96 /*
97 * We use the sequence numbers on requests to regulate
98 * our parallelism. In general, we allow multiple reads and writes
99 * to proceed in parallel, with the exception that reads must
100 * wait for previous writes to finish. (Because the kernel might
101 * generate a data-request for a page on the heels of a data-write
102 * for the same page, and we must avoid returning stale data.)
103 * Terminate requests wait for preceding reads and writes to finish.
104 */
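/*
 * A minimal sketch of the caller pattern these primitives imply (see
 * dp_memory_object_data_request and dp_memory_object_data_initialize
 * below, which follow it with minor variations):
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);		wait for our sequence number; vs stays locked
 *	vs_start_read(vs);	or vs_start_write(vs)
 *	vs_unlock(vs);		bump the seqno and wake the next request
 *	... do the paging I/O (pvs_cluster_read / vs_cluster_write) ...
 *	vs_finish_read(vs);	or vs_finish_write(vs); wakes any waiter
 */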
105
106 static unsigned int default_pager_total = 0; /* debugging */
107 static unsigned int default_pager_wait_seqno = 0; /* debugging */
108 static unsigned int default_pager_wait_read = 0; /* debugging */
109 static unsigned int default_pager_wait_write = 0; /* debugging */
110 static unsigned int default_pager_wait_refs = 0; /* debugging */
111
112 __private_extern__ void
113 vs_async_wait(
114 vstruct_t vs)
115 {
116
117 ASSERT(vs->vs_async_pending >= 0);
118 while (vs->vs_async_pending > 0) {
119 vs->vs_waiting_async = TRUE;
120 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
121 VS_UNLOCK(vs);
122 thread_block(THREAD_CONTINUE_NULL);
123 VS_LOCK(vs);
124 }
125 ASSERT(vs->vs_async_pending == 0);
126 }
127
128
129 #if PARALLEL
130 /*
131 * Waits for correct sequence number. Leaves pager locked.
132 *
133 * JMM - Sequence numbers guarantee ordering of requests generated
134 * by a single thread if the receiver is multithreaded and
135 * the interfaces are asynchronous (i.e. sender can generate
136 * more than one request before the first is received in the
137 * pager). Normally, IPC would generate these numbers in that
138 * case. But we are trying to avoid using IPC for the in-kernel
139 * scenario. Since these are actually invoked synchronously
140 * anyway (in-kernel), we can just fake the sequence number
141 * generation here (thus avoiding the dependence on IPC).
142 */
143 __private_extern__ void
144 vs_lock(
145 vstruct_t vs)
146 {
147 mach_port_seqno_t seqno;
148
149 default_pager_total++;
150 VS_LOCK(vs);
151
152 seqno = vs->vs_next_seqno++;
153
154 while (vs->vs_seqno != seqno) {
155 default_pager_wait_seqno++;
156 vs->vs_waiting_seqno = TRUE;
157 assert_wait(&vs->vs_seqno, THREAD_UNINT);
158 VS_UNLOCK(vs);
159 thread_block(THREAD_CONTINUE_NULL);
160 VS_LOCK(vs);
161 }
162 }
163
164 /*
165 * Increments sequence number and unlocks pager.
166 */
167 __private_extern__ void
168 vs_unlock(vstruct_t vs)
169 {
170 vs->vs_seqno++;
171 if (vs->vs_waiting_seqno) {
172 vs->vs_waiting_seqno = FALSE;
173 VS_UNLOCK(vs);
174 thread_wakeup(&vs->vs_seqno);
175 return;
176 }
177 VS_UNLOCK(vs);
178 }
179
180 /*
181 * Start a read - one more reader. Pager must be locked.
182 */
183 __private_extern__ void
184 vs_start_read(
185 vstruct_t vs)
186 {
187 vs->vs_readers++;
188 }
189
190 /*
191 * Wait for readers. Unlocks and relocks pager if wait needed.
192 */
193 __private_extern__ void
194 vs_wait_for_readers(
195 vstruct_t vs)
196 {
197 while (vs->vs_readers != 0) {
198 default_pager_wait_read++;
199 vs->vs_waiting_read = TRUE;
200 assert_wait(&vs->vs_readers, THREAD_UNINT);
201 VS_UNLOCK(vs);
202 thread_block(THREAD_CONTINUE_NULL);
203 VS_LOCK(vs);
204 }
205 }
206
207 /*
208 * Finish a read. Pager is unlocked and returns unlocked.
209 */
210 __private_extern__ void
211 vs_finish_read(
212 vstruct_t vs)
213 {
214 VS_LOCK(vs);
215 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
216 vs->vs_waiting_read = FALSE;
217 VS_UNLOCK(vs);
218 thread_wakeup(&vs->vs_readers);
219 return;
220 }
221 VS_UNLOCK(vs);
222 }
223
224 /*
225 * Start a write - one more writer. Pager must be locked.
226 */
227 __private_extern__ void
228 vs_start_write(
229 vstruct_t vs)
230 {
231 vs->vs_writers++;
232 }
233
234 /*
235 * Wait for writers. Unlocks and relocks pager if wait needed.
236 */
237 __private_extern__ void
238 vs_wait_for_writers(
239 vstruct_t vs)
240 {
241 while (vs->vs_writers != 0) {
242 default_pager_wait_write++;
243 vs->vs_waiting_write = TRUE;
244 assert_wait(&vs->vs_writers, THREAD_UNINT);
245 VS_UNLOCK(vs);
246 thread_block(THREAD_CONTINUE_NULL);
247 VS_LOCK(vs);
248 }
249 vs_async_wait(vs);
250 }
251
252 /* This is to be used for the transfer from segment code ONLY */
253 /* The transfer code holds off vs destruction by keeping the */
254 /* vs_async_wait count non-zero.  It will not conflict with */
255 /* other writers on an async basis because it only writes on */
256 /* a cluster basis into fresh (as of sync time) cluster locations */
257
258 __private_extern__ void
259 vs_wait_for_sync_writers(
260 vstruct_t vs)
261 {
262 while (vs->vs_writers != 0) {
263 default_pager_wait_write++;
264 vs->vs_waiting_write = TRUE;
265 assert_wait(&vs->vs_writers, THREAD_UNINT);
266 VS_UNLOCK(vs);
267 thread_block(THREAD_CONTINUE_NULL);
268 VS_LOCK(vs);
269 }
270 }
271
272
273 /*
274 * Finish a write. Pager is unlocked and returns unlocked.
275 */
276 __private_extern__ void
277 vs_finish_write(
278 vstruct_t vs)
279 {
280 VS_LOCK(vs);
281 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
282 vs->vs_waiting_write = FALSE;
283 VS_UNLOCK(vs);
284 thread_wakeup(&vs->vs_writers);
285 return;
286 }
287 VS_UNLOCK(vs);
288 }
289 #endif /* PARALLEL */
290
291 vstruct_t
292 vs_object_create(
293 vm_size_t size)
294 {
295 vstruct_t vs;
296
297 /*
298 * Allocate a vstruct. If there are any problems, then report them
299 * to the console.
300 */
301 vs = ps_vstruct_create(size);
302 if (vs == VSTRUCT_NULL) {
303 dprintf(("vs_object_create: unable to allocate %s\n",
304 "-- either run swapon command or reboot"));
305 return VSTRUCT_NULL;
306 }
307
308 return vs;
309 }
310
311 #if 0
312 void default_pager_add(vstruct_t, boolean_t); /* forward */
313
314 void
315 default_pager_add(
316 vstruct_t vs,
317 boolean_t internal)
318 {
319 memory_object_t mem_obj = vs->vs_mem_obj;
320 mach_port_t pset;
321 mach_port_mscount_t sync;
322 mach_port_t previous;
323 kern_return_t kr;
324 static char here[] = "default_pager_add";
325
326 /*
327 * The port currently has a make-send count of zero,
328 * because either we just created the port or we just
329 * received the port in a memory_object_create request.
330 */
331
332 if (internal) {
333 /* possibly generate an immediate no-senders notification */
334 sync = 0;
335 pset = default_pager_internal_set;
336 } else {
337 /* delay notification till send right is created */
338 sync = 1;
339 pset = default_pager_external_set;
340 }
341
342 ipc_port_make_sonce(mem_obj);
343 ip_lock(mem_obj); /* unlocked in nsrequest below */
344 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
345 }
346
347 #endif
348
349 kern_return_t
350 dp_memory_object_init(
351 memory_object_t mem_obj,
352 memory_object_control_t control,
353 vm_size_t pager_page_size)
354 {
355 vstruct_t vs;
356
357 assert(pager_page_size == vm_page_size);
358
359 memory_object_control_reference(control);
360
361 vs_lookup(mem_obj, vs);
362 vs_lock(vs);
363
364 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
365 Panic("bad request");
366
367 vs->vs_control = control;
368 vs_unlock(vs);
369
370 return KERN_SUCCESS;
371 }
372
373 kern_return_t
374 dp_memory_object_synchronize(
375 memory_object_t mem_obj,
376 memory_object_offset_t offset,
377 vm_size_t length,
378 vm_sync_t flags)
379 {
380 vstruct_t vs;
381
382 vs_lookup(mem_obj, vs);
383 vs_lock(vs);
384 vs_unlock(vs);
385
386 memory_object_synchronize_completed(vs->vs_control, offset, length);
387
388 return KERN_SUCCESS;
389 }
390
391 kern_return_t
392 dp_memory_object_unmap(
393 memory_object_t mem_obj)
394 {
395 panic("dp_memory_object_unmap");
396
397 return KERN_FAILURE;
398 }
399
400 kern_return_t
401 dp_memory_object_terminate(
402 memory_object_t mem_obj)
403 {
404 memory_object_control_t control;
405 vstruct_t vs;
406 kern_return_t kr;
407
408 /*
409 * control port is a receive right, not a send right.
410 */
411
412 vs_lookup(mem_obj, vs);
413 vs_lock(vs);
414
415 /*
416 * Wait for read and write requests to terminate.
417 */
418
419 vs_wait_for_readers(vs);
420 vs_wait_for_writers(vs);
421
422 /*
423 * After memory_object_terminate both memory_object_init
424 * and a no-senders notification are possible, so we need
425 * to clean up our reference to the memory_object_control
426 * to prepare for a new init.
427 */
428
429 control = vs->vs_control;
430 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
431
432 /* A bit of special-case ugliness here: wake up any waiting reads. */
433 /* These data requests had to be removed from the seqno traffic */
434 /* because of a performance bottleneck with large memory objects. */
435 /* The problem will right itself with the new component-based */
436 /* synchronous interface.  The new async will be able to return */
437 /* failure during its sync phase.  In the meantime ... */
438
439 thread_wakeup(&vs->vs_writers);
440 thread_wakeup(&vs->vs_async_pending);
441
442 vs_unlock(vs);
443
444 /*
445 * Now we deallocate our reference on the control.
446 */
447 memory_object_control_deallocate(control);
448 return KERN_SUCCESS;
449 }
450
451 void
452 dp_memory_object_reference(
453 memory_object_t mem_obj)
454 {
455 vstruct_t vs;
456
457 vs_lookup_safe(mem_obj, vs);
458 if (vs == VSTRUCT_NULL)
459 return;
460
461 VS_LOCK(vs);
462 assert(vs->vs_references > 0);
463 vs->vs_references++;
464 VS_UNLOCK(vs);
465 }
466
467 extern ipc_port_t max_pages_trigger_port;
468 extern int dp_pages_free;
469 extern int maximum_pages_free;
470 void
471 dp_memory_object_deallocate(
472 memory_object_t mem_obj)
473 {
474 vstruct_t vs;
475 mach_port_seqno_t seqno;
476
477 /*
478 * Because we don't give out multiple first references
479 * for a memory object, there can't be a race
480 * between getting a deallocate call and creating
481 * a new reference for the object.
482 */
483
484 vs_lookup_safe(mem_obj, vs);
485 if (vs == VSTRUCT_NULL)
486 return;
487
488 VS_LOCK(vs);
489 if (--vs->vs_references > 0) {
490 VS_UNLOCK(vs);
491 return;
492 }
493
494 seqno = vs->vs_next_seqno++;
495 while (vs->vs_seqno != seqno) {
496 default_pager_wait_seqno++;
497 vs->vs_waiting_seqno = TRUE;
498 assert_wait(&vs->vs_seqno, THREAD_UNINT);
499 VS_UNLOCK(vs);
500 thread_block(THREAD_CONTINUE_NULL);
501 VS_LOCK(vs);
502 }
503
504 vs_async_wait(vs); /* wait for pending async IO */
505
506 /* do not delete the vs structure until the referencing pointers */
507 /* in the vstruct list have been expunged */
508
509 /* get VSL_LOCK out of order by using TRY mechanism */
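/* (Presumably VSL_LOCK precedes VS_LOCK in the lock ordering, so with */
/* VS_LOCK already held we may only try; on failure we drop VS_LOCK, */
/* wait out the list lock, retake VS_LOCK and re-drain async IO.) */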
510 while(!VSL_LOCK_TRY()) {
511 VS_UNLOCK(vs);
512 VSL_LOCK();
513 VSL_UNLOCK();
514 VS_LOCK(vs);
515 vs_async_wait(vs); /* wait for pending async IO */
516 }
517
518
519 /*
520 * We shouldn't get a deallocation call
521 * when the kernel has the object cached.
522 */
523 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
524 Panic("bad request");
525
526 /*
527 * Unlock the pager (though there should be no one
528 * waiting for it).
529 */
530 VS_UNLOCK(vs);
531
532 /* Lock out paging segment removal for the duration of this */
533 /* call. We are vulnerable to losing a paging segment we rely */
534 /* on as soon as we remove ourselves from the VSL and unlock */
535
536 /* Keep our thread from blocking on attempt to trigger backing */
537 /* store release */
538 backing_store_release_trigger_disable += 1;
539
540 /*
541 * Remove the memory object port association, and then
542 * destroy the port itself.  We must remove the object
543 * from the port list before deallocating the pager,
544 * because of default_pager_objects.
545 */
546 vstruct_list_delete(vs);
547 VSL_UNLOCK();
548
549 ps_vstruct_dealloc(vs);
550
551 VSL_LOCK();
552 backing_store_release_trigger_disable -= 1;
553 if(backing_store_release_trigger_disable == 0) {
554 thread_wakeup((event_t)&backing_store_release_trigger_disable);
555 }
556 VSL_UNLOCK();
557 }
558
559 kern_return_t
560 dp_memory_object_data_request(
561 memory_object_t mem_obj,
562 memory_object_offset_t offset,
563 vm_size_t length,
564 vm_prot_t protection_required)
565 {
566 vstruct_t vs;
567
568 GSTAT(global_stats.gs_pagein_calls++);
569
570
571 /* CDY at this moment vs_lookup panics when presented with the wrong */
572 /* port. As we are expanding this pager to support user interfaces */
573 /* this should be changed to return kern_failure */
574 vs_lookup(mem_obj, vs);
575 vs_lock(vs);
576
577 /* We are going to relax the strict sequencing here for performance */
578 /* reasons.  We can do this because we know that the read and */
579 /* write threads are different and we rely on synchronization */
580 /* of read and write requests at the cache memory_object level. */
581 /* We break out wait_for_writers here; all of this goes away when */
582 /* we get real control of seqno with the new component interface. */
583
584 if (vs->vs_writers != 0) {
585 /* you can't hold on to the seqno and go */
586 /* to sleep like that */
587 vs_unlock(vs); /* bump internal count of seqno */
588 VS_LOCK(vs);
589 while (vs->vs_writers != 0) {
590 default_pager_wait_write++;
591 vs->vs_waiting_write = TRUE;
592 assert_wait(&vs->vs_writers, THREAD_UNINT);
593 VS_UNLOCK(vs);
594 thread_block(THREAD_CONTINUE_NULL);
595 VS_LOCK(vs);
596 vs_async_wait(vs);
597 }
598 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
599 VS_UNLOCK(vs);
600 return KERN_FAILURE;
601 }
602 vs_start_read(vs);
603 VS_UNLOCK(vs);
604 } else {
605 vs_start_read(vs);
606 vs_unlock(vs);
607 }
608
609 /*
610 * Request must be on a page boundary and a multiple of pages.
611 */
612 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
613 Panic("bad alignment");
614
615 pvs_cluster_read(vs, (vm_offset_t)offset, length);
616
617 vs_finish_read(vs);
618
619 return KERN_SUCCESS;
620 }
621
622 /*
623 * memory_object_data_initialize: check whether we already have each page, and
624 * write it if we do not. The implementation is far from optimized, and
625 * also assumes that the default_pager is single-threaded.
626 */
627 /* It is questionable whether or not a pager should decide what is relevant */
628 /* and what is not in data sent from the kernel. Data initialize has been */
629 /* changed to copy back all data sent to it in preparation for its eventual */
630 /* merge with data return. It is the kernel that should decide what pages */
631 /* to write back.  As of the writing of this note, this is indeed the case: */
632 /* the kernel writes back one page at a time through this interface. */
633
634 kern_return_t
635 dp_memory_object_data_initialize(
636 memory_object_t mem_obj,
637 memory_object_offset_t offset,
638 vm_size_t size)
639 {
640 vstruct_t vs;
641
642 DEBUG(DEBUG_MO_EXTERNAL,
643 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
644 (int)mem_obj, (int)offset, (int)size));
645 GSTAT(global_stats.gs_pages_init += atop_32(size));
646
647 vs_lookup(mem_obj, vs);
648 vs_lock(vs);
649 vs_start_write(vs);
650 vs_unlock(vs);
651
652 /*
653 * Write the data via clustered writes. vs_cluster_write will
654 * loop if the address range specified crosses cluster
655 * boundaries.
656 */
657 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
658
659 vs_finish_write(vs);
660
661 return KERN_SUCCESS;
662 }
663
664 kern_return_t
665 dp_memory_object_data_unlock(
666 memory_object_t mem_obj,
667 memory_object_offset_t offset,
668 vm_size_t size,
669 vm_prot_t desired_access)
670 {
671 Panic("dp_memory_object_data_unlock: illegal");
672 return KERN_FAILURE;
673 }
674
675
676 kern_return_t
677 dp_memory_object_data_return(
678 memory_object_t mem_obj,
679 memory_object_offset_t offset,
680 vm_size_t size,
681 boolean_t dirty,
682 boolean_t kernel_copy)
683 {
684 vstruct_t vs;
685
686 DEBUG(DEBUG_MO_EXTERNAL,
687 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
688 (int)mem_obj, (int)offset, (int)size));
689 GSTAT(global_stats.gs_pageout_calls++);
690
691 /* This routine is called by the pageout thread. The pageout thread */
692 /* must not be left blocked waiting on read activity in this object. */
693 /* Therefore the grant of the vs lock must be done on a try versus a */
694 /* blocking basis. The code below relies on the fact that the */
695 /* interface is synchronous. Should this interface be again async */
696 /* for some type of pager in the future the pages will have to be */
697 /* returned through a separate, asynchronous path. */
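/* (When the try fails below, the returned pages are pulled into a UPL */
/* and immediately aborted, handing them back to the VM so they can be */
/* paged out again on a later pass instead of blocking this thread.) */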
698
699 vs_lookup(mem_obj, vs);
700
701 default_pager_total++;
702 if(!VS_TRY_LOCK(vs)) {
703 /* the call below will not be done by caller when we have */
704 /* a synchronous interface */
705 /* return KERN_LOCK_OWNED; */
706 upl_t upl;
707 int page_list_count = 0;
708 memory_object_super_upl_request(vs->vs_control,
709 (memory_object_offset_t)offset,
710 size, size,
711 &upl, NULL, &page_list_count,
712 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
713 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
714 upl_abort(upl,0);
715 upl_deallocate(upl);
716 return KERN_SUCCESS;
717 }
718
719 if ((vs->vs_seqno != vs->vs_next_seqno++)
720 || (vs->vs_readers)
721 || (vs->vs_xfer_pending)) {
722 upl_t upl;
723 int page_list_count = 0;
724
725 vs->vs_next_seqno--;
726 VS_UNLOCK(vs);
727
728 /* the call below will not be done by caller when we have */
729 /* a synchronous interface */
730 /* return KERN_LOCK_OWNED; */
731 memory_object_super_upl_request(vs->vs_control,
732 (memory_object_offset_t)offset,
733 size, size,
734 &upl, NULL, &page_list_count,
735 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
736 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
737 upl_abort(upl,0);
738 upl_deallocate(upl);
739 return KERN_SUCCESS;
740 }
741
742 if ((size % vm_page_size) != 0)
743 Panic("bad alignment");
744
745 vs_start_write(vs);
746
747
748 vs->vs_async_pending += 1; /* protect from backing store contraction */
749 vs_unlock(vs);
750
751 /*
752 * Write the data via clustered writes. vs_cluster_write will
753 * loop if the address range specified crosses cluster
754 * boundaries.
755 */
756 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
757
758 vs_finish_write(vs);
759
760 /* temporary, need a finer lock based on cluster */
761
762 VS_LOCK(vs);
763 vs->vs_async_pending -= 1; /* release vs_async_wait */
764 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
765 vs->vs_waiting_async = FALSE;
766 VS_UNLOCK(vs);
767 thread_wakeup(&vs->vs_async_pending);
768 } else {
769 VS_UNLOCK(vs);
770 }
771
772
773 return KERN_SUCCESS;
774 }
775
776 /*
777 * Routine: default_pager_memory_object_create
778 * Purpose:
779 * Handle requests for memory objects from the
780 * kernel.
781 * Notes:
782 * Because we only give out the default memory
783 * manager port to the kernel, we don't have to
784 * be so paranoid about the contents.
785 */
786 kern_return_t
787 default_pager_memory_object_create(
788 memory_object_default_t dmm,
789 vm_size_t new_size,
790 memory_object_t *new_mem_obj)
791 {
792 vstruct_t vs;
793
794 assert(dmm == default_pager_object);
795
796 vs = vs_object_create(new_size);
797 if (vs == VSTRUCT_NULL)
798 return KERN_RESOURCE_SHORTAGE;
799
800 vs->vs_next_seqno = 0;
801
802 /*
803 * Set up associations between this memory object
804 * and this default_pager structure
805 */
806
807 vs->vs_mem_obj = ISVS;
808 vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
809
810 /*
811 * After this, other threads might receive requests
812 * for this memory object or find it in the port list.
813 */
814
815 vstruct_list_insert(vs);
816 *new_mem_obj = vs_to_mem_obj(vs);
817 return KERN_SUCCESS;
818 }
819
820 /*
821 * Create an external object.
822 */
823 kern_return_t
824 default_pager_object_create(
825 default_pager_t pager,
826 vm_size_t size,
827 memory_object_t *mem_objp)
828 {
829 vstruct_t vs;
830 kern_return_t result;
831 struct vstruct_alias *alias_struct;
832
833
834 if (pager != default_pager_object)
835 return KERN_INVALID_ARGUMENT;
836
837 vs = vs_object_create(size);
838 if (vs == VSTRUCT_NULL)
839 return KERN_RESOURCE_SHORTAGE;
840
841 /*
842 * Set up associations between the default pager
843 * and this vstruct structure
844 */
845 vs->vs_mem_obj = ISVS;
846 vstruct_list_insert(vs);
847 *mem_objp = vs_to_mem_obj(vs);
848 return KERN_SUCCESS;
849 }
850
851 kern_return_t
852 default_pager_objects(
853 default_pager_t pager,
854 default_pager_object_array_t *objectsp,
855 mach_msg_type_number_t *ocountp,
856 memory_object_array_t *pagersp,
857 mach_msg_type_number_t *pcountp)
858 {
859 vm_offset_t oaddr = 0; /* memory for objects */
860 vm_size_t osize = 0; /* current size */
861 default_pager_object_t * objects;
862 unsigned int opotential;
863
864 vm_offset_t paddr = 0; /* memory for pagers */
865 vm_size_t psize = 0; /* current size */
866 memory_object_t * pagers;
867 unsigned int ppotential;
868
869 unsigned int actual;
870 unsigned int num_objects;
871 kern_return_t kr;
872 vstruct_t entry;
873 /*
874 if (pager != default_pager_default_port)
875 return KERN_INVALID_ARGUMENT;
876 */
877
878 /* start with the inline memory */
879
880 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
881 (vm_map_copy_t) *objectsp);
882
883 if (kr != KERN_SUCCESS)
884 return kr;
885
886 osize = round_page_32(*ocountp * sizeof * objects);
887 kr = vm_map_wire(ipc_kernel_map,
888 trunc_page_32((vm_offset_t)objects),
889 round_page_32(((vm_offset_t)objects) + osize),
890 VM_PROT_READ|VM_PROT_WRITE, FALSE);
891 osize=0;
892
893 *objectsp = objects;
894 /* we start with the inline space */
895
896
897 num_objects = 0;
898 opotential = *ocountp;
899
900 pagers = (memory_object_t *) *pagersp;
901 ppotential = *pcountp;
902
903 VSL_LOCK();
904
905 /*
906 * We will send no more than this many
907 */
908 actual = vstruct_list.vsl_count;
909 VSL_UNLOCK();
910
911 if (opotential < actual) {
912 vm_offset_t newaddr;
913 vm_size_t newsize;
914
915 newsize = 2 * round_page_32(actual * sizeof * objects);
916
917 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
918 if (kr != KERN_SUCCESS)
919 goto nomemory;
920
921 oaddr = newaddr;
922 osize = newsize;
923 opotential = osize / sizeof * objects;
924 objects = (default_pager_object_t *)oaddr;
925 }
926
927 if (ppotential < actual) {
928 vm_offset_t newaddr;
929 vm_size_t newsize;
930
931 newsize = 2 * round_page_32(actual * sizeof * pagers);
932
933 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
934 if (kr != KERN_SUCCESS)
935 goto nomemory;
936
937 paddr = newaddr;
938 psize = newsize;
939 ppotential = psize / sizeof * pagers;
940 pagers = (memory_object_t *)paddr;
941 }
942
943 /*
944 * Now scan the list.
945 */
946
947 VSL_LOCK();
948
949 num_objects = 0;
950 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
951
952 memory_object_t pager;
953 vm_size_t size;
954
955 if ((num_objects >= opotential) ||
956 (num_objects >= ppotential)) {
957
958 /*
959 * This should be rare. In any case,
960 * we will only miss recent objects,
961 * because they are added at the end.
962 */
963 break;
964 }
965
966 /*
967 * Avoid interfering with normal operations
968 */
969 if (!VS_MAP_TRY_LOCK(entry))
970 goto not_this_one;
971 size = ps_vstruct_allocated_size(entry);
972 VS_MAP_UNLOCK(entry);
973
974 VS_LOCK(entry);
975
976 /*
977 * We need a reference for our caller. Adding this
978 * reference through the linked list could race with
979 * destruction of the object. If we find the object
980 * has no references, just give up on it.
981 */
983 if (entry->vs_references == 0) {
984 VS_UNLOCK(entry);
985 goto not_this_one;
986 }
987 dp_memory_object_reference(pager = vs_to_mem_obj(entry)); /* pager is returned below */
988 VS_UNLOCK(entry);
989
990 /* the arrays are wired, so no deadlock worries */
991
992 objects[num_objects].dpo_object = (vm_offset_t) entry;
993 objects[num_objects].dpo_size = size;
994 pagers [num_objects++] = pager;
995 continue;
996
997 not_this_one:
998 /*
999 * Do not return garbage
1000 */
1001 objects[num_objects].dpo_object = (vm_offset_t) 0;
1002 objects[num_objects].dpo_size = 0;
1003 pagers[num_objects++] = MEMORY_OBJECT_NULL;
1004
1005 }
1006
1007 VSL_UNLOCK();
1008
1009 /*
1010 * Deallocate and clear unused memory.
1011 * (Returned memory will automagically become pageable.)
1012 */
1013
1014 if (objects == *objectsp) {
1015
1016 /*
1017 * Our returned information fit inline.
1018 * Nothing to deallocate.
1019 */
1020 *ocountp = num_objects;
1021 } else if (actual == 0) {
1022 (void) vm_deallocate(kernel_map, oaddr, osize);
1023
1024 /* return zero items inline */
1025 *ocountp = 0;
1026 } else {
1027 vm_offset_t used;
1028
1029 used = round_page_32(actual * sizeof * objects);
1030
1031 if (used != osize)
1032 (void) vm_deallocate(kernel_map,
1033 oaddr + used, osize - used);
1034
1035 *objectsp = objects;
1036 *ocountp = num_objects;
1037 }
1038
1039 if (pagers == (memory_object_t *)*pagersp) {
1040
1041 /*
1042 * Our returned information fit inline.
1043 * Nothing to deallocate.
1044 */
1045
1046 *pcountp = num_objects;
1047 } else if (actual == 0) {
1048 (void) vm_deallocate(kernel_map, paddr, psize);
1049
1050 /* return zero items inline */
1051 *pcountp = 0;
1052 } else {
1053 vm_offset_t used;
1054
1055 used = round_page_32(actual * sizeof * pagers);
1056
1057 if (used != psize)
1058 (void) vm_deallocate(kernel_map,
1059 paddr + used, psize - used);
1060
1061 *pagersp = (memory_object_array_t)pagers;
1062 *pcountp = num_objects;
1063 }
1064 (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1065 *ocountp + (vm_offset_t)objects, FALSE);
1066 (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1067 *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1068
1069 return KERN_SUCCESS;
1070
1071 nomemory:
1072 {
1073 register int i;
1074 for (i = 0; i < num_objects; i++)
1075 if (pagers[i] != MEMORY_OBJECT_NULL)
1076 memory_object_deallocate(pagers[i]);
1077 }
1078
1079 if (objects != *objectsp)
1080 (void) vm_deallocate(kernel_map, oaddr, osize);
1081
1082 if (pagers != (memory_object_t *)*pagersp)
1083 (void) vm_deallocate(kernel_map, paddr, psize);
1084
1085 return KERN_RESOURCE_SHORTAGE;
1086 }
1087
1088 kern_return_t
1089 default_pager_object_pages(
1090 default_pager_t pager,
1091 memory_object_t object,
1092 default_pager_page_array_t *pagesp,
1093 mach_msg_type_number_t *countp)
1094 {
1095 vm_offset_t addr; /* memory for page offsets */
1096 vm_size_t size = 0; /* current memory size */
1097 default_pager_page_t * pages;
1098 unsigned int potential, actual;
1099 kern_return_t kr;
1100
1101
1102 if (pager != default_pager_object)
1103 return KERN_INVALID_ARGUMENT;
1104
1105 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
1106 (vm_map_copy_t) *pagesp);
1107
1108 if (kr != KERN_SUCCESS)
1109 return kr;
1110
1111 size = round_page_32(*countp * sizeof * pages);
1112 kr = vm_map_wire(ipc_kernel_map,
1113 trunc_page_32((vm_offset_t)pages),
1114 round_page_32(((vm_offset_t)pages) + size),
1115 VM_PROT_READ|VM_PROT_WRITE, FALSE);
1116 size=0;
1117
1118 *pagesp = pages;
1119 /* we start with the inline space */
1120
1121 addr = (vm_offset_t)pages;
1122 potential = *countp;
1123
1124 for (;;) {
1125 vstruct_t entry;
1126
1127 VSL_LOCK();
1128 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1129 vs_links) {
1130 VS_LOCK(entry);
1131 if (vs_to_mem_obj(entry) == object) {
1132 VSL_UNLOCK();
1133 goto found_object;
1134 }
1135 VS_UNLOCK(entry);
1136 }
1137 VSL_UNLOCK();
1138
1139 /* did not find the object */
1140
1141 if (pages != *pagesp)
1142 (void) vm_deallocate(kernel_map, addr, size);
1143 return KERN_INVALID_ARGUMENT;
1144
1145 found_object:
1146
1147 if (!VS_MAP_TRY_LOCK(entry)) {
1148 /* oh well bad luck */
1149 int wresult;
1150
1151 VS_UNLOCK(entry);
1152
1153 assert_wait_timeout( 1, THREAD_UNINT );
1154 wresult = thread_block(THREAD_CONTINUE_NULL);
1155 assert(wresult == THREAD_TIMED_OUT);
1156 continue;
1157 }
1158
1159 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1160 VS_MAP_UNLOCK(entry);
1161 VS_UNLOCK(entry);
1162
1163 if (actual <= potential)
1164 break;
1165
1166 /* allocate more memory */
1167
1168 if (pages != *pagesp)
1169 (void) vm_deallocate(kernel_map, addr, size);
1170 size = round_page_32(actual * sizeof * pages);
1171 kr = vm_allocate(kernel_map, &addr, size, TRUE);
1172 if (kr != KERN_SUCCESS)
1173 return kr;
1174 pages = (default_pager_page_t *)addr;
1175 potential = size / sizeof * pages;
1176 }
1177
1178 /*
1179 * Deallocate and clear unused memory.
1180 * (Returned memory will automagically become pageable.)
1181 */
1182
1183 if (pages == *pagesp) {
1184
1185 /*
1186 * Our returned information fit inline.
1187 * Nothing to deallocate.
1188 */
1189
1190 *countp = actual;
1191 } else if (actual == 0) {
1192 (void) vm_deallocate(kernel_map, addr, size);
1193
1194 /* return zero items inline */
1195 *countp = 0;
1196 } else {
1197 vm_offset_t used;
1198
1199 used = round_page_32(actual * sizeof * pages);
1200
1201 if (used != size)
1202 (void) vm_deallocate(kernel_map,
1203 addr + used, size - used);
1204
1205 *pagesp = pages;
1206 *countp = actual;
1207 }
1208 (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
1209 *countp + (vm_offset_t)pages, FALSE);
1210 (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
1211 *countp, TRUE, (vm_map_copy_t *)pagesp);
1212 return KERN_SUCCESS;
1213 }