1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51 /*
52 * Default Pager.
53 * Memory Object Management.
54 */
55
56 #include "default_pager_internal.h"
57 #include <default_pager/default_pager_object_server.h>
58 #include <mach/memory_object_default_server.h>
59 #include <mach/memory_object_control.h>
60 #include <mach/memory_object_types.h>
61 #include <mach/memory_object_server.h>
62 #include <mach/upl.h>
63 #include <mach/vm_map.h>
64 #include <vm/memory_object.h>
65 #include <vm/vm_pageout.h>
66 #include <vm/vm_map.h>
67 #include <vm/vm_protos.h>
68
69 /* forward declaration */
70 vstruct_t vs_object_create(vm_size_t size);
71
72 /*
73 * List of all vstructs. A specific vstruct is
74 * found directly via its port; this list is
75 * only used for monitoring purposes by the
76 * default_pager_object* calls and by ps_delete
77 * when abstract memory objects must be scanned
78 * to remove any live storage on a segment which
79 * is to be removed.
80 */
81 struct vstruct_list_head vstruct_list;
82
83 __private_extern__ void
84 vstruct_list_insert(
85 vstruct_t vs)
86 {
87 VSL_LOCK();
88 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
89 vstruct_list.vsl_count++;
90 VSL_UNLOCK();
91 }
92
93
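/*
 * Remove a vstruct from the global list. Unlike vstruct_list_insert(),
 * this does not take the list lock itself; the caller must already
 * hold VSL_LOCK.
 */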
94 __private_extern__ void
95 vstruct_list_delete(
96 vstruct_t vs)
97 {
98 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
99 vstruct_list.vsl_count--;
100 }
101
102 /*
103 * We use the sequence numbers on requests to regulate
104 * our parallelism. In general, we allow multiple reads and writes
105 * to proceed in parallel, with the exception that reads must
106 * wait for previous writes to finish. (Because the kernel might
107 * generate a data-request for a page on the heels of a data-write
108 * for the same page, and we must avoid returning stale data.)
109 * Terminate requests wait for preceding reads and writes to finish.
110 */
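/*
 * Typical request pattern (cf. dp_memory_object_data_request below):
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);		wait for our turn in the seqno order
 *	vs_start_read(vs);	count this request as a reader
 *	vs_unlock(vs);		bump the seqno, admit the next request
 *	... perform the read from backing store ...
 *	vs_finish_read(vs);	drop the reader count, wake waiters if
 *				we were the last reader
 */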
111
112 static unsigned int default_pager_total = 0; /* debugging */
113 static unsigned int default_pager_wait_seqno = 0; /* debugging */
114 static unsigned int default_pager_wait_read = 0; /* debugging */
115 static unsigned int default_pager_wait_write = 0; /* debugging */
116
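/*
 * Wait for all asynchronous I/O pending on this object to drain.
 * Called and returns with the pager locked; the lock is dropped and
 * reacquired around each wait.
 */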
117 __private_extern__ void
118 vs_async_wait(
119 vstruct_t vs)
120 {
121
122 ASSERT(vs->vs_async_pending >= 0);
123 while (vs->vs_async_pending > 0) {
124 vs->vs_waiting_async = TRUE;
125 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
126 VS_UNLOCK(vs);
127 thread_block(THREAD_CONTINUE_NULL);
128 VS_LOCK(vs);
129 }
130 ASSERT(vs->vs_async_pending == 0);
131 }
132
133
134 #if PARALLEL
135 /*
136 * Waits for correct sequence number. Leaves pager locked.
137 *
138 * JMM - Sequence numbers guarantee ordering of requests generated
139 * by a single thread if the receiver is multithreaded and
140 * the interfaces are asynchronous (i.e. sender can generate
141 * more than one request before the first is received in the
142 * pager). Normally, IPC would generate these numbers in that
143 * case. But we are trying to avoid using IPC for the in-kernel
144 * scenario. Since these are actually invoked synchronously
145 * anyway (in-kernel), we can just fake the sequence number
146 * generation here (thus avoiding the dependence on IPC).
147 */
148 __private_extern__ void
149 vs_lock(
150 vstruct_t vs)
151 {
152 mach_port_seqno_t seqno;
153
154 default_pager_total++;
155 VS_LOCK(vs);
156
157 seqno = vs->vs_next_seqno++;
158
159 while (vs->vs_seqno != seqno) {
160 default_pager_wait_seqno++;
161 vs->vs_waiting_seqno = TRUE;
162 assert_wait(&vs->vs_seqno, THREAD_UNINT);
163 VS_UNLOCK(vs);
164 thread_block(THREAD_CONTINUE_NULL);
165 VS_LOCK(vs);
166 }
167 }
168
169 /*
170 * Increments sequence number and unlocks pager.
171 */
172 __private_extern__ void
173 vs_unlock(vstruct_t vs)
174 {
175 vs->vs_seqno++;
176 if (vs->vs_waiting_seqno) {
177 vs->vs_waiting_seqno = FALSE;
178 VS_UNLOCK(vs);
179 thread_wakeup(&vs->vs_seqno);
180 return;
181 }
182 VS_UNLOCK(vs);
183 }
184
185 /*
186 * Start a read - one more reader. Pager must be locked.
187 */
188 __private_extern__ void
189 vs_start_read(
190 vstruct_t vs)
191 {
192 vs->vs_readers++;
193 }
194
195 /*
196 * Wait for readers. Unlocks and relocks pager if wait needed.
197 */
198 __private_extern__ void
199 vs_wait_for_readers(
200 vstruct_t vs)
201 {
202 while (vs->vs_readers != 0) {
203 default_pager_wait_read++;
204 vs->vs_waiting_read = TRUE;
205 assert_wait(&vs->vs_readers, THREAD_UNINT);
206 VS_UNLOCK(vs);
207 thread_block(THREAD_CONTINUE_NULL);
208 VS_LOCK(vs);
209 }
210 }
211
212 /*
213 * Finish a read. Called with the pager unlocked; returns unlocked.
214 */
215 __private_extern__ void
216 vs_finish_read(
217 vstruct_t vs)
218 {
219 VS_LOCK(vs);
220 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
221 vs->vs_waiting_read = FALSE;
222 VS_UNLOCK(vs);
223 thread_wakeup(&vs->vs_readers);
224 return;
225 }
226 VS_UNLOCK(vs);
227 }
228
229 /*
230 * Start a write - one more writer. Pager must be locked.
231 */
232 __private_extern__ void
233 vs_start_write(
234 vstruct_t vs)
235 {
236 vs->vs_writers++;
237 }
238
239 /*
240 * Wait for writers. Unlocks and relocks pager if wait needed.
241 */
242 __private_extern__ void
243 vs_wait_for_writers(
244 vstruct_t vs)
245 {
246 while (vs->vs_writers != 0) {
247 default_pager_wait_write++;
248 vs->vs_waiting_write = TRUE;
249 assert_wait(&vs->vs_writers, THREAD_UNINT);
250 VS_UNLOCK(vs);
251 thread_block(THREAD_CONTINUE_NULL);
252 VS_LOCK(vs);
253 }
254 vs_async_wait(vs);
255 }
256
257 /* This is to be used for the transfer from segment code ONLY */
258 /* The transfer code holds off vs destruction by keeping the */
259 /* vs_async_pending count non-zero. It will not conflict with */
260 /* other writers on an async basis because it only writes on */
261 /* a cluster basis into fresh (as of sync time) cluster locations */
262
263 __private_extern__ void
264 vs_wait_for_sync_writers(
265 vstruct_t vs)
266 {
267 while (vs->vs_writers != 0) {
268 default_pager_wait_write++;
269 vs->vs_waiting_write = TRUE;
270 assert_wait(&vs->vs_writers, THREAD_UNINT);
271 VS_UNLOCK(vs);
272 thread_block(THREAD_CONTINUE_NULL);
273 VS_LOCK(vs);
274 }
275 }
276
277
278 /*
279 * Finish a write. Called with the pager unlocked; returns unlocked.
280 */
281 __private_extern__ void
282 vs_finish_write(
283 vstruct_t vs)
284 {
285 VS_LOCK(vs);
286 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
287 vs->vs_waiting_write = FALSE;
288 VS_UNLOCK(vs);
289 thread_wakeup(&vs->vs_writers);
290 return;
291 }
292 VS_UNLOCK(vs);
293 }
294 #endif /* PARALLEL */
295
296 vstruct_t
297 vs_object_create(
298 vm_size_t size)
299 {
300 vstruct_t vs;
301
302 /*
303 * Allocate a vstruct. If there are any problems, then report them
304 * to the console.
305 */
306 vs = ps_vstruct_create(size);
307 if (vs == VSTRUCT_NULL) {
308 dprintf(("vs_object_create: unable to allocate %s\n",
309 "-- either run swapon command or reboot"));
310 return VSTRUCT_NULL;
311 }
312
313 return vs;
314 }
315
316 #if 0
317 void default_pager_add(vstruct_t, boolean_t); /* forward */
318
319 void
320 default_pager_add(
321 vstruct_t vs,
322 boolean_t internal)
323 {
324 memory_object_t mem_obj = vs->vs_mem_obj;
325 mach_port_t pset;
326 mach_port_mscount_t sync;
327 mach_port_t previous;
328 kern_return_t kr;
329 static char here[] = "default_pager_add";
330
331 /*
332 * The port currently has a make-send count of zero,
333 * because either we just created the port or we just
334 * received the port in a memory_object_create request.
335 */
336
337 if (internal) {
338 /* possibly generate an immediate no-senders notification */
339 sync = 0;
340 pset = default_pager_internal_set;
341 } else {
342 /* delay notification till send right is created */
343 sync = 1;
344 pset = default_pager_external_set;
345 }
346
347 ipc_port_make_sonce(mem_obj);
348 ip_lock(mem_obj); /* unlocked in nsrequest below */
349 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
350 }
351
352 #endif
353
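/*
 * Pager operations vector handed to the VM system; the kernel invokes
 * the default pager through these entry points.
 */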
354 const struct memory_object_pager_ops default_pager_ops = {
355 dp_memory_object_reference,
356 dp_memory_object_deallocate,
357 dp_memory_object_init,
358 dp_memory_object_terminate,
359 dp_memory_object_data_request,
360 dp_memory_object_data_return,
361 dp_memory_object_data_initialize,
362 dp_memory_object_data_unlock,
363 dp_memory_object_synchronize,
364 dp_memory_object_unmap,
365 "default pager"
366 };
367
368 kern_return_t
369 dp_memory_object_init(
370 memory_object_t mem_obj,
371 memory_object_control_t control,
372 __unused vm_size_t pager_page_size)
373 {
374 vstruct_t vs;
375
376 assert(pager_page_size == vm_page_size);
377
378 memory_object_control_reference(control);
379
380 vs_lookup(mem_obj, vs);
381 vs_lock(vs);
382
383 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
384 Panic("bad request");
385
386 vs->vs_control = control;
387 vs_unlock(vs);
388
389 return KERN_SUCCESS;
390 }
391
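/*
 * Synchronize request: take and release the sequence lock to serialize
 * with requests already in flight, then immediately report the range
 * as synchronized; there is no further work to do here.
 */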
392 kern_return_t
393 dp_memory_object_synchronize(
394 memory_object_t mem_obj,
395 memory_object_offset_t offset,
396 vm_size_t length,
397 __unused vm_sync_t flags)
398 {
399 vstruct_t vs;
400
401 vs_lookup(mem_obj, vs);
402 vs_lock(vs);
403 vs_unlock(vs);
404
405 memory_object_synchronize_completed(vs->vs_control, offset, length);
406
407 return KERN_SUCCESS;
408 }
409
410 kern_return_t
411 dp_memory_object_unmap(
412 __unused memory_object_t mem_obj)
413 {
414 panic("dp_memory_object_unmap");
415
416 return KERN_FAILURE;
417 }
418
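/*
 * The kernel is detaching from this memory object. Wait for outstanding
 * reads and writes to drain, clear the control association so a future
 * memory_object_init can succeed, and drop our reference on the control.
 */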
419 kern_return_t
420 dp_memory_object_terminate(
421 memory_object_t mem_obj)
422 {
423 memory_object_control_t control;
424 vstruct_t vs;
425
426 /*
427 * control port is a receive right, not a send right.
428 */
429
430 vs_lookup(mem_obj, vs);
431 vs_lock(vs);
432
433 /*
434 * Wait for read and write requests to terminate.
435 */
436
437 vs_wait_for_readers(vs);
438 vs_wait_for_writers(vs);
439
440 /*
441 * After memory_object_terminate both memory_object_init
442 * and a no-senders notification are possible, so we need
443 * to clean up our reference to the memory_object_control
444 * to prepare for a new init.
445 */
446
447 control = vs->vs_control;
448 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
449
450 /* A bit of special-case ugliness here: wake up any waiting read */
451 /* requests. These data requests had to be removed from the seqno */
452 /* traffic because of a performance bottleneck with large memory */
453 /* objects. The problem will right itself with the new component- */
454 /* based synchronous interface. The new async will be able to return */
455 /* failure during its sync phase. In the meantime ... */
456
457 thread_wakeup(&vs->vs_writers);
458 thread_wakeup(&vs->vs_async_pending);
459
460 vs_unlock(vs);
461
462 /*
463 * Now we deallocate our reference on the control.
464 */
465 memory_object_control_deallocate(control);
466 return KERN_SUCCESS;
467 }
468
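/*
 * Take an additional reference on the memory object. The object must
 * already have at least one reference; an object whose count has
 * reached zero is never resurrected.
 */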
469 void
470 dp_memory_object_reference(
471 memory_object_t mem_obj)
472 {
473 vstruct_t vs;
474
475 vs_lookup_safe(mem_obj, vs);
476 if (vs == VSTRUCT_NULL)
477 return;
478
479 VS_LOCK(vs);
480 assert(vs->vs_references > 0);
481 vs->vs_references++;
482 VS_UNLOCK(vs);
483 }
484
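/*
 * Release one reference on the memory object. When the last reference
 * is dropped, wait for pending async I/O, unlink the vstruct from the
 * global list and destroy it.
 */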
485 void
486 dp_memory_object_deallocate(
487 memory_object_t mem_obj)
488 {
489 vstruct_t vs;
490 mach_port_seqno_t seqno;
491
492 /*
493 * Because we don't give out multiple first references
494 * for a memory object, there can't be a race
495 * between getting a deallocate call and creating
496 * a new reference for the object.
497 */
498
499 vs_lookup_safe(mem_obj, vs);
500 if (vs == VSTRUCT_NULL)
501 return;
502
503 VS_LOCK(vs);
504 if (--vs->vs_references > 0) {
505 VS_UNLOCK(vs);
506 return;
507 }
508
509 seqno = vs->vs_next_seqno++;
510 while (vs->vs_seqno != seqno) {
511 default_pager_wait_seqno++;
512 vs->vs_waiting_seqno = TRUE;
513 assert_wait(&vs->vs_seqno, THREAD_UNINT);
514 VS_UNLOCK(vs);
515 thread_block(THREAD_CONTINUE_NULL);
516 VS_LOCK(vs);
517 }
518
519 vs_async_wait(vs); /* wait for pending async IO */
520
521 /* do not delete the vs structure until the referencing pointers */
522 /* in the vstruct list have been expunged */
523
524 /* get VSL_LOCK out of order by using TRY mechanism */
525 while(!VSL_LOCK_TRY()) {
526 VS_UNLOCK(vs);
527 VSL_LOCK();
528 VSL_UNLOCK();
529 VS_LOCK(vs);
530 vs_async_wait(vs); /* wait for pending async IO */
531 }
532
533
534 /*
535 * We shouldn't get a deallocation call
536 * when the kernel has the object cached.
537 */
538 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
539 Panic("bad request");
540
541 /*
542 * Unlock the pager (though there should be no one
543 * waiting for it).
544 */
545 VS_UNLOCK(vs);
546
547 /* Lock out paging segment removal for the duration of this */
548 /* call. We are vulnerable to losing a paging segment we rely */
549 /* on as soon as we remove ourselves from the VSL and unlock */
550
551 /* Keep our thread from blocking on attempt to trigger backing */
552 /* store release */
553 backing_store_release_trigger_disable += 1;
554
555 /*
556 * Remove the memory object port association, and then
557 * destroy the port itself. We must remove the object
558 * from the port list before deallocating the pager,
559 * because of default_pager_objects.
560 */
561 vstruct_list_delete(vs);
562 VSL_UNLOCK();
563
564 ps_vstruct_dealloc(vs);
565
566 VSL_LOCK();
567 backing_store_release_trigger_disable -= 1;
568 if(backing_store_release_trigger_disable == 0) {
569 thread_wakeup((event_t)&backing_store_release_trigger_disable);
570 }
571 VSL_UNLOCK();
572 }
573
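/*
 * Page-in request from the kernel: after waiting out any writers on
 * this object, read the requested range from backing store via
 * pvs_cluster_read().
 */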
574 kern_return_t
575 dp_memory_object_data_request(
576 memory_object_t mem_obj,
577 memory_object_offset_t offset,
578 vm_size_t length,
579 __unused vm_prot_t protection_required)
580 {
581 vstruct_t vs;
582
583 GSTAT(global_stats.gs_pagein_calls++);
584
585
586 /* CDY: at this moment vs_lookup panics when presented with the wrong */
587 /* port. As we are expanding this pager to support user interfaces, */
588 /* this should be changed to return KERN_FAILURE. */
589 vs_lookup(mem_obj, vs);
590 vs_lock(vs);
591
592 /* We are going to relax the strict sequencing here for performance */
593 /* reasons. We can do this because we know that the read and */
594 /* write threads are different and we rely on synchronization */
595 /* of read and write requests at the cache memory_object level. */
596 /* This open-codes vs_wait_for_writers; all of this goes away when */
597 /* we get real control of seqno with the new component interface. */
598
599 if (vs->vs_writers != 0) {
600 /* you can't hold on to the seqno and go */
601 /* to sleep like that */
602 vs_unlock(vs); /* bump internal count of seqno */
603 VS_LOCK(vs);
604 while (vs->vs_writers != 0) {
605 default_pager_wait_write++;
606 vs->vs_waiting_write = TRUE;
607 assert_wait(&vs->vs_writers, THREAD_UNINT);
608 VS_UNLOCK(vs);
609 thread_block(THREAD_CONTINUE_NULL);
610 VS_LOCK(vs);
611 vs_async_wait(vs);
612 }
613 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
614 VS_UNLOCK(vs);
615 return KERN_FAILURE;
616 }
617 vs_start_read(vs);
618 VS_UNLOCK(vs);
619 } else {
620 vs_start_read(vs);
621 vs_unlock(vs);
622 }
623
624 /*
625 * Request must be on a page boundary and a multiple of pages.
626 */
627 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
628 Panic("bad alignment");
629
630 pvs_cluster_read(vs, (vm_offset_t)offset, length);
631
632 vs_finish_read(vs);
633
634 return KERN_SUCCESS;
635 }
636
637 /*
638 * memory_object_data_initialize: check whether we already have each page, and
639 * write it if we do not. The implementation is far from optimized, and
640 * also assumes that the default_pager is single-threaded.
641 */
642 /* It is questionable whether or not a pager should decide what is relevant */
643 /* and what is not in data sent from the kernel. Data initialize has been */
644 /* changed to copy back all data sent to it in preparation for its eventual */
645 /* merge with data return. It is the kernel that should decide what pages */
646 /* to write back. As of the writing of this note, this is indeed the case: */
647 /* the kernel writes back one page at a time through this interface. */
648
649 kern_return_t
650 dp_memory_object_data_initialize(
651 memory_object_t mem_obj,
652 memory_object_offset_t offset,
653 vm_size_t size)
654 {
655 vstruct_t vs;
656
657 DP_DEBUG(DEBUG_MO_EXTERNAL,
658 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
659 (int)mem_obj, (int)offset, (int)size));
660 GSTAT(global_stats.gs_pages_init += atop_32(size));
661
662 vs_lookup(mem_obj, vs);
663 vs_lock(vs);
664 vs_start_write(vs);
665 vs_unlock(vs);
666
667 /*
668 * Write the data via clustered writes. vs_cluster_write will
669 * loop if the address range specified crosses cluster
670 * boundaries.
671 */
672 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
673
674 vs_finish_write(vs);
675
676 return KERN_SUCCESS;
677 }
678
679 kern_return_t
680 dp_memory_object_data_unlock(
681 __unused memory_object_t mem_obj,
682 __unused memory_object_offset_t offset,
683 __unused vm_size_t size,
684 __unused vm_prot_t desired_access)
685 {
686 Panic("dp_memory_object_data_unlock: illegal");
687 return KERN_FAILURE;
688 }
689
690
691 /*ARGSUSED8*/
692 kern_return_t
693 dp_memory_object_data_return(
694 memory_object_t mem_obj,
695 memory_object_offset_t offset,
696 vm_size_t size,
697 __unused memory_object_offset_t *resid_offset,
698 __unused int *io_error,
699 __unused boolean_t dirty,
700 __unused boolean_t kernel_copy,
701 __unused int upl_flags)
702 {
703 vstruct_t vs;
704
705 DP_DEBUG(DEBUG_MO_EXTERNAL,
706 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
707 (int)mem_obj, (int)offset, (int)size));
708 GSTAT(global_stats.gs_pageout_calls++);
709
710 /* This routine is called by the pageout thread. The pageout thread */
711 /* must not be blocked by read activities, so the grant of the vs */
712 /* lock must be done on a try versus a blocking basis. The code */
713 /* below relies on the fact that the interface is synchronous. */
714 /* Should this interface again become async for some type of pager */
715 /* in the future, the pages will have to be returned through a */
716 /* separate, asynchronous path. */
717
718 vs_lookup(mem_obj, vs);
719
720 default_pager_total++;
721 if(!VS_TRY_LOCK(vs)) {
722 /* the call below will not be done by caller when we have */
723 /* a synchronous interface */
724 /* return KERN_LOCK_OWNED; */
725 upl_t upl;
726 unsigned int page_list_count = 0;
727 memory_object_super_upl_request(vs->vs_control,
728 (memory_object_offset_t)offset,
729 size, size,
730 &upl, NULL, &page_list_count,
731 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
732 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
733 upl_abort(upl,0);
734 upl_deallocate(upl);
735 return KERN_SUCCESS;
736 }
737
738 if ((vs->vs_seqno != vs->vs_next_seqno++)
739 || (vs->vs_readers)
740 || (vs->vs_xfer_pending)) {
741 upl_t upl;
742 unsigned int page_list_count = 0;
743
744 vs->vs_next_seqno--;
745 VS_UNLOCK(vs);
746
747 /* the call below will not be done by caller when we have */
748 /* a synchronous interface */
749 /* return KERN_LOCK_OWNED; */
750 memory_object_super_upl_request(vs->vs_control,
751 (memory_object_offset_t)offset,
752 size, size,
753 &upl, NULL, &page_list_count,
754 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
755 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
756 upl_abort(upl,0);
757 upl_deallocate(upl);
758 return KERN_SUCCESS;
759 }
760
761 if ((size % vm_page_size) != 0)
762 Panic("bad alignment");
763
764 vs_start_write(vs);
765
766
767 vs->vs_async_pending += 1; /* protect from backing store contraction */
768 vs_unlock(vs);
769
770 /*
771 * Write the data via clustered writes. vs_cluster_write will
772 * loop if the address range specified crosses cluster
773 * boundaries.
774 */
775 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
776
777 vs_finish_write(vs);
778
779 /* temporary, need a finer lock based on cluster */
780
781 VS_LOCK(vs);
782 vs->vs_async_pending -= 1; /* release vs_async_wait */
783 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
784 vs->vs_waiting_async = FALSE;
785 VS_UNLOCK(vs);
786 thread_wakeup(&vs->vs_async_pending);
787 } else {
788 VS_UNLOCK(vs);
789 }
790
791
792 return KERN_SUCCESS;
793 }
794
795 /*
796 * Routine: default_pager_memory_object_create
797 * Purpose:
798 * Handle requests for memory objects from the
799 * kernel.
800 * Notes:
801 * Because we only give out the default memory
802 * manager port to the kernel, we don't have to
803 * be so paranoid about the contents.
804 */
805 kern_return_t
806 default_pager_memory_object_create(
807 __unused memory_object_default_t dmm,
808 vm_size_t new_size,
809 memory_object_t *new_mem_obj)
810 {
811 vstruct_t vs;
812
813 assert(dmm == default_pager_object);
814
815 vs = vs_object_create(new_size);
816 if (vs == VSTRUCT_NULL)
817 return KERN_RESOURCE_SHORTAGE;
818
819 vs->vs_next_seqno = 0;
820
821 /*
822 * Set up associations between this memory object
823 * and this default_pager structure
824 */
825
826 vs->vs_pager_ops = &default_pager_ops;
827 vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
828
829 /*
830 * After this, other threads might receive requests
831 * for this memory object or find it in the port list.
832 */
833
834 vstruct_list_insert(vs);
835 *new_mem_obj = vs_to_mem_obj(vs);
836 return KERN_SUCCESS;
837 }
838
839 /*
840 * Create an external object.
841 */
842 kern_return_t
843 default_pager_object_create(
844 default_pager_t default_pager,
845 vm_size_t size,
846 memory_object_t *mem_objp)
847 {
848 vstruct_t vs;
849
850 if (default_pager != default_pager_object)
851 return KERN_INVALID_ARGUMENT;
852
853 vs = vs_object_create(size);
854 if (vs == VSTRUCT_NULL)
855 return KERN_RESOURCE_SHORTAGE;
856
857 /*
858 * Set up associations between the default pager
859 * and this vstruct structure
860 */
861 vs->vs_pager_ops = &default_pager_ops;
862 vstruct_list_insert(vs);
863 *mem_objp = vs_to_mem_obj(vs);
864 return KERN_SUCCESS;
865 }
866
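/*
 * Monitoring interface: return, for every object the default pager
 * currently manages, its allocated backing-store size together with
 * a reference to its pager, as out-of-line arrays.
 */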
867 kern_return_t
868 default_pager_objects(
869 default_pager_t default_pager,
870 default_pager_object_array_t *objectsp,
871 mach_msg_type_number_t *ocountp,
872 mach_port_array_t *portsp,
873 mach_msg_type_number_t *pcountp)
874 {
875 vm_offset_t oaddr = 0; /* memory for objects */
876 vm_size_t osize = 0; /* current size */
877 default_pager_object_t * objects;
878 unsigned int opotential = 0;
879
880 vm_map_copy_t pcopy = 0; /* copy handle for pagers */
881 vm_size_t psize = 0; /* current size */
882 memory_object_t * pagers;
883 unsigned int ppotential = 0;
884
885 unsigned int actual;
886 unsigned int num_objects;
887 kern_return_t kr;
888 vstruct_t entry;
889
890 if (default_pager != default_pager_object)
891 return KERN_INVALID_ARGUMENT;
892
893 /*
894 * We will send no more than this many
895 */
896 actual = vstruct_list.vsl_count;
897
898 /*
899 * Our out-of-line port arrays are simply kalloc'ed.
900 */
901 psize = round_page(actual * sizeof * pagers);
902 ppotential = psize / sizeof * pagers;
903 pagers = (memory_object_t *)kalloc(psize);
904 if (0 == pagers)
905 return KERN_RESOURCE_SHORTAGE;
906
907 /*
908 * Returned out-of-line data must be allocated out of
909 * the ipc_kernel_map, wired down, filled in, and
910 * then "copied in" as if it had been sent by a
911 * user process.
912 */
913 osize = round_page(actual * sizeof * objects);
914 opotential = osize / sizeof * objects;
915 kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
916 if (KERN_SUCCESS != kr) {
917 kfree(pagers, psize);
918 return KERN_RESOURCE_SHORTAGE;
919 }
920 objects = (default_pager_object_t *)oaddr;
921
922
923 /*
924 * Now scan the list.
925 */
926
927 VSL_LOCK();
928
929 num_objects = 0;
930 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
931
932 memory_object_t pager;
933 vm_size_t size;
934
935 if ((num_objects >= opotential) ||
936 (num_objects >= ppotential)) {
937
938 /*
939 * This should be rare. In any case,
940 * we will only miss recent objects,
941 * because they are added at the end.
942 */
943 break;
944 }
945
946 /*
947 * Avoid interfering with normal operations
948 */
949 if (!VS_MAP_TRY_LOCK(entry))
950 goto not_this_one;
951 size = ps_vstruct_allocated_size(entry);
952 VS_MAP_UNLOCK(entry);
953
956 /*
957 * We need a reference for our caller. Adding this
958 * reference through the linked list could race with
959 * destruction of the object. If we find the object
960 * has no references, just give up on it.
961 */
962 VS_LOCK(entry);
963 if (entry->vs_references == 0) {
964 VS_UNLOCK(entry);
965 goto not_this_one;
966 }
967 pager = vs_to_mem_obj(entry);
968 dp_memory_object_reference(pager);
969 VS_UNLOCK(entry);
970
971 /* the arrays are wired, so no deadlock worries */
972
973 objects[num_objects].dpo_object = (vm_offset_t) entry;
974 objects[num_objects].dpo_size = size;
975 pagers [num_objects++] = pager;
976 continue;
977
978 not_this_one:
979 /*
980 * Do not return garbage
981 */
982 objects[num_objects].dpo_object = (vm_offset_t) 0;
983 objects[num_objects].dpo_size = 0;
984 pagers[num_objects++] = MEMORY_OBJECT_NULL;
985
986 }
987
988 VSL_UNLOCK();
989
990 /* clear out any excess allocation */
991 while (num_objects < opotential) {
992 objects[--opotential].dpo_object = (vm_offset_t) 0;
993 objects[opotential].dpo_size = 0;
994 }
995 while (num_objects < ppotential) {
996 pagers[--ppotential] = MEMORY_OBJECT_NULL;
997 }
998
999 kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
1000 vm_map_round_page(oaddr + osize), FALSE);
1001 assert(KERN_SUCCESS == kr);
1002 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
1003 (vm_map_size_t)osize, TRUE, &pcopy);
1004 assert(KERN_SUCCESS == kr);
1005
1006 *objectsp = (default_pager_object_array_t)objects;
1007 *ocountp = num_objects;
1008 *portsp = (mach_port_array_t)pcopy;
1009 *pcountp = num_objects;
1010
1011 return KERN_SUCCESS;
1012 }
1013
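/*
 * Monitoring interface: return the page offsets currently held on
 * backing store for the given memory object, as an out-of-line array.
 */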
1014 kern_return_t
1015 default_pager_object_pages(
1016 default_pager_t default_pager,
1017 mach_port_t memory_object,
1018 default_pager_page_array_t *pagesp,
1019 mach_msg_type_number_t *countp)
1020 {
1021 vm_offset_t addr = 0; /* memory for page offsets */
1022 vm_size_t size = 0; /* current memory size */
1023 vm_map_copy_t copy;
1024 default_pager_page_t * pages = 0;
1025 unsigned int potential;
1026 unsigned int actual;
1027 kern_return_t kr;
1028 memory_object_t object;
1029
1030 if (default_pager != default_pager_object)
1031 return KERN_INVALID_ARGUMENT;
1032
1033 object = (memory_object_t) memory_object;
1034
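/*
 * Sizing loop: ps_vstruct_allocated_pages() reports how many entries
 * it would have returned; if our buffer was too small, grow it and
 * rescan. The object is looked up again on every pass, since the list
 * may have changed while we were blocked.
 */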
1035 potential = 0;
1036 for (;;) {
1037 vstruct_t entry;
1038
1039 VSL_LOCK();
1040 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1041 vs_links) {
1042 VS_LOCK(entry);
1043 if (vs_to_mem_obj(entry) == object) {
1044 VSL_UNLOCK();
1045 goto found_object;
1046 }
1047 VS_UNLOCK(entry);
1048 }
1049 VSL_UNLOCK();
1050
1051 /* did not find the object */
1052 if (0 != addr)
1053 kmem_free(ipc_kernel_map, addr, size);
1054
1055 return KERN_INVALID_ARGUMENT;
1056
1057 found_object:
1058
1059 if (!VS_MAP_TRY_LOCK(entry)) {
1060 /* oh well bad luck */
1061 int wresult;
1062
1063 VS_UNLOCK(entry);
1064
1065 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
1066 wresult = thread_block(THREAD_CONTINUE_NULL);
1067 assert(wresult == THREAD_TIMED_OUT);
1068 continue;
1069 }
1070
1071 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1072 VS_MAP_UNLOCK(entry);
1073 VS_UNLOCK(entry);
1074
1075 if (actual <= potential)
1076 break;
1077
1078 /* allocate more memory */
1079 if (0 != addr)
1080 kmem_free(ipc_kernel_map, addr, size);
1081
1082 size = round_page(actual * sizeof * pages);
1083 kr = kmem_alloc(ipc_kernel_map, &addr, size);
1084 if (KERN_SUCCESS != kr)
1085 return KERN_RESOURCE_SHORTAGE;
1086
1087 pages = (default_pager_page_t *)addr;
1088 potential = size / sizeof * pages;
1089 }
1090
1091 /*
1092 * Clear unused memory.
1093 */
1094 while (actual < potential)
1095 pages[--potential].dpp_offset = 0;
1096
1097 kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
1098 vm_map_round_page(addr + size), FALSE);
1099 assert(KERN_SUCCESS == kr);
1100 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
1101 (vm_map_size_t)size, TRUE, &copy);
1102 assert(KERN_SUCCESS == kr);
1103
1104
1105 *pagesp = (default_pager_page_array_t)copy;
1106 *countp = actual;
1107 return KERN_SUCCESS;
1108 }