osfmk/default_pager/dp_memory_object.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*
  23  * @OSF_COPYRIGHT@
  24  */
  25 /*
  26  * Mach Operating System
  27  * Copyright (c) 1991,1990,1989 Carnegie Mellon University
  28  * All Rights Reserved.
  29  *
  30  * Permission to use, copy, modify and distribute this software and its
  31  * documentation is hereby granted, provided that both the copyright
  32  * notice and this permission notice appear in all copies of the
  33  * software, derivative works or modified versions, and any portions
  34  * thereof, and that both notices appear in supporting documentation.
  35  *
  36  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  37  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  38  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  39  *
  40  * Carnegie Mellon requests users of this software to return to
  41  *
  42  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  43  *  School of Computer Science
  44  *  Carnegie Mellon University
  45  *  Pittsburgh PA 15213-3890
  46  *
  47  * any improvements or extensions that they make and grant Carnegie Mellon
  48  * the rights to redistribute these changes.
  49  */
  50
  51 /*
  52  *      Default Pager.
  53  *              Memory Object Management.
  54  */
  55
  56 #include "default_pager_internal.h"
  57 #include <default_pager/default_pager_object_server.h>
  58 #include <mach/memory_object_default_server.h>
  59 #include <mach/memory_object_control.h>
  60 #include <mach/memory_object_types.h>
  61 #include <mach/memory_object_server.h>
  62 #include <mach/upl.h>
  63 #include <mach/vm_map.h>
  64 #include <vm/memory_object.h>
  65 #include <vm/vm_pageout.h>
  66 #include <vm/vm_map.h>
  67 #include <vm/vm_protos.h>
  68
  69 /* Forward declaration: vs_object_create() is defined further down in this file. */
  70 vstruct_t vs_object_create(vm_size_t size);
  71
  72 /*
  73  * List of all vstructs.  A specific vstruct is
  74  * found directly via its port; this list is
  75  * only used for monitoring purposes by the
  76  * default_pager_object* calls and by ps_delete
  77  * when abstract memory objects must be scanned
  78  * to remove any live storage on a segment which
  79  * is to be removed.
      *
      * vstruct_list_insert() and vstruct_list_delete() below maintain
      * both the queue (vsl_queue) and the element count (vsl_count).
  80  */
  81 struct vstruct_list_head        vstruct_list;
  82
  83 __private_extern__ void
  84 vstruct_list_insert(
  85         vstruct_t vs)
  86 {
  87         VSL_LOCK();
  88         queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
  89         vstruct_list.vsl_count++;
  90         VSL_UNLOCK();
  91 }
  92
  93
  94 __private_extern__ void
  95 vstruct_list_delete(
  96         vstruct_t vs)
  97 {
  98         queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
  99         vstruct_list.vsl_count--;
 100 }
 101
 102 /*
 103  * We use the sequence numbers on requests to regulate
 104  * our parallelism.  In general, we allow multiple reads and writes
 105  * to proceed in parallel, with the exception that reads must
 106  * wait for previous writes to finish.  (Because the kernel might
 107  * generate a data-request for a page on the heels of a data-write
 108  * for the same page, and we must avoid returning stale data.)
 109  * Terminate requests wait for preceding reads and writes to finish.
 110  */
 111
 112 static unsigned int     default_pager_total = 0;                /* debugging: lock attempts counted by vs_lock() and data_return */
 113 static unsigned int     default_pager_wait_seqno = 0;           /* debugging: sleeps while waiting for a sequence number */
 114 static unsigned int     default_pager_wait_read = 0;            /* debugging: sleeps while waiting for readers to drain */
 115 static unsigned int     default_pager_wait_write = 0;           /* debugging: sleeps while waiting for writers to drain */
 116
 117 __private_extern__ void
 118 vs_async_wait(
 119         vstruct_t       vs)
 120 {
 121
 122         ASSERT(vs->vs_async_pending >= 0);
 123         while (vs->vs_async_pending > 0) {
 124                 vs->vs_waiting_async = TRUE;
 125                 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
 126                 VS_UNLOCK(vs);
 127                 thread_block(THREAD_CONTINUE_NULL);
 128                 VS_LOCK(vs);
 129         }
 130         ASSERT(vs->vs_async_pending == 0);
 131 }
 132
 133
 134 #if     PARALLEL
 135 /*
 136  * Waits for correct sequence number.  Leaves pager locked.
 137  *
 138  * JMM - Sequence numbers guarantee ordering of requests generated
 139  *       by a single thread if the receiver is multithreaded and
 140  *       the interfaces are asynchronous (i.e. sender can generate
 141  *       more than one request before the first is received in the
 142  *       pager).  Normally, IPC would generate these number in that
 143  *       case.  But we are trying to avoid using IPC for the in-kernel
 144  *       scenario. Since these are actually invoked synchronously
 145  *       anyway (in-kernel), we can just fake the sequence number
 146  *       generation here (thus avoiding the dependence on IPC).
 147  */
 148 __private_extern__ void
 149 vs_lock(
 150         vstruct_t               vs)
 151 {
 152         mach_port_seqno_t       seqno;
 153
 154         default_pager_total++;
 155         VS_LOCK(vs);
 156
 157         seqno = vs->vs_next_seqno++;
 158
 159         while (vs->vs_seqno != seqno) {
 160                 default_pager_wait_seqno++;
 161                 vs->vs_waiting_seqno = TRUE;
 162                 assert_wait(&vs->vs_seqno, THREAD_UNINT);
 163                 VS_UNLOCK(vs);
 164                 thread_block(THREAD_CONTINUE_NULL);
 165                 VS_LOCK(vs);
 166         }
 167 }
 168
 169 /*
 170  * Increments sequence number and unlocks pager.
 171  */
 172 __private_extern__ void
 173 vs_unlock(vstruct_t vs)
 174 {
 175         vs->vs_seqno++;
 176         if (vs->vs_waiting_seqno) {
 177                 vs->vs_waiting_seqno = FALSE;
 178                 VS_UNLOCK(vs);
 179                 thread_wakeup(&vs->vs_seqno);
 180                 return;
 181         }
 182         VS_UNLOCK(vs);
 183 }
 184
 185 /*
 186  * Start a read - one more reader.  Pager must be locked.
 187  */
 188 __private_extern__ void
 189 vs_start_read(
 190         vstruct_t vs)
 191 {
 192         vs->vs_readers++;
 193 }
 194
 195 /*
 196  * Wait for readers.  Unlocks and relocks pager if wait needed.
 197  */
 198 __private_extern__ void
 199 vs_wait_for_readers(
 200         vstruct_t vs)
 201 {
 202         while (vs->vs_readers != 0) {
 203                 default_pager_wait_read++;
 204                 vs->vs_waiting_read = TRUE;
 205                 assert_wait(&vs->vs_readers, THREAD_UNINT);
 206                 VS_UNLOCK(vs);
 207                 thread_block(THREAD_CONTINUE_NULL);
 208                 VS_LOCK(vs);
 209         }
 210 }
 211
 212 /*
 213  * Finish a read.  Pager is unlocked and returns unlocked.
 214  */
 215 __private_extern__ void
 216 vs_finish_read(
 217         vstruct_t vs)
 218 {
 219         VS_LOCK(vs);
 220         if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
 221                 vs->vs_waiting_read = FALSE;
 222                 VS_UNLOCK(vs);
 223                 thread_wakeup(&vs->vs_readers);
 224                 return;
 225         }
 226         VS_UNLOCK(vs);
 227 }
 228
 229 /*
 230  * Start a write - one more writer.  Pager must be locked.
 231  */
 232 __private_extern__ void
 233 vs_start_write(
 234         vstruct_t vs)
 235 {
 236         vs->vs_writers++;
 237 }
 238
 239 /*
 240  * Wait for writers.  Unlocks and relocks pager if wait needed.
 241  */
 242 __private_extern__ void
 243 vs_wait_for_writers(
 244         vstruct_t vs)
 245 {
 246         while (vs->vs_writers != 0) {
 247                 default_pager_wait_write++;
 248                 vs->vs_waiting_write = TRUE;
 249                 assert_wait(&vs->vs_writers, THREAD_UNINT);
 250                 VS_UNLOCK(vs);
 251                 thread_block(THREAD_CONTINUE_NULL);
 252                 VS_LOCK(vs);
 253         }
 254         vs_async_wait(vs);
 255 }
 256
 257 /* This is to be used for the transfer from segment code ONLY */
 258 /* The transfer code holds off vs destruction by keeping the  */
 259 /* vs_async_wait count non-zero.  It will not ocnflict with   */
 260 /* other writers on an async basis because it only writes on  */
 261 /* a cluster basis into fresh (as of sync time) cluster locations */
 262
 263 __private_extern__ void
 264 vs_wait_for_sync_writers(
 265         vstruct_t vs)
 266 {
 267         while (vs->vs_writers != 0) {
 268                 default_pager_wait_write++;
 269                 vs->vs_waiting_write = TRUE;
 270                 assert_wait(&vs->vs_writers, THREAD_UNINT);
 271                 VS_UNLOCK(vs);
 272                 thread_block(THREAD_CONTINUE_NULL);
 273                 VS_LOCK(vs);
 274         }
 275 }
 276
 277
 278 /*
 279  * Finish a write.  Pager is unlocked and returns unlocked.
 280  */
 281 __private_extern__ void
 282 vs_finish_write(
 283         vstruct_t vs)
 284 {
 285         VS_LOCK(vs);
 286         if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
 287                 vs->vs_waiting_write = FALSE;
 288                 VS_UNLOCK(vs);
 289                 thread_wakeup(&vs->vs_writers);
 290                 return;
 291         }
 292         VS_UNLOCK(vs);
 293 }
 294 #endif  /* PARALLEL */
 295
 296 vstruct_t
 297 vs_object_create(
 298         vm_size_t size)
 299 {
 300         vstruct_t       vs;
 301
 302         /*
 303          * Allocate a vstruct. If there are any problems, then report them
 304          * to the console.
 305          */
 306         vs = ps_vstruct_create(size);
 307         if (vs == VSTRUCT_NULL) {
 308                 dprintf(("vs_object_create: unable to allocate %s\n",
 309                          "-- either run swapon command or reboot"));
 310                 return VSTRUCT_NULL;
 311         }
 312
 313         return vs;
 314 }
 315
 316 #if 0
     /*
      * Compiled out.  default_pager_add() picks a port set and a make-send
      * sync count depending on `internal', then requests a no-senders
      * notification on the memory object port.  Note that `pset', `kr'
      * and `here' are assigned/declared but never used in this body.
      */
 317 void default_pager_add(vstruct_t, boolean_t);   /* forward */
 318
 319 void
 320 default_pager_add(
 321         vstruct_t vs,
 322         boolean_t internal)
 323 {
 324         memory_object_t         mem_obj = vs->vs_mem_obj;
 325         mach_port_t             pset;
 326         mach_port_mscount_t     sync;
 327         mach_port_t             previous;
 328         kern_return_t           kr;
 329         static char             here[] = "default_pager_add";
 330
 331         /*
 332          * The port currently has a make-send count of zero,
 333          * because either we just created the port or we just
 334          * received the port in a memory_object_create request.
 335          */
 336
 337         if (internal) {
 338                 /* possibly generate an immediate no-senders notification */
 339                 sync = 0;
 340                 pset = default_pager_internal_set;
 341         } else {
 342                 /* delay notification till send right is created */
 343                 sync = 1;
 344                 pset = default_pager_external_set;
 345         }
 346
 347         ipc_port_make_sonce(mem_obj);
 348         ip_lock(mem_obj);  /* unlocked in nsrequest below */
 349         ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
 350 }
 351
 352 #endif
 353
 354 kern_return_t
 355 dp_memory_object_init(
 356         memory_object_t         mem_obj,
 357         memory_object_control_t control,
 358         __unused vm_size_t pager_page_size)
 359 {
 360         vstruct_t               vs;
 361
 362         assert(pager_page_size == vm_page_size);
 363
 364         memory_object_control_reference(control);
 365
 366         vs_lookup(mem_obj, vs);
 367         vs_lock(vs);
 368
 369         if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
 370                 Panic("bad request");
 371
 372         vs->vs_control = control;
 373         vs_unlock(vs);
 374
 375         return KERN_SUCCESS;
 376 }
 377
 378 kern_return_t
 379 dp_memory_object_synchronize(
 380         memory_object_t         mem_obj,
 381         memory_object_offset_t  offset,
 382         vm_size_t               length,
 383         __unused vm_sync_t              flags)
 384 {
 385         vstruct_t       vs;
 386
 387         vs_lookup(mem_obj, vs);
 388         vs_lock(vs);
 389         vs_unlock(vs);
 390
 391         memory_object_synchronize_completed(vs->vs_control, offset, length);
 392
 393         return KERN_SUCCESS;
 394 }
 395
 396 kern_return_t
 397 dp_memory_object_unmap(
 398         __unused memory_object_t                mem_obj)
 399 {
 400         panic("dp_memory_object_unmap");
 401
 402         return KERN_FAILURE;
 403 }
 404
 405 kern_return_t
 406 dp_memory_object_terminate(
 407         memory_object_t         mem_obj)
 408 {
 409         memory_object_control_t control;
 410         vstruct_t               vs;
 411
 412         /*
 413          * control port is a receive right, not a send right.
 414          */
 415
 416         vs_lookup(mem_obj, vs);
 417         vs_lock(vs);
 418
 419         /*
 420          * Wait for read and write requests to terminate.
 421          */
 422
 423         vs_wait_for_readers(vs);
 424         vs_wait_for_writers(vs);
 425
 426         /*
 427          * After memory_object_terminate both memory_object_init
 428          * and a no-senders notification are possible, so we need
 429          * to clean up our reference to the memory_object_control
 430          * to prepare for a new init.
 431          */
 432
 433         control = vs->vs_control;
 434         vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
 435
 436         /* a bit of special case ugliness here.  Wakeup any waiting reads */
 437         /* these data requests had to be removed from the seqno traffic   */
 438         /* based on a performance bottleneck with large memory objects    */
 439         /* the problem will right itself with the new component based     */
 440         /* synchronous interface.  The new async will be able to return   */
 441         /* failure during its sync phase.   In the mean time ... */
 442
 443         thread_wakeup(&vs->vs_writers);
 444         thread_wakeup(&vs->vs_async_pending);
 445
 446         vs_unlock(vs);
 447
 448         /*
 449          * Now we deallocate our reference on the control.
 450          */
 451         memory_object_control_deallocate(control);
 452         return KERN_SUCCESS;
 453 }
 454
 455 void
 456 dp_memory_object_reference(
 457         memory_object_t         mem_obj)
 458 {
 459         vstruct_t               vs;
 460
 461         vs_lookup_safe(mem_obj, vs);
 462         if (vs == VSTRUCT_NULL)
 463                 return;
 464
 465         VS_LOCK(vs);
 466         assert(vs->vs_references > 0);
 467         vs->vs_references++;
 468         VS_UNLOCK(vs);
 469 }
 470
 471 void
 472 dp_memory_object_deallocate(
 473         memory_object_t         mem_obj)
 474 {
             /*
              * Drop one reference on the vstruct behind mem_obj.  When the
              * last reference goes away: take a sequence number and wait for
              * our turn, wait for pending async I/O, acquire the VSL lock,
              * remove the vstruct from the global list and free it with
              * ps_vstruct_dealloc().  Does nothing if the safe lookup yields
              * VSTRUCT_NULL.
              */
 475         vstruct_t               vs;
 476         mach_port_seqno_t       seqno;
 477
 478         /*
 479          * Because we don't give out multiple first references
 480          * for a memory object, there can't be a race
 481          * between getting a deallocate call and creating
 482          * a new reference for the object.
 483          */
 484
 485         vs_lookup_safe(mem_obj, vs);
 486         if (vs == VSTRUCT_NULL)
 487                 return;
 488
 489         VS_LOCK(vs);
             /* other references remain: nothing more to do */
 490         if (--vs->vs_references > 0) {
 491                 VS_UNLOCK(vs);
 492                 return;
 493         }
 494
             /*
              * Same ticket wait as vs_lock(), open-coded here because the
              * VS lock is already held.
              */
 495         seqno = vs->vs_next_seqno++;
 496         while (vs->vs_seqno != seqno) {
 497                 default_pager_wait_seqno++;
 498                 vs->vs_waiting_seqno = TRUE;
 499                 assert_wait(&vs->vs_seqno, THREAD_UNINT);
 500                 VS_UNLOCK(vs);
 501                 thread_block(THREAD_CONTINUE_NULL);
 502                 VS_LOCK(vs);
 503         }
 504
 505         vs_async_wait(vs);      /* wait for pending async IO */
 506
 507         /* do not delete the vs structure until the referencing pointers */
 508         /* in the vstruct list have been expunged */
 509
 510         /* get VSL_LOCK out of order by using TRY mechanism */
             /*
              * If the try fails, drop the VS lock, wait for the VSL lock to
              * become free by taking and releasing it, then re-take the VS
              * lock, re-check for async I/O and try again.
              */
 511         while(!VSL_LOCK_TRY()) {
 512                 VS_UNLOCK(vs);
 513                 VSL_LOCK();
 514                 VSL_UNLOCK();
 515                 VS_LOCK(vs);
 516                 vs_async_wait(vs);      /* wait for pending async IO */
 517         }
 518
 519
 520         /*
 521          * We shouldn't get a deallocation call
 522          * when the kernel has the object cached.
 523          */
 524         if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
 525                 Panic("bad request");
 526
 527         /*
 528          * Unlock the pager (though there should be no one
 529          * waiting for it).
 530          */
 531         VS_UNLOCK(vs);
 532
 533         /* Lock out paging segment removal for the duration of this */
 534         /* call.  We are vulnerable to losing a paging segment we rely */
 535         /* on as soon as we remove ourselves from the VSL and unlock */
 536
 537         /* Keep our thread from blocking on attempt to trigger backing */
 538         /* store release */
             /* (the VSL lock is still held here, from the try-lock above) */
 539         backing_store_release_trigger_disable += 1;
 540
 541         /*
 542          * Remove the memory object port association, and then
 543          * destroy the port itself.  We must remove the object
 544          * from the port list before deallocating the pager,
 545          * because of default_pager_objects.
 546          */
 547         vstruct_list_delete(vs);
 548         VSL_UNLOCK();
 549
 550         ps_vstruct_dealloc(vs);
 551
             /*
              * Re-enable the backing store release trigger under the VSL
              * lock and wake anyone sleeping on it once the disable count
              * drops back to zero.
              */
 552         VSL_LOCK();
 553         backing_store_release_trigger_disable -= 1;
 554         if(backing_store_release_trigger_disable == 0) {
 555                 thread_wakeup((event_t)&backing_store_release_trigger_disable);
 556         }
 557         VSL_UNLOCK();
 558 }
 559
 560 kern_return_t
 561 dp_memory_object_data_request(
 562         memory_object_t         mem_obj,
 563         memory_object_offset_t  offset,
 564         vm_size_t               length,
 565         __unused vm_prot_t              protection_required)
 566 {
             /*
              * Page-in entry point: registers the caller as a reader, reads
              * `length' bytes at `offset' via pvs_cluster_read() and then
              * retires the reader.  If writers are active, the sequence
              * number is given up first and the thread waits for the
              * writers to drain; KERN_FAILURE is returned if the vstruct
              * has no control by then.  Panics on a misaligned offset or
              * length.  Returns KERN_SUCCESS otherwise.
              */
 567         vstruct_t               vs;
 568
 569         GSTAT(global_stats.gs_pagein_calls++);
 570
 571
 572         /* CDY at this moment vs_lookup panics when presented with the wrong */
 573         /* port.  As we are expanding this pager to support user interfaces */
 574         /* this should be changed to return kern_failure */
 575         vs_lookup(mem_obj, vs);
 576         vs_lock(vs);
 577
 578         /* We are going to relax the strict sequencing here for performance */
 579         /* reasons.  We can do this because we know that the read and */
 580         /* write threads are different and we rely on synchronization */
 581         /* of read and write requests at the cache memory_object level */
 582         /* break out wait_for_writers, all of this goes away when */
 583         /* we get real control of seqno with the new component interface */
 584
 585         if (vs->vs_writers != 0) {
 586                 /* you can't hold on to the seqno and go */
 587                 /* to sleep like that */
 588                 vs_unlock(vs);  /* bump internal count of seqno */
                     /* re-take the plain lock, now outside the seqno ordering */
 589                 VS_LOCK(vs);
 590                 while (vs->vs_writers != 0) {
 591                         default_pager_wait_write++;
 592                         vs->vs_waiting_write = TRUE;
 593                         assert_wait(&vs->vs_writers, THREAD_UNINT);
 594                         VS_UNLOCK(vs);
 595                         thread_block(THREAD_CONTINUE_NULL);
 596                         VS_LOCK(vs);
 597                         vs_async_wait(vs);
 598                 }
                     /*
                      * dp_memory_object_terminate() clears vs_control and
                      * wakes sleepers on vs_writers, so a NULL control here
                      * means the request must be failed.
                      */
 599                 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
 600                         VS_UNLOCK(vs);
 601                         return KERN_FAILURE;
 602                 }
 603                 vs_start_read(vs);
 604                 VS_UNLOCK(vs);
 605         } else {
                     /* no writers: become a reader and pass the seqno on */
 606                 vs_start_read(vs);
 607                 vs_unlock(vs);
 608         }
 609
 610         /*
 611          * Request must be on a page boundary and a multiple of pages.
 612          */
 613         if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
 614                 Panic("bad alignment");
 615
 616         pvs_cluster_read(vs, (vm_offset_t)offset, length);
 617
 618         vs_finish_read(vs);
 619
 620         return KERN_SUCCESS;
 621 }
 622
 623 /*
 624  * memory_object_data_initialize: check whether we already have each page, and
 625  * write it if we do not.  The implementation is far from optimized, and
 626  * also assumes that the default_pager is single-threaded.
 627  */
 628 /*  It is questionable whether or not a pager should decide what is relevant */
 629 /* and what is not in data sent from the kernel.  Data initialize has been */
 630 /* changed to copy back all data sent to it in preparation for its eventual */
 631 /* merge with data return.  It is the kernel that should decide what pages */
 632 /* to write back.  As of the writing of this note, this is indeed the case */
 633 /* the kernel writes back one page at a time through this interface */
 634
 635 kern_return_t
 636 dp_memory_object_data_initialize(
 637         memory_object_t         mem_obj,
 638         memory_object_offset_t  offset,
 639         vm_size_t               size)
 640 {
 641         vstruct_t       vs;
 642
 643         DP_DEBUG(DEBUG_MO_EXTERNAL,
 644                  ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
 645                   (int)mem_obj, (int)offset, (int)size));
 646         GSTAT(global_stats.gs_pages_init += atop_32(size));
 647
 648         vs_lookup(mem_obj, vs);
 649         vs_lock(vs);
 650         vs_start_write(vs);
 651         vs_unlock(vs);
 652
 653         /*
 654          * Write the data via clustered writes. vs_cluster_write will
 655          * loop if the address range specified crosses cluster
 656          * boundaries.
 657          */
 658         vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
 659
 660         vs_finish_write(vs);
 661
 662         return KERN_SUCCESS;
 663 }
 664
 665 kern_return_t
 666 dp_memory_object_data_unlock(
 667         __unused memory_object_t                mem_obj,
 668         __unused memory_object_offset_t offset,
 669         __unused vm_size_t              size,
 670         __unused vm_prot_t              desired_access)
 671 {
 672         Panic("dp_memory_object_data_unlock: illegal");
 673         return KERN_FAILURE;
 674 }
 675
 676
 677 /*ARGSUSED8*/
 678 kern_return_t
 679 dp_memory_object_data_return(
 680         memory_object_t         mem_obj,
 681         memory_object_offset_t  offset,
 682         vm_size_t                       size,
 683         __unused memory_object_offset_t *resid_offset,
 684         __unused int            *io_error,
 685         __unused boolean_t      dirty,
 686         __unused boolean_t      kernel_copy,
 687         __unused int    upl_flags)
 688 {
 689         vstruct_t       vs;
 690
 691         DP_DEBUG(DEBUG_MO_EXTERNAL,
 692                  ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
 693                   (int)mem_obj, (int)offset, (int)size));
 694         GSTAT(global_stats.gs_pageout_calls++);
 695
 696         /* This routine is called by the pageout thread.  The pageout thread */
 697         /* cannot be blocked by read activities unless the read activities   */
 698         /* Therefore the grant of vs lock must be done on a try versus a      */
 699         /* blocking basis.  The code below relies on the fact that the       */
 700         /* interface is synchronous.  Should this interface be again async   */
 701         /* for some type  of pager in the future the pages will have to be   */
 702         /* returned through a separate, asynchronous path.                   */
 703
 704         vs_lookup(mem_obj, vs);
 705
 706         default_pager_total++;
 707         if(!VS_TRY_LOCK(vs)) {
 708                 /* the call below will not be done by caller when we have */
 709                 /* a synchronous interface */
 710                 /* return KERN_LOCK_OWNED; */
 711                 upl_t           upl;
 712                 int             page_list_count = 0;
 713                 memory_object_super_upl_request(vs->vs_control,
 714                                         (memory_object_offset_t)offset,
 715                                         size, size,
 716                                         &upl, NULL, &page_list_count,
 717                                         UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
 718                                         | UPL_NO_SYNC | UPL_COPYOUT_FROM);
 719                 upl_abort(upl,0);
 720                 upl_deallocate(upl);
 721                 return KERN_SUCCESS;
 722         }
 723
 724         if ((vs->vs_seqno != vs->vs_next_seqno++)
 725                         || (vs->vs_readers)
 726                         || (vs->vs_xfer_pending)) {
 727                 upl_t   upl;
 728                 int     page_list_count = 0;
 729
 730                 vs->vs_next_seqno--;
 731                 VS_UNLOCK(vs);
 732
 733                 /* the call below will not be done by caller when we have */
 734                 /* a synchronous interface */
 735                 /* return KERN_LOCK_OWNED; */
 736                 memory_object_super_upl_request(vs->vs_control,
 737                                 (memory_object_offset_t)offset,
 738                                 size, size,
 739                                 &upl, NULL, &page_list_count,
 740                                 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
 741                                         | UPL_NO_SYNC | UPL_COPYOUT_FROM);
 742                 upl_abort(upl,0);
 743                 upl_deallocate(upl);
 744                 return KERN_SUCCESS;
 745         }
 746
 747         if ((size % vm_page_size) != 0)
 748                 Panic("bad alignment");
 749
 750         vs_start_write(vs);
 751
 752
 753         vs->vs_async_pending += 1;  /* protect from backing store contraction */
 754         vs_unlock(vs);
 755
 756         /*
 757          * Write the data via clustered writes. vs_cluster_write will
 758          * loop if the address range specified crosses cluster
 759          * boundaries.
 760          */
 761         vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
 762
 763         vs_finish_write(vs);
 764
 765         /* temporary, need a finer lock based on cluster */
 766
 767         VS_LOCK(vs);
 768         vs->vs_async_pending -= 1;  /* release vs_async_wait */
 769         if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
 770                 vs->vs_waiting_async = FALSE;
 771                 VS_UNLOCK(vs);
 772                 thread_wakeup(&vs->vs_async_pending);
 773         } else {
 774                 VS_UNLOCK(vs);
 775         }
 776
 777
 778         return KERN_SUCCESS;
 779 }
 780
 781 /*
 782  * Routine:     default_pager_memory_object_create
 783  * Purpose:
 784  *      Handle requests for memory objects from the
 785  *      kernel.
 786  * Notes:
 787  *      Because we only give out the default memory
 788  *      manager port to the kernel, we don't have to
 789  *      be so paranoid about the contents.
 790  */
 791 kern_return_t
 792 default_pager_memory_object_create(
 793         __unused memory_object_default_t        dmm,
 794         vm_size_t               new_size,
 795         memory_object_t         *new_mem_obj)
 796 {
 797         vstruct_t               vs;
 798
 799         assert(dmm == default_pager_object);
 800
 801         vs = vs_object_create(new_size);
 802         if (vs == VSTRUCT_NULL)
 803                 return KERN_RESOURCE_SHORTAGE;
 804
 805         vs->vs_next_seqno = 0;
 806
 807         /*
 808          * Set up associations between this memory object
 809          * and this default_pager structure
 810          */
 811
 812         vs->vs_mem_obj = ISVS;
 813         vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
 814
 815         /*
 816          * After this, other threads might receive requests
 817          * for this memory object or find it in the port list.
 818          */
 819
 820         vstruct_list_insert(vs);
 821         *new_mem_obj = vs_to_mem_obj(vs);
 822         return KERN_SUCCESS;
 823 }
 824
 825 /*
 826  * Create an external object.
 827  */
 828 kern_return_t
 829 default_pager_object_create(
 830         default_pager_t default_pager,
 831         vm_size_t       size,
 832         memory_object_t *mem_objp)
 833 {
 834         vstruct_t       vs;
 835
 836         if (default_pager != default_pager_object)
 837                 return KERN_INVALID_ARGUMENT;
 838
 839         vs = vs_object_create(size);
 840         if (vs == VSTRUCT_NULL)
 841                 return KERN_RESOURCE_SHORTAGE;
 842
 843         /*
 844          * Set up associations between the default pager
 845          * and this vstruct structure
 846          */
 847         vs->vs_mem_obj = ISVS;
 848         vstruct_list_insert(vs);
 849         *mem_objp = vs_to_mem_obj(vs);
 850         return KERN_SUCCESS;
 851 }
 852
 853 kern_return_t
 854 default_pager_objects(
 855         default_pager_t                 default_pager,
 856         default_pager_object_array_t    *objectsp,
 857         mach_msg_type_number_t          *ocountp,
 858         mach_port_array_t               *portsp,
 859         mach_msg_type_number_t          *pcountp)
 860 {
 861         vm_offset_t             oaddr = 0;      /* memory for objects */
 862         vm_size_t               osize = 0;      /* current size */
 863         default_pager_object_t  * objects;
 864         unsigned int            opotential = 0;
 865
 866         vm_map_copy_t           pcopy = 0;      /* copy handle for pagers */
 867         vm_size_t               psize = 0;      /* current size */
 868         memory_object_t         * pagers;
 869         unsigned int            ppotential = 0;
 870
 871         unsigned int            actual;
 872         unsigned int            num_objects;
 873         kern_return_t           kr;
 874         vstruct_t               entry;
 875
 876         if (default_pager != default_pager_object)
 877                 return KERN_INVALID_ARGUMENT;
 878
 879         /*
 880          * We will send no more than this many
 881          */
 882         actual = vstruct_list.vsl_count;
 883
 884         /*
 885          * Our out-of-line port arrays are simply kalloc'ed.
 886          */
 887         psize = round_page(actual * sizeof * pagers);
 888         ppotential = psize / sizeof * pagers;
 889         pagers = (memory_object_t *)kalloc(psize);
 890         if (0 == pagers)
 891                 return KERN_RESOURCE_SHORTAGE;
 892
 893         /*
 894          * returned out of line data must be allocated out
 895          * the ipc_kernel_map, wired down, filled in, and
 896          * then "copied in" as if it had been sent by a
 897          * user process.
 898          */
 899         osize = round_page(actual * sizeof * objects);
 900         opotential = osize / sizeof * objects;
 901         kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
 902         if (KERN_SUCCESS != kr) {
 903                 kfree(pagers, psize);
 904                 return KERN_RESOURCE_SHORTAGE;
 905         }
 906         objects = (default_pager_object_t *)oaddr;
 907
 908
 909         /*
 910          * Now scan the list.
 911          */
 912
 913         VSL_LOCK();
 914
 915         num_objects = 0;
 916         queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
 917
 918                 memory_object_t                 pager;
 919                 vm_size_t                       size;
 920
 921                 if ((num_objects >= opotential) ||
 922                     (num_objects >= ppotential)) {
 923
 924                         /*
 925                          * This should be rare.  In any case,
 926                          * we will only miss recent objects,
 927                          * because they are added at the end.
 928                          */
 929                         break;
 930                 }
 931
 932                 /*
 933                  * Avoid interfering with normal operations
 934                  */
 935                 if (!VS_MAP_TRY_LOCK(entry))
 936                         goto not_this_one;
 937                 size = ps_vstruct_allocated_size(entry);
 938                 VS_MAP_UNLOCK(entry);
 939
 940                 VS_LOCK(entry);
 941
 942                 /*
 943                  * We need a reference for our caller.  Adding this
 944                  * reference through the linked list could race with
 945                  * destruction of the object.  If we find the object
 946                  * has no references, just give up on it.
 947                  */
 948                 VS_LOCK(entry);
 949                 if (entry->vs_references == 0) {
 950                         VS_UNLOCK(entry);
 951                         goto not_this_one;
 952                 }
 953                 pager = vs_to_mem_obj(entry);
 954                 dp_memory_object_reference(pager);
 955                 VS_UNLOCK(entry);
 956
 957                 /* the arrays are wired, so no deadlock worries */
 958
 959                 objects[num_objects].dpo_object = (vm_offset_t) entry;
 960                 objects[num_objects].dpo_size = size;
 961                 pagers [num_objects++] = pager;
 962                 continue;
 963
 964             not_this_one:
 965                 /*
 966                  * Do not return garbage
 967                  */
 968                 objects[num_objects].dpo_object = (vm_offset_t) 0;
 969                 objects[num_objects].dpo_size = 0;
 970                 pagers[num_objects++] = MEMORY_OBJECT_NULL;
 971
 972         }
 973
 974         VSL_UNLOCK();
 975
 976         /* clear out any excess allocation */
 977         while (num_objects < opotential) {
 978                 objects[--opotential].dpo_object = (vm_offset_t) 0;
 979                 objects[opotential].dpo_size = 0;
 980         }
 981         while (num_objects < ppotential) {
 982                 pagers[--ppotential] = MEMORY_OBJECT_NULL;
 983         }
 984
 985         kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
 986                            vm_map_round_page(oaddr + osize), FALSE);
 987         assert(KERN_SUCCESS == kr);
 988         kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
 989                            (vm_map_size_t)osize, TRUE, &pcopy);
 990         assert(KERN_SUCCESS == kr);
 991
 992         *objectsp = (default_pager_object_array_t)objects;
 993         *ocountp = num_objects;
 994         *portsp = (mach_port_array_t)pcopy;
 995         *pcountp = num_objects;
 996
 997         return KERN_SUCCESS;
 998 }
 999
1000 kern_return_t
1001 default_pager_object_pages(
1002         default_pager_t         default_pager,
1003         mach_port_t                     memory_object,
1004         default_pager_page_array_t      *pagesp,
1005         mach_msg_type_number_t          *countp)
1006 {
1007         vm_offset_t                     addr = 0; /* memory for page offsets */
1008         vm_size_t                       size = 0; /* current memory size */
1009         vm_map_copy_t                   copy;
1010         default_pager_page_t            * pages = 0;
1011         unsigned int                    potential;
1012         unsigned int                    actual;
1013         kern_return_t                   kr;
1014         memory_object_t                 object;
1015
1016         if (default_pager != default_pager_object)
1017                 return KERN_INVALID_ARGUMENT;
1018
1019         object = (memory_object_t) memory_object;
1020
1021         potential = 0;
1022         for (;;) {
1023                 vstruct_t       entry;
1024
1025                 VSL_LOCK();
1026                 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1027                               vs_links) {
1028                         VS_LOCK(entry);
1029                         if (vs_to_mem_obj(entry) == object) {
1030                                 VSL_UNLOCK();
1031                                 goto found_object;
1032                         }
1033                         VS_UNLOCK(entry);
1034                 }
1035                 VSL_UNLOCK();
1036
1037                 /* did not find the object */
1038                 if (0 != addr)
1039                         kmem_free(ipc_kernel_map, addr, size);
1040
1041                 return KERN_INVALID_ARGUMENT;
1042
1043             found_object:
1044
1045                 if (!VS_MAP_TRY_LOCK(entry)) {
1046                         /* oh well bad luck */
1047                         int wresult;
1048
1049                         VS_UNLOCK(entry);
1050
1051                         assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
1052                         wresult = thread_block(THREAD_CONTINUE_NULL);
1053                         assert(wresult == THREAD_TIMED_OUT);
1054                         continue;
1055                 }
1056
1057                 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1058                 VS_MAP_UNLOCK(entry);
1059                 VS_UNLOCK(entry);
1060
1061                 if (actual <= potential)
1062                         break;
1063
1064                 /* allocate more memory */
1065                 if (0 != addr)
1066                         kmem_free(ipc_kernel_map, addr, size);
1067
1068                 size = round_page(actual * sizeof * pages);
1069                 kr = kmem_alloc(ipc_kernel_map, &addr, size);
1070                 if (KERN_SUCCESS != kr)
1071                         return KERN_RESOURCE_SHORTAGE;
1072
1073                 pages = (default_pager_page_t *)addr;
1074                 potential = size / sizeof * pages;
1075         }
1076
1077         /*
1078          * Clear unused memory.
1079          */
1080         while (actual < potential)
1081                 pages[--potential].dpp_offset = 0;
1082
1083         kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
1084                            vm_map_round_page(addr + size), FALSE);
1085         assert(KERN_SUCCESS == kr);
1086         kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
1087                            (vm_map_size_t)size, TRUE, &copy);
1088         assert(KERN_SUCCESS == kr);
1089
1090
1091         *pagesp = (default_pager_page_array_t)copy;
1092         *countp = actual;
1093         return KERN_SUCCESS;
1094 }