1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /*
23 * @OSF_COPYRIGHT@
24 */
25 /*
26 * Mach Operating System
27 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
28 * All Rights Reserved.
29 *
30 * Permission to use, copy, modify and distribute this software and its
31 * documentation is hereby granted, provided that both the copyright
32 * notice and this permission notice appear in all copies of the
33 * software, derivative works or modified versions, and any portions
34 * thereof, and that both notices appear in supporting documentation.
35 *
36 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
37 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
38 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
39 *
40 * Carnegie Mellon requests users of this software to return to
41 *
42 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
43 * School of Computer Science
44 * Carnegie Mellon University
45 * Pittsburgh PA 15213-3890
46 *
47 * any improvements or extensions that they make and grant Carnegie Mellon
48 * the rights to redistribute these changes.
49 */
50
51 /*
52 * Default Pager.
53 * Memory Object Management.
54 */
55
56 #include "default_pager_internal.h"
57 #include <mach/memory_object_types.h>
58 #include <mach/memory_object_server.h>
59 #include <vm/memory_object.h>
60 #include <vm/vm_pageout.h>
61
62
63 /*
64 * List of all vstructs. A specific vstruct is
65 * found directly via its port; this list is
66 * only used for monitoring purposes by the
67 * default_pager_object* calls and by ps_delete
68 * when abstract memory objects must be scanned
69 * to remove any live storage on a segment which
70 * is to be removed.
71 */
72 struct vstruct_list_head vstruct_list;
73
74 __private_extern__ void
75 vstruct_list_insert(
76 vstruct_t vs)
77 {
78 VSL_LOCK();
79 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
80 vstruct_list.vsl_count++;
81 VSL_UNLOCK();
82 }
83
84
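/*
 * Note the asymmetry with vstruct_list_insert above: the delete
 * routine does not take VSL_LOCK itself, so the caller is expected
 * to already hold the vstruct list lock (as dp_memory_object_deallocate
 * does below).
 */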
85 __private_extern__ void
86 vstruct_list_delete(
87 vstruct_t vs)
88 {
89 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
90 vstruct_list.vsl_count--;
91 }
92
93 /*
94 * We use the sequence numbers on requests to regulate
95 * our parallelism. In general, we allow multiple reads and writes
96 * to proceed in parallel, with the exception that reads must
97 * wait for previous writes to finish. (Because the kernel might
98 * generate a data-request for a page on the heels of a data-write
99 * for the same page, and we must avoid returning stale data.)
100 * Terminate requests wait for preceding reads and writes to finish.
101 */
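/*
 * A sketch of how these rules show up in the request paths below
 * (not a literal call sequence; see dp_memory_object_data_request
 * and dp_memory_object_data_return for the real, more guarded code):
 *
 *	vs_lock(vs);			-- take our seqno turn
 *	vs_wait_for_writers(vs);	-- reads must drain prior writes
 *	vs_start_read(vs);
 *	vs_unlock(vs);			-- bump seqno, let others proceed
 *	pvs_cluster_read(vs, offset, length);
 *	vs_finish_read(vs);
 */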
102
103 static unsigned int default_pager_total = 0; /* debugging */
104 static unsigned int default_pager_wait_seqno = 0; /* debugging */
105 static unsigned int default_pager_wait_read = 0; /* debugging */
106 static unsigned int default_pager_wait_write = 0; /* debugging */
107 static unsigned int default_pager_wait_refs = 0; /* debugging */
108
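/*
 * Wait for all asynchronous I/O pending on this vstruct to drain.
 * Caller must hold VS_LOCK(vs); the lock is dropped around each
 * thread_block and re-taken, so the caller still holds it on return.
 */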
109 __private_extern__ void
110 vs_async_wait(
111 vstruct_t vs)
112 {
113
114 ASSERT(vs->vs_async_pending >= 0);
115 while (vs->vs_async_pending > 0) {
116 vs->vs_waiting_async = TRUE;
117 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
118 VS_UNLOCK(vs);
119 thread_block(THREAD_CONTINUE_NULL);
120 VS_LOCK(vs);
121 }
122 ASSERT(vs->vs_async_pending == 0);
123 }
124
125
126 #if PARALLEL
127 /*
128 * Waits for correct sequence number. Leaves pager locked.
129 *
130 * JMM - Sequence numbers guarantee ordering of requests generated
131 * by a single thread if the receiver is multithreaded and
132 * the interfaces are asynchronous (i.e. sender can generate
133 * more than one request before the first is received in the
134 * pager). Normally, IPC would generate these numbers in that
135 * case. But we are trying to avoid using IPC for the in-kernel
136 * scenario. Since these are actually invoked synchronously
137 * anyway (in-kernel), we can just fake the sequence number
138 * generation here (thus avoiding the dependence on IPC).
139 */
140 __private_extern__ void
141 vs_lock(
142 vstruct_t vs)
143 {
144 mach_port_seqno_t seqno;
145
146 default_pager_total++;
147 VS_LOCK(vs);
148
149 seqno = vs->vs_next_seqno++;
150
151 while (vs->vs_seqno != seqno) {
152 default_pager_wait_seqno++;
153 vs->vs_waiting_seqno = TRUE;
154 assert_wait(&vs->vs_seqno, THREAD_UNINT);
155 VS_UNLOCK(vs);
156 thread_block(THREAD_CONTINUE_NULL);
157 VS_LOCK(vs);
158 }
159 }
160
161 /*
162 * Increments sequence number and unlocks pager.
163 */
164 __private_extern__ void
165 vs_unlock(vstruct_t vs)
166 {
167 vs->vs_seqno++;
168 if (vs->vs_waiting_seqno) {
169 vs->vs_waiting_seqno = FALSE;
170 VS_UNLOCK(vs);
171 thread_wakeup(&vs->vs_seqno);
172 return;
173 }
174 VS_UNLOCK(vs);
175 }
176
177 /*
178 * Start a read - one more reader. Pager must be locked.
179 */
180 __private_extern__ void
181 vs_start_read(
182 vstruct_t vs)
183 {
184 vs->vs_readers++;
185 }
186
187 /*
188 * Wait for readers. Unlocks and relocks pager if wait needed.
189 */
190 __private_extern__ void
191 vs_wait_for_readers(
192 vstruct_t vs)
193 {
194 while (vs->vs_readers != 0) {
195 default_pager_wait_read++;
196 vs->vs_waiting_read = TRUE;
197 assert_wait(&vs->vs_readers, THREAD_UNINT);
198 VS_UNLOCK(vs);
199 thread_block(THREAD_CONTINUE_NULL);
200 VS_LOCK(vs);
201 }
202 }
203
204 /*
205 * Finish a read. Pager is unlocked and returns unlocked.
206 */
207 __private_extern__ void
208 vs_finish_read(
209 vstruct_t vs)
210 {
211 VS_LOCK(vs);
212 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
213 vs->vs_waiting_read = FALSE;
214 VS_UNLOCK(vs);
215 thread_wakeup(&vs->vs_readers);
216 return;
217 }
218 VS_UNLOCK(vs);
219 }
220
221 /*
222 * Start a write - one more writer. Pager must be locked.
223 */
224 __private_extern__ void
225 vs_start_write(
226 vstruct_t vs)
227 {
228 vs->vs_writers++;
229 }
230
231 /*
232 * Wait for writers. Unlocks and relocks pager if wait needed.
233 */
234 __private_extern__ void
235 vs_wait_for_writers(
236 vstruct_t vs)
237 {
238 while (vs->vs_writers != 0) {
239 default_pager_wait_write++;
240 vs->vs_waiting_write = TRUE;
241 assert_wait(&vs->vs_writers, THREAD_UNINT);
242 VS_UNLOCK(vs);
243 thread_block(THREAD_CONTINUE_NULL);
244 VS_LOCK(vs);
245 }
246 vs_async_wait(vs);
247 }
248
249 /* This is to be used for the transfer from segment code ONLY */
250 /* The transfer code holds off vs destruction by keeping the */
251 /* vs_async_wait count non-zero. It will not conflict with */
252 /* other writers on an async basis because it only writes on */
253 /* a cluster basis into fresh (as of sync time) cluster locations */
254
255 __private_extern__ void
256 vs_wait_for_sync_writers(
257 vstruct_t vs)
258 {
259 while (vs->vs_writers != 0) {
260 default_pager_wait_write++;
261 vs->vs_waiting_write = TRUE;
262 assert_wait(&vs->vs_writers, THREAD_UNINT);
263 VS_UNLOCK(vs);
264 thread_block(THREAD_CONTINUE_NULL);
265 VS_LOCK(vs);
266 }
267 }
268
269
270 /*
271 * Finish a write. Pager is unlocked and returns unlocked.
272 */
273 __private_extern__ void
274 vs_finish_write(
275 vstruct_t vs)
276 {
277 VS_LOCK(vs);
278 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
279 vs->vs_waiting_write = FALSE;
280 VS_UNLOCK(vs);
281 thread_wakeup(&vs->vs_writers);
282 return;
283 }
284 VS_UNLOCK(vs);
285 }
286 #endif /* PARALLEL */
287
288 vstruct_t
289 vs_object_create(
290 vm_size_t size)
291 {
292 vstruct_t vs;
293
294 /*
295 * Allocate a vstruct. If there are any problems, then report them
296 * to the console.
297 */
298 vs = ps_vstruct_create(size);
299 if (vs == VSTRUCT_NULL) {
300 dprintf(("vs_object_create: unable to allocate %s\n",
301 "-- either run swapon command or reboot"));
302 return VSTRUCT_NULL;
303 }
304
305 return vs;
306 }
307
308 #if 0
309 void default_pager_add(vstruct_t, boolean_t); /* forward */
310
311 void
312 default_pager_add(
313 vstruct_t vs,
314 boolean_t internal)
315 {
316 memory_object_t mem_obj = vs->vs_mem_obj;
317 mach_port_t pset;
318 mach_port_mscount_t sync;
319 mach_port_t previous;
320 kern_return_t kr;
321 static char here[] = "default_pager_add";
322
323 /*
324 * The port currently has a make-send count of zero,
325 * because either we just created the port or we just
326 * received the port in a memory_object_create request.
327 */
328
329 if (internal) {
330 /* possibly generate an immediate no-senders notification */
331 sync = 0;
332 pset = default_pager_internal_set;
333 } else {
334 /* delay notification till send right is created */
335 sync = 1;
336 pset = default_pager_external_set;
337 }
338
339 ipc_port_make_sonce(mem_obj);
340 ip_lock(mem_obj); /* unlocked in nsrequest below */
341 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
342 }
343
344 #endif
345
346 kern_return_t
347 dp_memory_object_init(
348 memory_object_t mem_obj,
349 memory_object_control_t control,
350 vm_size_t pager_page_size)
351 {
352 vstruct_t vs;
353
354 assert(pager_page_size == vm_page_size);
355
356 memory_object_control_reference(control);
357
358 vs_lookup(mem_obj, vs);
359 vs_lock(vs);
360
361 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
362 Panic("bad request");
363
364 vs->vs_control = control;
365 vs_unlock(vs);
366
367 return KERN_SUCCESS;
368 }
369
370 kern_return_t
371 dp_memory_object_synchronize(
372 memory_object_t mem_obj,
373 memory_object_offset_t offset,
374 vm_size_t length,
375 vm_sync_t flags)
376 {
377 vstruct_t vs;
378
379 vs_lookup(mem_obj, vs);
380 vs_lock(vs);
381 vs_unlock(vs);
382
383 memory_object_synchronize_completed(vs->vs_control, offset, length);
384
385 return KERN_SUCCESS;
386 }
387
388 kern_return_t
389 dp_memory_object_unmap(
390 memory_object_t mem_obj)
391 {
392 panic("dp_memory_object_unmap");
393
394 return KERN_FAILURE;
395 }
396
397 kern_return_t
398 dp_memory_object_terminate(
399 memory_object_t mem_obj)
400 {
401 memory_object_control_t control;
402 vstruct_t vs;
403 kern_return_t kr;
404
405 /*
406 * control port is a receive right, not a send right.
407 */
408
409 vs_lookup(mem_obj, vs);
410 vs_lock(vs);
411
412 /*
413 * Wait for read and write requests to terminate.
414 */
415
416 vs_wait_for_readers(vs);
417 vs_wait_for_writers(vs);
418
419 /*
420 * After memory_object_terminate both memory_object_init
421 * and a no-senders notification are possible, so we need
422 * to clean up our reference to the memory_object_control
423 * to prepare for a new init.
424 */
425
426 control = vs->vs_control;
427 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
428
429 /* A bit of special-case ugliness here: wake up any waiting reads. */
430 /* These data requests had to be removed from the seqno traffic */
431 /* because of a performance bottleneck with large memory objects; */
432 /* the problem will right itself with the new component-based */
433 /* synchronous interface. The new async interface will be able to */
434 /* return failure during its sync phase. In the meantime ... */
435
436 thread_wakeup(&vs->vs_writers);
437 thread_wakeup(&vs->vs_async_pending);
438
439 vs_unlock(vs);
440
441 /*
442 * Now we deallocate our reference on the control.
443 */
444 memory_object_control_deallocate(control);
445 return KERN_SUCCESS;
446 }
447
448 void
449 dp_memory_object_reference(
450 memory_object_t mem_obj)
451 {
452 vstruct_t vs;
453
454 vs_lookup_safe(mem_obj, vs);
455 if (vs == VSTRUCT_NULL)
456 return;
457
458 VS_LOCK(vs);
459 assert(vs->vs_references > 0);
460 vs->vs_references++;
461 VS_UNLOCK(vs);
462 }
463
464 extern ipc_port_t max_pages_trigger_port;
465 extern int dp_pages_free;
466 extern int maximum_pages_free;
467 void
468 dp_memory_object_deallocate(
469 memory_object_t mem_obj)
470 {
471 vstruct_t vs;
472 mach_port_seqno_t seqno;
473
474 /*
475 * Because we don't give out multiple first references
476 * for a memory object, there can't be a race
477 * between getting a deallocate call and creating
478 * a new reference for the object.
479 */
480
481 vs_lookup_safe(mem_obj, vs);
482 if (vs == VSTRUCT_NULL)
483 return;
484
485 VS_LOCK(vs);
486 if (--vs->vs_references > 0) {
487 VS_UNLOCK(vs);
488 return;
489 }
490
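/* Open-coded equivalent of vs_lock(): take our place in the seqno */
/* queue and wait for our turn, so the final teardown is ordered */
/* behind any requests already issued against this object. */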
491 seqno = vs->vs_next_seqno++;
492 while (vs->vs_seqno != seqno) {
493 default_pager_wait_seqno++;
494 vs->vs_waiting_seqno = TRUE;
495 assert_wait(&vs->vs_seqno, THREAD_UNINT);
496 VS_UNLOCK(vs);
497 thread_block(THREAD_CONTINUE_NULL);
498 VS_LOCK(vs);
499 }
500
501 vs_async_wait(vs); /* wait for pending async IO */
502
503 /* do not delete the vs structure until the referencing pointers */
504 /* in the vstruct list have been expunged */
505
506 /* get VSL_LOCK out of order by using TRY mechanism */
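/* VSL_LOCK normally ranks ahead of the individual VS lock, but we */
/* already hold VS_LOCK here. If the try fails, back off: drop the */
/* vs lock, briefly take and release VSL to wait our turn, then */
/* re-take the vs lock, re-drain async I/O, and try again. */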
507 while(!VSL_LOCK_TRY()) {
508 VS_UNLOCK(vs);
509 VSL_LOCK();
510 VSL_UNLOCK();
511 VS_LOCK(vs);
512 vs_async_wait(vs); /* wait for pending async IO */
513 }
514
515
516 /*
517 * We shouldn't get a deallocation call
518 * when the kernel has the object cached.
519 */
520 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
521 Panic("bad request");
522
523 /*
524 * Unlock the pager (though there should be no one
525 * waiting for it).
526 */
527 VS_UNLOCK(vs);
528
529 /* Lock out paging segment removal for the duration of this */
530 /* call. We are vulnerable to losing a paging segment we rely */
531 /* on as soon as we remove ourselves from the VSL and unlock */
532
533 /* Keep our thread from blocking on attempt to trigger backing */
534 /* store release */
535 backing_store_release_trigger_disable += 1;
536
537 /*
538 * Remove the memory object port association, and then
539 * destroy the port itself. We must remove the object
540 * from the port list before deallocating the pager,
541 * because of default_pager_objects.
542 */
543 vstruct_list_delete(vs);
544 VSL_UNLOCK();
545
546 ps_vstruct_dealloc(vs);
547
548 VSL_LOCK();
549 backing_store_release_trigger_disable -= 1;
550 if(backing_store_release_trigger_disable == 0) {
551 thread_wakeup((event_t)&backing_store_release_trigger_disable);
552 }
553 VSL_UNLOCK();
554 }
555
556 kern_return_t
557 dp_memory_object_data_request(
558 memory_object_t mem_obj,
559 memory_object_offset_t offset,
560 vm_size_t length,
561 vm_prot_t protection_required)
562 {
563 vstruct_t vs;
564
565 GSTAT(global_stats.gs_pagein_calls++);
566
567
568 /* CDY at this moment vs_lookup panics when presented with the wrong */
569 /* port. As we are expanding this pager to support user interfaces */
570 /* this should be changed to return KERN_FAILURE */
571 vs_lookup(mem_obj, vs);
572 vs_lock(vs);
573
574 /* We are going to relax the strict sequencing here for performance */
575 /* reasons. We can do this because we know that the read and */
576 /* write threads are different and we rely on synchronization */
577 /* of read and write requests at the cache memory_object level. */
578 /* We break out wait_for_writers inline below; all of this goes away */
579 /* when we get real control of seqno with the new component interface. */
580
581 if (vs->vs_writers != 0) {
582 /* you can't hold on to the seqno and go */
583 /* to sleep like that */
584 vs_unlock(vs); /* bump internal count of seqno */
585 VS_LOCK(vs);
586 while (vs->vs_writers != 0) {
587 default_pager_wait_write++;
588 vs->vs_waiting_write = TRUE;
589 assert_wait(&vs->vs_writers, THREAD_UNINT);
590 VS_UNLOCK(vs);
591 thread_block(THREAD_CONTINUE_NULL);
592 VS_LOCK(vs);
593 vs_async_wait(vs);
594 }
595 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
596 VS_UNLOCK(vs);
597 return KERN_FAILURE;
598 }
599 vs_start_read(vs);
600 VS_UNLOCK(vs);
601 } else {
602 vs_start_read(vs);
603 vs_unlock(vs);
604 }
605
606 /*
607 * Request must be on a page boundary and a multiple of pages.
608 */
609 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
610 Panic("bad alignment");
611
612 pvs_cluster_read(vs, (vm_offset_t)offset, length);
613
614 vs_finish_read(vs);
615
616 return KERN_SUCCESS;
617 }
618
619 /*
620 * memory_object_data_initialize: check whether we already have each page, and
621 * write it if we do not. The implementation is far from optimized, and
622 * also assumes that the default_pager is single-threaded.
623 */
624 /* It is questionable whether or not a pager should decide what is relevant */
625 /* and what is not in data sent from the kernel. Data initialize has been */
626 /* changed to copy back all data sent to it in preparation for its eventual */
627 /* merge with data return. It is the kernel that should decide what pages */
628 /* to write back. As of the writing of this note, this is indeed the case: */
629 /* the kernel writes back one page at a time through this interface. */
630
631 kern_return_t
632 dp_memory_object_data_initialize(
633 memory_object_t mem_obj,
634 memory_object_offset_t offset,
635 vm_size_t size)
636 {
637 vstruct_t vs;
638
639 DEBUG(DEBUG_MO_EXTERNAL,
640 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
641 (int)mem_obj, (int)offset, (int)size));
642 GSTAT(global_stats.gs_pages_init += atop_32(size));
643
644 vs_lookup(mem_obj, vs);
645 vs_lock(vs);
646 vs_start_write(vs);
647 vs_unlock(vs);
648
649 /*
650 * Write the data via clustered writes. vs_cluster_write will
651 * loop if the address range specified crosses cluster
652 * boundaries.
653 */
654 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
655
656 vs_finish_write(vs);
657
658 return KERN_SUCCESS;
659 }
660
661 kern_return_t
662 dp_memory_object_data_unlock(
663 memory_object_t mem_obj,
664 memory_object_offset_t offset,
665 vm_size_t size,
666 vm_prot_t desired_access)
667 {
668 Panic("dp_memory_object_data_unlock: illegal");
669 return KERN_FAILURE;
670 }
671
672
673 kern_return_t
674 dp_memory_object_data_return(
675 memory_object_t mem_obj,
676 memory_object_offset_t offset,
677 vm_size_t size,
678 boolean_t dirty,
679 boolean_t kernel_copy)
680 {
681 vstruct_t vs;
682
683 DEBUG(DEBUG_MO_EXTERNAL,
684 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
685 (int)mem_obj, (int)offset, (int)size));
686 GSTAT(global_stats.gs_pageout_calls++);
687
688 /* This routine is called by the pageout thread. The pageout thread */
689 /* cannot be blocked waiting on read activity to complete. */
690 /* Therefore the grant of the vs lock must be done on a try versus a */
691 /* blocking basis. The code below relies on the fact that the */
692 /* interface is synchronous. Should this interface be again async */
693 /* for some type of pager in the future the pages will have to be */
694 /* returned through a separate, asynchronous path. */
695
696 vs_lookup(mem_obj, vs);
697
698 default_pager_total++;
699 if(!VS_TRY_LOCK(vs)) {
700 /* the call below will not be done by caller when we have */
701 /* a synchronous interface */
702 /* return KERN_LOCK_OWNED; */
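/* Instead, pull the pages into a UPL with UPL_NOBLOCK and abort */
/* it right away: in effect the pageout of this range is deferred */
/* and handed back to the VM system rather than blocking the */
/* pageout thread on the vs lock. */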
703 upl_t upl;
704 int page_list_count = 0;
705 memory_object_super_upl_request(vs->vs_control,
706 (memory_object_offset_t)offset,
707 size, size,
708 &upl, NULL, &page_list_count,
709 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
710 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
711 upl_abort(upl,0);
712 upl_deallocate(upl);
713 return KERN_SUCCESS;
714 }
715
716 if ((vs->vs_seqno != vs->vs_next_seqno++)
717 || (vs->vs_readers)
718 || (vs->vs_xfer_pending)) {
719 upl_t upl;
720 int page_list_count = 0;
721
722 vs->vs_next_seqno--;
723 VS_UNLOCK(vs);
724
725 /* the call below will not be done by caller when we have */
726 /* a synchronous interface */
727 /* return KERN_LOCK_OWNED; */
728 memory_object_super_upl_request(vs->vs_control,
729 (memory_object_offset_t)offset,
730 size, size,
731 &upl, NULL, &page_list_count,
732 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
733 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
734 upl_abort(upl,0);
735 upl_deallocate(upl);
736 return KERN_SUCCESS;
737 }
738
739 if ((size % vm_page_size) != 0)
740 Panic("bad alignment");
741
742 vs_start_write(vs);
743
744
745 vs->vs_async_pending += 1; /* protect from backing store contraction */
746 vs_unlock(vs);
747
748 /*
749 * Write the data via clustered writes. vs_cluster_write will
750 * loop if the address range specified crosses cluster
751 * boundaries.
752 */
753 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
754
755 vs_finish_write(vs);
756
757 /* temporary, need a finer lock based on cluster */
758
759 VS_LOCK(vs);
760 vs->vs_async_pending -= 1; /* release vs_async_wait */
761 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
762 vs->vs_waiting_async = FALSE;
763 VS_UNLOCK(vs);
764 thread_wakeup(&vs->vs_async_pending);
765 } else {
766 VS_UNLOCK(vs);
767 }
768
769
770 return KERN_SUCCESS;
771 }
772
773 /*
774 * Routine: default_pager_memory_object_create
775 * Purpose:
776 * Handle requests for memory objects from the
777 * kernel.
778 * Notes:
779 * Because we only give out the default memory
780 * manager port to the kernel, we don't have to
781 * be so paranoid about the contents.
782 */
783 kern_return_t
784 default_pager_memory_object_create(
785 memory_object_default_t dmm,
786 vm_size_t new_size,
787 memory_object_t *new_mem_obj)
788 {
789 vstruct_t vs;
790
791 assert(dmm == default_pager_object);
792
793 vs = vs_object_create(new_size);
794 if (vs == VSTRUCT_NULL)
795 return KERN_RESOURCE_SHORTAGE;
796
797 vs->vs_next_seqno = 0;
798
799 /*
800 * Set up associations between this memory object
801 * and this default_pager structure
802 */
803
804 vs->vs_mem_obj = ISVS;
805 vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
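/* Storing ISVS in vs_mem_obj and faking the IKOT_MEMORY_OBJECT kobject */
/* type lets the in-kernel interface treat the vstruct itself as the */
/* memory object (vs_to_mem_obj / vs_lookup) without creating a real */
/* port, per the note above about avoiding IPC for the in-kernel case. */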
806
807 /*
808 * After this, other threads might receive requests
809 * for this memory object or find it in the port list.
810 */
811
812 vstruct_list_insert(vs);
813 *new_mem_obj = vs_to_mem_obj(vs);
814 return KERN_SUCCESS;
815 }
816
817 /*
818 * Create an external object.
819 */
820 kern_return_t
821 default_pager_object_create(
822 default_pager_t pager,
823 vm_size_t size,
824 memory_object_t *mem_objp)
825 {
826 vstruct_t vs;
827 kern_return_t result;
828 struct vstruct_alias *alias_struct;
829
830
831 if (pager != default_pager_object)
832 return KERN_INVALID_ARGUMENT;
833
834 vs = vs_object_create(size);
835 if (vs == VSTRUCT_NULL)
836 return KERN_RESOURCE_SHORTAGE;
837
838 /*
839 * Set up associations between the default pager
840 * and this vstruct structure
841 */
842 vs->vs_mem_obj = ISVS;
843 vstruct_list_insert(vs);
844 *mem_objp = vs_to_mem_obj(vs);
845 return KERN_SUCCESS;
846 }
847
848 kern_return_t
849 default_pager_objects(
850 default_pager_t pager,
851 default_pager_object_array_t *objectsp,
852 mach_msg_type_number_t *ocountp,
853 memory_object_array_t *pagersp,
854 mach_msg_type_number_t *pcountp)
855 {
856 vm_offset_t oaddr = 0; /* memory for objects */
857 vm_size_t osize = 0; /* current size */
858 default_pager_object_t * objects;
859 unsigned int opotential;
860
861 vm_offset_t paddr = 0; /* memory for pagers */
862 vm_size_t psize = 0; /* current size */
863 memory_object_t * pagers;
864 unsigned int ppotential;
865
866 unsigned int actual;
867 unsigned int num_objects;
868 kern_return_t kr;
869 vstruct_t entry;
870 /*
871 if (pager != default_pager_default_port)
872 return KERN_INVALID_ARGUMENT;
873 */
874
875 /* start with the inline memory */
876
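/* The caller's out-of-line buffer arrives as a vm_map_copy_t. */
/* Copy it out into ipc_kernel_map and wire it so it can be filled */
/* in below, while locks are held, without faulting on pageable memory. */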
877 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&objects,
878 (vm_map_copy_t) *objectsp);
879
880 if (kr != KERN_SUCCESS)
881 return kr;
882
883 osize = round_page_32(*ocountp * sizeof * objects);
884 kr = vm_map_wire(ipc_kernel_map,
885 trunc_page_32((vm_offset_t)objects),
886 round_page_32(((vm_offset_t)objects) + osize),
887 VM_PROT_READ|VM_PROT_WRITE, FALSE);
888 osize=0;
889
890 *objectsp = objects;
891 /* we start with the inline space */
892
893
894 num_objects = 0;
895 opotential = *ocountp;
896
897 pagers = (memory_object_t *) *pagersp;
898 ppotential = *pcountp;
899
900 VSL_LOCK();
901
902 /*
903 * We will send no more than this many
904 */
905 actual = vstruct_list.vsl_count;
906 VSL_UNLOCK();
907
908 if (opotential < actual) {
909 vm_offset_t newaddr;
910 vm_size_t newsize;
911
912 newsize = 2 * round_page_32(actual * sizeof * objects);
913
914 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
915 if (kr != KERN_SUCCESS)
916 goto nomemory;
917
918 oaddr = newaddr;
919 osize = newsize;
920 opotential = osize / sizeof * objects;
921 objects = (default_pager_object_t *)oaddr;
922 }
923
924 if (ppotential < actual) {
925 vm_offset_t newaddr;
926 vm_size_t newsize;
927
928 newsize = 2 * round_page_32(actual * sizeof * pagers);
929
930 kr = vm_allocate(kernel_map, &newaddr, newsize, TRUE);
931 if (kr != KERN_SUCCESS)
932 goto nomemory;
933
934 paddr = newaddr;
935 psize = newsize;
936 ppotential = psize / sizeof * pagers;
937 pagers = (memory_object_t *)paddr;
938 }
939
940 /*
941 * Now scan the list.
942 */
943
944 VSL_LOCK();
945
946 num_objects = 0;
947 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
948
949 memory_object_t pager;
950 vm_size_t size;
951
952 if ((num_objects >= opotential) ||
953 (num_objects >= ppotential)) {
954
955 /*
956 * This should be rare. In any case,
957 * we will only miss recent objects,
958 * because they are added at the end.
959 */
960 break;
961 }
962
963 /*
964 * Avoid interfering with normal operations
965 */
966 if (!VS_MAP_TRY_LOCK(entry))
967 goto not_this_one;
968 size = ps_vstruct_allocated_size(entry);
969 VS_MAP_UNLOCK(entry);
970
972
973 /*
974 * We need a reference for our caller. Adding this
975 * reference through the linked list could race with
976 * destruction of the object. If we find the object
977 * has no references, just give up on it.
978 */
979 VS_LOCK(entry);
980 if (entry->vs_references == 0) {
981 VS_UNLOCK(entry);
982 goto not_this_one;
983 }
984 dp_memory_object_reference(vs_to_mem_obj(entry));
985 VS_UNLOCK(entry);
986
987 /* the arrays are wired, so no deadlock worries */
988
989 objects[num_objects].dpo_object = (vm_offset_t) entry;
990 objects[num_objects].dpo_size = size;
991 pagers [num_objects++] = vs_to_mem_obj(entry); /* the object referenced above */
992 continue;
993
994 not_this_one:
995 /*
996 * Do not return garbage
997 */
998 objects[num_objects].dpo_object = (vm_offset_t) 0;
999 objects[num_objects].dpo_size = 0;
1000 pagers[num_objects++] = MEMORY_OBJECT_NULL;
1001
1002 }
1003
1004 VSL_UNLOCK();
1005
1006 /*
1007 * Deallocate and clear unused memory.
1008 * (Returned memory will automagically become pageable.)
1009 */
1010
1011 if (objects == *objectsp) {
1012
1013 /*
1014 * Our returned information fit inline.
1015 * Nothing to deallocate.
1016 */
1017 *ocountp = num_objects;
1018 } else if (actual == 0) {
1019 (void) vm_deallocate(kernel_map, oaddr, osize);
1020
1021 /* return zero items inline */
1022 *ocountp = 0;
1023 } else {
1024 vm_offset_t used;
1025
1026 used = round_page_32(actual * sizeof * objects);
1027
1028 if (used != osize)
1029 (void) vm_deallocate(kernel_map,
1030 oaddr + used, osize - used);
1031
1032 *objectsp = objects;
1033 *ocountp = num_objects;
1034 }
1035
1036 if (pagers == (memory_object_t *)*pagersp) {
1037
1038 /*
1039 * Our returned information fit inline.
1040 * Nothing to deallocate.
1041 */
1042
1043 *pcountp = num_objects;
1044 } else if (actual == 0) {
1045 (void) vm_deallocate(kernel_map, paddr, psize);
1046
1047 /* return zero items inline */
1048 *pcountp = 0;
1049 } else {
1050 vm_offset_t used;
1051
1052 used = round_page_32(actual * sizeof * pagers);
1053
1054 if (used != psize)
1055 (void) vm_deallocate(kernel_map,
1056 paddr + used, psize - used);
1057
1058 *pagersp = (memory_object_array_t)pagers;
1059 *pcountp = num_objects;
1060 }
1061 (void) vm_map_unwire(kernel_map, (vm_offset_t)objects,
1062 *ocountp + (vm_offset_t)objects, FALSE);
1063 (void) vm_map_copyin(kernel_map, (vm_offset_t)objects,
1064 *ocountp, TRUE, (vm_map_copy_t *)objectsp);
1065
1066 return KERN_SUCCESS;
1067
1068 nomemory:
1069 {
1070 register int i;
1071 for (i = 0; i < num_objects; i++)
1072 if (pagers[i] != MEMORY_OBJECT_NULL)
1073 memory_object_deallocate(pagers[i]);
1074 }
1075
1076 if (objects != *objectsp)
1077 (void) vm_deallocate(kernel_map, oaddr, osize);
1078
1079 if (pagers != (memory_object_t *)*pagersp)
1080 (void) vm_deallocate(kernel_map, paddr, psize);
1081
1082 return KERN_RESOURCE_SHORTAGE;
1083 }
1084
1085 kern_return_t
1086 default_pager_object_pages(
1087 default_pager_t pager,
1088 memory_object_t object,
1089 default_pager_page_array_t *pagesp,
1090 mach_msg_type_number_t *countp)
1091 {
1092 vm_offset_t addr; /* memory for page offsets */
1093 vm_size_t size = 0; /* current memory size */
1094 default_pager_page_t * pages;
1095 unsigned int potential, actual;
1096 kern_return_t kr;
1097
1098
1099 if (pager != default_pager_object)
1100 return KERN_INVALID_ARGUMENT;
1101
1102 kr = vm_map_copyout(ipc_kernel_map, (vm_offset_t *)&pages,
1103 (vm_map_copy_t) *pagesp);
1104
1105 if (kr != KERN_SUCCESS)
1106 return kr;
1107
1108 size = round_page_32(*countp * sizeof * pages);
1109 kr = vm_map_wire(ipc_kernel_map,
1110 trunc_page_32((vm_offset_t)pages),
1111 round_page_32(((vm_offset_t)pages) + size),
1112 VM_PROT_READ|VM_PROT_WRITE, FALSE);
1113 size=0;
1114
1115 *pagesp = pages;
1116 /* we start with the inline space */
1117
1118 addr = (vm_offset_t)pages;
1119 potential = *countp;
1120
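/* Retry loop: find the vstruct for this object and collect its page */
/* offsets. If the inline/wired buffer is too small for the actual */
/* page count, allocate a larger one and try again. */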
1121 for (;;) {
1122 vstruct_t entry;
1123
1124 VSL_LOCK();
1125 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1126 vs_links) {
1127 VS_LOCK(entry);
1128 if (vs_to_mem_obj(entry) == object) {
1129 VSL_UNLOCK();
1130 goto found_object;
1131 }
1132 VS_UNLOCK(entry);
1133 }
1134 VSL_UNLOCK();
1135
1136 /* did not find the object */
1137
1138 if (pages != *pagesp)
1139 (void) vm_deallocate(kernel_map, addr, size);
1140 return KERN_INVALID_ARGUMENT;
1141
1142 found_object:
1143
1144 if (!VS_MAP_TRY_LOCK(entry)) {
1145 /* oh well bad luck */
1146 int wresult;
1147
1148 VS_UNLOCK(entry);
1149
1150 assert_wait_timeout( 1, THREAD_UNINT );
1151 wresult = thread_block(THREAD_CONTINUE_NULL);
1152 assert(wresult == THREAD_TIMED_OUT);
1153 continue;
1154 }
1155
1156 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1157 VS_MAP_UNLOCK(entry);
1158 VS_UNLOCK(entry);
1159
1160 if (actual <= potential)
1161 break;
1162
1163 /* allocate more memory */
1164
1165 if (pages != *pagesp)
1166 (void) vm_deallocate(kernel_map, addr, size);
1167 size = round_page_32(actual * sizeof * pages);
1168 kr = vm_allocate(kernel_map, &addr, size, TRUE);
1169 if (kr != KERN_SUCCESS)
1170 return kr;
1171 pages = (default_pager_page_t *)addr;
1172 potential = size / sizeof * pages;
1173 }
1174
1175 /*
1176 * Deallocate and clear unused memory.
1177 * (Returned memory will automagically become pageable.)
1178 */
1179
1180 if (pages == *pagesp) {
1181
1182 /*
1183 * Our returned information fit inline.
1184 * Nothing to deallocate.
1185 */
1186
1187 *countp = actual;
1188 } else if (actual == 0) {
1189 (void) vm_deallocate(kernel_map, addr, size);
1190
1191 /* return zero items inline */
1192 *countp = 0;
1193 } else {
1194 vm_offset_t used;
1195
1196 used = round_page_32(actual * sizeof * pages);
1197
1198 if (used != size)
1199 (void) vm_deallocate(kernel_map,
1200 addr + used, size - used);
1201
1202 *pagesp = pages;
1203 *countp = actual;
1204 }
1205 (void) vm_map_unwire(kernel_map, (vm_offset_t)pages,
1206 *countp + (vm_offset_t)pages, FALSE);
1207 (void) vm_map_copyin(kernel_map, (vm_offset_t)pages,
1208 *countp, TRUE, (vm_map_copy_t *)pagesp);
1209 return KERN_SUCCESS;
1210 }