/*
 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Default Pager.
 *	Memory Object Management.
 */

#include "default_pager_internal.h"
#include <default_pager/default_pager_object_server.h>
#include <mach/memory_object_default_server.h>
#include <mach/memory_object_control.h>
#include <mach/memory_object_types.h>
#include <mach/memory_object_server.h>
#include <mach/upl.h>
#include <mach/vm_map.h>
#include <vm/memory_object.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
#include <vm/vm_protos.h>

/* forward declaration */
vstruct_t vs_object_create(dp_size_t size);

/*
 * List of all vstructs.  A specific vstruct is
 * found directly via its port; this list is
 * used only for monitoring purposes, by the
 * default_pager_object* calls and by ps_delete
 * when abstract memory objects must be scanned
 * to remove any live storage on a segment that
 * is to be removed.
 */
struct vstruct_list_head	vstruct_list;

__private_extern__ void
vstruct_list_insert(
	vstruct_t vs)
{
	VSL_LOCK();
	queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count++;
	VSL_UNLOCK();
}


/*
 * The caller must hold the VSL lock; unlike vstruct_list_insert(),
 * this routine does not take it itself (see dp_memory_object_deallocate()).
 */
__private_extern__ void
vstruct_list_delete(
	vstruct_t vs)
{
	queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
	vstruct_list.vsl_count--;
}

/*
 * We use the sequence numbers on requests to regulate
 * our parallelism.  In general, we allow multiple reads and writes
 * to proceed in parallel, with the exception that reads must
 * wait for previous writes to finish.  (Because the kernel might
 * generate a data-request for a page on the heels of a data-write
 * for the same page, and we must avoid returning stale data.)
 * Terminate requests wait for preceding reads and writes to finish.
 */
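
/*
 * Illustrative ordering (a sketch, not actual kernel call sites): if the
 * kernel issues a write and then a read of the same page,
 *
 *	dp_memory_object_data_return(obj, off, PAGE_SIZE, ...);   // write P
 *	dp_memory_object_data_request(obj, off, PAGE_SIZE, ...);  // read P
 *
 * the read blocks in its writer-wait loop (see
 * dp_memory_object_data_request() below) until vs_writers drops to zero,
 * so it can never observe stale data for page P.
 */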

static unsigned int	default_pager_total = 0;	/* debugging */
static unsigned int	default_pager_wait_seqno = 0;	/* debugging */
static unsigned int	default_pager_wait_read = 0;	/* debugging */
static unsigned int	default_pager_wait_write = 0;	/* debugging */

__private_extern__ void
vs_async_wait(
	vstruct_t	vs)
{
	ASSERT(vs->vs_async_pending >= 0);
	while (vs->vs_async_pending > 0) {
		vs->vs_waiting_async = TRUE;
		assert_wait(&vs->vs_async_pending, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	ASSERT(vs->vs_async_pending == 0);
}

#if	PARALLEL
/*
 * Waits for correct sequence number.  Leaves pager locked.
 *
 * JMM - Sequence numbers guarantee ordering of requests generated
 *	 by a single thread if the receiver is multithreaded and
 *	 the interfaces are asynchronous (i.e. the sender can generate
 *	 more than one request before the first is received in the
 *	 pager).  Normally, IPC would generate these numbers in that
 *	 case.  But we are trying to avoid using IPC for the in-kernel
 *	 scenario.  Since these are actually invoked synchronously
 *	 anyway (in-kernel), we can just fake the sequence number
 *	 generation here (thus avoiding the dependence on IPC).
 */
__private_extern__ void
vs_lock(
	vstruct_t	vs)
{
	mach_port_seqno_t seqno;

	default_pager_total++;
	VS_LOCK(vs);

	seqno = vs->vs_next_seqno++;

	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Increments sequence number and unlocks pager.
 */
__private_extern__ void
vs_unlock(vstruct_t vs)
{
	vs->vs_seqno++;
	if (vs->vs_waiting_seqno) {
		vs->vs_waiting_seqno = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_seqno);
		return;
	}
	VS_UNLOCK(vs);
}
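
/*
 * Usage sketch: the entry points below bracket their access to the
 * vstruct this way (a minimal outline of the pattern used in this file,
 * with the body elided):
 *
 *	vstruct_t vs;
 *
 *	vs_lookup(mem_obj, vs);
 *	vs_lock(vs);	// blocks until this request's seqno comes up
 *	... examine or update vs state ...
 *	vs_unlock(vs);	// advances vs_seqno, admits the next request
 */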

/*
 * Start a read - one more reader.  Pager must be locked.
 */
__private_extern__ void
vs_start_read(
	vstruct_t vs)
{
	vs->vs_readers++;
}

/*
 * Wait for readers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_readers(
	vstruct_t vs)
{
	while (vs->vs_readers != 0) {
		default_pager_wait_read++;
		vs->vs_waiting_read = TRUE;
		assert_wait(&vs->vs_readers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a read.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_read(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
		vs->vs_waiting_read = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_readers);
		return;
	}
	VS_UNLOCK(vs);
}

/*
 * Start a write - one more writer.  Pager must be locked.
 */
__private_extern__ void
vs_start_write(
	vstruct_t vs)
{
	vs->vs_writers++;
}

/*
 * Wait for writers.  Unlocks and relocks pager if wait needed.
 */
__private_extern__ void
vs_wait_for_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
	vs_async_wait(vs);
}

/* This is to be used by the transfer-from-segment code ONLY.  The     */
/* transfer code holds off vs destruction by keeping the               */
/* vs_async_pending count non-zero.  It will not conflict with other   */
/* writers on an async basis because it only writes on a cluster basis */
/* into fresh (as of sync time) cluster locations.                     */

__private_extern__ void
vs_wait_for_sync_writers(
	vstruct_t vs)
{
	while (vs->vs_writers != 0) {
		default_pager_wait_write++;
		vs->vs_waiting_write = TRUE;
		assert_wait(&vs->vs_writers, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}
}

/*
 * Finish a write.  Pager is unlocked and returns unlocked.
 */
__private_extern__ void
vs_finish_write(
	vstruct_t vs)
{
	VS_LOCK(vs);
	if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
		vs->vs_waiting_write = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_writers);
		return;
	}
	VS_UNLOCK(vs);
}
#endif	/* PARALLEL */
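
/*
 * Pairing sketch: the reader-count primitives above are used as in
 * dp_memory_object_data_request() below (outline only, error paths
 * elided):
 *
 *	vs_lock(vs);
 *	vs_start_read(vs);	// count the reader while still locked
 *	vs_unlock(vs);
 *	... pvs_cluster_read(vs, ...) ...
 *	vs_finish_read(vs);	// drop the count, wake vs_wait_for_readers()
 *
 * Writers pair vs_start_write()/vs_finish_write() the same way.
 */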

vstruct_t
vs_object_create(
	dp_size_t size)
{
	vstruct_t	vs;

	/*
	 * Allocate a vstruct. If there are any problems, then report them
	 * to the console.
	 */
	vs = ps_vstruct_create(size);
	if (vs == VSTRUCT_NULL) {
		dprintf(("vs_object_create: unable to allocate %s\n",
			 "-- either run swapon command or reboot"));
		return VSTRUCT_NULL;
	}

	return vs;
}

#if 0
void default_pager_add(vstruct_t, boolean_t);	/* forward */

void
default_pager_add(
	vstruct_t vs,
	boolean_t internal)
{
	memory_object_t		mem_obj = vs->vs_mem_obj;
	mach_port_t		pset;
	mach_port_mscount_t	sync;
	mach_port_t		previous;
	kern_return_t		kr;
	static char		here[] = "default_pager_add";

	/*
	 * The port currently has a make-send count of zero,
	 * because either we just created the port or we just
	 * received the port in a memory_object_create request.
	 */

	if (internal) {
		/* possibly generate an immediate no-senders notification */
		sync = 0;
		pset = default_pager_internal_set;
	} else {
		/* delay notification till send right is created */
		sync = 1;
		pset = default_pager_external_set;
	}

	ip_lock(mem_obj);  /* unlocked in nsrequest below */
	ipc_port_make_sonce_locked(mem_obj);
	ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
}

#endif

const struct memory_object_pager_ops default_pager_ops = {
	dp_memory_object_reference,
	dp_memory_object_deallocate,
	dp_memory_object_init,
	dp_memory_object_terminate,
	dp_memory_object_data_request,
	dp_memory_object_data_return,
	dp_memory_object_data_initialize,
	dp_memory_object_data_unlock,
	dp_memory_object_synchronize,
	dp_memory_object_map,
	dp_memory_object_last_unmap,
	dp_memory_object_data_reclaim,
	"default pager"
};
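
/*
 * Dispatch sketch: the VM layer reaches these entry points through the
 * ops vector rather than through IPC.  Roughly (a sketch of the caller
 * side; the exact call sites live in the vm/memory_object.c dispatch
 * layer):
 *
 *	kr = (mem_obj->mo_pager_ops->memory_object_data_request)(
 *		mem_obj, offset, length, prot, fault_info);
 */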

kern_return_t
dp_memory_object_init(
	memory_object_t		mem_obj,
	memory_object_control_t	control,
	__unused memory_object_cluster_size_t pager_page_size)
{
	vstruct_t	vs;

	assert(pager_page_size == vm_page_size);

	memory_object_control_reference(control);

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	vs->vs_control = control;
	vs_unlock(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_synchronize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_size_t	length,
	__unused vm_sync_t	flags)
{
	vstruct_t	vs;

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_unlock(vs);

	memory_object_synchronize_completed(vs->vs_control, offset, length);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_map(
	__unused memory_object_t	mem_obj,
	__unused vm_prot_t		prot)
{
	panic("dp_memory_object_map");
	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_last_unmap(
	__unused memory_object_t	mem_obj)
{
	panic("dp_memory_object_last_unmap");
	return KERN_FAILURE;
}

kern_return_t
dp_memory_object_data_reclaim(
	memory_object_t		mem_obj,
	boolean_t		reclaim_backing_store)
{
	vstruct_t		vs;
	kern_return_t		retval;

	vs_lookup(mem_obj, vs);
	for (;;) {
		vs_lock(vs);
		vs_async_wait(vs);
		if (!vs->vs_xfer_pending) {
			break;
		}
		/*
		 * A transfer is in progress: drop the lock before
		 * retrying, since vs_lock() acquires it afresh.
		 */
		vs_unlock(vs);
	}
	vs->vs_xfer_pending = TRUE;
	vs_unlock(vs);

	retval = ps_vstruct_reclaim(vs, TRUE, reclaim_backing_store);

	vs_lock(vs);
	vs->vs_xfer_pending = FALSE;
	vs_unlock(vs);

	return retval;
}

kern_return_t
dp_memory_object_terminate(
	memory_object_t	mem_obj)
{
	memory_object_control_t	control;
	vstruct_t		vs;

	/*
	 * control port is a receive right, not a send right.
	 */

	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/*
	 * Wait for read and write requests to terminate.
	 */

	vs_wait_for_readers(vs);
	vs_wait_for_writers(vs);

	/*
	 * After memory_object_terminate both memory_object_init
	 * and a no-senders notification are possible, so we need
	 * to clean up our reference to the memory_object_control
	 * to prepare for a new init.
	 */

	control = vs->vs_control;
	vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;

	/* A bit of special-case ugliness here: wake up any waiting reads. */
	/* These data requests had to be removed from the seqno traffic    */
	/* because of a performance bottleneck with large memory objects.  */
	/* The problem will right itself with the new component-based      */
	/* synchronous interface.  The new async will be able to return    */
	/* failure during its sync phase.  In the meantime ...             */

	thread_wakeup(&vs->vs_writers);
	thread_wakeup(&vs->vs_async_pending);

	vs_unlock(vs);

	/*
	 * Now we deallocate our reference on the control.
	 */
	memory_object_control_deallocate(control);
	return KERN_SUCCESS;
}

void
dp_memory_object_reference(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	assert(vs->vs_references > 0);
	vs->vs_references++;
	VS_UNLOCK(vs);
}

void
dp_memory_object_deallocate(
	memory_object_t		mem_obj)
{
	vstruct_t		vs;
	mach_port_seqno_t	seqno;

	/*
	 * Because we don't give out multiple first references
	 * for a memory object, there can't be a race
	 * between getting a deallocate call and creating
	 * a new reference for the object.
	 */

	vs_lookup_safe(mem_obj, vs);
	if (vs == VSTRUCT_NULL)
		return;

	VS_LOCK(vs);
	if (--vs->vs_references > 0) {
		VS_UNLOCK(vs);
		return;
	}

	seqno = vs->vs_next_seqno++;
	while (vs->vs_seqno != seqno) {
		default_pager_wait_seqno++;
		vs->vs_waiting_seqno = TRUE;
		assert_wait(&vs->vs_seqno, THREAD_UNINT);
		VS_UNLOCK(vs);
		thread_block(THREAD_CONTINUE_NULL);
		VS_LOCK(vs);
	}

	vs_async_wait(vs);	/* wait for pending async IO */

	/* Do not delete the vs structure until the referencing pointers */
	/* in the vstruct list have been expunged.                       */

	/*
	 * The lock ordering is VSL before VS, but we already hold VS, so
	 * take the VSL lock out of order with the TRY mechanism: on
	 * failure, drop VS, wait our turn on VSL, and start over.
	 */
	while (!VSL_LOCK_TRY()) {
		VS_UNLOCK(vs);
		VSL_LOCK();
		VSL_UNLOCK();
		VS_LOCK(vs);
		vs_async_wait(vs);	/* wait for pending async IO */
	}

	/*
	 * We shouldn't get a deallocation call
	 * when the kernel has the object cached.
	 */
	if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
		Panic("bad request");

	/*
	 * Unlock the pager (though there should be no one
	 * waiting for it).
	 */
	VS_UNLOCK(vs);

	/* Lock out paging segment removal for the duration of this call. */
	/* We are vulnerable to losing a paging segment we rely on as soon */
	/* as we remove ourselves from the VSL and unlock.                 */

	/* Keep our thread from blocking on attempt to trigger backing */
	/* store release.                                              */
	backing_store_release_trigger_disable += 1;

	/*
	 * Remove the memory object port association, and then
	 * destroy the port itself.  We must remove the object
	 * from the port list before deallocating the pager,
	 * because of default_pager_objects().
	 */
	vstruct_list_delete(vs);
	VSL_UNLOCK();

	ps_vstruct_dealloc(vs);

	VSL_LOCK();
	backing_store_release_trigger_disable -= 1;
	if (backing_store_release_trigger_disable == 0) {
		thread_wakeup((event_t)&backing_store_release_trigger_disable);
	}
	VSL_UNLOCK();
}
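
/*
 * Reference-counting sketch: each dp_memory_object_reference() must be
 * balanced by a dp_memory_object_deallocate(); the deallocate that drops
 * vs_references to zero tears the vstruct down (illustrative counts,
 * starting from a single live reference):
 *
 *	dp_memory_object_reference(mem_obj);	// vs_references: 1 -> 2
 *	...
 *	dp_memory_object_deallocate(mem_obj);	// 2 -> 1, object survives
 *	dp_memory_object_deallocate(mem_obj);	// 1 -> 0, vstruct destroyed
 */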

kern_return_t
dp_memory_object_data_request(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	length,
	__unused vm_prot_t	protection_required,
	memory_object_fault_info_t	fault_info)
{
	vstruct_t		vs;
	kern_return_t		kr = KERN_SUCCESS;

	GSTAT(global_stats.gs_pagein_calls++);

	/* CDY: At this moment vs_lookup panics when presented with the      */
	/* wrong port.  As we are expanding this pager to support user       */
	/* interfaces, this should be changed to return KERN_FAILURE.        */
	vs_lookup(mem_obj, vs);
	vs_lock(vs);

	/* We are going to relax the strict sequencing here for performance  */
	/* reasons.  We can do this because we know that the read and write  */
	/* threads are different, and we rely on synchronization of read and */
	/* write requests at the cache memory_object level.  Break out       */
	/* wait_for_writers; all of this goes away when we get real control  */
	/* of seqno with the new component interface.                        */

	if (vs->vs_writers != 0) {
		/* you can't hold on to the seqno and go */
		/* to sleep like that */
		vs_unlock(vs);  /* bump internal count of seqno */
		VS_LOCK(vs);
		while (vs->vs_writers != 0) {
			default_pager_wait_write++;
			vs->vs_waiting_write = TRUE;
			assert_wait(&vs->vs_writers, THREAD_UNINT);
			VS_UNLOCK(vs);
			thread_block(THREAD_CONTINUE_NULL);
			VS_LOCK(vs);
			vs_async_wait(vs);
		}
		if (vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
			VS_UNLOCK(vs);
			return KERN_FAILURE;
		}
		vs_start_read(vs);
		VS_UNLOCK(vs);
	} else {
		vs_start_read(vs);
		vs_unlock(vs);
	}

	/*
	 * Request must be on a page boundary and a multiple of pages.
	 */
	if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
		Panic("bad alignment");

	assert((dp_offset_t) offset == offset);
	kr = pvs_cluster_read(vs, (dp_offset_t) offset, length, fault_info);

	/*
	 * Regular data requests have a non-zero length and always return
	 * KERN_SUCCESS.  Their actual success is determined by whether or
	 * not they provide a page, i.e. whether we call upl_commit() or
	 * upl_abort().  A length of 0 means that the caller is only asking
	 * whether the pager has a copy of that page.  The answer to that
	 * question is provided by the return value: KERN_SUCCESS means
	 * that the pager does have the page.
	 */
	if (length) {
		kr = KERN_SUCCESS;
	}

	vs_finish_read(vs);

	return kr;
}
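
/*
 * Probe sketch: exploiting the length == 0 convention above to ask
 * whether the pager holds a copy of a page without requesting it
 * (illustrative only; passing NULL fault info for a probe is an
 * assumption, not something this file guarantees):
 *
 *	kern_return_t kr;
 *
 *	kr = dp_memory_object_data_request(mem_obj, offset, 0,
 *					   VM_PROT_READ, NULL);
 *	if (kr == KERN_SUCCESS) {
 *		// the default pager has a copy of the page at 'offset'
 *	}
 */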

/*
 * memory_object_data_initialize: check whether we already have each page, and
 * write it if we do not.  The implementation is far from optimized, and
 * also assumes that the default_pager is single-threaded.
 */
/* It is questionable whether or not a pager should decide what is relevant */
/* and what is not in data sent from the kernel.  Data initialize has been  */
/* changed to copy back all data sent to it in preparation for its eventual */
/* merge with data return.  It is the kernel that should decide what pages  */
/* to write back.  As of the writing of this note this is indeed the case:  */
/* the kernel writes back one page at a time through this interface.        */

kern_return_t
dp_memory_object_data_initialize(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	size)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pages_init += atop_32(size));

	vs_lookup(mem_obj, vs);
	vs_lock(vs);
	vs_start_write(vs);
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t)offset, size, FALSE, 0);

	vs_finish_write(vs);

	return KERN_SUCCESS;
}

kern_return_t
dp_memory_object_data_unlock(
	__unused memory_object_t	mem_obj,
	__unused memory_object_offset_t	offset,
	__unused memory_object_size_t	size,
	__unused vm_prot_t		desired_access)
{
	Panic("dp_memory_object_data_unlock: illegal");
	return KERN_FAILURE;
}

/*ARGSUSED8*/
kern_return_t
dp_memory_object_data_return(
	memory_object_t		mem_obj,
	memory_object_offset_t	offset,
	memory_object_cluster_size_t	size,
	__unused memory_object_offset_t	*resid_offset,
	__unused int		*io_error,
	__unused boolean_t	dirty,
	__unused boolean_t	kernel_copy,
	__unused int		upl_flags)
{
	vstruct_t	vs;

	DP_DEBUG(DEBUG_MO_EXTERNAL,
		 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
		  (int)mem_obj, (int)offset, (int)size));
	GSTAT(global_stats.gs_pageout_calls++);

	/* This routine is called by the pageout thread, which must never   */
	/* be blocked by read activities.  Therefore the grant of the vs    */
	/* lock must be done on a try rather than a blocking basis.  The    */
	/* code below relies on the fact that the interface is synchronous. */
	/* Should this interface again become async for some type of pager  */
	/* in the future, the pages will have to be returned through a      */
	/* separate, asynchronous path.                                     */

	vs_lookup(mem_obj, vs);

	default_pager_total++;

	/* might be unreachable if VS_TRY_LOCK is, by definition, always true */
	__unreachable_ok_push
	if (!VS_TRY_LOCK(vs)) {
		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		upl_t		upl;
		unsigned int	page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}
	__unreachable_ok_pop

	if ((vs->vs_seqno != vs->vs_next_seqno++)
	    || (vs->vs_readers)
	    || (vs->vs_xfer_pending)) {
		upl_t		upl;
		unsigned int	page_list_count = 0;

		vs->vs_next_seqno--;
		VS_UNLOCK(vs);

		/* the call below will not be done by caller when we have */
		/* a synchronous interface */
		/* return KERN_LOCK_OWNED; */
		memory_object_super_upl_request(vs->vs_control,
					(memory_object_offset_t)offset,
					size, size,
					&upl, NULL, &page_list_count,
					UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
					| UPL_NO_SYNC | UPL_COPYOUT_FROM);
		upl_abort(upl, 0);
		upl_deallocate(upl);
		return KERN_SUCCESS;
	}

	if ((size % vm_page_size) != 0)
		Panic("bad alignment");

	vs_start_write(vs);

	vs->vs_async_pending += 1;  /* protect from backing store contraction */
	vs_unlock(vs);

	/*
	 * Write the data via clustered writes.  vs_cluster_write will
	 * loop if the address range specified crosses cluster
	 * boundaries.
	 */
	assert((upl_offset_t) offset == offset);
	vs_cluster_write(vs, 0, (upl_offset_t) offset, size, FALSE, 0);

	vs_finish_write(vs);

	/* temporary, need a finer lock based on cluster */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;  /* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}

	return KERN_SUCCESS;
}

/*
 * Routine:	default_pager_memory_object_create
 * Purpose:
 * 	Handle requests for memory objects from the
 * 	kernel.
 * Notes:
 * 	Because we only give out the default memory
 * 	manager port to the kernel, we don't have to
 * 	be so paranoid about the contents.
 */
kern_return_t
default_pager_memory_object_create(
	__unused memory_object_default_t	dmm,
	vm_size_t		new_size,
	memory_object_t		*new_mem_obj)
{
	vstruct_t	vs;

	assert(dmm == default_pager_object);

	if ((dp_size_t) new_size != new_size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) new_size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	vs->vs_next_seqno = 0;

	/*
	 * Set up associations between this memory object
	 * and this default_pager structure
	 */

	vs->vs_pager_ops = &default_pager_ops;
	vs->vs_pager_header.io_bits = IKOT_MEMORY_OBJECT;

	/*
	 * After this, other threads might receive requests
	 * for this memory object or find it in the port list.
	 */

	vstruct_list_insert(vs);
	*new_mem_obj = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

/*
 * Create an external object.
 */
kern_return_t
default_pager_object_create(
	default_pager_t	default_pager,
	vm_size_t	size,
	memory_object_t	*mem_objp)
{
	vstruct_t	vs;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	if ((dp_size_t) size != size) {
		/* 32-bit overflow */
		return KERN_INVALID_ARGUMENT;
	}

	vs = vs_object_create((dp_size_t) size);
	if (vs == VSTRUCT_NULL)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Set up associations between the default pager
	 * and this vstruct structure
	 */
	vs->vs_pager_ops = &default_pager_ops;
	vstruct_list_insert(vs);
	*mem_objp = vs_to_mem_obj(vs);
	return KERN_SUCCESS;
}

kern_return_t
default_pager_objects(
	default_pager_t			default_pager,
	default_pager_object_array_t	*objectsp,
	mach_msg_type_number_t		*ocountp,
	mach_port_array_t		*portsp,
	mach_msg_type_number_t		*pcountp)
{
	vm_offset_t		oaddr = 0;	/* memory for objects */
	vm_size_t		osize = 0;	/* current size */
	default_pager_object_t	*objects;
	unsigned int		opotential = 0;

	vm_map_copy_t		pcopy = 0;	/* copy handle for pagers */
	vm_size_t		psize = 0;	/* current size */
	memory_object_t		*pagers;
	unsigned int		ppotential = 0;

	unsigned int		actual;
	unsigned int		num_objects;
	kern_return_t		kr;
	vstruct_t		entry;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	/*
	 * We will send no more than this many
	 */
	actual = vstruct_list.vsl_count;

	/*
	 * Our out-of-line port arrays are simply kalloc'ed.
	 */
	psize = vm_map_round_page(actual * sizeof (*pagers),
				  vm_map_page_mask(ipc_kernel_map));
	ppotential = (unsigned int) (psize / sizeof (*pagers));
	pagers = (memory_object_t *)kalloc(psize);
	if (0 == pagers)
		return KERN_RESOURCE_SHORTAGE;

	/*
	 * Returned out-of-line data must be allocated out of
	 * the ipc_kernel_map, wired down, filled in, and
	 * then "copied in" as if it had been sent by a
	 * user process.
	 */
	osize = vm_map_round_page(actual * sizeof (*objects),
				  vm_map_page_mask(ipc_kernel_map));
	opotential = (unsigned int) (osize / sizeof (*objects));
	kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
	if (KERN_SUCCESS != kr) {
		kfree(pagers, psize);
		return KERN_RESOURCE_SHORTAGE;
	}
	objects = (default_pager_object_t *)oaddr;

	/*
	 * Now scan the list.
	 */

	VSL_LOCK();

	num_objects = 0;
	queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {

		memory_object_t		pager;
		vm_size_t		size;

		if ((num_objects >= opotential) ||
		    (num_objects >= ppotential)) {

			/*
			 * This should be rare.  In any case,
			 * we will only miss recent objects,
			 * because they are added at the end.
			 */
			break;
		}

		/*
		 * Avoid interfering with normal operations
		 */
		if (!VS_MAP_TRY_LOCK(entry))
			goto not_this_one;
		size = ps_vstruct_allocated_size(entry);
		VS_MAP_UNLOCK(entry);

		VS_LOCK(entry);

		/*
		 * We need a reference for our caller.  Adding this
		 * reference through the linked list could race with
		 * destruction of the object.  If we find the object
		 * has no references, just give up on it.
		 */
		if (entry->vs_references == 0) {
			VS_UNLOCK(entry);
			goto not_this_one;
		}
		pager = vs_to_mem_obj(entry);
		dp_memory_object_reference(pager);
		VS_UNLOCK(entry);

		/* the arrays are wired, so no deadlock worries */

		objects[num_objects].dpo_object = (vm_offset_t) entry;
		objects[num_objects].dpo_size = size;
		pagers [num_objects++] = pager;
		continue;

	not_this_one:
		/*
		 * Do not return garbage
		 */
		objects[num_objects].dpo_object = (vm_offset_t) 0;
		objects[num_objects].dpo_size = 0;
		pagers[num_objects++] = MEMORY_OBJECT_NULL;

	}

	VSL_UNLOCK();

	/* clear out any excess allocation */
	while (num_objects < opotential) {
		objects[--opotential].dpo_object = (vm_offset_t) 0;
		objects[opotential].dpo_size = 0;
	}
	while (num_objects < ppotential) {
		pagers[--ppotential] = MEMORY_OBJECT_NULL;
	}

	kr = vm_map_unwire(ipc_kernel_map,
			   vm_map_trunc_page(oaddr,
					     vm_map_page_mask(ipc_kernel_map)),
			   vm_map_round_page(oaddr + osize,
					     vm_map_page_mask(ipc_kernel_map)),
			   FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
			   (vm_map_size_t)osize, TRUE, &pcopy);
	assert(KERN_SUCCESS == kr);

	*objectsp = (default_pager_object_array_t)objects;
	*ocountp = num_objects;
	*portsp = (mach_port_array_t)pcopy;
	*pcountp = num_objects;

	return KERN_SUCCESS;
}

kern_return_t
default_pager_object_pages(
	default_pager_t		default_pager,
	mach_port_t		memory_object,
	default_pager_page_array_t	*pagesp,
	mach_msg_type_number_t	*countp)
{
	vm_offset_t		addr = 0;	/* memory for page offsets */
	vm_size_t		size = 0;	/* current memory size */
	vm_map_copy_t		copy;
	default_pager_page_t	*pages = 0;
	unsigned int		potential;
	unsigned int		actual;
	kern_return_t		kr;
	memory_object_t		object;

	if (default_pager != default_pager_object)
		return KERN_INVALID_ARGUMENT;

	object = (memory_object_t) memory_object;

	potential = 0;
	for (;;) {
		vstruct_t	entry;

		VSL_LOCK();
		queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
			      vs_links) {
			VS_LOCK(entry);
			if (vs_to_mem_obj(entry) == object) {
				VSL_UNLOCK();
				goto found_object;
			}
			VS_UNLOCK(entry);
		}
		VSL_UNLOCK();

		/* did not find the object */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		return KERN_INVALID_ARGUMENT;

	found_object:

		if (!VS_MAP_TRY_LOCK(entry)) {
			/* oh well bad luck */
			int wresult;

			VS_UNLOCK(entry);

			/*
			 * Sleep for ~1 ms and retry; the event is a dummy
			 * address nothing will ever wake, so only the
			 * timeout can end the wait.
			 */
			assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
			wresult = thread_block(THREAD_CONTINUE_NULL);
			assert(wresult == THREAD_TIMED_OUT);
			continue;
		}

		actual = ps_vstruct_allocated_pages(entry, pages, potential);
		VS_MAP_UNLOCK(entry);
		VS_UNLOCK(entry);

		if (actual <= potential)
			break;

		/* allocate more memory */
		if (0 != addr)
			kmem_free(ipc_kernel_map, addr, size);

		size = vm_map_round_page(actual * sizeof (*pages),
					 vm_map_page_mask(ipc_kernel_map));
		kr = kmem_alloc(ipc_kernel_map, &addr, size);
		if (KERN_SUCCESS != kr)
			return KERN_RESOURCE_SHORTAGE;

		pages = (default_pager_page_t *)addr;
		potential = (unsigned int) (size / sizeof (*pages));
	}

	/*
	 * Clear unused memory.
	 */
	while (actual < potential)
		pages[--potential].dpp_offset = 0;

	kr = vm_map_unwire(ipc_kernel_map,
			   vm_map_trunc_page(addr,
					     vm_map_page_mask(ipc_kernel_map)),
			   vm_map_round_page(addr + size,
					     vm_map_page_mask(ipc_kernel_map)),
			   FALSE);
	assert(KERN_SUCCESS == kr);
	kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
			   (vm_map_size_t)size, TRUE, &copy);
	assert(KERN_SUCCESS == kr);

	*pagesp = (default_pager_page_array_t)copy;
	*countp = actual;
	return KERN_SUCCESS;
}