apple/xnu.git: osfmk/default_pager/dp_memory_object.c (blob 83d4596119c186e571531bcd7b08ede41242e5ee)
1 /*
2 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /*
31 * @OSF_COPYRIGHT@
32 */
33 /*
34 * Mach Operating System
35 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
36 * All Rights Reserved.
37 *
38 * Permission to use, copy, modify and distribute this software and its
39 * documentation is hereby granted, provided that both the copyright
40 * notice and this permission notice appear in all copies of the
41 * software, derivative works or modified versions, and any portions
42 * thereof, and that both notices appear in supporting documentation.
43 *
44 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
45 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
46 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
47 *
48 * Carnegie Mellon requests users of this software to return to
49 *
50 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
51 * School of Computer Science
52 * Carnegie Mellon University
53 * Pittsburgh PA 15213-3890
54 *
55 * any improvements or extensions that they make and grant Carnegie Mellon
56 * the rights to redistribute these changes.
57 */
58
59 /*
60 * Default Pager.
61 * Memory Object Management.
62 */
63
64 #include "default_pager_internal.h"
65 #include <default_pager/default_pager_object_server.h>
66 #include <mach/memory_object_default_server.h>
67 #include <mach/memory_object_control.h>
68 #include <mach/memory_object_types.h>
69 #include <mach/memory_object_server.h>
70 #include <mach/upl.h>
71 #include <mach/vm_map.h>
72 #include <vm/memory_object.h>
73 #include <vm/vm_pageout.h>
74 #include <vm/vm_map.h>
75 #include <vm/vm_protos.h>
76
77 /* forward declaration */
78 vstruct_t vs_object_create(vm_size_t size);
79
80 /*
81 * List of all vstructs. A specific vstruct is
82  * found directly via its port; this list is
83 * only used for monitoring purposes by the
84 * default_pager_object* calls and by ps_delete
85 * when abstract memory objects must be scanned
86 * to remove any live storage on a segment which
87 * is to be removed.
88 */
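/*
 * The list and its count are protected by the VSL lock; note that
 * vstruct_list_delete() below expects its caller to already hold it.
 */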
89 struct vstruct_list_head vstruct_list;
90
91 __private_extern__ void
92 vstruct_list_insert(
93 vstruct_t vs)
94 {
95 VSL_LOCK();
96 queue_enter(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
97 vstruct_list.vsl_count++;
98 VSL_UNLOCK();
99 }
100
101
102 __private_extern__ void
103 vstruct_list_delete(
104 vstruct_t vs)
105 {
106 queue_remove(&vstruct_list.vsl_queue, vs, vstruct_t, vs_links);
107 vstruct_list.vsl_count--;
108 }
109
110 /*
111 * We use the sequence numbers on requests to regulate
112 * our parallelism. In general, we allow multiple reads and writes
113 * to proceed in parallel, with the exception that reads must
114 * wait for previous writes to finish. (Because the kernel might
115 * generate a data-request for a page on the heels of a data-write
116 * for the same page, and we must avoid returning stale data.)
117  * Terminate requests wait for preceding reads and writes to finish.
118 */
119
120 static unsigned int default_pager_total = 0; /* debugging */
121 static unsigned int default_pager_wait_seqno = 0; /* debugging */
122 static unsigned int default_pager_wait_read = 0; /* debugging */
123 static unsigned int default_pager_wait_write = 0; /* debugging */
124
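/*
 * Wait for any pending asynchronous I/O on this vstruct to drain.
 * Called and returns with the VS lock held; the lock is dropped
 * while the thread is blocked.
 */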
125 __private_extern__ void
126 vs_async_wait(
127 vstruct_t vs)
128 {
129
130 ASSERT(vs->vs_async_pending >= 0);
131 while (vs->vs_async_pending > 0) {
132 vs->vs_waiting_async = TRUE;
133 assert_wait(&vs->vs_async_pending, THREAD_UNINT);
134 VS_UNLOCK(vs);
135 thread_block(THREAD_CONTINUE_NULL);
136 VS_LOCK(vs);
137 }
138 ASSERT(vs->vs_async_pending == 0);
139 }
140
141
142 #if PARALLEL
143 /*
144 * Waits for correct sequence number. Leaves pager locked.
145 *
146 * JMM - Sequence numbers guarantee ordering of requests generated
147 * by a single thread if the receiver is multithreaded and
148 * the interfaces are asynchronous (i.e. sender can generate
149 * more than one request before the first is received in the
150  * pager). Normally, IPC would generate these numbers in that
151 * case. But we are trying to avoid using IPC for the in-kernel
152 * scenario. Since these are actually invoked synchronously
153 * anyway (in-kernel), we can just fake the sequence number
154 * generation here (thus avoiding the dependence on IPC).
155 */
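/*
 * vs_lock() takes a ticket (vs_next_seqno) under the VS lock and sleeps
 * until vs_seqno catches up to it; vs_unlock() advances vs_seqno and
 * wakes any waiter, so requests are serviced in arrival order.
 */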
156 __private_extern__ void
157 vs_lock(
158 vstruct_t vs)
159 {
160 mach_port_seqno_t seqno;
161
162 default_pager_total++;
163 VS_LOCK(vs);
164
165 seqno = vs->vs_next_seqno++;
166
167 while (vs->vs_seqno != seqno) {
168 default_pager_wait_seqno++;
169 vs->vs_waiting_seqno = TRUE;
170 assert_wait(&vs->vs_seqno, THREAD_UNINT);
171 VS_UNLOCK(vs);
172 thread_block(THREAD_CONTINUE_NULL);
173 VS_LOCK(vs);
174 }
175 }
176
177 /*
178 * Increments sequence number and unlocks pager.
179 */
180 __private_extern__ void
181 vs_unlock(vstruct_t vs)
182 {
183 vs->vs_seqno++;
184 if (vs->vs_waiting_seqno) {
185 vs->vs_waiting_seqno = FALSE;
186 VS_UNLOCK(vs);
187 thread_wakeup(&vs->vs_seqno);
188 return;
189 }
190 VS_UNLOCK(vs);
191 }
192
193 /*
194 * Start a read - one more reader. Pager must be locked.
195 */
196 __private_extern__ void
197 vs_start_read(
198 vstruct_t vs)
199 {
200 vs->vs_readers++;
201 }
202
203 /*
204 * Wait for readers. Unlocks and relocks pager if wait needed.
205 */
206 __private_extern__ void
207 vs_wait_for_readers(
208 vstruct_t vs)
209 {
210 while (vs->vs_readers != 0) {
211 default_pager_wait_read++;
212 vs->vs_waiting_read = TRUE;
213 assert_wait(&vs->vs_readers, THREAD_UNINT);
214 VS_UNLOCK(vs);
215 thread_block(THREAD_CONTINUE_NULL);
216 VS_LOCK(vs);
217 }
218 }
219
220 /*
221  * Finish a read.  Called with the pager unlocked; returns with it unlocked.
222 */
223 __private_extern__ void
224 vs_finish_read(
225 vstruct_t vs)
226 {
227 VS_LOCK(vs);
228 if (--vs->vs_readers == 0 && vs->vs_waiting_read) {
229 vs->vs_waiting_read = FALSE;
230 VS_UNLOCK(vs);
231 thread_wakeup(&vs->vs_readers);
232 return;
233 }
234 VS_UNLOCK(vs);
235 }
236
237 /*
238 * Start a write - one more writer. Pager must be locked.
239 */
240 __private_extern__ void
241 vs_start_write(
242 vstruct_t vs)
243 {
244 vs->vs_writers++;
245 }
246
247 /*
248 * Wait for writers. Unlocks and relocks pager if wait needed.
249 */
250 __private_extern__ void
251 vs_wait_for_writers(
252 vstruct_t vs)
253 {
254 while (vs->vs_writers != 0) {
255 default_pager_wait_write++;
256 vs->vs_waiting_write = TRUE;
257 assert_wait(&vs->vs_writers, THREAD_UNINT);
258 VS_UNLOCK(vs);
259 thread_block(THREAD_CONTINUE_NULL);
260 VS_LOCK(vs);
261 }
262 vs_async_wait(vs);
263 }
264
265 /* This is to be used for the transfer from segment code ONLY */
266 /* The transfer code holds off vs destruction by keeping the */
267 /* vs_async_pending count non-zero.  It will not conflict with  */
268 /* other writers on an async basis because it only writes on */
269 /* a cluster basis into fresh (as of sync time) cluster locations */
270
271 __private_extern__ void
272 vs_wait_for_sync_writers(
273 vstruct_t vs)
274 {
275 while (vs->vs_writers != 0) {
276 default_pager_wait_write++;
277 vs->vs_waiting_write = TRUE;
278 assert_wait(&vs->vs_writers, THREAD_UNINT);
279 VS_UNLOCK(vs);
280 thread_block(THREAD_CONTINUE_NULL);
281 VS_LOCK(vs);
282 }
283 }
284
285
286 /*
287  * Finish a write.  Called with the pager unlocked; returns with it unlocked.
288 */
289 __private_extern__ void
290 vs_finish_write(
291 vstruct_t vs)
292 {
293 VS_LOCK(vs);
294 if (--vs->vs_writers == 0 && vs->vs_waiting_write) {
295 vs->vs_waiting_write = FALSE;
296 VS_UNLOCK(vs);
297 thread_wakeup(&vs->vs_writers);
298 return;
299 }
300 VS_UNLOCK(vs);
301 }
302 #endif /* PARALLEL */
303
304 vstruct_t
305 vs_object_create(
306 vm_size_t size)
307 {
308 vstruct_t vs;
309
310 /*
311 * Allocate a vstruct. If there are any problems, then report them
312 * to the console.
313 */
314 vs = ps_vstruct_create(size);
315 if (vs == VSTRUCT_NULL) {
316 dprintf(("vs_object_create: unable to allocate %s\n",
317 "-- either run swapon command or reboot"));
318 return VSTRUCT_NULL;
319 }
320
321 return vs;
322 }
323
324 #if 0
325 void default_pager_add(vstruct_t, boolean_t); /* forward */
326
327 void
328 default_pager_add(
329 vstruct_t vs,
330 boolean_t internal)
331 {
332 memory_object_t mem_obj = vs->vs_mem_obj;
333 mach_port_t pset;
334 mach_port_mscount_t sync;
335 mach_port_t previous;
336 kern_return_t kr;
337 static char here[] = "default_pager_add";
338
339 /*
340 * The port currently has a make-send count of zero,
341 * because either we just created the port or we just
342 * received the port in a memory_object_create request.
343 */
344
345 if (internal) {
346 /* possibly generate an immediate no-senders notification */
347 sync = 0;
348 pset = default_pager_internal_set;
349 } else {
350 /* delay notification till send right is created */
351 sync = 1;
352 pset = default_pager_external_set;
353 }
354
355 ipc_port_make_sonce(mem_obj);
356 ip_lock(mem_obj); /* unlocked in nsrequest below */
357 ipc_port_nsrequest(mem_obj, sync, mem_obj, &previous);
358 }
359
360 #endif
361
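/*
 * Pager-ops vector installed in each vstruct (vs_pager_ops) so that the
 * generic memory_object_* operations are dispatched to the dp_* routines
 * below; the trailing string identifies the pager.
 */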
362 const struct memory_object_pager_ops default_pager_ops = {
363 dp_memory_object_reference,
364 dp_memory_object_deallocate,
365 dp_memory_object_init,
366 dp_memory_object_terminate,
367 dp_memory_object_data_request,
368 dp_memory_object_data_return,
369 dp_memory_object_data_initialize,
370 dp_memory_object_data_unlock,
371 dp_memory_object_synchronize,
372 dp_memory_object_unmap,
373 "default pager"
374 };
375
376 kern_return_t
377 dp_memory_object_init(
378 memory_object_t mem_obj,
379 memory_object_control_t control,
380 __unused vm_size_t pager_page_size)
381 {
382 vstruct_t vs;
383
384 assert(pager_page_size == vm_page_size);
385
386 memory_object_control_reference(control);
387
388 vs_lookup(mem_obj, vs);
389 vs_lock(vs);
390
391 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
392 Panic("bad request");
393
394 vs->vs_control = control;
395 vs_unlock(vs);
396
397 return KERN_SUCCESS;
398 }
399
400 kern_return_t
401 dp_memory_object_synchronize(
402 memory_object_t mem_obj,
403 memory_object_offset_t offset,
404 vm_size_t length,
405 __unused vm_sync_t flags)
406 {
407 vstruct_t vs;
408
409 vs_lookup(mem_obj, vs);
410 vs_lock(vs);
411 vs_unlock(vs);
412
413 memory_object_synchronize_completed(vs->vs_control, offset, length);
414
415 return KERN_SUCCESS;
416 }
417
418 kern_return_t
419 dp_memory_object_unmap(
420 __unused memory_object_t mem_obj)
421 {
422 panic("dp_memory_object_unmap");
423
424 return KERN_FAILURE;
425 }
426
427 kern_return_t
428 dp_memory_object_terminate(
429 memory_object_t mem_obj)
430 {
431 memory_object_control_t control;
432 vstruct_t vs;
433
434 /*
435 * control port is a receive right, not a send right.
436 */
437
438 vs_lookup(mem_obj, vs);
439 vs_lock(vs);
440
441 /*
442 * Wait for read and write requests to terminate.
443 */
444
445 vs_wait_for_readers(vs);
446 vs_wait_for_writers(vs);
447
448 /*
449 * After memory_object_terminate both memory_object_init
450 * and a no-senders notification are possible, so we need
451 * to clean up our reference to the memory_object_control
452 * to prepare for a new init.
453 */
454
455 control = vs->vs_control;
456 vs->vs_control = MEMORY_OBJECT_CONTROL_NULL;
457
458 	/* A bit of special-case ugliness here: wake up any waiting reads.  */
459 	/* Those data requests had to be removed from the seqno traffic     */
460 	/* because of a performance bottleneck with large memory objects;   */
461 	/* the problem will right itself with the new component-based       */
462 	/* synchronous interface.  The new async path will be able to       */
463 	/* return failure during its sync phase.  In the meantime ...       */
464
465 thread_wakeup(&vs->vs_writers);
466 thread_wakeup(&vs->vs_async_pending);
467
468 vs_unlock(vs);
469
470 /*
471 * Now we deallocate our reference on the control.
472 */
473 memory_object_control_deallocate(control);
474 return KERN_SUCCESS;
475 }
476
477 void
478 dp_memory_object_reference(
479 memory_object_t mem_obj)
480 {
481 vstruct_t vs;
482
483 vs_lookup_safe(mem_obj, vs);
484 if (vs == VSTRUCT_NULL)
485 return;
486
487 VS_LOCK(vs);
488 assert(vs->vs_references > 0);
489 vs->vs_references++;
490 VS_UNLOCK(vs);
491 }
492
493 void
494 dp_memory_object_deallocate(
495 memory_object_t mem_obj)
496 {
497 vstruct_t vs;
498 mach_port_seqno_t seqno;
499
500 /*
501 * Because we don't give out multiple first references
502 * for a memory object, there can't be a race
503 * between getting a deallocate call and creating
504 * a new reference for the object.
505 */
506
507 vs_lookup_safe(mem_obj, vs);
508 if (vs == VSTRUCT_NULL)
509 return;
510
511 VS_LOCK(vs);
512 if (--vs->vs_references > 0) {
513 VS_UNLOCK(vs);
514 return;
515 }
516
517 seqno = vs->vs_next_seqno++;
518 while (vs->vs_seqno != seqno) {
519 default_pager_wait_seqno++;
520 vs->vs_waiting_seqno = TRUE;
521 assert_wait(&vs->vs_seqno, THREAD_UNINT);
522 VS_UNLOCK(vs);
523 thread_block(THREAD_CONTINUE_NULL);
524 VS_LOCK(vs);
525 }
526
527 vs_async_wait(vs); /* wait for pending async IO */
528
529 /* do not delete the vs structure until the referencing pointers */
530 /* in the vstruct list have been expunged */
531
532 /* get VSL_LOCK out of order by using TRY mechanism */
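	/* If the try fails, drop the VS lock, take and release the list    */
	/* lock in the normal order, then re-take the VS lock and re-drain  */
	/* async I/O before trying again; this avoids a lock-order deadlock. */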
533 while(!VSL_LOCK_TRY()) {
534 VS_UNLOCK(vs);
535 VSL_LOCK();
536 VSL_UNLOCK();
537 VS_LOCK(vs);
538 vs_async_wait(vs); /* wait for pending async IO */
539 }
540
541
542 /*
543 * We shouldn't get a deallocation call
544 * when the kernel has the object cached.
545 */
546 if (vs->vs_control != MEMORY_OBJECT_CONTROL_NULL)
547 Panic("bad request");
548
549 /*
550 * Unlock the pager (though there should be no one
551 * waiting for it).
552 */
553 VS_UNLOCK(vs);
554
555 /* Lock out paging segment removal for the duration of this */
556 /* call. We are vulnerable to losing a paging segment we rely */
557 /* on as soon as we remove ourselves from the VSL and unlock */
558
559 /* Keep our thread from blocking on attempt to trigger backing */
560 /* store release */
561 backing_store_release_trigger_disable += 1;
562
563 /*
564 * Remove the memory object port association, and then
565 	 * destroy the port itself.  We must remove the object
566 * from the port list before deallocating the pager,
567 * because of default_pager_objects.
568 */
569 vstruct_list_delete(vs);
570 VSL_UNLOCK();
571
572 ps_vstruct_dealloc(vs);
573
574 VSL_LOCK();
575 backing_store_release_trigger_disable -= 1;
576 if(backing_store_release_trigger_disable == 0) {
577 thread_wakeup((event_t)&backing_store_release_trigger_disable);
578 }
579 VSL_UNLOCK();
580 }
581
582 kern_return_t
583 dp_memory_object_data_request(
584 memory_object_t mem_obj,
585 memory_object_offset_t offset,
586 vm_size_t length,
587 __unused vm_prot_t protection_required)
588 {
589 vstruct_t vs;
590
591 GSTAT(global_stats.gs_pagein_calls++);
592
593
594 /* CDY at this moment vs_lookup panics when presented with the wrong */
595 	/* port.  As we are expanding this pager to support user interfaces, */
596 	/* this should be changed to return KERN_FAILURE instead.            */
597 vs_lookup(mem_obj, vs);
598 vs_lock(vs);
599
600 	/* We are going to relax the strict sequencing here for performance  */
601 	/* reasons.  We can do this because we know that the read and        */
602 	/* write threads are different, and we rely on synchronization       */
603 	/* of read and write requests at the cache memory_object level.      */
604 	/* We break out the wait for writers below; all of this goes away    */
605 	/* when we get real control of seqno with the new component interface. */
606
607 if (vs->vs_writers != 0) {
608 /* you can't hold on to the seqno and go */
609 /* to sleep like that */
610 vs_unlock(vs); /* bump internal count of seqno */
611 VS_LOCK(vs);
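		/* The loop below duplicates vs_wait_for_writers() and      */
		/* vs_async_wait(); it is done by hand here because the     */
		/* seqno-ordered lock was already released above.           */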
612 while (vs->vs_writers != 0) {
613 default_pager_wait_write++;
614 vs->vs_waiting_write = TRUE;
615 assert_wait(&vs->vs_writers, THREAD_UNINT);
616 VS_UNLOCK(vs);
617 thread_block(THREAD_CONTINUE_NULL);
618 VS_LOCK(vs);
619 vs_async_wait(vs);
620 }
621 if(vs->vs_control == MEMORY_OBJECT_CONTROL_NULL) {
622 VS_UNLOCK(vs);
623 return KERN_FAILURE;
624 }
625 vs_start_read(vs);
626 VS_UNLOCK(vs);
627 } else {
628 vs_start_read(vs);
629 vs_unlock(vs);
630 }
631
632 /*
633 * Request must be on a page boundary and a multiple of pages.
634 */
635 if ((offset & vm_page_mask) != 0 || (length & vm_page_mask) != 0)
636 Panic("bad alignment");
637
638 pvs_cluster_read(vs, (vm_offset_t)offset, length);
639
640 vs_finish_read(vs);
641
642 return KERN_SUCCESS;
643 }
644
645 /*
646 * memory_object_data_initialize: check whether we already have each page, and
647 * write it if we do not. The implementation is far from optimized, and
648 * also assumes that the default_pager is single-threaded.
649 */
650 /* It is questionable whether or not a pager should decide what is relevant */
651 /* and what is not in data sent from the kernel. Data initialize has been */
652 /* changed to copy back all data sent to it in preparation for its eventual */
653 /* merge with data return. It is the kernel that should decide what pages */
654 /* to write back.  As of the writing of this note, this is indeed the case: */
655 /* the kernel writes back one page at a time through this interface.        */
656
657 kern_return_t
658 dp_memory_object_data_initialize(
659 memory_object_t mem_obj,
660 memory_object_offset_t offset,
661 vm_size_t size)
662 {
663 vstruct_t vs;
664
665 DP_DEBUG(DEBUG_MO_EXTERNAL,
666 ("mem_obj=0x%x,offset=0x%x,cnt=0x%x\n",
667 (int)mem_obj, (int)offset, (int)size));
668 GSTAT(global_stats.gs_pages_init += atop_32(size));
669
670 vs_lookup(mem_obj, vs);
671 vs_lock(vs);
672 vs_start_write(vs);
673 vs_unlock(vs);
674
675 /*
676 * Write the data via clustered writes. vs_cluster_write will
677 * loop if the address range specified crosses cluster
678 * boundaries.
679 */
680 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
681
682 vs_finish_write(vs);
683
684 return KERN_SUCCESS;
685 }
686
687 kern_return_t
688 dp_memory_object_data_unlock(
689 __unused memory_object_t mem_obj,
690 __unused memory_object_offset_t offset,
691 __unused vm_size_t size,
692 __unused vm_prot_t desired_access)
693 {
694 Panic("dp_memory_object_data_unlock: illegal");
695 return KERN_FAILURE;
696 }
697
698
699 /*ARGSUSED8*/
700 kern_return_t
701 dp_memory_object_data_return(
702 memory_object_t mem_obj,
703 memory_object_offset_t offset,
704 vm_size_t size,
705 __unused memory_object_offset_t *resid_offset,
706 __unused int *io_error,
707 __unused boolean_t dirty,
708 __unused boolean_t kernel_copy,
709 __unused int upl_flags)
710 {
711 vstruct_t vs;
712
713 DP_DEBUG(DEBUG_MO_EXTERNAL,
714 ("mem_obj=0x%x,offset=0x%x,size=0x%x\n",
715 (int)mem_obj, (int)offset, (int)size));
716 GSTAT(global_stats.gs_pageout_calls++);
717
718 /* This routine is called by the pageout thread. The pageout thread */
719 	/* must not be blocked by read activities.                          */
720 /* Therefore the grant of vs lock must be done on a try versus a */
721 /* blocking basis. The code below relies on the fact that the */
722 /* interface is synchronous. Should this interface be again async */
723 /* for some type of pager in the future the pages will have to be */
724 /* returned through a separate, asynchronous path. */
725
726 vs_lookup(mem_obj, vs);
727
728 default_pager_total++;
729 if(!VS_TRY_LOCK(vs)) {
730 /* the call below will not be done by caller when we have */
731 /* a synchronous interface */
732 /* return KERN_LOCK_OWNED; */
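		/* Instead, grab the pages with a UPL and abort it, so they  */
		/* remain with the kernel to be paged out again later rather */
		/* than blocking the pageout thread here.                    */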
733 upl_t upl;
734 unsigned int page_list_count = 0;
735 memory_object_super_upl_request(vs->vs_control,
736 (memory_object_offset_t)offset,
737 size, size,
738 &upl, NULL, &page_list_count,
739 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
740 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
741 upl_abort(upl,0);
742 upl_deallocate(upl);
743 return KERN_SUCCESS;
744 }
745
746 if ((vs->vs_seqno != vs->vs_next_seqno++)
747 || (vs->vs_readers)
748 || (vs->vs_xfer_pending)) {
749 upl_t upl;
750 unsigned int page_list_count = 0;
751
752 vs->vs_next_seqno--;
753 VS_UNLOCK(vs);
754
755 /* the call below will not be done by caller when we have */
756 /* a synchronous interface */
757 /* return KERN_LOCK_OWNED; */
758 memory_object_super_upl_request(vs->vs_control,
759 (memory_object_offset_t)offset,
760 size, size,
761 &upl, NULL, &page_list_count,
762 UPL_NOBLOCK | UPL_CLEAN_IN_PLACE
763 | UPL_NO_SYNC | UPL_COPYOUT_FROM);
764 upl_abort(upl,0);
765 upl_deallocate(upl);
766 return KERN_SUCCESS;
767 }
768
769 if ((size % vm_page_size) != 0)
770 Panic("bad alignment");
771
772 vs_start_write(vs);
773
774
775 vs->vs_async_pending += 1; /* protect from backing store contraction */
776 vs_unlock(vs);
777
778 /*
779 * Write the data via clustered writes. vs_cluster_write will
780 * loop if the address range specified crosses cluster
781 * boundaries.
782 */
783 vs_cluster_write(vs, 0, (vm_offset_t)offset, size, FALSE, 0);
784
785 vs_finish_write(vs);
786
787 /* temporary, need a finer lock based on cluster */
788
789 VS_LOCK(vs);
790 vs->vs_async_pending -= 1; /* release vs_async_wait */
791 if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
792 vs->vs_waiting_async = FALSE;
793 VS_UNLOCK(vs);
794 thread_wakeup(&vs->vs_async_pending);
795 } else {
796 VS_UNLOCK(vs);
797 }
798
799
800 return KERN_SUCCESS;
801 }
802
803 /*
804 * Routine: default_pager_memory_object_create
805 * Purpose:
806 * Handle requests for memory objects from the
807 * kernel.
808 * Notes:
809 * Because we only give out the default memory
810 * manager port to the kernel, we don't have to
811 * be so paranoid about the contents.
812 */
813 kern_return_t
814 default_pager_memory_object_create(
815 __unused memory_object_default_t dmm,
816 vm_size_t new_size,
817 memory_object_t *new_mem_obj)
818 {
819 vstruct_t vs;
820
821 assert(dmm == default_pager_object);
822
823 vs = vs_object_create(new_size);
824 if (vs == VSTRUCT_NULL)
825 return KERN_RESOURCE_SHORTAGE;
826
827 vs->vs_next_seqno = 0;
828
829 /*
830 * Set up associations between this memory object
831 * and this default_pager structure
832 */
833
834 vs->vs_pager_ops = &default_pager_ops;
835 vs->vs_mem_obj_ikot = IKOT_MEMORY_OBJECT;
836
837 /*
838 * After this, other threads might receive requests
839 * for this memory object or find it in the port list.
840 */
841
842 vstruct_list_insert(vs);
843 *new_mem_obj = vs_to_mem_obj(vs);
844 return KERN_SUCCESS;
845 }
846
847 /*
848 * Create an external object.
849 */
850 kern_return_t
851 default_pager_object_create(
852 default_pager_t default_pager,
853 vm_size_t size,
854 memory_object_t *mem_objp)
855 {
856 vstruct_t vs;
857
858 if (default_pager != default_pager_object)
859 return KERN_INVALID_ARGUMENT;
860
861 vs = vs_object_create(size);
862 if (vs == VSTRUCT_NULL)
863 return KERN_RESOURCE_SHORTAGE;
864
865 /*
866 * Set up associations between the default pager
867 * and this vstruct structure
868 */
869 vs->vs_pager_ops = &default_pager_ops;
870 vstruct_list_insert(vs);
871 *mem_objp = vs_to_mem_obj(vs);
872 return KERN_SUCCESS;
873 }
874
875 kern_return_t
876 default_pager_objects(
877 default_pager_t default_pager,
878 default_pager_object_array_t *objectsp,
879 mach_msg_type_number_t *ocountp,
880 mach_port_array_t *portsp,
881 mach_msg_type_number_t *pcountp)
882 {
883 vm_offset_t oaddr = 0; /* memory for objects */
884 vm_size_t osize = 0; /* current size */
885 default_pager_object_t * objects;
886 unsigned int opotential = 0;
887
888 vm_map_copy_t pcopy = 0; /* copy handle for pagers */
889 vm_size_t psize = 0; /* current size */
890 memory_object_t * pagers;
891 unsigned int ppotential = 0;
892
893 unsigned int actual;
894 unsigned int num_objects;
895 kern_return_t kr;
896 vstruct_t entry;
897
898 if (default_pager != default_pager_object)
899 return KERN_INVALID_ARGUMENT;
900
901 /*
902 * We will send no more than this many
903 */
904 actual = vstruct_list.vsl_count;
905
906 /*
907 	 * Our out-of-line port arrays are simply kalloc'ed.
908 */
909 psize = round_page(actual * sizeof * pagers);
910 ppotential = psize / sizeof * pagers;
911 pagers = (memory_object_t *)kalloc(psize);
912 if (0 == pagers)
913 return KERN_RESOURCE_SHORTAGE;
914
915 /*
916 	 * Returned out-of-line data must be allocated out of
917 	 * the ipc_kernel_map, wired down, filled in, and
918 * then "copied in" as if it had been sent by a
919 * user process.
920 */
921 osize = round_page(actual * sizeof * objects);
922 opotential = osize / sizeof * objects;
923 kr = kmem_alloc(ipc_kernel_map, &oaddr, osize);
924 if (KERN_SUCCESS != kr) {
925 kfree(pagers, psize);
926 return KERN_RESOURCE_SHORTAGE;
927 }
928 objects = (default_pager_object_t *)oaddr;
929
930
931 /*
932 * Now scan the list.
933 */
934
935 VSL_LOCK();
936
937 num_objects = 0;
938 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t, vs_links) {
939
940 memory_object_t pager;
941 vm_size_t size;
942
943 if ((num_objects >= opotential) ||
944 (num_objects >= ppotential)) {
945
946 /*
947 * This should be rare. In any case,
948 * we will only miss recent objects,
949 * because they are added at the end.
950 */
951 break;
952 }
953
954 /*
955 * Avoid interfering with normal operations
956 */
957 if (!VS_MAP_TRY_LOCK(entry))
958 goto not_this_one;
959 size = ps_vstruct_allocated_size(entry);
960 VS_MAP_UNLOCK(entry);
961
964 /*
965 * We need a reference for our caller. Adding this
966 * reference through the linked list could race with
967 * destruction of the object. If we find the object
968 * has no references, just give up on it.
969 */
970 VS_LOCK(entry);
971 if (entry->vs_references == 0) {
972 VS_UNLOCK(entry);
973 goto not_this_one;
974 }
975 pager = vs_to_mem_obj(entry);
976 dp_memory_object_reference(pager);
977 VS_UNLOCK(entry);
978
979 /* the arrays are wired, so no deadlock worries */
980
981 objects[num_objects].dpo_object = (vm_offset_t) entry;
982 objects[num_objects].dpo_size = size;
983 pagers [num_objects++] = pager;
984 continue;
985
986 not_this_one:
987 /*
988 * Do not return garbage
989 */
990 objects[num_objects].dpo_object = (vm_offset_t) 0;
991 objects[num_objects].dpo_size = 0;
992 pagers[num_objects++] = MEMORY_OBJECT_NULL;
993
994 }
995
996 VSL_UNLOCK();
997
998 /* clear out any excess allocation */
999 while (num_objects < opotential) {
1000 objects[--opotential].dpo_object = (vm_offset_t) 0;
1001 objects[opotential].dpo_size = 0;
1002 }
1003 while (num_objects < ppotential) {
1004 pagers[--ppotential] = MEMORY_OBJECT_NULL;
1005 }
1006
1007 kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(oaddr),
1008 vm_map_round_page(oaddr + osize), FALSE);
1009 assert(KERN_SUCCESS == kr);
1010 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)oaddr,
1011 (vm_map_size_t)osize, TRUE, &pcopy);
1012 assert(KERN_SUCCESS == kr);
1013
1014 *objectsp = (default_pager_object_array_t)objects;
1015 *ocountp = num_objects;
1016 *portsp = (mach_port_array_t)pcopy;
1017 *pcountp = num_objects;
1018
1019 return KERN_SUCCESS;
1020 }
1021
1022 kern_return_t
1023 default_pager_object_pages(
1024 default_pager_t default_pager,
1025 mach_port_t memory_object,
1026 default_pager_page_array_t *pagesp,
1027 mach_msg_type_number_t *countp)
1028 {
1029 vm_offset_t addr = 0; /* memory for page offsets */
1030 vm_size_t size = 0; /* current memory size */
1031 vm_map_copy_t copy;
1032 default_pager_page_t * pages = 0;
1033 unsigned int potential;
1034 unsigned int actual;
1035 kern_return_t kr;
1036 memory_object_t object;
1037
1038 if (default_pager != default_pager_object)
1039 return KERN_INVALID_ARGUMENT;
1040
1041 object = (memory_object_t) memory_object;
1042
1043 potential = 0;
1044 for (;;) {
1045 vstruct_t entry;
1046
1047 VSL_LOCK();
1048 queue_iterate(&vstruct_list.vsl_queue, entry, vstruct_t,
1049 vs_links) {
1050 VS_LOCK(entry);
1051 if (vs_to_mem_obj(entry) == object) {
1052 VSL_UNLOCK();
1053 goto found_object;
1054 }
1055 VS_UNLOCK(entry);
1056 }
1057 VSL_UNLOCK();
1058
1059 /* did not find the object */
1060 if (0 != addr)
1061 kmem_free(ipc_kernel_map, addr, size);
1062
1063 return KERN_INVALID_ARGUMENT;
1064
1065 found_object:
1066
1067 if (!VS_MAP_TRY_LOCK(entry)) {
1068 /* oh well bad luck */
1069 int wresult;
1070
1071 VS_UNLOCK(entry);
1072
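			/* Nobody ever wakes this dummy event (the address of  */
			/* assert_wait_timeout itself), so this is simply a    */
			/* 1 ms sleep before retrying the lookup.              */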
1073 assert_wait_timeout((event_t)assert_wait_timeout, THREAD_UNINT, 1, 1000*NSEC_PER_USEC);
1074 wresult = thread_block(THREAD_CONTINUE_NULL);
1075 assert(wresult == THREAD_TIMED_OUT);
1076 continue;
1077 }
1078
1079 actual = ps_vstruct_allocated_pages(entry, pages, potential);
1080 VS_MAP_UNLOCK(entry);
1081 VS_UNLOCK(entry);
1082
1083 if (actual <= potential)
1084 break;
1085
1086 /* allocate more memory */
1087 if (0 != addr)
1088 kmem_free(ipc_kernel_map, addr, size);
1089
1090 size = round_page(actual * sizeof * pages);
1091 kr = kmem_alloc(ipc_kernel_map, &addr, size);
1092 if (KERN_SUCCESS != kr)
1093 return KERN_RESOURCE_SHORTAGE;
1094
1095 pages = (default_pager_page_t *)addr;
1096 potential = size / sizeof * pages;
1097 }
1098
1099 /*
1100 * Clear unused memory.
1101 */
1102 while (actual < potential)
1103 pages[--potential].dpp_offset = 0;
1104
1105 kr = vm_map_unwire(ipc_kernel_map, vm_map_trunc_page(addr),
1106 vm_map_round_page(addr + size), FALSE);
1107 assert(KERN_SUCCESS == kr);
1108 kr = vm_map_copyin(ipc_kernel_map, (vm_map_address_t)addr,
1109 (vm_map_size_t)size, TRUE, &copy);
1110 assert(KERN_SUCCESS == kr);
1111
1112
1113 *pagesp = (default_pager_page_array_t)copy;
1114 *countp = actual;
1115 return KERN_SUCCESS;
1116 }