/*
 * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */
/*
 *	File:	vm/memory_object.c
 *	Author:	Michael Wayne Young
 *
 *	External memory management interface control functions.
 */
#include <advisory_pageout.h>

/*
 *	Interface dependencies:
 */

#include <mach/std_types.h>	/* For pointer_t */
#include <mach/mach_types.h>

#include <mach/kern_return.h>
#include <mach/memory_object.h>
#include <mach/memory_object_default.h>
#include <mach/memory_object_control_server.h>
#include <mach/host_priv_server.h>
#include <mach/boolean.h>
#include <mach/vm_prot.h>
#include <mach/message.h>

/*
 *	Implementation dependencies:
 */
#include <string.h>		/* For memcpy() */

#include <kern/host.h>
#include <kern/thread.h>	/* For current_thread() */
#include <kern/ipc_mig.h>
#include <kern/misc_protos.h>

#include <vm/vm_object.h>
#include <vm/vm_fault.h>
#include <vm/memory_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>		/* For pmap_clear_modify */
#include <vm/vm_kern.h>		/* For kernel_map, vm_move */
#include <vm/vm_map.h>		/* For vm_map_pageable */

#if	MACH_PAGEMAP
#include <vm/vm_external.h>
#endif	/* MACH_PAGEMAP */

#include <vm/vm_protos.h>

memory_object_default_t	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
vm_size_t		memory_manager_default_cluster = 0;
decl_mutex_data(,	memory_manager_default_lock)

/*
 *	Routine:	memory_object_should_return_page
 *
 *	Description:
 *		Determine whether the given page should be returned,
 *		based on the page's state and on the given return policy.
 *
 *		We should return the page if one of the following is true:
 *
 *		1. Page is dirty and should_return is not RETURN_NONE.
 *		2. Page is precious and should_return is RETURN_ALL.
 *		3. Should_return is RETURN_ANYTHING.
 *
 *		As a side effect, m->dirty will be made consistent
 *		with pmap_is_modified(m), if should_return is not
 *		MEMORY_OBJECT_RETURN_NONE.
 */

#define	memory_object_should_return_page(m, should_return) \
    (should_return != MEMORY_OBJECT_RETURN_NONE && \
     (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
      ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
      (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))

typedef	int	memory_object_lock_result_t;

#define MEMORY_OBJECT_LOCK_RESULT_DONE          0
#define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
#define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
#define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3

memory_object_lock_result_t memory_object_lock_page(
				vm_page_t		m,
				memory_object_return_t	should_return,
				boolean_t		should_flush,
				vm_prot_t		prot);

/*
 *	Routine:	memory_object_lock_page
 *
 *	Description:
 *		Perform the appropriate lock operations on the
 *		given page.  See the description of
 *		"memory_object_lock_request" for the meanings
 *		of the arguments.
 *
 *		Returns an indication that the operation
 *		completed, blocked, or that the page must
 *		be cleaned.
 */
memory_object_lock_result_t
memory_object_lock_page(
	vm_page_t		m,
	memory_object_return_t	should_return,
	boolean_t		should_flush,
	vm_prot_t		prot)
{
	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
	    (integer_t)m, should_return, should_flush, prot, 0);

	/*
	 *	If we cannot change access to the page,
	 *	either because a mapping is in progress
	 *	(busy page) or because a mapping has been
	 *	wired, then give up.
	 */

	if (m->busy || m->cleaning)
		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);

	/*
	 *	Don't worry about pages for which the kernel
	 *	does not have any data.
	 */

	if (m->absent || m->error || m->restart) {
		if(m->error && should_flush) {
			/* dump the page, pager wants us to */
			/* clean it up and there is no */
			/* relevant data to return */
			if(m->wire_count == 0) {
				VM_PAGE_FREE(m);
				return(MEMORY_OBJECT_LOCK_RESULT_DONE);
			}
		} else {
			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}
	}

	assert(!m->fictitious);

	if (m->wire_count != 0) {
		/*
		 *	If no change would take place
		 *	anyway, return successfully.
		 *
		 *	No change means:
		 *		Not flushing AND
		 *		No change to page lock [2 checks]  AND
		 *		Should not return page
		 *
		 * XXX	This doesn't handle sending a copy of a wired
		 * XXX	page to the pager, but that will require some
		 * XXX	significant surgery.
		 */
		if (!should_flush &&
		    (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
		    ! memory_object_should_return_page(m, should_return)) {

			/*
			 *	Restart page unlock requests,
			 *	even though no change took place.
			 *	[Memory managers may be expecting
			 *	to see new requests.]
			 */
			m->unlock_request = VM_PROT_NONE;
			PAGE_WAKEUP(m);

			return(MEMORY_OBJECT_LOCK_RESULT_DONE);
		}

		return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
	}

	/*
	 *	If the page is to be flushed, allow
	 *	that to be done as part of the protection.
	 */

	if (should_flush)
		prot = VM_PROT_ALL;

	/*
	 *	Set the page lock.
	 *
	 *	If we are decreasing permission, do it now;
	 *	let the fault handler take care of increases
	 *	(pmap_page_protect may not increase protection).
	 */

	if (prot != VM_PROT_NO_CHANGE) {
		if ((m->page_lock ^ prot) & prot) {
			pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
		}
#if 0
		/* code associated with the vestigial
		 * memory_object_data_unlock
		 */
		m->page_lock = prot;
		m->lock_supplied = TRUE;
		if (prot != VM_PROT_NONE)
			m->unusual = TRUE;
		else
			m->unusual = FALSE;

		/*
		 *	Restart any past unlock requests, even if no
		 *	change resulted.  If the manager explicitly
		 *	requested no protection change, then it is assumed
		 *	to be remembering past requests.
		 */

		m->unlock_request = VM_PROT_NONE;
		PAGE_WAKEUP(m);
#endif	/* 0 */
	}

	/*
	 *	Handle page returning.
	 */

	if (memory_object_should_return_page(m, should_return)) {

		/*
		 *	If we weren't planning
		 *	to flush the page anyway,
		 *	we may need to remove the
		 *	page from the pageout
		 *	system and from physical
		 *	memory.
		 */
		if (!should_flush) {
			vm_page_lock_queues();
			VM_PAGE_QUEUES_REMOVE(m);
			vm_page_unlock_queues();

			if (!m->active && !m->inactive)
				pmap_disconnect(m->phys_page);
		}

		if (m->dirty)
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
		else
			return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
	}

	/*
	 *	Handle flushing
	 */
	if (should_flush) {
		VM_PAGE_FREE(m);
	} else {
		/*
		 *	XXX Make clean but not flush a paging hint,
		 *	and deactivate the pages.  This is a hack
		 *	because it overloads flush/clean with
		 *	implementation-dependent meaning.  This only
		 *	happens to pages that are already clean.
		 */
		if (vm_page_deactivate_hint &&
		    (should_return != MEMORY_OBJECT_RETURN_NONE)) {
			vm_page_lock_queues();
			vm_page_deactivate(m);
			vm_page_unlock_queues();
		}
	}

	return(MEMORY_OBJECT_LOCK_RESULT_DONE);
}

#define	LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)	\
MACRO_BEGIN								\
									\
	register int		upl_flags;				\
									\
	vm_object_unlock(object);					\
									\
	if (iosync)							\
		upl_flags = UPL_MSYNC | UPL_IOSYNC;			\
	else								\
		upl_flags = UPL_MSYNC;					\
									\
	(void) memory_object_data_return(object->pager,		\
		po,							\
		data_cnt,						\
		ro,							\
		ioerr,							\
		(action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),	\
		!should_flush,						\
		upl_flags);						\
									\
	vm_object_lock(object);						\
MACRO_END

/*
 *	Routine:	memory_object_lock_request [user interface]
 *
 *	Description:
 *		Control use of the data associated with the given
 *		memory object.  For each page in the given range,
 *		perform the following operations, in order:
 *			1)  restrict access to the page (disallow
 *			    forms specified by "prot");
 *			2)  return data to the manager (if "should_return"
 *			    is RETURN_DIRTY and the page is dirty, or
 *			    "should_return" is RETURN_ALL and the page
 *			    is either dirty or precious); and,
 *			3)  flush the cached copy (if "should_flush"
 *			    is asserted).
 *		The set of pages is defined by a starting offset
 *		("offset") and size ("size").  Only pages with the
 *		same page alignment as the starting offset are
 *		considered.
 *
 *		A single acknowledgement is sent (to the "reply_to"
 *		port) when these actions are complete.  If successful,
 *		the naked send right for reply_to is consumed.
 */

kern_return_t
memory_object_lock_request(
	memory_object_control_t		control,
	memory_object_offset_t		offset,
	memory_object_size_t		size,
	memory_object_offset_t	*	resid_offset,
	int			*	io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			prot)
{
	vm_object_t		object;
	__unused boolean_t	should_flush;

	should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
	    (integer_t)control, offset, size,
	    (((should_return&1)<<1)|should_flush), prot);

	/*
	 *	Check for bogus arguments.
	 */
	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
		return (KERN_INVALID_ARGUMENT);

	size = round_page_64(size);

	/*
	 *	Lock the object, and acquire a paging reference to
	 *	prevent the memory_object reference from being released.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);
	offset -= object->paging_offset;

	(void)vm_object_update(object,
		offset, size, resid_offset, io_errno, should_return, flags, prot);

	vm_object_paging_end(object);
	vm_object_unlock(object);

	return (KERN_SUCCESS);
}
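
/*
 * Illustrative sketch (not part of the original source): a typical
 * msync-style caller of memory_object_lock_request() returns the dirty
 * pages in a range and flushes the cached copies.  The control handle and
 * the start/end offsets are hypothetical; only the flag, return-policy and
 * protection constants come from this interface.
 *
 *	kern_return_t	kr;
 *
 *	kr = memory_object_lock_request(control,
 *					trunc_page_64(start),
 *					round_page_64(end) - trunc_page_64(start),
 *					NULL,		// resid_offset not wanted
 *					NULL,		// io_errno not wanted
 *					MEMORY_OBJECT_RETURN_DIRTY,
 *					MEMORY_OBJECT_DATA_FLUSH,
 *					VM_PROT_NO_CHANGE);
 *	if (kr != KERN_SUCCESS)
 *		return (kr);
 */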

/*
 *	memory_object_release_name:  [interface]
 *
 *	Enforces name semantic on memory_object reference count decrement
 *	This routine should not be called unless the caller holds a name
 *	reference gained through the memory_object_named_create or the
 *	memory_object_rename call.
 *	If the TERMINATE_IDLE flag is set, the call will return if the
 *	reference count is not 1. i.e. idle with the only remaining reference
 *	being the name.
 *	If the decision is made to proceed the name field flag is set to
 *	false and the reference count is decremented.  If the RESPECT_CACHE
 *	flag is set and the reference count has gone to zero, the
 *	memory_object is checked to see if it is cacheable otherwise when
 *	the reference count is zero, it is simply terminated.
 */

kern_return_t
memory_object_release_name(
	memory_object_control_t	control,
	int			flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_release_name(object, flags);
}

/*
 *	Routine:	memory_object_destroy [user interface]
 *	Purpose:
 *		Shut down a memory object, despite the
 *		presence of address map (or other) references
 *		to the vm_object.
 */
kern_return_t
memory_object_destroy(
	memory_object_control_t	control,
	kern_return_t		reason)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return (vm_object_destroy(object, reason));
}

/*
 *	Routine:	vm_object_sync
 *
 *	Kernel internal function to synch out pages in a given
 *	range within an object to its memory manager.  Much the
 *	same as memory_object_lock_request but page protection
 *	is not changed.
 *
 *	If the should_flush and should_return flags are true pages
 *	are flushed, that is dirty & precious pages are written to
 *	the memory manager and then discarded.  If should_return
 *	is false, only precious pages are returned to the memory
 *	manager.
 *
 *	If should flush is false and should_return true, the memory
 *	manager's copy of the pages is updated.  If should_return
 *	is also false, only the precious pages are updated.  This
 *	last option is of limited utility.
 *
 *	Returns:
 *	FALSE		if no pages were returned to the pager
 *	TRUE		otherwise.
 */

boolean_t
vm_object_sync(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_size_t	size,
	boolean_t		should_flush,
	boolean_t		should_return,
	boolean_t		should_iosync)
{
	boolean_t	rv;
	int		flags;

	XPR(XPR_VM_OBJECT,
	    "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
	    (integer_t)object, offset, size, should_flush, should_return);

	/*
	 * Lock the object, and acquire a paging reference to
	 * prevent the memory_object and control ports from
	 * being destroyed.
	 */
	vm_object_lock(object);
	vm_object_paging_begin(object);

	if (should_flush)
		flags = MEMORY_OBJECT_DATA_FLUSH;
	else
		flags = 0;

	if (should_iosync)
		flags |= MEMORY_OBJECT_IO_SYNC;

	rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
			      (should_return) ?
					MEMORY_OBJECT_RETURN_ALL :
					MEMORY_OBJECT_RETURN_NONE,
			      flags,
			      VM_PROT_NO_CHANGE);

	vm_object_paging_end(object);
	vm_object_unlock(object);
	return rv;
}
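
/*
 * Illustrative sketch (not from the original source): vm_object_sync() is
 * the kernel-internal flavor of the lock request above.  A hypothetical
 * caller pushing a dirty range synchronously to the pager would look like:
 *
 *	boolean_t	returned;
 *
 *	returned = vm_object_sync(object,
 *				  offset,
 *				  size,
 *				  TRUE,		// should_flush
 *				  TRUE,		// should_return
 *				  TRUE);	// should_iosync
 *
 * which this routine translates into MEMORY_OBJECT_DATA_FLUSH |
 * MEMORY_OBJECT_IO_SYNC and MEMORY_OBJECT_RETURN_ALL for vm_object_update().
 */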

static int
vm_object_update_extent(
	vm_object_t		object,
	vm_object_offset_t	offset,
	vm_object_offset_t	offset_end,
	vm_object_offset_t	*offset_resid,
	int			*io_errno,
	boolean_t		should_flush,
	memory_object_return_t	should_return,
	boolean_t		should_iosync,
	vm_prot_t		prot)
{
	vm_page_t	m;
	int		retval = 0;
	vm_size_t	data_cnt = 0;
	vm_object_offset_t	paging_offset = 0;
	vm_object_offset_t	last_offset = offset;
	memory_object_lock_result_t	page_lock_result;
	memory_object_lock_result_t	pageout_action;

	pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;

	for (;
	     offset < offset_end && object->resident_page_count;
	     offset += PAGE_SIZE_64) {

		/*
		 * Limit the number of pages to be cleaned at once.
		 */
		if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
			LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
					       pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
			data_cnt = 0;
		}

		while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
			page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);

			XPR(XPR_MEMORY_OBJECT,
			    "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
			    (integer_t)object, offset, page_lock_result, 0, 0);

			switch (page_lock_result)
			{
			  case MEMORY_OBJECT_LOCK_RESULT_DONE:
			    /*
			     *	End of a cluster of dirty pages.
			     */
			    if (data_cnt) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
				    continue;
			    }
			    break;

			  case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
			    /*
			     *	Since it is necessary to block,
			     *	clean any dirty pages now.
			     */
			    if (data_cnt) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
				    continue;
			    }
			    PAGE_SLEEP(object, m, THREAD_UNINT);
			    continue;

			  case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
			  case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
			    /*
			     * The clean and return cases are similar.
			     *
			     * if this would form a discontiguous block,
			     * clean the old pages and start anew.
			     *
			     * Mark the page busy since we will unlock the
			     * object if we issue the LIST_REQ_PAGEOUT
			     */
			    m->busy = TRUE;
			    if (data_cnt &&
				((last_offset != offset) || (pageout_action != page_lock_result))) {
				    LIST_REQ_PAGEOUT_PAGES(object,
							   data_cnt, pageout_action,
							   paging_offset, offset_resid, io_errno, should_iosync);
				    data_cnt = 0;
			    }
			    m->busy = FALSE;

			    if (m->cleaning) {
				    PAGE_SLEEP(object, m, THREAD_UNINT);
				    continue;
			    }
			    if (data_cnt == 0) {
				    pageout_action = page_lock_result;
				    paging_offset = offset;
			    }
			    data_cnt += PAGE_SIZE;
			    last_offset = offset + PAGE_SIZE_64;

			    vm_page_lock_queues();
			    /*
			     * Clean
			     */
			    m->list_req_pending = TRUE;
			    m->cleaning = TRUE;

			    if (should_flush) {
				    /*
				     * and add additional state
				     * for the flush
				     */
				    m->busy = TRUE;
				    m->pageout = TRUE;
				    vm_page_wire(m);
			    }
			    vm_page_unlock_queues();

			    retval = 1;
			    break;
			}
			break;
		}
	}
	/*
	 *	We have completed the scan for applicable pages.
	 *	Clean any pages that have been saved.
	 */
	if (data_cnt) {
		LIST_REQ_PAGEOUT_PAGES(object,
				       data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
	}
	return (retval);
}

/*
 *	Routine:	vm_object_update
 *	Description:
 *		Work function for m_o_lock_request(), vm_o_sync().
 *
 *		Called with object locked and paging ref taken.
 */
kern_return_t
vm_object_update(
	register vm_object_t		object,
	register vm_object_offset_t	offset,
	register vm_object_size_t	size,
	register vm_object_offset_t	*resid_offset,
	int				*io_errno,
	memory_object_return_t		should_return,
	int				flags,
	vm_prot_t			protection)
{
	vm_object_t		copy_object;
	boolean_t		data_returned = FALSE;
	boolean_t		update_cow;
	boolean_t		should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
	boolean_t		should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
	int			num_of_extents;
	int			n;
#define MAX_EXTENTS	8
#define EXTENT_SIZE	(1024 * 1024 * 256)
#define RESIDENT_LIMIT	(1024 * 32)
	struct extent {
		vm_object_offset_t e_base;
		vm_object_offset_t e_min;
		vm_object_offset_t e_max;
	} extents[MAX_EXTENTS];

	/*
	 *	To avoid blocking while scanning for pages, save
	 *	dirty pages to be cleaned all at once.
	 *
	 *	XXXO A similar strategy could be used to limit the
	 *	number of times that a scan must be restarted for
	 *	other reasons.  Those pages that would require blocking
	 *	could be temporarily collected in another list, or
	 *	their offsets could be recorded in a small array.
	 */

	/*
	 * XXX	NOTE: May want to consider converting this to a page list
	 * XXX	vm_map_copy interface.  Need to understand object
	 * XXX	coalescing implications before doing so.
	 */

	update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
			&& (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
			!(flags & MEMORY_OBJECT_DATA_PURGE)))
			|| (flags & MEMORY_OBJECT_COPY_SYNC);

	if((((copy_object = object->copy) != NULL) && update_cow) ||
			(flags & MEMORY_OBJECT_DATA_SYNC)) {
		vm_map_size_t		i;
		vm_map_size_t		copy_size;
		vm_map_offset_t		copy_offset;
		vm_prot_t		prot;
		vm_page_t		page;
		vm_page_t		top_page;
		kern_return_t		error = 0;

		if(copy_object != NULL) {
			/* translate offset with respect to shadow's offset */
			copy_offset = (offset >= copy_object->shadow_offset)?
				(vm_map_offset_t)(offset - copy_object->shadow_offset) :
				(vm_map_offset_t) 0;
			if(copy_offset > copy_object->size)
				copy_offset = copy_object->size;

			/* clip size with respect to shadow offset */
			if (offset >= copy_object->shadow_offset) {
				copy_size = size;
			} else if (size >= copy_object->shadow_offset - offset) {
				copy_size = size -
					(copy_object->shadow_offset - offset);
			} else {
				copy_size = 0;
			}

			if (copy_offset + copy_size > copy_object->size) {
				if (copy_object->size >= copy_offset) {
					copy_size = copy_object->size - copy_offset;
				} else {
					copy_size = 0;
				}
			}

			copy_size+=copy_offset;

			vm_object_unlock(object);
			vm_object_lock(copy_object);
		} else {
			copy_object = object;

			copy_size   = offset + size;
			copy_offset = offset;
		}

		vm_object_paging_begin(copy_object);
		for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
	RETRY_COW_OF_LOCK_REQUEST:
			prot = 	VM_PROT_WRITE|VM_PROT_READ;
			switch (vm_fault_page(copy_object, i,
				VM_PROT_WRITE|VM_PROT_READ,
				FALSE,
				THREAD_UNINT,
				copy_offset,
				copy_offset+copy_size,
				VM_BEHAVIOR_SEQUENTIAL,
				&prot,
				&page,
				&top_page,
				(int *)0,
				&error,
				FALSE,
				FALSE, NULL, 0)) {

			case VM_FAULT_SUCCESS:
				if(top_page) {
					vm_fault_cleanup(
						page->object, top_page);
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
					vm_object_lock(copy_object);
					vm_object_paging_begin(copy_object);
				} else {
					PAGE_WAKEUP_DONE(page);
					vm_page_lock_queues();
					if (!page->active && !page->inactive)
						vm_page_activate(page);
					vm_page_unlock_queues();
				}
				break;
			case VM_FAULT_RETRY:
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_INTERRUPTED:
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_SHORTAGE:
				VM_PAGE_WAIT();
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_FICTITIOUS_SHORTAGE:
				vm_page_more_fictitious();
				prot = 	VM_PROT_WRITE|VM_PROT_READ;
				vm_object_lock(copy_object);
				vm_object_paging_begin(copy_object);
				goto RETRY_COW_OF_LOCK_REQUEST;
			case VM_FAULT_MEMORY_ERROR:
				vm_object_lock(object);
				goto BYPASS_COW_COPYIN;
			}
		}
		vm_object_paging_end(copy_object);
		if(copy_object != object) {
			vm_object_unlock(copy_object);
			vm_object_lock(object);
		}
	}
	if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
		return KERN_SUCCESS;
	}
	if(((copy_object = object->copy) != NULL) &&
			(flags & MEMORY_OBJECT_DATA_PURGE)) {
		copy_object->shadow_severed = TRUE;
		copy_object->shadowed = FALSE;
		copy_object->shadow = NULL;
		/* delete the ref the COW was holding on the target object */
		vm_object_deallocate(object);
	}
BYPASS_COW_COPYIN:

	/*
	 * when we have a really large range to check relative
	 * to the number of actual resident pages, we'd like
	 * to use the resident page list to drive our checks
	 * however, the object lock will get dropped while processing
	 * the page which means the resident queue can change which
	 * means we can't walk the queue as we process the pages
	 * we also want to do the processing in offset order to allow
	 * 'runs' of pages to be collected if we're being told to
	 * flush to disk... the resident page queue is NOT ordered.
	 *
	 * a temporary solution (until we figure out how to deal with
	 * large address spaces more generically) is to pre-flight
	 * the resident page queue (if it's small enough) and develop
	 * a collection of extents (that encompass actual resident pages)
	 * to visit.  This will at least allow us to deal with some of the
	 * more pathological cases in a more efficient manner.  The current
	 * worst case (a single resident page at the end of an extremely large
	 * range) can take minutes to complete for ranges in the terrabyte
	 * category... since this routine is called when truncating a file,
	 * and we currently support files up to 16 Tbytes in size, this
	 * is not a theoretical problem
	 */

	if ((object->resident_page_count < RESIDENT_LIMIT) &&
	    (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
		vm_page_t		next;
		vm_object_offset_t	start;
		vm_object_offset_t	end;
		vm_object_size_t	e_mask;
		vm_page_t		m;

		start = offset;
		end   = offset + size;
		num_of_extents = 0;
		e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));

		m = (vm_page_t) queue_first(&object->memq);

		while (!queue_end(&object->memq, (queue_entry_t) m)) {
			next = (vm_page_t) queue_next(&m->listq);

			if ((m->offset >= start) && (m->offset < end)) {
				/*
				 * this is a page we're interested in
				 * try to fit it into a current extent
				 */
				for (n = 0; n < num_of_extents; n++) {
					if ((m->offset & e_mask) == extents[n].e_base) {
						/*
						 * use (PAGE_SIZE - 1) to determine the
						 * max offset so that we don't wrap if
						 * we're at the last page of the space
						 */
						if (m->offset < extents[n].e_min)
							extents[n].e_min = m->offset;
						else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
							extents[n].e_max = m->offset + (PAGE_SIZE - 1);
						break;
					}
				}
				if (n == num_of_extents) {
					/*
					 * didn't find a current extent that can encompass
					 * this page
					 */
					if (n < MAX_EXTENTS) {
						/*
						 * if we still have room,
						 * create a new extent
						 */
						extents[n].e_base = m->offset & e_mask;
						extents[n].e_min  = m->offset;
						extents[n].e_max  = m->offset + (PAGE_SIZE - 1);

						num_of_extents++;
					} else {
						/*
						 * no room to create a new extent...
						 * fall back to a single extent based
						 * on the min and max page offsets
						 * we find in the range we're interested in...
						 * first, look through the extent list and
						 * develop the overall min and max for the
						 * pages we've looked at up to this point
						 */
						for (n = 1; n < num_of_extents; n++) {
							if (extents[n].e_min < extents[0].e_min)
								extents[0].e_min = extents[n].e_min;
							if (extents[n].e_max > extents[0].e_max)
								extents[0].e_max = extents[n].e_max;
						}
						/*
						 * now setup to run through the remaining pages
						 * to determine the overall min and max
						 * offset for the specified range
						 */
						extents[0].e_base = 0;
						e_mask = 0;
						num_of_extents = 1;

						/*
						 * by continuing, we'll reprocess the
						 * page that forced us to abandon trying
						 * to develop multiple extents
						 */
						continue;
					}
				}
			}
			m = next;
		}
	} else {
		extents[0].e_min = offset;
		extents[0].e_max = offset + (size - 1);

		num_of_extents = 1;
	}
	for (n = 0; n < num_of_extents; n++) {
		if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
					    should_flush, should_return, should_iosync, protection))
			data_returned = TRUE;
	}
	return (data_returned);
}
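
/*
 * Worked example for the extent pre-flight above (illustrative, not part of
 * the original source).  With EXTENT_SIZE = 256MB (0x10000000), e_mask
 * clears the low 28 bits, so every resident page hashes to the
 * 256MB-aligned bucket that contains it:
 *
 *	e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
 *
 *	// page at offset 0x10002000 -> e_base 0x10000000
 *	// page at offset 0x1FFFF000 -> e_base 0x10000000 (same extent)
 *	// page at offset 0x20001000 -> e_base 0x20000000 (new extent)
 *
 * Up to MAX_EXTENTS (8) such buckets are tracked; each bucket's e_min/e_max
 * shrink the range that vm_object_update_extent() must walk, so a sparse
 * object does not force a page-by-page scan of the entire request.
 */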

/*
 *	Routine:	memory_object_synchronize_completed [user interface]
 *
 *	Tell kernel that previously synchronized data
 *	(memory_object_synchronize) has been queued or placed on the
 *	backing storage.
 *
 *	Note: there may be multiple synchronize requests for a given
 *	memory object outstanding but they will not overlap.
 */

kern_return_t
memory_object_synchronize_completed(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	vm_offset_t		length)
{
	vm_object_t	object;
	msync_req_t	msr;

	object = memory_object_control_to_vm_object(control);

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
	    (integer_t)object, offset, length, 0, 0);

	/*
	 *	Look for bogus arguments
	 */

	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	/*
	 *	search for sync request structure
	 */
	queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
		if (msr->offset == offset && msr->length == length) {
			queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
			break;
		}
	}/* queue_iterate */

	if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
		vm_object_unlock(object);
		return KERN_INVALID_ARGUMENT;
	}

	msr_lock(msr);
	vm_object_unlock(object);
	msr->flag = VM_MSYNC_DONE;
	msr_unlock(msr);
	thread_wakeup((event_t) msr);

	return KERN_SUCCESS;
}/* memory_object_synchronize_completed */

static kern_return_t
vm_object_set_attributes_common(
	vm_object_t			object,
	boolean_t			may_cache,
	memory_object_copy_strategy_t	copy_strategy,
	boolean_t			temporary,
	memory_object_cluster_size_t	cluster_size,
	boolean_t			silent_overwrite,
	boolean_t			advisory_pageout)
{
	boolean_t	object_became_ready;

	XPR(XPR_MEMORY_OBJECT,
	    "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
	    (integer_t)object, (may_cache&1)|((temporary&1)<<1), copy_strategy, 0, 0);

	if (object == VM_OBJECT_NULL)
		return(KERN_INVALID_ARGUMENT);

	/*
	 *	Verify the attributes of importance
	 */

	switch(copy_strategy) {
		case MEMORY_OBJECT_COPY_NONE:
		case MEMORY_OBJECT_COPY_DELAY:
			break;
		default:
			return(KERN_INVALID_ARGUMENT);
	}

#if	!ADVISORY_PAGEOUT
	if (silent_overwrite || advisory_pageout)
		return(KERN_INVALID_ARGUMENT);

#endif	/* !ADVISORY_PAGEOUT */
	if (may_cache)
		may_cache = TRUE;
	if (temporary)
		temporary = TRUE;
	if (cluster_size != 0) {
		int	pages_per_cluster;
		pages_per_cluster = atop_32(cluster_size);
		/*
		 * Cluster size must be integral multiple of page size,
		 * and be a power of 2 number of pages.
		 */
		if ((cluster_size & (PAGE_SIZE-1)) ||
		    ((pages_per_cluster-1) & pages_per_cluster))
			return KERN_INVALID_ARGUMENT;
	}

	vm_object_lock(object);

	/*
	 *	Copy the attributes
	 */
	assert(!object->internal);
	object_became_ready = !object->pager_ready;
	object->copy_strategy = copy_strategy;
	object->can_persist = may_cache;
	object->temporary = temporary;
	object->silent_overwrite = silent_overwrite;
	object->advisory_pageout = advisory_pageout;
	if (cluster_size == 0)
		cluster_size = PAGE_SIZE;
	object->cluster_size = cluster_size;

	assert(cluster_size >= PAGE_SIZE &&
	       cluster_size % PAGE_SIZE == 0);

	/*
	 *	Wake up anyone waiting for the ready attribute
	 *	to become asserted.
	 */

	if (object_became_ready) {
		object->pager_ready = TRUE;
		vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
	}

	vm_object_unlock(object);

	return(KERN_SUCCESS);
}
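
/*
 * Illustrative note (not from the original source) on the cluster-size
 * check above: a value is accepted only if it is page-aligned and a
 * power-of-two number of pages, because (n - 1) & n is zero exactly for
 * powers of two.  For example, with 4K pages:
 *
 *	cluster_size = 0x8000 (8 pages):   (8 - 1) & 8   == 0  -> accepted
 *	cluster_size = 0xC000 (12 pages):  (12 - 1) & 12 == 8  -> rejected
 */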

/*
 *	Set the memory object attribute as provided.
 *
 *	XXX This routine cannot be completed until the vm_msync, clean
 *	    in place, and cluster work is completed. See ifdef notyet
 *	    below and note that vm_object_set_attributes_common()
 *	    may have to be expanded.
 */
kern_return_t
memory_object_change_attributes(
	memory_object_control_t		control,
	memory_object_flavor_t		flavor,
	memory_object_info_t		attributes,
	mach_msg_type_number_t		count)
{
	vm_object_t			object;
	kern_return_t			result = KERN_SUCCESS;
	boolean_t			temporary;
	boolean_t			may_cache;
	boolean_t			invalidate;
	memory_object_cluster_size_t	cluster_size;
	memory_object_copy_strategy_t	copy_strategy;
	boolean_t			silent_overwrite;
	boolean_t			advisory_pageout;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	temporary = object->temporary;
	may_cache = object->can_persist;
	copy_strategy = object->copy_strategy;
	silent_overwrite = object->silent_overwrite;
	advisory_pageout = object->advisory_pageout;
#if notyet
	invalidate = object->invalidate;
#endif
	cluster_size = object->cluster_size;
	vm_object_unlock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;

		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;

		temporary = behave->temporary;
		invalidate = behave->invalidate;
		copy_strategy = behave->copy_strategy;
		silent_overwrite = behave->silent_overwrite;
		advisory_pageout = behave->advisory_pageout;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;

		may_cache = perf->may_cache;
		cluster_size = round_page_32(perf->cluster_size);

		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;

		may_cache = attr->may_cache;
		copy_strategy = attr->copy_strategy;
		cluster_size = page_size;

		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
			result = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;

		copy_strategy = attr->copy_strategy;
		may_cache = attr->may_cache_object;
		cluster_size = attr->cluster_size;
		temporary = attr->temporary;

		break;
	    }

	    default:
		result = KERN_INVALID_ARGUMENT;
		break;
	}

	if (result != KERN_SUCCESS)
		return(result);

	if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
		copy_strategy = MEMORY_OBJECT_COPY_DELAY;
		temporary = TRUE;
	} else {
		temporary = FALSE;
	}

	/*
	 * XXX	may_cache may become a tri-valued variable to handle
	 * XXX	uncache if not in use.
	 */
	return (vm_object_set_attributes_common(object,
						may_cache,
						copy_strategy,
						temporary,
						cluster_size,
						silent_overwrite,
						advisory_pageout));
}

kern_return_t
memory_object_get_attributes(
	memory_object_control_t	control,
	memory_object_flavor_t	flavor,
	memory_object_info_t	attributes,	/* pointer to OUT array */
	mach_msg_type_number_t	*count)		/* IN/OUT */
{
	kern_return_t	ret = KERN_SUCCESS;
	vm_object_t	object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	switch (flavor) {
	    case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		old_memory_object_behave_info_t	behave;

		if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (old_memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif

		*count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_BEHAVIOR_INFO:
	    {
		memory_object_behave_info_t	behave;

		if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		behave = (memory_object_behave_info_t) attributes;
		behave->copy_strategy = object->copy_strategy;
		behave->temporary = object->temporary;
#if notyet	/* remove when vm_msync complies and clean in place fini */
		behave->invalidate = object->invalidate;
#else
		behave->invalidate = FALSE;
#endif
		behave->advisory_pageout = object->advisory_pageout;
		behave->silent_overwrite = object->silent_overwrite;
		*count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_PERFORMANCE_INFO:
	    {
		memory_object_perf_info_t	perf;

		if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		perf = (memory_object_perf_info_t) attributes;
		perf->cluster_size = object->cluster_size;
		perf->may_cache = object->can_persist;

		*count = MEMORY_OBJECT_PERF_INFO_COUNT;
		break;
	    }

	    case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		old_memory_object_attr_info_t	attr;

		if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (old_memory_object_attr_info_t) attributes;
		attr->may_cache = object->can_persist;
		attr->copy_strategy = object->copy_strategy;

		*count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    case MEMORY_OBJECT_ATTRIBUTE_INFO:
	    {
		memory_object_attr_info_t	attr;

		if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
			ret = KERN_INVALID_ARGUMENT;
			break;
		}

		attr = (memory_object_attr_info_t) attributes;
		attr->copy_strategy = object->copy_strategy;
		attr->cluster_size = object->cluster_size;
		attr->may_cache_object = object->can_persist;
		attr->temporary = object->temporary;

		*count = MEMORY_OBJECT_ATTR_INFO_COUNT;
		break;
	    }

	    default:
		ret = KERN_INVALID_ARGUMENT;
		break;
	}

	vm_object_unlock(object);

	return(ret);
}

kern_return_t
memory_object_iopl_request(
	ipc_port_t		port,
	memory_object_offset_t	offset,
	upl_size_t		*upl_size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			*flags)
{
	vm_object_t		object;
	kern_return_t		ret;
	int			caller_flags;

	caller_flags = *flags;

	if (caller_flags & ~UPL_VALID_FLAGS) {
		/*
		 * For forward compatibility's sake,
		 * reject any unknown flag.
		 */
		return KERN_INVALID_VALUE;
	}

	if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
		vm_named_entry_t	named_entry;

		named_entry = (vm_named_entry_t)port->ip_kobject;
		/* a few checks to make sure user is obeying rules */
		if(*upl_size == 0) {
			if(offset >= named_entry->size)
				return(KERN_INVALID_RIGHT);
			*upl_size = named_entry->size - offset;
		}
		if(caller_flags & UPL_COPYOUT_FROM) {
			if((named_entry->protection & VM_PROT_READ)
						!= VM_PROT_READ) {
				return(KERN_INVALID_RIGHT);
			}
		} else {
			if((named_entry->protection &
				(VM_PROT_READ | VM_PROT_WRITE))
				!= (VM_PROT_READ | VM_PROT_WRITE)) {
				return(KERN_INVALID_RIGHT);
			}
		}
		if(named_entry->size < (offset + *upl_size))
			return(KERN_INVALID_ARGUMENT);

		/* the callers parameter offset is defined to be the */
		/* offset from beginning of named entry offset in object */
		offset = offset + named_entry->offset;

		if(named_entry->is_sub_map)
			return (KERN_INVALID_ARGUMENT);

		named_entry_lock(named_entry);

		if (named_entry->is_pager) {
			object = vm_object_enter(named_entry->backing.pager,
					named_entry->offset + named_entry->size,
					named_entry->internal,
					FALSE,
					FALSE);
			if (object == VM_OBJECT_NULL) {
				named_entry_unlock(named_entry);
				return(KERN_INVALID_OBJECT);
			}

			/* JMM - drop reference on pager here? */

			/* create an extra reference for the named entry */
			vm_object_lock(object);
			vm_object_reference_locked(object);
			named_entry->backing.object = object;
			named_entry->is_pager = FALSE;
			named_entry_unlock(named_entry);

			/* wait for object to be ready */
			while (!object->pager_ready) {
				vm_object_wait(object,
					VM_OBJECT_EVENT_PAGER_READY,
					THREAD_UNINT);
				vm_object_lock(object);
			}
			vm_object_unlock(object);
		} else {
			/* This is the case where we are going to map */
			/* an already mapped object.  If the object is */
			/* not ready it is internal.  An external     */
			/* object cannot be mapped until it is ready  */
			/* we can therefore avoid the ready check     */
			/* in this case.  */
			object = named_entry->backing.object;
			vm_object_reference(object);
			named_entry_unlock(named_entry);
		}
	} else {
		memory_object_control_t	control;
		control = (memory_object_control_t)port->ip_kobject;
		if (control == NULL)
			return (KERN_INVALID_ARGUMENT);
		object = memory_object_control_to_vm_object(control);
		if (object == VM_OBJECT_NULL)
			return (KERN_INVALID_ARGUMENT);
		vm_object_reference(object);
	}
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (!object->private) {
		if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
			*upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
		if (object->phys_contiguous) {
			*flags = UPL_PHYS_CONTIG;
		} else {
			*flags = 0;
		}
	} else {
		*flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
	}

	ret = vm_object_iopl_request(object,
				     offset,
				     *upl_size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     caller_flags);
	vm_object_deallocate(object);

	return ret;
}

/*
 *	Routine:	memory_object_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *
 */

kern_return_t
memory_object_upl_request(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_t			*upl_ptr,
	upl_page_info_array_t	user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_upl_request(object,
				     offset,
				     size,
				     upl_ptr,
				     user_page_list,
				     page_list_count,
				     cntrl_flags);
}

/*
 *	Routine:	memory_object_super_upl_request [interface]
 *	Purpose:
 *		Cause the population of a portion of a vm_object
 *		in much the same way as memory_object_upl_request.
 *		Depending on the nature of the request, the pages
 *		returned may contain valid data or be uninitialized.
 *		However, the region may be expanded up to the super
 *		cluster size provided.
 */

kern_return_t
memory_object_super_upl_request(
	memory_object_control_t control,
	memory_object_offset_t	offset,
	upl_size_t		size,
	upl_size_t		super_cluster,
	upl_t			*upl,
	upl_page_info_t		*user_page_list,
	unsigned int		*page_list_count,
	int			cntrl_flags)
{
	vm_object_t		object;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	return vm_object_super_upl_request(object,
					   offset,
					   size,
					   super_cluster,
					   upl,
					   user_page_list,
					   page_list_count,
					   cntrl_flags);
}

int vm_stat_discard_cleared_reply = 0;
int vm_stat_discard_cleared_unset = 0;
int vm_stat_discard_cleared_too_late = 0;

/*
 *	Routine:	host_default_memory_manager [interface]
 *	Purpose:
 *		set/get the default memory manager port and default cluster
 *		size.
 *
 *		If successful, consumes the supplied naked send right.
 */
kern_return_t
host_default_memory_manager(
	host_priv_t		host_priv,
	memory_object_default_t	*default_manager,
	memory_object_cluster_size_t cluster_size)
{
	memory_object_default_t current_manager;
	memory_object_default_t new_manager;
	memory_object_default_t returned_manager;

	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_HOST);

	assert(host_priv == &realhost);

	new_manager = *default_manager;
	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;

	if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		/*
		 *	Retrieve the current value.
		 */
		memory_object_default_reference(current_manager);
		returned_manager = current_manager;
	} else {
		/*
		 *	Retrieve the current value,
		 *	and replace it with the supplied value.
		 *	We return the old reference to the caller
		 *	but we have to take a reference on the new
		 *	manager.
		 */

		returned_manager = current_manager;
		memory_manager_default = new_manager;
		memory_object_default_reference(new_manager);

		if (cluster_size % PAGE_SIZE != 0) {
#if 0
			mutex_unlock(&memory_manager_default_lock);
			return KERN_INVALID_ARGUMENT;
#else
			cluster_size = round_page_32(cluster_size);
#endif
		}
		memory_manager_default_cluster = cluster_size;

		/*
		 *	In case anyone's been waiting for a memory
		 *	manager to be established, wake them up.
		 */

		thread_wakeup((event_t) &memory_manager_default);
	}

	mutex_unlock(&memory_manager_default_lock);

	*default_manager = returned_manager;
	return(KERN_SUCCESS);
}
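
/*
 * Illustrative sketch (not part of the original source): a default pager
 * registers itself roughly like this, handing over a naked send right and
 * receiving the previously installed manager back in the same in/out
 * parameter.  The new_manager_port variable is hypothetical.
 *
 *	memory_object_default_t	dmm = new_manager_port;
 *	kern_return_t		kr;
 *
 *	kr = host_default_memory_manager(host_priv_self(), &dmm, 0);
 *	// on success, dmm now holds the manager that was installed before
 */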

/*
 *	Routine:	memory_manager_default_reference
 *	Purpose:
 *		Returns a naked send right for the default
 *		memory manager.  The returned right is always
 *		valid (not IP_NULL or IP_DEAD).
 */

__private_extern__ memory_object_default_t
memory_manager_default_reference(
	memory_object_cluster_size_t *cluster_size)
{
	memory_object_default_t current_manager;

	mutex_lock(&memory_manager_default_lock);
	current_manager = memory_manager_default;
	while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
		wait_result_t res;

		res = thread_sleep_mutex((event_t) &memory_manager_default,
					 &memory_manager_default_lock,
					 THREAD_UNINT);
		assert(res == THREAD_AWAKENED);
		current_manager = memory_manager_default;
	}
	memory_object_default_reference(current_manager);
	*cluster_size = memory_manager_default_cluster;
	mutex_unlock(&memory_manager_default_lock);

	return current_manager;
}

/*
 *	Routine:	memory_manager_default_check
 *
 *	Purpose:
 *		Check whether a default memory manager has been set
 *		up yet, or not. Returns KERN_SUCCESS if dmm exists,
 *		and KERN_FAILURE if dmm does not exist.
 *
 *	If there is no default memory manager, log an error,
 *	but only the first time.
 *
 */
__private_extern__ kern_return_t
memory_manager_default_check(void)
{
	memory_object_default_t current;

	mutex_lock(&memory_manager_default_lock);
	current = memory_manager_default;
	if (current == MEMORY_OBJECT_DEFAULT_NULL) {
		static boolean_t logged;	/* initialized to 0 */
		boolean_t	complain = !logged;
		logged = TRUE;
		mutex_unlock(&memory_manager_default_lock);
		if (complain)
			printf("Warning: No default memory manager\n");
		return(KERN_FAILURE);
	} else {
		mutex_unlock(&memory_manager_default_lock);
		return(KERN_SUCCESS);
	}
}

__private_extern__ void
memory_manager_default_init(void)
{
	memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
	mutex_init(&memory_manager_default_lock, 0);
}


/* Allow manipulation of individual page state.  This is actually part of */
/* the UPL regimen but takes place on the object rather than on a UPL */

kern_return_t
memory_object_page_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset,
	int			ops,
	ppnum_t			*phys_entry,
	int			*flags)
{
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	vm_object_lock(object);

	if(ops & UPL_POP_PHYSICAL) {
		if(object->phys_contiguous) {
			if (phys_entry) {
				*phys_entry = (ppnum_t)
					(object->shadow_offset >> 12);
			}
			vm_object_unlock(object);
			return KERN_SUCCESS;
		} else {
			vm_object_unlock(object);
			return KERN_INVALID_OBJECT;
		}
	}
	if(object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	while(TRUE) {
		if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
			vm_object_unlock(object);
			return KERN_FAILURE;
		}

		/* Sync up on getting the busy bit */
		if((dst_page->busy || dst_page->cleaning) &&
			   (((ops & UPL_POP_SET) &&
			   (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
			/* someone else is playing with the page, we will */
			/* have to wait */
			PAGE_SLEEP(object, dst_page, THREAD_UNINT);
			continue;
		}

		if (ops & UPL_POP_DUMP) {
			vm_page_lock_queues();

			if (dst_page->no_isync == FALSE)
				pmap_disconnect(dst_page->phys_page);
			vm_page_free(dst_page);

			vm_page_unlock_queues();
			break;
		}

		if (flags) {
			*flags = 0;

			/* Get the condition of flags before requested ops */
			/* are undertaken */

			if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
			if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
			if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
			if(dst_page->absent) *flags |= UPL_POP_ABSENT;
			if(dst_page->busy) *flags |= UPL_POP_BUSY;
		}

		/* The caller should have made a call either contingent with */
		/* or prior to this call to set UPL_POP_BUSY */
		if(ops & UPL_POP_SET) {
			/* The protection granted with this assert will */
			/* not be complete. If the caller violates the */
			/* convention and attempts to change page state */
			/* without first setting busy we may not see it */
			/* because the page may already be busy.  However */
			/* if such violations occur we will assert sooner */
			/* or later. */
			assert(dst_page->busy || (ops & UPL_POP_BUSY));
			if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
			if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
		}

		if(ops & UPL_POP_CLR) {
			assert(dst_page->busy);
			if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
			if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
			if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
			if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
			if (ops & UPL_POP_BUSY) {
				dst_page->busy = FALSE;
				PAGE_WAKEUP(dst_page);
			}
		}

		if (dst_page->encrypted) {
			/*
			 * ENCRYPTED SWAP:
			 * We need to decrypt this encrypted page before the
			 * caller can access its contents.
			 * But if the caller really wants to access the page's
			 * contents, they have to keep the page "busy".
			 * Otherwise, the page could get recycled or re-encrypted
			 * at any time.
			 */
			if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
			    dst_page->busy) {
				/*
				 * The page is stable enough to be accessed by
				 * the caller, so make sure its contents are
				 * not encrypted.
				 */
				vm_page_decrypt(dst_page, 0);
			} else {
				/*
				 * The page is not busy, so don't bother
				 * decrypting it, since anything could
				 * happen to it between now and when the
				 * caller wants to access it.
				 * We should not give the caller access
				 * to this page.
				 */
				assert(!phys_entry);
			}
		}

		if (phys_entry) {
			/*
			 * The physical page number will remain valid
			 * only if the page is kept busy.
			 * ENCRYPTED SWAP: make sure we don't let the
			 * caller access an encrypted page.
			 */
			assert(dst_page->busy);
			assert(!dst_page->encrypted);
			*phys_entry = dst_page->phys_page;
		}

		break;
	}

	vm_object_unlock(object);
	return KERN_SUCCESS;
}
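
/*
 * Illustrative sketch (not from the original source): a caller that wants a
 * stable physical page number must set UPL_POP_BUSY in the same call, per
 * the convention asserted above, and clear it again when finished:
 *
 *	ppnum_t	pnum;
 *	int	pflags;
 *
 *	if (memory_object_page_op(control, offset,
 *				  UPL_POP_SET | UPL_POP_BUSY,
 *				  &pnum, &pflags) == KERN_SUCCESS) {
 *		// use pnum while the page remains busy
 *		memory_object_page_op(control, offset,
 *				      UPL_POP_CLR | UPL_POP_BUSY, NULL, NULL);
 *	}
 */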

/*
 * memory_object_range_op offers performance enhancement over
 * memory_object_page_op for page_op functions which do not require page
 * level state to be returned from the call.  Page_op was created to provide
 * a low-cost alternative to page manipulation via UPLs when only a single
 * page was involved.  The range_op call establishes the ability in the _op
 * family of functions to work on multiple pages where the lack of page level
 * state handling allows the caller to avoid the overhead of the upl structures.
 */

kern_return_t
memory_object_range_op(
	memory_object_control_t	control,
	memory_object_offset_t	offset_beg,
	memory_object_offset_t	offset_end,
	int			ops,
	int			*range)
{
	memory_object_offset_t	offset;
	vm_object_t		object;
	vm_page_t		dst_page;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count == 0) {
		if (range) {
			if (ops & UPL_ROP_PRESENT)
				*range = 0;
			else
				*range = offset_end - offset_beg;
		}
		return KERN_SUCCESS;
	}
	vm_object_lock(object);

	if (object->phys_contiguous) {
		vm_object_unlock(object);
		return KERN_INVALID_OBJECT;
	}

	offset = offset_beg;

	while (offset < offset_end) {
		dst_page = vm_page_lookup(object, offset);
		if (dst_page != VM_PAGE_NULL) {
			if (ops & UPL_ROP_DUMP) {
				if (dst_page->busy || dst_page->cleaning) {
					/*
					 * someone else is playing with the
					 * page, we will have to wait
					 */
					PAGE_SLEEP(object,
						dst_page, THREAD_UNINT);
					/*
					 * need to relook the page up since it's
					 * state may have changed while we slept
					 * it might even belong to a different object
					 * at this point
					 */
					continue;
				}
				vm_page_lock_queues();

				if (dst_page->no_isync == FALSE)
					pmap_disconnect(dst_page->phys_page);
				vm_page_free(dst_page);

				vm_page_unlock_queues();
			} else if (ops & UPL_ROP_ABSENT)
				break;
		} else if (ops & UPL_ROP_PRESENT)
			break;

		offset += PAGE_SIZE;
	}
	vm_object_unlock(object);

	if (range)
		*range = offset - offset_beg;

	return KERN_SUCCESS;
}
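
/*
 * Illustrative sketch (not part of the original source): probing how far a
 * resident run extends from a starting offset, without building a UPL.
 * The start and probe_size values are hypothetical.
 *
 *	int	range = 0;
 *
 *	memory_object_range_op(control,
 *			       start,
 *			       start + probe_size,
 *			       UPL_ROP_PRESENT,
 *			       &range);
 *	// 'range' now holds the number of bytes, starting at 'start',
 *	// that are backed by resident pages before the first absent one.
 */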

kern_return_t
memory_object_pages_resident(
	memory_object_control_t	control,
	boolean_t	*	has_pages_resident)
{
	vm_object_t		object;

	*has_pages_resident = FALSE;

	object = memory_object_control_to_vm_object(control);
	if (object == VM_OBJECT_NULL)
		return (KERN_INVALID_ARGUMENT);

	if (object->resident_page_count)
		*has_pages_resident = TRUE;

	return (KERN_SUCCESS);
}

static zone_t mem_obj_control_zone;

__private_extern__ void
memory_object_control_bootstrap(void)
{
	int	i;

	i = (vm_size_t) sizeof (struct memory_object_control);
	mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
	return;
}

__private_extern__ memory_object_control_t
memory_object_control_allocate(
	vm_object_t		object)
{
	memory_object_control_t control;

	control = (memory_object_control_t)zalloc(mem_obj_control_zone);
	if (control != MEMORY_OBJECT_CONTROL_NULL)
		control->object = object;
	return (control);
}

__private_extern__ void
memory_object_control_collapse(
	memory_object_control_t	control,
	vm_object_t		object)
{
	assert((control->object != VM_OBJECT_NULL) &&
	       (control->object != object));
	control->object = object;
}

__private_extern__ vm_object_t
memory_object_control_to_vm_object(
	memory_object_control_t	control)
{
	if (control == MEMORY_OBJECT_CONTROL_NULL)
		return VM_OBJECT_NULL;

	return (control->object);
}

memory_object_control_t
convert_port_to_mo_control(
	__unused mach_port_t	port)
{
	return MEMORY_OBJECT_CONTROL_NULL;
}


mach_port_t
convert_mo_control_to_port(
	__unused memory_object_control_t	control)
{
	return MACH_PORT_NULL;
}

void
memory_object_control_reference(
	__unused memory_object_control_t	control)
{
	return;
}

/*
 * We only ever issue one of these references, so kill it
 * when that gets released (should switch the real reference
 * counting in true port-less EMMI).
 */
void
memory_object_control_deallocate(
	memory_object_control_t	control)
{
	zfree(mem_obj_control_zone, control);
}

void
memory_object_control_disable(
	memory_object_control_t	control)
{
	assert(control->object != VM_OBJECT_NULL);
	control->object = VM_OBJECT_NULL;
}

void
memory_object_default_reference(
	memory_object_default_t dmm)
{
	ipc_port_make_send(dmm);
}

void
memory_object_default_deallocate(
	memory_object_default_t dmm)
{
	ipc_port_release_send(dmm);
}

memory_object_t
convert_port_to_memory_object(
	__unused mach_port_t	port)
{
	return (MEMORY_OBJECT_NULL);
}


mach_port_t
convert_memory_object_to_port(
	__unused memory_object_t	object)
{
	return (MACH_PORT_NULL);
}


/* Routine memory_object_reference */
void memory_object_reference(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_reference(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_reference(memory_object);
	} else
#endif
		dp_memory_object_reference(memory_object);
}

/* Routine memory_object_deallocate */
void memory_object_deallocate(
	memory_object_t memory_object)
{

#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		vnode_pager_deallocate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		device_pager_deallocate(memory_object);
	} else
#endif
		dp_memory_object_deallocate(memory_object);
}


/* Routine memory_object_init */
kern_return_t memory_object_init
(
	memory_object_t memory_object,
	memory_object_control_t memory_control,
	memory_object_cluster_size_t memory_object_page_size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_init(memory_object,
					memory_control,
					memory_object_page_size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_init(memory_object,
					 memory_control,
					 memory_object_page_size);
	} else
#endif
		return dp_memory_object_init(memory_object,
					     memory_control,
					     memory_object_page_size);
}

/* Routine memory_object_terminate */
kern_return_t memory_object_terminate
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_terminate(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_terminate(memory_object);
	} else
#endif
		return dp_memory_object_terminate(memory_object);
}

/* Routine memory_object_data_request */
kern_return_t memory_object_data_request
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	memory_object_cluster_size_t length,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_request(memory_object,
						offset,
						length,
						desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_request(memory_object,
						 offset,
						 length,
						 desired_access);
	} else
#endif
		return dp_memory_object_data_request(memory_object,
						     offset,
						     length,
						     desired_access);
}

/* Routine memory_object_data_return */
kern_return_t memory_object_data_return
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	memory_object_offset_t *resid_offset,
	int	*io_error,
	boolean_t dirty,
	boolean_t kernel_copy,
	int	upl_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_return(memory_object,
					       offset,
					       size,
					       resid_offset,
					       io_error,
					       dirty,
					       kernel_copy,
					       upl_flags);
	} else if (memory_object->pager == &device_pager_workaround) {

		return device_pager_data_return(memory_object,
						offset,
						size,
						dirty,
						kernel_copy,
						upl_flags);
	}
	else
#endif
	{
		return dp_memory_object_data_return(memory_object,
						    offset,
						    size,
						    NULL,
						    NULL,
						    FALSE,
						    FALSE,
						    0);
	}
}

/* Routine memory_object_data_initialize */
kern_return_t memory_object_data_initialize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_initialize(memory_object,
						   offset,
						   size);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_initialize(memory_object,
						    offset,
						    size);
	} else
#endif
		return dp_memory_object_data_initialize(memory_object,
							offset,
							size);
}

/* Routine memory_object_data_unlock */
kern_return_t memory_object_data_unlock
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_prot_t desired_access
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_data_unlock(memory_object,
					       offset,
					       size,
					       desired_access);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_data_unlock(memory_object,
						offset,
						size,
						desired_access);
	} else
#endif
		return dp_memory_object_data_unlock(memory_object,
						    offset,
						    size,
						    desired_access);
}

/* Routine memory_object_synchronize */
kern_return_t memory_object_synchronize
(
	memory_object_t memory_object,
	memory_object_offset_t offset,
	vm_size_t size,
	vm_sync_t sync_flags
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_synchronize(memory_object,
					       offset,
					       size,
					       sync_flags);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_synchronize(memory_object,
						offset,
						size,
						sync_flags);
	} else
#endif
		return dp_memory_object_synchronize(memory_object,
						    offset,
						    size,
						    sync_flags);
}

/* Routine memory_object_unmap */
kern_return_t memory_object_unmap
(
	memory_object_t memory_object
)
{
#ifdef	MACH_BSD
	if (memory_object->pager == &vnode_pager_workaround) {
		return vnode_pager_unmap(memory_object);
	} else if (memory_object->pager == &device_pager_workaround) {
		return device_pager_unmap(memory_object);
	} else
#endif
		return dp_memory_object_unmap(memory_object);
}

/* Routine memory_object_create */
kern_return_t memory_object_create
(
	memory_object_default_t default_memory_manager,
	vm_size_t new_memory_object_size,
	memory_object_t *new_memory_object
)
{
	return default_pager_memory_object_create(default_memory_manager,
						  new_memory_object_size,
						  new_memory_object);
}

upl_t
convert_port_to_upl(
	ipc_port_t	port)
{
	upl_t upl;

	ip_lock(port);
	if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
			ip_unlock(port);
			return (upl_t)NULL;
	}
	upl = (upl_t) port->ip_kobject;
	ip_unlock(port);
	upl_lock(upl);
	upl->ref_count += 1;
	upl_unlock(upl);
	return upl;
}

mach_port_t
convert_upl_to_port(
	__unused upl_t		upl)
{
	return MACH_PORT_NULL;
}

__private_extern__ void
upl_no_senders(
	__unused ipc_port_t			port,
	__unused mach_port_mscount_t		mscount)
{
	return;
}