osfmk/vm/memory_object.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License.  The rights granted to you under the
  10  * License may not be used to create, or enable the creation or
  11  * redistribution of, unlawful or unlicensed copies of an Apple operating
  12  * system, or to circumvent, violate, or enable the circumvention or
  13  * violation of, any terms of an Apple operating system software license
  14  * agreement.
  15  *
  16  * Please obtain a copy of the License at
  17  * http://www.opensource.apple.com/apsl/ and read it before using this
  18  * file.
  19  *
  20  * The Original Code and all software distributed under the License are
  21  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  22  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  23  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  24  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  25  * Please see the License for the specific language governing rights and
  26  * limitations under the License.
  27  *
  28  * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
  29  */
  30 /*
  31  * @OSF_COPYRIGHT@
  32  */
  33 /*
  34  * Mach Operating System
  35  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  36  * All Rights Reserved.
  37  *
  38  * Permission to use, copy, modify and distribute this software and its
  39  * documentation is hereby granted, provided that both the copyright
  40  * notice and this permission notice appear in all copies of the
  41  * software, derivative works or modified versions, and any portions
  42  * thereof, and that both notices appear in supporting documentation.
  43  *
  44  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  45  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  46  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  47  *
  48  * Carnegie Mellon requests users of this software to return to
  49  *
  50  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  51  *  School of Computer Science
  52  *  Carnegie Mellon University
  53  *  Pittsburgh PA 15213-3890
  54  *
  55  * any improvements or extensions that they make and grant Carnegie Mellon
  56  * the rights to redistribute these changes.
  57  */
  58 /*
  59  */
  60 /*
  61  *      File:   vm/memory_object.c
  62  *      Author: Michael Wayne Young
  63  *
  64  *      External memory management interface control functions.
  65  */
  66
  67 #include <advisory_pageout.h>
  68
  69 /*
  70  *      Interface dependencies:
  71  */
  72
  73 #include <mach/std_types.h>     /* For pointer_t */
  74 #include <mach/mach_types.h>
  75
  76 #include <mach/mig.h>
  77 #include <mach/kern_return.h>
  78 #include <mach/memory_object.h>
  79 #include <mach/memory_object_default.h>
  80 #include <mach/memory_object_control_server.h>
  81 #include <mach/host_priv_server.h>
  82 #include <mach/boolean.h>
  83 #include <mach/vm_prot.h>
  84 #include <mach/message.h>
  85
  86 /*
  87  *      Implementation dependencies:
  88  */
  89 #include <string.h>             /* For memcpy() */
  90
  91 #include <kern/xpr.h>
  92 #include <kern/host.h>
  93 #include <kern/thread.h>        /* For current_thread() */
  94 #include <kern/ipc_mig.h>
  95 #include <kern/misc_protos.h>
  96
  97 #include <vm/vm_object.h>
  98 #include <vm/vm_fault.h>
  99 #include <vm/memory_object.h>
 100 #include <vm/vm_page.h>
 101 #include <vm/vm_pageout.h>
 102 #include <vm/pmap.h>            /* For pmap_clear_modify */
 103 #include <vm/vm_kern.h>         /* For kernel_map, vm_move */
 104 #include <vm/vm_map.h>          /* For vm_map_pageable */
 105
 106 #if     MACH_PAGEMAP
 107 #include <vm/vm_external.h>
 108 #endif  /* MACH_PAGEMAP */
 109
 110 #include <vm/vm_protos.h>
 111
 112
      /*
       *      State for the default memory manager.
       *
       *      memory_manager_default starts out as MEMORY_OBJECT_DEFAULT_NULL
       *      and memory_manager_default_cluster starts out as 0; the code that
       *      assigns them is not in this part of the file.
       *
       *      NOTE(review): memory_manager_default_lock presumably serializes
       *      access to the two variables above -- confirm against the routines
       *      that take it.
       */
  113 memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
  114 vm_size_t               memory_manager_default_cluster = 0;
  115 decl_mutex_data(,       memory_manager_default_lock)
 116
 117
 118 /*
 119  *      Routine:        memory_object_should_return_page
 120  *
 121  *      Description:
 122  *              Determine whether the given page should be returned,
 123  *              based on the page's state and on the given return policy.
 124  *
 125  *              We should return the page if one of the following is true:
 126  *
 127  *              1. Page is dirty and should_return is not RETURN_NONE.
 128  *              2. Page is precious and should_return is RETURN_ALL.
 129  *              3. Should_return is RETURN_ANYTHING.
 130  *
 131  *              As a side effect, m->dirty will be made consistent
 132  *              with pmap_is_modified(m), if should_return is not
 133  *              MEMORY_OBJECT_RETURN_NONE.
 134  */
 135
 136 #define memory_object_should_return_page(m, should_return) \
 137     (should_return != MEMORY_OBJECT_RETURN_NONE && \
 138      (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
 139       ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
 140       (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
 141
      /*
       *      Result codes produced by memory_object_lock_page() for one page.
       */
  142 typedef int     memory_object_lock_result_t;
  143
      /* Nothing further is required for the page. */
  144 #define MEMORY_OBJECT_LOCK_RESULT_DONE          0
      /* Page is busy, being cleaned, or wired such that the request cannot proceed. */
  145 #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
      /* Page was selected for return to the pager and is dirty. */
  146 #define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
      /* Page was selected for return to the pager and is not dirty. */
  147 #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3
  148
      /* Forward declaration; the definition follows below. */
  149 memory_object_lock_result_t memory_object_lock_page(
  150                                 vm_page_t               m,
  151                                 memory_object_return_t  should_return,
  152                                 boolean_t               should_flush,
  153                                 vm_prot_t               prot);
 154
 155 /*
 156  *      Routine:        memory_object_lock_page
 157  *
 158  *      Description:
 159  *              Perform the appropriate lock operations on the
 160  *              given page.  See the description of
 161  *              "memory_object_lock_request" for the meanings
 162  *              of the arguments.
 163  *
 164  *              Returns an indication that the operation
 165  *              completed, blocked, or that the page must
 166  *              be cleaned.
 167  */
 168 memory_object_lock_result_t
 169 memory_object_lock_page(
 170         vm_page_t               m,
 171         memory_object_return_t  should_return,
 172         boolean_t               should_flush,
 173         vm_prot_t               prot)
 174 {
 175         XPR(XPR_MEMORY_OBJECT,
 176             "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
 177             (integer_t)m, should_return, should_flush, prot, 0);
 178
 179         /*
 180          *      If we cannot change access to the page,
 181          *      either because a mapping is in progress
 182          *      (busy page) or because a mapping has been
 183          *      wired, then give up.
 184          */
 185
 186         if (m->busy || m->cleaning)
 187                 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 188
 189         /*
 190          *      Don't worry about pages for which the kernel
 191          *      does not have any data.
 192          */
 193
 194         if (m->absent || m->error || m->restart) {
 195                 if(m->error && should_flush) {
 196                         /* dump the page, pager wants us to */
 197                         /* clean it up and there is no      */
 198                         /* relevant data to return */
 199                         if(m->wire_count == 0) {
 200                                 VM_PAGE_FREE(m);
 201                                 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 202                         }
 203                 } else {
 204                         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 205                 }
 206         }
 207
 208         assert(!m->fictitious);
 209
 210         if (m->wire_count != 0) {
 211                 /*
 212                  *      If no change would take place
 213                  *      anyway, return successfully.
 214                  *
 215                  *      No change means:
 216                  *              Not flushing AND
 217                  *              No change to page lock [2 checks]  AND
 218                  *              Should not return page
 219                  *
 220                  * XXX  This doesn't handle sending a copy of a wired
 221                  * XXX  page to the pager, but that will require some
 222                  * XXX  significant surgery.
 223                  */
 224                 if (!should_flush &&
 225                     (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
 226                     ! memory_object_should_return_page(m, should_return)) {
 227
 228                         /*
 229                          *      Restart page unlock requests,
 230                          *      even though no change took place.
 231                          *      [Memory managers may be expecting
 232                          *      to see new requests.]
 233                          */
 234                         m->unlock_request = VM_PROT_NONE;
 235                         PAGE_WAKEUP(m);
 236
 237                         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 238                 }
 239
 240                 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 241         }
 242
 243         /*
 244          *      If the page is to be flushed, allow
 245          *      that to be done as part of the protection.
 246          */
 247
 248         if (should_flush)
 249                 prot = VM_PROT_ALL;
 250
 251         /*
 252          *      Set the page lock.
 253          *
 254          *      If we are decreasing permission, do it now;
 255          *      let the fault handler take care of increases
 256          *      (pmap_page_protect may not increase protection).
 257          */
 258
 259         if (prot != VM_PROT_NO_CHANGE) {
 260                 if ((m->page_lock ^ prot) & prot) {
 261                         pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
 262                 }
 263 #if 0
 264                 /* code associated with the vestigial
 265                  * memory_object_data_unlock
 266                  */
 267                 m->page_lock = prot;
 268                 m->lock_supplied = TRUE;
 269                 if (prot != VM_PROT_NONE)
 270                         m->unusual = TRUE;
 271                 else
 272                         m->unusual = FALSE;
 273
 274                 /*
 275                  *      Restart any past unlock requests, even if no
 276                  *      change resulted.  If the manager explicitly
 277                  *      requested no protection change, then it is assumed
 278                  *      to be remembering past requests.
 279                  */
 280
 281                 m->unlock_request = VM_PROT_NONE;
 282 #endif /* 0 */
 283                 PAGE_WAKEUP(m);
 284         }
 285
 286         /*
 287          *      Handle page returning.
 288          */
 289
 290         if (memory_object_should_return_page(m, should_return)) {
 291
 292                 /*
 293                  *      If we weren't planning
 294                  *      to flush the page anyway,
 295                  *      we may need to remove the
 296                  *      page from the pageout
 297                  *      system and from physical
 298                  *      maps now.
 299                  */
 300
 301                 vm_page_lock_queues();
 302                 VM_PAGE_QUEUES_REMOVE(m);
 303                 vm_page_unlock_queues();
 304
 305                 if (!should_flush)
 306                         pmap_disconnect(m->phys_page);
 307
 308                 if (m->dirty)
 309                         return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
 310                 else
 311                         return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 312         }
 313
 314         /*
 315          *      Handle flushing
 316          */
 317
 318         if (should_flush) {
 319                 VM_PAGE_FREE(m);
 320         } else {
 321                 /*
 322                  *      XXX Make clean but not flush a paging hint,
 323                  *      and deactivate the pages.  This is a hack
 324                  *      because it overloads flush/clean with
 325                  *      implementation-dependent meaning.  This only
 326                  *      happens to pages that are already clean.
 327                  */
 328
 329                 if (vm_page_deactivate_hint &&
 330                     (should_return != MEMORY_OBJECT_RETURN_NONE)) {
 331                         vm_page_lock_queues();
 332                         vm_page_deactivate(m);
 333                         vm_page_unlock_queues();
 334                 }
 335         }
 336
 337         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 338 }
 339
 340 #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)    \
 341 MACRO_BEGIN                                                             \
 342                                                                         \
 343         register int            upl_flags;                              \
 344                                                                         \
 345         vm_object_unlock(object);                                       \
 346                                                                         \
 347                 if (iosync)                                             \
 348                         upl_flags = UPL_MSYNC | UPL_IOSYNC;             \
 349                 else                                                    \
 350                         upl_flags = UPL_MSYNC;                          \
 351                                                                         \
 352                 (void) memory_object_data_return(object->pager,         \
 353                 po,                                                     \
 354                 data_cnt,                                               \
 355                 ro,                                                     \
 356                 ioerr,                                                  \
 357                 (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),       \
 358                 !should_flush,                                          \
 359                 upl_flags);                                             \
 360                                                                         \
 361         vm_object_lock(object);                                         \
 362 MACRO_END
 363
 364 /*
 365  *      Routine:        memory_object_lock_request [user interface]
 366  *
 367  *      Description:
 368  *              Control use of the data associated with the given
 369  *              memory object.  For each page in the given range,
 370  *              perform the following operations, in order:
 371  *                      1)  restrict access to the page (disallow
 372  *                          forms specified by "prot");
 373  *                      2)  return data to the manager (if "should_return"
 374  *                          is RETURN_DIRTY and the page is dirty, or
 375  *                          "should_return" is RETURN_ALL and the page
 376  *                          is either dirty or precious); and,
 377  *                      3)  flush the cached copy (if "should_flush"
 378  *                          is asserted).
 379  *              The set of pages is defined by a starting offset
 380  *              ("offset") and size ("size").  Only pages with the
 381  *              same page alignment as the starting offset are
 382  *              considered.
 383  *
 384  *              A single acknowledgement is sent (to the "reply_to"
 385  *              port) when these actions are complete.  If successful,
 386  *              the naked send right for reply_to is consumed.
 387  */
 388
 389 kern_return_t
 390 memory_object_lock_request(
 391         memory_object_control_t         control,
 392         memory_object_offset_t          offset,
 393         memory_object_size_t            size,
 394         memory_object_offset_t  *       resid_offset,
 395         int                     *       io_errno,
 396         memory_object_return_t          should_return,
 397         int                             flags,
 398         vm_prot_t                       prot)
 399 {
 400         vm_object_t     object;
 401         __unused boolean_t should_flush;
 402
 403         should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
 404
 405         XPR(XPR_MEMORY_OBJECT,
 406             "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
 407             (integer_t)control, offset, size,
 408             (((should_return&1)<<1)|should_flush), prot);
 409
 410         /*
 411          *      Check for bogus arguments.
 412          */
 413         object = memory_object_control_to_vm_object(control);
 414         if (object == VM_OBJECT_NULL)
 415                 return (KERN_INVALID_ARGUMENT);
 416
 417         if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
 418                 return (KERN_INVALID_ARGUMENT);
 419
 420         size = round_page_64(size);
 421
 422         /*
 423          *      Lock the object, and acquire a paging reference to
 424          *      prevent the memory_object reference from being released.
 425          */
 426         vm_object_lock(object);
 427         vm_object_paging_begin(object);
 428         offset -= object->paging_offset;
 429
 430         (void)vm_object_update(object,
 431                 offset, size, resid_offset, io_errno, should_return, flags, prot);
 432
 433         vm_object_paging_end(object);
 434         vm_object_unlock(object);
 435
 436         return (KERN_SUCCESS);
 437 }
 438
 439 /*
 440  *      memory_object_release_name:  [interface]
 441  *
 442  *      Enforces name semantic on memory_object reference count decrement
 443  *      This routine should not be called unless the caller holds a name
 444  *      reference gained through the memory_object_named_create or the
 445  *      memory_object_rename call.
 446  *      If the TERMINATE_IDLE flag is set, the call will return if the
 447  *      reference count is not 1. i.e. idle with the only remaining reference
 448  *      being the name.
 449  *      If the decision is made to proceed the name field flag is set to
 450  *      false and the reference count is decremented.  If the RESPECT_CACHE
 451  *      flag is set and the reference count has gone to zero, the
 452  *      memory_object is checked to see if it is cacheable otherwise when
 453  *      the reference count is zero, it is simply terminated.
 454  */
 455
 456 kern_return_t
 457 memory_object_release_name(
 458         memory_object_control_t control,
 459         int                             flags)
 460 {
 461         vm_object_t     object;
 462
 463         object = memory_object_control_to_vm_object(control);
 464         if (object == VM_OBJECT_NULL)
 465                 return (KERN_INVALID_ARGUMENT);
 466
 467         return vm_object_release_name(object, flags);
 468 }
 469
 470
 471
 472 /*
 473  *      Routine:        memory_object_destroy [user interface]
 474  *      Purpose:
 475  *              Shut down a memory object, despite the
 476  *              presence of address map (or other) references
 477  *              to the vm_object.
 478  */
 479 kern_return_t
 480 memory_object_destroy(
 481         memory_object_control_t control,
 482         kern_return_t           reason)
 483 {
 484         vm_object_t             object;
 485
 486         object = memory_object_control_to_vm_object(control);
 487         if (object == VM_OBJECT_NULL)
 488                 return (KERN_INVALID_ARGUMENT);
 489
 490         return (vm_object_destroy(object, reason));
 491 }
 492
 493 /*
 494  *      Routine:        vm_object_sync
 495  *
 496  *      Kernel internal function to synch out pages in a given
 497  *      range within an object to its memory manager.  Much the
 498  *      same as memory_object_lock_request but page protection
 499  *      is not changed.
 500  *
 501  *      If the should_flush and should_return flags are true pages
 502  *      are flushed, that is dirty & precious pages are written to
 503  *      the memory manager and then discarded.  If should_return
 504  *      is false, only precious pages are returned to the memory
 505  *      manager.
 506  *
 507  *      If should flush is false and should_return true, the memory
 508  *      manager's copy of the pages is updated.  If should_return
 509  *      is also false, only the precious pages are updated.  This
 510  *      last option is of limited utility.
 511  *
 512  *      Returns:
 513  *      FALSE           if no pages were returned to the pager
 514  *      TRUE            otherwise.
 515  */
 516
 517 boolean_t
 518 vm_object_sync(
 519         vm_object_t             object,
 520         vm_object_offset_t      offset,
 521         vm_object_size_t        size,
 522         boolean_t               should_flush,
 523         boolean_t               should_return,
 524         boolean_t               should_iosync)
 525 {
 526         boolean_t       rv;
 527         int             flags;
 528
 529         XPR(XPR_VM_OBJECT,
 530             "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
 531             (integer_t)object, offset, size, should_flush, should_return);
 532
 533         /*
 534          * Lock the object, and acquire a paging reference to
 535          * prevent the memory_object and control ports from
 536          * being destroyed.
 537          */
 538         vm_object_lock(object);
 539         vm_object_paging_begin(object);
 540
 541         if (should_flush)
 542                 flags = MEMORY_OBJECT_DATA_FLUSH;
 543         else
 544                 flags = 0;
 545
 546         if (should_iosync)
 547                 flags |= MEMORY_OBJECT_IO_SYNC;
 548
 549         rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
 550                 (should_return) ?
 551                         MEMORY_OBJECT_RETURN_ALL :
 552                         MEMORY_OBJECT_RETURN_NONE,
 553                 flags,
 554                 VM_PROT_NO_CHANGE);
 555
 556
 557         vm_object_paging_end(object);
 558         vm_object_unlock(object);
 559         return rv;
 560 }
 561
 562
 563
 564
      /*
       *      Routine:        vm_object_update_extent
       *
       *      Description:
       *              Walk the pages of "object" from "offset" up to (not
       *              including) "offset_end", one PAGE_SIZE_64 step at a time,
       *              and run memory_object_lock_page() on every resident page
       *              with the given should_return / should_flush / prot.
       *
       *              Pages that have to go back to the pager are gathered into
       *              a contiguous run (paging_offset, data_cnt) sharing one
       *              pageout_action.  A run is pushed out through
       *              LIST_REQ_PAGEOUT_PAGES when it reaches
       *              PAGE_SIZE * MAX_UPL_TRANSFER bytes, when a page that ends
       *              it is met, when the next page is discontiguous or needs a
       *              different action, and once more after the scan.
       *
       *              LIST_REQ_PAGEOUT_PAGES drops and re-takes the object lock,
       *              so the routine expects the object to be locked on entry,
       *              and every push is followed by a fresh vm_page_lookup().
       *
       *              offset_resid, io_errno and should_iosync are only
       *              forwarded to LIST_REQ_PAGEOUT_PAGES.
       *
       *      Returns:
       *              1 if at least one page was queued for return to the
       *              pager, 0 otherwise.
       */
  565 static int
  566 vm_object_update_extent(
  567         vm_object_t             object,
  568         vm_object_offset_t      offset,
  569         vm_object_offset_t      offset_end,
  570         vm_object_offset_t      *offset_resid,
  571         int                     *io_errno,
  572         boolean_t               should_flush,
  573         memory_object_return_t  should_return,
  574         boolean_t               should_iosync,
  575         vm_prot_t               prot)
  576 {
  577         vm_page_t       m;
  578         int             retval = 0;
              /* bytes in the run of pages collected so far */
  579         vm_size_t       data_cnt = 0;
              /* object offset at which the current run starts */
  580         vm_object_offset_t      paging_offset = 0;
              /* offset just past the last page added to the run */
  581         vm_object_offset_t      last_offset = offset;
  582         memory_object_lock_result_t     page_lock_result;
              /* MUST_CLEAN or MUST_RETURN, shared by every page in the run */
  583         memory_object_lock_result_t     pageout_action;
  584
  585         pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
  586
              /* The scan also stops early once the object has no resident pages. */
  587         for (;
  588              offset < offset_end && object->resident_page_count;
  589              offset += PAGE_SIZE_64) {
  590
  591                 /*
  592                  * Limit the number of pages to be cleaned at once.
  593                  */
  594                 if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
  595                         LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
  596                                                pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
  597                         data_cnt = 0;
  598                 }
  599
                      /*
                       * Inside this loop "continue" looks the page up again at
                       * the same offset (needed after the object lock was
                       * dropped or after sleeping); the "break" that follows
                       * the switch moves on to the next offset.
                       */
  600                 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
  601                         page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
  602
  603                         XPR(XPR_MEMORY_OBJECT,
  604                             "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
  605                             (integer_t)object, offset, page_lock_result, 0, 0);
  606
  607                         switch (page_lock_result)
  608                         {
  609                           case MEMORY_OBJECT_LOCK_RESULT_DONE:
  610                             /*
  611                              *  End of a cluster of dirty pages.
  612                              */
  613                             if (data_cnt) {
  614                                     LIST_REQ_PAGEOUT_PAGES(object,
  615                                                            data_cnt, pageout_action,
  616                                                            paging_offset, offset_resid, io_errno, should_iosync);
  617                                     data_cnt = 0;
                                          /* lock was dropped: re-examine this offset */
  618                                     continue;
  619                             }
  620                             break;
  621
  622                           case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
  623                             /*
  624                              *  Since it is necessary to block,
  625                              *  clean any dirty pages now.
  626                              */
  627                             if (data_cnt) {
  628                                     LIST_REQ_PAGEOUT_PAGES(object,
  629                                                            data_cnt, pageout_action,
  630                                                            paging_offset, offset_resid, io_errno, should_iosync);
  631                                     data_cnt = 0;
  632                                     continue;
  633                             }
                                  /* nothing pending: sleep on the page, then retry it */
  634                             PAGE_SLEEP(object, m, THREAD_UNINT);
  635                             continue;
  636
  637                           case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
  638                           case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
  639                             /*
  640                              * The clean and return cases are similar.
  641                              *
  642                              * if this would form a discontiguous block,
  643                              * clean the old pages and start anew.
  644                              *
  645                              * Mark the page busy since we will unlock the
  646                              * object if we issue the LIST_REQ_PAGEOUT
  647                              */
  648                             m->busy = TRUE;
  649                             if (data_cnt &&
  650                                 ((last_offset != offset) || (pageout_action != page_lock_result))) {
  651                                     LIST_REQ_PAGEOUT_PAGES(object,
  652                                                            data_cnt, pageout_action,
  653                                                            paging_offset, offset_resid, io_errno, should_iosync);
  654                                     data_cnt = 0;
  655                             }
  656                             m->busy = FALSE;
  657
                                  /* page is already marked as being cleaned: wait and retry */
  658                             if (m->cleaning) {
  659                                     PAGE_SLEEP(object, m, THREAD_UNINT);
  660                                     continue;
  661                             }
                                  /* first page of a new run fixes its action and start */
  662                             if (data_cnt == 0) {
  663                                     pageout_action = page_lock_result;
  664                                     paging_offset = offset;
  665                             }
  666                             data_cnt += PAGE_SIZE;
  667                             last_offset = offset + PAGE_SIZE_64;
  668
  669                             vm_page_lock_queues();
  670                             /*
  671                              * Clean
  672                              */
  673                             m->list_req_pending = TRUE;
  674                             m->cleaning = TRUE;
  675
  676                             if (should_flush) {
  677                                     /*
  678                                      * and add additional state
  679                                      * for the flush
  680                                      */
  681                                     m->busy = TRUE;
  682                                     m->pageout = TRUE;
  683                                     vm_page_wire(m);
  684                             }
  685                             vm_page_unlock_queues();
  686
                                  /* at least one page has been queued for return */
  687                             retval = 1;
  688                             break;
  689                         }
  690                         break;
  691                 }
  692         }
  693         /*
  694          *      We have completed the scan for applicable pages.
  695          *      Clean any pages that have been saved.
  696          */
  697         if (data_cnt) {
  698                 LIST_REQ_PAGEOUT_PAGES(object,
  699                                        data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
  700         }
  701         return (retval);
  702 }
 703
 704
 705
 706 /*
 707  *      Routine:        vm_object_update
 708  *      Description:
 709  *              Work function for m_o_lock_request(), vm_o_sync().
 710  *
 711  *              Called with object locked and paging ref taken.
      *
      *              The routine runs in up to three phases:
      *
      *              1. Copy-on-write push.  If the object has a copy object
      *                 and the flags ask for it (MEMORY_OBJECT_DATA_FLUSH
      *                 without DATA_NO_CHANGE/DATA_PURGE, or
      *                 MEMORY_OBJECT_COPY_SYNC), or if
      *                 MEMORY_OBJECT_DATA_SYNC is set, every page of the
      *                 (clipped) range is faulted in for read/write through
      *                 vm_fault_page().  When MEMORY_OBJECT_DATA_SYNC or
      *                 MEMORY_OBJECT_COPY_SYNC is set the routine then
      *                 returns KERN_SUCCESS without doing anything else.
      *
      *              2. Purge.  With MEMORY_OBJECT_DATA_PURGE and a copy
      *                 object present, the copy object's shadow link is
      *                 severed and one reference on "object" is dropped.
      *
      *              3. Extent processing.  The range is described by one
      *                 or more extents (at most MAX_EXTENTS) and
      *                 vm_object_update_extent() is called on each.
      *
      *      Parameters:
      *              object          object to operate on
      *              offset, size    byte range within the object
      *              resid_offset    passed through to vm_object_update_extent()
      *              io_errno        passed through to vm_object_update_extent()
      *              should_return   passed through to vm_object_update_extent()
      *              flags           MEMORY_OBJECT_* control bits (see above)
      *              protection      passed through to vm_object_update_extent()
      *
      *      Returns:
      *              KERN_SUCCESS on the early sync return, otherwise the
      *              boolean "data_returned" (TRUE when at least one
      *              vm_object_update_extent() call returned non-zero).
      *              NOTE(review): the declared type is kern_return_t but the
      *              final value is a boolean; callers presumably treat it
      *              as one -- confirm.
 712  */
 713 kern_return_t
 714 vm_object_update(
 715         register vm_object_t            object,
 716         register vm_object_offset_t     offset,
 717         register vm_object_size_t       size,
 718         register vm_object_offset_t     *resid_offset,
 719         int                             *io_errno,
 720         memory_object_return_t          should_return,
 721         int                             flags,
 722         vm_prot_t                       protection)
 723 {
 724         vm_object_t             copy_object;
 725         boolean_t               data_returned = FALSE;
 726         boolean_t               update_cow;
 727         boolean_t               should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
 728         boolean_t               should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
 729         int                     num_of_extents;
 730         int                     n;
             /*
              * MAX_EXTENTS    - capacity of the extents[] array below
              * EXTENT_SIZE    - alignment granule used to build e_mask, i.e.
              *                  pages sharing (offset & e_mask) share an extent
              * RESIDENT_LIMIT - the resident page list is only pre-scanned
              *                  when the object has fewer resident pages
              */
 731 #define MAX_EXTENTS     8
 732 #define EXTENT_SIZE     (1024 * 1024 * 256)
 733 #define RESIDENT_LIMIT  (1024 * 32)
             /*
              * e_base - (page offset & e_mask) identifying the extent
              * e_min  - lowest page offset recorded in the extent
              * e_max  - highest page offset recorded, plus (PAGE_SIZE - 1)
              */
 734         struct extent {
 735                 vm_object_offset_t e_base;
 736                 vm_object_offset_t e_min;
 737                 vm_object_offset_t e_max;
 738         } extents[MAX_EXTENTS];
 739
 740         /*
 741          *      To avoid blocking while scanning for pages, save
 742          *      dirty pages to be cleaned all at once.
 743          *
 744          *      XXXO A similar strategy could be used to limit the
 745          *      number of times that a scan must be restarted for
 746          *      other reasons.  Those pages that would require blocking
 747          *      could be temporarily collected in another list, or
 748          *      their offsets could be recorded in a small array.
 749          */
 750
 751         /*
 752          * XXX  NOTE: May want to consider converting this to a page list
 753          * XXX  vm_map_copy interface.  Need to understand object
 754          * XXX  coalescing implications before doing so.
 755          */
 756
             /*
              * The copy object needs updating when data is being flushed
              * (unless the caller says the data has not changed or is being
              * purged), or when a copy sync was requested explicitly.
              */
 757         update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
 758                         && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
 759                                         !(flags & MEMORY_OBJECT_DATA_PURGE)))
 760                                 || (flags & MEMORY_OBJECT_COPY_SYNC);
 761
 762
             /*
              * Phase 1: fault the range in, either in the copy object (when
              * there is one) or, for a DATA_SYNC with no copy object, in
              * the object itself.
              */
 763         if((((copy_object = object->copy) != NULL) && update_cow) ||
 764                                         (flags & MEMORY_OBJECT_DATA_SYNC)) {
 765                 vm_map_size_t           i;
 766                 vm_map_size_t           copy_size;
 767                 vm_map_offset_t         copy_offset;
 768                 vm_prot_t               prot;
 769                 vm_page_t               page;
 770                 vm_page_t               top_page;
 771                 kern_return_t           error = 0;
 772
 773                 if(copy_object != NULL) {
 774                    /* translate offset with respect to shadow's offset */
 775                    copy_offset = (offset >= copy_object->shadow_offset)?
 776                         (vm_map_offset_t)(offset - copy_object->shadow_offset) :
 777                         (vm_map_offset_t) 0;
 778                    if(copy_offset > copy_object->size)
 779                         copy_offset = copy_object->size;
 780
 781                    /* clip size with respect to shadow offset */
 782                    if (offset >= copy_object->shadow_offset) {
 783                            copy_size = size;
 784                    } else if (size >= copy_object->shadow_offset - offset) {
 785                            copy_size = size -
 786                                    (copy_object->shadow_offset - offset);
 787                    } else {
 788                            copy_size = 0;
 789                    }
 790
                        /* clip the length so the range ends within copy_object */
 791                    if (copy_offset + copy_size > copy_object->size) {
 792                            if (copy_object->size >= copy_offset) {
 793                                    copy_size = copy_object->size - copy_offset;
 794                            } else {
 795                                    copy_size = 0;
 796                            }
 797                    }
 798
                        /*
                         * from here on copy_size is the (exclusive) end offset
                         * of the range in copy_object, not a length
                         */
 799                    copy_size+=copy_offset;
 800
                        /* trade the object lock for the copy object's lock */
 801                    vm_object_unlock(object);
 802                    vm_object_lock(copy_object);
 803                 } else {
                             /* no copy object: operate on the object itself */
 804                         copy_object = object;
 805
 806                         copy_size   = offset + size;
 807                         copy_offset = offset;
 808                 }
 809
 810                 vm_object_paging_begin(copy_object);
 811                 for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
 812         RETRY_COW_OF_LOCK_REQUEST:
                             /*
                              * NOTE(review): copy_size already holds an end
                              * offset here, so "copy_offset+copy_size" below
                              * adds the start offset twice.  The two arguments
                              * are presumably the low/high bounds of the range
                              * of interest -- confirm whether this is intended.
                              */
 813                         prot =  VM_PROT_WRITE|VM_PROT_READ;
 814                         switch (vm_fault_page(copy_object, i,
 815                                 VM_PROT_WRITE|VM_PROT_READ,
 816                                 FALSE,
 817                                 THREAD_UNINT,
 818                                 copy_offset,
 819                                 copy_offset+copy_size,
 820                                 VM_BEHAVIOR_SEQUENTIAL,
 821                                 &prot,
 822                                 &page,
 823                                 &top_page,
 824                                 (int *)0,
 825                                 &error,
 826                                 FALSE,
 827                                 FALSE, NULL, 0)) {
 828
 829                         case VM_FAULT_SUCCESS:
                                     /*
                                      * Wake anyone waiting on the page and make
                                      * sure it sits on a paging queue.  When a
                                      * top_page was handed back it is cleaned up
                                      * first, after which the copy object's lock
                                      * and paging reference are taken again
                                      * (presumably vm_fault_cleanup() released
                                      * them -- confirm).
                                      */
 830                                 if(top_page) {
 831                                         vm_fault_cleanup(
 832                                                 page->object, top_page);
 833                                         PAGE_WAKEUP_DONE(page);
 834                                         vm_page_lock_queues();
 835                                         if (!page->active && !page->inactive)
 836                                                 vm_page_activate(page);
 837                                         vm_page_unlock_queues();
 838                                         vm_object_lock(copy_object);
 839                                         vm_object_paging_begin(copy_object);
 840                                 } else {
 841                                         PAGE_WAKEUP_DONE(page);
 842                                         vm_page_lock_queues();
 843                                         if (!page->active && !page->inactive)
 844                                                 vm_page_activate(page);
 845                                         vm_page_unlock_queues();
 846                                 }
 847                                 break;
                             /*
                              * The cases below all retry the same offset after
                              * re-taking the copy object's lock and paging
                              * reference (presumably both are released by
                              * vm_fault_page() on these return codes -- confirm).
                              */
 848                         case VM_FAULT_RETRY:
 849                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 850                                 vm_object_lock(copy_object);
 851                                 vm_object_paging_begin(copy_object);
 852                                 goto RETRY_COW_OF_LOCK_REQUEST;
 853                         case VM_FAULT_INTERRUPTED:
 854                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 855                                 vm_object_lock(copy_object);
 856                                 vm_object_paging_begin(copy_object);
 857                                 goto RETRY_COW_OF_LOCK_REQUEST;
 858                         case VM_FAULT_MEMORY_SHORTAGE:
                                     /* wait for a free page before retrying */
 859                                 VM_PAGE_WAIT();
 860                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 861                                 vm_object_lock(copy_object);
 862                                 vm_object_paging_begin(copy_object);
 863                                 goto RETRY_COW_OF_LOCK_REQUEST;
 864                         case VM_FAULT_FICTITIOUS_SHORTAGE:
                                     /* replenish fictitious pages before retrying */
 865                                 vm_page_more_fictitious();
 866                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 867                                 vm_object_lock(copy_object);
 868                                 vm_object_paging_begin(copy_object);
 869                                 goto RETRY_COW_OF_LOCK_REQUEST;
 870                         case VM_FAULT_MEMORY_ERROR:
                                     /*
                                      * Give up on the copy-in: re-lock the
                                      * original object and go straight to the
                                      * extent processing, skipping both the
                                      * early sync return and the purge step.
                                      */
 871                                 vm_object_lock(object);
 872                                 goto BYPASS_COW_COPYIN;
 873                         }
 874
 875                 }
                     /* done faulting: drop the paging ref and swap the locks back */
 876                 vm_object_paging_end(copy_object);
 877                 if(copy_object != object) {
 878                         vm_object_unlock(copy_object);
 879                         vm_object_lock(object);
 880                 }
 881         }
             /* a pure sync request is complete once the pages are faulted in */
 882         if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
 883                         return KERN_SUCCESS;
 884         }
             /*
              * Phase 2: on a purge, detach the copy object from this object
              * so it no longer shadows the data being thrown away.
              */
 885         if(((copy_object = object->copy) != NULL) &&
 886                                         (flags & MEMORY_OBJECT_DATA_PURGE)) {
 887                 copy_object->shadow_severed = TRUE;
 888                 copy_object->shadowed = FALSE;
 889                 copy_object->shadow = NULL;
 890                 /* delete the ref the COW was holding on the target object */
 891                 vm_object_deallocate(object);
 892         }
 893 BYPASS_COW_COPYIN:
 894
 895         /*
 896          * when we have a really large range to check relative
 897          * to the number of actual resident pages, we'd like
 898          * to use the resident page list to drive our checks
 899          * however, the object lock will get dropped while processing
 900          * the page which means the resident queue can change which
 901          * means we can't walk the queue as we process the pages
 902          * we also want to do the processing in offset order to allow
 903          * 'runs' of pages to be collected if we're being told to
 904          * flush to disk... the resident page queue is NOT ordered.
 905          *
 906          * a temporary solution (until we figure out how to deal with
 907          * large address spaces more generically) is to pre-flight
 908          * the resident page queue (if it's small enough) and develop
 909          * a collection of extents (that encompass actual resident pages)
 910          * to visit.  This will at least allow us to deal with some of the
 911          * more pathological cases in a more efficient manner.  The current
 912          * worst case (a single resident page at the end of an extremely large
 913          * range) can take minutes to complete for ranges in the terrabyte
 914          * category... since this routine is called when truncating a file,
 915          * and we currently support files up to 16 Tbytes in size, this
 916          * is not a theoretical problem
 917          */
 918
             /*
              * Phase 3: pre-scan the resident page list only when it is
              * short (fewer than RESIDENT_LIMIT pages) and the range, in
              * pages, exceeds resident_page_count / (8 * MAX_EXTENTS).
              * If no resident page falls inside the range, num_of_extents
              * stays 0 and no extent is processed.
              */
 919         if ((object->resident_page_count < RESIDENT_LIMIT) &&
 920             (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
 921                 vm_page_t               next;
 922                 vm_object_offset_t      start;
 923                 vm_object_offset_t      end;
 924                 vm_object_size_t        e_mask;
 925                 vm_page_t               m;
 926
 927                 start = offset;
 928                 end   = offset + size;
 929                 num_of_extents = 0;
 930                 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
 931
 932                 m = (vm_page_t) queue_first(&object->memq);
 933
 934                 while (!queue_end(&object->memq, (queue_entry_t) m)) {
 935                         next = (vm_page_t) queue_next(&m->listq);
 936
 937                         if ((m->offset >= start) && (m->offset < end)) {
 938                                 /*
 939                                  * this is a page we're interested in
 940                                  * try to fit it into a current extent
 941                                  */
 942                                 for (n = 0; n < num_of_extents; n++) {
 943                                         if ((m->offset & e_mask) == extents[n].e_base) {
 944                                                 /*
 945                                                  * use (PAGE_SIZE - 1) to determine the
 946                                                  * max offset so that we don't wrap if
 947                                                  * we're at the last page of the space
 948                                                  */
 949                                                 if (m->offset < extents[n].e_min)
 950                                                         extents[n].e_min = m->offset;
 951                                                 else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
 952                                                         extents[n].e_max = m->offset + (PAGE_SIZE - 1);
 953                                                 break;
 954                                         }
 955                                 }
 956                                 if (n == num_of_extents) {
 957                                         /*
 958                                          * didn't find a current extent that can encompass
 959                                          * this page
 960                                          */
 961                                         if (n < MAX_EXTENTS) {
 962                                                 /*
 963                                                  * if we still have room,
 964                                                  * create a new extent
 965                                                  */
 966                                                 extents[n].e_base = m->offset & e_mask;
 967                                                 extents[n].e_min  = m->offset;
 968                                                 extents[n].e_max  = m->offset + (PAGE_SIZE - 1);
 969
 970                                                 num_of_extents++;
 971                                         } else {
 972                                                 /*
 973                                                  * no room to create a new extent...
 974                                                  * fall back to a single extent based
 975                                                  * on the min and max page offsets
 976                                                  * we find in the range we're interested in...
 977                                                  * first, look through the extent list and
 978                                                  * develop the overall min and max for the
 979                                                  * pages we've looked at up to this point
 980                                                  */
 981                                                 for (n = 1; n < num_of_extents; n++) {
 982                                                         if (extents[n].e_min < extents[0].e_min)
 983                                                                 extents[0].e_min = extents[n].e_min;
 984                                                         if (extents[n].e_max > extents[0].e_max)
 985                                                                 extents[0].e_max = extents[n].e_max;
 986                                                 }
 987                                                 /*
 988                                                  * now setup to run through the remaining pages
 989                                                  * to determine the overall min and max
 990                                                  * offset for the specified range
                                                      *
                                                      * with e_mask and e_base both 0, every
                                                      * page matches extent 0 from now on
 991                                                  */
 992                                                 extents[0].e_base = 0;
 993                                                 e_mask = 0;
 994                                                 num_of_extents = 1;
 995
 996                                                 /*
 997                                                  * by continuing, we'll reprocess the
 998                                                  * page that forced us to abandon trying
 999                                                  * to develop multiple extents
1000                                                  */
1001                                                 continue;
1002                                         }
1003                                 }
1004                         }
1005                         m = next;
1006                 }
1007         } else {
                     /* no pre-scan: one extent spanning the whole requested range */
1008                 extents[0].e_min = offset;
1009                 extents[0].e_max = offset + (size - 1);
1010
1011                 num_of_extents = 1;
1012         }
             /* process each extent; remember whether any of them returned data */
1013         for (n = 0; n < num_of_extents; n++) {
1014                 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1015                                             should_flush, should_return, should_iosync, protection))
1016                         data_returned = TRUE;
1017         }
1018         return (data_returned);
1019 }
1020
1021
1022 /*
1023  *      Routine:        memory_object_synchronize_completed [user interface]
1024  *
1025  *      Tell kernel that previously synchronized data
1026  *      (memory_object_synchronize) has been queue or placed on the
1027  *      backing storage.
1028  *
1029  *      Note: there may be multiple synchronize requests for a given
1030  *      memory object outstanding but they will not overlap.
1031  */
1032
1033 kern_return_t
1034 memory_object_synchronize_completed(
1035         memory_object_control_t control,
1036         memory_object_offset_t  offset,
1037         vm_offset_t                     length)
1038 {
1039         vm_object_t                     object;
1040         msync_req_t                     msr;
1041
1042         object = memory_object_control_to_vm_object(control);
1043
1044         XPR(XPR_MEMORY_OBJECT,
1045             "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
1046             (integer_t)object, offset, length, 0, 0);
1047
1048         /*
1049          *      Look for bogus arguments
1050          */
1051
1052         if (object == VM_OBJECT_NULL)
1053                 return (KERN_INVALID_ARGUMENT);
1054
1055         vm_object_lock(object);
1056
1057 /*
1058  *      search for sync request structure
1059  */
1060         queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
1061                 if (msr->offset == offset && msr->length == length) {
1062                         queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
1063                         break;
1064                 }
1065         }/* queue_iterate */
1066
1067         if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
1068                 vm_object_unlock(object);
1069                 return KERN_INVALID_ARGUMENT;
1070         }
1071
1072         msr_lock(msr);
1073         vm_object_unlock(object);
1074         msr->flag = VM_MSYNC_DONE;
1075         msr_unlock(msr);
1076         thread_wakeup((event_t) msr);
1077
1078         return KERN_SUCCESS;
1079 }/* memory_object_synchronize_completed */
1080
1081 static kern_return_t
1082 vm_object_set_attributes_common(
1083         vm_object_t     object,
1084         boolean_t       may_cache,
1085         memory_object_copy_strategy_t copy_strategy,
1086         boolean_t       temporary,
1087         memory_object_cluster_size_t    cluster_size,
1088         boolean_t       silent_overwrite,
1089         boolean_t       advisory_pageout)
1090 {
1091         boolean_t       object_became_ready;
1092
1093         XPR(XPR_MEMORY_OBJECT,
1094             "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
1095             (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
1096
1097         if (object == VM_OBJECT_NULL)
1098                 return(KERN_INVALID_ARGUMENT);
1099
1100         /*
1101          *      Verify the attributes of importance
1102          */
1103
1104         switch(copy_strategy) {
1105                 case MEMORY_OBJECT_COPY_NONE:
1106                 case MEMORY_OBJECT_COPY_DELAY:
1107                         break;
1108                 default:
1109                         return(KERN_INVALID_ARGUMENT);
1110         }
1111
1112 #if     !ADVISORY_PAGEOUT
1113         if (silent_overwrite || advisory_pageout)
1114                 return(KERN_INVALID_ARGUMENT);
1115
1116 #endif  /* !ADVISORY_PAGEOUT */
1117         if (may_cache)
1118                 may_cache = TRUE;
1119         if (temporary)
1120                 temporary = TRUE;
1121         if (cluster_size != 0) {
1122                 int     pages_per_cluster;
1123                 pages_per_cluster = atop_32(cluster_size);
1124                 /*
1125                  * Cluster size must be integral multiple of page size,
1126                  * and be a power of 2 number of pages.
1127                  */
1128                 if ((cluster_size & (PAGE_SIZE-1)) ||
1129                     ((pages_per_cluster-1) & pages_per_cluster))
1130                         return KERN_INVALID_ARGUMENT;
1131         }
1132
1133         vm_object_lock(object);
1134
1135         /*
1136          *      Copy the attributes
1137          */
1138         assert(!object->internal);
1139         object_became_ready = !object->pager_ready;
1140         object->copy_strategy = copy_strategy;
1141         object->can_persist = may_cache;
1142         object->temporary = temporary;
1143         object->silent_overwrite = silent_overwrite;
1144         object->advisory_pageout = advisory_pageout;
1145         if (cluster_size == 0)
1146                 cluster_size = PAGE_SIZE;
1147         object->cluster_size = cluster_size;
1148
1149         assert(cluster_size >= PAGE_SIZE &&
1150                cluster_size % PAGE_SIZE == 0);
1151
1152         /*
1153          *      Wake up anyone waiting for the ready attribute
1154          *      to become asserted.
1155          */
1156
1157         if (object_became_ready) {
1158                 object->pager_ready = TRUE;
1159                 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1160         }
1161
1162         vm_object_unlock(object);
1163
1164         return(KERN_SUCCESS);
1165 }
1166
1167 /*
1168  *      Set the memory object attribute as provided.
1169  *
1170  *      XXX This routine cannot be completed until the vm_msync, clean
1171  *           in place, and cluster work is completed. See ifdef notyet
1172  *           below and note that vm_object_set_attributes_common()
1173  *           may have to be expanded.
1174  */
1175 kern_return_t
1176 memory_object_change_attributes(
1177         memory_object_control_t         control,
1178         memory_object_flavor_t          flavor,
1179         memory_object_info_t            attributes,
1180         mach_msg_type_number_t          count)
1181 {
1182         vm_object_t                     object;
1183         kern_return_t                   result = KERN_SUCCESS;
1184         boolean_t                       temporary;
1185         boolean_t                       may_cache;
1186         boolean_t                       invalidate;
1187         memory_object_cluster_size_t    cluster_size;
1188         memory_object_copy_strategy_t   copy_strategy;
1189         boolean_t                       silent_overwrite;
1190         boolean_t                       advisory_pageout;
1191
1192         object = memory_object_control_to_vm_object(control);
1193         if (object == VM_OBJECT_NULL)
1194                 return (KERN_INVALID_ARGUMENT);
1195
1196         vm_object_lock(object);
1197
1198         temporary = object->temporary;
1199         may_cache = object->can_persist;
1200         copy_strategy = object->copy_strategy;
1201         silent_overwrite = object->silent_overwrite;
1202         advisory_pageout = object->advisory_pageout;
1203 #if notyet
1204         invalidate = object->invalidate;
1205 #endif
1206         cluster_size = object->cluster_size;
1207         vm_object_unlock(object);
1208
1209         switch (flavor) {
1210             case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1211             {
1212                 old_memory_object_behave_info_t     behave;
1213
1214                 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1215                         result = KERN_INVALID_ARGUMENT;
1216                         break;
1217                 }
1218
1219                 behave = (old_memory_object_behave_info_t) attributes;
1220
1221                 temporary = behave->temporary;
1222                 invalidate = behave->invalidate;
1223                 copy_strategy = behave->copy_strategy;
1224
1225                 break;
1226             }
1227
1228             case MEMORY_OBJECT_BEHAVIOR_INFO:
1229             {
1230                 memory_object_behave_info_t     behave;
1231
1232                 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1233                         result = KERN_INVALID_ARGUMENT;
1234                         break;
1235                 }
1236
1237                 behave = (memory_object_behave_info_t) attributes;
1238
1239                 temporary = behave->temporary;
1240                 invalidate = behave->invalidate;
1241                 copy_strategy = behave->copy_strategy;
1242                 silent_overwrite = behave->silent_overwrite;
1243                 advisory_pageout = behave->advisory_pageout;
1244                 break;
1245             }
1246
1247             case MEMORY_OBJECT_PERFORMANCE_INFO:
1248             {
1249                 memory_object_perf_info_t       perf;
1250
1251                 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1252                         result = KERN_INVALID_ARGUMENT;
1253                         break;
1254                 }
1255
1256                 perf = (memory_object_perf_info_t) attributes;
1257
1258                 may_cache = perf->may_cache;
1259                 cluster_size = round_page_32(perf->cluster_size);
1260
1261                 break;
1262             }
1263
1264             case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1265             {
1266                 old_memory_object_attr_info_t   attr;
1267
1268                 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1269                         result = KERN_INVALID_ARGUMENT;
1270                         break;
1271                 }
1272
1273                 attr = (old_memory_object_attr_info_t) attributes;
1274
1275                 may_cache = attr->may_cache;
1276                 copy_strategy = attr->copy_strategy;
1277                 cluster_size = page_size;
1278
1279                 break;
1280             }
1281
1282             case MEMORY_OBJECT_ATTRIBUTE_INFO:
1283             {
1284                 memory_object_attr_info_t       attr;
1285
1286                 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1287                         result = KERN_INVALID_ARGUMENT;
1288                         break;
1289                 }
1290
1291                 attr = (memory_object_attr_info_t) attributes;
1292
1293                 copy_strategy = attr->copy_strategy;
1294                 may_cache = attr->may_cache_object;
1295                 cluster_size = attr->cluster_size;
1296                 temporary = attr->temporary;
1297
1298                 break;
1299             }
1300
1301             default:
1302                 result = KERN_INVALID_ARGUMENT;
1303                 break;
1304         }
1305
1306         if (result != KERN_SUCCESS)
1307                 return(result);
1308
1309         if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1310                 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1311                 temporary = TRUE;
1312         } else {
1313                 temporary = FALSE;
1314         }
1315
1316         /*
1317          * XXX  may_cache may become a tri-valued variable to handle
1318          * XXX  uncache if not in use.
1319          */
1320         return (vm_object_set_attributes_common(object,
1321                                                      may_cache,
1322                                                      copy_strategy,
1323                                                      temporary,
1324                                                      cluster_size,
1325                                                      silent_overwrite,
1326                                                      advisory_pageout));
1327 }
1328
1329 kern_return_t
1330 memory_object_get_attributes(
1331         memory_object_control_t control,
1332         memory_object_flavor_t  flavor,
1333         memory_object_info_t    attributes,     /* pointer to OUT array */
1334         mach_msg_type_number_t  *count)         /* IN/OUT */
1335 {
1336         kern_return_t           ret = KERN_SUCCESS;
1337         vm_object_t             object;
1338
1339         object = memory_object_control_to_vm_object(control);
1340         if (object == VM_OBJECT_NULL)
1341                 return (KERN_INVALID_ARGUMENT);
1342
1343         vm_object_lock(object);
1344
1345         switch (flavor) {
1346             case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1347             {
1348                 old_memory_object_behave_info_t behave;
1349
1350                 if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1351                         ret = KERN_INVALID_ARGUMENT;
1352                         break;
1353                 }
1354
1355                 behave = (old_memory_object_behave_info_t) attributes;
1356                 behave->copy_strategy = object->copy_strategy;
1357                 behave->temporary = object->temporary;
1358 #if notyet      /* remove when vm_msync complies and clean in place fini */
1359                 behave->invalidate = object->invalidate;
1360 #else
1361                 behave->invalidate = FALSE;
1362 #endif
1363
1364                 *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1365                 break;
1366             }
1367
1368             case MEMORY_OBJECT_BEHAVIOR_INFO:
1369             {
1370                 memory_object_behave_info_t     behave;
1371
1372                 if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1373                         ret = KERN_INVALID_ARGUMENT;
1374                         break;
1375                 }
1376
1377                 behave = (memory_object_behave_info_t) attributes;
1378                 behave->copy_strategy = object->copy_strategy;
1379                 behave->temporary = object->temporary;
1380 #if notyet      /* remove when vm_msync complies and clean in place fini */
1381                 behave->invalidate = object->invalidate;
1382 #else
1383                 behave->invalidate = FALSE;
1384 #endif
1385                 behave->advisory_pageout = object->advisory_pageout;
1386                 behave->silent_overwrite = object->silent_overwrite;
1387                 *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1388                 break;
1389             }
1390
1391             case MEMORY_OBJECT_PERFORMANCE_INFO:
1392             {
1393                 memory_object_perf_info_t       perf;
1394
1395                 if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
1396                         ret = KERN_INVALID_ARGUMENT;
1397                         break;
1398                 }
1399
1400                 perf = (memory_object_perf_info_t) attributes;
1401                 perf->cluster_size = object->cluster_size;
1402                 perf->may_cache = object->can_persist;
1403
1404                 *count = MEMORY_OBJECT_PERF_INFO_COUNT;
1405                 break;
1406             }
1407
1408             case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1409             {
1410                 old_memory_object_attr_info_t       attr;
1411
1412                 if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1413                         ret = KERN_INVALID_ARGUMENT;
1414                         break;
1415                 }
1416
1417                 attr = (old_memory_object_attr_info_t) attributes;
1418                 attr->may_cache = object->can_persist;
1419                 attr->copy_strategy = object->copy_strategy;
1420
1421                 *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
1422                 break;
1423             }
1424
1425             case MEMORY_OBJECT_ATTRIBUTE_INFO:
1426             {
1427                 memory_object_attr_info_t       attr;
1428
1429                 if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
1430                         ret = KERN_INVALID_ARGUMENT;
1431                         break;
1432                 }
1433
1434                 attr = (memory_object_attr_info_t) attributes;
1435                 attr->copy_strategy = object->copy_strategy;
1436                 attr->cluster_size = object->cluster_size;
1437                 attr->may_cache_object = object->can_persist;
1438                 attr->temporary = object->temporary;
1439
1440                 *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
1441                 break;
1442             }
1443
1444             default:
1445                 ret = KERN_INVALID_ARGUMENT;
1446                 break;
1447         }
1448
1449         vm_object_unlock(object);
1450
1451         return(ret);
1452 }
1453
1454
1455 kern_return_t
1456 memory_object_iopl_request(
1457         ipc_port_t              port,
1458         memory_object_offset_t  offset,
1459         upl_size_t              *upl_size,
1460         upl_t                   *upl_ptr,
1461         upl_page_info_array_t   user_page_list,
1462         unsigned int            *page_list_count,
1463         int                     *flags)
1464 {
1465         vm_object_t             object;
1466         kern_return_t           ret;
1467         int                     caller_flags;
1468
1469         caller_flags = *flags;
1470
1471         if (caller_flags & ~UPL_VALID_FLAGS) {
1472                 /*
1473                  * For forward compatibility's sake,
1474                  * reject any unknown flag.
1475                  */
1476                 return KERN_INVALID_VALUE;
1477         }
1478
1479         if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
1480                 vm_named_entry_t        named_entry;
1481
1482                 named_entry = (vm_named_entry_t)port->ip_kobject;
1483                 /* a few checks to make sure user is obeying rules */
1484                 if(*upl_size == 0) {
1485                         if(offset >= named_entry->size)
1486                                 return(KERN_INVALID_RIGHT);
1487                         *upl_size = named_entry->size - offset;
1488                 }
1489                 if(caller_flags & UPL_COPYOUT_FROM) {
1490                         if((named_entry->protection & VM_PROT_READ)
1491                                                 != VM_PROT_READ) {
1492                                 return(KERN_INVALID_RIGHT);
1493                         }
1494                 } else {
1495                         if((named_entry->protection &
1496                                 (VM_PROT_READ | VM_PROT_WRITE))
1497                                 != (VM_PROT_READ | VM_PROT_WRITE)) {
1498                                 return(KERN_INVALID_RIGHT);
1499                         }
1500                 }
1501                 if(named_entry->size < (offset + *upl_size))
1502                         return(KERN_INVALID_ARGUMENT);
1503
1504                 /* the callers parameter offset is defined to be the */
1505                 /* offset from beginning of named entry offset in object */
1506                 offset = offset + named_entry->offset;
1507
1508                 if(named_entry->is_sub_map)
1509                         return (KERN_INVALID_ARGUMENT);
1510
1511                 named_entry_lock(named_entry);
1512
1513                 if (named_entry->is_pager) {
1514                         object = vm_object_enter(named_entry->backing.pager,
1515                                         named_entry->offset + named_entry->size,
1516                                         named_entry->internal,
1517                                         FALSE,
1518                                         FALSE);
1519                         if (object == VM_OBJECT_NULL) {
1520                                 named_entry_unlock(named_entry);
1521                                 return(KERN_INVALID_OBJECT);
1522                         }
1523
1524                         /* JMM - drop reference on pager here? */
1525
1526                         /* create an extra reference for the named entry */
1527                         vm_object_lock(object);
1528                         vm_object_reference_locked(object);
1529                         named_entry->backing.object = object;
1530                         named_entry->is_pager = FALSE;
1531                         named_entry_unlock(named_entry);
1532
1533                         /* wait for object to be ready */
1534                         while (!object->pager_ready) {
1535                                 vm_object_wait(object,
1536                                                 VM_OBJECT_EVENT_PAGER_READY,
1537                                                 THREAD_UNINT);
1538                                 vm_object_lock(object);
1539                         }
1540                         vm_object_unlock(object);
1541                 } else {
1542                         /* This is the case where we are going to map */
1543                         /* an already mapped object.  If the object is */
1544                         /* not ready it is internal.  An external     */
1545                         /* object cannot be mapped until it is ready  */
1546                         /* we can therefore avoid the ready check     */
1547                         /* in this case.  */
1548                         object = named_entry->backing.object;
1549                         vm_object_reference(object);
1550                         named_entry_unlock(named_entry);
1551                 }
1552         } else  {
1553                 memory_object_control_t control;
1554                 control = (memory_object_control_t)port->ip_kobject;
1555                 if (control == NULL)
1556                         return (KERN_INVALID_ARGUMENT);
1557                 object = memory_object_control_to_vm_object(control);
1558                 if (object == VM_OBJECT_NULL)
1559                         return (KERN_INVALID_ARGUMENT);
1560                 vm_object_reference(object);
1561         }
1562         if (object == VM_OBJECT_NULL)
1563                 return (KERN_INVALID_ARGUMENT);
1564
1565         if (!object->private) {
1566                 if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
1567                         *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
1568                 if (object->phys_contiguous) {
1569                         *flags = UPL_PHYS_CONTIG;
1570                 } else {
1571                         *flags = 0;
1572                 }
1573         } else {
1574                 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1575         }
1576
1577         ret = vm_object_iopl_request(object,
1578                                      offset,
1579                                      *upl_size,
1580                                      upl_ptr,
1581                                      user_page_list,
1582                                      page_list_count,
1583                                      caller_flags);
1584         vm_object_deallocate(object);
1585         return ret;
1586 }
1587
1588 /*
1589  *      Routine:        memory_object_upl_request [interface]
1590  *      Purpose:
1591  *              Cause the population of a portion of a vm_object.
1592  *              Depending on the nature of the request, the pages
1593  *              returned may be contain valid data or be uninitialized.
1594  *
1595  */
1596
1597 kern_return_t
1598 memory_object_upl_request(
1599         memory_object_control_t control,
1600         memory_object_offset_t  offset,
1601         upl_size_t              size,
1602         upl_t                   *upl_ptr,
1603         upl_page_info_array_t   user_page_list,
1604         unsigned int            *page_list_count,
1605         int                     cntrl_flags)
1606 {
1607         vm_object_t             object;
1608
1609         object = memory_object_control_to_vm_object(control);
1610         if (object == VM_OBJECT_NULL)
1611                 return (KERN_INVALID_ARGUMENT);
1612
1613         return vm_object_upl_request(object,
1614                                      offset,
1615                                      size,
1616                                      upl_ptr,
1617                                      user_page_list,
1618                                      page_list_count,
1619                                      cntrl_flags);
1620 }
1621
1622 /*
1623  *      Routine:        memory_object_super_upl_request [interface]
1624  *      Purpose:
1625  *              Cause the population of a portion of a vm_object
1626  *              in much the same way as memory_object_upl_request.
1627  *              Depending on the nature of the request, the pages
1628  *              returned may be contain valid data or be uninitialized.
1629  *              However, the region may be expanded up to the super
1630  *              cluster size provided.
1631  */
1632
1633 kern_return_t
1634 memory_object_super_upl_request(
1635         memory_object_control_t control,
1636         memory_object_offset_t  offset,
1637         upl_size_t              size,
1638         upl_size_t              super_cluster,
1639         upl_t                   *upl,
1640         upl_page_info_t         *user_page_list,
1641         unsigned int            *page_list_count,
1642         int                     cntrl_flags)
1643 {
1644         vm_object_t             object;
1645
1646         object = memory_object_control_to_vm_object(control);
1647         if (object == VM_OBJECT_NULL)
1648                 return (KERN_INVALID_ARGUMENT);
1649
1650         return vm_object_super_upl_request(object,
1651                                            offset,
1652                                            size,
1653                                            super_cluster,
1654                                            upl,
1655                                            user_page_list,
1656                                            page_list_count,
1657                                            cntrl_flags);
1658 }
1659
/*
 * Zero-initialized global counters.  Nothing in this part of the file
 * touches them; presumably they are statistics updated elsewhere --
 * NOTE(review): confirm they still have users before removing.
 */
int vm_stat_discard_cleared_reply    = 0;
int vm_stat_discard_cleared_unset    = 0;
int vm_stat_discard_cleared_too_late = 0;
1663
1664
1665
1666 /*
1667  *      Routine:        host_default_memory_manager [interface]
1668  *      Purpose:
1669  *              set/get the default memory manager port and default cluster
1670  *              size.
1671  *
1672  *              If successful, consumes the supplied naked send right.
1673  */
1674 kern_return_t
1675 host_default_memory_manager(
1676         host_priv_t             host_priv,
1677         memory_object_default_t *default_manager,
1678         memory_object_cluster_size_t cluster_size)
1679 {
1680         memory_object_default_t current_manager;
1681         memory_object_default_t new_manager;
1682         memory_object_default_t returned_manager;
1683
1684         if (host_priv == HOST_PRIV_NULL)
1685                 return(KERN_INVALID_HOST);
1686
1687         assert(host_priv == &realhost);
1688
1689         new_manager = *default_manager;
1690         mutex_lock(&memory_manager_default_lock);
1691         current_manager = memory_manager_default;
1692
1693         if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1694                 /*
1695                  *      Retrieve the current value.
1696                  */
1697                 memory_object_default_reference(current_manager);
1698                 returned_manager = current_manager;
1699         } else {
1700                 /*
1701                  *      Retrieve the current value,
1702                  *      and replace it with the supplied value.
1703                  *      We return the old reference to the caller
1704                  *      but we have to take a reference on the new
1705                  *      one.
1706                  */
1707
1708                 returned_manager = current_manager;
1709                 memory_manager_default = new_manager;
1710                 memory_object_default_reference(new_manager);
1711
1712                 if (cluster_size % PAGE_SIZE != 0) {
1713 #if 0
1714                         mutex_unlock(&memory_manager_default_lock);
1715                         return KERN_INVALID_ARGUMENT;
1716 #else
1717                         cluster_size = round_page_32(cluster_size);
1718 #endif
1719                 }
1720                 memory_manager_default_cluster = cluster_size;
1721
1722                 /*
1723                  *      In case anyone's been waiting for a memory
1724                  *      manager to be established, wake them up.
1725                  */
1726
1727                 thread_wakeup((event_t) &memory_manager_default);
1728         }
1729
1730         mutex_unlock(&memory_manager_default_lock);
1731
1732         *default_manager = returned_manager;
1733         return(KERN_SUCCESS);
1734 }
1735
1736 /*
1737  *      Routine:        memory_manager_default_reference
1738  *      Purpose:
1739  *              Returns a naked send right for the default
1740  *              memory manager.  The returned right is always
1741  *              valid (not IP_NULL or IP_DEAD).
1742  */
1743
1744 __private_extern__ memory_object_default_t
1745 memory_manager_default_reference(
1746         memory_object_cluster_size_t *cluster_size)
1747 {
1748         memory_object_default_t current_manager;
1749
1750         mutex_lock(&memory_manager_default_lock);
1751         current_manager = memory_manager_default;
1752         while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1753                 wait_result_t res;
1754
1755                 res = thread_sleep_mutex((event_t) &memory_manager_default,
1756                                          &memory_manager_default_lock,
1757                                          THREAD_UNINT);
1758                 assert(res == THREAD_AWAKENED);
1759                 current_manager = memory_manager_default;
1760         }
1761         memory_object_default_reference(current_manager);
1762         *cluster_size = memory_manager_default_cluster;
1763         mutex_unlock(&memory_manager_default_lock);
1764
1765         return current_manager;
1766 }
1767
1768 /*
1769  *      Routine:        memory_manager_default_check
1770  *
1771  *      Purpose:
1772  *              Check whether a default memory manager has been set
1773  *              up yet, or not. Returns KERN_SUCCESS if dmm exists,
1774  *              and KERN_FAILURE if dmm does not exist.
1775  *
1776  *              If there is no default memory manager, log an error,
1777  *              but only the first time.
1778  *
1779  */
1780 __private_extern__ kern_return_t
1781 memory_manager_default_check(void)
1782 {
1783         memory_object_default_t current;
1784
1785         mutex_lock(&memory_manager_default_lock);
1786         current = memory_manager_default;
1787         if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1788                 static boolean_t logged;        /* initialized to 0 */
1789                 boolean_t       complain = !logged;
1790                 logged = TRUE;
1791                 mutex_unlock(&memory_manager_default_lock);
1792                 if (complain)
1793                         printf("Warning: No default memory manager\n");
1794                 return(KERN_FAILURE);
1795         } else {
1796                 mutex_unlock(&memory_manager_default_lock);
1797                 return(KERN_SUCCESS);
1798         }
1799 }
1800
1801 __private_extern__ void
1802 memory_manager_default_init(void)
1803 {
1804         memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
1805         mutex_init(&memory_manager_default_lock, 0);
1806 }
1807
1808
1809
1810 /* Allow manipulation of individual page state.  This is actually part of */
1811 /* the UPL regimen but takes place on the object rather than on a UPL */
1812
1813 kern_return_t
1814 memory_object_page_op(
1815         memory_object_control_t control,
1816         memory_object_offset_t  offset,
1817         int                     ops,
1818         ppnum_t                 *phys_entry,
1819         int                     *flags)
1820 {
1821         vm_object_t             object;
1822         vm_page_t               dst_page;
1823
1824
1825         object = memory_object_control_to_vm_object(control);
1826         if (object == VM_OBJECT_NULL)
1827                 return (KERN_INVALID_ARGUMENT);
1828
1829         vm_object_lock(object);
1830
1831         if(ops & UPL_POP_PHYSICAL) {
1832                 if(object->phys_contiguous) {
1833                         if (phys_entry) {
1834                                 *phys_entry = (ppnum_t)
1835                                         (object->shadow_offset >> 12);
1836                         }
1837                         vm_object_unlock(object);
1838                         return KERN_SUCCESS;
1839                 } else {
1840                         vm_object_unlock(object);
1841                         return KERN_INVALID_OBJECT;
1842                 }
1843         }
1844         if(object->phys_contiguous) {
1845                 vm_object_unlock(object);
1846                 return KERN_INVALID_OBJECT;
1847         }
1848
1849         while(TRUE) {
1850                 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
1851                         vm_object_unlock(object);
1852                         return KERN_FAILURE;
1853                 }
1854
1855                 /* Sync up on getting the busy bit */
1856                 if((dst_page->busy || dst_page->cleaning) &&
1857                            (((ops & UPL_POP_SET) &&
1858                            (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
1859                         /* someone else is playing with the page, we will */
1860                         /* have to wait */
1861                         PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1862                         continue;
1863                 }
1864
1865                 if (ops & UPL_POP_DUMP) {
1866                         vm_page_lock_queues();
1867
1868                         if (dst_page->no_isync == FALSE)
1869                                 pmap_disconnect(dst_page->phys_page);
1870                         vm_page_free(dst_page);
1871
1872                         vm_page_unlock_queues();
1873                         break;
1874                 }
1875
1876                 if (flags) {
1877                         *flags = 0;
1878
1879                         /* Get the condition of flags before requested ops */
1880                         /* are undertaken */
1881
1882                         if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
1883                         if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
1884                         if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
1885                         if(dst_page->absent) *flags |= UPL_POP_ABSENT;
1886                         if(dst_page->busy) *flags |= UPL_POP_BUSY;
1887                 }
1888
1889                 /* The caller should have made a call either contingent with */
1890                 /* or prior to this call to set UPL_POP_BUSY */
1891                 if(ops & UPL_POP_SET) {
1892                         /* The protection granted with this assert will */
1893                         /* not be complete.  If the caller violates the */
1894                         /* convention and attempts to change page state */
1895                         /* without first setting busy we may not see it */
1896                         /* because the page may already be busy.  However */
1897                         /* if such violations occur we will assert sooner */
1898                         /* or later. */
1899                         assert(dst_page->busy || (ops & UPL_POP_BUSY));
1900                         if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
1901                         if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
1902                         if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
1903                         if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
1904                         if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
1905                 }
1906
1907                 if(ops & UPL_POP_CLR) {
1908                         assert(dst_page->busy);
1909                         if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
1910                         if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
1911                         if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
1912                         if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
1913                         if (ops & UPL_POP_BUSY) {
1914                                 dst_page->busy = FALSE;
1915                                 PAGE_WAKEUP(dst_page);
1916                         }
1917                 }
1918
1919                 if (dst_page->encrypted) {
1920                         /*
1921                          * ENCRYPTED SWAP:
1922                          * We need to decrypt this encrypted page before the
1923                          * caller can access its contents.
1924                          * But if the caller really wants to access the page's
1925                          * contents, they have to keep the page "busy".
1926                          * Otherwise, the page could get recycled or re-encrypted
1927                          * at any time.
1928                          */
1929                         if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
1930                             dst_page->busy) {
1931                                 /*
1932                                  * The page is stable enough to be accessed by
1933                                  * the caller, so make sure its contents are
1934                                  * not encrypted.
1935                                  */
1936                                 vm_page_decrypt(dst_page, 0);
1937                         } else {
1938                                 /*
1939                                  * The page is not busy, so don't bother
1940                                  * decrypting it, since anything could
1941                                  * happen to it between now and when the
1942                                  * caller wants to access it.
1943                                  * We should not give the caller access
1944                                  * to this page.
1945                                  */
1946                                 assert(!phys_entry);
1947                         }
1948                 }
1949
1950                 if (phys_entry) {
1951                         /*
1952                          * The physical page number will remain valid
1953                          * only if the page is kept busy.
1954                          * ENCRYPTED SWAP: make sure we don't let the
1955                          * caller access an encrypted page.
1956                          */
1957                         assert(dst_page->busy);
1958                         assert(!dst_page->encrypted);
1959                         *phys_entry = dst_page->phys_page;
1960                 }
1961
1962                 break;
1963         }
1964
1965         vm_object_unlock(object);
1966         return KERN_SUCCESS;
1967
1968 }
1969
1970 /*
1971  * memory_object_range_op offers performance enhancement over
1972  * memory_object_page_op for page_op functions which do not require page
1973  * level state to be returned from the call.  Page_op was created to provide
1974  * a low-cost alternative to page manipulation via UPLs when only a single
1975  * page was involved.  The range_op call establishes the ability in the _op
1976  * family of functions to work on multiple pages where the lack of page level
1977  * state handling allows the caller to avoid the overhead of the upl structures.
1978  */
1979
1980 kern_return_t
1981 memory_object_range_op(
1982         memory_object_control_t control,
1983         memory_object_offset_t  offset_beg,
1984         memory_object_offset_t  offset_end,
1985         int                     ops,
1986         int                     *range)
1987 {
1988         memory_object_offset_t  offset;
1989         vm_object_t             object;
1990         vm_page_t               dst_page;
1991
1992         object = memory_object_control_to_vm_object(control);
1993         if (object == VM_OBJECT_NULL)
1994                 return (KERN_INVALID_ARGUMENT);
1995
1996         if (object->resident_page_count == 0) {
1997                 if (range) {
1998                         if (ops & UPL_ROP_PRESENT)
1999                                 *range = 0;
2000                         else
2001                                 *range = offset_end - offset_beg;
2002                 }
2003                 return KERN_SUCCESS;
2004         }
2005         vm_object_lock(object);
2006
2007         if (object->phys_contiguous) {
2008                 vm_object_unlock(object);
2009                 return KERN_INVALID_OBJECT;
2010         }
2011
2012         offset = offset_beg;
2013
2014         while (offset < offset_end) {
2015                 dst_page = vm_page_lookup(object, offset);
2016                 if (dst_page != VM_PAGE_NULL) {
2017                         if (ops & UPL_ROP_DUMP) {
2018                                 if (dst_page->busy || dst_page->cleaning) {
2019                                         /*
2020                                          * someone else is playing with the
2021                                          * page, we will have to wait
2022                                          */
2023                                         PAGE_SLEEP(object,
2024                                                 dst_page, THREAD_UNINT);
2025                                         /*
2026                                          * need to relook the page up since it's
2027                                          * state may have changed while we slept
2028                                          * it might even belong to a different object
2029                                          * at this point
2030                                          */
2031                                         continue;
2032                                 }
2033                                 vm_page_lock_queues();
2034
2035                                 if (dst_page->no_isync == FALSE)
2036                                         pmap_disconnect(dst_page->phys_page);
2037                                 vm_page_free(dst_page);
2038
2039                                 vm_page_unlock_queues();
2040                         } else if (ops & UPL_ROP_ABSENT)
2041                                 break;
2042                 } else if (ops & UPL_ROP_PRESENT)
2043                         break;
2044
2045                 offset += PAGE_SIZE;
2046         }
2047         vm_object_unlock(object);
2048
2049         if (range)
2050                 *range = offset - offset_beg;
2051
2052         return KERN_SUCCESS;
2053 }
2054
2055
2056 kern_return_t
2057 memory_object_pages_resident(
2058         memory_object_control_t control,
2059         boolean_t                       *       has_pages_resident)
2060 {
2061         vm_object_t             object;
2062
2063         *has_pages_resident = FALSE;
2064
2065         object = memory_object_control_to_vm_object(control);
2066         if (object == VM_OBJECT_NULL)
2067                 return (KERN_INVALID_ARGUMENT);
2068
2069         if (object->resident_page_count)
2070                 *has_pages_resident = TRUE;
2071
2072         return (KERN_SUCCESS);
2073 }
2074
2075
2076 static zone_t mem_obj_control_zone;
2077
2078 __private_extern__ void
2079 memory_object_control_bootstrap(void)
2080 {
2081         int     i;
2082
2083         i = (vm_size_t) sizeof (struct memory_object_control);
2084         mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
2085         return;
2086 }
2087
2088 __private_extern__ memory_object_control_t
2089 memory_object_control_allocate(
2090         vm_object_t             object)
2091 {
2092         memory_object_control_t control;
2093
2094         control = (memory_object_control_t)zalloc(mem_obj_control_zone);
2095         if (control != MEMORY_OBJECT_CONTROL_NULL)
2096                 control->object = object;
2097         return (control);
2098 }
2099
2100 __private_extern__ void
2101 memory_object_control_collapse(
2102         memory_object_control_t control,
2103         vm_object_t             object)
2104 {
2105         assert((control->object != VM_OBJECT_NULL) &&
2106                (control->object != object));
2107         control->object = object;
2108 }
2109
2110 __private_extern__ vm_object_t
2111 memory_object_control_to_vm_object(
2112         memory_object_control_t control)
2113 {
2114         if (control == MEMORY_OBJECT_CONTROL_NULL)
2115                 return VM_OBJECT_NULL;
2116
2117         return (control->object);
2118 }
2119
2120 memory_object_control_t
2121 convert_port_to_mo_control(
2122         __unused mach_port_t    port)
2123 {
2124         return MEMORY_OBJECT_CONTROL_NULL;
2125 }
2126
2127
2128 mach_port_t
2129 convert_mo_control_to_port(
2130         __unused memory_object_control_t        control)
2131 {
2132         return MACH_PORT_NULL;
2133 }
2134
2135 void
2136 memory_object_control_reference(
2137         __unused memory_object_control_t        control)
2138 {
2139         return;
2140 }
2141
2142 /*
2143  * We only every issue one of these references, so kill it
2144  * when that gets released (should switch the real reference
2145  * counting in true port-less EMMI).
2146  */
2147 void
2148 memory_object_control_deallocate(
2149         memory_object_control_t control)
2150 {
2151         zfree(mem_obj_control_zone, control);
2152 }
2153
2154 void
2155 memory_object_control_disable(
2156         memory_object_control_t control)
2157 {
2158         assert(control->object != VM_OBJECT_NULL);
2159         control->object = VM_OBJECT_NULL;
2160 }
2161
2162 void
2163 memory_object_default_reference(
2164         memory_object_default_t dmm)
2165 {
2166         ipc_port_make_send(dmm);
2167 }
2168
2169 void
2170 memory_object_default_deallocate(
2171         memory_object_default_t dmm)
2172 {
2173         ipc_port_release_send(dmm);
2174 }
2175
2176 memory_object_t
2177 convert_port_to_memory_object(
2178         __unused mach_port_t    port)
2179 {
2180         return (MEMORY_OBJECT_NULL);
2181 }
2182
2183
2184 mach_port_t
2185 convert_memory_object_to_port(
2186         __unused memory_object_t        object)
2187 {
2188         return (MACH_PORT_NULL);
2189 }
2190
2191
2192 /* Routine memory_object_reference */
2193 void memory_object_reference(
2194         memory_object_t memory_object)
2195 {
2196
2197 #ifdef  MACH_BSD
2198         if (memory_object->pager == &vnode_pager_workaround) {
2199                 vnode_pager_reference(memory_object);
2200         } else if (memory_object->pager == &device_pager_workaround) {
2201                 device_pager_reference(memory_object);
2202         } else
2203 #endif
2204                 dp_memory_object_reference(memory_object);
2205 }
2206
2207 /* Routine memory_object_deallocate */
2208 void memory_object_deallocate(
2209         memory_object_t memory_object)
2210 {
2211
2212 #ifdef  MACH_BSD
2213         if (memory_object->pager == &vnode_pager_workaround) {
2214                 vnode_pager_deallocate(memory_object);
2215         } else if (memory_object->pager == &device_pager_workaround) {
2216                 device_pager_deallocate(memory_object);
2217         } else
2218 #endif
2219                 dp_memory_object_deallocate(memory_object);
2220 }
2221
2222
2223 /* Routine memory_object_init */
2224 kern_return_t memory_object_init
2225 (
2226         memory_object_t memory_object,
2227         memory_object_control_t memory_control,
2228         memory_object_cluster_size_t memory_object_page_size
2229 )
2230 {
2231 #ifdef  MACH_BSD
2232         if (memory_object->pager == &vnode_pager_workaround) {
2233                 return vnode_pager_init(memory_object,
2234                                         memory_control,
2235                                         memory_object_page_size);
2236         } else if (memory_object->pager == &device_pager_workaround) {
2237                 return device_pager_init(memory_object,
2238                                          memory_control,
2239                                          memory_object_page_size);
2240         } else
2241 #endif
2242                 return dp_memory_object_init(memory_object,
2243                                              memory_control,
2244                                              memory_object_page_size);
2245 }
2246
2247 /* Routine memory_object_terminate */
2248 kern_return_t memory_object_terminate
2249 (
2250         memory_object_t memory_object
2251 )
2252 {
2253 #ifdef  MACH_BSD
2254         if (memory_object->pager == &vnode_pager_workaround) {
2255                 return vnode_pager_terminate(memory_object);
2256         } else if (memory_object->pager == &device_pager_workaround) {
2257                 return device_pager_terminate(memory_object);
2258         } else
2259 #endif
2260                 return dp_memory_object_terminate(memory_object);
2261 }
2262
2263 /* Routine memory_object_data_request */
2264 kern_return_t memory_object_data_request
2265 (
2266         memory_object_t memory_object,
2267         memory_object_offset_t offset,
2268         memory_object_cluster_size_t length,
2269         vm_prot_t desired_access
2270 )
2271 {
2272 #ifdef  MACH_BSD
2273         if (memory_object->pager == &vnode_pager_workaround) {
2274                 return vnode_pager_data_request(memory_object,
2275                                                 offset,
2276                                                 length,
2277                                                 desired_access);
2278         } else if (memory_object->pager == &device_pager_workaround) {
2279                 return device_pager_data_request(memory_object,
2280                                                  offset,
2281                                                  length,
2282                                                  desired_access);
2283         } else
2284 #endif
2285                 return dp_memory_object_data_request(memory_object,
2286                                                      offset,
2287                                                      length,
2288                                                      desired_access);
2289 }
2290
2291 /* Routine memory_object_data_return */
2292 kern_return_t memory_object_data_return
2293 (
2294         memory_object_t memory_object,
2295         memory_object_offset_t offset,
2296         vm_size_t size,
2297         memory_object_offset_t *resid_offset,
2298         int     *io_error,
2299         boolean_t dirty,
2300         boolean_t kernel_copy,
2301         int     upl_flags
2302 )
2303 {
2304 #ifdef MACH_BSD
2305         if (memory_object->pager == &vnode_pager_workaround) {
2306                 return vnode_pager_data_return(memory_object,
2307                                                offset,
2308                                                size,
2309                                                resid_offset,
2310                                                io_error,
2311                                                dirty,
2312                                                kernel_copy,
2313                                                upl_flags);
2314         } else if (memory_object->pager == &device_pager_workaround) {
2315
2316                 return device_pager_data_return(memory_object,
2317                                                 offset,
2318                                                 size,
2319                                                 dirty,
2320                                                 kernel_copy,
2321                                                 upl_flags);
2322         }
2323         else
2324 #endif
2325         {
2326                 return dp_memory_object_data_return(memory_object,
2327                                                     offset,
2328                                                     size,
2329                                                     NULL,
2330                                                     NULL,
2331                                                     dirty,
2332                                                     kernel_copy,
2333                                                     upl_flags);
2334         }
2335 }
2336
2337 /* Routine memory_object_data_initialize */
2338 kern_return_t memory_object_data_initialize
2339 (
2340         memory_object_t memory_object,
2341         memory_object_offset_t offset,
2342         vm_size_t size
2343 )
2344 {
2345 #ifdef MACH_BSD
2346         if (memory_object->pager == &vnode_pager_workaround) {
2347                 return vnode_pager_data_initialize(memory_object,
2348                                                    offset,
2349                                                    size);
2350         } else if (memory_object->pager == &device_pager_workaround) {
2351                 return device_pager_data_initialize(memory_object,
2352                                                     offset,
2353                                                     size);
2354         } else
2355 #endif
2356                 return dp_memory_object_data_initialize(memory_object,
2357                                                         offset,
2358                                                         size);
2359 }
2360
2361 /* Routine memory_object_data_unlock */
2362 kern_return_t memory_object_data_unlock
2363 (
2364         memory_object_t memory_object,
2365         memory_object_offset_t offset,
2366         vm_size_t size,
2367         vm_prot_t desired_access
2368 )
2369 {
2370 #ifdef MACH_BSD
2371         if (memory_object->pager == &vnode_pager_workaround) {
2372                 return vnode_pager_data_unlock(memory_object,
2373                                                offset,
2374                                                size,
2375                                                desired_access);
2376         } else if (memory_object->pager == &device_pager_workaround) {
2377                 return device_pager_data_unlock(memory_object,
2378                                                 offset,
2379                                                 size,
2380                                                 desired_access);
2381         } else
2382 #endif
2383                 return dp_memory_object_data_unlock(memory_object,
2384                                                     offset,
2385                                                     size,
2386                                                     desired_access);
2387 }
2388
2389 /* Routine memory_object_synchronize */
2390 kern_return_t memory_object_synchronize
2391 (
2392         memory_object_t memory_object,
2393         memory_object_offset_t offset,
2394         vm_size_t size,
2395         vm_sync_t sync_flags
2396 )
2397 {
2398 #ifdef MACH_BSD
2399         if (memory_object->pager == &vnode_pager_workaround) {
2400                 return vnode_pager_synchronize(memory_object,
2401                                                offset,
2402                                                size,
2403                                                sync_flags);
2404         } else if (memory_object->pager == &device_pager_workaround) {
2405                 return device_pager_synchronize(memory_object,
2406                                                 offset,
2407                                                 size,
2408                                                 sync_flags);
2409         } else
2410 #endif
2411                 return dp_memory_object_synchronize(memory_object,
2412                                                     offset,
2413                                                     size,
2414                                                     sync_flags);
2415 }
2416
2417 /* Routine memory_object_unmap */
2418 kern_return_t memory_object_unmap
2419 (
2420         memory_object_t memory_object
2421 )
2422 {
2423 #ifdef MACH_BSD
2424         if (memory_object->pager == &vnode_pager_workaround) {
2425                 return vnode_pager_unmap(memory_object);
2426         } else if (memory_object->pager == &device_pager_workaround) {
2427                 return device_pager_unmap(memory_object);
2428         } else
2429 #endif
2430                 return dp_memory_object_unmap(memory_object);
2431 }
2432
2433 /* Routine memory_object_create */
2434 kern_return_t memory_object_create
2435 (
2436         memory_object_default_t default_memory_manager,
2437         vm_size_t new_memory_object_size,
2438         memory_object_t *new_memory_object
2439 )
2440 {
2441         return default_pager_memory_object_create(default_memory_manager,
2442                                                   new_memory_object_size,
2443                                                   new_memory_object);
2444 }
2445
2446 upl_t
2447 convert_port_to_upl(
2448         ipc_port_t      port)
2449 {
2450         upl_t upl;
2451
2452         ip_lock(port);
2453         if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
2454                         ip_unlock(port);
2455                         return (upl_t)NULL;
2456         }
2457         upl = (upl_t) port->ip_kobject;
2458         ip_unlock(port);
2459         upl_lock(upl);
2460         upl->ref_count+=1;
2461         upl_unlock(upl);
2462         return upl;
2463 }
2464
2465 mach_port_t
2466 convert_upl_to_port(
2467         __unused upl_t          upl)
2468 {
2469         return MACH_PORT_NULL;
2470 }
2471
2472 __private_extern__ void
2473 upl_no_senders(
2474         __unused ipc_port_t                             port,
2475         __unused mach_port_mscount_t    mscount)
2476 {
2477         return;
2478 }