osfmk/vm/memory_object.c

   1 /*
   2  * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 /*
  29  * @OSF_COPYRIGHT@
  30  */
  31 /*
  32  * Mach Operating System
  33  * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
  34  * All Rights Reserved.
  35  *
  36  * Permission to use, copy, modify and distribute this software and its
  37  * documentation is hereby granted, provided that both the copyright
  38  * notice and this permission notice appear in all copies of the
  39  * software, derivative works or modified versions, and any portions
  40  * thereof, and that both notices appear in supporting documentation.
  41  *
  42  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  43  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
  44  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  45  *
  46  * Carnegie Mellon requests users of this software to return to
  47  *
  48  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  49  *  School of Computer Science
  50  *  Carnegie Mellon University
  51  *  Pittsburgh PA 15213-3890
  52  *
  53  * any improvements or extensions that they make and grant Carnegie Mellon
  54  * the rights to redistribute these changes.
  55  */
  56 /*
  57  */
  58 /*
  59  *      File:   vm/memory_object.c
  60  *      Author: Michael Wayne Young
  61  *
  62  *      External memory management interface control functions.
  63  */
  64
  65 #include <advisory_pageout.h>
  66
  67 /*
  68  *      Interface dependencies:
  69  */
  70
  71 #include <mach/std_types.h>     /* For pointer_t */
  72 #include <mach/mach_types.h>
  73
  74 #include <mach/mig.h>
  75 #include <mach/kern_return.h>
  76 #include <mach/memory_object.h>
  77 #include <mach/memory_object_default.h>
  78 #include <mach/memory_object_control_server.h>
  79 #include <mach/host_priv_server.h>
  80 #include <mach/boolean.h>
  81 #include <mach/vm_prot.h>
  82 #include <mach/message.h>
  83
  84 /*
  85  *      Implementation dependencies:
  86  */
  87 #include <string.h>             /* For memcpy() */
  88
  89 #include <kern/xpr.h>
  90 #include <kern/host.h>
  91 #include <kern/thread.h>        /* For current_thread() */
  92 #include <kern/ipc_mig.h>
  93 #include <kern/misc_protos.h>
  94
  95 #include <vm/vm_object.h>
  96 #include <vm/vm_fault.h>
  97 #include <vm/memory_object.h>
  98 #include <vm/vm_page.h>
  99 #include <vm/vm_pageout.h>
 100 #include <vm/pmap.h>            /* For pmap_clear_modify */
 101 #include <vm/vm_kern.h>         /* For kernel_map, vm_move */
 102 #include <vm/vm_map.h>          /* For vm_map_pageable */
 103
 104 #if     MACH_PAGEMAP
 105 #include <vm/vm_external.h>
 106 #endif  /* MACH_PAGEMAP */
 107
 108 #include <vm/vm_protos.h>
 109
 110
 111 memory_object_default_t memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
 112 vm_size_t               memory_manager_default_cluster = 0;
 113 decl_mutex_data(,       memory_manager_default_lock)
 114
 115
 116 /*
 117  *      Routine:        memory_object_should_return_page
 118  *
 119  *      Description:
 120  *              Determine whether the given page should be returned,
 121  *              based on the page's state and on the given return policy.
 122  *
 123  *              We should return the page if one of the following is true:
 124  *
 125  *              1. Page is dirty and should_return is not RETURN_NONE.
 126  *              2. Page is precious and should_return is RETURN_ALL.
 127  *              3. Should_return is RETURN_ANYTHING.
 128  *
 129  *              As a side effect, m->dirty will be made consistent
 130  *              with pmap_is_modified(m), if should_return is not
 131  *              MEMORY_OBJECT_RETURN_NONE.
 132  */
 133
 134 #define memory_object_should_return_page(m, should_return) \
 135     (should_return != MEMORY_OBJECT_RETURN_NONE && \
 136      (((m)->dirty || ((m)->dirty = pmap_is_modified((m)->phys_page))) || \
 137       ((m)->precious && (should_return) == MEMORY_OBJECT_RETURN_ALL) || \
 138       (should_return) == MEMORY_OBJECT_RETURN_ANYTHING))
 139
 140 typedef int     memory_object_lock_result_t;
 141
 142 #define MEMORY_OBJECT_LOCK_RESULT_DONE          0
 143 #define MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK    1
 144 #define MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN    2
 145 #define MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN   3
 146
 147 memory_object_lock_result_t memory_object_lock_page(
 148                                 vm_page_t               m,
 149                                 memory_object_return_t  should_return,
 150                                 boolean_t               should_flush,
 151                                 vm_prot_t               prot);
 152
 153 /*
 154  *      Routine:        memory_object_lock_page
 155  *
 156  *      Description:
 157  *              Perform the appropriate lock operations on the
 158  *              given page.  See the description of
 159  *              "memory_object_lock_request" for the meanings
 160  *              of the arguments.
 161  *
 162  *              Returns an indication that the operation
 163  *              completed, blocked, or that the page must
 164  *              be cleaned.
 165  */
 166 memory_object_lock_result_t
 167 memory_object_lock_page(
 168         vm_page_t               m,
 169         memory_object_return_t  should_return,
 170         boolean_t               should_flush,
 171         vm_prot_t               prot)
 172 {
 173         XPR(XPR_MEMORY_OBJECT,
 174             "m_o_lock_page, page 0x%X rtn %d flush %d prot %d\n",
 175             (integer_t)m, should_return, should_flush, prot, 0);
 176
 177         /*
 178          *      If we cannot change access to the page,
 179          *      either because a mapping is in progress
 180          *      (busy page) or because a mapping has been
 181          *      wired, then give up.
 182          */
 183
 184         if (m->busy || m->cleaning)
 185                 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 186
 187         /*
 188          *      Don't worry about pages for which the kernel
 189          *      does not have any data.
 190          */
 191
 192         if (m->absent || m->error || m->restart) {
 193                 if(m->error && should_flush) {
 194                         /* dump the page, pager wants us to */
 195                         /* clean it up and there is no      */
 196                         /* relevant data to return */
 197                         if(m->wire_count == 0) {
 198                                 VM_PAGE_FREE(m);
 199                                 return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 200                         }
 201                 } else {
 202                         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 203                 }
 204         }
 205
 206         assert(!m->fictitious);
 207
 208         if (m->wire_count != 0) {
 209                 /*
 210                  *      If no change would take place
 211                  *      anyway, return successfully.
 212                  *
 213                  *      No change means:
 214                  *              Not flushing AND
 215                  *              No change to page lock [2 checks]  AND
 216                  *              Should not return page
 217                  *
 218                  * XXX  This doesn't handle sending a copy of a wired
 219                  * XXX  page to the pager, but that will require some
 220                  * XXX  significant surgery.
 221                  */
 222                 if (!should_flush &&
 223                     (m->page_lock == prot || prot == VM_PROT_NO_CHANGE) &&
 224                     ! memory_object_should_return_page(m, should_return)) {
 225
 226                         /*
 227                          *      Restart page unlock requests,
 228                          *      even though no change took place.
 229                          *      [Memory managers may be expecting
 230                          *      to see new requests.]
 231                          */
 232                         m->unlock_request = VM_PROT_NONE;
 233                         PAGE_WAKEUP(m);
 234
 235                         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 236                 }
 237
 238                 return(MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK);
 239         }
 240
 241         /*
 242          *      If the page is to be flushed, allow
 243          *      that to be done as part of the protection.
 244          */
 245
 246         if (should_flush)
 247                 prot = VM_PROT_ALL;
 248
 249         /*
 250          *      Set the page lock.
 251          *
 252          *      If we are decreasing permission, do it now;
 253          *      let the fault handler take care of increases
 254          *      (pmap_page_protect may not increase protection).
 255          */
 256
 257         if (prot != VM_PROT_NO_CHANGE) {
 258                 if ((m->page_lock ^ prot) & prot) {
 259                         pmap_page_protect(m->phys_page, VM_PROT_ALL & ~prot);
 260                 }
 261 #if 0
 262                 /* code associated with the vestigial
 263                  * memory_object_data_unlock
 264                  */
 265                 m->page_lock = prot;
 266                 m->lock_supplied = TRUE;
 267                 if (prot != VM_PROT_NONE)
 268                         m->unusual = TRUE;
 269                 else
 270                         m->unusual = FALSE;
 271
 272                 /*
 273                  *      Restart any past unlock requests, even if no
 274                  *      change resulted.  If the manager explicitly
 275                  *      requested no protection change, then it is assumed
 276                  *      to be remembering past requests.
 277                  */
 278
 279                 m->unlock_request = VM_PROT_NONE;
 280 #endif /* 0 */
 281                 PAGE_WAKEUP(m);
 282         }
 283
 284         /*
 285          *      Handle page returning.
 286          */
 287
 288         if (memory_object_should_return_page(m, should_return)) {
 289
 290                 /*
 291                  *      If we weren't planning
 292                  *      to flush the page anyway,
 293                  *      we may need to remove the
 294                  *      page from the pageout
 295                  *      system and from physical
 296                  *      maps now.
 297                  */
 298
 299                 vm_page_lock_queues();
 300                 VM_PAGE_QUEUES_REMOVE(m);
 301                 vm_page_unlock_queues();
 302
 303                 if (!should_flush)
 304                         pmap_disconnect(m->phys_page);
 305
 306                 if (m->dirty)
 307                         return(MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN);
 308                 else
 309                         return(MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN);
 310         }
 311
 312         /*
 313          *      Handle flushing
 314          */
 315
 316         if (should_flush) {
 317                 VM_PAGE_FREE(m);
 318         } else {
 319                 /*
 320                  *      XXX Make clean but not flush a paging hint,
 321                  *      and deactivate the pages.  This is a hack
 322                  *      because it overloads flush/clean with
 323                  *      implementation-dependent meaning.  This only
 324                  *      happens to pages that are already clean.
 325                  */
 326
 327                 if (vm_page_deactivate_hint &&
 328                     (should_return != MEMORY_OBJECT_RETURN_NONE)) {
 329                         vm_page_lock_queues();
 330                         vm_page_deactivate(m);
 331                         vm_page_unlock_queues();
 332                 }
 333         }
 334
 335         return(MEMORY_OBJECT_LOCK_RESULT_DONE);
 336 }
 337
 338 #define LIST_REQ_PAGEOUT_PAGES(object, data_cnt, action, po, ro, ioerr, iosync)    \
 339 MACRO_BEGIN                                                             \
 340                                                                         \
 341         register int            upl_flags;                              \
 342                                                                         \
 343         vm_object_unlock(object);                                       \
 344                                                                         \
 345                 if (iosync)                                             \
 346                         upl_flags = UPL_MSYNC | UPL_IOSYNC;             \
 347                 else                                                    \
 348                         upl_flags = UPL_MSYNC;                          \
 349                                                                         \
 350                 (void) memory_object_data_return(object->pager,         \
 351                 po,                                                     \
 352                 data_cnt,                                               \
 353                 ro,                                                     \
 354                 ioerr,                                                  \
 355                 (action == MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN),       \
 356                 !should_flush,                                          \
 357                 upl_flags);                                             \
 358                                                                         \
 359         vm_object_lock(object);                                         \
 360 MACRO_END
 361
 362 /*
 363  *      Routine:        memory_object_lock_request [user interface]
 364  *
 365  *      Description:
 366  *              Control use of the data associated with the given
 367  *              memory object.  For each page in the given range,
 368  *              perform the following operations, in order:
 369  *                      1)  restrict access to the page (disallow
 370  *                          forms specified by "prot");
 371  *                      2)  return data to the manager (if "should_return"
 372  *                          is RETURN_DIRTY and the page is dirty, or
 373  *                          "should_return" is RETURN_ALL and the page
 374  *                          is either dirty or precious); and,
 375  *                      3)  flush the cached copy (if "should_flush"
 376  *                          is asserted).
 377  *              The set of pages is defined by a starting offset
 378  *              ("offset") and size ("size").  Only pages with the
 379  *              same page alignment as the starting offset are
 380  *              considered.
 381  *
 382  *              A single acknowledgement is sent (to the "reply_to"
 383  *              port) when these actions are complete.  If successful,
 384  *              the naked send right for reply_to is consumed.
 385  */
 386
 387 kern_return_t
 388 memory_object_lock_request(
 389         memory_object_control_t         control,
 390         memory_object_offset_t          offset,
 391         memory_object_size_t            size,
 392         memory_object_offset_t  *       resid_offset,
 393         int                     *       io_errno,
 394         memory_object_return_t          should_return,
 395         int                             flags,
 396         vm_prot_t                       prot)
 397 {
 398         vm_object_t     object;
 399         __unused boolean_t should_flush;
 400
 401         should_flush = flags & MEMORY_OBJECT_DATA_FLUSH;
 402
 403         XPR(XPR_MEMORY_OBJECT,
 404             "m_o_lock_request, control 0x%X off 0x%X size 0x%X flags %X prot %X\n",
 405             (integer_t)control, offset, size,
 406             (((should_return&1)<<1)|should_flush), prot);
 407
 408         /*
 409          *      Check for bogus arguments.
 410          */
 411         object = memory_object_control_to_vm_object(control);
 412         if (object == VM_OBJECT_NULL)
 413                 return (KERN_INVALID_ARGUMENT);
 414
 415         if ((prot & ~VM_PROT_ALL) != 0 && prot != VM_PROT_NO_CHANGE)
 416                 return (KERN_INVALID_ARGUMENT);
 417
 418         size = round_page_64(size);
 419
 420         /*
 421          *      Lock the object, and acquire a paging reference to
 422          *      prevent the memory_object reference from being released.
 423          */
 424         vm_object_lock(object);
 425         vm_object_paging_begin(object);
 426         offset -= object->paging_offset;
 427
 428         (void)vm_object_update(object,
 429                 offset, size, resid_offset, io_errno, should_return, flags, prot);
 430
 431         vm_object_paging_end(object);
 432         vm_object_unlock(object);
 433
 434         return (KERN_SUCCESS);
 435 }
 436
 437 /*
 438  *      memory_object_release_name:  [interface]
 439  *
 440  *      Enforces name semantic on memory_object reference count decrement
 441  *      This routine should not be called unless the caller holds a name
 442  *      reference gained through the memory_object_named_create or the
 443  *      memory_object_rename call.
 444  *      If the TERMINATE_IDLE flag is set, the call will return if the
 445  *      reference count is not 1. i.e. idle with the only remaining reference
 446  *      being the name.
 447  *      If the decision is made to proceed the name field flag is set to
 448  *      false and the reference count is decremented.  If the RESPECT_CACHE
 449  *      flag is set and the reference count has gone to zero, the
 450  *      memory_object is checked to see if it is cacheable otherwise when
 451  *      the reference count is zero, it is simply terminated.
 452  */
 453
 454 kern_return_t
 455 memory_object_release_name(
 456         memory_object_control_t control,
 457         int                             flags)
 458 {
 459         vm_object_t     object;
 460
 461         object = memory_object_control_to_vm_object(control);
 462         if (object == VM_OBJECT_NULL)
 463                 return (KERN_INVALID_ARGUMENT);
 464
 465         return vm_object_release_name(object, flags);
 466 }
 467
 468
 469
 470 /*
 471  *      Routine:        memory_object_destroy [user interface]
 472  *      Purpose:
 473  *              Shut down a memory object, despite the
 474  *              presence of address map (or other) references
 475  *              to the vm_object.
 476  */
 477 kern_return_t
 478 memory_object_destroy(
 479         memory_object_control_t control,
 480         kern_return_t           reason)
 481 {
 482         vm_object_t             object;
 483
 484         object = memory_object_control_to_vm_object(control);
 485         if (object == VM_OBJECT_NULL)
 486                 return (KERN_INVALID_ARGUMENT);
 487
 488         return (vm_object_destroy(object, reason));
 489 }
 490
 491 /*
 492  *      Routine:        vm_object_sync
 493  *
 494  *      Kernel internal function to synch out pages in a given
 495  *      range within an object to its memory manager.  Much the
 496  *      same as memory_object_lock_request but page protection
 497  *      is not changed.
 498  *
 499  *      If the should_flush and should_return flags are true pages
 500  *      are flushed, that is dirty & precious pages are written to
 501  *      the memory manager and then discarded.  If should_return
 502  *      is false, only precious pages are returned to the memory
 503  *      manager.
 504  *
 505  *      If should flush is false and should_return true, the memory
 506  *      manager's copy of the pages is updated.  If should_return
 507  *      is also false, only the precious pages are updated.  This
 508  *      last option is of limited utility.
 509  *
 510  *      Returns:
 511  *      FALSE           if no pages were returned to the pager
 512  *      TRUE            otherwise.
 513  */
 514
 515 boolean_t
 516 vm_object_sync(
 517         vm_object_t             object,
 518         vm_object_offset_t      offset,
 519         vm_object_size_t        size,
 520         boolean_t               should_flush,
 521         boolean_t               should_return,
 522         boolean_t               should_iosync)
 523 {
 524         boolean_t       rv;
 525         int             flags;
 526
 527         XPR(XPR_VM_OBJECT,
 528             "vm_o_sync, object 0x%X, offset 0x%X size 0x%x flush %d rtn %d\n",
 529             (integer_t)object, offset, size, should_flush, should_return);
 530
 531         /*
 532          * Lock the object, and acquire a paging reference to
 533          * prevent the memory_object and control ports from
 534          * being destroyed.
 535          */
 536         vm_object_lock(object);
 537         vm_object_paging_begin(object);
 538
 539         if (should_flush)
 540                 flags = MEMORY_OBJECT_DATA_FLUSH;
 541         else
 542                 flags = 0;
 543
 544         if (should_iosync)
 545                 flags |= MEMORY_OBJECT_IO_SYNC;
 546
 547         rv = vm_object_update(object, offset, (vm_object_size_t)size, NULL, NULL,
 548                 (should_return) ?
 549                         MEMORY_OBJECT_RETURN_ALL :
 550                         MEMORY_OBJECT_RETURN_NONE,
 551                 flags,
 552                 VM_PROT_NO_CHANGE);
 553
 554
 555         vm_object_paging_end(object);
 556         vm_object_unlock(object);
 557         return rv;
 558 }
 559
 560
 561
 562
 563 static int
 564 vm_object_update_extent(
 565         vm_object_t             object,
 566         vm_object_offset_t      offset,
 567         vm_object_offset_t      offset_end,
 568         vm_object_offset_t      *offset_resid,
 569         int                     *io_errno,
 570         boolean_t               should_flush,
 571         memory_object_return_t  should_return,
 572         boolean_t               should_iosync,
 573         vm_prot_t               prot)
 574 {
 575         vm_page_t       m;
 576         int             retval = 0;
 577         vm_size_t       data_cnt = 0;
 578         vm_object_offset_t      paging_offset = 0;
 579         vm_object_offset_t      last_offset = offset;
 580         memory_object_lock_result_t     page_lock_result;
 581         memory_object_lock_result_t     pageout_action;
 582
 583         pageout_action = MEMORY_OBJECT_LOCK_RESULT_DONE;
 584
 585         for (;
 586              offset < offset_end && object->resident_page_count;
 587              offset += PAGE_SIZE_64) {
 588
 589                 /*
 590                  * Limit the number of pages to be cleaned at once.
 591                  */
 592                 if (data_cnt >= PAGE_SIZE * MAX_UPL_TRANSFER) {
 593                         LIST_REQ_PAGEOUT_PAGES(object, data_cnt,
 594                                                pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
 595                         data_cnt = 0;
 596                 }
 597
 598                 while ((m = vm_page_lookup(object, offset)) != VM_PAGE_NULL) {
 599                         page_lock_result = memory_object_lock_page(m, should_return, should_flush, prot);
 600
 601                         XPR(XPR_MEMORY_OBJECT,
 602                             "m_o_update: lock_page, obj 0x%X offset 0x%X result %d\n",
 603                             (integer_t)object, offset, page_lock_result, 0, 0);
 604
 605                         switch (page_lock_result)
 606                         {
 607                           case MEMORY_OBJECT_LOCK_RESULT_DONE:
 608                             /*
 609                              *  End of a cluster of dirty pages.
 610                              */
 611                             if (data_cnt) {
 612                                     LIST_REQ_PAGEOUT_PAGES(object,
 613                                                            data_cnt, pageout_action,
 614                                                            paging_offset, offset_resid, io_errno, should_iosync);
 615                                     data_cnt = 0;
 616                                     continue;
 617                             }
 618                             break;
 619
 620                           case MEMORY_OBJECT_LOCK_RESULT_MUST_BLOCK:
 621                             /*
 622                              *  Since it is necessary to block,
 623                              *  clean any dirty pages now.
 624                              */
 625                             if (data_cnt) {
 626                                     LIST_REQ_PAGEOUT_PAGES(object,
 627                                                            data_cnt, pageout_action,
 628                                                            paging_offset, offset_resid, io_errno, should_iosync);
 629                                     data_cnt = 0;
 630                                     continue;
 631                             }
 632                             PAGE_SLEEP(object, m, THREAD_UNINT);
 633                             continue;
 634
 635                           case MEMORY_OBJECT_LOCK_RESULT_MUST_CLEAN:
 636                           case MEMORY_OBJECT_LOCK_RESULT_MUST_RETURN:
 637                             /*
 638                              * The clean and return cases are similar.
 639                              *
 640                              * if this would form a discontiguous block,
 641                              * clean the old pages and start anew.
 642                              *
 643                              * Mark the page busy since we will unlock the
 644                              * object if we issue the LIST_REQ_PAGEOUT
 645                              */
 646                             m->busy = TRUE;
 647                             if (data_cnt &&
 648                                 ((last_offset != offset) || (pageout_action != page_lock_result))) {
 649                                     LIST_REQ_PAGEOUT_PAGES(object,
 650                                                            data_cnt, pageout_action,
 651                                                            paging_offset, offset_resid, io_errno, should_iosync);
 652                                     data_cnt = 0;
 653                             }
 654                             m->busy = FALSE;
 655
 656                             if (m->cleaning) {
 657                                     PAGE_SLEEP(object, m, THREAD_UNINT);
 658                                     continue;
 659                             }
 660                             if (data_cnt == 0) {
 661                                     pageout_action = page_lock_result;
 662                                     paging_offset = offset;
 663                             }
 664                             data_cnt += PAGE_SIZE;
 665                             last_offset = offset + PAGE_SIZE_64;
 666
 667                             vm_page_lock_queues();
 668                             /*
 669                              * Clean
 670                              */
 671                             m->list_req_pending = TRUE;
 672                             m->cleaning = TRUE;
 673
 674                             if (should_flush) {
 675                                     /*
 676                                      * and add additional state
 677                                      * for the flush
 678                                      */
 679                                     m->busy = TRUE;
 680                                     m->pageout = TRUE;
 681                                     vm_page_wire(m);
 682                             }
 683                             vm_page_unlock_queues();
 684
 685                             retval = 1;
 686                             break;
 687                         }
 688                         break;
 689                 }
 690         }
 691         /*
 692          *      We have completed the scan for applicable pages.
 693          *      Clean any pages that have been saved.
 694          */
 695         if (data_cnt) {
 696                 LIST_REQ_PAGEOUT_PAGES(object,
 697                                        data_cnt, pageout_action, paging_offset, offset_resid, io_errno, should_iosync);
 698         }
 699         return (retval);
 700 }
 701
 702
 703
 704 /*
 705  *      Routine:        vm_object_update
 706  *      Description:
 707  *              Work function for m_o_lock_request(), vm_o_sync().
 708  *
 709  *              Called with object locked and paging ref taken.
 710  */
 711 kern_return_t
 712 vm_object_update(
 713         register vm_object_t            object,
 714         register vm_object_offset_t     offset,
 715         register vm_object_size_t       size,
 716         register vm_object_offset_t     *resid_offset,
 717         int                             *io_errno,
 718         memory_object_return_t          should_return,
 719         int                             flags,
 720         vm_prot_t                       protection)
 721 {
 722         vm_object_t             copy_object;
 723         boolean_t               data_returned = FALSE;
 724         boolean_t               update_cow;
 725         boolean_t               should_flush = (flags & MEMORY_OBJECT_DATA_FLUSH) ? TRUE : FALSE;
 726         boolean_t               should_iosync = (flags & MEMORY_OBJECT_IO_SYNC) ? TRUE : FALSE;
 727         int                     num_of_extents;
 728         int                     n;
 729 #define MAX_EXTENTS     8
 730 #define EXTENT_SIZE     (1024 * 1024 * 256)
 731 #define RESIDENT_LIMIT  (1024 * 32)
 732         struct extent {
 733                 vm_object_offset_t e_base;
 734                 vm_object_offset_t e_min;
 735                 vm_object_offset_t e_max;
 736         } extents[MAX_EXTENTS];
 737
 738         /*
 739          *      To avoid blocking while scanning for pages, save
 740          *      dirty pages to be cleaned all at once.
 741          *
 742          *      XXXO A similar strategy could be used to limit the
 743          *      number of times that a scan must be restarted for
 744          *      other reasons.  Those pages that would require blocking
 745          *      could be temporarily collected in another list, or
 746          *      their offsets could be recorded in a small array.
 747          */
 748
 749         /*
 750          * XXX  NOTE: May want to consider converting this to a page list
 751          * XXX  vm_map_copy interface.  Need to understand object
 752          * XXX  coalescing implications before doing so.
 753          */
 754
 755         update_cow = ((flags & MEMORY_OBJECT_DATA_FLUSH)
 756                         && (!(flags & MEMORY_OBJECT_DATA_NO_CHANGE) &&
 757                                         !(flags & MEMORY_OBJECT_DATA_PURGE)))
 758                                 || (flags & MEMORY_OBJECT_COPY_SYNC);
 759
 760
 761         if((((copy_object = object->copy) != NULL) && update_cow) ||
 762                                         (flags & MEMORY_OBJECT_DATA_SYNC)) {
 763                 vm_map_size_t           i;
 764                 vm_map_size_t           copy_size;
 765                 vm_map_offset_t         copy_offset;
 766                 vm_prot_t               prot;
 767                 vm_page_t               page;
 768                 vm_page_t               top_page;
 769                 kern_return_t           error = 0;
 770
 771                 if(copy_object != NULL) {
 772                    /* translate offset with respect to shadow's offset */
 773                    copy_offset = (offset >= copy_object->shadow_offset)?
 774                         (vm_map_offset_t)(offset - copy_object->shadow_offset) :
 775                         (vm_map_offset_t) 0;
 776                    if(copy_offset > copy_object->size)
 777                         copy_offset = copy_object->size;
 778
 779                    /* clip size with respect to shadow offset */
 780                    if (offset >= copy_object->shadow_offset) {
 781                            copy_size = size;
 782                    } else if (size >= copy_object->shadow_offset - offset) {
 783                            copy_size = size -
 784                                    (copy_object->shadow_offset - offset);
 785                    } else {
 786                            copy_size = 0;
 787                    }
 788
 789                    if (copy_offset + copy_size > copy_object->size) {
 790                            if (copy_object->size >= copy_offset) {
 791                                    copy_size = copy_object->size - copy_offset;
 792                            } else {
 793                                    copy_size = 0;
 794                            }
 795                    }
 796
 797                    copy_size+=copy_offset;
 798
 799                    vm_object_unlock(object);
 800                    vm_object_lock(copy_object);
 801                 } else {
 802                         copy_object = object;
 803
 804                         copy_size   = offset + size;
 805                         copy_offset = offset;
 806                 }
 807
 808                 vm_object_paging_begin(copy_object);
 809                 for (i=copy_offset; i<copy_size; i+=PAGE_SIZE) {
 810         RETRY_COW_OF_LOCK_REQUEST:
 811                         prot =  VM_PROT_WRITE|VM_PROT_READ;
 812                         switch (vm_fault_page(copy_object, i,
 813                                 VM_PROT_WRITE|VM_PROT_READ,
 814                                 FALSE,
 815                                 THREAD_UNINT,
 816                                 copy_offset,
 817                                 copy_offset+copy_size,
 818                                 VM_BEHAVIOR_SEQUENTIAL,
 819                                 &prot,
 820                                 &page,
 821                                 &top_page,
 822                                 (int *)0,
 823                                 &error,
 824                                 FALSE,
 825                                 FALSE, NULL, 0)) {
 826
 827                         case VM_FAULT_SUCCESS:
 828                                 if(top_page) {
 829                                         vm_fault_cleanup(
 830                                                 page->object, top_page);
 831                                         PAGE_WAKEUP_DONE(page);
 832                                         vm_page_lock_queues();
 833                                         if (!page->active && !page->inactive)
 834                                                 vm_page_activate(page);
 835                                         vm_page_unlock_queues();
 836                                         vm_object_lock(copy_object);
 837                                         vm_object_paging_begin(copy_object);
 838                                 } else {
 839                                         PAGE_WAKEUP_DONE(page);
 840                                         vm_page_lock_queues();
 841                                         if (!page->active && !page->inactive)
 842                                                 vm_page_activate(page);
 843                                         vm_page_unlock_queues();
 844                                 }
 845                                 break;
 846                         case VM_FAULT_RETRY:
 847                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 848                                 vm_object_lock(copy_object);
 849                                 vm_object_paging_begin(copy_object);
 850                                 goto RETRY_COW_OF_LOCK_REQUEST;
 851                         case VM_FAULT_INTERRUPTED:
 852                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 853                                 vm_object_lock(copy_object);
 854                                 vm_object_paging_begin(copy_object);
 855                                 goto RETRY_COW_OF_LOCK_REQUEST;
 856                         case VM_FAULT_MEMORY_SHORTAGE:
 857                                 VM_PAGE_WAIT();
 858                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 859                                 vm_object_lock(copy_object);
 860                                 vm_object_paging_begin(copy_object);
 861                                 goto RETRY_COW_OF_LOCK_REQUEST;
 862                         case VM_FAULT_FICTITIOUS_SHORTAGE:
 863                                 vm_page_more_fictitious();
 864                                 prot =  VM_PROT_WRITE|VM_PROT_READ;
 865                                 vm_object_lock(copy_object);
 866                                 vm_object_paging_begin(copy_object);
 867                                 goto RETRY_COW_OF_LOCK_REQUEST;
 868                         case VM_FAULT_MEMORY_ERROR:
 869                                 vm_object_lock(object);
 870                                 goto BYPASS_COW_COPYIN;
 871                         }
 872
 873                 }
 874                 vm_object_paging_end(copy_object);
 875                 if(copy_object != object) {
 876                         vm_object_unlock(copy_object);
 877                         vm_object_lock(object);
 878                 }
 879         }
 880         if((flags & (MEMORY_OBJECT_DATA_SYNC | MEMORY_OBJECT_COPY_SYNC))) {
 881                         return KERN_SUCCESS;
 882         }
 883         if(((copy_object = object->copy) != NULL) &&
 884                                         (flags & MEMORY_OBJECT_DATA_PURGE)) {
 885                 copy_object->shadow_severed = TRUE;
 886                 copy_object->shadowed = FALSE;
 887                 copy_object->shadow = NULL;
 888                 /* delete the ref the COW was holding on the target object */
 889                 vm_object_deallocate(object);
 890         }
 891 BYPASS_COW_COPYIN:
 892
 893         /*
 894          * when we have a really large range to check relative
 895          * to the number of actual resident pages, we'd like
 896          * to use the resident page list to drive our checks
 897          * however, the object lock will get dropped while processing
 898          * the page which means the resident queue can change which
 899          * means we can't walk the queue as we process the pages
 900          * we also want to do the processing in offset order to allow
 901          * 'runs' of pages to be collected if we're being told to
 902          * flush to disk... the resident page queue is NOT ordered.
 903          *
 904          * a temporary solution (until we figure out how to deal with
 905          * large address spaces more generically) is to pre-flight
 906          * the resident page queue (if it's small enough) and develop
 907          * a collection of extents (that encompass actual resident pages)
 908          * to visit.  This will at least allow us to deal with some of the
 909          * more pathological cases in a more efficient manner.  The current
 910          * worst case (a single resident page at the end of an extremely large
 911          * range) can take minutes to complete for ranges in the terrabyte
 912          * category... since this routine is called when truncating a file,
 913          * and we currently support files up to 16 Tbytes in size, this
 914          * is not a theoretical problem
 915          */
 916
 917         if ((object->resident_page_count < RESIDENT_LIMIT) &&
 918             (atop_64(size) > (unsigned)(object->resident_page_count/(8 * MAX_EXTENTS)))) {
 919                 vm_page_t               next;
 920                 vm_object_offset_t      start;
 921                 vm_object_offset_t      end;
 922                 vm_object_size_t        e_mask;
 923                 vm_page_t               m;
 924
 925                 start = offset;
 926                 end   = offset + size;
 927                 num_of_extents = 0;
 928                 e_mask = ~((vm_object_size_t)(EXTENT_SIZE - 1));
 929
 930                 m = (vm_page_t) queue_first(&object->memq);
 931
 932                 while (!queue_end(&object->memq, (queue_entry_t) m)) {
 933                         next = (vm_page_t) queue_next(&m->listq);
 934
 935                         if ((m->offset >= start) && (m->offset < end)) {
 936                                 /*
 937                                  * this is a page we're interested in
 938                                  * try to fit it into a current extent
 939                                  */
 940                                 for (n = 0; n < num_of_extents; n++) {
 941                                         if ((m->offset & e_mask) == extents[n].e_base) {
 942                                                 /*
 943                                                  * use (PAGE_SIZE - 1) to determine the
 944                                                  * max offset so that we don't wrap if
 945                                                  * we're at the last page of the space
 946                                                  */
 947                                                 if (m->offset < extents[n].e_min)
 948                                                         extents[n].e_min = m->offset;
 949                                                 else if ((m->offset + (PAGE_SIZE - 1)) > extents[n].e_max)
 950                                                         extents[n].e_max = m->offset + (PAGE_SIZE - 1);
 951                                                 break;
 952                                         }
 953                                 }
 954                                 if (n == num_of_extents) {
 955                                         /*
 956                                          * didn't find a current extent that can encompass
 957                                          * this page
 958                                          */
 959                                         if (n < MAX_EXTENTS) {
 960                                                 /*
 961                                                  * if we still have room,
 962                                                  * create a new extent
 963                                                  */
 964                                                 extents[n].e_base = m->offset & e_mask;
 965                                                 extents[n].e_min  = m->offset;
 966                                                 extents[n].e_max  = m->offset + (PAGE_SIZE - 1);
 967
 968                                                 num_of_extents++;
 969                                         } else {
 970                                                 /*
 971                                                  * no room to create a new extent...
 972                                                  * fall back to a single extent based
 973                                                  * on the min and max page offsets
 974                                                  * we find in the range we're interested in...
 975                                                  * first, look through the extent list and
 976                                                  * develop the overall min and max for the
 977                                                  * pages we've looked at up to this point
 978                                                  */
 979                                                 for (n = 1; n < num_of_extents; n++) {
 980                                                         if (extents[n].e_min < extents[0].e_min)
 981                                                                 extents[0].e_min = extents[n].e_min;
 982                                                         if (extents[n].e_max > extents[0].e_max)
 983                                                                 extents[0].e_max = extents[n].e_max;
 984                                                 }
 985                                                 /*
 986                                                  * now setup to run through the remaining pages
 987                                                  * to determine the overall min and max
 988                                                  * offset for the specified range
 989                                                  */
 990                                                 extents[0].e_base = 0;
 991                                                 e_mask = 0;
 992                                                 num_of_extents = 1;
 993
 994                                                 /*
 995                                                  * by continuing, we'll reprocess the
 996                                                  * page that forced us to abandon trying
 997                                                  * to develop multiple extents
 998                                                  */
 999                                                 continue;
1000                                         }
1001                                 }
1002                         }
1003                         m = next;
1004                 }
1005         } else {
1006                 extents[0].e_min = offset;
1007                 extents[0].e_max = offset + (size - 1);
1008
1009                 num_of_extents = 1;
1010         }
1011         for (n = 0; n < num_of_extents; n++) {
1012                 if (vm_object_update_extent(object, extents[n].e_min, extents[n].e_max, resid_offset, io_errno,
1013                                             should_flush, should_return, should_iosync, protection))
1014                         data_returned = TRUE;
1015         }
1016         return (data_returned);
1017 }
1018
1019
1020 /*
1021  *      Routine:        memory_object_synchronize_completed [user interface]
1022  *
1023  *      Tell kernel that previously synchronized data
1024  *      (memory_object_synchronize) has been queue or placed on the
1025  *      backing storage.
1026  *
1027  *      Note: there may be multiple synchronize requests for a given
1028  *      memory object outstanding but they will not overlap.
1029  */
1030
1031 kern_return_t
1032 memory_object_synchronize_completed(
1033         memory_object_control_t control,
1034         memory_object_offset_t  offset,
1035         vm_offset_t                     length)
1036 {
1037         vm_object_t                     object;
1038         msync_req_t                     msr;
1039
1040         object = memory_object_control_to_vm_object(control);
1041
1042         XPR(XPR_MEMORY_OBJECT,
1043             "m_o_sync_completed, object 0x%X, offset 0x%X length 0x%X\n",
1044             (integer_t)object, offset, length, 0, 0);
1045
1046         /*
1047          *      Look for bogus arguments
1048          */
1049
1050         if (object == VM_OBJECT_NULL)
1051                 return (KERN_INVALID_ARGUMENT);
1052
1053         vm_object_lock(object);
1054
1055 /*
1056  *      search for sync request structure
1057  */
1058         queue_iterate(&object->msr_q, msr, msync_req_t, msr_q) {
1059                 if (msr->offset == offset && msr->length == length) {
1060                         queue_remove(&object->msr_q, msr, msync_req_t, msr_q);
1061                         break;
1062                 }
1063         }/* queue_iterate */
1064
1065         if (queue_end(&object->msr_q, (queue_entry_t)msr)) {
1066                 vm_object_unlock(object);
1067                 return KERN_INVALID_ARGUMENT;
1068         }
1069
1070         msr_lock(msr);
1071         vm_object_unlock(object);
1072         msr->flag = VM_MSYNC_DONE;
1073         msr_unlock(msr);
1074         thread_wakeup((event_t) msr);
1075
1076         return KERN_SUCCESS;
1077 }/* memory_object_synchronize_completed */
1078
1079 static kern_return_t
1080 vm_object_set_attributes_common(
1081         vm_object_t     object,
1082         boolean_t       may_cache,
1083         memory_object_copy_strategy_t copy_strategy,
1084         boolean_t       temporary,
1085         memory_object_cluster_size_t    cluster_size,
1086         boolean_t       silent_overwrite,
1087         boolean_t       advisory_pageout)
1088 {
1089         boolean_t       object_became_ready;
1090
1091         XPR(XPR_MEMORY_OBJECT,
1092             "m_o_set_attr_com, object 0x%X flg %x strat %d\n",
1093             (integer_t)object, (may_cache&1)|((temporary&1)<1), copy_strategy, 0, 0);
1094
1095         if (object == VM_OBJECT_NULL)
1096                 return(KERN_INVALID_ARGUMENT);
1097
1098         /*
1099          *      Verify the attributes of importance
1100          */
1101
1102         switch(copy_strategy) {
1103                 case MEMORY_OBJECT_COPY_NONE:
1104                 case MEMORY_OBJECT_COPY_DELAY:
1105                         break;
1106                 default:
1107                         return(KERN_INVALID_ARGUMENT);
1108         }
1109
1110 #if     !ADVISORY_PAGEOUT
1111         if (silent_overwrite || advisory_pageout)
1112                 return(KERN_INVALID_ARGUMENT);
1113
1114 #endif  /* !ADVISORY_PAGEOUT */
1115         if (may_cache)
1116                 may_cache = TRUE;
1117         if (temporary)
1118                 temporary = TRUE;
1119         if (cluster_size != 0) {
1120                 int     pages_per_cluster;
1121                 pages_per_cluster = atop_32(cluster_size);
1122                 /*
1123                  * Cluster size must be integral multiple of page size,
1124                  * and be a power of 2 number of pages.
1125                  */
1126                 if ((cluster_size & (PAGE_SIZE-1)) ||
1127                     ((pages_per_cluster-1) & pages_per_cluster))
1128                         return KERN_INVALID_ARGUMENT;
1129         }
1130
1131         vm_object_lock(object);
1132
1133         /*
1134          *      Copy the attributes
1135          */
1136         assert(!object->internal);
1137         object_became_ready = !object->pager_ready;
1138         object->copy_strategy = copy_strategy;
1139         object->can_persist = may_cache;
1140         object->temporary = temporary;
1141         object->silent_overwrite = silent_overwrite;
1142         object->advisory_pageout = advisory_pageout;
1143         if (cluster_size == 0)
1144                 cluster_size = PAGE_SIZE;
1145         object->cluster_size = cluster_size;
1146
1147         assert(cluster_size >= PAGE_SIZE &&
1148                cluster_size % PAGE_SIZE == 0);
1149
1150         /*
1151          *      Wake up anyone waiting for the ready attribute
1152          *      to become asserted.
1153          */
1154
1155         if (object_became_ready) {
1156                 object->pager_ready = TRUE;
1157                 vm_object_wakeup(object, VM_OBJECT_EVENT_PAGER_READY);
1158         }
1159
1160         vm_object_unlock(object);
1161
1162         return(KERN_SUCCESS);
1163 }
1164
1165 /*
1166  *      Set the memory object attribute as provided.
1167  *
1168  *      XXX This routine cannot be completed until the vm_msync, clean
1169  *           in place, and cluster work is completed. See ifdef notyet
1170  *           below and note that vm_object_set_attributes_common()
1171  *           may have to be expanded.
1172  */
1173 kern_return_t
1174 memory_object_change_attributes(
1175         memory_object_control_t         control,
1176         memory_object_flavor_t          flavor,
1177         memory_object_info_t            attributes,
1178         mach_msg_type_number_t          count)
1179 {
1180         vm_object_t                     object;
1181         kern_return_t                   result = KERN_SUCCESS;
1182         boolean_t                       temporary;
1183         boolean_t                       may_cache;
1184         boolean_t                       invalidate;
1185         memory_object_cluster_size_t    cluster_size;
1186         memory_object_copy_strategy_t   copy_strategy;
1187         boolean_t                       silent_overwrite;
1188         boolean_t                       advisory_pageout;
1189
1190         object = memory_object_control_to_vm_object(control);
1191         if (object == VM_OBJECT_NULL)
1192                 return (KERN_INVALID_ARGUMENT);
1193
1194         vm_object_lock(object);
1195
1196         temporary = object->temporary;
1197         may_cache = object->can_persist;
1198         copy_strategy = object->copy_strategy;
1199         silent_overwrite = object->silent_overwrite;
1200         advisory_pageout = object->advisory_pageout;
1201 #if notyet
1202         invalidate = object->invalidate;
1203 #endif
1204         cluster_size = object->cluster_size;
1205         vm_object_unlock(object);
1206
1207         switch (flavor) {
1208             case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1209             {
1210                 old_memory_object_behave_info_t     behave;
1211
1212                 if (count != OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1213                         result = KERN_INVALID_ARGUMENT;
1214                         break;
1215                 }
1216
1217                 behave = (old_memory_object_behave_info_t) attributes;
1218
1219                 temporary = behave->temporary;
1220                 invalidate = behave->invalidate;
1221                 copy_strategy = behave->copy_strategy;
1222
1223                 break;
1224             }
1225
1226             case MEMORY_OBJECT_BEHAVIOR_INFO:
1227             {
1228                 memory_object_behave_info_t     behave;
1229
1230                 if (count != MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1231                         result = KERN_INVALID_ARGUMENT;
1232                         break;
1233                 }
1234
1235                 behave = (memory_object_behave_info_t) attributes;
1236
1237                 temporary = behave->temporary;
1238                 invalidate = behave->invalidate;
1239                 copy_strategy = behave->copy_strategy;
1240                 silent_overwrite = behave->silent_overwrite;
1241                 advisory_pageout = behave->advisory_pageout;
1242                 break;
1243             }
1244
1245             case MEMORY_OBJECT_PERFORMANCE_INFO:
1246             {
1247                 memory_object_perf_info_t       perf;
1248
1249                 if (count != MEMORY_OBJECT_PERF_INFO_COUNT) {
1250                         result = KERN_INVALID_ARGUMENT;
1251                         break;
1252                 }
1253
1254                 perf = (memory_object_perf_info_t) attributes;
1255
1256                 may_cache = perf->may_cache;
1257                 cluster_size = round_page_32(perf->cluster_size);
1258
1259                 break;
1260             }
1261
1262             case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1263             {
1264                 old_memory_object_attr_info_t   attr;
1265
1266                 if (count != OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1267                         result = KERN_INVALID_ARGUMENT;
1268                         break;
1269                 }
1270
1271                 attr = (old_memory_object_attr_info_t) attributes;
1272
1273                 may_cache = attr->may_cache;
1274                 copy_strategy = attr->copy_strategy;
1275                 cluster_size = page_size;
1276
1277                 break;
1278             }
1279
1280             case MEMORY_OBJECT_ATTRIBUTE_INFO:
1281             {
1282                 memory_object_attr_info_t       attr;
1283
1284                 if (count != MEMORY_OBJECT_ATTR_INFO_COUNT) {
1285                         result = KERN_INVALID_ARGUMENT;
1286                         break;
1287                 }
1288
1289                 attr = (memory_object_attr_info_t) attributes;
1290
1291                 copy_strategy = attr->copy_strategy;
1292                 may_cache = attr->may_cache_object;
1293                 cluster_size = attr->cluster_size;
1294                 temporary = attr->temporary;
1295
1296                 break;
1297             }
1298
1299             default:
1300                 result = KERN_INVALID_ARGUMENT;
1301                 break;
1302         }
1303
1304         if (result != KERN_SUCCESS)
1305                 return(result);
1306
1307         if (copy_strategy == MEMORY_OBJECT_COPY_TEMPORARY) {
1308                 copy_strategy = MEMORY_OBJECT_COPY_DELAY;
1309                 temporary = TRUE;
1310         } else {
1311                 temporary = FALSE;
1312         }
1313
1314         /*
1315          * XXX  may_cache may become a tri-valued variable to handle
1316          * XXX  uncache if not in use.
1317          */
1318         return (vm_object_set_attributes_common(object,
1319                                                      may_cache,
1320                                                      copy_strategy,
1321                                                      temporary,
1322                                                      cluster_size,
1323                                                      silent_overwrite,
1324                                                      advisory_pageout));
1325 }
1326
1327 kern_return_t
1328 memory_object_get_attributes(
1329         memory_object_control_t control,
1330         memory_object_flavor_t  flavor,
1331         memory_object_info_t    attributes,     /* pointer to OUT array */
1332         mach_msg_type_number_t  *count)         /* IN/OUT */
1333 {
1334         kern_return_t           ret = KERN_SUCCESS;
1335         vm_object_t             object;
1336
1337         object = memory_object_control_to_vm_object(control);
1338         if (object == VM_OBJECT_NULL)
1339                 return (KERN_INVALID_ARGUMENT);
1340
1341         vm_object_lock(object);
1342
1343         switch (flavor) {
1344             case OLD_MEMORY_OBJECT_BEHAVIOR_INFO:
1345             {
1346                 old_memory_object_behave_info_t behave;
1347
1348                 if (*count < OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1349                         ret = KERN_INVALID_ARGUMENT;
1350                         break;
1351                 }
1352
1353                 behave = (old_memory_object_behave_info_t) attributes;
1354                 behave->copy_strategy = object->copy_strategy;
1355                 behave->temporary = object->temporary;
1356 #if notyet      /* remove when vm_msync complies and clean in place fini */
1357                 behave->invalidate = object->invalidate;
1358 #else
1359                 behave->invalidate = FALSE;
1360 #endif
1361
1362                 *count = OLD_MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1363                 break;
1364             }
1365
1366             case MEMORY_OBJECT_BEHAVIOR_INFO:
1367             {
1368                 memory_object_behave_info_t     behave;
1369
1370                 if (*count < MEMORY_OBJECT_BEHAVE_INFO_COUNT) {
1371                         ret = KERN_INVALID_ARGUMENT;
1372                         break;
1373                 }
1374
1375                 behave = (memory_object_behave_info_t) attributes;
1376                 behave->copy_strategy = object->copy_strategy;
1377                 behave->temporary = object->temporary;
1378 #if notyet      /* remove when vm_msync complies and clean in place fini */
1379                 behave->invalidate = object->invalidate;
1380 #else
1381                 behave->invalidate = FALSE;
1382 #endif
1383                 behave->advisory_pageout = object->advisory_pageout;
1384                 behave->silent_overwrite = object->silent_overwrite;
1385                 *count = MEMORY_OBJECT_BEHAVE_INFO_COUNT;
1386                 break;
1387             }
1388
1389             case MEMORY_OBJECT_PERFORMANCE_INFO:
1390             {
1391                 memory_object_perf_info_t       perf;
1392
1393                 if (*count < MEMORY_OBJECT_PERF_INFO_COUNT) {
1394                         ret = KERN_INVALID_ARGUMENT;
1395                         break;
1396                 }
1397
1398                 perf = (memory_object_perf_info_t) attributes;
1399                 perf->cluster_size = object->cluster_size;
1400                 perf->may_cache = object->can_persist;
1401
1402                 *count = MEMORY_OBJECT_PERF_INFO_COUNT;
1403                 break;
1404             }
1405
1406             case OLD_MEMORY_OBJECT_ATTRIBUTE_INFO:
1407             {
1408                 old_memory_object_attr_info_t       attr;
1409
1410                 if (*count < OLD_MEMORY_OBJECT_ATTR_INFO_COUNT) {
1411                         ret = KERN_INVALID_ARGUMENT;
1412                         break;
1413                 }
1414
1415                 attr = (old_memory_object_attr_info_t) attributes;
1416                 attr->may_cache = object->can_persist;
1417                 attr->copy_strategy = object->copy_strategy;
1418
1419                 *count = OLD_MEMORY_OBJECT_ATTR_INFO_COUNT;
1420                 break;
1421             }
1422
1423             case MEMORY_OBJECT_ATTRIBUTE_INFO:
1424             {
1425                 memory_object_attr_info_t       attr;
1426
1427                 if (*count < MEMORY_OBJECT_ATTR_INFO_COUNT) {
1428                         ret = KERN_INVALID_ARGUMENT;
1429                         break;
1430                 }
1431
1432                 attr = (memory_object_attr_info_t) attributes;
1433                 attr->copy_strategy = object->copy_strategy;
1434                 attr->cluster_size = object->cluster_size;
1435                 attr->may_cache_object = object->can_persist;
1436                 attr->temporary = object->temporary;
1437
1438                 *count = MEMORY_OBJECT_ATTR_INFO_COUNT;
1439                 break;
1440             }
1441
1442             default:
1443                 ret = KERN_INVALID_ARGUMENT;
1444                 break;
1445         }
1446
1447         vm_object_unlock(object);
1448
1449         return(ret);
1450 }
1451
1452
1453 kern_return_t
1454 memory_object_iopl_request(
1455         ipc_port_t              port,
1456         memory_object_offset_t  offset,
1457         upl_size_t              *upl_size,
1458         upl_t                   *upl_ptr,
1459         upl_page_info_array_t   user_page_list,
1460         unsigned int            *page_list_count,
1461         int                     *flags)
1462 {
1463         vm_object_t             object;
1464         kern_return_t           ret;
1465         int                     caller_flags;
1466
1467         caller_flags = *flags;
1468
1469         if (caller_flags & ~UPL_VALID_FLAGS) {
1470                 /*
1471                  * For forward compatibility's sake,
1472                  * reject any unknown flag.
1473                  */
1474                 return KERN_INVALID_VALUE;
1475         }
1476
1477         if (ip_kotype(port) == IKOT_NAMED_ENTRY) {
1478                 vm_named_entry_t        named_entry;
1479
1480                 named_entry = (vm_named_entry_t)port->ip_kobject;
1481                 /* a few checks to make sure user is obeying rules */
1482                 if(*upl_size == 0) {
1483                         if(offset >= named_entry->size)
1484                                 return(KERN_INVALID_RIGHT);
1485                         *upl_size = named_entry->size - offset;
1486                 }
1487                 if(caller_flags & UPL_COPYOUT_FROM) {
1488                         if((named_entry->protection & VM_PROT_READ)
1489                                                 != VM_PROT_READ) {
1490                                 return(KERN_INVALID_RIGHT);
1491                         }
1492                 } else {
1493                         if((named_entry->protection &
1494                                 (VM_PROT_READ | VM_PROT_WRITE))
1495                                 != (VM_PROT_READ | VM_PROT_WRITE)) {
1496                                 return(KERN_INVALID_RIGHT);
1497                         }
1498                 }
1499                 if(named_entry->size < (offset + *upl_size))
1500                         return(KERN_INVALID_ARGUMENT);
1501
1502                 /* the callers parameter offset is defined to be the */
1503                 /* offset from beginning of named entry offset in object */
1504                 offset = offset + named_entry->offset;
1505
1506                 if(named_entry->is_sub_map)
1507                         return (KERN_INVALID_ARGUMENT);
1508
1509                 named_entry_lock(named_entry);
1510
1511                 if (named_entry->is_pager) {
1512                         object = vm_object_enter(named_entry->backing.pager,
1513                                         named_entry->offset + named_entry->size,
1514                                         named_entry->internal,
1515                                         FALSE,
1516                                         FALSE);
1517                         if (object == VM_OBJECT_NULL) {
1518                                 named_entry_unlock(named_entry);
1519                                 return(KERN_INVALID_OBJECT);
1520                         }
1521
1522                         /* JMM - drop reference on pager here? */
1523
1524                         /* create an extra reference for the named entry */
1525                         vm_object_lock(object);
1526                         vm_object_reference_locked(object);
1527                         named_entry->backing.object = object;
1528                         named_entry->is_pager = FALSE;
1529                         named_entry_unlock(named_entry);
1530
1531                         /* wait for object to be ready */
1532                         while (!object->pager_ready) {
1533                                 vm_object_wait(object,
1534                                                 VM_OBJECT_EVENT_PAGER_READY,
1535                                                 THREAD_UNINT);
1536                                 vm_object_lock(object);
1537                         }
1538                         vm_object_unlock(object);
1539                 } else {
1540                         /* This is the case where we are going to map */
1541                         /* an already mapped object.  If the object is */
1542                         /* not ready it is internal.  An external     */
1543                         /* object cannot be mapped until it is ready  */
1544                         /* we can therefore avoid the ready check     */
1545                         /* in this case.  */
1546                         object = named_entry->backing.object;
1547                         vm_object_reference(object);
1548                         named_entry_unlock(named_entry);
1549                 }
1550         } else  {
1551                 memory_object_control_t control;
1552                 control = (memory_object_control_t)port->ip_kobject;
1553                 if (control == NULL)
1554                         return (KERN_INVALID_ARGUMENT);
1555                 object = memory_object_control_to_vm_object(control);
1556                 if (object == VM_OBJECT_NULL)
1557                         return (KERN_INVALID_ARGUMENT);
1558                 vm_object_reference(object);
1559         }
1560         if (object == VM_OBJECT_NULL)
1561                 return (KERN_INVALID_ARGUMENT);
1562
1563         if (!object->private) {
1564                 if (*upl_size > (MAX_UPL_TRANSFER*PAGE_SIZE))
1565                         *upl_size = (MAX_UPL_TRANSFER*PAGE_SIZE);
1566                 if (object->phys_contiguous) {
1567                         *flags = UPL_PHYS_CONTIG;
1568                 } else {
1569                         *flags = 0;
1570                 }
1571         } else {
1572                 *flags = UPL_DEV_MEMORY | UPL_PHYS_CONTIG;
1573         }
1574
1575         ret = vm_object_iopl_request(object,
1576                                      offset,
1577                                      *upl_size,
1578                                      upl_ptr,
1579                                      user_page_list,
1580                                      page_list_count,
1581                                      caller_flags);
1582         vm_object_deallocate(object);
1583         return ret;
1584 }
1585
1586 /*
1587  *      Routine:        memory_object_upl_request [interface]
1588  *      Purpose:
1589  *              Cause the population of a portion of a vm_object.
1590  *              Depending on the nature of the request, the pages
1591  *              returned may be contain valid data or be uninitialized.
1592  *
1593  */
1594
1595 kern_return_t
1596 memory_object_upl_request(
1597         memory_object_control_t control,
1598         memory_object_offset_t  offset,
1599         upl_size_t              size,
1600         upl_t                   *upl_ptr,
1601         upl_page_info_array_t   user_page_list,
1602         unsigned int            *page_list_count,
1603         int                     cntrl_flags)
1604 {
1605         vm_object_t             object;
1606
1607         object = memory_object_control_to_vm_object(control);
1608         if (object == VM_OBJECT_NULL)
1609                 return (KERN_INVALID_ARGUMENT);
1610
1611         return vm_object_upl_request(object,
1612                                      offset,
1613                                      size,
1614                                      upl_ptr,
1615                                      user_page_list,
1616                                      page_list_count,
1617                                      cntrl_flags);
1618 }
1619
1620 /*
1621  *      Routine:        memory_object_super_upl_request [interface]
1622  *      Purpose:
1623  *              Cause the population of a portion of a vm_object
1624  *              in much the same way as memory_object_upl_request.
1625  *              Depending on the nature of the request, the pages
1626  *              returned may be contain valid data or be uninitialized.
1627  *              However, the region may be expanded up to the super
1628  *              cluster size provided.
1629  */
1630
1631 kern_return_t
1632 memory_object_super_upl_request(
1633         memory_object_control_t control,
1634         memory_object_offset_t  offset,
1635         upl_size_t              size,
1636         upl_size_t              super_cluster,
1637         upl_t                   *upl,
1638         upl_page_info_t         *user_page_list,
1639         unsigned int            *page_list_count,
1640         int                     cntrl_flags)
1641 {
1642         vm_object_t             object;
1643
1644         object = memory_object_control_to_vm_object(control);
1645         if (object == VM_OBJECT_NULL)
1646                 return (KERN_INVALID_ARGUMENT);
1647
1648         return vm_object_super_upl_request(object,
1649                                            offset,
1650                                            size,
1651                                            super_cluster,
1652                                            upl,
1653                                            user_page_list,
1654                                            page_list_count,
1655                                            cntrl_flags);
1656 }
1657
1658 int vm_stat_discard_cleared_reply = 0;
1659 int vm_stat_discard_cleared_unset = 0;
1660 int vm_stat_discard_cleared_too_late = 0;
1661
1662
1663
1664 /*
1665  *      Routine:        host_default_memory_manager [interface]
1666  *      Purpose:
1667  *              set/get the default memory manager port and default cluster
1668  *              size.
1669  *
1670  *              If successful, consumes the supplied naked send right.
1671  */
1672 kern_return_t
1673 host_default_memory_manager(
1674         host_priv_t             host_priv,
1675         memory_object_default_t *default_manager,
1676         memory_object_cluster_size_t cluster_size)
1677 {
1678         memory_object_default_t current_manager;
1679         memory_object_default_t new_manager;
1680         memory_object_default_t returned_manager;
1681
1682         if (host_priv == HOST_PRIV_NULL)
1683                 return(KERN_INVALID_HOST);
1684
1685         assert(host_priv == &realhost);
1686
1687         new_manager = *default_manager;
1688         mutex_lock(&memory_manager_default_lock);
1689         current_manager = memory_manager_default;
1690
1691         if (new_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1692                 /*
1693                  *      Retrieve the current value.
1694                  */
1695                 memory_object_default_reference(current_manager);
1696                 returned_manager = current_manager;
1697         } else {
1698                 /*
1699                  *      Retrieve the current value,
1700                  *      and replace it with the supplied value.
1701                  *      We return the old reference to the caller
1702                  *      but we have to take a reference on the new
1703                  *      one.
1704                  */
1705
1706                 returned_manager = current_manager;
1707                 memory_manager_default = new_manager;
1708                 memory_object_default_reference(new_manager);
1709
1710                 if (cluster_size % PAGE_SIZE != 0) {
1711 #if 0
1712                         mutex_unlock(&memory_manager_default_lock);
1713                         return KERN_INVALID_ARGUMENT;
1714 #else
1715                         cluster_size = round_page_32(cluster_size);
1716 #endif
1717                 }
1718                 memory_manager_default_cluster = cluster_size;
1719
1720                 /*
1721                  *      In case anyone's been waiting for a memory
1722                  *      manager to be established, wake them up.
1723                  */
1724
1725                 thread_wakeup((event_t) &memory_manager_default);
1726         }
1727
1728         mutex_unlock(&memory_manager_default_lock);
1729
1730         *default_manager = returned_manager;
1731         return(KERN_SUCCESS);
1732 }
1733
1734 /*
1735  *      Routine:        memory_manager_default_reference
1736  *      Purpose:
1737  *              Returns a naked send right for the default
1738  *              memory manager.  The returned right is always
1739  *              valid (not IP_NULL or IP_DEAD).
1740  */
1741
1742 __private_extern__ memory_object_default_t
1743 memory_manager_default_reference(
1744         memory_object_cluster_size_t *cluster_size)
1745 {
1746         memory_object_default_t current_manager;
1747
1748         mutex_lock(&memory_manager_default_lock);
1749         current_manager = memory_manager_default;
1750         while (current_manager == MEMORY_OBJECT_DEFAULT_NULL) {
1751                 wait_result_t res;
1752
1753                 res = thread_sleep_mutex((event_t) &memory_manager_default,
1754                                          &memory_manager_default_lock,
1755                                          THREAD_UNINT);
1756                 assert(res == THREAD_AWAKENED);
1757                 current_manager = memory_manager_default;
1758         }
1759         memory_object_default_reference(current_manager);
1760         *cluster_size = memory_manager_default_cluster;
1761         mutex_unlock(&memory_manager_default_lock);
1762
1763         return current_manager;
1764 }
1765
1766 /*
1767  *      Routine:        memory_manager_default_check
1768  *
1769  *      Purpose:
1770  *              Check whether a default memory manager has been set
1771  *              up yet, or not. Returns KERN_SUCCESS if dmm exists,
1772  *              and KERN_FAILURE if dmm does not exist.
1773  *
1774  *              If there is no default memory manager, log an error,
1775  *              but only the first time.
1776  *
1777  */
1778 __private_extern__ kern_return_t
1779 memory_manager_default_check(void)
1780 {
1781         memory_object_default_t current;
1782
1783         mutex_lock(&memory_manager_default_lock);
1784         current = memory_manager_default;
1785         if (current == MEMORY_OBJECT_DEFAULT_NULL) {
1786                 static boolean_t logged;        /* initialized to 0 */
1787                 boolean_t       complain = !logged;
1788                 logged = TRUE;
1789                 mutex_unlock(&memory_manager_default_lock);
1790                 if (complain)
1791                         printf("Warning: No default memory manager\n");
1792                 return(KERN_FAILURE);
1793         } else {
1794                 mutex_unlock(&memory_manager_default_lock);
1795                 return(KERN_SUCCESS);
1796         }
1797 }
1798
1799 __private_extern__ void
1800 memory_manager_default_init(void)
1801 {
1802         memory_manager_default = MEMORY_OBJECT_DEFAULT_NULL;
1803         mutex_init(&memory_manager_default_lock, 0);
1804 }
1805
1806
1807
1808 /* Allow manipulation of individual page state.  This is actually part of */
1809 /* the UPL regimen but takes place on the object rather than on a UPL */
1810
1811 kern_return_t
1812 memory_object_page_op(
1813         memory_object_control_t control,
1814         memory_object_offset_t  offset,
1815         int                     ops,
1816         ppnum_t                 *phys_entry,
1817         int                     *flags)
1818 {
1819         vm_object_t             object;
1820         vm_page_t               dst_page;
1821
1822
1823         object = memory_object_control_to_vm_object(control);
1824         if (object == VM_OBJECT_NULL)
1825                 return (KERN_INVALID_ARGUMENT);
1826
1827         vm_object_lock(object);
1828
1829         if(ops & UPL_POP_PHYSICAL) {
1830                 if(object->phys_contiguous) {
1831                         if (phys_entry) {
1832                                 *phys_entry = (ppnum_t)
1833                                         (object->shadow_offset >> 12);
1834                         }
1835                         vm_object_unlock(object);
1836                         return KERN_SUCCESS;
1837                 } else {
1838                         vm_object_unlock(object);
1839                         return KERN_INVALID_OBJECT;
1840                 }
1841         }
1842         if(object->phys_contiguous) {
1843                 vm_object_unlock(object);
1844                 return KERN_INVALID_OBJECT;
1845         }
1846
1847         while(TRUE) {
1848                 if((dst_page = vm_page_lookup(object,offset)) == VM_PAGE_NULL) {
1849                         vm_object_unlock(object);
1850                         return KERN_FAILURE;
1851                 }
1852
1853                 /* Sync up on getting the busy bit */
1854                 if((dst_page->busy || dst_page->cleaning) &&
1855                            (((ops & UPL_POP_SET) &&
1856                            (ops & UPL_POP_BUSY)) || (ops & UPL_POP_DUMP))) {
1857                         /* someone else is playing with the page, we will */
1858                         /* have to wait */
1859                         PAGE_SLEEP(object, dst_page, THREAD_UNINT);
1860                         continue;
1861                 }
1862
1863                 if (ops & UPL_POP_DUMP) {
1864                         vm_page_lock_queues();
1865
1866                         if (dst_page->no_isync == FALSE)
1867                                 pmap_disconnect(dst_page->phys_page);
1868                         vm_page_free(dst_page);
1869
1870                         vm_page_unlock_queues();
1871                         break;
1872                 }
1873
1874                 if (flags) {
1875                         *flags = 0;
1876
1877                         /* Get the condition of flags before requested ops */
1878                         /* are undertaken */
1879
1880                         if(dst_page->dirty) *flags |= UPL_POP_DIRTY;
1881                         if(dst_page->pageout) *flags |= UPL_POP_PAGEOUT;
1882                         if(dst_page->precious) *flags |= UPL_POP_PRECIOUS;
1883                         if(dst_page->absent) *flags |= UPL_POP_ABSENT;
1884                         if(dst_page->busy) *flags |= UPL_POP_BUSY;
1885                 }
1886
1887                 /* The caller should have made a call either contingent with */
1888                 /* or prior to this call to set UPL_POP_BUSY */
1889                 if(ops & UPL_POP_SET) {
1890                         /* The protection granted with this assert will */
1891                         /* not be complete.  If the caller violates the */
1892                         /* convention and attempts to change page state */
1893                         /* without first setting busy we may not see it */
1894                         /* because the page may already be busy.  However */
1895                         /* if such violations occur we will assert sooner */
1896                         /* or later. */
1897                         assert(dst_page->busy || (ops & UPL_POP_BUSY));
1898                         if (ops & UPL_POP_DIRTY) dst_page->dirty = TRUE;
1899                         if (ops & UPL_POP_PAGEOUT) dst_page->pageout = TRUE;
1900                         if (ops & UPL_POP_PRECIOUS) dst_page->precious = TRUE;
1901                         if (ops & UPL_POP_ABSENT) dst_page->absent = TRUE;
1902                         if (ops & UPL_POP_BUSY) dst_page->busy = TRUE;
1903                 }
1904
1905                 if(ops & UPL_POP_CLR) {
1906                         assert(dst_page->busy);
1907                         if (ops & UPL_POP_DIRTY) dst_page->dirty = FALSE;
1908                         if (ops & UPL_POP_PAGEOUT) dst_page->pageout = FALSE;
1909                         if (ops & UPL_POP_PRECIOUS) dst_page->precious = FALSE;
1910                         if (ops & UPL_POP_ABSENT) dst_page->absent = FALSE;
1911                         if (ops & UPL_POP_BUSY) {
1912                                 dst_page->busy = FALSE;
1913                                 PAGE_WAKEUP(dst_page);
1914                         }
1915                 }
1916
1917                 if (dst_page->encrypted) {
1918                         /*
1919                          * ENCRYPTED SWAP:
1920                          * We need to decrypt this encrypted page before the
1921                          * caller can access its contents.
1922                          * But if the caller really wants to access the page's
1923                          * contents, they have to keep the page "busy".
1924                          * Otherwise, the page could get recycled or re-encrypted
1925                          * at any time.
1926                          */
1927                         if ((ops & UPL_POP_SET) && (ops & UPL_POP_BUSY) &&
1928                             dst_page->busy) {
1929                                 /*
1930                                  * The page is stable enough to be accessed by
1931                                  * the caller, so make sure its contents are
1932                                  * not encrypted.
1933                                  */
1934                                 vm_page_decrypt(dst_page, 0);
1935                         } else {
1936                                 /*
1937                                  * The page is not busy, so don't bother
1938                                  * decrypting it, since anything could
1939                                  * happen to it between now and when the
1940                                  * caller wants to access it.
1941                                  * We should not give the caller access
1942                                  * to this page.
1943                                  */
1944                                 assert(!phys_entry);
1945                         }
1946                 }
1947
1948                 if (phys_entry) {
1949                         /*
1950                          * The physical page number will remain valid
1951                          * only if the page is kept busy.
1952                          * ENCRYPTED SWAP: make sure we don't let the
1953                          * caller access an encrypted page.
1954                          */
1955                         assert(dst_page->busy);
1956                         assert(!dst_page->encrypted);
1957                         *phys_entry = dst_page->phys_page;
1958                 }
1959
1960                 break;
1961         }
1962
1963         vm_object_unlock(object);
1964         return KERN_SUCCESS;
1965
1966 }
1967
1968 /*
1969  * memory_object_range_op offers performance enhancement over
1970  * memory_object_page_op for page_op functions which do not require page
1971  * level state to be returned from the call.  Page_op was created to provide
1972  * a low-cost alternative to page manipulation via UPLs when only a single
1973  * page was involved.  The range_op call establishes the ability in the _op
1974  * family of functions to work on multiple pages where the lack of page level
1975  * state handling allows the caller to avoid the overhead of the upl structures.
1976  */
1977
1978 kern_return_t
1979 memory_object_range_op(
1980         memory_object_control_t control,
1981         memory_object_offset_t  offset_beg,
1982         memory_object_offset_t  offset_end,
1983         int                     ops,
1984         int                     *range)
1985 {
1986         memory_object_offset_t  offset;
1987         vm_object_t             object;
1988         vm_page_t               dst_page;
1989
1990         object = memory_object_control_to_vm_object(control);
1991         if (object == VM_OBJECT_NULL)
1992                 return (KERN_INVALID_ARGUMENT);
1993
1994         if (object->resident_page_count == 0) {
1995                 if (range) {
1996                         if (ops & UPL_ROP_PRESENT)
1997                                 *range = 0;
1998                         else
1999                                 *range = offset_end - offset_beg;
2000                 }
2001                 return KERN_SUCCESS;
2002         }
2003         vm_object_lock(object);
2004
2005         if (object->phys_contiguous) {
2006                 vm_object_unlock(object);
2007                 return KERN_INVALID_OBJECT;
2008         }
2009
2010         offset = offset_beg;
2011
2012         while (offset < offset_end) {
2013                 dst_page = vm_page_lookup(object, offset);
2014                 if (dst_page != VM_PAGE_NULL) {
2015                         if (ops & UPL_ROP_DUMP) {
2016                                 if (dst_page->busy || dst_page->cleaning) {
2017                                         /*
2018                                          * someone else is playing with the
2019                                          * page, we will have to wait
2020                                          */
2021                                         PAGE_SLEEP(object,
2022                                                 dst_page, THREAD_UNINT);
2023                                         /*
2024                                          * need to relook the page up since it's
2025                                          * state may have changed while we slept
2026                                          * it might even belong to a different object
2027                                          * at this point
2028                                          */
2029                                         continue;
2030                                 }
2031                                 vm_page_lock_queues();
2032
2033                                 if (dst_page->no_isync == FALSE)
2034                                         pmap_disconnect(dst_page->phys_page);
2035                                 vm_page_free(dst_page);
2036
2037                                 vm_page_unlock_queues();
2038                         } else if (ops & UPL_ROP_ABSENT)
2039                                 break;
2040                 } else if (ops & UPL_ROP_PRESENT)
2041                         break;
2042
2043                 offset += PAGE_SIZE;
2044         }
2045         vm_object_unlock(object);
2046
2047         if (range)
2048                 *range = offset - offset_beg;
2049
2050         return KERN_SUCCESS;
2051 }
2052
2053
2054 kern_return_t
2055 memory_object_pages_resident(
2056         memory_object_control_t control,
2057         boolean_t                       *       has_pages_resident)
2058 {
2059         vm_object_t             object;
2060
2061         *has_pages_resident = FALSE;
2062
2063         object = memory_object_control_to_vm_object(control);
2064         if (object == VM_OBJECT_NULL)
2065                 return (KERN_INVALID_ARGUMENT);
2066
2067         if (object->resident_page_count)
2068                 *has_pages_resident = TRUE;
2069
2070         return (KERN_SUCCESS);
2071 }
2072
2073
2074 static zone_t mem_obj_control_zone;
2075
2076 __private_extern__ void
2077 memory_object_control_bootstrap(void)
2078 {
2079         int     i;
2080
2081         i = (vm_size_t) sizeof (struct memory_object_control);
2082         mem_obj_control_zone = zinit (i, 8192*i, 4096, "mem_obj_control");
2083         return;
2084 }
2085
2086 __private_extern__ memory_object_control_t
2087 memory_object_control_allocate(
2088         vm_object_t             object)
2089 {
2090         memory_object_control_t control;
2091
2092         control = (memory_object_control_t)zalloc(mem_obj_control_zone);
2093         if (control != MEMORY_OBJECT_CONTROL_NULL)
2094                 control->object = object;
2095         return (control);
2096 }
2097
2098 __private_extern__ void
2099 memory_object_control_collapse(
2100         memory_object_control_t control,
2101         vm_object_t             object)
2102 {
2103         assert((control->object != VM_OBJECT_NULL) &&
2104                (control->object != object));
2105         control->object = object;
2106 }
2107
2108 __private_extern__ vm_object_t
2109 memory_object_control_to_vm_object(
2110         memory_object_control_t control)
2111 {
2112         if (control == MEMORY_OBJECT_CONTROL_NULL)
2113                 return VM_OBJECT_NULL;
2114
2115         return (control->object);
2116 }
2117
2118 memory_object_control_t
2119 convert_port_to_mo_control(
2120         __unused mach_port_t    port)
2121 {
2122         return MEMORY_OBJECT_CONTROL_NULL;
2123 }
2124
2125
2126 mach_port_t
2127 convert_mo_control_to_port(
2128         __unused memory_object_control_t        control)
2129 {
2130         return MACH_PORT_NULL;
2131 }
2132
2133 void
2134 memory_object_control_reference(
2135         __unused memory_object_control_t        control)
2136 {
2137         return;
2138 }
2139
2140 /*
2141  * We only every issue one of these references, so kill it
2142  * when that gets released (should switch the real reference
2143  * counting in true port-less EMMI).
2144  */
2145 void
2146 memory_object_control_deallocate(
2147         memory_object_control_t control)
2148 {
2149         zfree(mem_obj_control_zone, control);
2150 }
2151
2152 void
2153 memory_object_control_disable(
2154         memory_object_control_t control)
2155 {
2156         assert(control->object != VM_OBJECT_NULL);
2157         control->object = VM_OBJECT_NULL;
2158 }
2159
2160 void
2161 memory_object_default_reference(
2162         memory_object_default_t dmm)
2163 {
2164         ipc_port_make_send(dmm);
2165 }
2166
2167 void
2168 memory_object_default_deallocate(
2169         memory_object_default_t dmm)
2170 {
2171         ipc_port_release_send(dmm);
2172 }
2173
2174 memory_object_t
2175 convert_port_to_memory_object(
2176         __unused mach_port_t    port)
2177 {
2178         return (MEMORY_OBJECT_NULL);
2179 }
2180
2181
2182 mach_port_t
2183 convert_memory_object_to_port(
2184         __unused memory_object_t        object)
2185 {
2186         return (MACH_PORT_NULL);
2187 }
2188
2189
2190 /* Routine memory_object_reference */
2191 void memory_object_reference(
2192         memory_object_t memory_object)
2193 {
2194
2195 #ifdef  MACH_BSD
2196         if (memory_object->pager == &vnode_pager_workaround) {
2197                 vnode_pager_reference(memory_object);
2198         } else if (memory_object->pager == &device_pager_workaround) {
2199                 device_pager_reference(memory_object);
2200         } else
2201 #endif
2202                 dp_memory_object_reference(memory_object);
2203 }
2204
2205 /* Routine memory_object_deallocate */
2206 void memory_object_deallocate(
2207         memory_object_t memory_object)
2208 {
2209
2210 #ifdef  MACH_BSD
2211         if (memory_object->pager == &vnode_pager_workaround) {
2212                 vnode_pager_deallocate(memory_object);
2213         } else if (memory_object->pager == &device_pager_workaround) {
2214                 device_pager_deallocate(memory_object);
2215         } else
2216 #endif
2217                 dp_memory_object_deallocate(memory_object);
2218 }
2219
2220
2221 /* Routine memory_object_init */
2222 kern_return_t memory_object_init
2223 (
2224         memory_object_t memory_object,
2225         memory_object_control_t memory_control,
2226         memory_object_cluster_size_t memory_object_page_size
2227 )
2228 {
2229 #ifdef  MACH_BSD
2230         if (memory_object->pager == &vnode_pager_workaround) {
2231                 return vnode_pager_init(memory_object,
2232                                         memory_control,
2233                                         memory_object_page_size);
2234         } else if (memory_object->pager == &device_pager_workaround) {
2235                 return device_pager_init(memory_object,
2236                                          memory_control,
2237                                          memory_object_page_size);
2238         } else
2239 #endif
2240                 return dp_memory_object_init(memory_object,
2241                                              memory_control,
2242                                              memory_object_page_size);
2243 }
2244
2245 /* Routine memory_object_terminate */
2246 kern_return_t memory_object_terminate
2247 (
2248         memory_object_t memory_object
2249 )
2250 {
2251 #ifdef  MACH_BSD
2252         if (memory_object->pager == &vnode_pager_workaround) {
2253                 return vnode_pager_terminate(memory_object);
2254         } else if (memory_object->pager == &device_pager_workaround) {
2255                 return device_pager_terminate(memory_object);
2256         } else
2257 #endif
2258                 return dp_memory_object_terminate(memory_object);
2259 }
2260
2261 /* Routine memory_object_data_request */
2262 kern_return_t memory_object_data_request
2263 (
2264         memory_object_t memory_object,
2265         memory_object_offset_t offset,
2266         memory_object_cluster_size_t length,
2267         vm_prot_t desired_access
2268 )
2269 {
2270 #ifdef  MACH_BSD
2271         if (memory_object->pager == &vnode_pager_workaround) {
2272                 return vnode_pager_data_request(memory_object,
2273                                                 offset,
2274                                                 length,
2275                                                 desired_access);
2276         } else if (memory_object->pager == &device_pager_workaround) {
2277                 return device_pager_data_request(memory_object,
2278                                                  offset,
2279                                                  length,
2280                                                  desired_access);
2281         } else
2282 #endif
2283                 return dp_memory_object_data_request(memory_object,
2284                                                      offset,
2285                                                      length,
2286                                                      desired_access);
2287 }
2288
2289 /* Routine memory_object_data_return */
2290 kern_return_t memory_object_data_return
2291 (
2292         memory_object_t memory_object,
2293         memory_object_offset_t offset,
2294         vm_size_t size,
2295         memory_object_offset_t *resid_offset,
2296         int     *io_error,
2297         boolean_t dirty,
2298         boolean_t kernel_copy,
2299         int     upl_flags
2300 )
2301 {
2302 #ifdef MACH_BSD
2303         if (memory_object->pager == &vnode_pager_workaround) {
2304                 return vnode_pager_data_return(memory_object,
2305                                                offset,
2306                                                size,
2307                                                resid_offset,
2308                                                io_error,
2309                                                dirty,
2310                                                kernel_copy,
2311                                                upl_flags);
2312         } else if (memory_object->pager == &device_pager_workaround) {
2313
2314                 return device_pager_data_return(memory_object,
2315                                                 offset,
2316                                                 size,
2317                                                 dirty,
2318                                                 kernel_copy,
2319                                                 upl_flags);
2320         }
2321         else
2322 #endif
2323         {
2324                 return dp_memory_object_data_return(memory_object,
2325                                                     offset,
2326                                                     size,
2327                                                     NULL,
2328                                                     NULL,
2329                                                     dirty,
2330                                                     kernel_copy,
2331                                                     upl_flags);
2332         }
2333 }
2334
2335 /* Routine memory_object_data_initialize */
2336 kern_return_t memory_object_data_initialize
2337 (
2338         memory_object_t memory_object,
2339         memory_object_offset_t offset,
2340         vm_size_t size
2341 )
2342 {
2343 #ifdef MACH_BSD
2344         if (memory_object->pager == &vnode_pager_workaround) {
2345                 return vnode_pager_data_initialize(memory_object,
2346                                                    offset,
2347                                                    size);
2348         } else if (memory_object->pager == &device_pager_workaround) {
2349                 return device_pager_data_initialize(memory_object,
2350                                                     offset,
2351                                                     size);
2352         } else
2353 #endif
2354                 return dp_memory_object_data_initialize(memory_object,
2355                                                         offset,
2356                                                         size);
2357 }
2358
2359 /* Routine memory_object_data_unlock */
2360 kern_return_t memory_object_data_unlock
2361 (
2362         memory_object_t memory_object,
2363         memory_object_offset_t offset,
2364         vm_size_t size,
2365         vm_prot_t desired_access
2366 )
2367 {
2368 #ifdef MACH_BSD
2369         if (memory_object->pager == &vnode_pager_workaround) {
2370                 return vnode_pager_data_unlock(memory_object,
2371                                                offset,
2372                                                size,
2373                                                desired_access);
2374         } else if (memory_object->pager == &device_pager_workaround) {
2375                 return device_pager_data_unlock(memory_object,
2376                                                 offset,
2377                                                 size,
2378                                                 desired_access);
2379         } else
2380 #endif
2381                 return dp_memory_object_data_unlock(memory_object,
2382                                                     offset,
2383                                                     size,
2384                                                     desired_access);
2385 }
2386
2387 /* Routine memory_object_synchronize */
2388 kern_return_t memory_object_synchronize
2389 (
2390         memory_object_t memory_object,
2391         memory_object_offset_t offset,
2392         vm_size_t size,
2393         vm_sync_t sync_flags
2394 )
2395 {
2396 #ifdef MACH_BSD
2397         if (memory_object->pager == &vnode_pager_workaround) {
2398                 return vnode_pager_synchronize(memory_object,
2399                                                offset,
2400                                                size,
2401                                                sync_flags);
2402         } else if (memory_object->pager == &device_pager_workaround) {
2403                 return device_pager_synchronize(memory_object,
2404                                                 offset,
2405                                                 size,
2406                                                 sync_flags);
2407         } else
2408 #endif
2409                 return dp_memory_object_synchronize(memory_object,
2410                                                     offset,
2411                                                     size,
2412                                                     sync_flags);
2413 }
2414
2415 /* Routine memory_object_unmap */
2416 kern_return_t memory_object_unmap
2417 (
2418         memory_object_t memory_object
2419 )
2420 {
2421 #ifdef MACH_BSD
2422         if (memory_object->pager == &vnode_pager_workaround) {
2423                 return vnode_pager_unmap(memory_object);
2424         } else if (memory_object->pager == &device_pager_workaround) {
2425                 return device_pager_unmap(memory_object);
2426         } else
2427 #endif
2428                 return dp_memory_object_unmap(memory_object);
2429 }
2430
2431 /* Routine memory_object_create */
2432 kern_return_t memory_object_create
2433 (
2434         memory_object_default_t default_memory_manager,
2435         vm_size_t new_memory_object_size,
2436         memory_object_t *new_memory_object
2437 )
2438 {
2439         return default_pager_memory_object_create(default_memory_manager,
2440                                                   new_memory_object_size,
2441                                                   new_memory_object);
2442 }
2443
2444 upl_t
2445 convert_port_to_upl(
2446         ipc_port_t      port)
2447 {
2448         upl_t upl;
2449
2450         ip_lock(port);
2451         if (!ip_active(port) || (ip_kotype(port) != IKOT_UPL)) {
2452                         ip_unlock(port);
2453                         return (upl_t)NULL;
2454         }
2455         upl = (upl_t) port->ip_kobject;
2456         ip_unlock(port);
2457         upl_lock(upl);
2458         upl->ref_count+=1;
2459         upl_unlock(upl);
2460         return upl;
2461 }
2462
2463 mach_port_t
2464 convert_upl_to_port(
2465         __unused upl_t          upl)
2466 {
2467         return MACH_PORT_NULL;
2468 }
2469
2470 __private_extern__ void
2471 upl_no_senders(
2472         __unused ipc_port_t                             port,
2473         __unused mach_port_mscount_t    mscount)
2474 {
2475         return;
2476 }