/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Paging File Management.
 */
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>
#include "default_pager_internal.h"
#include <default_pager/default_pager_alerts.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future.
 */

#define ALLOC_STRIDE	(1024 * 1024 * 1024)
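
/*
 * Illustrative sketch only (not part of the original source): the stride
 * check applied later in ps_select_segment amounts to converting
 * ALLOC_STRIDE from bytes into clusters and rotating to the next paging
 * segment once that many clusters have been handed out consecutively
 * from the current one.
 */
#if 0	/* example only */
static boolean_t
alloc_stride_exhausted(paging_segment_t ps, int consecutive_clusters)
{
	/* clusters per stride = stride bytes / (pages per cluster * bytes per page) */
	int clusters_per_stride = ALLOC_STRIDE >> (ps->ps_clshift + vm_page_shift);

	return ((consecutive_clusters + 1) >= clusters_per_stride);
}
#endif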
int physical_transfer_cluster_count = 0;

#define VM_SUPER_CLUSTER	0x40000
#define VM_SUPER_PAGES		64
/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_DEF_CLSHIFT	2
int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;
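
/*
 * Worked example (illustration, not original code): with the default
 * VSTRUCT_DEF_CLSHIFT of 2, a cluster spans 1 << 2 == 4 pages, so with
 * 4 KB pages each cluster covers 16 KB of backing store.
 */
#if 0	/* example only */
#define PAGES_PER_CLUSTER(clshift)	(1 << (clshift))
#define BYTES_PER_CLUSTER(clshift)	(PAGES_PER_CLUSTER(clshift) * vm_page_size)
#endif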
unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list:		head of list of to-be-completed I/O ops
 *	async_num_queued:	number of pages completed, but not yet
 *				processed by async thread.
 *	async_requests_out:	number of pages of requests not completed.
 */
struct vs_async	*vs_async_list;
int		async_num_queued;
int		async_requests_out;

#define VS_ASYNC_REUSE	1
struct vs_async	*vs_async_free_list;

mutex_t	default_pager_async_lock;	/* Protects globals above */

int	vs_alloc_async_failed = 0;	/* statistics */
int	vs_alloc_async_count = 0;	/* statistics */
struct vs_async *vs_alloc_async(void);		/* forward */
void	vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	mutex_init(&default_pager_async_lock, \
#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */

unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
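
/*
 * Illustrative sketch (mirrors the pattern used in the allocation paths
 * below, not a routine defined by this file): when free paging space
 * drops under the low-water mark, the trigger port is consumed and
 * notified exactly once.
 */
#if 0	/* example only */
static void
check_low_space_trigger(void)
{
	ipc_port_t trigger = IP_NULL;

	if (min_pages_trigger_port &&
	    (dp_pages_free < minimum_pages_remaining)) {
		trigger = min_pages_trigger_port;
		min_pages_trigger_port = NULL;
	}
	if (trigger != IP_NULL) {
		default_pager_space_alert(trigger, HI_WAT_ALERT);
		ipc_port_release_send(trigger);
	}
}
#endif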
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t		 paging_segments_lock;
int		 paging_segment_max = 0;
int		 paging_segment_count = 0;
int		 ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
/*
 * Total pages free in system.
 * This differs from clusters committed/avail which is a measure of the
 * over-commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	cluster_transfer_minimum = 100;
kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *
get_read_buffer(void)
{
	int	i;

	DPT_LOCK(dpt_lock);
	while (TRUE) {
		for (i = 0; i < default_pager_internal_count; i++) {
			if (dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
#if	VS_ASYNC_REUSE
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
	}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t	out_of_memory)
{
	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}

void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */
boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
void
bs_more_space(
	int	nclusters)
{
	/*
	 * Account for new paging space.
	 */
	clusters_available += nclusters;

	if (clusters_available >= clusters_committed) {
		if (verbose && user_warned) {
			printf("%s%s - %d excess clusters now.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_available - clusters_committed);
			user_warned = FALSE;
			clusters_committed_peak = 0;
		}
	} else {
		if (verbose && user_warned) {
			printf("%s%s - still short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
			clusters_committed_peak -= nclusters;
		}
	}
}
void
bs_commit(
	int	nclusters)
{
	clusters_committed += nclusters;
	if (clusters_committed > clusters_available) {
		if (verbose && !user_warned) {
			user_warned = TRUE;
			printf("%s%s - short of %d clusters.\n",
			       my_name,
			       "WARNING: paging space over-committed",
			       clusters_committed - clusters_available);
		}
		if (clusters_committed > clusters_committed_peak) {
			clusters_committed_peak = clusters_committed;
		}
	} else {
		if (verbose && user_warned) {
			user_warned = FALSE;
			printf("%s%s - was short of up to %d clusters.\n",
			       my_name,
			       "paging space is OK now",
			       clusters_committed_peak - clusters_available);
		}
		clusters_committed_peak = 0;
	}
}
int	default_pager_info_verbose = 1;
316 vm_size_t pages_total
, pages_free
;
321 pages_total
= pages_free
= 0;
322 for (i
= 0; i
<= paging_segment_max
; i
++) {
323 ps
= paging_segments
[i
];
324 if (ps
== PAGING_SEGMENT_NULL
)
328 * no need to lock: by the time this data
329 * gets back to any remote requestor it
330 * will be obsolete anyways
332 pages_total
+= ps
->ps_pgnum
;
333 pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
334 DEBUG(DEBUG_BS_INTERNAL
,
335 ("segment #%d: %d total, %d free\n",
336 i
, ps
->ps_pgnum
, ps
->ps_clcount
<< ps
->ps_clshift
));
338 *totalp
= pages_total
;
340 if (verbose
&& user_warned
&& default_pager_info_verbose
) {
341 if (clusters_available
< clusters_committed
) {
342 printf("%s %d clusters committed, %d available.\n",
backing_store_t backing_store_alloc(void);	/* forward */

backing_store_t
backing_store_alloc(void)
{
	backing_store_t	bs;

	bs = (backing_store_t) kalloc(sizeof (struct backing_store));
	if (bs == BACKING_STORE_NULL)
		panic("backing_store_alloc: no memory");

	bs->bs_port = MACH_PORT_NULL;
	bs->bs_pages_total = 0;
	bs->bs_pages_in_fail = 0;
	bs->bs_pages_out = 0;
	bs->bs_pages_out_fail = 0;

	return bs;
}
backing_store_t backing_store_lookup(MACH_PORT_FACE);	/* forward */

/* Even in both the component space and external versions of this pager, */
/* backing_store_lookup will be called from tasks in the application space */
backing_store_t
backing_store_lookup(
	MACH_PORT_FACE	port)
{
	backing_store_t	bs;

/*
	port is currently backed with a vs structure in the alias field
	we could create an ISBS alias and a port_is_bs call but frankly
	I see no reason for the test, the bs->port == port check below
	will work properly on junk entries.

	if ((port == MACH_PORT_NULL) || port_is_vs(port))
*/
	if ((port == MACH_PORT_NULL))
		return BACKING_STORE_NULL;
397 queue_iterate(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
400 if (bs
->bs_port
== port
) {
402 /* Success, return it locked. */
408 return BACKING_STORE_NULL
;
411 void backing_store_add(backing_store_t
); /* forward */
417 MACH_PORT_FACE port
= bs
->bs_port
;
418 MACH_PORT_FACE pset
= default_pager_default_set
;
419 kern_return_t kr
= KERN_SUCCESS
;
421 if (kr
!= KERN_SUCCESS
)
422 panic("backing_store_add: add to set");
427 * Set up default page shift, but only if not already
428 * set and argument is within range.
431 bs_set_default_clsize(unsigned int npages
)
438 if (default_pager_clsize
== 0) /* if not yet set */
439 vstruct_def_clshift
= local_log2(npages
);
445 int bs_get_global_clsize(int clsize
); /* forward */
448 bs_get_global_clsize(
452 memory_object_default_t dmm
;
456 * Only allow setting of cluster size once. If called
457 * with no cluster size (default), we use the compiled-in default
458 * for the duration. The same cluster size is used for all
461 if (default_pager_clsize
== 0) {
463 * Keep cluster size in bit shift because it's quicker
464 * arithmetic, and easier to keep at a power of 2.
466 if (clsize
!= NO_CLSIZE
) {
467 for (i
= 0; (1 << i
) < clsize
; i
++);
468 if (i
> MAX_CLUSTER_SHIFT
)
469 i
= MAX_CLUSTER_SHIFT
;
470 vstruct_def_clshift
= i
;
472 default_pager_clsize
= (1 << vstruct_def_clshift
);
475 * Let the user know the new (and definitive) cluster size.
478 printf("%scluster size = %d page%s\n",
479 my_name
, default_pager_clsize
,
480 (default_pager_clsize
== 1) ? "" : "s");
483 * Let the kernel know too, in case it hasn't used the
484 * default value provided in main() yet.
486 dmm
= default_pager_object
;
487 clsize
= default_pager_clsize
* vm_page_size
; /* in bytes */
488 kr
= host_default_memory_manager(host_priv_self(),
491 memory_object_default_deallocate(dmm
);
493 if (kr
!= KERN_SUCCESS
) {
494 panic("bs_get_global_cl_size:host_default_memory_manager");
496 if (dmm
!= default_pager_object
) {
497 panic("bs_get_global_cl_size:there is another default pager");
500 ASSERT(default_pager_clsize
> 0 &&
501 (default_pager_clsize
& (default_pager_clsize
- 1)) == 0);
503 return default_pager_clsize
;
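
/*
 * Worked example (illustration only): the cluster-size loop in
 * bs_get_global_clsize above rounds a requested size up to a power of
 * two by finding the smallest shift i with (1 << i) >= clsize, capped
 * at MAX_CLUSTER_SHIFT.  E.g. clsize == 3 pages yields i == 2, i.e.
 * 4 pages per cluster.
 */
#if 0	/* example only */
static int
round_clsize_to_shift(int clsize_in_pages)
{
	int i;

	for (i = 0; (1 << i) < clsize_in_pages; i++)
		continue;
	if (i > MAX_CLUSTER_SHIFT)
		i = MAX_CLUSTER_SHIFT;
	return i;
}
#endif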
507 default_pager_backing_store_create(
508 memory_object_default_t pager
,
510 int clsize
, /* in bytes */
511 MACH_PORT_FACE
*backing_store
)
516 struct vstruct_alias
*alias_struct
;
518 if (pager
!= default_pager_object
)
519 return KERN_INVALID_ARGUMENT
;
521 bs
= backing_store_alloc();
522 port
= ipc_port_alloc_kernel();
523 ipc_port_make_send(port
);
524 assert (port
!= IP_NULL
);
526 DEBUG(DEBUG_BS_EXTERNAL
,
527 ("priority=%d clsize=%d bs_port=0x%x\n",
528 priority
, clsize
, (int) backing_store
));
530 alias_struct
= (struct vstruct_alias
*)
531 kalloc(sizeof (struct vstruct_alias
));
532 if(alias_struct
!= NULL
) {
533 alias_struct
->vs
= (struct vstruct
*)bs
;
534 alias_struct
->name
= ISVS
;
535 port
->alias
= (int) alias_struct
;
538 ipc_port_dealloc_kernel((MACH_PORT_FACE
)(port
));
539 kfree((vm_offset_t
)bs
, sizeof (struct backing_store
));
540 return KERN_RESOURCE_SHORTAGE
;
544 if (priority
== DEFAULT_PAGER_BACKING_STORE_MAXPRI
)
545 priority
= BS_MAXPRI
;
546 else if (priority
== BS_NOPRI
)
547 priority
= BS_MAXPRI
;
549 priority
= BS_MINPRI
;
550 bs
->bs_priority
= priority
;
552 bs
->bs_clsize
= bs_get_global_clsize(atop_32(clsize
));
555 queue_enter(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
559 backing_store_add(bs
);
561 *backing_store
= port
;
566 default_pager_backing_store_info(
567 MACH_PORT_FACE backing_store
,
568 backing_store_flavor_t flavour
,
569 backing_store_info_t info
,
570 mach_msg_type_number_t
*size
)
573 backing_store_basic_info_t basic
;
577 if (flavour
!= BACKING_STORE_BASIC_INFO
||
578 *size
< BACKING_STORE_BASIC_INFO_COUNT
)
579 return KERN_INVALID_ARGUMENT
;
581 basic
= (backing_store_basic_info_t
)info
;
582 *size
= BACKING_STORE_BASIC_INFO_COUNT
;
584 VSTATS_LOCK(&global_stats
.gs_lock
);
585 basic
->pageout_calls
= global_stats
.gs_pageout_calls
;
586 basic
->pagein_calls
= global_stats
.gs_pagein_calls
;
587 basic
->pages_in
= global_stats
.gs_pages_in
;
588 basic
->pages_out
= global_stats
.gs_pages_out
;
589 basic
->pages_unavail
= global_stats
.gs_pages_unavail
;
590 basic
->pages_init
= global_stats
.gs_pages_init
;
591 basic
->pages_init_writes
= global_stats
.gs_pages_init_writes
;
592 VSTATS_UNLOCK(&global_stats
.gs_lock
);
594 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
595 return KERN_INVALID_ARGUMENT
;
597 basic
->bs_pages_total
= bs
->bs_pages_total
;
599 bs
->bs_pages_free
= 0;
600 for (i
= 0; i
<= paging_segment_max
; i
++) {
601 ps
= paging_segments
[i
];
602 if (ps
!= PAGING_SEGMENT_NULL
&& ps
->ps_bs
== bs
) {
604 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
609 basic
->bs_pages_free
= bs
->bs_pages_free
;
610 basic
->bs_pages_in
= bs
->bs_pages_in
;
611 basic
->bs_pages_in_fail
= bs
->bs_pages_in_fail
;
612 basic
->bs_pages_out
= bs
->bs_pages_out
;
613 basic
->bs_pages_out_fail
= bs
->bs_pages_out_fail
;
615 basic
->bs_priority
= bs
->bs_priority
;
616 basic
->bs_clsize
= ptoa_32(bs
->bs_clsize
); /* in bytes */
623 int ps_delete(paging_segment_t
); /* forward */
630 kern_return_t error
= KERN_SUCCESS
;
633 VSL_LOCK(); /* get the lock on the list of vs's */
	/* The lock relationship and sequence is fairly complicated */
636 /* this code looks at a live list, locking and unlocking the list */
637 /* as it traverses it. It depends on the locking behavior of */
638 /* default_pager_no_senders. no_senders always locks the vstruct */
639 /* targeted for removal before locking the vstruct list. However */
640 /* it will remove that member of the list without locking its */
641 /* neighbors. We can be sure when we hold a lock on a vstruct */
642 /* it cannot be removed from the list but we must hold the list */
643 /* lock to be sure that its pointers to its neighbors are valid. */
644 /* Also, we can hold off destruction of a vstruct when the list */
645 /* lock and the vs locks are not being held by bumping the */
646 /* vs_async_pending count. */
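
	/*
	 * Sketch of the pinning idiom described in the comments above
	 * (illustration only, not original code): bumping vs_async_pending
	 * keeps a vstruct from being destroyed while neither the vstruct
	 * lock nor the list lock is held; the matching release wakes any
	 * waiter once the count drains to zero.  The real code performs
	 * these steps inline, under the appropriate vs lock.
	 */
#if 0	/* example only */
static void
vs_pin(vstruct_t vs)
{
	vs->vs_async_pending += 1;	/* hold parties calling vs_async_wait */
}

static void
vs_unpin(vstruct_t vs)
{
	vs->vs_async_pending -= 1;	/* release vs_async_wait */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		thread_wakeup(&vs->vs_async_pending);
	}
}
#endif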
649 while(backing_store_release_trigger_disable
!= 0) {
650 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
653 /* we will choose instead to hold a send right */
654 vs_count
= vstruct_list
.vsl_count
;
655 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
656 if(vs
== (vstruct_t
)&vstruct_list
) {
661 vs_async_wait(vs
); /* wait for any pending async writes */
662 if ((vs_count
!= 0) && (vs
!= NULL
))
663 vs
->vs_async_pending
+= 1; /* hold parties calling */
667 while((vs_count
!= 0) && (vs
!= NULL
)) {
668 /* We take the count of AMO's before beginning the */
	/* transfer of the target segment. */
670 /* We are guaranteed that the target segment cannot get */
671 /* more users. We also know that queue entries are */
672 /* made at the back of the list. If some of the entries */
673 /* we would check disappear while we are traversing the */
674 /* list then we will either check new entries which */
675 /* do not have any backing store in the target segment */
676 /* or re-check old entries. This might not be optimal */
677 /* but it will always be correct. The alternative is to */
678 /* take a snapshot of the list. */
681 if(dp_pages_free
< cluster_transfer_minimum
)
682 error
= KERN_FAILURE
;
684 vm_object_t transfer_object
;
688 transfer_object
= vm_object_allocate(VM_SUPER_CLUSTER
);
690 error
= vm_object_upl_request(transfer_object
,
691 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
693 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
695 if(error
== KERN_SUCCESS
) {
696 error
= ps_vstruct_transfer_from_segment(
698 upl_commit(upl
, NULL
);
701 error
= KERN_FAILURE
;
703 vm_object_deallocate(transfer_object
);
707 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
708 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
709 vs
->vs_waiting_async
= FALSE
;
711 thread_wakeup(&vs
->vs_async_pending
);
720 while(backing_store_release_trigger_disable
!= 0) {
721 VSL_SLEEP(&backing_store_release_trigger_disable
,
725 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
726 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
727 (vs
!= next_vs
) && (vs_count
!= 1)) {
729 vs_async_wait(next_vs
); /* wait for any */
730 /* pending async writes */
731 next_vs
->vs_async_pending
+= 1; /* hold parties */
732 /* calling vs_async_wait */
737 vs
->vs_async_pending
-= 1;
738 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
739 vs
->vs_waiting_async
= FALSE
;
741 thread_wakeup(&vs
->vs_async_pending
);
745 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
756 default_pager_backing_store_delete(
757 MACH_PORT_FACE backing_store
)
763 int interim_pages_removed
= 0;
766 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
767 return KERN_INVALID_ARGUMENT
;
770 /* not implemented */
777 error
= KERN_SUCCESS
;
778 for (i
= 0; i
<= paging_segment_max
; i
++) {
779 ps
= paging_segments
[i
];
780 if (ps
!= PAGING_SEGMENT_NULL
&&
782 ! ps
->ps_going_away
) {
784 /* disable access to this segment */
785 ps
->ps_going_away
= TRUE
;
788 * The "ps" segment is "off-line" now,
789 * we can try and delete it...
791 if(dp_pages_free
< (cluster_transfer_minimum
793 error
= KERN_FAILURE
;
797 /* remove all pages associated with the */
798 /* segment from the list of free pages */
799 /* when transfer is through, all target */
800 /* segment pages will appear to be free */
802 dp_pages_free
-= ps
->ps_pgcount
;
803 interim_pages_removed
+= ps
->ps_pgcount
;
805 error
= ps_delete(ps
);
807 if (error
!= KERN_SUCCESS
) {
809 * We couldn't delete the segment,
810 * probably because there's not enough
811 * virtual memory left.
812 * Re-enable all the segments.
821 if (error
!= KERN_SUCCESS
) {
822 for (i
= 0; i
<= paging_segment_max
; i
++) {
823 ps
= paging_segments
[i
];
824 if (ps
!= PAGING_SEGMENT_NULL
&&
828 /* re-enable access to this segment */
829 ps
->ps_going_away
= FALSE
;
833 dp_pages_free
+= interim_pages_removed
;
839 for (i
= 0; i
<= paging_segment_max
; i
++) {
840 ps
= paging_segments
[i
];
841 if (ps
!= PAGING_SEGMENT_NULL
&&
843 if(ps
->ps_going_away
) {
844 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
845 paging_segment_count
--;
847 kfree((vm_offset_t
)ps
->ps_bmap
,
848 RMAPSIZE(ps
->ps_ncls
));
849 kfree((vm_offset_t
)ps
, sizeof *ps
);
854 /* Scan the entire ps array separately to make certain we find the */
855 /* proper paging_segment_max */
856 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
857 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
858 paging_segment_max
= i
;
864 * All the segments have been deleted.
865 * We can remove the backing store.
869 * Disable lookups of this backing store.
871 if((void *)bs
->bs_port
->alias
!= NULL
)
872 kfree((vm_offset_t
) bs
->bs_port
->alias
,
873 sizeof (struct vstruct_alias
));
874 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
875 bs
->bs_port
= MACH_PORT_NULL
;
879 * Remove backing store from backing_store list.
882 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
887 * Free the backing store structure.
889 kfree((vm_offset_t
)bs
, sizeof *bs
);
894 int ps_enter(paging_segment_t
); /* forward */
904 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
905 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
909 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
910 paging_segments
[i
] = ps
;
911 if (i
> paging_segment_max
)
912 paging_segment_max
= i
;
913 paging_segment_count
++;
914 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
915 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
916 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
920 return KERN_RESOURCE_SHORTAGE
;
929 default_pager_add_segment(
930 MACH_PORT_FACE backing_store
,
931 MACH_PORT_FACE device
,
941 if ((bs
= backing_store_lookup(backing_store
))
942 == BACKING_STORE_NULL
)
943 return KERN_INVALID_ARGUMENT
;
946 for (i
= 0; i
<= paging_segment_max
; i
++) {
947 ps
= paging_segments
[i
];
948 if (ps
== PAGING_SEGMENT_NULL
)
952 * Check for overlap on same device.
954 if (!(ps
->ps_device
!= device
955 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
956 || offset
+ count
<= ps
->ps_offset
)) {
959 return KERN_INVALID_ARGUMENT
;
965 * Set up the paging segment
967 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
968 if (ps
== PAGING_SEGMENT_NULL
) {
970 return KERN_RESOURCE_SHORTAGE
;
973 ps
->ps_segtype
= PS_PARTITION
;
974 ps
->ps_device
= device
;
975 ps
->ps_offset
= offset
;
976 ps
->ps_record_shift
= local_log2(vm_page_size
/ record_size
);
977 ps
->ps_recnum
= count
;
978 ps
->ps_pgnum
= count
>> ps
->ps_record_shift
;
980 ps
->ps_pgcount
= ps
->ps_pgnum
;
981 ps
->ps_clshift
= local_log2(bs
->bs_clsize
);
982 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
986 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
988 kfree((vm_offset_t
)ps
, sizeof *ps
);
990 return KERN_RESOURCE_SHORTAGE
;
992 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
993 clrbit(ps
->ps_bmap
, i
);
996 ps
->ps_going_away
= FALSE
;
999 if ((error
= ps_enter(ps
)) != 0) {
1000 kfree((vm_offset_t
)ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1001 kfree((vm_offset_t
)ps
, sizeof *ps
);
1003 return KERN_RESOURCE_SHORTAGE
;
1006 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1007 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1011 dp_pages_free
+= ps
->ps_pgcount
;
1014 bs_more_space(ps
->ps_clcount
);
1016 DEBUG(DEBUG_BS_INTERNAL
,
1017 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1018 device
, offset
, count
, record_size
,
1019 ps
->ps_record_shift
, ps
->ps_pgnum
));
1021 return KERN_SUCCESS
;
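
/*
 * Worked example (numbers are assumptions for illustration): for a
 * device with 512-byte records, 4 KB pages and a cluster shift of 2,
 * the segment sizing arithmetic used above comes out as follows.
 */
#if 0	/* example only */
static void
segment_sizing_example(void)
{
	unsigned int record_size  = 512;	/* bytes per device record */
	unsigned int record_count = 262144;	/* records on the device (128 MB) */
	int clshift = 2;			/* 4 pages per cluster */

	int record_shift      = local_log2(vm_page_size / record_size);  /* log2(4096/512) == 3 */
	unsigned int pages    = record_count >> record_shift;		  /* 32768 pages */
	unsigned int clusters = pages >> clshift;			  /* 8192 clusters */

	(void) clusters;
}
#endif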
1027 MACH_PORT_FACE master
)
1029 security_token_t null_security_token
= {
1032 MACH_PORT_FACE device
;
1033 int info
[DEV_GET_SIZE_COUNT
];
1034 mach_msg_type_number_t info_count
;
1035 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1036 unsigned int rec_size
;
1039 MACH_PORT_FACE reply_port
;
1041 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1042 null_security_token
, dev_name
, &device
))
1045 info_count
= DEV_GET_SIZE_COUNT
;
1046 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1047 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1048 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1049 clsize
= bs_get_global_clsize(0);
1050 if (!default_pager_backing_store_create(
1051 default_pager_object
,
1052 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1053 (clsize
* vm_page_size
),
1055 if (!default_pager_add_segment(bs
, device
,
1056 0, count
, rec_size
)) {
1059 ipc_port_release_receive(bs
);
1063 ipc_port_release_send(device
);
1066 #endif /* DEVICE_PAGING */
1071 vs_alloc_async(void)
1073 struct vs_async
*vsa
;
1074 MACH_PORT_FACE reply_port
;
1078 if (vs_async_free_list
== NULL
) {
1080 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1083 * Try allocating a reply port named after the
1084 * address of the vs_async structure.
1086 struct vstruct_alias
*alias_struct
;
1088 reply_port
= ipc_port_alloc_kernel();
1089 alias_struct
= (struct vstruct_alias
*)
1090 kalloc(sizeof (struct vstruct_alias
));
1091 if(alias_struct
!= NULL
) {
1092 alias_struct
->vs
= (struct vstruct
*)vsa
;
1093 alias_struct
->name
= ISVS
;
1094 reply_port
->alias
= (int) alias_struct
;
1095 vsa
->reply_port
= reply_port
;
1096 vs_alloc_async_count
++;
1099 vs_alloc_async_failed
++;
1100 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1102 kfree((vm_offset_t
)vsa
,
1103 sizeof (struct vs_async
));
1108 vsa
= vs_async_free_list
;
1109 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1118 struct vs_async
*vsa
)
1121 vsa
->vsa_next
= vs_async_free_list
;
1122 vs_async_free_list
= vsa
;
1126 #else /* VS_ASYNC_REUSE */
1129 vs_alloc_async(void)
1131 struct vs_async
*vsa
;
1132 MACH_PORT_FACE reply_port
;
1135 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1138 * Try allocating a reply port named after the
1139 * address of the vs_async structure.
1141 reply_port
= ipc_port_alloc_kernel();
1142 alias_struct
= (vstruct_alias
*)
1143 kalloc(sizeof (struct vstruct_alias
));
1144 if(alias_struct
!= NULL
) {
1145 alias_struct
->vs
= reply_port
;
1146 alias_struct
->name
= ISVS
;
1147 reply_port
->alias
= (int) vsa
;
1148 vsa
->reply_port
= reply_port
;
1149 vs_alloc_async_count
++;
1152 vs_alloc_async_failed
++;
1153 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1155 kfree((vm_offset_t
) vsa
,
1156 sizeof (struct vs_async
));
1166 struct vs_async
*vsa
)
1168 MACH_PORT_FACE reply_port
;
1171 reply_port
= vsa
->reply_port
;
1172 kfree((vm_offset_t
) reply_port
->alias
, sizeof (struct vstuct_alias
));
1173 kfree((vm_offset_t
) vsa
, sizeof (struct vs_async
));
1174 ipc_port_dealloc_kernel((MACH_PORT_FACE
) (reply_port
));
1177 vs_alloc_async_count
--;
1182 #endif /* VS_ASYNC_REUSE */
1184 zone_t vstruct_zone
;
1193 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1194 if (vs
== VSTRUCT_NULL
) {
1195 return VSTRUCT_NULL
;
1201 * The following fields will be provided later.
1203 vs
->vs_mem_obj
= NULL
;
1204 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1205 vs
->vs_references
= 1;
1209 vs
->vs_waiting_seqno
= FALSE
;
1210 vs
->vs_waiting_read
= FALSE
;
1211 vs
->vs_waiting_write
= FALSE
;
1212 vs
->vs_waiting_async
= FALSE
;
1214 mutex_init(&vs
->vs_waiting_seqno
, ETAP_DPAGE_VSSEQNO
);
1215 mutex_init(&vs
->vs_waiting_read
, ETAP_DPAGE_VSREAD
);
1216 mutex_init(&vs
->vs_waiting_write
, ETAP_DPAGE_VSWRITE
);
1217 mutex_init(&vs
->vs_waiting_refs
, ETAP_DPAGE_VSREFS
);
1218 mutex_init(&vs
->vs_waiting_async
, ETAP_DPAGE_VSASYNC
);
1226 vs
->vs_clshift
= local_log2(bs_get_global_clsize(0));
1227 vs
->vs_size
= ((atop_32(round_page_32(size
)) - 1) >> vs
->vs_clshift
) + 1;
1228 vs
->vs_async_pending
= 0;
1231 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1232 * depending on the size of the memory object.
1234 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1235 vs
->vs_imap
= (struct vs_map
**)
1236 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1237 vs
->vs_indirect
= TRUE
;
1239 vs
->vs_dmap
= (struct vs_map
*)
1240 kalloc(CLMAP_SIZE(vs
->vs_size
));
1241 vs
->vs_indirect
= FALSE
;
1243 vs
->vs_xfer_pending
= FALSE
;
1244 DEBUG(DEBUG_VS_INTERNAL
,
1245 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1248 * Check to see that we got the space.
1251 kfree((vm_offset_t
)vs
, sizeof *vs
);
1252 return VSTRUCT_NULL
;
1256 * Zero the indirect pointers, or clear the direct pointers.
1258 if (vs
->vs_indirect
)
1259 memset(vs
->vs_imap
, 0,
1260 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1262 for (i
= 0; i
< vs
->vs_size
; i
++)
1263 VSM_CLR(vs
->vs_dmap
[i
]);
1265 VS_MAP_LOCK_INIT(vs
);
1267 bs_commit(vs
->vs_size
);
1272 paging_segment_t
ps_select_segment(int, int *); /* forward */
1279 paging_segment_t ps
;
1284 * Optimize case where there's only one segment.
1285 * paging_segment_max will index the one and only segment.
1289 if (paging_segment_count
== 1) {
1290 paging_segment_t lps
; /* used to avoid extra PS_UNLOCK */
1291 ipc_port_t trigger
= IP_NULL
;
1293 ps
= paging_segments
[paging_segment_max
];
1294 *psindex
= paging_segment_max
;
1296 if (ps
->ps_going_away
) {
1297 /* this segment is being turned off */
1298 lps
= PAGING_SEGMENT_NULL
;
1300 ASSERT(ps
->ps_clshift
>= shift
);
1301 if (ps
->ps_clcount
) {
1303 dp_pages_free
-= 1 << ps
->ps_clshift
;
1304 if(min_pages_trigger_port
&&
1305 (dp_pages_free
< minimum_pages_remaining
)) {
1306 trigger
= min_pages_trigger_port
;
1307 min_pages_trigger_port
= NULL
;
1312 lps
= PAGING_SEGMENT_NULL
;
1317 if (trigger
!= IP_NULL
) {
1318 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1319 ipc_port_release_send(trigger
);
1324 if (paging_segment_count
== 0) {
1326 return PAGING_SEGMENT_NULL
;
1330 i
>= BS_MINPRI
; i
--) {
1333 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1334 (ps_select_array
[i
] == BS_FULLPRI
))
1336 start_index
= ps_select_array
[i
];
1338 if(!(paging_segments
[start_index
])) {
1340 physical_transfer_cluster_count
= 0;
1342 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1343 (((paging_segments
[start_index
])->ps_clshift
)
1344 + vm_page_shift
))) {
1345 physical_transfer_cluster_count
= 0;
1346 j
= start_index
+ 1;
1348 physical_transfer_cluster_count
+=1;
1350 if(start_index
== 0)
1351 start_index
= paging_segment_max
;
1353 start_index
= start_index
- 1;
1357 if (j
> paging_segment_max
)
1359 if ((ps
= paging_segments
[j
]) &&
1360 (ps
->ps_bs
->bs_priority
== i
)) {
1362 * Force the ps cluster size to be
1363 * >= that of the vstruct.
1366 if (ps
->ps_going_away
) {
1367 /* this segment is being turned off */
1368 } else if ((ps
->ps_clcount
) &&
1369 (ps
->ps_clshift
>= shift
)) {
1370 ipc_port_t trigger
= IP_NULL
;
1373 dp_pages_free
-= 1 << ps
->ps_clshift
;
1374 if(min_pages_trigger_port
&&
1376 minimum_pages_remaining
)) {
1377 trigger
= min_pages_trigger_port
;
1378 min_pages_trigger_port
= NULL
;
1382 * found one, quit looking.
1384 ps_select_array
[i
] = j
;
1387 if (trigger
!= IP_NULL
) {
1388 default_pager_space_alert(
1391 ipc_port_release_send(trigger
);
1398 if (j
== start_index
) {
1400 * none at this priority -- mark it full
1402 ps_select_array
[i
] = BS_FULLPRI
;
1409 return PAGING_SEGMENT_NULL
;
1412 vm_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1415 ps_allocate_cluster(
1418 paging_segment_t use_ps
)
1422 paging_segment_t ps
;
1423 vm_offset_t cluster
;
1424 ipc_port_t trigger
= IP_NULL
;
1427 * Find best paging segment.
1428 * ps_select_segment will decrement cluster count on ps.
1429 * Must pass cluster shift to find the most appropriate segment.
1431 /* NOTE: The addition of paging segment delete capability threatened
1432 * to seriously complicate the treatment of paging segments in this
1433 * module and the ones that call it (notably ps_clmap), because of the
1434 * difficulty in assuring that the paging segment would continue to
1435 * exist between being unlocked and locked. This was
1436 * avoided because all calls to this module are based in either
1437 * dp_memory_object calls which rely on the vs lock, or by
1438 * the transfer function which is part of the segment delete path.
1439 * The transfer function which is part of paging segment delete is
1440 * protected from multiple callers by the backing store lock.
1441 * The paging segment delete function treats mappings to a paging
1442 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1443 * while data is transferred to the remaining segments. This is in
1444 * line with the view that incomplete or in-transition mappings between
1445 * data, a vstruct, and backing store are protected by the vs lock.
1446 * This and the ordering of the paging segment "going_away" bit setting
1449 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1454 ASSERT(ps
->ps_clcount
!= 0);
1457 dp_pages_free
-= 1 << ps
->ps_clshift
;
1458 if(min_pages_trigger_port
&&
1459 (dp_pages_free
< minimum_pages_remaining
)) {
1460 trigger
= min_pages_trigger_port
;
1461 min_pages_trigger_port
= NULL
;
1465 if (trigger
!= IP_NULL
) {
1466 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1467 ipc_port_release_send(trigger
);
1470 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1471 PAGING_SEGMENT_NULL
) {
1473 bs_no_paging_space(TRUE
);
1478 dprintf(("no space in available paging segments; "
1479 "swapon suggested\n"));
1480 /* the count got off maybe, reset to zero */
1483 if(min_pages_trigger_port
) {
1484 trigger
= min_pages_trigger_port
;
1485 min_pages_trigger_port
= NULL
;
1489 if (trigger
!= IP_NULL
) {
1490 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1491 ipc_port_release_send(trigger
);
1493 return (vm_offset_t
) -1;
1497 * Look for an available cluster. At the end of the loop,
1498 * byte_num is the byte offset and bit_num is the bit offset of the
1499 * first zero bit in the paging segment bitmap.
1502 byte_num
= ps
->ps_hint
;
1503 for (; byte_num
< howmany(ps
->ps_ncls
, NBBY
); byte_num
++) {
1504 if (*(ps
->ps_bmap
+ byte_num
) != BYTEMASK
) {
1505 for (bit_num
= 0; bit_num
< NBBY
; bit_num
++) {
1506 if (isclr((ps
->ps_bmap
+ byte_num
), bit_num
))
1509 ASSERT(bit_num
!= NBBY
);
1513 ps
->ps_hint
= byte_num
;
1514 cluster
= (byte_num
*NBBY
) + bit_num
;
1516 /* Space was reserved, so this must be true */
1517 ASSERT(cluster
< ps
->ps_ncls
);
1519 setbit(ps
->ps_bmap
, cluster
);
1525 void ps_deallocate_cluster(paging_segment_t
, vm_offset_t
); /* forward */
1528 ps_deallocate_cluster(
1529 paging_segment_t ps
,
1530 vm_offset_t cluster
)
1533 if (cluster
>= (vm_offset_t
) ps
->ps_ncls
)
1534 panic("ps_deallocate_cluster: Invalid cluster number");
1537 * Lock the paging segment, clear the cluster's bitmap and increment the
1538 * number of free cluster.
1542 clrbit(ps
->ps_bmap
, cluster
);
1544 dp_pages_free
+= 1 << ps
->ps_clshift
;
1548 * Move the hint down to the freed cluster if it is
1549 * less than the current hint.
1551 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1552 ps
->ps_hint
= (cluster
/NBBY
);
1558 * If we're freeing space on a full priority, reset the array.
1561 if (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1562 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1568 void ps_dealloc_vsmap(struct vs_map
*, vm_size_t
); /* forward */
1572 struct vs_map
*vsmap
,
1576 for (i
= 0; i
< size
; i
++)
1577 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1578 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1579 VSM_CLOFF(vsmap
[i
]));
1592 * If this is an indirect structure, then we walk through the valid
1593 * (non-zero) indirect pointers and deallocate the clusters
1594 * associated with each used map entry (via ps_dealloc_vsmap).
1595 * When all of the clusters in an indirect block have been
1596 * freed, we deallocate the block. When all of the indirect
1597 * blocks have been deallocated we deallocate the memory
1598 * holding the indirect pointers.
1600 if (vs
->vs_indirect
) {
1601 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1602 if (vs
->vs_imap
[i
] != NULL
) {
1603 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1604 kfree((vm_offset_t
)vs
->vs_imap
[i
],
1608 kfree((vm_offset_t
)vs
->vs_imap
,
1609 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1612 * Direct map. Free used clusters, then memory.
1614 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1615 kfree((vm_offset_t
)vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1619 bs_commit(- vs
->vs_size
);
1621 zfree(vstruct_zone
, (vm_offset_t
)vs
);
1624 int ps_map_extend(vstruct_t
, int); /* forward */
1630 struct vs_map
**new_imap
;
1631 struct vs_map
*new_dmap
= NULL
;
1634 void *old_map
= NULL
;
1635 int old_map_size
= 0;
1637 if (vs
->vs_size
>= new_size
) {
1639 * Someone has already done the work.
	/*
	 * If the new size extends into the indirect range, then we have one
	 * of two cases: we are going from indirect to indirect, or we are
	 * going from direct to indirect.  If we are going from indirect to
	 * indirect, then it is possible that the new size will fit in the old
	 * indirect map.  If this is the case, then just reset the size of the
	 * vstruct map and we are done.  If the new size will not
	 * fit into the old indirect map, then we have to allocate a new
	 * indirect map and copy the old map pointers into this new map.
	 *
	 * If we are going from direct to indirect, then we have to allocate a
	 * new indirect map and copy the old direct pages into the first
	 * indirect page of the new map.
	 * NOTE: allocating memory here is dangerous, as we're in the
	 * pageout path.
	 */
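
	/*
	 * Sketch of the two-level lookup that the indirect map enables
	 * (illustration only, not original code): a cluster index selects
	 * an indirect block, then a slot within that block; a direct map
	 * is just a single block.
	 */
#if 0	/* example only */
static struct vs_map *
clmap_slot(vstruct_t vs, vm_offset_t cluster)
{
	struct vs_map	*vsmap;

	if (vs->vs_indirect)
		vsmap = vs->vs_imap[cluster / CLMAP_ENTRIES];	/* indirect block */
	else
		vsmap = vs->vs_dmap;				/* single direct block */
	return (vsmap == NULL) ? NULL : vsmap + (cluster % CLMAP_ENTRIES);
}
#endif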
1660 if (INDIRECT_CLMAP(new_size
)) {
1661 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1664 * Get a new indirect map and zero it.
1666 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1667 if (vs
->vs_indirect
&&
1668 (new_map_size
== old_map_size
)) {
1669 bs_commit(new_size
- vs
->vs_size
);
1670 vs
->vs_size
= new_size
;
1674 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1675 if (new_imap
== NULL
) {
1678 memset(new_imap
, 0, new_map_size
);
1680 if (vs
->vs_indirect
) {
1681 /* Copy old entries into new map */
1682 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1683 /* Arrange to free the old map */
1684 old_map
= (void *) vs
->vs_imap
;
1686 } else { /* Old map was a direct map */
1687 /* Allocate an indirect page */
1688 if ((new_imap
[0] = (struct vs_map
*)
1689 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1690 kfree((vm_offset_t
)new_imap
, new_map_size
);
1693 new_dmap
= new_imap
[0];
1694 newdsize
= CLMAP_ENTRIES
;
1698 newdsize
= new_size
;
1700 * If the new map is a direct map, then the old map must
1701 * also have been a direct map. All we have to do is
1702 * to allocate a new direct map, copy the old entries
1703 * into it and free the old map.
1705 if ((new_dmap
= (struct vs_map
*)
1706 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1712 /* Free the old map */
1713 old_map
= (void *) vs
->vs_dmap
;
1714 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1716 /* Copy info from the old map into the new map */
1717 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1719 /* Initialize the rest of the new map */
1720 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1721 VSM_CLR(new_dmap
[i
]);
1724 vs
->vs_imap
= new_imap
;
1725 vs
->vs_indirect
= TRUE
;
1727 vs
->vs_dmap
= new_dmap
;
1728 bs_commit(new_size
- vs
->vs_size
);
1729 vs
->vs_size
= new_size
;
1731 kfree((vm_offset_t
)old_map
, old_map_size
);
1739 struct clmap
*clmap
,
1744 vm_offset_t cluster
; /* The cluster of offset. */
1745 vm_offset_t newcl
; /* The new cluster allocated. */
1748 struct vs_map
*vsmap
;
1752 ASSERT(vs
->vs_dmap
);
1753 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1756 * Initialize cluster error value
1758 clmap
->cl_error
= 0;
1761 * If the object has grown, extend the page map.
1763 if (cluster
>= vs
->vs_size
) {
1764 if (flag
== CL_FIND
) {
1765 /* Do not allocate if just doing a lookup */
1767 return (vm_offset_t
) -1;
1769 if (ps_map_extend(vs
, cluster
+ 1)) {
1771 return (vm_offset_t
) -1;
1776 * Look for the desired cluster. If the map is indirect, then we
1777 * have a two level lookup. First find the indirect block, then
1778 * find the actual cluster. If the indirect block has not yet
1779 * been allocated, then do so. If the cluster has not yet been
1780 * allocated, then do so.
1782 * If any of the allocations fail, then return an error.
1783 * Don't allocate if just doing a lookup.
1785 if (vs
->vs_indirect
) {
1786 long ind_block
= cluster
/CLMAP_ENTRIES
;
1788 /* Is the indirect block allocated? */
1789 vsmap
= vs
->vs_imap
[ind_block
];
1790 if (vsmap
== NULL
) {
1791 if (flag
== CL_FIND
) {
1793 return (vm_offset_t
) -1;
1796 /* Allocate the indirect block */
1797 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
1798 if (vsmap
== NULL
) {
1800 return (vm_offset_t
) -1;
1802 /* Initialize the cluster offsets */
1803 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
1805 vs
->vs_imap
[ind_block
] = vsmap
;
1808 vsmap
= vs
->vs_dmap
;
1811 vsmap
+= cluster%CLMAP_ENTRIES
;
1814 * At this point, vsmap points to the struct vs_map desired.
1816 * Look in the map for the cluster, if there was an error on a
1817 * previous write, flag it and return. If it is not yet
1818 * allocated, then allocate it, if we're writing; if we're
1819 * doing a lookup and the cluster's not allocated, return error.
1821 if (VSM_ISERR(*vsmap
)) {
1822 clmap
->cl_error
= VSM_GETERR(*vsmap
);
1824 return (vm_offset_t
) -1;
1825 } else if (VSM_ISCLR(*vsmap
)) {
1828 if (flag
== CL_FIND
) {
1830 * If there's an error and the entry is clear, then
1831 * we've run out of swap space. Record the error
1835 VSM_SETERR(*vsmap
, error
);
1838 return (vm_offset_t
) -1;
1841 * Attempt to allocate a cluster from the paging segment
1843 newcl
= ps_allocate_cluster(vs
, &psindex
,
1844 PAGING_SEGMENT_NULL
);
1847 return (vm_offset_t
) -1;
1850 VSM_SETCLOFF(*vsmap
, newcl
);
1851 VSM_SETPS(*vsmap
, psindex
);
1854 newcl
= VSM_CLOFF(*vsmap
);
1857 * Fill in pertinent fields of the clmap
1859 clmap
->cl_ps
= VSM_PS(*vsmap
);
1860 clmap
->cl_numpages
= VSCLSIZE(vs
);
1861 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
1864 * Byte offset in paging segment is byte offset to cluster plus
1865 * byte offset within cluster. It looks ugly, but should be
1868 ASSERT(trunc_page(offset
) == offset
);
1869 newcl
= ptoa_32(newcl
) << vs
->vs_clshift
;
1870 newoff
= offset
& ((1<<(vm_page_shift
+ vs
->vs_clshift
)) - 1);
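
	/*
	 * Worked example (values are assumptions for illustration): with
	 * 4 KB pages (vm_page_shift == 12) and vs_clshift == 2, a cluster
	 * covers 16 KB.  If the cluster was allocated at cluster offset 5
	 * within its paging segment and the requested offset falls 0x1000
	 * bytes into the cluster, then:
	 *
	 *	newcl  = ptoa_32(5) << 2                  = 0x14000
	 *	newoff = offset & ((1 << (12 + 2)) - 1)   = 0x01000
	 *
	 * and the value returned below, newcl + newoff, is 0x15000, the
	 * byte offset of the page within the paging segment.
	 */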
1871 if (flag
== CL_ALLOC
) {
1873 * set bits in the allocation bitmap according to which
1874 * pages were requested. size is in bytes.
1876 i
= atop_32(newoff
);
1877 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
1878 VSM_SETALLOC(*vsmap
, i
);
1880 size
-= vm_page_size
;
1883 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
1886 * Offset is not cluster aligned, so number of pages
1887 * and bitmaps must be adjusted
1889 clmap
->cl_numpages
-= atop_32(newoff
);
1890 CLMAP_SHIFT(clmap
, vs
);
1891 CLMAP_SHIFTALLOC(clmap
, vs
);
1896 * The setting of valid bits and handling of write errors
1897 * must be done here, while we hold the lock on the map.
1898 * It logically should be done in ps_vs_write_complete().
1899 * The size and error information has been passed from
1900 * ps_vs_write_complete(). If the size parameter is non-zero,
1901 * then there is work to be done. If error is also non-zero,
1902 * then the error number is recorded in the cluster and the
1903 * entire cluster is in error.
1905 if (size
&& flag
== CL_FIND
) {
1906 vm_offset_t off
= (vm_offset_t
) 0;
1909 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
1911 VSM_SETPG(*vsmap
, i
);
1912 size
-= vm_page_size
;
1914 ASSERT(i
<= VSCLSIZE(vs
));
1916 BS_STAT(clmap
->cl_ps
->ps_bs
,
1917 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
1919 off
= VSM_CLOFF(*vsmap
);
1920 VSM_SETERR(*vsmap
, error
);
1923 * Deallocate cluster if error, and no valid pages
1926 if (off
!= (vm_offset_t
) 0)
1927 ps_deallocate_cluster(clmap
->cl_ps
, off
);
1929 return (vm_offset_t
) 0;
1933 DEBUG(DEBUG_VS_INTERNAL
,
1934 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1935 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
1936 DEBUG(DEBUG_VS_INTERNAL
,
1937 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1938 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
1939 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
1941 return (newcl
+ newoff
);
1944 void ps_clunmap(vstruct_t
, vm_offset_t
, vm_size_t
); /* forward */
1952 vm_offset_t cluster
; /* The cluster number of offset */
1953 struct vs_map
*vsmap
;
1958 * Loop through all clusters in this range, freeing paging segment
1959 * clusters and map entries as encountered.
1961 while (length
> 0) {
1965 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1966 if (vs
->vs_indirect
) /* indirect map */
1967 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
1969 vsmap
= vs
->vs_dmap
;
1970 if (vsmap
== NULL
) {
1974 vsmap
+= cluster%CLMAP_ENTRIES
;
1975 if (VSM_ISCLR(*vsmap
)) {
1976 length
-= vm_page_size
;
1977 offset
+= vm_page_size
;
1981 * We've got a valid mapping. Clear it and deallocate
1982 * paging segment cluster pages.
	 * Optimize for entire cluster clearing.
1985 if (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) {
1987 * Not cluster aligned.
1989 ASSERT(trunc_page(newoff
) == newoff
);
1990 i
= atop_32(newoff
);
1993 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
1994 VSM_CLRPG(*vsmap
, i
);
1995 VSM_CLRALLOC(*vsmap
, i
);
1996 length
-= vm_page_size
;
1997 offset
+= vm_page_size
;
2002 * If map entry is empty, clear and deallocate cluster.
2004 if (!VSM_ALLOC(*vsmap
)) {
2005 ps_deallocate_cluster(VSM_PS(*vsmap
),
2014 void ps_vs_write_complete(vstruct_t
, vm_offset_t
, vm_size_t
, int); /* forward */
2017 ps_vs_write_complete(
2026 * Get the struct vsmap for this cluster.
2027 * Use READ, even though it was written, because the
2028 * cluster MUST be present, unless there was an error
2029 * in the original ps_clmap (e.g. no space), in which
2030 * case, nothing happens.
2032 * Must pass enough information to ps_clmap to allow it
2033 * to set the vs_map structure bitmap under lock.
2035 (void) ps_clmap(vs
, offset
, &clmap
, CL_FIND
, size
, error
);
2038 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, vm_offset_t
, vm_offset_t
, vm_size_t
, boolean_t
, int); /* forward */
2041 vs_cl_write_complete(
2043 paging_segment_t ps
,
2054 * For internal objects, the error is recorded on a
2055 * per-cluster basis by ps_clmap() which is called
2056 * by ps_vs_write_complete() below.
2058 dprintf(("write failed error = 0x%x\n", error
));
2059 /* add upl_abort code here */
2061 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2063 * Notify the vstruct mapping code, so it can do its accounting.
2065 ps_vs_write_complete(vs
, offset
, size
, error
);
2069 ASSERT(vs
->vs_async_pending
> 0);
2070 vs
->vs_async_pending
-= size
;
2071 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2072 vs
->vs_waiting_async
= FALSE
;
2074 /* mutex_unlock(&vs->vs_waiting_async); */
2075 thread_wakeup(&vs
->vs_async_pending
);
2082 #ifdef DEVICE_PAGING
2083 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2087 MACH_PORT_FACE reply_port
,
2088 kern_return_t device_code
,
2089 io_buf_len_t bytes_written
)
2091 struct vs_async
*vsa
;
2093 vsa
= (struct vs_async
*)
2094 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2096 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2097 device_code
= KERN_FAILURE
;
2100 vsa
->vsa_error
= device_code
;
2103 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2104 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2105 /* revisit when async disk segments redone */
2106 if(vsa
->vsa_error
) {
2107 /* need to consider error condition. re-write data or */
2108 /* throw it away here. */
2110 if(vm_map_copyout(kernel_map
, &ioaddr
,
2111 (vm_map_copy_t
)vsa
->vsa_addr
) != KERN_SUCCESS
)
2112 panic("vs_cluster_write: unable to copy source list\n");
2113 vm_deallocate(kernel_map
, ioaddr
, vsa
->vsa_size
);
2115 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2116 vsa
->vsa_size
, vsa
->vsa_error
);
2118 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2119 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2124 return KERN_SUCCESS
;
2127 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2129 device_write_reply_inband(
2130 MACH_PORT_FACE reply_port
,
2131 kern_return_t return_code
,
2132 io_buf_len_t bytes_written
)
2134 panic("device_write_reply_inband: illegal");
2135 return KERN_SUCCESS
;
2138 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2141 MACH_PORT_FACE reply_port
,
2142 kern_return_t return_code
,
2144 mach_msg_type_number_t dataCnt
)
2146 struct vs_async
*vsa
;
2147 vsa
= (struct vs_async
*)
2148 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2149 vsa
->vsa_addr
= (vm_offset_t
)data
;
2150 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2151 vsa
->vsa_error
= return_code
;
2152 thread_wakeup(&vsa
->vsa_lock
);
2153 return KERN_SUCCESS
;
2156 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2158 device_read_reply_inband(
2159 MACH_PORT_FACE reply_port
,
2160 kern_return_t return_code
,
2161 io_buf_ptr_inband_t data
,
2162 mach_msg_type_number_t dataCnt
)
2164 panic("device_read_reply_inband: illegal");
2165 return KERN_SUCCESS
;
2168 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2170 device_read_reply_overwrite(
2171 MACH_PORT_FACE reply_port
,
2172 kern_return_t return_code
,
2173 io_buf_len_t bytes_read
)
2175 panic("device_read_reply_overwrite: illegal\n");
2176 return KERN_SUCCESS
;
2179 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2182 MACH_PORT_FACE reply_port
,
2183 kern_return_t return_code
,
2184 MACH_PORT_FACE device_port
)
2186 panic("device_open_reply: illegal\n");
2187 return KERN_SUCCESS
;
2190 kern_return_t
ps_read_device(paging_segment_t
, vm_offset_t
, vm_offset_t
*, unsigned int, unsigned int *, int); /* forward */
2194 paging_segment_t ps
,
2196 vm_offset_t
*bufferp
,
2198 unsigned int *residualp
,
2202 recnum_t dev_offset
;
2203 unsigned int bytes_wanted
;
2204 unsigned int bytes_read
;
2205 unsigned int total_read
;
2206 vm_offset_t dev_buffer
;
2207 vm_offset_t buf_ptr
;
2208 unsigned int records_read
;
2209 struct vs_async
*vsa
;
2210 mutex_t vs_waiting_read_reply
;
2213 vm_map_copy_t device_data
= NULL
;
2214 default_pager_thread_t
*dpt
= NULL
;
2216 device
= dev_port_lookup(ps
->ps_device
);
2217 clustered_reads
[atop_32(size
)]++;
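
	/*
	 * Worked example (values are assumptions for illustration): with
	 * 4 KB pages and 512-byte device records, ps_record_shift == 3, so
	 * the dev_offset computation just below converts a byte offset into
	 * device records by shifting right by
	 * (vm_page_shift - ps_record_shift) == 12 - 3 == 9, i.e. dividing
	 * by 512.  An offset of 0x8000 (32 KB) into the segment becomes
	 * record 64, added to the segment's starting record, ps_offset.
	 */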
2219 dev_offset
= (ps
->ps_offset
+
2220 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2221 bytes_wanted
= size
;
2223 *bufferp
= (vm_offset_t
)NULL
;
2226 vsa
= VS_ALLOC_ASYNC();
2230 vsa
->vsa_offset
= 0;
2234 mutex_init(&vsa
->vsa_lock
, ETAP_DPAGE_VSSEQNO
);
2235 ip_lock(vsa
->reply_port
);
2236 vsa
->reply_port
->ip_sorights
++;
2237 ip_reference(vsa
->reply_port
);
2238 ip_unlock(vsa
->reply_port
);
2239 kr
= ds_device_read_common(device
,
2241 (mach_msg_type_name_t
)
2242 MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2246 (IO_READ
| IO_CALL
),
2247 (io_buf_ptr_t
*) &dev_buffer
,
2248 (mach_msg_type_number_t
*) &bytes_read
);
2249 if(kr
== MIG_NO_REPLY
) {
2250 assert_wait(&vsa
->vsa_lock
, THREAD_UNINT
);
2251 thread_block(THREAD_CONTINUE_NULL
);
2253 dev_buffer
= vsa
->vsa_addr
;
2254 bytes_read
= (unsigned int)vsa
->vsa_size
;
2255 kr
= vsa
->vsa_error
;
2258 if (kr
!= KERN_SUCCESS
|| bytes_read
== 0) {
2261 total_read
+= bytes_read
;
2264 * If we got the entire range, use the returned dev_buffer.
2266 if (bytes_read
== size
) {
2267 *bufferp
= (vm_offset_t
)dev_buffer
;
2272 dprintf(("read only %d bytes out of %d\n",
2273 bytes_read
, bytes_wanted
));
2276 dpt
= get_read_buffer();
2277 buf_ptr
= dpt
->dpt_buffer
;
2278 *bufferp
= (vm_offset_t
)buf_ptr
;
2281 * Otherwise, copy the data into the provided buffer (*bufferp)
2282 * and append the rest of the range as it comes in.
2284 memcpy((void *) buf_ptr
, (void *) dev_buffer
, bytes_read
);
2285 buf_ptr
+= bytes_read
;
2286 bytes_wanted
-= bytes_read
;
2287 records_read
= (bytes_read
>>
2288 (vm_page_shift
- ps
->ps_record_shift
));
2289 dev_offset
+= records_read
;
2290 DEBUG(DEBUG_VS_INTERNAL
,
2291 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2292 dev_buffer
, bytes_read
));
2293 if (vm_deallocate(kernel_map
, dev_buffer
, bytes_read
)
2295 Panic("dealloc buf");
2296 } while (bytes_wanted
);
2298 *residualp
= size
- total_read
;
2299 if((dev_buffer
!= *bufferp
) && (total_read
!= 0)) {
2300 vm_offset_t temp_buffer
;
2301 vm_allocate(kernel_map
, &temp_buffer
, total_read
, TRUE
);
2302 memcpy((void *) temp_buffer
, (void *) *bufferp
, total_read
);
2303 if(vm_map_copyin_page_list(kernel_map
, temp_buffer
, total_read
,
2304 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2305 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2306 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2307 (vm_map_copy_t
*)&device_data
, FALSE
))
2308 panic("ps_read_device: cannot copyin locally provided buffer\n");
2310 else if((kr
== KERN_SUCCESS
) && (total_read
!= 0) && (dev_buffer
!= 0)){
2311 if(vm_map_copyin_page_list(kernel_map
, dev_buffer
, bytes_read
,
2312 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2313 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2314 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2315 (vm_map_copy_t
*)&device_data
, FALSE
))
2316 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2321 *bufferp
= (vm_offset_t
)device_data
;
2324 /* Free the receive buffer */
2325 dpt
->checked_out
= 0;
2326 thread_wakeup(&dpt_array
);
2328 return KERN_SUCCESS
;
2331 kern_return_t
ps_write_device(paging_segment_t
, vm_offset_t
, vm_offset_t
, unsigned int, struct vs_async
*); /* forward */
2335 paging_segment_t ps
,
2339 struct vs_async
*vsa
)
2341 recnum_t dev_offset
;
2342 io_buf_len_t bytes_to_write
, bytes_written
;
2343 recnum_t records_written
;
2345 MACH_PORT_FACE reply_port
;
2349 clustered_writes
[atop_32(size
)]++;
2351 dev_offset
= (ps
->ps_offset
+
2352 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2353 bytes_to_write
= size
;
2357 * Asynchronous write.
2359 reply_port
= vsa
->reply_port
;
2360 ip_lock(reply_port
);
2361 reply_port
->ip_sorights
++;
2362 ip_reference(reply_port
);
2363 ip_unlock(reply_port
);
2366 device
= dev_port_lookup(ps
->ps_device
);
2368 vsa
->vsa_addr
= addr
;
2369 kr
=ds_device_write_common(device
,
2371 (mach_msg_type_name_t
) MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2374 (io_buf_ptr_t
) addr
,
2376 (IO_WRITE
| IO_CALL
),
2379 if ((kr
!= KERN_SUCCESS
) && (kr
!= MIG_NO_REPLY
)) {
2381 dprintf(("%s0x%x, addr=0x%x,"
2382 "size=0x%x,offset=0x%x\n",
2383 "device_write_request returned ",
2384 kr
, addr
, size
, offset
));
2386 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2387 /* do the completion notification to free resources */
2388 device_write_reply(reply_port
, kr
, 0);
2393 * Synchronous write.
2397 device
= dev_port_lookup(ps
->ps_device
);
2398 kr
=ds_device_write_common(device
,
2402 (io_buf_ptr_t
) addr
,
2404 (IO_WRITE
| IO_SYNC
| IO_KERNEL_BUF
),
2407 if (kr
!= KERN_SUCCESS
) {
2408 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2409 "device_write returned ",
2410 kr
, addr
, size
, offset
));
2412 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2415 if (bytes_written
& ((vm_page_size
>> ps
->ps_record_shift
) - 1))
2416 Panic("fragmented write");
2417 records_written
= (bytes_written
>>
2418 (vm_page_shift
- ps
->ps_record_shift
));
2419 dev_offset
+= records_written
;
2421 if (bytes_written
!= bytes_to_write
) {
2422 dprintf(("wrote only %d bytes out of %d\n",
2423 bytes_written
, bytes_to_write
));
2426 bytes_to_write
-= bytes_written
;
2427 addr
+= bytes_written
;
2428 } while (bytes_to_write
> 0);
2430 return PAGER_SUCCESS
;
2434 #else /* !DEVICE_PAGING */
2438 paging_segment_t ps
,
2440 vm_offset_t
*bufferp
,
2442 unsigned int *residualp
,
2445 panic("ps_read_device not supported");
2449 paging_segment_t ps
,
2453 struct vs_async
*vsa
)
2455 panic("ps_write_device not supported");
2458 #endif /* DEVICE_PAGING */
2459 void pvs_object_data_provided(vstruct_t
, upl_t
, vm_offset_t
, vm_size_t
); /* forward */
2462 pvs_object_data_provided(
2469 DEBUG(DEBUG_VS_INTERNAL
,
2470 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2471 upl
, offset
, size
));
2474 GSTAT(global_stats
.gs_pages_in
+= atop_32(size
));
2478 ps_clunmap(vs
, offset
, size
);
2479 #endif /* USE_PRECIOUS */
2486 vm_offset_t vs_offset
,
2490 kern_return_t error
= KERN_SUCCESS
;
2492 unsigned int residual
;
2493 unsigned int request_flags
;
2500 vm_offset_t ps_offset
[(VM_SUPER_CLUSTER
/ PAGE_SIZE
) >> VSTRUCT_DEF_CLSHIFT
];
2501 paging_segment_t psp
[(VM_SUPER_CLUSTER
/ PAGE_SIZE
) >> VSTRUCT_DEF_CLSHIFT
];
2504 pages_in_cl
= 1 << vs
->vs_clshift
;
2505 cl_size
= pages_in_cl
* vm_page_size
;
2506 cl_mask
= cl_size
- 1;
    /*
     * This loop will be executed multiple times until the entire
     * request has been satisfied... if the request spans cluster
     * boundaries, the clusters will be checked for logical continuity,
     * if contiguous the I/O request will span multiple clusters, otherwise
     * it will be broken up into the minimal set of I/O's
     *
     * If there are holes in a request (either unallocated pages in a paging
     * segment or an unallocated paging segment), we stop
     * reading at the hole, inform the VM of any data read, inform
     * the VM of an unavailable range, then loop again, hoping to
     * find valid pages later in the requested range. This continues until
     * the entire range has been examined, and read, if present.
     */
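
    /*
     * Sizing sketch (illustrative; assumes 4K pages): with the default
     * cluster shift of 2 a cluster is 4 pages (16K), and a
     * VM_SUPER_CLUSTER of 0x40000 (256K) spans 16 such clusters, which
     * is what bounds the ps_offset[]/psp[] arrays declared above.
     */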
#if USE_PRECIOUS
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
    request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif

    while (cnt && (error == KERN_SUCCESS)) {
        int     page_list_count;

        if ((vs_offset & cl_mask) &&
            (cnt > (VM_SUPER_CLUSTER -
                    (vs_offset & cl_mask)))) {
            size = VM_SUPER_CLUSTER;
            size -= vs_offset & cl_mask;
        } else if (cnt > VM_SUPER_CLUSTER) {
            size = VM_SUPER_CLUSTER;
        } else {
            size = cnt;
        }
        cnt -= size;

        while (size > 0 && error == KERN_SUCCESS) {
            vm_offset_t     cur_offset;

            if ( !ps_info_valid) {
                ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
                psp[seg_index] = CLMAP_PS(clmap);
            }
            /*
             * skip over unallocated physical segments
             */
            if (ps_offset[seg_index] == (vm_offset_t) -1) {
                abort_size = cl_size - (vs_offset & cl_mask);
                abort_size = MIN(abort_size, size);

                page_list_count = 0;
                memory_object_super_upl_request(
                    vs->vs_control,
                    (memory_object_offset_t)vs_offset,
                    abort_size, abort_size,
                    &upl, NULL, &page_list_count,
                    request_flags);

                if (clmap.cl_error) {
                    upl_abort(upl, UPL_ABORT_ERROR);
                } else {
                    upl_abort(upl, UPL_ABORT_UNAVAILABLE);
                }
                upl_deallocate(upl);

                vs_offset += abort_size;
                continue;
            }
            cl_index = (vs_offset & cl_mask) / vm_page_size;

            for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
                /*
                 * skip over unallocated pages
                 */
                if (CLMAP_ISSET(clmap, cl_index))
                    break;
                abort_size += vm_page_size;
            }
            if (abort_size) {
                /*
                 * Let VM system know about holes in clusters.
                 */
                GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));

                page_list_count = 0;
                memory_object_super_upl_request(
                    vs->vs_control,
                    (memory_object_offset_t)vs_offset,
                    abort_size, abort_size,
                    &upl, NULL, &page_list_count,
                    request_flags);

                upl_abort(upl, UPL_ABORT_UNAVAILABLE);
                upl_deallocate(upl);

                vs_offset += abort_size;

                if (cl_index == pages_in_cl) {
                    /*
                     * if we're at the end of this physical cluster
                     * then bump to the next one and continue looking
                     */
                    continue;
                }
            }
            /*
             * remember the starting point of the first allocated page
             * for the I/O we're about to issue
             */
            beg_pseg = seg_index;
            beg_indx = cl_index;
            cur_offset = vs_offset;

            /*
             * calculate the size of the I/O that we can do...
             * this may span multiple physical segments if
             * they are contiguous
             */
            for (xfer_size = 0; xfer_size < size; ) {

                while (cl_index < pages_in_cl
                       && xfer_size < size) {
                    /*
                     * accumulate allocated pages within
                     * a physical segment
                     */
                    if (CLMAP_ISSET(clmap, cl_index)) {
                        xfer_size += vm_page_size;
                        cur_offset += vm_page_size;
                        cl_index++;

                        BS_STAT(psp[seg_index]->ps_bs,
                            psp[seg_index]->ps_bs->bs_pages_in++);
                    } else
                        break;
                }
                if (cl_index < pages_in_cl
                    || xfer_size >= size) {
                    /*
                     * we've hit an unallocated page or
                     * the end of this request... go fire
                     * the I/O
                     */
                    break;
                }
                /*
                 * we've hit the end of the current physical
                 * segment and there's more to do, so try
                 * moving to the next one
                 */
                seg_index++;

                ps_offset[seg_index] =
                    ps_clmap(vs,
                        cur_offset & ~cl_mask,
                        &clmap, CL_FIND, 0, 0);
                psp[seg_index] = CLMAP_PS(clmap);

                if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
                    /*
                     * if the physical segment we're about
                     * to step into is not contiguous to
                     * the one we're currently in, or it's
                     * in a different paging file, or
                     * it hasn't been allocated....
                     * we stop here and generate the I/O
                     */
                    break;
                }
                /*
                 * start with first page of the next physical
                 * segment
                 */
                cl_index = 0;
            }
            /*
             * we have a contiguous range of allocated pages
             * to read from
             */
            page_list_count = 0;
            memory_object_super_upl_request(vs->vs_control,
                (memory_object_offset_t)vs_offset,
                xfer_size, xfer_size,
                &upl, NULL, &page_list_count,
                request_flags | UPL_SET_INTERNAL);

            error = ps_read_file(psp[beg_pseg],
                upl, (vm_offset_t) 0,
                ps_offset[beg_pseg] +
                    (beg_indx * vm_page_size),
                xfer_size, &residual, 0);

            /*
             * Adjust counts and send response to VM.  Optimize
             * for the common case, i.e. no error and/or partial
             * data.  If there was an error, then we need to error
             * the entire range, even if some data was successfully
             * read.  If there was a partial read we may supply some
             * data and may error some as well.  In all cases the
             * VM must receive some notification for every page
             * in the range.
             */
            if ((error == KERN_SUCCESS) && (residual == 0)) {
                /*
                 * Got everything we asked for, supply the data
                 * to the VM.  Note that as a side effect of
                 * supplying the data, the buffer holding the
                 * supplied data is deallocated from the pager's
                 * address space.
                 */
                pvs_object_data_provided(
                    vs, upl, vs_offset, xfer_size);
            } else {
                failed_size = xfer_size;

                if (error == KERN_SUCCESS) {
                    if (residual == xfer_size) {
                        /*
                         * If a read operation returns no error
                         * and no data moved, we turn it into
                         * an error, assuming we're reading at
                         * or beyond EOF.
                         * Fall through and error the entire
                         * range.
                         */
                        error = KERN_FAILURE;
                    } else {
                        /*
                         * Otherwise, we have partial read. If
                         * the part read is a integral number
                         * of pages supply it. Otherwise round
                         * it up to a page boundary, zero fill
                         * the unread part, and supply it.
                         * Fall through and error the remainder
                         * of the range, if any.
                         */
                        lsize = (xfer_size - residual);

                        pvs_object_data_provided(
                            vs, upl, vs_offset, lsize);

                        if (lsize < xfer_size) {
                            failed_size = xfer_size - lsize;
                            error = KERN_FAILURE;
                        }
                    }
                }
            }
            /*
             * If there was an error in any part of the range, tell
             * the VM. Note that error is explicitly checked again
             * since it can be modified above.
             */
            if (error != KERN_SUCCESS) {
                BS_STAT(psp[beg_pseg]->ps_bs,
                    psp[beg_pseg]->ps_bs->bs_pages_in_fail
                        += atop_32(failed_size));
            }
            size -= xfer_size;
            vs_offset += xfer_size;
        }

    } /* END while (cnt && (error == 0)) */

    return error;
}
int vs_do_async_write = 1;
kern_return_t
vs_cluster_write(
    vstruct_t       vs,
    upl_t           internal_upl,
    vm_offset_t     offset,
    vm_size_t       cnt,
    boolean_t       dp_internal,
    int             flags)
{
    vm_offset_t     transfer_size;

    vm_offset_t     actual_offset;  /* Offset within paging segment */
    paging_segment_t ps;
    vm_offset_t     subx_size;
    vm_offset_t     mobj_base_addr;
    vm_offset_t     mobj_target_addr;

    struct vs_async *vsa;

    upl_page_info_t *pl;

    pages_in_cl = 1 << vs->vs_clshift;
    cl_size = pages_in_cl * vm_page_size;

    if (!dp_internal) {
        int             page_list_count;
        vm_offset_t     upl_offset;
        vm_offset_t     seg_offset;
        vm_offset_t     ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
        paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];

        if (bs_low) {
            super_size = cl_size;

            request_flags = UPL_NOBLOCK |
                UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
                UPL_NO_SYNC | UPL_SET_INTERNAL;
        } else {
            super_size = VM_SUPER_CLUSTER;

            request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
                UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
                UPL_NO_SYNC | UPL_SET_INTERNAL;
        }
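
        /*
         * Sketch of the intent (hedged; the bs_low test above is an
         * assumption about the low-space condition): under space
         * pressure only a single cluster's worth of dirty pages is
         * gathered per UPL, otherwise up to a VM_SUPER_CLUSTER is
         * requested so the paging file sees larger contiguous writes.
         */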
        page_list_count = 0;
        memory_object_super_upl_request(vs->vs_control,
            (memory_object_offset_t)offset,
            cnt, super_size,
            &upl, NULL, &page_list_count,
            request_flags | UPL_FOR_PAGEOUT);

        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

        seg_size = cl_size - (upl->offset % cl_size);
        upl_offset = upl->offset & ~(cl_size - 1);

        for (seg_index = 0, transfer_size = upl->size;
                    transfer_size > 0; ) {
            ps_offset[seg_index] =
                ps_clmap(vs, upl_offset,
                    &clmap, CL_ALLOC,
                    transfer_size < cl_size ?
                    transfer_size : cl_size, 0);

            if (ps_offset[seg_index] == (vm_offset_t) -1) {
                upl_deallocate(upl);

                return KERN_FAILURE;
            }
            psp[seg_index] = CLMAP_PS(clmap);

            if (transfer_size > seg_size) {
                transfer_size -= seg_size;
                upl_offset += cl_size;
                seg_size = cl_size;
                seg_index++;
            } else
                transfer_size = 0;
        }
        /*
         * Ignore any non-present pages at the end of the
         * UPL.
         */
        for (page_index = upl->size / vm_page_size; page_index > 0;)
            if (UPL_PAGE_PRESENT(pl, --page_index))
                break;
        num_of_pages = page_index + 1;

        base_index = (upl->offset % cl_size) / PAGE_SIZE;

        for (page_index = 0; page_index < num_of_pages; ) {
            /*
             * skip over non-dirty pages
             */
            for ( ; page_index < num_of_pages; page_index++) {
                if (UPL_DIRTY_PAGE(pl, page_index)
                    || UPL_PRECIOUS_PAGE(pl, page_index))
                    /*
                     * this is a page we need to write
                     * go see if we can buddy it up with
                     * others that are contiguous to it
                     */
                    break;
                /*
                 * if the page is not-dirty, but present we
                 * need to commit it... This is an unusual
                 * case since we only asked for dirty pages
                 */
                if (UPL_PAGE_PRESENT(pl, page_index)) {
                    boolean_t empty = FALSE;
                    upl_commit_range(upl,
                            page_index * vm_page_size,
                            vm_page_size,
                            UPL_COMMIT_NOTIFY_EMPTY,
                            pl,
                            page_list_count,
                            &empty);
                    if (empty) {
                        assert(page_index ==
                               num_of_pages - 1);
                        upl_deallocate(upl);
                    }
                }
            }
            if (page_index == num_of_pages)
                /*
                 * no more pages to look at, we're out of here
                 */
                break;

            /*
             * gather up contiguous dirty pages... we have at
             * least 1 * otherwise we would have bailed above
             * make sure that each physical segment that we step
             * into is contiguous to the one we're currently in
             * if it's not, we have to stop and write what we have
             */
            for (first_dirty = page_index;
                    page_index < num_of_pages; ) {
                if ( !UPL_DIRTY_PAGE(pl, page_index)
                    && !UPL_PRECIOUS_PAGE(pl, page_index))
                    break;
                page_index++;
                /*
                 * if we just looked at the last page in the UPL
                 * we don't need to check for physical segment
                 * continuity
                 */
                if (page_index < num_of_pages) {
                    cur_seg = (base_index + (page_index - 1))/pages_in_cl;
                    nxt_seg = (base_index + page_index)/pages_in_cl;

                    if (cur_seg != nxt_seg) {
                        if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
                            /*
                             * if the segment we're about
                             * to step into is not
                             * contiguous to the one we're
                             * currently in, or it's in a
                             * different paging file....
                             * we stop here and generate
                             * the I/O
                             */
                            break;
                    }
                }
            }
            num_dirty = page_index - first_dirty;

            if (num_dirty) {
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                while (transfer_size) {

                    if ((seg_size = cl_size -
                        ((upl->offset + upl_offset) % cl_size))
                            > transfer_size)
                        seg_size = transfer_size;

                    ps_vs_write_complete(vs,
                            upl->offset + upl_offset,
                            seg_size, error);

                    transfer_size -= seg_size;
                    upl_offset += seg_size;
                }
                upl_offset = first_dirty * vm_page_size;
                transfer_size = num_dirty * vm_page_size;

                seg_index = (base_index + first_dirty) / pages_in_cl;
                seg_offset = (upl->offset + upl_offset) % cl_size;

                error = ps_write_file(psp[seg_index],
                        upl, upl_offset,
                        ps_offset[seg_index]
                                + seg_offset,
                        transfer_size, flags);
            } else {
                boolean_t empty = FALSE;
                upl_abort_range(upl,
                        first_dirty * vm_page_size,
                        num_dirty * vm_page_size,
                        UPL_ABORT_NOTIFY_EMPTY,
                        &empty);
                if (empty) {
                    assert(page_index == num_of_pages);
                    upl_deallocate(upl);
                }
            }
        }
    } else {
        assert(cnt <= (vm_page_size << vs->vs_clshift));
        list_size = cnt;

        /* The caller provides a mapped_data which is derived  */
        /* from a temporary object.  The targeted pages are    */
        /* guaranteed to be set at offset 0 in the mapped_data */
        /* The actual offset however must still be derived     */
        /* from the offset in the vs in question               */
        mobj_base_addr = offset;
        mobj_target_addr = mobj_base_addr;

        for (transfer_size = list_size; transfer_size != 0;) {
            actual_offset = ps_clmap(vs, mobj_target_addr,
                        &clmap, CL_ALLOC,
                        transfer_size < cl_size ?
                        transfer_size : cl_size, 0);
            if (actual_offset == (vm_offset_t) -1) {
                error = 1;
                break;
            }
            cnt = MIN(transfer_size,
                      CLMAP_NPGS(clmap) * vm_page_size);
            ps = CLMAP_PS(clmap);
            /* Assume that the caller has given us contiguous */
            /* pages */
            if (cnt) {
                ps_vs_write_complete(vs, mobj_target_addr,
                                cnt, error);
                error = ps_write_file(ps, internal_upl,
                            0, actual_offset,
                            cnt, flags);
                if (error)
                    break;
            }
            actual_offset += cnt;
            mobj_target_addr += cnt;
            transfer_size -= cnt;
        }
    }
    if (error)
        return KERN_FAILURE;
    else
        return KERN_SUCCESS;
}
vm_size_t
ps_vstruct_allocated_size(
    vstruct_t       vs)
{
    int             num_pages;
    struct vs_map   *vsmap;
    int             i, j, k;

    num_pages = 0;
    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]))
                    continue;
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k))
                        num_pages++;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]))
                continue;
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k))
                    num_pages++;
            }
        }
    }

    return ptoa_32(num_pages);
}
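
/*
 * Accounting sketch: each vs_map entry carries a bitmap with one bit per
 * page of its cluster (VSCLSIZE(vs) bits), so the walk above just counts
 * set bits and converts the page count to bytes with ptoa_32().  With the
 * default cluster shift of 2 that is 4 bits examined per cluster entry.
 */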
size_t
ps_vstruct_allocated_pages(
    vstruct_t               vs,
    default_pager_page_t    *pages,
    size_t                  pages_size)
{
    int             num_pages;
    struct vs_map   *vsmap;
    vm_offset_t     offset;
    int             i, j, k;

    num_pages = 0;
    offset = 0;
    if (vs->vs_indirect) {
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL) {
                offset += (vm_page_size * CLMAP_ENTRIES *
                            VSCLSIZE(vs));
                continue;
            }
            /* loop on clusters in this indirect map */
            for (j = 0; j < CLMAP_ENTRIES; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j])) {
                    offset += vm_page_size * VSCLSIZE(vs);
                    continue;
                }
                /* loop on pages in this cluster */
                for (k = 0; k < VSCLSIZE(vs); k++) {
                    if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                        num_pages++;
                        if (num_pages < pages_size)
                            pages++->dpp_offset =
                                offset;
                    }
                    offset += vm_page_size;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        /* loop on clusters in the direct map */
        for (j = 0; j < CLMAP_ENTRIES; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j])) {
                offset += vm_page_size * VSCLSIZE(vs);
                continue;
            }
            /* loop on pages in this cluster */
            for (k = 0; k < VSCLSIZE(vs); k++) {
                if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
                    num_pages++;
                    if (num_pages < pages_size)
                        pages++->dpp_offset = offset;
                }
                offset += vm_page_size;
            }
        }
    }

    return num_pages;
}
kern_return_t
ps_vstruct_transfer_from_segment(
    vstruct_t           vs,
    paging_segment_t    segment,
    upl_t               upl)
{
    struct vs_map   *vsmap;
    struct vs_map   old_vsmap;
    struct vs_map   new_vsmap;
    int             i, j;

    VS_LOCK(vs);    /* block all work on this vstruct */
                    /* can't allow the normal multiple write */
                    /* semantic because writes may conflict */
    vs->vs_xfer_pending = TRUE;
    vs_wait_for_sync_writers(vs);
    vs_wait_for_readers(vs);
    /* we will unlock the vs to allow other writes while transferring */
    /* and will be guaranteed of the persistance of the vs struct */
    /* because the caller of ps_vstruct_transfer_from_segment bumped */
    /* vs_async_pending */
    /* OK we now have guaranteed no other parties are accessing this */
    /* vs.  Now that we are also supporting simple lock versions of */
    /* vs_lock we cannot hold onto VS_LOCK as we may block below. */
    /* our purpose in holding it before was the multiple write case */
    /* we now use the boolean xfer_pending to do that.  We can use */
    /* a boolean instead of a count because we have guaranteed single */
    /* file access to this code in its caller */
    VS_UNLOCK(vs);
vs_changed:
    if (vs->vs_indirect) {
        int     vsmap_size;
        int     clmap_off;
        /* loop on indirect maps */
        for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
            vsmap = vs->vs_imap[i];
            if (vsmap == NULL)
                continue;
            /* loop on clusters in this indirect map */
            clmap_off = (vm_page_size * CLMAP_ENTRIES *
                            VSCLSIZE(vs) * i);
            if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
                vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
            else
                vsmap_size = CLMAP_ENTRIES;
            for (j = 0; j < vsmap_size; j++) {
                if (VSM_ISCLR(vsmap[j]) ||
                    VSM_ISERR(vsmap[j]) ||
                    (VSM_PS(vsmap[j]) != segment))
                    continue;
                if (vs_cluster_transfer(vs,
                        (vm_page_size * (j << vs->vs_clshift))
                            + clmap_off,
                        vm_page_size << vs->vs_clshift,
                        upl) != KERN_SUCCESS) {
                    vs->vs_xfer_pending = FALSE;
                    vs_finish_write(vs);
                    return KERN_FAILURE;
                }
                /* allow other readers/writers during transfer*/
                vs->vs_xfer_pending = FALSE;
                vs_finish_write(vs);
                vs->vs_xfer_pending = TRUE;
                vs_wait_for_sync_writers(vs);
                vs_wait_for_readers(vs);

                if (!(vs->vs_indirect)) {
                    goto vs_changed;
                }
            }
        }
    } else {
        vsmap = vs->vs_dmap;
        if (vsmap == NULL) {
            vs->vs_xfer_pending = FALSE;
            vs_finish_write(vs);
            return KERN_SUCCESS;
        }
        /* loop on clusters in the direct map */
        for (j = 0; j < vs->vs_size; j++) {
            if (VSM_ISCLR(vsmap[j]) ||
                VSM_ISERR(vsmap[j]) ||
                (VSM_PS(vsmap[j]) != segment))
                continue;
            if (vs_cluster_transfer(vs,
                    vm_page_size * (j << vs->vs_clshift),
                    vm_page_size << vs->vs_clshift,
                    upl) != KERN_SUCCESS) {
                vs->vs_xfer_pending = FALSE;
                vs_finish_write(vs);
                return KERN_FAILURE;
            }
            /* allow other readers/writers during transfer*/
            vs->vs_xfer_pending = FALSE;
            vs_finish_write(vs);
            vs->vs_xfer_pending = TRUE;
            vs_wait_for_sync_writers(vs);
            vs_wait_for_readers(vs);

            if (vs->vs_indirect) {
                goto vs_changed;
            }
        }
    }

    vs->vs_xfer_pending = FALSE;
    vs_finish_write(vs);
    return KERN_SUCCESS;
}
vs_map_t
vs_get_map_entry(
    vstruct_t       vs,
    vm_offset_t     offset)
{
    struct vs_map   *vsmap;
    vm_offset_t     cluster;

    cluster = atop_32(offset) >> vs->vs_clshift;
    if (vs->vs_indirect) {
        long    ind_block = cluster/CLMAP_ENTRIES;

        /* Is the indirect block allocated? */
        vsmap = vs->vs_imap[ind_block];
        if (vsmap == (vs_map_t) NULL)
            return vsmap;
    } else
        vsmap = vs->vs_dmap;
    vsmap += cluster%CLMAP_ENTRIES;
    return vsmap;
}
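
/*
 * Index arithmetic sketch: a backing-store cluster number is the page
 * number shifted down by vs_clshift; with an indirect map it is then
 * split into an indirect-block index (cluster / CLMAP_ENTRIES) and an
 * entry within that block (cluster % CLMAP_ENTRIES), the same layout the
 * map-walking routines above iterate over.
 */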
kern_return_t
vs_cluster_transfer(
    vstruct_t       vs,
    vm_offset_t     offset,
    vm_size_t       cnt,
    upl_t           upl)
{
    vm_offset_t         actual_offset;
    paging_segment_t    ps;
    kern_return_t       error = KERN_SUCCESS;
    int                 size, size_wanted, i;
    unsigned int        residual;
    default_pager_thread_t  *dpt;
    struct vs_map       *vsmap_ptr;
    struct vs_map       read_vsmap;
    struct vs_map       original_read_vsmap;
    struct vs_map       write_vsmap;

    /* vs_cluster_transfer reads in the pages of a cluster and
     * then writes these pages back to new backing store.  The
     * segment the pages are being read from is assumed to have
     * been taken off-line and is no longer considered for new
     * space requests.
     *
     * This loop will be executed once per cluster referenced.
     * Typically this means once, since it's unlikely that the
     * VM system will ask for anything spanning cluster boundaries.
     *
     * If there are holes in a cluster (in a paging segment), we stop
     * reading at the hole, then loop again, hoping to
     * find valid pages later in the cluster.  This continues until
     * the entire range has been examined, and read, if present.  The
     * pages are written as they are read.  If a failure occurs after
     * some pages are written the unmap call at the bottom of the loop
     * recovers the backing store and the old backing store remains
     * in effect.
     */

    VSM_CLR(write_vsmap);
    VSM_CLR(original_read_vsmap);
    /* grab the actual object's pages to sync with I/O */
    while (cnt && (error == KERN_SUCCESS)) {
        vsmap_ptr = vs_get_map_entry(vs, offset);
        actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

        if (actual_offset == (vm_offset_t) -1) {
            /*
             * Nothing left to write in this cluster at least
             * set write cluster information for any previous
             * write, clear for next cluster, if there is one
             */
            unsigned int local_size, clmask, clsize;

            clsize = vm_page_size << vs->vs_clshift;
            clmask = clsize - 1;
            local_size = clsize - (offset & clmask);
            local_size = MIN(local_size, cnt);

            /* This cluster has no data in it beyond what may */
            /* have been found on a previous iteration through */
            /* the loop "write_vsmap" */
            *vsmap_ptr = write_vsmap;
            VSM_CLR(write_vsmap);
            VSM_CLR(original_read_vsmap);

            cnt -= local_size;
            offset += local_size;
            continue;
        }

        /*
         * Count up contiguous available or unavailable
         * pages.
         */
        ps = CLMAP_PS(clmap);
        size = 0;
        unavail_size = 0;
        for (i = 0;
             (size < cnt) && (unavail_size < cnt) &&
             (i < CLMAP_NPGS(clmap)); i++) {
            if (CLMAP_ISSET(clmap, i)) {
                if (unavail_size != 0)
                    break;
                size += vm_page_size;
                BS_STAT(ps->ps_bs,
                    ps->ps_bs->bs_pages_in++);
            } else {
                if (size != 0)
                    break;
                unavail_size += vm_page_size;
            }
        }

        if (size == 0) {
            ASSERT(unavail_size);
            cnt -= unavail_size;
            offset += unavail_size;
            if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
                    == 0) {
                /* There is no more to transfer in this
                   cluster
                 */
                *vsmap_ptr = write_vsmap;
                VSM_CLR(write_vsmap);
                VSM_CLR(original_read_vsmap);
            }
            continue;
        }

        if (VSM_ISCLR(original_read_vsmap))
            original_read_vsmap = *vsmap_ptr;

        if (ps->ps_segtype == PS_PARTITION) {
/*
            NEED TO ISSUE WITH SYNC & NO COMMIT
            error = ps_read_device(ps, actual_offset, &buffer,
                    size, &residual, flags);
*/
        } else {
            /* NEED TO ISSUE WITH SYNC & NO COMMIT */
            error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
                    size, &residual,
                    (UPL_IOSYNC | UPL_NOCOMMIT));
        }

        read_vsmap = *vsmap_ptr;

        /*
         * Adjust counts and put data in new BS.  Optimize for the
         * common case, i.e. no error and/or partial data.
         * If there was an error, then we need to error the entire
         * range, even if some data was successfully read.
         */
        if ((error == KERN_SUCCESS) && (residual == 0)) {
            int page_list_count = 0;

            /*
             * Got everything we asked for, supply the data to
             * the new BS.  Note that as a side effect of supplying
             * the data, the buffer holding the supplied data is
             * deallocated from the pager's address space unless
             * the write is unsuccessful.
             */

            /* note buffer will be cleaned up in all cases by */
            /* internal_cluster_write or if an error on write */
            /* the vm_map_copy_page_discard call */
            *vsmap_ptr = write_vsmap;

            if (vs_cluster_write(vs, upl, offset,
                    size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
                error = KERN_FAILURE;
                if (!(VSM_ISCLR(*vsmap_ptr))) {
                    /* unmap the new backing store object */
                    ps_clunmap(vs, offset, size);
                }
                /* original vsmap */
                *vsmap_ptr = original_read_vsmap;
                VSM_CLR(write_vsmap);
            } else {
                if ((offset + size) &
                        ((vm_page_size << vs->vs_clshift)
                            - 1)) {
                    /* There is more to transfer in this
                       cluster
                     */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                } else {
                    /* discard the old backing object */
                    write_vsmap = *vsmap_ptr;
                    *vsmap_ptr = read_vsmap;
                    ps_clunmap(vs, offset, size);
                    *vsmap_ptr = write_vsmap;
                    VSM_CLR(write_vsmap);
                    VSM_CLR(original_read_vsmap);
                }
            }
        } else {
            if (error == KERN_SUCCESS) {
                if (residual == size) {
                    /*
                     * If a read operation returns no error
                     * and no data moved, we turn it into
                     * an error, assuming we're reading at
                     * or beyond EOF.
                     * Fall through and error the entire
                     * range.
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                } else {
                    /*
                     * Otherwise, we have partial read.
                     * This is also considered an error
                     * for the purposes of cluster transfer
                     */
                    error = KERN_FAILURE;
                    *vsmap_ptr = write_vsmap;
                    if (!(VSM_ISCLR(*vsmap_ptr))) {
                        /* unmap the new backing store object */
                        ps_clunmap(vs, offset, size);
                    }
                    *vsmap_ptr = original_read_vsmap;
                    VSM_CLR(write_vsmap);
                }
            }
        }

        cnt -= size;
        offset += size;

    } /* END while (cnt && (error == 0)) */
    if (!VSM_ISCLR(write_vsmap))
        *vsmap_ptr = write_vsmap;

    return error;
}
kern_return_t
default_pager_add_file(MACH_PORT_FACE backing_store,
    int         *vp,
    int         record_size,
    vm_size_t   size)
{
    backing_store_t     bs;
    paging_segment_t    ps;
    int                 i;
    int                 error;

    if ((bs = backing_store_lookup(backing_store))
            == BACKING_STORE_NULL)
        return KERN_INVALID_ARGUMENT;

    for (i = 0; i <= paging_segment_max; i++) {
        ps = paging_segments[i];
        if (ps == PAGING_SEGMENT_NULL)
            continue;
        if (ps->ps_segtype != PS_FILE)
            continue;

        /*
         * Check for overlap on same device.
         */
        if (ps->ps_vnode == (struct vnode *)vp) {
            return KERN_INVALID_ARGUMENT;
        }
    }

    /*
     * Set up the paging segment
     */
    ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
    if (ps == PAGING_SEGMENT_NULL) {
        return KERN_RESOURCE_SHORTAGE;
    }

    ps->ps_segtype = PS_FILE;
    ps->ps_vnode = (struct vnode *)vp;

    ps->ps_record_shift = local_log2(vm_page_size / record_size);
    ps->ps_recnum = size;
    ps->ps_pgnum = size >> ps->ps_record_shift;

    ps->ps_pgcount = ps->ps_pgnum;
    ps->ps_clshift = local_log2(bs->bs_clsize);
    ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
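
    /*
     * Worked example (illustrative; assumes 4K pages and 512-byte
     * records): ps_record_shift = log2(4096/512) = 3, so ps_pgnum is
     * the record count divided by 8; with a backing-store cluster size
     * of 4 pages, ps_clcount = ps_pgcount / 4.
     */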
    ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
    if (!ps->ps_bmap) {
        kfree((vm_offset_t)ps, sizeof *ps);
        return KERN_RESOURCE_SHORTAGE;
    }
    for (i = 0; i < ps->ps_ncls; i++) {
        clrbit(ps->ps_bmap, i);
    }

    ps->ps_going_away = FALSE;
    ps->ps_bs = bs;

    if ((error = ps_enter(ps)) != 0) {
        kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
        kfree((vm_offset_t)ps, sizeof *ps);
        return KERN_RESOURCE_SHORTAGE;
    }

    bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
    bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;

    dp_pages_free += ps->ps_pgcount;

    bs_more_space(ps->ps_clcount);

    DEBUG(DEBUG_BS_INTERNAL,
          ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
           device, offset, size, record_size,
           ps->ps_record_shift, ps->ps_pgnum));

    return KERN_SUCCESS;
}
kern_return_t
ps_read_file(
    paging_segment_t    ps,
    upl_t               upl,
    vm_offset_t         upl_offset,
    vm_offset_t         offset,
    unsigned int        size,
    unsigned int        *residualp,
    int                 flags)
{
    vm_object_offset_t  f_offset;
    int                 error = 0;
    int                 result;

    clustered_reads[atop_32(size)]++;

    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    /* for transfer case we need to pass uploffset and flags */
    error = vnode_pagein(ps->ps_vnode,
        upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);

    /* The vnode_pagein semantic is somewhat at odds with the existing   */
    /* device_read semantic.  Partial reads are not experienced at this  */
    /* level.  It is up to the bit map code and cluster read code to     */
    /* check that requested data locations are actually backed, and the  */
    /* pagein code to either read all of the requested data or return an */
    /* error. */

    if (error)
        result = KERN_FAILURE;
    else {
        *residualp = 0;
        result = KERN_SUCCESS;
    }
    return result;
}
kern_return_t
ps_write_file(
    paging_segment_t    ps,
    upl_t               upl,
    vm_offset_t         upl_offset,
    vm_offset_t         offset,
    unsigned int        size,
    int                 flags)
{
    vm_object_offset_t  f_offset;
    kern_return_t       result;

    clustered_writes[atop_32(size)]++;

    f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

    if (vnode_pageout(ps->ps_vnode,
            upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
        result = KERN_FAILURE;
    else
        result = KERN_SUCCESS;

    return result;
}
kern_return_t
default_pager_triggers(MACH_PORT_FACE default_pager,
    int             hi_wat,
    int             lo_wat,
    int             flags,
    MACH_PORT_FACE  trigger_port)
{
    MACH_PORT_FACE  release;
    kern_return_t   kr;

    if (flags == HI_WAT_ALERT) {
        release = min_pages_trigger_port;
        min_pages_trigger_port = trigger_port;
        minimum_pages_remaining = hi_wat/vm_page_size;
        kr = KERN_SUCCESS;
    } else if (flags == LO_WAT_ALERT) {
        release = max_pages_trigger_port;
        max_pages_trigger_port = trigger_port;
        maximum_pages_free = lo_wat/vm_page_size;
        kr = KERN_SUCCESS;
    } else {
        release = trigger_port;
        kr = KERN_INVALID_ARGUMENT;
    }

    if (IP_VALID(release))
        ipc_port_release_send(release);

    return kr;
}
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL     3       /* time between free level checks */
#define PF_LATENCY      10      /* number of intervals before release */
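
/*
 * Worked example (illustrative): with PF_INTERVAL of 3 seconds and
 * PF_LATENCY of 10 intervals, the free level must stay on the
 * release-safe side of maximum_pages_free for roughly 30 seconds of
 * consecutive checks before a notification is sent, and each
 * notification releases at most one backing store object.
 */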
static int dp_pages_free_low_count = 0;
void
default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t p2)
{
    unsigned long long  average;
    ipc_port_t          trigger;
    uint64_t            deadline;

    /*
     * We determine whether it will be safe to release some
     * backing store by watching the free page level.  If
     * it remains below the maximum_pages_free threshold for
     * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
     * then we deem it safe.
     *
     * Note that this establishes a maximum rate at which backing
     * store will be released, as each notification (currently)
     * only results in a single backing store object being
     * released.
     */
    if (dp_pages_free > maximum_pages_free) {
        dp_pages_free_low_count++;
    } else {
        dp_pages_free_low_count = 0;
    }

    /* decide whether to send notification */
    trigger = IP_NULL;
    if (max_pages_trigger_port &&
        (backing_store_release_trigger_disable == 0) &&
        (dp_pages_free_low_count > PF_LATENCY)) {
        trigger = max_pages_trigger_port;
        max_pages_trigger_port = NULL;
    }

    /* send notification */
    if (trigger != IP_NULL) {
        if (backing_store_release_trigger_disable != 0) {
            assert_wait((event_t)
                    &backing_store_release_trigger_disable,
                    THREAD_UNINT);
            thread_block(THREAD_CONTINUE_NULL);
        }
        default_pager_space_alert(trigger, LO_WAT_ALERT);
        ipc_port_release_send(trigger);
        dp_pages_free_low_count = 0;
    }

    clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
    thread_call_func_delayed(default_pager_backing_store_monitor, NULL, deadline);
}