/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * Mach Operating System
 * Copyright (c) 1991,1990,1989 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

/*
 * Paging File Management.
 */
#include <mach/memory_object_control.h>
#include <mach/memory_object_server.h>
#include "default_pager_internal.h"
#include <default_pager/default_pager_alerts.h>
#include <ipc/ipc_port.h>
#include <ipc/ipc_space.h>
#include <kern/queue.h>
#include <kern/counters.h>
#include <kern/sched_prim.h>
#include <vm/vm_kern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_map.h>
/*
 * ALLOC_STRIDE... the maximum number of bytes allocated from
 * a swap file before moving on to the next swap file... if
 * all swap files reside on a single disk, this value should
 * be very large (this is the default assumption)... if the
 * swap files are spread across multiple disks, then this value
 * should be small (128 * 1024)...
 *
 * This should be determined dynamically in the future.
 */
#define ALLOC_STRIDE	(1024 * 1024 * 1024)
int physical_transfer_cluster_count = 0;

#define VM_SUPER_CLUSTER	0x40000
#define VM_SUPER_PAGES		64

/*
 * 0 means no shift to pages, so == 1 page/cluster. 1 would mean
 * 2 pages/cluster, 2 means 4 pages/cluster, and so on.
 */
#define VSTRUCT_DEF_CLSHIFT	2
int vstruct_def_clshift = VSTRUCT_DEF_CLSHIFT;
int default_pager_clsize = 0;

unsigned int clustered_writes[VM_SUPER_PAGES+1];
unsigned int clustered_reads[VM_SUPER_PAGES+1];
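
/*
 * Illustrative sketch (compiled out, not part of the original code): how
 * the cluster-shift constants above translate into per-cluster sizes.
 * Assumes the 4K vm_page_size this code was written for; the helper name
 * is made up for the example only.
 */
#if 0
static void
cluster_size_example(void)
{
	int pages_per_cluster = 1 << vstruct_def_clshift;	   /* 1 << 2 == 4 pages */
	int bytes_per_cluster = pages_per_cluster * vm_page_size; /* 4 * 4096 == 16KB */
	int super_pages = VM_SUPER_CLUSTER / vm_page_size;	   /* 0x40000 / 4096 == VM_SUPER_PAGES */

	printf("cluster: %d pages (%d bytes), super-cluster: %d pages\n",
	       pages_per_cluster, bytes_per_cluster, super_pages);
}
#endif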
/*
 * Globals used for asynchronous paging operations:
 *	vs_async_list:	head of list of to-be-completed I/O ops
 *	async_num_queued: number of pages completed, but not yet
 *		processed by async thread.
 *	async_requests_out: number of pages of requests not completed.
 */
struct vs_async *vs_async_list;
int	async_num_queued;
int	async_requests_out;

#define VS_ASYNC_REUSE 1
struct vs_async *vs_async_free_list;

mutex_t default_pager_async_lock;	/* Protects globals above */

int vs_alloc_async_failed = 0;			/* statistics */
int vs_alloc_async_count = 0;			/* statistics */
struct vs_async *vs_alloc_async(void);		/* forward */
void vs_free_async(struct vs_async *vsa);	/* forward */

#define VS_ALLOC_ASYNC()	vs_alloc_async()
#define VS_FREE_ASYNC(vsa)	vs_free_async(vsa)

#define VS_ASYNC_LOCK()		mutex_lock(&default_pager_async_lock)
#define VS_ASYNC_UNLOCK()	mutex_unlock(&default_pager_async_lock)
#define VS_ASYNC_LOCK_INIT()	mutex_init(&default_pager_async_lock, \

#define VS_ASYNC_LOCK_ADDR()	(&default_pager_async_lock)
/*
 * Paging Space Hysteresis triggers and the target notification port
 */
unsigned int	minimum_pages_remaining = 0;
unsigned int	maximum_pages_free = 0;
ipc_port_t	min_pages_trigger_port = NULL;
ipc_port_t	max_pages_trigger_port = NULL;

boolean_t	bs_low = FALSE;
int		backing_store_release_trigger_disable = 0;
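
/*
 * Sketch (compiled out) of how the trigger globals above are consumed
 * later in this file (see ps_select_segment / ps_allocate_cluster): the
 * port is claimed and cleared while the paging-segment list lock is
 * held, and the alert is sent only after the lock is dropped.  The
 * PSL_LOCK/PSL_UNLOCK macros are assumed from default_pager_internal.h.
 */
#if 0
	ipc_port_t trigger = IP_NULL;

	PSL_LOCK();
	if (min_pages_trigger_port &&
	    (dp_pages_free < minimum_pages_remaining)) {
		trigger = min_pages_trigger_port;	/* one-shot: consume the send right */
		min_pages_trigger_port = NULL;
	}
	PSL_UNLOCK();

	if (trigger != IP_NULL) {
		default_pager_space_alert(trigger, HI_WAT_ALERT);
		ipc_port_release_send(trigger);
	}
#endif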
/*
 * Object sizes are rounded up to the next power of 2,
 * unless they are bigger than a given maximum size.
 */
vm_size_t	max_doubled_size = 4 * 1024 * 1024;	/* 4 meg */

/*
 * List of all backing store and segments.
 */
struct backing_store_list_head backing_store_list;
paging_segment_t paging_segments[MAX_NUM_PAGING_SEGMENTS];
mutex_t paging_segments_lock;
int paging_segment_max = 0;
int paging_segment_count = 0;
int ps_select_array[BS_MAXPRI+1] = { -1,-1,-1,-1,-1 };
/*
 * Total pages free in system.
 * This differs from clusters committed/avail which is a measure of the
 * over-commitment of paging segments to backing store.  An idea which is
 * likely to be deprecated.
 */
unsigned int	dp_pages_free = 0;
unsigned int	cluster_transfer_minimum = 100;

kern_return_t ps_write_file(paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, int);	/* forward */
kern_return_t ps_read_file (paging_segment_t, upl_t, vm_offset_t, vm_offset_t, unsigned int, unsigned int *, int);	/* forward */
default_pager_thread_t *
get_read_buffer(void)
{
	int	i;

	DPT_LOCK(dpt_lock);
	while (TRUE) {
		for (i = 0; i < default_pager_internal_count; i++) {
			if (dpt_array[i]->checked_out == FALSE) {
				dpt_array[i]->checked_out = TRUE;
				DPT_UNLOCK(dpt_lock);
				return dpt_array[i];
			}
		}
		DPT_SLEEP(dpt_lock, &dpt_array, THREAD_UNINT);
	}
}
	/*
	 * List of all backing store.
	 */
	queue_init(&backing_store_list.bsl_queue);

	VS_ASYNC_LOCK_INIT();
#if	VS_ASYNC_REUSE
	vs_async_free_list = NULL;
#endif	/* VS_ASYNC_REUSE */

	for (i = 0; i < VM_SUPER_PAGES + 1; i++) {
		clustered_writes[i] = 0;
		clustered_reads[i] = 0;
	}
/*
 * When things do not quite work out...
 */
void bs_no_paging_space(boolean_t);	/* forward */

void
bs_no_paging_space(
	boolean_t out_of_memory)
{
	if (out_of_memory)
		dprintf(("*** OUT OF MEMORY ***\n"));
	panic("bs_no_paging_space: NOT ENOUGH PAGING SPACE");
}
void bs_more_space(int);	/* forward */
void bs_commit(int);		/* forward */

boolean_t	user_warned = FALSE;
unsigned int	clusters_committed = 0;
unsigned int	clusters_available = 0;
unsigned int	clusters_committed_peak = 0;
253 * Account for new paging space.
255 clusters_available
+= nclusters
;
257 if (clusters_available
>= clusters_committed
) {
258 if (verbose
&& user_warned
) {
259 printf("%s%s - %d excess clusters now.\n",
261 "paging space is OK now",
262 clusters_available
- clusters_committed
);
264 clusters_committed_peak
= 0;
267 if (verbose
&& user_warned
) {
268 printf("%s%s - still short of %d clusters.\n",
270 "WARNING: paging space over-committed",
271 clusters_committed
- clusters_available
);
272 clusters_committed_peak
-= nclusters
;
285 clusters_committed
+= nclusters
;
286 if (clusters_committed
> clusters_available
) {
287 if (verbose
&& !user_warned
) {
289 printf("%s%s - short of %d clusters.\n",
291 "WARNING: paging space over-committed",
292 clusters_committed
- clusters_available
);
294 if (clusters_committed
> clusters_committed_peak
) {
295 clusters_committed_peak
= clusters_committed
;
298 if (verbose
&& user_warned
) {
299 printf("%s%s - was short of up to %d clusters.\n",
301 "paging space is OK now",
302 clusters_committed_peak
- clusters_available
);
304 clusters_committed_peak
= 0;
312 int default_pager_info_verbose
= 1;
319 vm_size_t pages_total
, pages_free
;
324 pages_total
= pages_free
= 0;
325 for (i
= 0; i
<= paging_segment_max
; i
++) {
326 ps
= paging_segments
[i
];
327 if (ps
== PAGING_SEGMENT_NULL
)
331 * no need to lock: by the time this data
332 * gets back to any remote requestor it
333 * will be obsolete anyways
335 pages_total
+= ps
->ps_pgnum
;
336 pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
337 DEBUG(DEBUG_BS_INTERNAL
,
338 ("segment #%d: %d total, %d free\n",
339 i
, ps
->ps_pgnum
, ps
->ps_clcount
<< ps
->ps_clshift
));
341 *totalp
= pages_total
;
343 if (verbose
&& user_warned
&& default_pager_info_verbose
) {
344 if (clusters_available
< clusters_committed
) {
345 printf("%s %d clusters committed, %d available.\n",
354 backing_store_t
backing_store_alloc(void); /* forward */
357 backing_store_alloc(void)
361 bs
= (backing_store_t
) kalloc(sizeof (struct backing_store
));
362 if (bs
== BACKING_STORE_NULL
)
363 panic("backing_store_alloc: no memory");
366 bs
->bs_port
= MACH_PORT_NULL
;
369 bs
->bs_pages_total
= 0;
371 bs
->bs_pages_in_fail
= 0;
372 bs
->bs_pages_out
= 0;
373 bs
->bs_pages_out_fail
= 0;
378 backing_store_t
backing_store_lookup(MACH_PORT_FACE
); /* forward */
380 /* Even in both the component space and external versions of this pager, */
381 /* backing_store_lookup will be called from tasks in the application space */
383 backing_store_lookup(
389 port is currently backed with a vs structure in the alias field
390 we could create an ISBS alias and a port_is_bs call but frankly
391 I see no reason for the test, the bs->port == port check below
392 will work properly on junk entries.
394 if ((port == MACH_PORT_NULL) || port_is_vs(port))
396 if ((port
== MACH_PORT_NULL
))
397 return BACKING_STORE_NULL
;
400 queue_iterate(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
403 if (bs
->bs_port
== port
) {
405 /* Success, return it locked. */
411 return BACKING_STORE_NULL
;
414 void backing_store_add(backing_store_t
); /* forward */
420 MACH_PORT_FACE port
= bs
->bs_port
;
421 MACH_PORT_FACE pset
= default_pager_default_set
;
422 kern_return_t kr
= KERN_SUCCESS
;
424 if (kr
!= KERN_SUCCESS
)
425 panic("backing_store_add: add to set");
430 * Set up default page shift, but only if not already
431 * set and argument is within range.
434 bs_set_default_clsize(unsigned int npages
)
441 if (default_pager_clsize
== 0) /* if not yet set */
442 vstruct_def_clshift
= local_log2(npages
);
448 int bs_get_global_clsize(int clsize
); /* forward */
451 bs_get_global_clsize(
455 memory_object_default_t dmm
;
459 * Only allow setting of cluster size once. If called
460 * with no cluster size (default), we use the compiled-in default
461 * for the duration. The same cluster size is used for all
464 if (default_pager_clsize
== 0) {
466 * Keep cluster size in bit shift because it's quicker
467 * arithmetic, and easier to keep at a power of 2.
469 if (clsize
!= NO_CLSIZE
) {
470 for (i
= 0; (1 << i
) < clsize
; i
++);
471 if (i
> MAX_CLUSTER_SHIFT
)
472 i
= MAX_CLUSTER_SHIFT
;
473 vstruct_def_clshift
= i
;
475 default_pager_clsize
= (1 << vstruct_def_clshift
);
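
		/*
		 * Worked example (compiled out) of the rounding loop above:
		 * a requested clsize of 3 pages gives i == 2, since
		 * 1 << 2 == 4 is the first power of two >= 3; i is then
		 * clamped to MAX_CLUSTER_SHIFT.
		 */
#if 0
		{
			int want = 3, s;

			for (s = 0; (1 << s) < want; s++);
			assert(s == 2);
		}
#endif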
478 * Let the user know the new (and definitive) cluster size.
481 printf("%scluster size = %d page%s\n",
482 my_name
, default_pager_clsize
,
483 (default_pager_clsize
== 1) ? "" : "s");
486 * Let the kernel know too, in case it hasn't used the
487 * default value provided in main() yet.
489 dmm
= default_pager_object
;
490 clsize
= default_pager_clsize
* vm_page_size
; /* in bytes */
491 kr
= host_default_memory_manager(host_priv_self(),
494 memory_object_default_deallocate(dmm
);
496 if (kr
!= KERN_SUCCESS
) {
497 panic("bs_get_global_cl_size:host_default_memory_manager");
499 if (dmm
!= default_pager_object
) {
500 panic("bs_get_global_cl_size:there is another default pager");
503 ASSERT(default_pager_clsize
> 0 &&
504 (default_pager_clsize
& (default_pager_clsize
- 1)) == 0);
506 return default_pager_clsize
;
510 default_pager_backing_store_create(
511 memory_object_default_t pager
,
513 int clsize
, /* in bytes */
514 MACH_PORT_FACE
*backing_store
)
519 struct vstruct_alias
*alias_struct
;
521 if (pager
!= default_pager_object
)
522 return KERN_INVALID_ARGUMENT
;
524 bs
= backing_store_alloc();
525 port
= ipc_port_alloc_kernel();
526 ipc_port_make_send(port
);
527 assert (port
!= IP_NULL
);
529 DEBUG(DEBUG_BS_EXTERNAL
,
530 ("priority=%d clsize=%d bs_port=0x%x\n",
531 priority
, clsize
, (int) backing_store
));
533 alias_struct
= (struct vstruct_alias
*)
534 kalloc(sizeof (struct vstruct_alias
));
535 if(alias_struct
!= NULL
) {
536 alias_struct
->vs
= (struct vstruct
*)bs
;
537 alias_struct
->name
= ISVS
;
538 port
->alias
= (int) alias_struct
;
541 ipc_port_dealloc_kernel((MACH_PORT_FACE
)(port
));
542 kfree((vm_offset_t
)bs
, sizeof (struct backing_store
));
543 return KERN_RESOURCE_SHORTAGE
;
547 if (priority
== DEFAULT_PAGER_BACKING_STORE_MAXPRI
)
548 priority
= BS_MAXPRI
;
549 else if (priority
== BS_NOPRI
)
550 priority
= BS_MAXPRI
;
552 priority
= BS_MINPRI
;
553 bs
->bs_priority
= priority
;
555 bs
->bs_clsize
= bs_get_global_clsize(atop_32(clsize
));
558 queue_enter(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
562 backing_store_add(bs
);
564 *backing_store
= port
;
569 default_pager_backing_store_info(
570 MACH_PORT_FACE backing_store
,
571 backing_store_flavor_t flavour
,
572 backing_store_info_t info
,
573 mach_msg_type_number_t
*size
)
576 backing_store_basic_info_t basic
;
580 if (flavour
!= BACKING_STORE_BASIC_INFO
||
581 *size
< BACKING_STORE_BASIC_INFO_COUNT
)
582 return KERN_INVALID_ARGUMENT
;
584 basic
= (backing_store_basic_info_t
)info
;
585 *size
= BACKING_STORE_BASIC_INFO_COUNT
;
587 VSTATS_LOCK(&global_stats
.gs_lock
);
588 basic
->pageout_calls
= global_stats
.gs_pageout_calls
;
589 basic
->pagein_calls
= global_stats
.gs_pagein_calls
;
590 basic
->pages_in
= global_stats
.gs_pages_in
;
591 basic
->pages_out
= global_stats
.gs_pages_out
;
592 basic
->pages_unavail
= global_stats
.gs_pages_unavail
;
593 basic
->pages_init
= global_stats
.gs_pages_init
;
594 basic
->pages_init_writes
= global_stats
.gs_pages_init_writes
;
595 VSTATS_UNLOCK(&global_stats
.gs_lock
);
597 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
598 return KERN_INVALID_ARGUMENT
;
600 basic
->bs_pages_total
= bs
->bs_pages_total
;
602 bs
->bs_pages_free
= 0;
603 for (i
= 0; i
<= paging_segment_max
; i
++) {
604 ps
= paging_segments
[i
];
605 if (ps
!= PAGING_SEGMENT_NULL
&& ps
->ps_bs
== bs
) {
607 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
612 basic
->bs_pages_free
= bs
->bs_pages_free
;
613 basic
->bs_pages_in
= bs
->bs_pages_in
;
614 basic
->bs_pages_in_fail
= bs
->bs_pages_in_fail
;
615 basic
->bs_pages_out
= bs
->bs_pages_out
;
616 basic
->bs_pages_out_fail
= bs
->bs_pages_out_fail
;
618 basic
->bs_priority
= bs
->bs_priority
;
619 basic
->bs_clsize
= ptoa_32(bs
->bs_clsize
); /* in bytes */
626 int ps_delete(paging_segment_t
); /* forward */
633 kern_return_t error
= KERN_SUCCESS
;
636 VSL_LOCK(); /* get the lock on the list of vs's */
/* The lock relationship and sequence are fairly complicated. */
639 /* this code looks at a live list, locking and unlocking the list */
640 /* as it traverses it. It depends on the locking behavior of */
641 /* default_pager_no_senders. no_senders always locks the vstruct */
642 /* targeted for removal before locking the vstruct list. However */
643 /* it will remove that member of the list without locking its */
644 /* neighbors. We can be sure when we hold a lock on a vstruct */
645 /* it cannot be removed from the list but we must hold the list */
646 /* lock to be sure that its pointers to its neighbors are valid. */
647 /* Also, we can hold off destruction of a vstruct when the list */
648 /* lock and the vs locks are not being held by bumping the */
649 /* vs_async_pending count. */
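
	/*
	 * Sketch (compiled out) of the pinning convention described above,
	 * as used later in this routine: bumping vs_async_pending holds a
	 * vstruct against destruction while no locks are held, and the
	 * release side must wake anyone blocked in vs_async_wait().  The
	 * VS_LOCK/VS_UNLOCK macros are assumed from default_pager_internal.h.
	 */
#if 0
	vs_async_wait(vs);		/* drain pending async writes */
	vs->vs_async_pending += 1;	/* pin: vs cannot be destroyed */

	/* ... work on vs with the list lock and vs lock dropped ... */

	VS_LOCK(vs);
	vs->vs_async_pending -= 1;	/* unpin */
	if (vs->vs_async_pending == 0 && vs->vs_waiting_async) {
		vs->vs_waiting_async = FALSE;
		VS_UNLOCK(vs);
		thread_wakeup(&vs->vs_async_pending);
	} else {
		VS_UNLOCK(vs);
	}
#endif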
652 while(backing_store_release_trigger_disable
!= 0) {
653 VSL_SLEEP(&backing_store_release_trigger_disable
, THREAD_UNINT
);
656 /* we will choose instead to hold a send right */
657 vs_count
= vstruct_list
.vsl_count
;
658 vs
= (vstruct_t
) queue_first((queue_entry_t
)&(vstruct_list
.vsl_queue
));
659 if(vs
== (vstruct_t
)&vstruct_list
) {
664 vs_async_wait(vs
); /* wait for any pending async writes */
665 if ((vs_count
!= 0) && (vs
!= NULL
))
666 vs
->vs_async_pending
+= 1; /* hold parties calling */
670 while((vs_count
!= 0) && (vs
!= NULL
)) {
671 /* We take the count of AMO's before beginning the */
/* transfer of the target segment. */
673 /* We are guaranteed that the target segment cannot get */
674 /* more users. We also know that queue entries are */
675 /* made at the back of the list. If some of the entries */
676 /* we would check disappear while we are traversing the */
677 /* list then we will either check new entries which */
678 /* do not have any backing store in the target segment */
679 /* or re-check old entries. This might not be optimal */
680 /* but it will always be correct. The alternative is to */
681 /* take a snapshot of the list. */
684 if(dp_pages_free
< cluster_transfer_minimum
)
685 error
= KERN_FAILURE
;
687 vm_object_t transfer_object
;
691 transfer_object
= vm_object_allocate(VM_SUPER_CLUSTER
);
693 error
= vm_object_upl_request(transfer_object
,
694 (vm_object_offset_t
)0, VM_SUPER_CLUSTER
,
696 UPL_NO_SYNC
| UPL_CLEAN_IN_PLACE
698 if(error
== KERN_SUCCESS
) {
699 error
= ps_vstruct_transfer_from_segment(
701 upl_commit(upl
, NULL
);
704 error
= KERN_FAILURE
;
706 vm_object_deallocate(transfer_object
);
710 vs
->vs_async_pending
-= 1; /* release vs_async_wait */
711 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
712 vs
->vs_waiting_async
= FALSE
;
714 thread_wakeup(&vs
->vs_async_pending
);
723 while(backing_store_release_trigger_disable
!= 0) {
724 VSL_SLEEP(&backing_store_release_trigger_disable
,
728 next_vs
= (vstruct_t
) queue_next(&(vs
->vs_links
));
729 if((next_vs
!= (vstruct_t
)&vstruct_list
) &&
730 (vs
!= next_vs
) && (vs_count
!= 1)) {
732 vs_async_wait(next_vs
); /* wait for any */
733 /* pending async writes */
734 next_vs
->vs_async_pending
+= 1; /* hold parties */
735 /* calling vs_async_wait */
740 vs
->vs_async_pending
-= 1;
741 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
742 vs
->vs_waiting_async
= FALSE
;
744 thread_wakeup(&vs
->vs_async_pending
);
748 if((vs
== next_vs
) || (next_vs
== (vstruct_t
)&vstruct_list
))
759 default_pager_backing_store_delete(
760 MACH_PORT_FACE backing_store
)
766 int interim_pages_removed
= 0;
769 if ((bs
= backing_store_lookup(backing_store
)) == BACKING_STORE_NULL
)
770 return KERN_INVALID_ARGUMENT
;
773 /* not implemented */
780 error
= KERN_SUCCESS
;
781 for (i
= 0; i
<= paging_segment_max
; i
++) {
782 ps
= paging_segments
[i
];
783 if (ps
!= PAGING_SEGMENT_NULL
&&
785 ! ps
->ps_going_away
) {
787 /* disable access to this segment */
788 ps
->ps_going_away
= TRUE
;
791 * The "ps" segment is "off-line" now,
792 * we can try and delete it...
794 if(dp_pages_free
< (cluster_transfer_minimum
796 error
= KERN_FAILURE
;
800 /* remove all pages associated with the */
801 /* segment from the list of free pages */
802 /* when transfer is through, all target */
803 /* segment pages will appear to be free */
805 dp_pages_free
-= ps
->ps_pgcount
;
806 interim_pages_removed
+= ps
->ps_pgcount
;
808 error
= ps_delete(ps
);
810 if (error
!= KERN_SUCCESS
) {
812 * We couldn't delete the segment,
813 * probably because there's not enough
814 * virtual memory left.
815 * Re-enable all the segments.
824 if (error
!= KERN_SUCCESS
) {
825 for (i
= 0; i
<= paging_segment_max
; i
++) {
826 ps
= paging_segments
[i
];
827 if (ps
!= PAGING_SEGMENT_NULL
&&
831 /* re-enable access to this segment */
832 ps
->ps_going_away
= FALSE
;
836 dp_pages_free
+= interim_pages_removed
;
842 for (i
= 0; i
<= paging_segment_max
; i
++) {
843 ps
= paging_segments
[i
];
844 if (ps
!= PAGING_SEGMENT_NULL
&&
846 if(ps
->ps_going_away
) {
847 paging_segments
[i
] = PAGING_SEGMENT_NULL
;
848 paging_segment_count
--;
850 kfree((vm_offset_t
)ps
->ps_bmap
,
851 RMAPSIZE(ps
->ps_ncls
));
852 kfree((vm_offset_t
)ps
, sizeof *ps
);
857 /* Scan the entire ps array separately to make certain we find the */
858 /* proper paging_segment_max */
859 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
860 if(paging_segments
[i
] != PAGING_SEGMENT_NULL
)
861 paging_segment_max
= i
;
867 * All the segments have been deleted.
868 * We can remove the backing store.
872 * Disable lookups of this backing store.
874 if((void *)bs
->bs_port
->alias
!= NULL
)
875 kfree((vm_offset_t
) bs
->bs_port
->alias
,
876 sizeof (struct vstruct_alias
));
877 ipc_port_dealloc_kernel((ipc_port_t
) (bs
->bs_port
));
878 bs
->bs_port
= MACH_PORT_NULL
;
882 * Remove backing store from backing_store list.
885 queue_remove(&backing_store_list
.bsl_queue
, bs
, backing_store_t
,
890 * Free the backing store structure.
892 kfree((vm_offset_t
)bs
, sizeof *bs
);
897 int ps_enter(paging_segment_t
); /* forward */
907 for (i
= 0; i
< MAX_NUM_PAGING_SEGMENTS
; i
++) {
908 if (paging_segments
[i
] == PAGING_SEGMENT_NULL
)
912 if (i
< MAX_NUM_PAGING_SEGMENTS
) {
913 paging_segments
[i
] = ps
;
914 if (i
> paging_segment_max
)
915 paging_segment_max
= i
;
916 paging_segment_count
++;
917 if ((ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_NOPRI
) ||
918 (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
))
919 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
923 return KERN_RESOURCE_SHORTAGE
;
932 default_pager_add_segment(
933 MACH_PORT_FACE backing_store
,
934 MACH_PORT_FACE device
,
944 if ((bs
= backing_store_lookup(backing_store
))
945 == BACKING_STORE_NULL
)
946 return KERN_INVALID_ARGUMENT
;
949 for (i
= 0; i
<= paging_segment_max
; i
++) {
950 ps
= paging_segments
[i
];
951 if (ps
== PAGING_SEGMENT_NULL
)
955 * Check for overlap on same device.
957 if (!(ps
->ps_device
!= device
958 || offset
>= ps
->ps_offset
+ ps
->ps_recnum
959 || offset
+ count
<= ps
->ps_offset
)) {
962 return KERN_INVALID_ARGUMENT
;
968 * Set up the paging segment
970 ps
= (paging_segment_t
) kalloc(sizeof (struct paging_segment
));
971 if (ps
== PAGING_SEGMENT_NULL
) {
973 return KERN_RESOURCE_SHORTAGE
;
976 ps
->ps_segtype
= PS_PARTITION
;
977 ps
->ps_device
= device
;
978 ps
->ps_offset
= offset
;
979 ps
->ps_record_shift
= local_log2(vm_page_size
/ record_size
);
980 ps
->ps_recnum
= count
;
981 ps
->ps_pgnum
= count
>> ps
->ps_record_shift
;
983 ps
->ps_pgcount
= ps
->ps_pgnum
;
984 ps
->ps_clshift
= local_log2(bs
->bs_clsize
);
985 ps
->ps_clcount
= ps
->ps_ncls
= ps
->ps_pgcount
>> ps
->ps_clshift
;
989 ps
->ps_bmap
= (unsigned char *) kalloc(RMAPSIZE(ps
->ps_ncls
));
991 kfree((vm_offset_t
)ps
, sizeof *ps
);
993 return KERN_RESOURCE_SHORTAGE
;
995 for (i
= 0; i
< ps
->ps_ncls
; i
++) {
996 clrbit(ps
->ps_bmap
, i
);
999 ps
->ps_going_away
= FALSE
;
1002 if ((error
= ps_enter(ps
)) != 0) {
1003 kfree((vm_offset_t
)ps
->ps_bmap
, RMAPSIZE(ps
->ps_ncls
));
1004 kfree((vm_offset_t
)ps
, sizeof *ps
);
1006 return KERN_RESOURCE_SHORTAGE
;
1009 bs
->bs_pages_free
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1010 bs
->bs_pages_total
+= ps
->ps_clcount
<< ps
->ps_clshift
;
1014 dp_pages_free
+= ps
->ps_pgcount
;
1017 bs_more_space(ps
->ps_clcount
);
1019 DEBUG(DEBUG_BS_INTERNAL
,
1020 ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
1021 device
, offset
, count
, record_size
,
1022 ps
->ps_record_shift
, ps
->ps_pgnum
));
1024 return KERN_SUCCESS
;
1030 MACH_PORT_FACE master
)
1032 security_token_t null_security_token
= {
1035 MACH_PORT_FACE device
;
1036 int info
[DEV_GET_SIZE_COUNT
];
1037 mach_msg_type_number_t info_count
;
1038 MACH_PORT_FACE bs
= MACH_PORT_NULL
;
1039 unsigned int rec_size
;
1042 MACH_PORT_FACE reply_port
;
1044 if (ds_device_open_sync(master
, MACH_PORT_NULL
, D_READ
| D_WRITE
,
1045 null_security_token
, dev_name
, &device
))
1048 info_count
= DEV_GET_SIZE_COUNT
;
1049 if (!ds_device_get_status(device
, DEV_GET_SIZE
, info
, &info_count
)) {
1050 rec_size
= info
[DEV_GET_SIZE_RECORD_SIZE
];
1051 count
= info
[DEV_GET_SIZE_DEVICE_SIZE
] / rec_size
;
1052 clsize
= bs_get_global_clsize(0);
1053 if (!default_pager_backing_store_create(
1054 default_pager_object
,
1055 DEFAULT_PAGER_BACKING_STORE_MAXPRI
,
1056 (clsize
* vm_page_size
),
1058 if (!default_pager_add_segment(bs
, device
,
1059 0, count
, rec_size
)) {
1062 ipc_port_release_receive(bs
);
1066 ipc_port_release_send(device
);
1069 #endif /* DEVICE_PAGING */
1074 vs_alloc_async(void)
1076 struct vs_async
*vsa
;
1077 MACH_PORT_FACE reply_port
;
1081 if (vs_async_free_list
== NULL
) {
1083 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1086 * Try allocating a reply port named after the
1087 * address of the vs_async structure.
1089 struct vstruct_alias
*alias_struct
;
1091 reply_port
= ipc_port_alloc_kernel();
1092 alias_struct
= (struct vstruct_alias
*)
1093 kalloc(sizeof (struct vstruct_alias
));
1094 if(alias_struct
!= NULL
) {
1095 alias_struct
->vs
= (struct vstruct
*)vsa
;
1096 alias_struct
->name
= ISVS
;
1097 reply_port
->alias
= (int) alias_struct
;
1098 vsa
->reply_port
= reply_port
;
1099 vs_alloc_async_count
++;
1102 vs_alloc_async_failed
++;
1103 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1105 kfree((vm_offset_t
)vsa
,
1106 sizeof (struct vs_async
));
1111 vsa
= vs_async_free_list
;
1112 vs_async_free_list
= vs_async_free_list
->vsa_next
;
1121 struct vs_async
*vsa
)
1124 vsa
->vsa_next
= vs_async_free_list
;
1125 vs_async_free_list
= vsa
;
1129 #else /* VS_ASYNC_REUSE */
1132 vs_alloc_async(void)
1134 struct vs_async
*vsa
;
1135 MACH_PORT_FACE reply_port
;
1138 vsa
= (struct vs_async
*) kalloc(sizeof (struct vs_async
));
1141 * Try allocating a reply port named after the
1142 * address of the vs_async structure.
1144 reply_port
= ipc_port_alloc_kernel();
1145 alias_struct
= (vstruct_alias
*)
1146 kalloc(sizeof (struct vstruct_alias
));
1147 if(alias_struct
!= NULL
) {
1148 alias_struct
->vs
= reply_port
;
1149 alias_struct
->name
= ISVS
;
1150 reply_port
->alias
= (int) vsa
;
1151 vsa
->reply_port
= reply_port
;
1152 vs_alloc_async_count
++;
1155 vs_alloc_async_failed
++;
1156 ipc_port_dealloc_kernel((MACH_PORT_FACE
)
1158 kfree((vm_offset_t
) vsa
,
1159 sizeof (struct vs_async
));
1169 struct vs_async
*vsa
)
1171 MACH_PORT_FACE reply_port
;
1174 reply_port
= vsa
->reply_port
;
1175 kfree((vm_offset_t
) reply_port
->alias
, sizeof (struct vstuct_alias
));
1176 kfree((vm_offset_t
) vsa
, sizeof (struct vs_async
));
1177 ipc_port_dealloc_kernel((MACH_PORT_FACE
) (reply_port
));
1180 vs_alloc_async_count
--;
1185 #endif /* VS_ASYNC_REUSE */
1187 zone_t vstruct_zone
;
1196 vs
= (vstruct_t
) zalloc(vstruct_zone
);
1197 if (vs
== VSTRUCT_NULL
) {
1198 return VSTRUCT_NULL
;
1204 * The following fields will be provided later.
1206 vs
->vs_mem_obj
= NULL
;
1207 vs
->vs_control
= MEMORY_OBJECT_CONTROL_NULL
;
1208 vs
->vs_references
= 1;
1212 vs
->vs_waiting_seqno
= FALSE
;
1213 vs
->vs_waiting_read
= FALSE
;
1214 vs
->vs_waiting_write
= FALSE
;
1215 vs
->vs_waiting_async
= FALSE
;
1217 mutex_init(&vs
->vs_waiting_seqno
, ETAP_DPAGE_VSSEQNO
);
1218 mutex_init(&vs
->vs_waiting_read
, ETAP_DPAGE_VSREAD
);
1219 mutex_init(&vs
->vs_waiting_write
, ETAP_DPAGE_VSWRITE
);
1220 mutex_init(&vs
->vs_waiting_refs
, ETAP_DPAGE_VSREFS
);
1221 mutex_init(&vs
->vs_waiting_async
, ETAP_DPAGE_VSASYNC
);
1229 vs
->vs_clshift
= local_log2(bs_get_global_clsize(0));
1230 vs
->vs_size
= ((atop_32(round_page_32(size
)) - 1) >> vs
->vs_clshift
) + 1;
1231 vs
->vs_async_pending
= 0;
1234 * Allocate the pmap, either CLMAP_SIZE or INDIRECT_CLMAP_SIZE
1235 * depending on the size of the memory object.
1237 if (INDIRECT_CLMAP(vs
->vs_size
)) {
1238 vs
->vs_imap
= (struct vs_map
**)
1239 kalloc(INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1240 vs
->vs_indirect
= TRUE
;
1242 vs
->vs_dmap
= (struct vs_map
*)
1243 kalloc(CLMAP_SIZE(vs
->vs_size
));
1244 vs
->vs_indirect
= FALSE
;
1246 vs
->vs_xfer_pending
= FALSE
;
1247 DEBUG(DEBUG_VS_INTERNAL
,
1248 ("map=0x%x, indirect=%d\n", (int) vs
->vs_dmap
, vs
->vs_indirect
));
1251 * Check to see that we got the space.
1254 kfree((vm_offset_t
)vs
, sizeof *vs
);
1255 return VSTRUCT_NULL
;
1259 * Zero the indirect pointers, or clear the direct pointers.
1261 if (vs
->vs_indirect
)
1262 memset(vs
->vs_imap
, 0,
1263 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1265 for (i
= 0; i
< vs
->vs_size
; i
++)
1266 VSM_CLR(vs
->vs_dmap
[i
]);
1268 VS_MAP_LOCK_INIT(vs
);
1270 bs_commit(vs
->vs_size
);
1275 paging_segment_t
ps_select_segment(int, int *); /* forward */
1282 paging_segment_t ps
;
1287 * Optimize case where there's only one segment.
1288 * paging_segment_max will index the one and only segment.
1292 if (paging_segment_count
== 1) {
1293 paging_segment_t lps
; /* used to avoid extra PS_UNLOCK */
1294 ipc_port_t trigger
= IP_NULL
;
1296 ps
= paging_segments
[paging_segment_max
];
1297 *psindex
= paging_segment_max
;
1299 if (ps
->ps_going_away
) {
1300 /* this segment is being turned off */
1301 lps
= PAGING_SEGMENT_NULL
;
1303 ASSERT(ps
->ps_clshift
>= shift
);
1304 if (ps
->ps_clcount
) {
1306 dp_pages_free
-= 1 << ps
->ps_clshift
;
1307 if(min_pages_trigger_port
&&
1308 (dp_pages_free
< minimum_pages_remaining
)) {
1309 trigger
= min_pages_trigger_port
;
1310 min_pages_trigger_port
= NULL
;
1315 lps
= PAGING_SEGMENT_NULL
;
1320 if (trigger
!= IP_NULL
) {
1321 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1322 ipc_port_release_send(trigger
);
1327 if (paging_segment_count
== 0) {
1329 return PAGING_SEGMENT_NULL
;
1333 i
>= BS_MINPRI
; i
--) {
1336 if ((ps_select_array
[i
] == BS_NOPRI
) ||
1337 (ps_select_array
[i
] == BS_FULLPRI
))
1339 start_index
= ps_select_array
[i
];
1341 if(!(paging_segments
[start_index
])) {
1343 physical_transfer_cluster_count
= 0;
1345 else if ((physical_transfer_cluster_count
+1) == (ALLOC_STRIDE
>>
1346 (((paging_segments
[start_index
])->ps_clshift
)
1347 + vm_page_shift
))) {
1348 physical_transfer_cluster_count
= 0;
1349 j
= start_index
+ 1;
1351 physical_transfer_cluster_count
+=1;
1353 if(start_index
== 0)
1354 start_index
= paging_segment_max
;
1356 start_index
= start_index
- 1;
1360 if (j
> paging_segment_max
)
1362 if ((ps
= paging_segments
[j
]) &&
1363 (ps
->ps_bs
->bs_priority
== i
)) {
1365 * Force the ps cluster size to be
1366 * >= that of the vstruct.
1369 if (ps
->ps_going_away
) {
1370 /* this segment is being turned off */
1371 } else if ((ps
->ps_clcount
) &&
1372 (ps
->ps_clshift
>= shift
)) {
1373 ipc_port_t trigger
= IP_NULL
;
1376 dp_pages_free
-= 1 << ps
->ps_clshift
;
1377 if(min_pages_trigger_port
&&
1379 minimum_pages_remaining
)) {
1380 trigger
= min_pages_trigger_port
;
1381 min_pages_trigger_port
= NULL
;
1385 * found one, quit looking.
1387 ps_select_array
[i
] = j
;
1390 if (trigger
!= IP_NULL
) {
1391 default_pager_space_alert(
1394 ipc_port_release_send(trigger
);
1401 if (j
== start_index
) {
1403 * none at this priority -- mark it full
1405 ps_select_array
[i
] = BS_FULLPRI
;
1412 return PAGING_SEGMENT_NULL
;
1415 vm_offset_t
ps_allocate_cluster(vstruct_t
, int *, paging_segment_t
); /*forward*/
1418 ps_allocate_cluster(
1421 paging_segment_t use_ps
)
1425 paging_segment_t ps
;
1426 vm_offset_t cluster
;
1427 ipc_port_t trigger
= IP_NULL
;
1430 * Find best paging segment.
1431 * ps_select_segment will decrement cluster count on ps.
1432 * Must pass cluster shift to find the most appropriate segment.
1434 /* NOTE: The addition of paging segment delete capability threatened
1435 * to seriously complicate the treatment of paging segments in this
1436 * module and the ones that call it (notably ps_clmap), because of the
1437 * difficulty in assuring that the paging segment would continue to
1438 * exist between being unlocked and locked. This was
1439 * avoided because all calls to this module are based in either
1440 * dp_memory_object calls which rely on the vs lock, or by
1441 * the transfer function which is part of the segment delete path.
1442 * The transfer function which is part of paging segment delete is
1443 * protected from multiple callers by the backing store lock.
1444 * The paging segment delete function treats mappings to a paging
1445 * segment on a vstruct by vstruct basis, locking the vstruct targeted
1446 * while data is transferred to the remaining segments. This is in
1447 * line with the view that incomplete or in-transition mappings between
1448 * data, a vstruct, and backing store are protected by the vs lock.
1449 * This and the ordering of the paging segment "going_away" bit setting
1452 if (use_ps
!= PAGING_SEGMENT_NULL
) {
1457 ASSERT(ps
->ps_clcount
!= 0);
1460 dp_pages_free
-= 1 << ps
->ps_clshift
;
1461 if(min_pages_trigger_port
&&
1462 (dp_pages_free
< minimum_pages_remaining
)) {
1463 trigger
= min_pages_trigger_port
;
1464 min_pages_trigger_port
= NULL
;
1468 if (trigger
!= IP_NULL
) {
1469 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1470 ipc_port_release_send(trigger
);
1473 } else if ((ps
= ps_select_segment(vs
->vs_clshift
, psindex
)) ==
1474 PAGING_SEGMENT_NULL
) {
1476 bs_no_paging_space(TRUE
);
1481 dprintf(("no space in available paging segments; "
1482 "swapon suggested\n"));
1483 /* the count got off maybe, reset to zero */
1486 if(min_pages_trigger_port
) {
1487 trigger
= min_pages_trigger_port
;
1488 min_pages_trigger_port
= NULL
;
1492 if (trigger
!= IP_NULL
) {
1493 default_pager_space_alert(trigger
, HI_WAT_ALERT
);
1494 ipc_port_release_send(trigger
);
1496 return (vm_offset_t
) -1;
1500 * Look for an available cluster. At the end of the loop,
1501 * byte_num is the byte offset and bit_num is the bit offset of the
1502 * first zero bit in the paging segment bitmap.
1505 byte_num
= ps
->ps_hint
;
1506 for (; byte_num
< howmany(ps
->ps_ncls
, NBBY
); byte_num
++) {
1507 if (*(ps
->ps_bmap
+ byte_num
) != BYTEMASK
) {
1508 for (bit_num
= 0; bit_num
< NBBY
; bit_num
++) {
1509 if (isclr((ps
->ps_bmap
+ byte_num
), bit_num
))
1512 ASSERT(bit_num
!= NBBY
);
1516 ps
->ps_hint
= byte_num
;
1517 cluster
= (byte_num
*NBBY
) + bit_num
;
1519 /* Space was reserved, so this must be true */
1520 ASSERT(cluster
< ps
->ps_ncls
);
1522 setbit(ps
->ps_bmap
, cluster
);
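
	/*
	 * Worked example (compiled out) of the bitmap scan above: with
	 * NBBY == 8, a bitmap byte of 0xfb (11111011) is != BYTEMASK, and
	 * isclr() finds bit 2 as the first clear bit, so the free cluster
	 * is (byte_num * NBBY) + 2.
	 */
#if 0
	{
		unsigned char b = 0xfb;
		int bit;

		for (bit = 0; bit < NBBY; bit++)
			if (isclr(&b, bit))
				break;
		assert(bit == 2);
	}
#endif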
1528 void ps_deallocate_cluster(paging_segment_t
, vm_offset_t
); /* forward */
1531 ps_deallocate_cluster(
1532 paging_segment_t ps
,
1533 vm_offset_t cluster
)
1536 if (cluster
>= (vm_offset_t
) ps
->ps_ncls
)
1537 panic("ps_deallocate_cluster: Invalid cluster number");
1540 * Lock the paging segment, clear the cluster's bitmap and increment the
1541 * number of free cluster.
1545 clrbit(ps
->ps_bmap
, cluster
);
1547 dp_pages_free
+= 1 << ps
->ps_clshift
;
1551 * Move the hint down to the freed cluster if it is
1552 * less than the current hint.
1554 if ((cluster
/NBBY
) < ps
->ps_hint
) {
1555 ps
->ps_hint
= (cluster
/NBBY
);
1561 * If we're freeing space on a full priority, reset the array.
1564 if (ps_select_array
[ps
->ps_bs
->bs_priority
] == BS_FULLPRI
)
1565 ps_select_array
[ps
->ps_bs
->bs_priority
] = 0;
1571 void ps_dealloc_vsmap(struct vs_map
*, vm_size_t
); /* forward */
1575 struct vs_map
*vsmap
,
1579 for (i
= 0; i
< size
; i
++)
1580 if (!VSM_ISCLR(vsmap
[i
]) && !VSM_ISERR(vsmap
[i
]))
1581 ps_deallocate_cluster(VSM_PS(vsmap
[i
]),
1582 VSM_CLOFF(vsmap
[i
]));
1595 * If this is an indirect structure, then we walk through the valid
1596 * (non-zero) indirect pointers and deallocate the clusters
1597 * associated with each used map entry (via ps_dealloc_vsmap).
1598 * When all of the clusters in an indirect block have been
1599 * freed, we deallocate the block. When all of the indirect
1600 * blocks have been deallocated we deallocate the memory
1601 * holding the indirect pointers.
1603 if (vs
->vs_indirect
) {
1604 for (i
= 0; i
< INDIRECT_CLMAP_ENTRIES(vs
->vs_size
); i
++) {
1605 if (vs
->vs_imap
[i
] != NULL
) {
1606 ps_dealloc_vsmap(vs
->vs_imap
[i
], CLMAP_ENTRIES
);
1607 kfree((vm_offset_t
)vs
->vs_imap
[i
],
1611 kfree((vm_offset_t
)vs
->vs_imap
,
1612 INDIRECT_CLMAP_SIZE(vs
->vs_size
));
1615 * Direct map. Free used clusters, then memory.
1617 ps_dealloc_vsmap(vs
->vs_dmap
, vs
->vs_size
);
1618 kfree((vm_offset_t
)vs
->vs_dmap
, CLMAP_SIZE(vs
->vs_size
));
1622 bs_commit(- vs
->vs_size
);
1624 zfree(vstruct_zone
, (vm_offset_t
)vs
);
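
/*
 * Sketch (compiled out) of the two-level map shape the walk above relies
 * on: for an indirect vstruct, cluster "cl" lives at
 * vs->vs_imap[cl / CLMAP_ENTRIES][cl % CLMAP_ENTRIES]; a direct vstruct
 * keeps it at vs->vs_dmap[cl].  ps_clmap() performs the same split.  The
 * helper name is made up for the example only.
 */
#if 0
static struct vs_map *
vs_map_entry_sketch(vstruct_t vs, vm_offset_t cl)
{
	if (vs->vs_indirect) {
		struct vs_map *block = vs->vs_imap[cl / CLMAP_ENTRIES];

		return (block == NULL) ? NULL : &block[cl % CLMAP_ENTRIES];
	}
	return &vs->vs_dmap[cl];
}
#endif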
1627 int ps_map_extend(vstruct_t
, int); /* forward */
1633 struct vs_map
**new_imap
;
1634 struct vs_map
*new_dmap
= NULL
;
1637 void *old_map
= NULL
;
1638 int old_map_size
= 0;
1640 if (vs
->vs_size
>= new_size
) {
1642 * Someone has already done the work.
1648 * If the new size extends into the indirect range, then we have one
1649 * of two cases: we are going from indirect to indirect, or we are
1650 * going from direct to indirect. If we are going from indirect to
1651 * indirect, then it is possible that the new size will fit in the old
1652 * indirect map. If this is the case, then just reset the size of the
1653 * vstruct map and we are done. If the new size will not
1654 * fit into the old indirect map, then we have to allocate a new
1655 * indirect map and copy the old map pointers into this new map.
1657 * If we are going from direct to indirect, then we have to allocate a
1658 * new indirect map and copy the old direct pages into the first
1659 * indirect page of the new map.
1660 * NOTE: allocating memory here is dangerous, as we're in the
1663 if (INDIRECT_CLMAP(new_size
)) {
1664 int new_map_size
= INDIRECT_CLMAP_SIZE(new_size
);
1667 * Get a new indirect map and zero it.
1669 old_map_size
= INDIRECT_CLMAP_SIZE(vs
->vs_size
);
1670 if (vs
->vs_indirect
&&
1671 (new_map_size
== old_map_size
)) {
1672 bs_commit(new_size
- vs
->vs_size
);
1673 vs
->vs_size
= new_size
;
1677 new_imap
= (struct vs_map
**)kalloc(new_map_size
);
1678 if (new_imap
== NULL
) {
1681 memset(new_imap
, 0, new_map_size
);
1683 if (vs
->vs_indirect
) {
1684 /* Copy old entries into new map */
1685 memcpy(new_imap
, vs
->vs_imap
, old_map_size
);
1686 /* Arrange to free the old map */
1687 old_map
= (void *) vs
->vs_imap
;
1689 } else { /* Old map was a direct map */
1690 /* Allocate an indirect page */
1691 if ((new_imap
[0] = (struct vs_map
*)
1692 kalloc(CLMAP_THRESHOLD
)) == NULL
) {
1693 kfree((vm_offset_t
)new_imap
, new_map_size
);
1696 new_dmap
= new_imap
[0];
1697 newdsize
= CLMAP_ENTRIES
;
1701 newdsize
= new_size
;
1703 * If the new map is a direct map, then the old map must
1704 * also have been a direct map. All we have to do is
1705 * to allocate a new direct map, copy the old entries
1706 * into it and free the old map.
1708 if ((new_dmap
= (struct vs_map
*)
1709 kalloc(CLMAP_SIZE(new_size
))) == NULL
) {
1715 /* Free the old map */
1716 old_map
= (void *) vs
->vs_dmap
;
1717 old_map_size
= CLMAP_SIZE(vs
->vs_size
);
1719 /* Copy info from the old map into the new map */
1720 memcpy(new_dmap
, vs
->vs_dmap
, old_map_size
);
1722 /* Initialize the rest of the new map */
1723 for (i
= vs
->vs_size
; i
< newdsize
; i
++)
1724 VSM_CLR(new_dmap
[i
]);
1727 vs
->vs_imap
= new_imap
;
1728 vs
->vs_indirect
= TRUE
;
1730 vs
->vs_dmap
= new_dmap
;
1731 bs_commit(new_size
- vs
->vs_size
);
1732 vs
->vs_size
= new_size
;
1734 kfree((vm_offset_t
)old_map
, old_map_size
);
1742 struct clmap
*clmap
,
1747 vm_offset_t cluster
; /* The cluster of offset. */
1748 vm_offset_t newcl
; /* The new cluster allocated. */
1751 struct vs_map
*vsmap
;
1755 ASSERT(vs
->vs_dmap
);
1756 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1759 * Initialize cluster error value
1761 clmap
->cl_error
= 0;
1764 * If the object has grown, extend the page map.
1766 if (cluster
>= vs
->vs_size
) {
1767 if (flag
== CL_FIND
) {
1768 /* Do not allocate if just doing a lookup */
1770 return (vm_offset_t
) -1;
1772 if (ps_map_extend(vs
, cluster
+ 1)) {
1774 return (vm_offset_t
) -1;
1779 * Look for the desired cluster. If the map is indirect, then we
1780 * have a two level lookup. First find the indirect block, then
1781 * find the actual cluster. If the indirect block has not yet
1782 * been allocated, then do so. If the cluster has not yet been
1783 * allocated, then do so.
1785 * If any of the allocations fail, then return an error.
1786 * Don't allocate if just doing a lookup.
1788 if (vs
->vs_indirect
) {
1789 long ind_block
= cluster
/CLMAP_ENTRIES
;
1791 /* Is the indirect block allocated? */
1792 vsmap
= vs
->vs_imap
[ind_block
];
1793 if (vsmap
== NULL
) {
1794 if (flag
== CL_FIND
) {
1796 return (vm_offset_t
) -1;
1799 /* Allocate the indirect block */
1800 vsmap
= (struct vs_map
*) kalloc(CLMAP_THRESHOLD
);
1801 if (vsmap
== NULL
) {
1803 return (vm_offset_t
) -1;
1805 /* Initialize the cluster offsets */
1806 for (i
= 0; i
< CLMAP_ENTRIES
; i
++)
1808 vs
->vs_imap
[ind_block
] = vsmap
;
1811 vsmap
= vs
->vs_dmap
;
1814 vsmap
+= cluster%CLMAP_ENTRIES
;
1817 * At this point, vsmap points to the struct vs_map desired.
1819 * Look in the map for the cluster, if there was an error on a
1820 * previous write, flag it and return. If it is not yet
1821 * allocated, then allocate it, if we're writing; if we're
1822 * doing a lookup and the cluster's not allocated, return error.
1824 if (VSM_ISERR(*vsmap
)) {
1825 clmap
->cl_error
= VSM_GETERR(*vsmap
);
1827 return (vm_offset_t
) -1;
1828 } else if (VSM_ISCLR(*vsmap
)) {
1831 if (flag
== CL_FIND
) {
1833 * If there's an error and the entry is clear, then
1834 * we've run out of swap space. Record the error
1838 VSM_SETERR(*vsmap
, error
);
1841 return (vm_offset_t
) -1;
1844 * Attempt to allocate a cluster from the paging segment
1846 newcl
= ps_allocate_cluster(vs
, &psindex
,
1847 PAGING_SEGMENT_NULL
);
1850 return (vm_offset_t
) -1;
1853 VSM_SETCLOFF(*vsmap
, newcl
);
1854 VSM_SETPS(*vsmap
, psindex
);
1857 newcl
= VSM_CLOFF(*vsmap
);
1860 * Fill in pertinent fields of the clmap
1862 clmap
->cl_ps
= VSM_PS(*vsmap
);
1863 clmap
->cl_numpages
= VSCLSIZE(vs
);
1864 clmap
->cl_bmap
.clb_map
= (unsigned int) VSM_BMAP(*vsmap
);
1867 * Byte offset in paging segment is byte offset to cluster plus
1868 * byte offset within cluster. It looks ugly, but should be
1871 ASSERT(trunc_page(offset
) == offset
);
1872 newcl
= ptoa_32(newcl
) << vs
->vs_clshift
;
1873 newoff
= offset
& ((1<<(vm_page_shift
+ vs
->vs_clshift
)) - 1);
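
	/*
	 * Worked example (compiled out) of the arithmetic above, assuming
	 * 4K pages (vm_page_shift == 12) and vs_clshift == 2: cluster 5
	 * starts at byte 5 << 14 == 0x14000 in the segment, and an object
	 * offset of 0x15000 keeps its low 14 bits, 0x1000, as the offset
	 * within the cluster, so the returned byte offset is 0x15000.
	 */
#if 0
	assert((ptoa_32(5) << 2) == 0x14000);
	assert((0x15000 & ((1 << (12 + 2)) - 1)) == 0x1000);
#endif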
1874 if (flag
== CL_ALLOC
) {
1876 * set bits in the allocation bitmap according to which
1877 * pages were requested. size is in bytes.
1879 i
= atop_32(newoff
);
1880 while ((size
> 0) && (i
< VSCLSIZE(vs
))) {
1881 VSM_SETALLOC(*vsmap
, i
);
1883 size
-= vm_page_size
;
1886 clmap
->cl_alloc
.clb_map
= (unsigned int) VSM_ALLOC(*vsmap
);
1889 * Offset is not cluster aligned, so number of pages
1890 * and bitmaps must be adjusted
1892 clmap
->cl_numpages
-= atop_32(newoff
);
1893 CLMAP_SHIFT(clmap
, vs
);
1894 CLMAP_SHIFTALLOC(clmap
, vs
);
1899 * The setting of valid bits and handling of write errors
1900 * must be done here, while we hold the lock on the map.
1901 * It logically should be done in ps_vs_write_complete().
1902 * The size and error information has been passed from
1903 * ps_vs_write_complete(). If the size parameter is non-zero,
1904 * then there is work to be done. If error is also non-zero,
1905 * then the error number is recorded in the cluster and the
1906 * entire cluster is in error.
1908 if (size
&& flag
== CL_FIND
) {
1909 vm_offset_t off
= (vm_offset_t
) 0;
1912 for (i
= VSCLSIZE(vs
) - clmap
->cl_numpages
; size
> 0;
1914 VSM_SETPG(*vsmap
, i
);
1915 size
-= vm_page_size
;
1917 ASSERT(i
<= VSCLSIZE(vs
));
1919 BS_STAT(clmap
->cl_ps
->ps_bs
,
1920 clmap
->cl_ps
->ps_bs
->bs_pages_out_fail
+=
1922 off
= VSM_CLOFF(*vsmap
);
1923 VSM_SETERR(*vsmap
, error
);
1926 * Deallocate cluster if error, and no valid pages
1929 if (off
!= (vm_offset_t
) 0)
1930 ps_deallocate_cluster(clmap
->cl_ps
, off
);
1932 return (vm_offset_t
) 0;
1936 DEBUG(DEBUG_VS_INTERNAL
,
1937 ("returning 0x%X,vs=0x%X,vsmap=0x%X,flag=%d\n",
1938 newcl
+newoff
, (int) vs
, (int) vsmap
, flag
));
1939 DEBUG(DEBUG_VS_INTERNAL
,
1940 (" clmap->cl_ps=0x%X,cl_numpages=%d,clbmap=0x%x,cl_alloc=%x\n",
1941 (int) clmap
->cl_ps
, clmap
->cl_numpages
,
1942 (int) clmap
->cl_bmap
.clb_map
, (int) clmap
->cl_alloc
.clb_map
));
1944 return (newcl
+ newoff
);
1947 void ps_clunmap(vstruct_t
, vm_offset_t
, vm_size_t
); /* forward */
1955 vm_offset_t cluster
; /* The cluster number of offset */
1956 struct vs_map
*vsmap
;
1961 * Loop through all clusters in this range, freeing paging segment
1962 * clusters and map entries as encountered.
1964 while (length
> 0) {
1968 cluster
= atop_32(offset
) >> vs
->vs_clshift
;
1969 if (vs
->vs_indirect
) /* indirect map */
1970 vsmap
= vs
->vs_imap
[cluster
/CLMAP_ENTRIES
];
1972 vsmap
= vs
->vs_dmap
;
1973 if (vsmap
== NULL
) {
1977 vsmap
+= cluster%CLMAP_ENTRIES
;
1978 if (VSM_ISCLR(*vsmap
)) {
1979 length
-= vm_page_size
;
1980 offset
+= vm_page_size
;
1984 * We've got a valid mapping. Clear it and deallocate
1985 * paging segment cluster pages.
 * Optimize for entire cluster clearing.
1988 if (newoff
= (offset
&((1<<(vm_page_shift
+vs
->vs_clshift
))-1))) {
1990 * Not cluster aligned.
1992 ASSERT(trunc_page(newoff
) == newoff
);
1993 i
= atop_32(newoff
);
1996 while ((i
< VSCLSIZE(vs
)) && (length
> 0)) {
1997 VSM_CLRPG(*vsmap
, i
);
1998 VSM_CLRALLOC(*vsmap
, i
);
1999 length
-= vm_page_size
;
2000 offset
+= vm_page_size
;
2005 * If map entry is empty, clear and deallocate cluster.
2007 if (!VSM_ALLOC(*vsmap
)) {
2008 ps_deallocate_cluster(VSM_PS(*vsmap
),
2017 void ps_vs_write_complete(vstruct_t
, vm_offset_t
, vm_size_t
, int); /* forward */
2020 ps_vs_write_complete(
2029 * Get the struct vsmap for this cluster.
2030 * Use READ, even though it was written, because the
2031 * cluster MUST be present, unless there was an error
2032 * in the original ps_clmap (e.g. no space), in which
2033 * case, nothing happens.
2035 * Must pass enough information to ps_clmap to allow it
2036 * to set the vs_map structure bitmap under lock.
2038 (void) ps_clmap(vs
, offset
, &clmap
, CL_FIND
, size
, error
);
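
	/*
	 * Sketch (compiled out) of the ps_clmap() flag/size combinations
	 * used in this file: CL_ALLOC reserves backing clusters for a
	 * write, CL_FIND with size == 0 is a pure lookup, and CL_FIND with
	 * a non-zero size (as above) marks the written pages valid or
	 * records a write error in the cluster map.
	 */
#if 0
	(void) ps_clmap(vs, offset, &clmap, CL_ALLOC, size, 0);	/* reserve for a write */
	(void) ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);	/* lookup only */
#endif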
2041 void vs_cl_write_complete(vstruct_t
, paging_segment_t
, vm_offset_t
, vm_offset_t
, vm_size_t
, boolean_t
, int); /* forward */
2044 vs_cl_write_complete(
2046 paging_segment_t ps
,
2057 * For internal objects, the error is recorded on a
2058 * per-cluster basis by ps_clmap() which is called
2059 * by ps_vs_write_complete() below.
2061 dprintf(("write failed error = 0x%x\n", error
));
2062 /* add upl_abort code here */
2064 GSTAT(global_stats
.gs_pages_out
+= atop_32(size
));
2066 * Notify the vstruct mapping code, so it can do its accounting.
2068 ps_vs_write_complete(vs
, offset
, size
, error
);
2072 ASSERT(vs
->vs_async_pending
> 0);
2073 vs
->vs_async_pending
-= size
;
2074 if (vs
->vs_async_pending
== 0 && vs
->vs_waiting_async
) {
2075 vs
->vs_waiting_async
= FALSE
;
2077 /* mutex_unlock(&vs->vs_waiting_async); */
2078 thread_wakeup(&vs
->vs_async_pending
);
2085 #ifdef DEVICE_PAGING
2086 kern_return_t
device_write_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2090 MACH_PORT_FACE reply_port
,
2091 kern_return_t device_code
,
2092 io_buf_len_t bytes_written
)
2094 struct vs_async
*vsa
;
2096 vsa
= (struct vs_async
*)
2097 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2099 if (device_code
== KERN_SUCCESS
&& bytes_written
!= vsa
->vsa_size
) {
2100 device_code
= KERN_FAILURE
;
2103 vsa
->vsa_error
= device_code
;
2106 ASSERT(vsa
->vsa_vs
!= VSTRUCT_NULL
);
2107 if(vsa
->vsa_flags
& VSA_TRANSFER
) {
2108 /* revisit when async disk segments redone */
2109 if(vsa
->vsa_error
) {
2110 /* need to consider error condition. re-write data or */
2111 /* throw it away here. */
2113 if(vm_map_copyout(kernel_map
, &ioaddr
,
2114 (vm_map_copy_t
)vsa
->vsa_addr
) != KERN_SUCCESS
)
2115 panic("vs_cluster_write: unable to copy source list\n");
2116 vm_deallocate(kernel_map
, ioaddr
, vsa
->vsa_size
);
2118 ps_vs_write_complete(vsa
->vsa_vs
, vsa
->vsa_offset
,
2119 vsa
->vsa_size
, vsa
->vsa_error
);
2121 vs_cl_write_complete(vsa
->vsa_vs
, vsa
->vsa_ps
, vsa
->vsa_offset
,
2122 vsa
->vsa_addr
, vsa
->vsa_size
, TRUE
,
2127 return KERN_SUCCESS
;
2130 kern_return_t
device_write_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2132 device_write_reply_inband(
2133 MACH_PORT_FACE reply_port
,
2134 kern_return_t return_code
,
2135 io_buf_len_t bytes_written
)
2137 panic("device_write_reply_inband: illegal");
2138 return KERN_SUCCESS
;
2141 kern_return_t
device_read_reply(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_t
, mach_msg_type_number_t
);
2144 MACH_PORT_FACE reply_port
,
2145 kern_return_t return_code
,
2147 mach_msg_type_number_t dataCnt
)
2149 struct vs_async
*vsa
;
2150 vsa
= (struct vs_async
*)
2151 ((struct vstruct_alias
*)(reply_port
->alias
))->vs
;
2152 vsa
->vsa_addr
= (vm_offset_t
)data
;
2153 vsa
->vsa_size
= (vm_size_t
)dataCnt
;
2154 vsa
->vsa_error
= return_code
;
2155 thread_wakeup(&vsa
->vsa_lock
);
2156 return KERN_SUCCESS
;
2159 kern_return_t
device_read_reply_inband(MACH_PORT_FACE
, kern_return_t
, io_buf_ptr_inband_t
, mach_msg_type_number_t
);
2161 device_read_reply_inband(
2162 MACH_PORT_FACE reply_port
,
2163 kern_return_t return_code
,
2164 io_buf_ptr_inband_t data
,
2165 mach_msg_type_number_t dataCnt
)
2167 panic("device_read_reply_inband: illegal");
2168 return KERN_SUCCESS
;
2171 kern_return_t
device_read_reply_overwrite(MACH_PORT_FACE
, kern_return_t
, io_buf_len_t
);
2173 device_read_reply_overwrite(
2174 MACH_PORT_FACE reply_port
,
2175 kern_return_t return_code
,
2176 io_buf_len_t bytes_read
)
2178 panic("device_read_reply_overwrite: illegal\n");
2179 return KERN_SUCCESS
;
2182 kern_return_t
device_open_reply(MACH_PORT_FACE
, kern_return_t
, MACH_PORT_FACE
);
2185 MACH_PORT_FACE reply_port
,
2186 kern_return_t return_code
,
2187 MACH_PORT_FACE device_port
)
2189 panic("device_open_reply: illegal\n");
2190 return KERN_SUCCESS
;
2193 kern_return_t
ps_read_device(paging_segment_t
, vm_offset_t
, vm_offset_t
*, unsigned int, unsigned int *, int); /* forward */
2197 paging_segment_t ps
,
2199 vm_offset_t
*bufferp
,
2201 unsigned int *residualp
,
2205 recnum_t dev_offset
;
2206 unsigned int bytes_wanted
;
2207 unsigned int bytes_read
;
2208 unsigned int total_read
;
2209 vm_offset_t dev_buffer
;
2210 vm_offset_t buf_ptr
;
2211 unsigned int records_read
;
2212 struct vs_async
*vsa
;
2213 mutex_t vs_waiting_read_reply
;
2216 vm_map_copy_t device_data
= NULL
;
2217 default_pager_thread_t
*dpt
= NULL
;
2219 device
= dev_port_lookup(ps
->ps_device
);
2220 clustered_reads
[atop_32(size
)]++;
2222 dev_offset
= (ps
->ps_offset
+
2223 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2224 bytes_wanted
= size
;
2226 *bufferp
= (vm_offset_t
)NULL
;
2229 vsa
= VS_ALLOC_ASYNC();
2233 vsa
->vsa_offset
= 0;
2237 mutex_init(&vsa
->vsa_lock
, ETAP_DPAGE_VSSEQNO
);
2238 ip_lock(vsa
->reply_port
);
2239 vsa
->reply_port
->ip_sorights
++;
2240 ip_reference(vsa
->reply_port
);
2241 ip_unlock(vsa
->reply_port
);
2242 kr
= ds_device_read_common(device
,
2244 (mach_msg_type_name_t
)
2245 MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2249 (IO_READ
| IO_CALL
),
2250 (io_buf_ptr_t
*) &dev_buffer
,
2251 (mach_msg_type_number_t
*) &bytes_read
);
2252 if(kr
== MIG_NO_REPLY
) {
2253 assert_wait(&vsa
->vsa_lock
, THREAD_UNINT
);
2254 thread_block(THREAD_CONTINUE_NULL
);
2256 dev_buffer
= vsa
->vsa_addr
;
2257 bytes_read
= (unsigned int)vsa
->vsa_size
;
2258 kr
= vsa
->vsa_error
;
2261 if (kr
!= KERN_SUCCESS
|| bytes_read
== 0) {
2264 total_read
+= bytes_read
;
2267 * If we got the entire range, use the returned dev_buffer.
2269 if (bytes_read
== size
) {
2270 *bufferp
= (vm_offset_t
)dev_buffer
;
2275 dprintf(("read only %d bytes out of %d\n",
2276 bytes_read
, bytes_wanted
));
2279 dpt
= get_read_buffer();
2280 buf_ptr
= dpt
->dpt_buffer
;
2281 *bufferp
= (vm_offset_t
)buf_ptr
;
2284 * Otherwise, copy the data into the provided buffer (*bufferp)
2285 * and append the rest of the range as it comes in.
2287 memcpy((void *) buf_ptr
, (void *) dev_buffer
, bytes_read
);
2288 buf_ptr
+= bytes_read
;
2289 bytes_wanted
-= bytes_read
;
2290 records_read
= (bytes_read
>>
2291 (vm_page_shift
- ps
->ps_record_shift
));
2292 dev_offset
+= records_read
;
2293 DEBUG(DEBUG_VS_INTERNAL
,
2294 ("calling vm_deallocate(addr=0x%X,size=0x%X)\n",
2295 dev_buffer
, bytes_read
));
2296 if (vm_deallocate(kernel_map
, dev_buffer
, bytes_read
)
2298 Panic("dealloc buf");
2299 } while (bytes_wanted
);
2301 *residualp
= size
- total_read
;
2302 if((dev_buffer
!= *bufferp
) && (total_read
!= 0)) {
2303 vm_offset_t temp_buffer
;
2304 vm_allocate(kernel_map
, &temp_buffer
, total_read
, TRUE
);
2305 memcpy((void *) temp_buffer
, (void *) *bufferp
, total_read
);
2306 if(vm_map_copyin_page_list(kernel_map
, temp_buffer
, total_read
,
2307 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2308 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2309 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2310 (vm_map_copy_t
*)&device_data
, FALSE
))
2311 panic("ps_read_device: cannot copyin locally provided buffer\n");
2313 else if((kr
== KERN_SUCCESS
) && (total_read
!= 0) && (dev_buffer
!= 0)){
2314 if(vm_map_copyin_page_list(kernel_map
, dev_buffer
, bytes_read
,
2315 VM_MAP_COPYIN_OPT_SRC_DESTROY
|
2316 VM_MAP_COPYIN_OPT_STEAL_PAGES
|
2317 VM_MAP_COPYIN_OPT_PMAP_ENTER
,
2318 (vm_map_copy_t
*)&device_data
, FALSE
))
2319 panic("ps_read_device: cannot copyin backing store provided buffer\n");
2324 *bufferp
= (vm_offset_t
)device_data
;
2327 /* Free the receive buffer */
2328 dpt
->checked_out
= 0;
2329 thread_wakeup(&dpt_array
);
2331 return KERN_SUCCESS
;
2334 kern_return_t
ps_write_device(paging_segment_t
, vm_offset_t
, vm_offset_t
, unsigned int, struct vs_async
*); /* forward */
2338 paging_segment_t ps
,
2342 struct vs_async
*vsa
)
2344 recnum_t dev_offset
;
2345 io_buf_len_t bytes_to_write
, bytes_written
;
2346 recnum_t records_written
;
2348 MACH_PORT_FACE reply_port
;
2352 clustered_writes
[atop_32(size
)]++;
2354 dev_offset
= (ps
->ps_offset
+
2355 (offset
>> (vm_page_shift
- ps
->ps_record_shift
)));
2356 bytes_to_write
= size
;
2360 * Asynchronous write.
2362 reply_port
= vsa
->reply_port
;
2363 ip_lock(reply_port
);
2364 reply_port
->ip_sorights
++;
2365 ip_reference(reply_port
);
2366 ip_unlock(reply_port
);
2369 device
= dev_port_lookup(ps
->ps_device
);
2371 vsa
->vsa_addr
= addr
;
2372 kr
=ds_device_write_common(device
,
2374 (mach_msg_type_name_t
) MACH_MSG_TYPE_MOVE_SEND_ONCE
,
2377 (io_buf_ptr_t
) addr
,
2379 (IO_WRITE
| IO_CALL
),
2382 if ((kr
!= KERN_SUCCESS
) && (kr
!= MIG_NO_REPLY
)) {
2384 dprintf(("%s0x%x, addr=0x%x,"
2385 "size=0x%x,offset=0x%x\n",
2386 "device_write_request returned ",
2387 kr
, addr
, size
, offset
));
2389 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2390 /* do the completion notification to free resources */
2391 device_write_reply(reply_port
, kr
, 0);
2396 * Synchronous write.
2400 device
= dev_port_lookup(ps
->ps_device
);
2401 kr
=ds_device_write_common(device
,
2405 (io_buf_ptr_t
) addr
,
2407 (IO_WRITE
| IO_SYNC
| IO_KERNEL_BUF
),
2410 if (kr
!= KERN_SUCCESS
) {
2411 dprintf(("%s0x%x, addr=0x%x,size=0x%x,offset=0x%x\n",
2412 "device_write returned ",
2413 kr
, addr
, size
, offset
));
2415 ps
->ps_bs
->bs_pages_out_fail
+= atop_32(size
));
2418 if (bytes_written
& ((vm_page_size
>> ps
->ps_record_shift
) - 1))
2419 Panic("fragmented write");
2420 records_written
= (bytes_written
>>
2421 (vm_page_shift
- ps
->ps_record_shift
));
2422 dev_offset
+= records_written
;
2424 if (bytes_written
!= bytes_to_write
) {
2425 dprintf(("wrote only %d bytes out of %d\n",
2426 bytes_written
, bytes_to_write
));
2429 bytes_to_write
-= bytes_written
;
2430 addr
+= bytes_written
;
2431 } while (bytes_to_write
> 0);
2433 return PAGER_SUCCESS
;
2437 #else /* !DEVICE_PAGING */
2441 paging_segment_t ps
,
2443 vm_offset_t
*bufferp
,
2445 unsigned int *residualp
,
2448 panic("ps_read_device not supported");
2452 paging_segment_t ps
,
2456 struct vs_async
*vsa
)
2458 panic("ps_write_device not supported");
2461 #endif /* DEVICE_PAGING */
2462 void pvs_object_data_provided(vstruct_t
, upl_t
, vm_offset_t
, vm_size_t
); /* forward */
2465 pvs_object_data_provided(
2472 DEBUG(DEBUG_VS_INTERNAL
,
2473 ("buffer=0x%x,offset=0x%x,size=0x%x\n",
2474 upl
, offset
, size
));
2477 GSTAT(global_stats
.gs_pages_in
+= atop_32(size
));
2481 ps_clunmap(vs
, offset
, size
);
2482 #endif /* USE_PRECIOUS */
kern_return_t
pvs_cluster_read(
	vstruct_t	vs,
	vm_offset_t	vs_offset,
	vm_size_t	cnt)
{
	kern_return_t		error = KERN_SUCCESS;
	unsigned int		residual;
	unsigned int		request_flags;
	/* ... */
	vm_offset_t		ps_offset[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	paging_segment_t	psp[(VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT];
	/* ... */

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;
	cl_mask = cl_size - 1;

	/*
	 * This loop will be executed multiple times until the entire
	 * request has been satisfied... if the request spans cluster
	 * boundaries, the clusters will be checked for logical continuity;
	 * if contiguous, the I/O request will span multiple clusters, otherwise
	 * it will be broken up into the minimal set of I/Os.
	 *
	 * If there are holes in a request (either unallocated pages in a paging
	 * segment or an unallocated paging segment), we stop
	 * reading at the hole, inform the VM of any data read, inform
	 * the VM of an unavailable range, then loop again, hoping to
	 * find valid pages later in the requested range. This continues until
	 * the entire range has been examined, and read, if present.
	 */

#if	USE_PRECIOUS
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_PRECIOUS | UPL_RET_ONLY_ABSENT;
#else
	request_flags = UPL_NO_SYNC | UPL_CLEAN_IN_PLACE | UPL_RET_ONLY_ABSENT;
#endif
	while (cnt && (error == KERN_SUCCESS)) {
		int	page_list_count;

		if ((vs_offset & cl_mask) &&
		    (cnt > (VM_SUPER_CLUSTER -
			    (vs_offset & cl_mask)))) {
			size = VM_SUPER_CLUSTER;
			size -= vs_offset & cl_mask;
		} else if (cnt > VM_SUPER_CLUSTER) {
			size = VM_SUPER_CLUSTER;
		}
		/* ... */

		while (size > 0 && error == KERN_SUCCESS) {
			vm_offset_t	cur_offset;
			/* ... */

			if ( !ps_info_valid) {
				ps_offset[seg_index] = ps_clmap(vs, vs_offset & ~cl_mask, &clmap, CL_FIND, 0, 0);
				psp[seg_index] = CLMAP_PS(clmap);
			}
			/*
			 * skip over unallocated physical segments
			 */
			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				abort_size = cl_size - (vs_offset & cl_mask);
				abort_size = MIN(abort_size, size);

				page_list_count = 0;
				memory_object_super_upl_request(
					/* ... */
					(memory_object_offset_t)vs_offset,
					abort_size, abort_size,
					&upl, NULL, &page_list_count,
					/* ... */);

				if (clmap.cl_error) {
					upl_abort(upl, UPL_ABORT_ERROR);
				} else {
					upl_abort(upl, UPL_ABORT_UNAVAILABLE);
				}
				upl_deallocate(upl);
				/* ... */
				vs_offset += abort_size;
				/* ... */
			}
			cl_index = (vs_offset & cl_mask) / vm_page_size;

			for (abort_size = 0; cl_index < pages_in_cl && abort_size < size; cl_index++) {
				/*
				 * skip over unallocated pages
				 */
				if (CLMAP_ISSET(clmap, cl_index))
					break;
				abort_size += vm_page_size;
			}
			/* ... */
			/*
			 * Let VM system know about holes in clusters.
			 */
			GSTAT(global_stats.gs_pages_unavail += atop_32(abort_size));

			page_list_count = 0;
			memory_object_super_upl_request(
				/* ... */
				(memory_object_offset_t)vs_offset,
				abort_size, abort_size,
				&upl, NULL, &page_list_count,
				/* ... */);

			upl_abort(upl, UPL_ABORT_UNAVAILABLE);
			upl_deallocate(upl);
			/* ... */
			vs_offset += abort_size;

			if (cl_index == pages_in_cl) {
				/*
				 * if we're at the end of this physical cluster
				 * then bump to the next one and continue looking
				 */
				/* ... */
			}
			/* ... */

			/*
			 * remember the starting point of the first allocated page
			 * for the I/O we're about to issue
			 */
			beg_pseg = seg_index;
			beg_indx = cl_index;
			cur_offset = vs_offset;

			/*
			 * calculate the size of the I/O that we can do...
			 * this may span multiple physical segments if
			 * they are contiguous
			 */
			for (xfer_size = 0; xfer_size < size; ) {
				while (cl_index < pages_in_cl
				       && xfer_size < size) {
					/*
					 * accumulate allocated pages within
					 * a physical segment
					 */
					if (CLMAP_ISSET(clmap, cl_index)) {
						xfer_size  += vm_page_size;
						cur_offset += vm_page_size;
						/* ... */
						BS_STAT(psp[seg_index]->ps_bs,
							psp[seg_index]->ps_bs->bs_pages_in++);
					}
					/* ... */
				}
				if (cl_index < pages_in_cl
				    || xfer_size >= size) {
					/*
					 * we've hit an unallocated page or
					 * the end of this request... go fire
					 * the I/O
					 */
					break;
				}
				/*
				 * we've hit the end of the current physical
				 * segment and there's more to do, so try
				 * moving to the next one
				 */
				/* ... */
				ps_offset[seg_index] =
					ps_clmap(vs,
						 cur_offset & ~cl_mask,
						 &clmap, CL_FIND, 0, 0);
				psp[seg_index] = CLMAP_PS(clmap);
				/* ... */

				if ((ps_offset[seg_index - 1] != (ps_offset[seg_index] - cl_size)) || (psp[seg_index - 1] != psp[seg_index])) {
					/*
					 * if the physical segment we're about
					 * to step into is not contiguous to
					 * the one we're currently in, or it's
					 * in a different paging file, or
					 * it hasn't been allocated....
					 * we stop here and generate the I/O
					 */
					break;
				}
				/*
				 * start with first page of the next physical
				 * segment
				 */
				/* ... */
			}
			/*
			 * we have a contiguous range of allocated pages
			 * to read from
			 */
			page_list_count = 0;
			memory_object_super_upl_request(vs->vs_control,
				(memory_object_offset_t)vs_offset,
				xfer_size, xfer_size,
				&upl, NULL, &page_list_count,
				request_flags | UPL_SET_INTERNAL);

			error = ps_read_file(psp[beg_pseg],
				upl, (vm_offset_t) 0,
				ps_offset[beg_pseg] +
					(beg_indx * vm_page_size),
				xfer_size, &residual, 0);
			/* ... */

			/*
			 * Adjust counts and send response to VM. Optimize
			 * for the common case, i.e. no error and/or partial
			 * data. If there was an error, then we need to error
			 * the entire range, even if some data was successfully
			 * read. If there was a partial read we may supply some
			 * data and may error some as well. In all cases the
			 * VM must receive some notification for every page
			 * in the range.
			 */
			if ((error == KERN_SUCCESS) && (residual == 0)) {
				/*
				 * Got everything we asked for, supply the data
				 * to the VM. Note that as a side effect of
				 * supplying the data, the buffer holding the
				 * supplied data is deallocated from the pager's
				 * address space.
				 */
				pvs_object_data_provided(
					vs, upl, vs_offset, xfer_size);
			} else {
				failed_size = xfer_size;

				if (error == KERN_SUCCESS) {
					if (residual == xfer_size) {
						/*
						 * If a read operation returns no error
						 * and no data moved, we turn it into
						 * an error, assuming we're reading at
						 * or beyond EOF.
						 * Fall through and error the entire
						 * range.
						 */
						error = KERN_FAILURE;
					} else {
						/*
						 * Otherwise, we have a partial read. If
						 * the part read is an integral number
						 * of pages, supply it. Otherwise round
						 * it up to a page boundary, zero fill
						 * the unread part, and supply it.
						 * Fall through and error the remainder
						 * of the range, if any.
						 */
						/* ... */
						lsize = (xfer_size - residual)
							/* ... */;
						pvs_object_data_provided(
							/* ... */);

						if (lsize < xfer_size) {
							/* ... */
							error = KERN_FAILURE;
						}
					}
				}
			}
			/*
			 * If there was an error in any part of the range, tell
			 * the VM. Note that error is explicitly checked again
			 * since it can be modified above.
			 */
			if (error != KERN_SUCCESS) {
				BS_STAT(psp[beg_pseg]->ps_bs,
					psp[beg_pseg]->ps_bs->bs_pages_in_fail
						+= atop_32(failed_size));
			}
			/* ... */
			vs_offset += xfer_size;
		}
		/* ... */
	} /* END while (cnt && (error == 0)) */

	return error;
}
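
/*
 * Illustrative sketch (not built): the cluster arithmetic used by
 * pvs_cluster_read() above.  A cluster is (1 << clshift) pages, and
 * cl_mask isolates the offset within a cluster.  The helper mirrors how
 * the read loop trims a request that does not start on a cluster
 * boundary so the first pass ends at the next super-cluster boundary.
 * The "example_" name is hypothetical.
 */
#if 0
static vm_size_t
example_first_chunk(vm_offset_t vs_offset, vm_size_t cnt, int clshift)
{
	vm_size_t cl_size = (1 << clshift) * vm_page_size;
	vm_size_t cl_mask = cl_size - 1;
	vm_size_t size;

	if ((vs_offset & cl_mask) &&
	    (cnt > (VM_SUPER_CLUSTER - (vs_offset & cl_mask)))) {
		/* unaligned start: stop the first pass at the boundary */
		size = VM_SUPER_CLUSTER;
		size -= vs_offset & cl_mask;
	} else if (cnt > VM_SUPER_CLUSTER) {
		/* aligned start, but larger than one super cluster */
		size = VM_SUPER_CLUSTER;
	} else {
		size = cnt;
	}
	return size;
}
#endif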
int vs_do_async_write = 1;

kern_return_t
vs_cluster_write(
	vstruct_t	vs,
	upl_t		internal_upl,
	vm_offset_t	offset,
	vm_size_t	cnt,
	boolean_t	dp_internal,
	int		flags)
{
	vm_offset_t	transfer_size;
	/* ... */
	vm_offset_t	actual_offset;	/* Offset within paging segment */
	paging_segment_t ps;
	vm_offset_t	subx_size;
	vm_offset_t	mobj_base_addr;
	vm_offset_t	mobj_target_addr;
	/* ... */
	struct vs_async	*vsa;
	/* ... */
	upl_page_info_t	*pl;
	/* ... */

	pages_in_cl = 1 << vs->vs_clshift;
	cl_size = pages_in_cl * vm_page_size;

	if (!dp_internal) {
		int		page_list_count;
		/* ... */
		vm_offset_t	upl_offset;
		vm_offset_t	seg_offset;
		vm_offset_t	ps_offset[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];
		paging_segment_t psp[((VM_SUPER_CLUSTER / PAGE_SIZE) >> VSTRUCT_DEF_CLSHIFT) + 1];

		if (/* ... */) {
			super_size = cl_size;

			request_flags = UPL_NOBLOCK |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		} else {
			super_size = VM_SUPER_CLUSTER;

			request_flags = UPL_NOBLOCK | UPL_CLEAN_IN_PLACE |
				UPL_RET_ONLY_DIRTY | UPL_COPYOUT_FROM |
				UPL_NO_SYNC | UPL_SET_INTERNAL;
		}

		page_list_count = 0;
		memory_object_super_upl_request(vs->vs_control,
			(memory_object_offset_t)offset,
			/* ... */
			&upl, NULL, &page_list_count,
			request_flags | UPL_FOR_PAGEOUT);

		pl = UPL_GET_INTERNAL_PAGE_LIST(upl);

		seg_size = cl_size - (upl->offset % cl_size);
		upl_offset = upl->offset & ~(cl_size - 1);

		for (seg_index = 0, transfer_size = upl->size;
		     transfer_size > 0; ) {
			ps_offset[seg_index] =
				/* ... */;

			if (ps_offset[seg_index] == (vm_offset_t) -1) {
				/* ... */
				upl_deallocate(upl);

				return KERN_FAILURE;
			}
			psp[seg_index] = CLMAP_PS(clmap);

			if (transfer_size > seg_size) {
				transfer_size -= seg_size;
				upl_offset += cl_size;
				/* ... */
			}
			/* ... */
		}
		/*
		 * Ignore any non-present pages at the end of the
		 * UPL.
		 */
		for (page_index = upl->size / vm_page_size; page_index > 0;)
			if (UPL_PAGE_PRESENT(pl, --page_index))
				break;
		num_of_pages = page_index + 1;

		base_index = (upl->offset % cl_size) / PAGE_SIZE;

		for (page_index = 0; page_index < num_of_pages; ) {
			/*
			 * skip over non-dirty pages
			 */
			for ( ; page_index < num_of_pages; page_index++) {
				if (UPL_DIRTY_PAGE(pl, page_index)
				    || UPL_PRECIOUS_PAGE(pl, page_index))
					/*
					 * this is a page we need to write
					 * go see if we can buddy it up with
					 * others that are contiguous to it
					 */
					break;
				/*
				 * if the page is not-dirty, but present we
				 * need to commit it... This is an unusual
				 * case since we only asked for dirty pages
				 */
				if (UPL_PAGE_PRESENT(pl, page_index)) {
					boolean_t empty = FALSE;
					upl_commit_range(upl,
						page_index * vm_page_size,
						/* ... */
						UPL_COMMIT_NOTIFY_EMPTY,
						/* ... */);
					if (empty) {
						assert(page_index ==
						       /* ... */);
						upl_deallocate(upl);
					}
				}
			}
			if (page_index == num_of_pages)
				/*
				 * no more pages to look at, we're out of here
				 */
				break;

			/*
			 * gather up contiguous dirty pages... we have at
			 * least 1, otherwise we would have bailed above;
			 * make sure that each physical segment that we step
			 * into is contiguous to the one we're currently in
			 * if it's not, we have to stop and write what we have
			 */
			for (first_dirty = page_index;
			     page_index < num_of_pages; ) {
				if ( !UPL_DIRTY_PAGE(pl, page_index)
				     && !UPL_PRECIOUS_PAGE(pl, page_index))
					break;
				page_index++;
				/*
				 * if we just looked at the last page in the UPL
				 * we don't need to check for physical segment
				 * continuity
				 */
				if (page_index < num_of_pages) {
					int	cur_seg;
					int	nxt_seg;

					cur_seg = (base_index + (page_index - 1))/pages_in_cl;
					nxt_seg = (base_index + page_index)/pages_in_cl;

					if (cur_seg != nxt_seg) {
						if ((ps_offset[cur_seg] != (ps_offset[nxt_seg] - cl_size)) || (psp[cur_seg] != psp[nxt_seg]))
							/*
							 * if the segment we're about
							 * to step into is not
							 * contiguous to the one we're
							 * currently in, or it's in a
							 * different paging file....
							 * we stop here and generate
							 * the I/O
							 */
							break;
					}
				}
			}
			num_dirty = page_index - first_dirty;

			if (num_dirty) {
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				while (transfer_size) {

					if ((seg_size = cl_size -
						((upl->offset + upl_offset) % cl_size))
						> transfer_size)
						seg_size = transfer_size;

					ps_vs_write_complete(vs,
						upl->offset + upl_offset,
						/* ... */);

					transfer_size -= seg_size;
					upl_offset += seg_size;
				}
				upl_offset = first_dirty * vm_page_size;
				transfer_size = num_dirty * vm_page_size;

				seg_index = (base_index + first_dirty) / pages_in_cl;
				seg_offset = (upl->offset + upl_offset) % cl_size;

				error = ps_write_file(psp[seg_index],
					upl, upl_offset,
					ps_offset[seg_index]
						+ seg_offset,
					transfer_size, flags);
				if (error != KERN_SUCCESS) {
					boolean_t empty = FALSE;
					upl_abort_range(upl,
						first_dirty * vm_page_size,
						num_dirty * vm_page_size,
						UPL_ABORT_NOTIFY_EMPTY,
						&empty);
					if (empty) {
						assert(page_index == num_of_pages);
						upl_deallocate(upl);
					}
				}
			}
		}
	} else {
		assert(cnt <= (vm_page_size << vs->vs_clshift));
		/* ... */

		/* The caller provides a mapped_data which is derived  */
		/* from a temporary object.  The targeted pages are    */
		/* guaranteed to be set at offset 0 in the mapped_data */
		/* The actual offset however must still be derived     */
		/* from the offset in the vs in question               */
		mobj_base_addr = offset;
		mobj_target_addr = mobj_base_addr;

		for (transfer_size = list_size; transfer_size != 0;) {
			actual_offset = ps_clmap(vs, mobj_target_addr,
					/* ... */
					transfer_size < cl_size ?
					transfer_size : cl_size, 0);
			if (actual_offset == (vm_offset_t) -1) {
				/* ... */
			}
			cnt = MIN(transfer_size,
				  CLMAP_NPGS(clmap) * vm_page_size);
			ps = CLMAP_PS(clmap);
			/* Assume that the caller has given us contiguous */
			/* pages */
			/* ... */
			ps_vs_write_complete(vs, mobj_target_addr,
					/* ... */);
			error = ps_write_file(ps, internal_upl,
					/* ... */);
			/* ... */
			actual_offset += cnt;
			mobj_target_addr += cnt;
			transfer_size -= cnt;
			/* ... */
		}
	}
	if (error != KERN_SUCCESS)
		return KERN_FAILURE;
	/* ... */
	return KERN_SUCCESS;
}
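
/*
 * Illustrative sketch (not built): the "buddy up" scan used by
 * vs_cluster_write() above.  Starting at the first dirty/precious
 * page, it extends the run until it hits a clean page, so one
 * ps_write_file() call can cover the whole contiguous dirty range.
 * The page-query callback is a hypothetical stand-in for the
 * UPL_DIRTY_PAGE()/UPL_PRECIOUS_PAGE() tests.
 */
#if 0
static int
example_gather_dirty_run(boolean_t (*page_needs_write)(int),
			 int first_dirty, int num_of_pages, int *num_dirty)
{
	int page_index;

	for (page_index = first_dirty; page_index < num_of_pages; page_index++) {
		if (!page_needs_write(page_index))
			break;			/* run ends at the first clean page */
	}
	*num_dirty = page_index - first_dirty;	/* pages to write in one I/O */
	return page_index;			/* where the outer scan resumes */
}
#endif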
vm_size_t
ps_vstruct_allocated_size(
	vstruct_t	vs)
{
	int		num_pages;
	struct vs_map	*vsmap;
	int		i, j, k;

	num_pages = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL)
				continue;
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]))
					continue;
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k))
						num_pages++;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		/* ... */
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]))
				continue;
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k))
					num_pages++;
			}
		}
	}

	return ptoa_32(num_pages);
}
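
/*
 * Illustrative sketch (not built): ps_vstruct_allocated_size() above is
 * essentially a population count over the per-cluster allocation
 * bitmaps.  The helper shows the same bit test for a single cluster,
 * assuming the bitmap is a plain word with one bit per page, as
 * VSM_BMAP() yields.  The "example_" name is hypothetical.
 */
#if 0
static int
example_pages_in_cluster(unsigned int bmap, int pages_per_cluster)
{
	int k, count = 0;

	for (k = 0; k < pages_per_cluster; k++) {
		if (bmap & (1 << k))	/* page k of this cluster is backed */
			count++;
	}
	return count;
}
#endif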
size_t
ps_vstruct_allocated_pages(
	vstruct_t		vs,
	default_pager_page_t	*pages,
	size_t			pages_size)
{
	unsigned int	num_pages;
	struct vs_map	*vsmap;
	vm_offset_t	offset;
	unsigned int	i, j, k;

	num_pages = 0;
	offset = 0;
	if (vs->vs_indirect) {
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			if (vsmap == NULL) {
				offset += (vm_page_size * CLMAP_ENTRIES *
					   VSCLSIZE(vs));
				continue;
			}
			/* loop on clusters in this indirect map */
			for (j = 0; j < CLMAP_ENTRIES; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j])) {
					offset += vm_page_size * VSCLSIZE(vs);
					continue;
				}
				/* loop on pages in this cluster */
				for (k = 0; k < VSCLSIZE(vs); k++) {
					if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
						num_pages++;
						if (num_pages < pages_size)
							pages++->dpp_offset =
								offset;
					}
					offset += vm_page_size;
				}
			}
		}
	} else {
		vsmap = vs->vs_dmap;
		/* ... */
		/* loop on clusters in the direct map */
		for (j = 0; j < CLMAP_ENTRIES; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j])) {
				offset += vm_page_size * VSCLSIZE(vs);
				continue;
			}
			/* loop on pages in this cluster */
			for (k = 0; k < VSCLSIZE(vs); k++) {
				if ((VSM_BMAP(vsmap[j])) & (1 << k)) {
					num_pages++;
					if (num_pages < pages_size)
						pages++->dpp_offset = offset;
				}
				offset += vm_page_size;
			}
		}
	}

	return num_pages;
}
kern_return_t
ps_vstruct_transfer_from_segment(
	vstruct_t	 vs,
	paging_segment_t segment,
	upl_t		 upl)
{
	struct vs_map	*vsmap;
	struct vs_map	old_vsmap;
	struct vs_map	new_vsmap;
	/* ... */

	VS_LOCK(vs);	/* block all work on this vstruct */
			/* can't allow the normal multiple write */
			/* semantic because writes may conflict */
	vs->vs_xfer_pending = TRUE;
	vs_wait_for_sync_writers(vs);
	/* ... */
	vs_wait_for_readers(vs);
	/* we will unlock the vs to allow other writes while transferring */
	/* and will be guaranteed of the persistence of the vs struct */
	/* because the caller of ps_vstruct_transfer_from_segment bumped */
	/* vs_async_pending */

	/* OK we now have guaranteed no other parties are accessing this */
	/* vs.  Now that we are also supporting simple lock versions of  */
	/* vs_lock we cannot hold onto VS_LOCK as we may block below.    */
	/* our purpose in holding it before was the multiple write case  */
	/* we now use the boolean xfer_pending to do that.  We can use   */
	/* a boolean instead of a count because we have guaranteed single */
	/* file access to this code in its caller                        */
	/* ... */

	if (vs->vs_indirect) {
		/* ... */
		/* loop on indirect maps */
		for (i = 0; i < INDIRECT_CLMAP_ENTRIES(vs->vs_size); i++) {
			vsmap = vs->vs_imap[i];
			/* ... */
			/* loop on clusters in this indirect map */
			clmap_off = (vm_page_size * CLMAP_ENTRIES *
				     /* ... */);
			if (i+1 == INDIRECT_CLMAP_ENTRIES(vs->vs_size))
				vsmap_size = vs->vs_size - (CLMAP_ENTRIES * i);
			else
				vsmap_size = CLMAP_ENTRIES;
			for (j = 0; j < vsmap_size; j++) {
				if (VSM_ISCLR(vsmap[j]) ||
				    VSM_ISERR(vsmap[j]) ||
				    (VSM_PS(vsmap[j]) != segment))
					continue;
				if (vs_cluster_transfer(vs,
					(vm_page_size * (j << vs->vs_clshift))
						/* ... */,
					vm_page_size << vs->vs_clshift,
					/* ... */) != KERN_SUCCESS) {
					/* ... */
					vs->vs_xfer_pending = FALSE;
					/* ... */
					vs_finish_write(vs);
					return KERN_FAILURE;
				}
				/* allow other readers/writers during transfer*/
				/* ... */
				vs->vs_xfer_pending = FALSE;
				/* ... */
				vs_finish_write(vs);
				/* ... */
				vs->vs_xfer_pending = TRUE;
				vs_wait_for_sync_writers(vs);
				/* ... */
				vs_wait_for_readers(vs);

				if (!(vs->vs_indirect)) {
					/* ... */
				}
			}
		}
	} else {
		/* ... */
		vsmap = vs->vs_dmap;
		if (vsmap == NULL) {
			/* ... */
			vs->vs_xfer_pending = FALSE;
			/* ... */
			vs_finish_write(vs);
			return KERN_SUCCESS;
		}
		/* loop on clusters in the direct map */
		for (j = 0; j < vs->vs_size; j++) {
			if (VSM_ISCLR(vsmap[j]) ||
			    VSM_ISERR(vsmap[j]) ||
			    (VSM_PS(vsmap[j]) != segment))
				continue;
			if (vs_cluster_transfer(vs,
				vm_page_size * (j << vs->vs_clshift),
				vm_page_size << vs->vs_clshift,
				upl) != KERN_SUCCESS) {
				/* ... */
				vs->vs_xfer_pending = FALSE;
				/* ... */
				vs_finish_write(vs);
				return KERN_FAILURE;
			}
			/* allow other readers/writers during transfer*/
			/* ... */
			vs->vs_xfer_pending = FALSE;
			/* ... */
			vs_finish_write(vs);
			/* ... */
			vs->vs_xfer_pending = TRUE;
			vs_wait_for_sync_writers(vs);
			/* ... */
			vs_wait_for_readers(vs);

			if (vs->vs_indirect) {
				/* ... */
			}
		}
	}
	/* ... */
	vs->vs_xfer_pending = FALSE;
	/* ... */
	vs_finish_write(vs);
	return KERN_SUCCESS;
}
vs_map_t
vs_get_map_entry(
	vstruct_t	vs,
	vm_offset_t	offset)
{
	struct vs_map	*vsmap;
	vm_offset_t	cluster;

	cluster = atop_32(offset) >> vs->vs_clshift;
	if (vs->vs_indirect) {
		long	ind_block = cluster/CLMAP_ENTRIES;

		/* Is the indirect block allocated? */
		vsmap = vs->vs_imap[ind_block];
		if (vsmap == (vs_map_t) NULL)
			return vsmap;
	} else
		vsmap = vs->vs_dmap;
	vsmap += cluster%CLMAP_ENTRIES;
	return vsmap;
}
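
/*
 * Illustrative sketch (not built): the index arithmetic behind
 * vs_get_map_entry() above.  A byte offset is first converted to a page
 * number (atop_32), then to a cluster number by the cluster shift; for
 * an indirect vstruct the cluster number is split into an indirect-block
 * index and an entry within that block.  The "example_" name is
 * hypothetical.
 */
#if 0
static void
example_map_indices(vm_offset_t offset, int clshift,
		    long *ind_block, long *entry)
{
	vm_offset_t cluster = atop_32(offset) >> clshift;

	*ind_block = cluster / CLMAP_ENTRIES;	/* which indirect map block */
	*entry = cluster % CLMAP_ENTRIES;	/* which vs_map slot within it */
}
#endif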
kern_return_t
vs_cluster_transfer(
	vstruct_t	vs,
	vm_offset_t	offset,
	vm_size_t	cnt,
	upl_t		upl)
{
	vm_offset_t		actual_offset;
	paging_segment_t	ps;
	/* ... */
	kern_return_t		error = KERN_SUCCESS;
	int			size, size_wanted, i;
	unsigned int		residual;
	/* ... */
	default_pager_thread_t	*dpt;
	/* ... */
	struct vs_map		*vsmap_ptr;
	struct vs_map		read_vsmap;
	struct vs_map		original_read_vsmap;
	struct vs_map		write_vsmap;
	/* ... */

	/* vs_cluster_transfer reads in the pages of a cluster and
	 * then writes these pages back to new backing store.  The
	 * segment the pages are being read from is assumed to have
	 * been taken off-line and is no longer considered for new
	 * space requests.
	 *
	 * This loop will be executed once per cluster referenced.
	 * Typically this means once, since it's unlikely that the
	 * VM system will ask for anything spanning cluster boundaries.
	 *
	 * If there are holes in a cluster (in a paging segment), we stop
	 * reading at the hole, then loop again, hoping to
	 * find valid pages later in the cluster.  This continues until
	 * the entire range has been examined, and read, if present.  The
	 * pages are written as they are read.  If a failure occurs after
	 * some pages are written the unmap call at the bottom of the loop
	 * recovers the backing store and the old backing store remains
	 * in effect.
	 */

	VSM_CLR(write_vsmap);
	VSM_CLR(original_read_vsmap);
	/* grab the actual object's pages to sync with I/O */
	while (cnt && (error == KERN_SUCCESS)) {
		vsmap_ptr = vs_get_map_entry(vs, offset);
		actual_offset = ps_clmap(vs, offset, &clmap, CL_FIND, 0, 0);

		if (actual_offset == (vm_offset_t) -1) {
			/*
			 * Nothing left to write in this cluster at least
			 * set write cluster information for any previous
			 * write, clear for next cluster, if there is one
			 */
			unsigned int	local_size, clmask, clsize;

			clsize = vm_page_size << vs->vs_clshift;
			clmask = clsize - 1;
			local_size = clsize - (offset & clmask);
			/* ... */
			local_size = MIN(local_size, cnt);

			/* This cluster has no data in it beyond what may */
			/* have been found on a previous iteration through */
			/* the loop "write_vsmap" */
			*vsmap_ptr = write_vsmap;
			VSM_CLR(write_vsmap);
			VSM_CLR(original_read_vsmap);
			/* ... */
			offset += local_size;
			continue;
		}

		/*
		 * Count up contiguous available or unavailable
		 * pages.
		 */
		ps = CLMAP_PS(clmap);
		/* ... */
		for (/* ... */
		     (size < cnt) && (unavail_size < cnt) &&
		     (i < CLMAP_NPGS(clmap)); i++) {
			if (CLMAP_ISSET(clmap, i)) {
				if (unavail_size != 0)
					break;
				size += vm_page_size;
				BS_STAT(ps->ps_bs,
					ps->ps_bs->bs_pages_in++);
			} else {
				/* ... */
				unavail_size += vm_page_size;
			}
		}

		if (size == 0) {
			ASSERT(unavail_size);
			cnt -= unavail_size;
			offset += unavail_size;
			if ((offset & ((vm_page_size << vs->vs_clshift) - 1))
			    == 0) {
				/* There is no more to transfer in this
				   cluster
				 */
				*vsmap_ptr = write_vsmap;
				VSM_CLR(write_vsmap);
				VSM_CLR(original_read_vsmap);
			}
			continue;
		}

		if (VSM_ISCLR(original_read_vsmap))
			original_read_vsmap = *vsmap_ptr;

		if (ps->ps_segtype == PS_PARTITION) {
			/*
			   NEED TO ISSUE WITH SYNC & NO COMMIT
			error = ps_read_device(ps, actual_offset, &buffer,
					       size, &residual, flags);
			*/
		} else {
			/* NEED TO ISSUE WITH SYNC & NO COMMIT */
			error = ps_read_file(ps, upl, (vm_offset_t) 0, actual_offset,
					     size, &residual,
					     (UPL_IOSYNC | UPL_NOCOMMIT));
		}

		read_vsmap = *vsmap_ptr;
		/* ... */

		/*
		 * Adjust counts and put data in new BS.  Optimize for the
		 * common case, i.e. no error and/or partial data.
		 * If there was an error, then we need to error the entire
		 * range, even if some data was successfully read.
		 */
		if ((error == KERN_SUCCESS) && (residual == 0)) {
			int	page_list_count = 0;

			/*
			 * Got everything we asked for, supply the data to
			 * the new BS.  Note that as a side effect of supplying
			 * the data, the buffer holding the supplied data is
			 * deallocated from the pager's address space unless
			 * the write is unsuccessful.
			 */

			/* note buffer will be cleaned up in all cases by */
			/* internal_cluster_write or if an error on write */
			/* the vm_map_copy_page_discard call */
			*vsmap_ptr = write_vsmap;

			if (vs_cluster_write(vs, upl, offset,
					     size, TRUE, UPL_IOSYNC | UPL_NOCOMMIT) != KERN_SUCCESS) {
				error = KERN_FAILURE;
				if (!(VSM_ISCLR(*vsmap_ptr))) {
					/* unmap the new backing store object */
					ps_clunmap(vs, offset, size);
				}
				/* original vsmap */
				*vsmap_ptr = original_read_vsmap;
				VSM_CLR(write_vsmap);
			} else {
				if ((offset + size) &
				    ((vm_page_size << vs->vs_clshift)
				     - 1)) {
					/* There is more to transfer in this
					   cluster
					 */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
				} else {
					/* discard the old backing object */
					write_vsmap = *vsmap_ptr;
					*vsmap_ptr = read_vsmap;
					ps_clunmap(vs, offset, size);
					*vsmap_ptr = write_vsmap;
					VSM_CLR(write_vsmap);
					VSM_CLR(original_read_vsmap);
				}
			}
		} else {
			/* ... */
			if (error == KERN_SUCCESS) {
				if (residual == size) {
					/*
					 * If a read operation returns no error
					 * and no data moved, we turn it into
					 * an error, assuming we're reading at
					 * or beyond EOF.
					 * Fall through and error the entire
					 * range.
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
					/* ... */
				} else {
					/*
					 * Otherwise, we have a partial read.
					 * This is also considered an error
					 * for the purposes of cluster transfer
					 */
					error = KERN_FAILURE;
					*vsmap_ptr = write_vsmap;
					if (!(VSM_ISCLR(*vsmap_ptr))) {
						/* unmap the new backing store object */
						ps_clunmap(vs, offset, size);
					}
					*vsmap_ptr = original_read_vsmap;
					VSM_CLR(write_vsmap);
					/* ... */
				}
			}
		}
		/* ... */
	} /* END while (cnt && (error == 0)) */

	if (!VSM_ISCLR(write_vsmap))
		*vsmap_ptr = write_vsmap;

	return error;
}
kern_return_t
default_pager_add_file(MACH_PORT_FACE backing_store,
	/* ... */)
{
	backing_store_t		bs;
	paging_segment_t	ps;
	int			i;
	int			error;

	if ((bs = backing_store_lookup(backing_store))
	    == BACKING_STORE_NULL)
		return KERN_INVALID_ARGUMENT;

	/* ... */
	for (i = 0; i <= paging_segment_max; i++) {
		ps = paging_segments[i];
		if (ps == PAGING_SEGMENT_NULL)
			continue;
		if (ps->ps_segtype != PS_FILE)
			continue;

		/*
		 * Check for overlap on same device.
		 */
		if (ps->ps_vnode == (struct vnode *)vp) {
			/* ... */
			return KERN_INVALID_ARGUMENT;
		}
	}
	/* ... */

	/*
	 * Set up the paging segment
	 */
	ps = (paging_segment_t) kalloc(sizeof (struct paging_segment));
	if (ps == PAGING_SEGMENT_NULL) {
		/* ... */
		return KERN_RESOURCE_SHORTAGE;
	}

	ps->ps_segtype = PS_FILE;
	ps->ps_vnode = (struct vnode *)vp;
	/* ... */
	ps->ps_record_shift = local_log2(vm_page_size / record_size);
	ps->ps_recnum = size;
	ps->ps_pgnum = size >> ps->ps_record_shift;

	ps->ps_pgcount = ps->ps_pgnum;
	ps->ps_clshift = local_log2(bs->bs_clsize);
	ps->ps_clcount = ps->ps_ncls = ps->ps_pgcount >> ps->ps_clshift;
	/* ... */

	ps->ps_bmap = (unsigned char *) kalloc(RMAPSIZE(ps->ps_ncls));
	if (!ps->ps_bmap) {
		kfree((vm_offset_t)ps, sizeof *ps);
		/* ... */
		return KERN_RESOURCE_SHORTAGE;
	}
	for (i = 0; i < ps->ps_ncls; i++) {
		clrbit(ps->ps_bmap, i);
	}

	ps->ps_going_away = FALSE;
	/* ... */

	if ((error = ps_enter(ps)) != 0) {
		kfree((vm_offset_t)ps->ps_bmap, RMAPSIZE(ps->ps_ncls));
		kfree((vm_offset_t)ps, sizeof *ps);
		/* ... */
		return KERN_RESOURCE_SHORTAGE;
	}

	bs->bs_pages_free += ps->ps_clcount << ps->ps_clshift;
	bs->bs_pages_total += ps->ps_clcount << ps->ps_clshift;
	/* ... */
	dp_pages_free += ps->ps_pgcount;
	/* ... */

	bs_more_space(ps->ps_clcount);

	DEBUG(DEBUG_BS_INTERNAL,
	      ("device=0x%x,offset=0x%x,count=0x%x,record_size=0x%x,shift=%d,total_size=0x%x\n",
	       device, offset, size, record_size,
	       ps->ps_record_shift, ps->ps_pgnum));

	return KERN_SUCCESS;
}
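
/*
 * Illustrative sketch (not built): the sizing arithmetic performed by
 * default_pager_add_file() above for a paging file of `size` records.
 * The 4K page and 512-byte record sizes are assumptions for the
 * example only; with them, eight records make a page, and the page
 * count is then divided into clusters by the backing store's cluster
 * shift.  Names here are hypothetical.
 */
#if 0
static void
example_segment_sizing(unsigned int size /* records */, int bs_clshift)
{
	int record_shift = local_log2(4096 / 512);		/* assumed sizes: 3 */
	unsigned int pgnum = size >> record_shift;		/* whole pages available */
	unsigned int clcount = pgnum >> bs_clshift;		/* whole clusters available */
	unsigned int pages_usable = clcount << bs_clshift;	/* what bs_pages_* count */

	(void)pages_usable;
}
#endif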
kern_return_t
ps_read_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	unsigned int		*residualp,
	int			flags)
{
	vm_object_offset_t	f_offset;
	int			error = 0;
	kern_return_t		result;

	clustered_reads[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	/* for transfer case we need to pass uploffset and flags */
	error = vnode_pagein(ps->ps_vnode,
			     upl, upl_offset, f_offset, (vm_size_t)size, flags | UPL_NORDAHEAD, NULL);

	/* The vnode_pagein semantic is somewhat at odds with the existing   */
	/* device_read semantic.  Partial reads are not experienced at this  */
	/* level.  It is up to the bit map code and cluster read code to     */
	/* check that requested data locations are actually backed, and the  */
	/* pagein code to either read all of the requested data or return an */
	/* error.                                                             */
	if (error)
		result = KERN_FAILURE;
	else {
		*residualp = 0;
		result = KERN_SUCCESS;
	}
	return result;
}
kern_return_t
ps_write_file(
	paging_segment_t	ps,
	upl_t			upl,
	vm_offset_t		upl_offset,
	vm_offset_t		offset,
	unsigned int		size,
	int			flags)
{
	vm_object_offset_t	f_offset;
	kern_return_t		result;

	clustered_writes[atop_32(size)]++;

	f_offset = (vm_object_offset_t)(ps->ps_offset + offset);

	if (vnode_pageout(ps->ps_vnode,
			  upl, upl_offset, f_offset, (vm_size_t)size, flags, NULL))
		result = KERN_FAILURE;
	else
		result = KERN_SUCCESS;

	return result;
}
kern_return_t
default_pager_triggers(MACH_PORT_FACE default_pager,
	int		hi_wat,
	int		lo_wat,
	int		flags,
	MACH_PORT_FACE	trigger_port)
{
	MACH_PORT_FACE	release;
	kern_return_t	kr;

	/* ... */
	if (flags == HI_WAT_ALERT) {
		release = min_pages_trigger_port;
		min_pages_trigger_port = trigger_port;
		minimum_pages_remaining = hi_wat/vm_page_size;
		/* ... */
		kr = KERN_SUCCESS;
	} else if (flags == LO_WAT_ALERT) {
		release = max_pages_trigger_port;
		max_pages_trigger_port = trigger_port;
		maximum_pages_free = lo_wat/vm_page_size;
		/* ... */
		kr = KERN_SUCCESS;
	} else {
		release = trigger_port;
		kr = KERN_INVALID_ARGUMENT;
	}
	/* ... */

	if (IP_VALID(release))
		ipc_port_release_send(release);

	return kr;
}
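
/*
 * Illustrative sketch (not built): default_pager_triggers() above keeps
 * its water marks in pages, so a caller-supplied byte threshold is
 * divided by vm_page_size.  For example, under an assumed 4K page size
 * a 16 MB high-water mark becomes 4096 pages in
 * minimum_pages_remaining.  The helper name is hypothetical.
 */
#if 0
static unsigned int
example_watermark_pages(unsigned int wat_bytes)
{
	/* with vm_page_size == 4096: 16*1024*1024 / 4096 == 4096 pages */
	return wat_bytes / vm_page_size;
}
#endif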
/*
 * Monitor the amount of available backing store vs. the amount of
 * required backing store, notify a listener (if present) when
 * backing store may safely be removed.
 *
 * We attempt to avoid the situation where backing store is
 * discarded en masse, as this can lead to thrashing as the
 * backing store is compacted.
 */

#define PF_INTERVAL	3	/* time between free level checks */
#define PF_LATENCY	10	/* number of intervals before release */

static int dp_pages_free_low_count = 0;

void
default_pager_backing_store_monitor(thread_call_param_t p1, thread_call_param_t p2)
{
	unsigned long long	average;
	ipc_port_t		trigger = IP_NULL;
	uint64_t		deadline;

	/*
	 * We determine whether it will be safe to release some
	 * backing store by watching the free page level.  If
	 * it remains below the maximum_pages_free threshold for
	 * at least PF_LATENCY checks (taken at PF_INTERVAL seconds)
	 * then we deem it safe.
	 *
	 * Note that this establishes a maximum rate at which backing
	 * store will be released, as each notification (currently)
	 * only results in a single backing store object being
	 * released.
	 */
	if (dp_pages_free > maximum_pages_free) {
		dp_pages_free_low_count++;
	} else {
		dp_pages_free_low_count = 0;
	}

	/* decide whether to send notification */
	if (max_pages_trigger_port &&
	    (backing_store_release_trigger_disable == 0) &&
	    (dp_pages_free_low_count > PF_LATENCY)) {
		trigger = max_pages_trigger_port;
		max_pages_trigger_port = NULL;
	}
	/* ... */

	/* send notification */
	if (trigger != IP_NULL) {
		/* ... */
		if (backing_store_release_trigger_disable != 0) {
			assert_wait((event_t)
				    &backing_store_release_trigger_disable,
				    /* ... */);
			/* ... */
			thread_block(THREAD_CONTINUE_NULL);
		}
		/* ... */
		default_pager_space_alert(trigger, LO_WAT_ALERT);
		ipc_port_release_send(trigger);
		dp_pages_free_low_count = 0;
	}
	/* ... */
	clock_interval_to_deadline(PF_INTERVAL, NSEC_PER_SEC, &deadline);
	thread_call_func_delayed(default_pager_backing_store_monitor, NULL, deadline);
}
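
/*
 * Illustrative sketch (not built): the release hysteresis used by
 * default_pager_backing_store_monitor() above.  The free level must
 * stay on the favorable side of the threshold for more than PF_LATENCY
 * consecutive PF_INTERVAL-second checks before a LO_WAT_ALERT is sent;
 * a single dip resets the count.  The helper models only that counter;
 * its name and parameters are hypothetical.
 */
#if 0
static boolean_t
example_should_notify(boolean_t level_ok, int *low_count)
{
	if (level_ok)
		(*low_count)++;			/* another favorable interval */
	else
		*low_count = 0;			/* reset on any dip */

	return (*low_count > PF_LATENCY);	/* stable long enough? */
}
#endif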