/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 */
/*
 *	File:	vm/task_working_set.c
 *	Author:	Chris Youngworth
 *	Date:	2001
 *
 *	Working set detection and maintenance module
 */


#include <mach/rpc.h>
#include <vm/task_working_set.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <kern/sched.h>

extern unsigned sched_tick;

/* Note: all of the routines below depend on the associated map lock for */
/* synchronization; the map lock is held when the routines are called    */
/* and is still held when they return.                                   */

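/*
 *	Overview: a tws_hash is an open-addressed cache of recently
 *	referenced object/offset pairs.  Two probe tables index the same
 *	elements: table[] is keyed by a hash of (object, offset) and
 *	alt_table[] by a hash of the page's virtual address.  The elements
 *	themselves live in cache[], an array of "lines" each holding
 *	"rows" elements, recycled line by line.  Each element covers a
 *	run of up to 32 pages; bit n of its page_cache bitmap records
 *	whether the page at (offset + n * PAGE_SIZE) has been seen:
 *
 *		page_index = (offset & TWS_INDEX_MASK) >> 12;
 *		hit = (1 << page_index) & element->page_cache;
 *
 *	Collisions are resolved by re-probing with a fixed stride of 17
 *	slots, up to TWS_MAX_REHASH attempts.  When all tables fill, an
 *	additional expansion set may be allocated, up to
 *	TWS_HASH_EXPANSION_MAX sets.
 */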
tws_hash_t
tws_hash_create(
	unsigned int lines,
	unsigned int rows,
	unsigned int style)
{
	tws_hash_t	tws;
	int		i, j;

	if ((style != TWS_HASH_STYLE_BASIC) &&
			(style != TWS_HASH_STYLE_SIGNAL)) {
		return((tws_hash_t)NULL);
	}

	tws = (tws_hash_t)(kalloc(sizeof(struct tws_hash)));
	if(tws == (tws_hash_t)NULL)
		return tws;

	if((tws->table[0] = (tws_hash_ele_t *)
			kalloc(sizeof(tws_hash_ele_t) * 2 * lines * rows))
			== NULL) {
		kfree((vm_offset_t)tws, sizeof(struct tws_hash));
		return (tws_hash_t)NULL;
	}
	if((tws->alt_table[0] = (tws_hash_ele_t *)
			kalloc(sizeof(tws_hash_ele_t) * 2 * lines * rows))
			== NULL) {
		/* free the table before the header that points to it */
		kfree((vm_offset_t)tws->table[0], sizeof(tws_hash_ele_t)
				* 2 * lines * rows);
		kfree((vm_offset_t)tws, sizeof(struct tws_hash));
		return (tws_hash_t)NULL;
	}
	if((tws->cache[0] = (struct tws_hash_line *)
			kalloc(sizeof(struct tws_hash_line) * lines))
			== NULL) {
		kfree((vm_offset_t)tws->table[0], sizeof(tws_hash_ele_t)
				* 2 * lines * rows);
		kfree((vm_offset_t)tws->alt_table[0], sizeof(tws_hash_ele_t)
				* 2 * lines * rows);
		kfree((vm_offset_t)tws, sizeof(struct tws_hash));
		return (tws_hash_t)NULL;
	}

	/* most defaults are such that a bzero will initialize */
	bzero((char *)tws->table[0], sizeof(tws_hash_ele_t)
			* 2 * lines * rows);
	bzero((char *)tws->alt_table[0], sizeof(tws_hash_ele_t)
			* 2 * lines * rows);
	bzero((char *)tws->cache[0], sizeof(struct tws_hash_line)
			* lines);

	mutex_init(&tws->lock, ETAP_VM_MAP);
	tws->style = style;
	tws->current_line = 0;
	tws->pageout_count = 0;
	tws->line_count = 0;
	tws->number_of_lines = lines;
	tws->number_of_elements = rows;
	tws->expansion_count = 1;
	tws->lookup_count = 0;
	tws->insert_count = 0;
	tws->time_of_creation = sched_tick;

	return tws;
}

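/*
 *	tws_hash_line_clear
 *	Evicts every element on a cache line.  For each element the
 *	alt_table and table slots that point at it are cleared and, when
 *	"live" is TRUE, the physical mappings of its cached pages are
 *	removed so that reference information starts fresh.  The pmap
 *	dump is throttled: once line_count wraps past number_of_lines,
 *	mappings are dropped again only after the pageout daemon has
 *	run another scan (vm_pageout_scan_event_counter has advanced).
 */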
int newtest = 0;
void
tws_hash_line_clear(
	tws_hash_t	tws,
	tws_hash_line_t	hash_line,
	boolean_t	live)
{
	struct tws_hash_ele	*hash_ele;
	int		index;
	unsigned int	i, j, k;
	int		alt_index;
	int		dump_pmap;
	int		hash_loop;

	if(tws->line_count < tws->number_of_lines) {
		tws->line_count++;
		dump_pmap = 1;
	} else {
		if(tws->pageout_count != vm_pageout_scan_event_counter) {
			tws->pageout_count =
				vm_pageout_scan_event_counter;
			tws->line_count = 0;
			dump_pmap = 1;
		} else {
			dump_pmap = 0;
		}
	}
	hash_line->ele_count = 0;
	for (i = 0; i < tws->number_of_elements; i++) {
		hash_loop = 0;
		hash_ele = &(hash_line->list[i]);
		if(hash_ele->object != 0) {
			vm_offset_t vaddr_off = 0;
			vm_object_offset_t local_off = 0;

			for (j = 0x1; j != 0; j = j<<1) {
				if(j & hash_ele->page_cache) {
					unsigned int alt_index;
					alt_index = alt_tws_hash(
						hash_ele->page_addr + vaddr_off,
						tws->number_of_elements,
						tws->number_of_lines);
					for(k = 0; k < tws->expansion_count; k++) {
						if(tws->alt_table[k][alt_index] == hash_ele) {
							tws->alt_table[k][alt_index] = 0;
						}
					}
				}
				/* advance for every bit position, cached or not */
				vaddr_off += PAGE_SIZE;
			}

			if((hash_ele->map != NULL) && (live)) {
				vm_page_t	p;

				for (j = 0x1; j != 0; j = j<<1) {
					if(j & hash_ele->page_cache) {
						p = vm_page_lookup(hash_ele->object,
							hash_ele->offset + local_off);
						if((p != NULL) && (p->wire_count == 0)
								&& (dump_pmap == 1)) {
							pmap_remove_some_phys((pmap_t)
								vm_map_pmap(hash_ele->map),
								p->phys_addr);
						}
					}
					local_off += PAGE_SIZE_64;
				}
			}
			if(tws->style == TWS_HASH_STYLE_SIGNAL) {
				vm_object_deallocate(hash_ele->object);
				vm_map_deallocate(hash_ele->map);
			}

			index = do_tws_hash(hash_ele->object, hash_ele->offset,
				tws->number_of_elements, tws->number_of_lines);

			while (hash_loop < TWS_MAX_REHASH) {
				for(k = 0; k < tws->expansion_count; k++) {
					if((tws->table[k][index] != 0) &&
						(tws->table[k][index] == hash_ele)) {
						tws->table[k][index] = 0;
						break;
					}
				}
				/* stop re-probing once the entry has been cleared */
				if(k < tws->expansion_count)
					break;
				index += 17;
				if(index >= (2 *
					tws->number_of_lines
					* tws->number_of_elements)) {
					index = index - (2 *
						tws->number_of_lines
						* tws->number_of_elements);
				}
				hash_loop++;
			}
			hash_ele->object = 0;
			hash_ele->page_cache = 0;

			if(newtest != 0) {
				if (hash_loop == TWS_MAX_REHASH) {
					panic("tws_hash_line_clear: Cache and Hash out of sync\n");
				}
			}
		}
	}
}

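/*
 *	tws_lookup
 *	Probes the primary table for an element matching (object, offset).
 *	A hit additionally requires that the page's bit be set in the
 *	element's page_cache bitmap.  Only a try-lock is taken on the
 *	hash so that fault handling never blocks on working-set
 *	bookkeeping; contention is reported as KERN_FAILURE.
 */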
kern_return_t
tws_lookup(
	tws_hash_t		tws,
	vm_object_offset_t	offset,
	vm_object_t		object,
	tws_hash_line_t		*line)
{
	struct tws_hash_ele	*hash_ele;
	int		index;
	int		k;
	int		loop;

	/* don't cache private objects */
	if(object->private)
		return KERN_SUCCESS;

	if(!tws_lock_try(tws)) {
		return KERN_FAILURE;
	}

	index = do_tws_hash(object, offset,
			tws->number_of_elements, tws->number_of_lines);
	loop = 0;

	tws->lookup_count++;
	if(tws->lookup_count == 0)
		tws->insert_count = 0;
	while (loop < TWS_MAX_REHASH) {
		for(k = 0; k < tws->expansion_count; k++) {
			if((hash_ele = tws->table[k][index]) != 0) {
				if((hash_ele->offset == (offset & TWS_HASH_OFF_MASK))
						&& (hash_ele->object == object)) {
					vm_offset_t pagenum;

					pagenum = (vm_offset_t)
						(offset & TWS_INDEX_MASK);
					pagenum = pagenum >> 12;

					if((1<<pagenum) & hash_ele->page_cache) {
						int	set;
						int	ele_line;

						/* line stores current_line +   */
						/* set * number_of_lines; undo  */
						/* that to find the cache slot  */
						set = hash_ele->line/tws->number_of_lines;
						ele_line = hash_ele->line -
							(set * tws->number_of_lines);
						*line = &tws->cache[set][ele_line];
						tws_unlock(tws);
						return KERN_SUCCESS;
					}
				}
			}
		}
		index += 17;
		if(index >= (2 * tws->number_of_lines * tws->number_of_elements)) {
			index = index -
				(2 * tws->number_of_lines * tws->number_of_elements);
		}
		loop++;
	}
	tws_unlock(tws);
	return KERN_FAILURE;
}

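/*
 *	tws_expand_working_set
 *	Builds a larger hash, repopulates it from the old one, then swaps
 *	the two headers field by field so that the task's pointer to its
 *	working set never changes.  The old header ends up describing the
 *	new, larger tables, and tws_hash_destroy() is applied to the
 *	header that now owns the old tables.
 */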
kern_return_t
tws_expand_working_set(
	vm_offset_t	tws,
	int		line_count)
{
	tws_hash_t		new_tws;
	tws_hash_t		old_tws;
	unsigned int		i, j, k;
	struct	tws_hash	temp;

	old_tws = (tws_hash_t)tws;

	/* Note we do an elaborate dance to preserve the header that */
	/* task is pointing to.  In this way we can avoid taking a task */
	/* lock every time we want to access the tws */

	if (old_tws->number_of_lines >= line_count) {
		return KERN_FAILURE;
	}
	if((new_tws = tws_hash_create(line_count,
			old_tws->number_of_elements, old_tws->style)) == 0) {
		return(KERN_NO_SPACE);
	}
	tws_lock(old_tws);

	for(i = 0; i < old_tws->number_of_lines; i++) {
		for(j = 0; j < old_tws->number_of_elements; j++) {
			for(k = 0; k < old_tws->expansion_count; k++) {
				tws_hash_ele_t		entry;
				vm_object_offset_t	paddr;
				unsigned int		page_index;
				entry = &old_tws->cache[k][i].list[j];
				if(entry->object != 0) {
					paddr = 0;
					/* walk every page bit of the element */
					for(page_index = 1; page_index != 0;
							page_index = page_index << 1) {
						if (entry->page_cache & page_index) {
							tws_insert(new_tws,
								entry->offset+paddr,
								entry->object,
								entry->page_addr+paddr,
								entry->map);
						}
						paddr += PAGE_SIZE;
					}
				}
			}
		}
	}

	temp.style = new_tws->style;
	temp.current_line = new_tws->current_line;
	temp.pageout_count = new_tws->pageout_count;
	temp.line_count = new_tws->line_count;
	temp.number_of_lines = new_tws->number_of_lines;
	temp.number_of_elements = new_tws->number_of_elements;
	temp.expansion_count = new_tws->expansion_count;
	temp.lookup_count = new_tws->lookup_count;
	temp.insert_count = new_tws->insert_count;
	for(i = 0; i < new_tws->expansion_count; i++) {
		temp.table[i] = new_tws->table[i];
		temp.alt_table[i] = new_tws->alt_table[i];
		temp.cache[i] = new_tws->cache[i];
	}

	new_tws->style = old_tws->style;
	new_tws->current_line = old_tws->current_line;
	new_tws->pageout_count = old_tws->pageout_count;
	new_tws->line_count = old_tws->line_count;
	new_tws->number_of_lines = old_tws->number_of_lines;
	new_tws->number_of_elements = old_tws->number_of_elements;
	new_tws->expansion_count = old_tws->expansion_count;
	new_tws->lookup_count = old_tws->lookup_count;
	new_tws->insert_count = old_tws->insert_count;
	for(i = 0; i < old_tws->expansion_count; i++) {
		new_tws->table[i] = old_tws->table[i];
		new_tws->alt_table[i] = old_tws->alt_table[i];
		new_tws->cache[i] = old_tws->cache[i];
	}

	old_tws->style = temp.style;
	old_tws->current_line = temp.current_line;
	old_tws->pageout_count = temp.pageout_count;
	old_tws->line_count = temp.line_count;
	old_tws->number_of_lines = temp.number_of_lines;
	old_tws->number_of_elements = temp.number_of_elements;
	old_tws->expansion_count = temp.expansion_count;
	old_tws->lookup_count = temp.lookup_count;
	old_tws->insert_count = temp.insert_count;
	for(i = 0; i < temp.expansion_count; i++) {
		old_tws->table[i] = temp.table[i];
		old_tws->alt_table[i] = temp.alt_table[i];
		old_tws->cache[i] = temp.cache[i];
	}

	tws_hash_destroy(new_tws);
	tws_unlock(old_tws);
	return KERN_SUCCESS;
}

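/*
 *	tws_insert
 *	Records a page in the working set.  The alternate (virtual
 *	address keyed) table is consulted first: a stale entry there
 *	means the same address now maps a different object/offset (the
 *	IPC copy-on-write case), so the old entry's page bit is cleared
 *	and the entry torn down if no bits remain.  The page is then
 *	merged into an existing element when (object, offset) is already
 *	present, or placed on the current cache line, clearing or
 *	expanding lines as needed.
 */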
kern_return_t
tws_insert(
	tws_hash_t		tws,
	vm_object_offset_t	offset,
	vm_object_t		object,
	vm_offset_t		page_addr,
	vm_map_t		map)
{
	queue_t			bucket;
	struct tws_hash_ele	*new_entry;
	unsigned int		index;
	unsigned int		alt_index;
	unsigned int		ele_index;
	unsigned int		page_index;
	int			i, k;
	int			alt_k;
	int			alt_hash_count;
	int			current_line;
	int			set;
	int			hash_loop;

	if(!tws_lock_try(tws)) {
		return KERN_FAILURE;
	}
	tws->insert_count++;
	current_line = 0xFFFFFFFF;

	/* This next bit of code, and the alternate hash table, are */
	/* made necessary because of IPC COW */

	alt_index = alt_tws_hash(page_addr,
			tws->number_of_elements, tws->number_of_lines);

	for(alt_k = 0; alt_k < tws->expansion_count; alt_k++) {
		new_entry = tws->alt_table[alt_k][alt_index];
		if((new_entry == 0) || (new_entry->object == 0)) {
			tws->alt_table[alt_k][alt_index] = 0;
			continue;
		}
		if(!((new_entry->offset == (offset & TWS_HASH_OFF_MASK))
				&& (new_entry->object == object))) {

			tws->alt_table[alt_k][alt_index] = 0;

			index = do_tws_hash(
				new_entry->object, new_entry->offset,
				tws->number_of_elements, tws->number_of_lines);

			hash_loop = 0;
			while (hash_loop < TWS_MAX_REHASH) {

				for(k = 0; k < tws->expansion_count; k++) {
					if(tws->table[k][index] == new_entry) {
						break;
					}
				}

				if(k == tws->expansion_count) {
					index += 17;
					if(index >= (2 * tws->number_of_lines
							* tws->number_of_elements)) {
						index = index -
							(2 * tws->number_of_lines
							* tws->number_of_elements);
					}
				} else {
					break;
				}
				hash_loop++;

			}
			if((k < tws->expansion_count) &&
					(tws->table[k][index] == new_entry)) {
				page_index = (offset & TWS_INDEX_MASK) >> 12;
				new_entry->page_cache &=
					~((unsigned int)(1 << page_index));

				if(new_entry->page_cache == 0) {

					if(tws->style ==
							TWS_HASH_STYLE_SIGNAL) {
						vm_object_deallocate(
							new_entry->object);
						vm_map_deallocate(
							new_entry->map);
					}
					new_entry->object = 0;
					tws->table[k][index] = 0;
					current_line = new_entry->line;
					set = current_line/tws->number_of_lines;
					current_line = current_line -
						(set * tws->number_of_lines);
					tws->cache[set]
						[current_line].ele_count--;
				}

			}
			break;
		}
	}

	index = do_tws_hash(object, offset,
		tws->number_of_elements, tws->number_of_lines);
	alt_hash_count = 0;
	/* we will do MAX_REHASH hash attempts and then give up */
	while (alt_hash_count < TWS_MAX_REHASH) {
		for(k = 0; k < tws->expansion_count; k++) {
			new_entry = tws->table[k][index];
			if(new_entry == NULL)
				continue;
			if((new_entry->object == object) &&
					(new_entry->offset ==
					(offset & TWS_HASH_OFF_MASK))) {
				new_entry->page_cache |=
					(1<<(((vm_offset_t)
					(offset & TWS_INDEX_MASK))>>12));
				tws->alt_table[k][alt_index] = new_entry;
				tws_unlock(tws);
				return KERN_SUCCESS;
			}
		}

		alt_hash_count += 1;
		index += 17;
		if(index >= (2 *
			tws->number_of_lines * tws->number_of_elements))
			index = index -
				(2 * tws->number_of_lines * tws->number_of_elements);
	}
	alt_hash_count = 0;
	index = do_tws_hash(object, offset,
		tws->number_of_elements, tws->number_of_lines);
	while (alt_hash_count < TWS_MAX_REHASH) {
		for(k = 0; k < tws->expansion_count; k++) {
			new_entry = tws->table[k][index];
			if(new_entry == NULL)
				break;
		}

		if (k < tws->expansion_count)
			break;

		alt_hash_count += 1;
		index += 17;
		if(index >= (2 *
			tws->number_of_lines * tws->number_of_elements))
			index = index -
				(2 * tws->number_of_lines * tws->number_of_elements);
	}

	if(alt_hash_count == TWS_MAX_REHASH) {
		tws_unlock(tws);
		return KERN_FAILURE;
	}

	if(tws->style == TWS_HASH_STYLE_SIGNAL) {
		vm_object_reference(object);
		vm_map_reference(map);
	}

	if(current_line == 0xFFFFFFFF) {

		current_line = tws->current_line;
		set = current_line/tws->number_of_lines;
		current_line = current_line - (set * tws->number_of_lines);

		if(tws->cache[set][current_line].ele_count
				>= tws->number_of_elements) {
			current_line++;
			tws->current_line++;
			if(current_line == tws->number_of_lines) {
				set++;
				current_line = 0;
				if (set == tws->expansion_count) {
					if((tws->lookup_count <
							(2 * tws->insert_count)) &&
							(set < TWS_HASH_EXPANSION_MAX)) {
						tws->lookup_count = 0;
						tws->insert_count = 0;
						if(tws->number_of_lines
								< TWS_HASH_LINE_COUNT) {
							tws->current_line--;
							tws_unlock(tws);
							return KERN_NO_SPACE;
						}
						if((tws->table[set] = (tws_hash_ele_t *)
								kalloc(sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements))
								== NULL) {
							set = 0;
						} else if((tws->alt_table[set] =
								(tws_hash_ele_t *)
								kalloc(sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements))
								== NULL) {
							kfree((vm_offset_t)tws->table[set],
								sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements);
							tws->table[set] = NULL;
							set = 0;

						} else if((tws->cache[set] =
								(struct tws_hash_line *)
								kalloc(sizeof
								(struct tws_hash_line)
								* tws->number_of_lines))
								== NULL) {
							kfree((vm_offset_t)tws->table[set],
								sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements);
							kfree((vm_offset_t)tws->alt_table[set],
								sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements);
							/* clear both stale pointers */
							tws->table[set] = NULL;
							tws->alt_table[set] = NULL;
							set = 0;

						} else {
							bzero((char *)tws->table[set],
								sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements);
							bzero((char *)tws->alt_table[set],
								sizeof(tws_hash_ele_t)
								* 2 * tws->number_of_lines
								* tws->number_of_elements);
							bzero((char *)tws->cache[set],
								sizeof(struct tws_hash_line)
								* tws->number_of_lines);
						}
					} else {
						tws->lookup_count = 0;
						tws->insert_count = 0;
						set = 0;
					}
				}
				tws->current_line = set * tws->number_of_lines;
			}
			if(set < tws->expansion_count) {
				tws_hash_line_clear(tws,
					&(tws->cache[set][current_line]), TRUE);
				if(tws->cache[set][current_line].ele_count
						>= tws->number_of_elements) {
					if(tws->style == TWS_HASH_STYLE_SIGNAL) {
						vm_object_deallocate(object);
						vm_map_deallocate(map);
					}
					tws_unlock(tws);
					return KERN_FAILURE;
				}
			} else {
				tws->expansion_count++;
			}
		}
	}

	ele_index = 0;
	for(i = 0; i < tws->number_of_elements; i++) {
		if(tws->cache[set][current_line].
				list[ele_index].object == 0) {
			break;
		}
		ele_index++;
		if(ele_index >= tws->number_of_elements)
			ele_index = 0;
	}

	if(i == tws->number_of_elements)
		panic("tws_insert: no free elements");

	tws->cache[set][current_line].list[ele_index].object = object;
	tws->cache[set][current_line].list[ele_index].offset =
		offset & TWS_HASH_OFF_MASK;
	tws->cache[set][current_line].
		list[ele_index].page_addr = page_addr & TWS_HASH_OFF_MASK;
	tws->cache[set][current_line].list[ele_index].map = map;
	tws->cache[set][current_line].list[ele_index].line =
		current_line + (set * tws->number_of_lines);
	tws->cache[set][current_line].list[ele_index].page_cache =
		1<<(((vm_offset_t)(offset & TWS_INDEX_MASK))>>12);

	tws->table[k][index] = &tws->cache[set][current_line].list[ele_index];
	for(alt_k = 0; alt_k < tws->expansion_count; alt_k++) {
		if(tws->alt_table[alt_k][alt_index] == 0) {
			tws->alt_table[alt_k][alt_index] =
				&tws->cache[set][current_line].list[ele_index];
			break;
		}
	}
	tws->cache[set][current_line].ele_count++;

	tws_unlock(tws);
	return KERN_SUCCESS;
}


/*
 * tws_build_cluster
 * lengthen the cluster of pages by the number of pages encountered in the
 * working set up to the limit requested by the caller.  The object needs
 * to be locked on entry.  The map does not because the tws_lookup function
 * is used only to find if there is an entry in the cache.  No transient
 * data from the cache is de-referenced.
 *
 */
#if	MACH_PAGEMAP
/*
 * MACH page map - an optional optimization where a bit map is maintained
 * by the VM subsystem for internal objects to indicate which pages of
 * the object currently reside on backing store.  This existence map
 * duplicates information maintained by the vnode pager.  It is
 * created at the time of the first pageout against the object, i.e.
 * at the same time the pager for the object is created.  The optimization
 * is designed to eliminate pager interaction overhead, if it is
 * 'known' that the page does not exist on backing store.
 *
 * LOOK_FOR() evaluates to TRUE if the page specified by object/offset is
 * either marked as paged out in the existence map for the object or no
 * existence map exists for the object.  LOOK_FOR() is one of the
 * criteria in the decision to invoke the pager.  It is also used as one
 * of the criteria to terminate the scan for adjacent pages in a clustered
 * pagein operation.  Note that LOOK_FOR() always evaluates to TRUE for
 * permanent objects.  Note also that if the pager for an internal object
 * has not been created, the pager is not invoked regardless of the value
 * of LOOK_FOR() and that clustered pagein scans are only done on an object
 * for which a pager has been created.
 *
 * PAGED_OUT() evaluates to TRUE if the page specified by the object/offset
 * is marked as paged out in the existence map for the object.
 * PAGED_OUT() is used to determine if a page has already been pushed
 * into a copy object in order to avoid a redundant page out operation.
 */
#define	LOOK_FOR(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			!= VM_EXTERNAL_STATE_ABSENT)
#define	PAGED_OUT(o, f) (vm_external_state_get((o)->existence_map, (f)) \
			== VM_EXTERNAL_STATE_EXISTS)
#else /* MACH_PAGEMAP */
/*
 * If the MACH page map optimization is not enabled,
 * LOOK_FOR() always evaluates to TRUE.  The pager will always be
 * invoked to resolve missing pages in an object, assuming the pager
 * has been created for the object.  In a clustered page operation, the
 * absence of a page on backing store cannot be used to terminate
 * a scan for adjacent pages since that information is available only in
 * the pager.  Hence pages that may not be paged out are potentially
 * included in a clustered request.  The vnode pager is coded to deal
 * with any combination of absent/present pages in a clustered
 * pagein request.  PAGED_OUT() always evaluates to FALSE, i.e. the pager
 * will always be invoked to push a dirty page into a copy object assuming
 * a pager has been created.  If the page has already been pushed, the
 * pager will ignore the new request.
 */
#define	LOOK_FOR(o, f) TRUE
#define	PAGED_OUT(o, f) FALSE
#endif /* MACH_PAGEMAP */

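/*
 *	Cluster sizing note: within the first pre_heat_size bytes the
 *	cluster is grown speculatively; past that, growth in either
 *	direction continues only while adjacent pages (or their
 *	immediate neighbors) are found in the working set.  The pre-heat
 *	window is 8 pages for objects over 1MB, 4 pages over 128KB, and
 *	2 pages otherwise, and is disabled entirely for internal objects
 *	and for caches that are old or under memory pressure.  Growth
 *	always stops at a page that is already resident or known to be
 *	absent from backing store.
 */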
void
tws_build_cluster(
	tws_hash_t		tws,
	vm_object_t		object,
	vm_object_offset_t	*start,
	vm_object_offset_t	*end,
	vm_size_t		max_length)
{
	tws_hash_line_t		line;
	task_t			task;
	vm_object_offset_t	before = *start;
	vm_object_offset_t	after = *end;
	vm_size_t		length = (vm_size_t)(*end - *start);
	vm_page_t		m;
	kern_return_t		kret;
	vm_object_offset_t	object_size;
	int			pre_heat_size;
	int			age_of_cache;

	if((object->private) || !(object->pager))
		return;

	if (!object->internal) {
		kret = vnode_pager_get_object_size(
			object->pager,
			&object_size);
	} else {
		object_size = object->size;
	}
	/*
	 * determine age of cache in seconds
	 */
	age_of_cache = ((sched_tick - tws->time_of_creation) >> SCHED_TICK_SHIFT);

	if (object->internal || age_of_cache > 15 ||
			(age_of_cache > 5 &&
			vm_page_free_count < (vm_page_free_target * 2))) {
		pre_heat_size = 0;
	} else {
		if (object_size > (vm_object_offset_t)(1024 * 1024))
			pre_heat_size = 8 * PAGE_SIZE;
		else if (object_size > (vm_object_offset_t)(128 * 1024))
			pre_heat_size = 4 * PAGE_SIZE;
		else
			pre_heat_size = 2 * PAGE_SIZE;
	}

	while ((length < max_length) &&
			(object_size >= (after + PAGE_SIZE_64))) {

		if(length >= pre_heat_size)
		{
			if(tws_lookup(tws, after, object,
					&line) != KERN_SUCCESS) {
				vm_object_offset_t	extend;

				extend = after + PAGE_SIZE_64;
				if(tws_lookup(tws, extend, object,
						&line) != KERN_SUCCESS) {
					break;
				}
			}
		}
		if (((object->existence_map != NULL)
				&& (!LOOK_FOR(object, after))) ||
				(vm_page_lookup(object, after)
				!= VM_PAGE_NULL)) {
			break;
		}
		if (object->internal) {
			/*
			 * need to acquire a real page in
			 * advance because this acts as
			 * a throttling mechanism for
			 * data_requests to the default
			 * pager.  If this fails, give up
			 * trying to find any more pages
			 * in the cluster and send off the
			 * request for what we already have.
			 */
			if ((m = vm_page_grab()) == VM_PAGE_NULL) {
				break;
			}
		} else if ((m = vm_page_grab_fictitious())
				== VM_PAGE_NULL) {
			break;
		}
		m->absent = TRUE;
		m->unusual = TRUE;
		m->clustered = TRUE;
		m->list_req_pending = TRUE;

		vm_page_insert(m, object, after);
		object->absent_count++;
		after += PAGE_SIZE_64;
		length += PAGE_SIZE;
	}
	*end = after;
	while (length < max_length) {
		if (before == 0)
			break;
		before -= PAGE_SIZE_64;

		if(length >= pre_heat_size)
		{
			if(tws_lookup(tws, before, object,
					&line) != KERN_SUCCESS) {
				vm_object_offset_t	extend;

				extend = before;
				if (extend == 0)
					break;
				extend -= PAGE_SIZE_64;
				if(tws_lookup(tws, extend, object,
						&line) != KERN_SUCCESS) {
					break;
				}
			}
		}
		if (((object->existence_map != NULL)
				&& (!LOOK_FOR(object, before))) ||
				(vm_page_lookup(object, before)
				!= VM_PAGE_NULL)) {
			break;
		}
		if (object->internal) {
			/*
			 * need to acquire a real page in
			 * advance because this acts as
			 * a throttling mechanism for
			 * data_requests to the default
			 * pager.  If this fails, give up
			 * trying to find any more pages
			 * in the cluster and send off the
			 * request for what we already have.
			 */
			if ((m = vm_page_grab()) == VM_PAGE_NULL) {
				break;
			}
		} else if ((m = vm_page_grab_fictitious())
				== VM_PAGE_NULL) {
			break;
		}
		m->absent = TRUE;
		m->unusual = TRUE;
		m->clustered = TRUE;
		m->list_req_pending = TRUE;

		vm_page_insert(m, object, before);
		object->absent_count++;
		*start -= PAGE_SIZE_64;
		length += PAGE_SIZE;
	}
}

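/*
 *	tws_line_signal
 *	For a TWS_HASH_STYLE_SIGNAL working set, walks a cache line and
 *	issues a clustered pagein request for each element whose first
 *	cached page is absent but recoverable from its pager, skipping
 *	the element that covers the faulting target_page.  The map lock
 *	is dropped around each request and retaken afterwards.
 */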
void
tws_line_signal(
	tws_hash_t	tws,
	vm_map_t	map,
	tws_hash_line_t	hash_line,
	vm_offset_t	target_page)
{
	unsigned int		i, j;
	vm_object_t		object;
	vm_object_offset_t	offset;
	vm_object_offset_t	before;
	vm_object_offset_t	after;
	struct tws_hash_ele	*element;
	vm_page_t		m, p;
	kern_return_t		rc;

	if(tws->style != TWS_HASH_STYLE_SIGNAL)
		return;

	vm_map_lock(map);
	for (i = 0; i < tws->number_of_elements; i++) {

		vm_object_offset_t local_off = 0;

		if(hash_line->list[i].object == 0)
			continue;

		element = &hash_line->list[i];

		if (element->page_addr == target_page)
			continue;

		/* find the first page cached by this element */
		j = 1;
		while (j != 0) {
			if(j & element->page_cache)
				break;
			j = j << 1;
			local_off += PAGE_SIZE_64;
		}
		object = element->object;
		offset = element->offset + local_off;

		/* first try a fast test to speed up no-op signal */
		if (((p = vm_page_lookup(object, offset)) != NULL)
				|| (object->pager == NULL)
				|| (object->shadow_severed)) {
			continue;
		}

		if((!object->alive) ||
				(!object->pager_created) || (!object->pager_ready))
			continue;

		if (object->internal) {
			if (object->existence_map == NULL) {
				if (object->shadow)
					continue;
			} else {
				if(!LOOK_FOR(object, offset))
					continue;
			}
		}

		vm_object_reference(object);
		vm_map_unlock(map);

		if(object->internal) {
			m = vm_page_grab();
		} else {
			m = vm_page_grab_fictitious();
		}

		if(m == NULL) {
			vm_object_deallocate(object);
			vm_map_lock(map);
			continue;
		}

		vm_object_lock(object);
		if (((p = vm_page_lookup(object, offset)) != NULL)
				|| (object->pager == NULL)
				|| (object->shadow_severed)) {
			VM_PAGE_FREE(m);
			vm_object_unlock(object);
			vm_object_deallocate(object);
			vm_map_lock(map);
			continue;
		}

		vm_page_insert(m, object, offset);

		if (object->absent_count > vm_object_absent_max) {
			VM_PAGE_FREE(m);
			vm_object_unlock(object);
			vm_object_deallocate(object);
			vm_map_lock(map);
			break;
		}
		m->list_req_pending = TRUE;
		m->absent = TRUE;
		m->unusual = TRUE;
		object->absent_count++;

		before = offset;
		after = offset + PAGE_SIZE_64;
		tws_build_cluster(tws, object, &before, &after, 0x16000);
		vm_object_unlock(object);

		rc = memory_object_data_request(object->pager,
			before + object->paging_offset,
			(vm_size_t)(after - before), VM_PROT_READ);
		if (rc != KERN_SUCCESS) {
			offset = before;
			vm_object_lock(object);
			while (offset < after) {
				m = vm_page_lookup(object, offset);
				if(m && m->absent && m->busy)
					VM_PAGE_FREE(m);
				offset += PAGE_SIZE;
			}
			vm_object_unlock(object);
			vm_object_deallocate(object);
		} else {
			vm_object_deallocate(object);
		}
		vm_map_lock(map);
		continue;
	}
	vm_map_unlock(map);
}

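/*
 *	tws_hash_destroy
 *	Drops the object and map references held by every cache line
 *	(via tws_hash_line_clear with live == FALSE), then frees the
 *	probe tables and cache lines of every expansion set, followed
 *	by the hash header itself.
 */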
void
tws_hash_destroy(tws_hash_t tws)
{
	int		i, k;
	vm_size_t	cache_size;

	for (i = 0; i < tws->number_of_lines; i++) {
		for(k = 0; k < tws->expansion_count; k++) {
			/* clear the object refs */
			tws_hash_line_clear(tws, &(tws->cache[k][i]), FALSE);
		}
	}
	i = 0;
	while (i < tws->expansion_count) {

		kfree((vm_offset_t)tws->table[i], sizeof(tws_hash_ele_t)
				* 2 * tws->number_of_lines
				* tws->number_of_elements);
		kfree((vm_offset_t)tws->alt_table[i], sizeof(tws_hash_ele_t)
				* 2 * tws->number_of_lines
				* tws->number_of_elements);
		kfree((vm_offset_t)tws->cache[i], sizeof(struct tws_hash_line)
				* tws->number_of_lines);
		i++;
	}
	kfree((vm_offset_t)tws, sizeof(struct tws_hash));
}

void
tws_hash_clear(tws_hash_t tws)
{
	int	i, k;

	for (i = 0; i < tws->number_of_lines; i++) {
		for(k = 0; k < tws->expansion_count; k++) {
			/* clear the object refs */
			tws_hash_line_clear(tws, &(tws->cache[k][i]), FALSE);
		}
	}
}

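/*
 *	task_working_set_create
 *	Attaches a working-set hash to a task, substituting the default
 *	line count, row count, and style for zero arguments.  Fails if
 *	the task already has a dynamic working set.
 */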
kern_return_t
task_working_set_create(
	task_t		task,
	unsigned int	lines,
	unsigned int	rows,
	unsigned int	style)
{

	if (lines == 0) {
		lines = TWS_HASH_LINE_COUNT;
	}
	if (rows == 0) {
		rows = TWS_ARRAY_SIZE;
	}
	if (style == TWS_HASH_STYLE_DEFAULT) {
		style = TWS_HASH_STYLE_BASIC;
	}
	task_lock(task);
	if(task->dynamic_working_set != 0) {
		task_unlock(task);
		return(KERN_FAILURE);
	} else if((task->dynamic_working_set
			= (vm_offset_t) tws_hash_create(lines, rows, style)) == 0) {
		task_unlock(task);
		return(KERN_NO_SPACE);
	}
	task_unlock(task);
	return KERN_SUCCESS;
}