/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 *	File:	kern/task_swap.c
 *
 *	Task residency management primitives implementation.
 */
#include <mach_assert.h>
#include <task_swapper.h>

#include <kern/spl.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/task_swap.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/host_statistics.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <mach/policy.h>

#include <ipc/ipc_port.h>	/* We use something from in here */

/*
 * Note: if TASK_SWAPPER is disabled, then this file defines only
 * a stub version of task_swappable(), so that the service can always
 * be defined, even if swapping has been configured out of the kernel.
 */
#if	TASK_SWAPPER

/* temporary debug flags */
#define TASK_SW_DEBUG		1
#define TASK_SW_STATS		1

int task_swap_debug = 0;
int task_swap_stats = 0;
int task_swap_enable = 1;
int task_swap_on = 1;

queue_head_t	swapped_tasks;		/* completely swapped out tasks */
queue_head_t	swapout_thread_q;	/* threads to be swapped out */
mutex_t		task_swapper_lock;	/* protects above queues */

#define task_swapper_lock()	mutex_lock(&task_swapper_lock)
#define task_swapper_unlock()	mutex_unlock(&task_swapper_lock)

queue_head_t	eligible_tasks;		/* tasks eligible for swapout */
mutex_t		task_swapout_list_lock;	/* protects above queue */
#define task_swapout_lock()	mutex_lock(&task_swapout_list_lock)
#define task_swapout_unlock()	mutex_unlock(&task_swapout_list_lock)
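
/*
 * Lock ordering, as used throughout this file: task_swapout_lock is
 * taken before a task lock (see task_swappable and pick_outtask below).
 * Note also that a task is linked onto eligible_tasks and swapped_tasks
 * through the same queue_chain_t field (task->swapped_tasks), so a task
 * can be on at most one of the two lists at any time.
 */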

/*
 * The next section of constants and globals are tunable parameters
 * used in making swapping decisions. They may be changed dynamically
 * without adversely affecting the robustness of the system; however,
 * the policy will change, one way or the other.
 */

#define SHORT_AVG_INTERVAL	5	/* in seconds */
#define LONG_AVG_INTERVAL	30	/* in seconds */
#define AVE_SCALE		1024

unsigned int short_avg_interval = SHORT_AVG_INTERVAL;
unsigned int long_avg_interval = LONG_AVG_INTERVAL;

#ifndef MIN_SWAP_PAGEOUT_RATE
#define MIN_SWAP_PAGEOUT_RATE	10
#endif

/*
 * The following are all stored in fixed-point representation (the actual
 * value times AVE_SCALE), to allow more accurate computing of decaying
 * averages. So all variables that end with "avg" must be divided by
 * AVE_SCALE to convert them or compare them to ints.
 */
unsigned int vm_grab_rate_avg;
unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_page_free_avg;	   /* average free pages over short_avg_interval */
unsigned int vm_page_free_longavg; /* avg free pages over long_avg_interval */
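
/*
 * For example, with AVE_SCALE at 1024, a pageout rate of 10 pages/sec
 * is stored as 10 * 1024 == 10240, and vm_pageout_rate_avg / AVE_SCALE
 * recovers the integer rate. AVE_SCALE is a power of two, so these
 * scaling multiplies and divides reduce to shifts.
 */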

/*
 * Trigger task swapping when paging activity reaches
 * SWAP_PAGEOUT_HIGH_WATER_MARK percent of the maximum paging activity
 * ever observed. Turn off task swapping when paging activity goes back
 * down to below SWAP_PAGEOUT_LOW_WATER_MARK percent of the maximum.
 * These numbers have been found empirically and might need some tuning...
 */
#ifndef SWAP_PAGEOUT_HIGH_WATER_MARK
#define SWAP_PAGEOUT_HIGH_WATER_MARK	30
#endif
#ifndef SWAP_PAGEOUT_LOW_WATER_MARK
#define SWAP_PAGEOUT_LOW_WATER_MARK	10
#endif

#ifndef MAX_GRAB_RATE
#define MAX_GRAB_RATE	((unsigned int) -1)	/* XXX no maximum */
#endif

/*
 * swap_{start,stop}_pageout_rate start at the minimum value, then increase
 * to adjust to the hardware's performance, following the paging rate peaks.
 */
unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK;
unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK;
unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_LOW_WATER_MARK / 100;
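/*
 * With the default constants above, swap_start_pageout_rate begins at
 * 10 * 1024 * 30 / 100 == 3072 (3 pages/sec in fixed point) and
 * swap_stop_pageout_rate at 10 * 1024 * 10 / 100 == 1024 (1 page/sec);
 * both are raised later, as vm_pageout_rate_peakavg grows (see
 * compute_vm_averages below).
 */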
#if	TASK_SW_DEBUG
unsigned int fixed_swap_start_pageout_rate = 0;	/* for testing purposes only */
unsigned int fixed_swap_stop_pageout_rate = 0;	/* for testing purposes only */
#endif	/* TASK_SW_DEBUG */
unsigned int max_grab_rate = MAX_GRAB_RATE;

#ifndef MIN_SWAP_TIME
#define MIN_SWAP_TIME	1
#endif

int min_swap_time = MIN_SWAP_TIME;		/* in seconds */

#ifndef MIN_RES_TIME
#define MIN_RES_TIME	6
#endif

int min_res_time = MIN_RES_TIME;		/* in seconds */

#ifndef MIN_ACTIVE_TASKS
#define MIN_ACTIVE_TASKS	4
#endif

int min_active_tasks = MIN_ACTIVE_TASKS;

#ifndef TASK_SWAP_CYCLE_TIME
#define TASK_SWAP_CYCLE_TIME	2
#endif

int task_swap_cycle_time = TASK_SWAP_CYCLE_TIME;	/* in seconds */

int last_task_swap_cycle = 0;

/* temporary statistics */
int task_swapouts = 0;
int task_swapins = 0;
int task_swaprss_out = 0;	/* total rss at swapout time */
int task_swaprss_in = 0;	/* total rss at swapin time */
int task_swap_total_time = 0;	/* total time spent swapped out */
int tasks_swapped_out = 0;	/* number of tasks swapped out now */

#ifdef	TASK_SW_STATS
#define	TASK_STATS_INCR(cnt)	(cnt)++
#else
#define	TASK_STATS_INCR(cnt)
#endif	/* TASK_SW_STATS */

#if	TASK_SW_DEBUG
boolean_t on_swapped_list(task_t task);	/* forward */
/*
 * Debug function to determine if a task is already on the
 * swapped out tasks list. It also checks for tasks on the list
 * that are in an illegal state (i.e. swapped in).
 */
boolean_t
on_swapped_list(task_t task)
{
	task_t ltask;
	/* task_swapper_lock is locked. */

	if (queue_empty(&swapped_tasks)) {
		return(FALSE);
	}
	ltask = (task_t)queue_first(&swapped_tasks);
	while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) {
		/* check for illegal state */
		if (ltask->swap_state == TASK_SW_IN) {
			printf("on_swapped_list and in: 0x%X\n", ltask);
			Debugger("");
		}
		if (ltask == task)
			return(TRUE);
		ltask = (task_t)queue_next(&ltask->swapped_tasks);
	}
	return(FALSE);
}
#endif	/* TASK_SW_DEBUG */

/*
 *	task_swapper_init: [exported]
 */
void
task_swapper_init()
{
	queue_init(&swapped_tasks);
	queue_init(&eligible_tasks);
	queue_init(&swapout_thread_q);
	mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP);
	mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT);
	vm_page_free_avg = vm_page_free_count * AVE_SCALE;
	vm_page_free_longavg = vm_page_free_count * AVE_SCALE;
}

#endif	/* TASK_SWAPPER */

/*
 *	task_swappable:	[exported]
 *
 *	Make a task swappable or non-swappable. If made non-swappable,
 *	it will be swapped in.
 *
 *	Locking: task_swapout_lock is taken before task lock.
 */
kern_return_t
task_swappable(
	host_priv_t host_priv,
	task_t task,
	boolean_t make_swappable)
{
	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_ARGUMENT);

	if (task == TASK_NULL)
		return(KERN_INVALID_ARGUMENT);

#if	!TASK_SWAPPER

	/*
	 * If we don't support swapping, this call is purely advisory.
	 */
	return(KERN_SUCCESS);

#else	/* TASK_SWAPPER */

	task_lock(task);
	if (make_swappable) {
		/* make task swappable */
		if (task->swap_state == TASK_SW_UNSWAPPABLE) {
			task->swap_state = TASK_SW_IN;
			task_unlock(task);
			task_swapout_eligible(task);
		} else {
			/* already swappable: drop the lock before returning */
			task_unlock(task);
		}
	} else {
		switch (task->swap_state) {
		case TASK_SW_IN:
			task->swap_state = TASK_SW_UNSWAPPABLE;
			task_unlock(task);
			task_swapout_ineligible(task);
			break;
		case TASK_SW_UNSWAPPABLE:
			task_unlock(task);
			break;
		default:
			/*
			 * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT,
			 * or TASK_SW_COMING_IN. task_swapin handles all
			 * three, and its default case will catch any bad
			 * states.
			 */
			task_unlock(task);
			task_swapin(task, TRUE);
			break;
		}
	}
	return(KERN_SUCCESS);

#endif	/* TASK_SWAPPER */

}
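
/*
 * Example usage (a hypothetical caller sketch, not taken from this
 * file): pin a task in memory before a latency-critical operation,
 * assuming the caller can name the privileged host port (e.g. via
 * host_priv_self() inside the kernel):
 *
 *	kern_return_t kr;
 *
 *	kr = task_swappable(host_priv_self(), task, FALSE);
 *
 * Passing FALSE marks the task unswappable and, if it is currently out
 * or on its way out, brings it back in (via task_swapin below);
 * passing TRUE makes it eligible for swapout again.
 */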

#if	TASK_SWAPPER

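/*
 * Overview of the task swap states as driven by the code below:
 *
 *	TASK_SW_IN         --> TASK_SW_GOING_OUT   (task_swapout)
 *	TASK_SW_GOING_OUT  --> TASK_SW_OUT         (thread_swapout_enqueue,
 *	                                            when swap_ast_waiting
 *	                                            drops to zero)
 *	TASK_SW_OUT        --> TASK_SW_COMING_IN   (task_swapin)
 *	TASK_SW_GOING_OUT  --> TASK_SW_COMING_IN   (task_swapin, racing with
 *	                                            the swapout)
 *	TASK_SW_COMING_IN  --> TASK_SW_IN          (task_swapin)
 *	TASK_SW_IN        <--> TASK_SW_UNSWAPPABLE (task_swappable)
 *
 * task_swapin can also leave the task TASK_SW_UNSWAPPABLE instead of
 * TASK_SW_IN when TASK_SW_MAKE_UNSWAPPABLE has been requested.
 */
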
/*
 *	task_swapout:
 *	A reference to the task must be held.
 *
 *	Start swapping out a task by sending an AST_SWAPOUT to each thread.
 *	When the threads reach a clean point, they queue themselves up on the
 *	swapout_thread_q to be swapped out by the task_swap_swapout_thread.
 *	The task can be swapped in at any point in this process.
 *
 *	A task will not be fully swapped out (i.e. have its map residence
 *	count reach zero) until all currently-swapped threads run and reach
 *	a clean point, at which time they will be swapped again,
 *	decrementing the swap_ast_waiting count on the task.
 *
 *	Locking: no locks held upon entry and exit.
 *		 Task_lock is held throughout this function.
 */
kern_return_t
task_swapout(task_t task)
{
	thread_act_t thr_act;
	thread_t thread;
	queue_head_t *list;
	int s;

	task_swapout_lock();
	task_lock(task);
	/*
	 * NOTE: look into turning these into assertions if they
	 * are invariants.
	 */
	if ((task->swap_state != TASK_SW_IN) || (!task->active)) {
		task_unlock(task);
		task_swapout_unlock();
		return(KERN_FAILURE);
	}
	if (task->swap_flags & TASK_SW_ELIGIBLE) {
		queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags &= ~TASK_SW_ELIGIBLE;
	}
	task_swapout_unlock();

	/* set state to avoid races with task_swappable(FALSE) */
	task->swap_state = TASK_SW_GOING_OUT;
	task->swap_rss = pmap_resident_count(task->map->pmap);
	task_swaprss_out += task->swap_rss;
	task->swap_ast_waiting = task->thr_act_count;

	/*
	 * halt all threads in this task:
	 * We don't need the thread list lock for traversal.
	 */
	list = &task->thr_acts;
	thr_act = (thread_act_t) queue_first(list);
	while (!queue_end(list, (queue_entry_t) thr_act)) {
		boolean_t swappable;
		thread_act_t ract;

		thread = act_lock_thread(thr_act);
		s = splsched();
		if (!thread)
			swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE);
		else {
			thread_lock(thread);
			swappable = TRUE;
			for (ract = thread->top_act; ract; ract = ract->lower)
				if (ract->swap_state == TH_SW_UNSWAPPABLE) {
					swappable = FALSE;
					break;
				}
		}
		if (swappable)
			thread_ast_set(thr_act, AST_SWAPOUT);
		if (thread)
			thread_unlock(thread);
		splx(s);
		assert((thr_act->ast & AST_TERMINATE) == 0);
		act_unlock_thread(thr_act);
		thr_act = (thread_act_t) queue_next(&thr_act->thr_acts);
	}

	task->swap_stamp = sched_tick;
	task->swap_nswap++;
	assert((task->swap_flags & TASK_SW_WANT_IN) == 0);
	/* put task on the queue of swapped out tasks */
	task_swapper_lock();
#if	TASK_SW_DEBUG
	if (task_swap_debug && on_swapped_list(task)) {
		printf("task 0x%X already on list\n", task);
		Debugger("");
	}
#endif	/* TASK_SW_DEBUG */
	queue_enter(&swapped_tasks, task, task_t, swapped_tasks);
	tasks_swapped_out++;
	task_swapouts++;
	task_swapper_unlock();
	task_unlock(task);

	return(KERN_SUCCESS);
}

#ifdef	TASK_SW_STATS
int task_sw_race_in = 0;
int task_sw_race_coming_in = 0;
int task_sw_race_going_out = 0;
int task_sw_before_ast = 0;
int task_sw_before_swap = 0;
int task_sw_after_swap = 0;
int task_sw_race_in_won = 0;
int task_sw_unswappable = 0;
int task_sw_act_inactive = 0;
#endif	/* TASK_SW_STATS */

/*
 * thread_swapout_enqueue is called by thread_halt_self when it
 * processes AST_SWAPOUT to enqueue threads to be swapped out.
 * It must be called at normal interrupt priority for the
 * sake of the task_swapper_lock.
 *
 * There can be races with task swapin here.
 * First lock task and decrement swap_ast_waiting count, and if
 * it's 0, we can decrement the residence count on the task's map
 * and set the task's swap state to TASK_SW_OUT.
 */
void
thread_swapout_enqueue(thread_act_t thr_act)
{
	task_t task = thr_act->task;
	task_lock(task);
	/*
	 * If the swap_state is not TASK_SW_GOING_OUT, then
	 * task_swapin has beaten us to this operation, and
	 * we have nothing to do.
	 */
	if (task->swap_state != TASK_SW_GOING_OUT) {
		task_unlock(task);
		return;
	}
	if (--task->swap_ast_waiting == 0) {
		vm_map_t map = task->map;
		task->swap_state = TASK_SW_OUT;
		task_unlock(task);
		mutex_lock(&map->s_lock);
		vm_map_res_deallocate(map);
		mutex_unlock(&map->s_lock);
	} else
		task_unlock(task);

	task_swapper_lock();
	act_lock(thr_act);
	if (!(thr_act->swap_state & TH_SW_TASK_SWAPPING)) {
		/*
		 * We lost a race with task_swapin(): don't enqueue.
		 */
	} else {
		queue_enter(&swapout_thread_q, thr_act,
			    thread_act_t, swap_queue);
		thread_wakeup((event_t)&swapout_thread_q);
	}
	act_unlock(thr_act);
	task_swapper_unlock();
}

/*
 *	task_swap_swapout_thread: [exported]
 *
 *	Executes as a separate kernel thread.
 *	Its job is to swap out threads that have been halted by AST_SWAPOUT.
 */
void
task_swap_swapout_thread(void)
{
	thread_act_t thr_act;
	thread_t thread, nthread;
	task_t task;
	int s;

	thread_swappable(current_act(), FALSE);
	stack_privilege(current_thread());

	spllo();

	while (TRUE) {
		task_swapper_lock();
		while (!queue_empty(&swapout_thread_q)) {

			queue_remove_first(&swapout_thread_q, thr_act,
					   thread_act_t, swap_queue);
			/*
			 * If we're racing with task_swapin, we need
			 * to make it safe for it to do remque on the
			 * thread, so make its links point to itself.
			 * Allowing this ugliness is cheaper than
			 * making task_swapin search the entire queue.
			 */
			act_lock(thr_act);
			queue_init((queue_t) &thr_act->swap_queue);
			act_unlock(thr_act);
			task_swapper_unlock();
			/*
			 * Wait for thread's RUN bit to be deasserted.
			 */
			thread = act_lock_thread(thr_act);
			if (thread == THREAD_NULL)
				act_unlock_thread(thr_act);
			else {
				boolean_t r;

				thread_reference(thread);
				thread_hold(thr_act);
				act_unlock_thread(thr_act);
				r = thread_stop_wait(thread);
				nthread = act_lock_thread(thr_act);
				thread_release(thr_act);
				thread_deallocate(thread);
				act_unlock_thread(thr_act);
				if (!r || nthread != thread) {
					task_swapper_lock();
					continue;
				}
			}
			task = thr_act->task;
			task_lock(task);
			/*
			 * we can race with swapin, which would set the
			 * state to TASK_SW_IN.
			 */
			if ((task->swap_state != TASK_SW_OUT) &&
			    (task->swap_state != TASK_SW_GOING_OUT)) {
				task_unlock(task);
				task_swapper_lock();
				TASK_STATS_INCR(task_sw_race_in_won);
				if (thread != THREAD_NULL)
					thread_unstop(thread);
				continue;
			}
			nthread = act_lock_thread(thr_act);
			if (nthread != thread || thr_act->active == FALSE) {
				act_unlock_thread(thr_act);
				task_unlock(task);
				task_swapper_lock();
				TASK_STATS_INCR(task_sw_act_inactive);
				if (thread != THREAD_NULL)
					thread_unstop(thread);
				continue;
			}
			s = splsched();
			if (thread != THREAD_NULL)
				thread_lock(thread);
			/*
			 * Thread cannot have been swapped out yet because
			 * TH_SW_TASK_SWAPPING was set in AST. If task_swapin
			 * beat us here, we either wouldn't have found it on
			 * the queue, or the task->swap_state would have
			 * changed. The synchronization is on the
			 * task's swap_state and the task_lock.
			 * The thread can't be swapped in any other way
			 * because its task has been swapped.
			 */
			assert(thr_act->swap_state & TH_SW_TASK_SWAPPING);
			assert(thread == THREAD_NULL ||
			       !(thread->state & (TH_SWAPPED_OUT|TH_RUN)));
			assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN);
			/* assert(thread->state & TH_HALTED); */
			/* this also clears TH_SW_TASK_SWAPPING flag */
			thr_act->swap_state = TH_SW_GOING_OUT;
			if (thread != THREAD_NULL) {
				if (thread->top_act == thr_act) {
					thread->state |= TH_SWAPPED_OUT;
					/*
					 * Once we unlock the task, things can
					 * happen to the thread, so make sure
					 * it's consistent for thread_swapout.
					 */
				}
				thread->ref_count++;
				thread_unlock(thread);
				thread_unstop(thread);
			}
			splx(s);
			act_locked_act_reference(thr_act);
			act_unlock_thread(thr_act);
			task_unlock(task);

			thread_swapout(thr_act);	/* do the work */

			if (thread != THREAD_NULL)
				thread_deallocate(thread);
			act_deallocate(thr_act);
			task_swapper_lock();
		}
		assert_wait((event_t)&swapout_thread_q, THREAD_UNINT);
		task_swapper_unlock();
		thread_block((void (*)(void)) 0);
	}
}

/*
 *	task_swapin:
 *
 *	Make a task resident.
 *	Performs all of the work to make a task resident and possibly
 *	non-swappable. If we race with a competing task_swapin call,
 *	we wait for its completion, then return.
 *
 *	Locking: no locks held upon entry and exit.
 *
 *	Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the
 *	state is TASK_SW_COMING_IN.
 */

kern_return_t
task_swapin(task_t task, boolean_t make_unswappable)
{
	register queue_head_t *list;
	register thread_act_t thr_act, next;
	thread_t thread;
	int s;
	boolean_t swappable = TRUE;

	task_lock(task);
	switch (task->swap_state) {
	case TASK_SW_OUT:
	    {
		vm_map_t map = task->map;
		/*
		 * Task has made it all the way out, which means
		 * that vm_map_res_deallocate has been done; set
		 * state to TASK_SW_COMING_IN, then bring map
		 * back in. We could actually be racing with
		 * the thread_swapout_enqueue, which does the
		 * vm_map_res_deallocate, but that race is covered.
		 */
		task->swap_state = TASK_SW_COMING_IN;
		assert(task->swap_ast_waiting == 0);
		assert(map->res_count >= 0);
		task_unlock(task);
		mutex_lock(&map->s_lock);
		vm_map_res_reference(map);
		mutex_unlock(&map->s_lock);
		task_lock(task);
		assert(task->swap_state == TASK_SW_COMING_IN);
	    }
		break;

	case TASK_SW_GOING_OUT:
		/*
		 * Task isn't all the way out yet. There is
		 * still at least one thread not swapped, and
		 * vm_map_res_deallocate has not been done.
		 */
		task->swap_state = TASK_SW_COMING_IN;
		assert(task->swap_ast_waiting > 0 ||
		       (task->swap_ast_waiting == 0 &&
			task->thr_act_count == 0));
		assert(task->map->res_count > 0);
		TASK_STATS_INCR(task_sw_race_going_out);
		break;
	case TASK_SW_IN:
		assert(task->map->res_count > 0);
#if	TASK_SW_DEBUG
		task_swapper_lock();
		if (task_swap_debug && on_swapped_list(task)) {
			printf("task 0x%X on list, state is SW_IN\n",
			       task);
			Debugger("");
		}
		task_swapper_unlock();
#endif	/* TASK_SW_DEBUG */
		TASK_STATS_INCR(task_sw_race_in);
		if (make_unswappable) {
			task->swap_state = TASK_SW_UNSWAPPABLE;
			task_unlock(task);
			task_swapout_ineligible(task);
		} else
			task_unlock(task);
		return(KERN_SUCCESS);
	case TASK_SW_COMING_IN:
		/*
		 * Raced with another task_swapin and lost;
		 * wait for other one to complete first
		 */
		assert(task->map->res_count >= 0);
		/*
		 * set MAKE_UNSWAPPABLE so that whoever is swapping
		 * the task in will make it unswappable, and return
		 */
		if (make_unswappable)
			task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
		task->swap_flags |= TASK_SW_WANT_IN;
		assert_wait((event_t)&task->swap_state, THREAD_UNINT);
		task_unlock(task);
		thread_block((void (*)(void)) 0);
		TASK_STATS_INCR(task_sw_race_coming_in);
		return(KERN_SUCCESS);
	case TASK_SW_UNSWAPPABLE:
		/*
		 * This can happen, since task_terminate
		 * unconditionally calls task_swapin.
		 */
		task_unlock(task);
		return(KERN_SUCCESS);
	default:
		panic("task_swapin bad state");
		break;
	}
	if (make_unswappable)
		task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
	assert(task->swap_state == TASK_SW_COMING_IN);
	task_swapper_lock();
#if	TASK_SW_DEBUG
	if (task_swap_debug && !on_swapped_list(task)) {
		printf("task 0x%X not on list\n", task);
		Debugger("");
	}
#endif	/* TASK_SW_DEBUG */
	queue_remove(&swapped_tasks, task, task_t, swapped_tasks);
	tasks_swapped_out--;
	task_swapins++;
	task_swapper_unlock();

	/*
	 * Iterate through all threads for this task and
	 * release them, as required. They may not have been swapped
	 * out yet. The task remains locked throughout.
	 */
	list = &task->thr_acts;
	thr_act = (thread_act_t) queue_first(list);
	while (!queue_end(list, (queue_entry_t) thr_act)) {
		boolean_t need_to_release;
		next = (thread_act_t) queue_next(&thr_act->thr_acts);
		/*
		 * Keep task_swapper_lock across thread handling
		 * to synchronize with task_swap_swapout_thread
		 */
		task_swapper_lock();
		thread = act_lock_thread(thr_act);
		s = splsched();
		if (thr_act->ast & AST_SWAPOUT) {
			/* thread hasn't gotten the AST yet, just clear it */
			thread_ast_clear(thr_act, AST_SWAPOUT);
			need_to_release = FALSE;
			TASK_STATS_INCR(task_sw_before_ast);
			splx(s);
			act_unlock_thread(thr_act);
		} else {
			/*
			 * If AST_SWAPOUT was cleared, then thread_hold,
			 * or equivalent was done.
			 */
			need_to_release = TRUE;
			/*
			 * Thread has hit AST, but it may not have
			 * been dequeued yet, so we need to check.
			 * NOTE: the thread may have been dequeued, but
			 * has not yet been swapped (the task_swapper_lock
			 * has been dropped, but the thread is not yet
			 * locked), and the TH_SW_TASK_SWAPPING flag may
			 * not have been cleared. In this case, we will do
			 * an extra remque, which the task_swap_swapout_thread
			 * has made safe, and clear the flag, which is also
			 * checked by the t_s_s_t before doing the swapout.
			 */
			if (thread)
				thread_lock(thread);
			if (thr_act->swap_state & TH_SW_TASK_SWAPPING) {
				/*
				 * hasn't yet been dequeued for swapout,
				 * so clear flags and dequeue it first.
				 */
				thr_act->swap_state &= ~TH_SW_TASK_SWAPPING;
				assert(thr_act->thread == THREAD_NULL ||
				       !(thr_act->thread->state &
					 TH_SWAPPED_OUT));
				queue_remove(&swapout_thread_q, thr_act,
					     thread_act_t, swap_queue);
				TASK_STATS_INCR(task_sw_before_swap);
			} else {
				TASK_STATS_INCR(task_sw_after_swap);
				/*
				 * It's possible that the thread was
				 * made unswappable before hitting the
				 * AST, in which case it's still running.
				 */
				if (thr_act->swap_state == TH_SW_UNSWAPPABLE) {
					need_to_release = FALSE;
					TASK_STATS_INCR(task_sw_unswappable);
				}
			}
			if (thread)
				thread_unlock(thread);
			splx(s);
			act_unlock_thread(thr_act);
		}
		task_swapper_unlock();

		/*
		 * thread_release will swap in the thread if it's been
		 * swapped out.
		 */
		if (need_to_release) {
			act_lock_thread(thr_act);
			thread_release(thr_act);
			act_unlock_thread(thr_act);
		}
		thr_act = next;
	}

	if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) {
		task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE;
		task->swap_state = TASK_SW_UNSWAPPABLE;
		swappable = FALSE;
	} else {
		task->swap_state = TASK_SW_IN;
	}

	task_swaprss_in += pmap_resident_count(task->map->pmap);
	task_swap_total_time += sched_tick - task->swap_stamp;
	/* note when task came back in */
	task->swap_stamp = sched_tick;
	if (task->swap_flags & TASK_SW_WANT_IN) {
		task->swap_flags &= ~TASK_SW_WANT_IN;
		thread_wakeup((event_t)&task->swap_state);
	}
	assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0);
	task_unlock(task);
#if	TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("task 0x%X on list at end of swap in\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif	/* TASK_SW_DEBUG */
	/*
	 * Make the task eligible to be swapped again
	 */
	if (swappable)
		task_swapout_eligible(task);
	return(KERN_SUCCESS);
}

void wake_task_swapper(boolean_t now);	/* forward */

/*
 *	wake_task_swapper: [exported]
 *
 *	Wakes up task swapper if now == TRUE or if at least
 *	task_swap_cycle_time has elapsed since the last call.
 *
 *	NOTE: this function is not multithreaded, so if there is
 *	more than one caller, it must be modified.
 */
void
wake_task_swapper(boolean_t now)
{
	/* last_task_swap_cycle may require locking */
	if (now ||
	    (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) {
		last_task_swap_cycle = sched_tick;
		if (task_swap_debug)
			printf("wake_task_swapper: waking swapper\n");
		thread_wakeup((event_t)&swapped_tasks);	/* poke swapper */
	}
}

task_t pick_intask(void);	/* forward */
/*
 *	pick_intask:
 *	returns a task to be swapped in, or TASK_NULL if nothing suitable
 *	is found.
 *
 *	current algorithm: Return the task that has been swapped out the
 *	longest, as long as it is > min_swap_time. It will be dequeued
 *	if actually swapped in.
 *
 *	NOTE:**********************************************
 *	task->swap_rss (the size when the task was swapped out) could be
 *	used to further refine the selection. Another possibility would be
 *	to look at the state of the thread(s) to see if the task/threads
 *	would run if they were swapped in.
 *	***************************************************
 *
 *	Locking: no locks held upon entry and exit.
 */
task_t
pick_intask(void)
{
	register task_t task = TASK_NULL;

	task_swapper_lock();
	/* the oldest task is the first one */
	if (!queue_empty(&swapped_tasks)) {
		task = (task_t) queue_first(&swapped_tasks);
		assert(task != TASK_NULL);
		/* Make sure it's been out min_swap_time */
		if ((sched_tick - task->swap_stamp) < min_swap_time)
			task = TASK_NULL;
	}
	task_swapper_unlock();
	return(task);
#if	0
	/*
	 * This code looks at the entire list of swapped tasks, but since
	 * it does not yet do anything but look at time swapped, we
	 * can simply use the fact that the queue is ordered, and take
	 * the first one off the queue.
	 */
	task = (task_t)queue_first(&swapped_tasks);
	while (!queue_end(&swapped_tasks, (queue_entry_t)task)) {
		task_lock(task);
		tmp_time = sched_tick - task->swap_stamp;
		if (tmp_time > min_swap_time && tmp_time > time_swapped) {
			target_task = task;
			time_swapped = tmp_time;
		}
		task_unlock(task);
		task = (task_t)queue_next(&task->swapped_tasks);
	}
	task_swapper_unlock();
	return(target_task);
#endif
}

task_t pick_outtask(void);	/* forward */
/*
 *	pick_outtask:
 *	returns a task to be swapped out, with a reference on the task,
 *	or NULL if no suitable task is found.
 *
 *	current algorithm:
 *
 *	Examine all eligible tasks. While looking, use the first thread in
 *	each task as an indication of the task's activity. Count up
 *	"active" threads (those either runnable or sleeping). If the task
 *	is active (by these criteria), swapped in, and resident
 *	for at least min_res_time, then select the task with the largest
 *	number of pages in memory. If there are fewer
 *	than min_active_tasks active tasks in the system, then don't
 *	swap anything out (this avoids swapping out the only running task
 *	in the system, for example).
 *
 *	NOTE: the task selected will not be removed from the eligible list.
 *	This means that it will be selected again if it is not swapped
 *	out, where it is removed from the list.
 *
 *	Locking: no locks held upon entry and exit. Task_swapout_lock must
 *	be taken before task locks.
 *
 *	***************************************************
 *	TBD:
 *	This algorithm only examines the first thread in the task.
 *	Currently, since most swappable tasks in the system are
 *	single-threaded, this generalization works reasonably well.
 *	However, the algorithm should be changed to consider all threads
 *	in the task if multi-threaded tasks come into wider use.
 *	***************************************************
 */

#ifdef	TASK_SW_STATS
int inactive_task_count = 0;
int empty_task_count = 0;
#endif	/* TASK_SW_STATS */

task_t
pick_outtask(void)
{
	register task_t task;
	register task_t target_task = TASK_NULL;
	unsigned long task_rss;
	unsigned long target_rss = 0;
	boolean_t wired;
	boolean_t active;
	int nactive = 0;

	task_swapout_lock();
	if (queue_empty(&eligible_tasks)) {
		/* not likely to happen */
		task_swapout_unlock();
		return(TASK_NULL);
	}
	task = (task_t)queue_first(&eligible_tasks);
	while (!queue_end(&eligible_tasks, (queue_entry_t)task)) {
		int s;
		register thread_act_t thr_act;
		thread_t th;

		task_lock(task);
		/*
		 * Don't swap real-time tasks.
		 * XXX Should we enforce that or can we let really critical
		 * tasks use task_swappable() to make sure they never end up
		 * in the eligible list?
		 */
		if (task->policy & POLICYCLASS_FIXEDPRI) {
			goto tryagain;
		}
		if (!task->active) {
			TASK_STATS_INCR(inactive_task_count);
			goto tryagain;
		}
		if (task->res_act_count == 0) {
			TASK_STATS_INCR(empty_task_count);
			goto tryagain;
		}
		assert(!queue_empty(&task->thr_acts));
		thr_act = (thread_act_t)queue_first(&task->thr_acts);
		active = FALSE;
		th = act_lock_thread(thr_act);
		s = splsched();
		if (th != THREAD_NULL)
			thread_lock(th);
		if ((th == THREAD_NULL) ||
		    (th->state == TH_RUN) ||
		    (th->state & TH_WAIT)) {
			/*
			 * thread is "active": either runnable
			 * or sleeping. Count it and examine
			 * it further below.
			 */
			nactive++;
			active = TRUE;
		}
		if (th != THREAD_NULL)
			thread_unlock(th);
		splx(s);
		act_unlock_thread(thr_act);
		if (active &&
		    (task->swap_state == TASK_SW_IN) &&
		    ((sched_tick - task->swap_stamp) > min_res_time)) {
			long rescount = pmap_resident_count(task->map->pmap);
			/*
			 * thread must be "active", task must be swapped
			 * in and resident for at least min_res_time
			 */
#if 0
			/*
			 * DEBUG Test round-robin strategy. Picking the
			 * biggest task could cause extreme unfairness to
			 * such large interactive programs as xterm.
			 * Instead, pick the first task that has any pages
			 * resident:
			 */
			if (rescount > 1) {
				task->ref_count++;
				target_task = task;
				task_unlock(task);
				task_swapout_unlock();
				return(target_task);
			}
#else
			if (rescount > target_rss) {
				/*
				 * task is not swapped, and it has the
				 * largest rss seen so far.
				 */
				task->ref_count++;
				target_rss = rescount;
				assert(target_task != task);
				if (target_task != TASK_NULL)
					task_deallocate(target_task);
				target_task = task;
			}
#endif
		}
tryagain:
		task_unlock(task);
		task = (task_t)queue_next(&task->swapped_tasks);
	}
	task_swapout_unlock();
	/* only swap out if there are at least min_active_tasks */
	if (nactive < min_active_tasks) {
		if (target_task != TASK_NULL) {
			task_deallocate(target_task);
			target_task = TASK_NULL;
		}
	}
	return(target_task);
}

#if	TASK_SW_DEBUG
void print_pid(task_t task, unsigned long n1, unsigned long n2,
	       const char *comp, const char *inout);	/* forward */
void
print_pid(
	task_t task,
	unsigned long n1,
	unsigned long n2,
	const char *comp,
	const char *inout)
{
	long rescount;
	task_lock(task);
	rescount = pmap_resident_count(task->map->pmap);
	task_unlock(task);
	printf("task_swapper: swapped %s task %x; %d %s %d; res=%d\n",
	       inout, task, n1, comp, n2, rescount);
}
#endif

/*
 *	task_swapper: [exported]
 *
 *	Executes as a separate kernel thread.
 */
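/*
 * Each pass of the loop below picks a candidate to swap in
 * (pick_intask) and, if paging pressure warrants it, a candidate to
 * swap out (pick_outtask/task_swapout). Pressure is judged from the
 * fixed-point averages maintained by compute_vm_averages: swapping
 * starts when vm_pageout_rate_avg exceeds swap_start_pageout_rate (or
 * vm_grab_rate_avg exceeds max_grab_rate) and stops once it falls
 * below swap_stop_pageout_rate. After MAX_LOOP passes, the thread
 * blocks, with a timeout if more work is expected.
 */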
#define MAX_LOOP 3
void
task_swapper(void)
{
	task_t	outtask, intask;
	int timeout;
	int loopcnt = 0;
	boolean_t start_swapping;
	boolean_t stop_swapping;
	int local_page_free_avg;
	extern int hz;

	thread_swappable(current_act(), FALSE);
	stack_privilege(current_thread());

	spllo();

	for (;;) {
		local_page_free_avg = vm_page_free_avg;
		while (TRUE) {
#if	0
			if (task_swap_debug)
				printf("task_swapper: top of loop; cnt = %d\n",
				       loopcnt);
#endif
			intask = pick_intask();

			start_swapping =
				((vm_pageout_rate_avg > swap_start_pageout_rate) ||
				 (vm_grab_rate_avg > max_grab_rate));
			stop_swapping =
				(vm_pageout_rate_avg < swap_stop_pageout_rate);

			/*
			 * If a lot of paging is going on, or another task
			 * should come in but memory is tight, find something
			 * to swap out and start it. Don't swap any task out
			 * if task swapping is disabled.
			 * vm_page_queue_free_lock protects the vm globals.
			 */
			outtask = TASK_NULL;
			if (start_swapping ||
			    (!stop_swapping && intask &&
			     ((local_page_free_avg / AVE_SCALE) <
			      vm_page_free_target))) {
				if (task_swap_enable &&
				    (outtask = pick_outtask()) &&
				    (task_swapout(outtask) == KERN_SUCCESS)) {
					unsigned long rss;
#if	TASK_SW_DEBUG
					if (task_swap_debug)
						print_pid(outtask,
							  local_page_free_avg / AVE_SCALE,
							  vm_page_free_target,
							  "<", "out");
#endif
					rss = outtask->swap_rss;
					if (outtask->swap_nswap == 1)
						rss /= 2; /* divide by 2 if never out */
					local_page_free_avg +=
						(rss / short_avg_interval) * AVE_SCALE;
				}
				if (outtask != TASK_NULL)
					task_deallocate(outtask);
			}

			/*
			 * If there is an eligible task to bring in and there
			 * are at least vm_page_free_target free pages, swap
			 * it in. If task swapping has been disabled, bring
			 * the task in anyway.
			 */
			if (intask &&
			    ((local_page_free_avg / AVE_SCALE) >=
			     vm_page_free_target ||
			     stop_swapping || !task_swap_enable)) {
				if (task_swapin(intask, FALSE) == KERN_SUCCESS) {
					unsigned long rss;
#if	TASK_SW_DEBUG
					if (task_swap_debug)
						print_pid(intask,
							  local_page_free_avg / AVE_SCALE,
							  vm_page_free_target,
							  ">=", "in");
#endif
					rss = intask->swap_rss;
					if (intask->swap_nswap == 1)
						rss /= 2; /* divide by 2 if never out */
					local_page_free_avg -=
						(rss / short_avg_interval) * AVE_SCALE;
				}
			}
			/*
			 * XXX
			 * Here we have to decide whether to continue swapping
			 * in and/or out before sleeping. The decision should
			 * be made based on the previous action (swapin/out)
			 * and current system parameters, such as paging rates
			 * and demand.
			 * The function, compute_vm_averages, which does these
			 * calculations, depends on being called every second,
			 * so we can't just do the same thing.
			 */
			if (++loopcnt < MAX_LOOP)
				continue;

			/*
			 * Arrange to be awakened if paging is still heavy or
			 * there are any tasks partially or completely swapped
			 * out. (Otherwise, the wakeup will come from the
			 * external trigger(s).)
			 */
			timeout = 0;
			if (start_swapping)
				timeout = task_swap_cycle_time;
			else {
				task_swapper_lock();
				if (!queue_empty(&swapped_tasks))
					timeout = min_swap_time;
				task_swapper_unlock();
			}
			assert_wait((event_t)&swapped_tasks, THREAD_UNINT);
			if (timeout) {
				if (task_swap_debug)
					printf("task_swapper: set timeout of %d\n",
					       timeout);
				thread_set_timeout(timeout, NSEC_PER_SEC);
			}
			if (task_swap_debug)
				printf("task_swapper: blocking\n");
			thread_block((void (*)(void)) 0);
			if (timeout) {
				thread_cancel_timeout(current_thread());
			}
			/* reset locals */
			loopcnt = 0;
			local_page_free_avg = vm_page_free_avg;
		}
	}
}

/* from BSD */
#define	ave(smooth, cnt, time) \
	smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time)
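
/*
 * For example, ave(vm_page_free_avg, vm_page_free_count, 5) computes a
 * decaying average over a 5-second horizon:
 *
 *	new_avg = (4 * old_avg + count * 1024) / 5
 *
 * With old_avg == 5120 (5 pages in fixed point) and count == 10, the
 * result is (4 * 5120 + 10 * 1024) / 5 == 6144, i.e. an average of
 * 6 pages.
 */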

/*
 * We estimate the system paging load in more than one metric:
 * 1) the total number of calls into the function, vm_page_grab,
 *    which allocates all page frames for real pages.
 * 2) the total number of pages paged in and out of paging files.
 *    This is a measure of page cleaning and faulting from backing
 *    store.
 *
 * When either metric passes a threshold, tasks are swapped out.
 */
long last_grab_count = 0;
long last_pageout_count = 0;

/*
 * compute_vm_averages: [exported]
 *
 * This function is to be called once a second to calculate average paging
 * demand and average numbers of free pages for use by the task swapper.
 * Can also be used to wake up task swapper at desired thresholds.
 *
 * NOTE: this function is single-threaded, and requires locking if
 * ever there are multiple callers.
 */
void
compute_vm_averages(void)
{
	extern unsigned long vm_page_grab_count;
	long grab_count, pageout_count;
	int i;

	ave(vm_page_free_avg, vm_page_free_count, short_avg_interval);
	ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval);

	/*
	 * NOTE: the vm_page_grab_count and vm_stat structure are
	 * under control of vm_page_queue_free_lock. We're simply reading
	 * memory here, and the numbers don't depend on each other, so
	 * no lock is taken.
	 */

	grab_count = vm_page_grab_count;
	pageout_count = 0;
	for (i = 0; i < NCPUS; i++) {
		pageout_count += vm_stat[i].pageouts;
	}

	ave(vm_pageout_rate_avg, pageout_count - last_pageout_count,
	    short_avg_interval);
	ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count,
	    long_avg_interval);
	ave(vm_grab_rate_avg, grab_count - last_grab_count,
	    short_avg_interval);
	last_grab_count = grab_count;
	last_pageout_count = pageout_count;

	/*
	 * Adjust swap_{start,stop}_pageout_rate to the paging rate peak.
	 * This is an attempt to find the optimum paging rates at which
	 * to trigger task swapping on or off to regulate paging activity,
	 * depending on the hardware capacity.
	 */
	if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) {
		unsigned int desired_max;

		vm_pageout_rate_peakavg = vm_pageout_rate_avg;
		swap_start_pageout_rate =
			vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100;
		swap_stop_pageout_rate =
			vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100;
	}

#if	TASK_SW_DEBUG
	/*
	 * For measurements, allow fixed values.
	 */
	if (fixed_swap_start_pageout_rate)
		swap_start_pageout_rate = fixed_swap_start_pageout_rate;
	if (fixed_swap_stop_pageout_rate)
		swap_stop_pageout_rate = fixed_swap_stop_pageout_rate;
#endif	/* TASK_SW_DEBUG */

#if	TASK_SW_DEBUG
	if (task_swap_stats)
		printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n",
		       vm_pageout_rate_avg / AVE_SCALE,
		       vm_pageout_rate_longavg / AVE_SCALE,
		       swap_start_pageout_rate / AVE_SCALE,
		       swap_stop_pageout_rate / AVE_SCALE,
		       vm_page_free_avg / AVE_SCALE,
		       vm_page_free_longavg / AVE_SCALE,
		       vm_page_free_target);
#endif	/* TASK_SW_DEBUG */

	if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) {
		if (task_swap_on) {
			/*
			 * The following is a delicate attempt to balance the
			 * need for reasonably rapid response to system
			 * thrashing, with the equally important desire to
			 * prevent the onset of swapping simply because of a
			 * short burst of paging activity.
			 */
			if (((vm_pageout_rate_longavg > swap_stop_pageout_rate) &&
			     (vm_pageout_rate_avg > swap_start_pageout_rate)) ||
			    (vm_pageout_rate_avg > vm_pageout_rate_peakavg) ||
			    (vm_grab_rate_avg > max_grab_rate))
				wake_task_swapper(FALSE);
		}
	} else /* page demand is low; should consider swapin */ {
		if (tasks_swapped_out != 0)
			wake_task_swapper(TRUE);
	}
}

void
task_swapout_eligible(task_t task)
{
#if	TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("swapout_eligible: task 0x%X on swapped list\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif
	task_swapout_lock();
	task_lock(task);
#if	TASK_SW_DEBUG
	if (task->swap_flags & TASK_SW_ELIGIBLE) {
		printf("swapout_eligible: task 0x%X already eligible\n", task);
	}
#endif	/* TASK_SW_DEBUG */
	if ((task->swap_state == TASK_SW_IN) &&
	    ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) {
		queue_enter(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags |= TASK_SW_ELIGIBLE;
	}
	task_unlock(task);
	task_swapout_unlock();
}

void
task_swapout_ineligible(task_t task)
{
#if	TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("swapout_ineligible: task 0x%X on swapped list\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif
	task_swapout_lock();
	task_lock(task);
#if	TASK_SW_DEBUG
	if (!(task->swap_flags & TASK_SW_ELIGIBLE))
		printf("swapout_ineligible: task 0x%X already inel.\n", task);
#endif	/* TASK_SW_DEBUG */
	if ((task->swap_state != TASK_SW_IN) &&
	    (task->swap_flags & TASK_SW_ELIGIBLE)) {
		queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags &= ~TASK_SW_ELIGIBLE;
	}
	task_unlock(task);
	task_swapout_unlock();
}

int task_swap_ast_aborted = 0;

/*
 * Process an AST_SWAPOUT.
 */
void
swapout_ast()
{
	spl_t s;
	thread_act_t act;
	thread_t thread;

	act = current_act();

	/*
	 * Task is being swapped out. First mark it as suspended
	 * and halted, then call thread_swapout_enqueue to put
	 * the thread on the queue for task_swap_swapout_thread
	 * to swap out the thread.
	 */
	/*
	 * Don't swap unswappable threads
	 */
	thread = act_lock_thread(act);
	s = splsched();
	if (thread)
		thread_lock(thread);
	if ((act->ast & AST_SWAPOUT) == 0) {
		/*
		 * Race with task_swapin. Abort swapout.
		 */
		task_swap_ast_aborted++;	/* not locked XXX */
		if (thread)
			thread_unlock(thread);
		splx(s);
		act_unlock_thread(act);
	} else if (act->swap_state == TH_SW_IN) {
		/*
		 * Mark swap_state as TH_SW_TASK_SWAPPING to avoid
		 * race with thread swapper, which will only
		 * swap thread if swap_state is TH_SW_IN.
		 * This way, the thread can only be swapped by
		 * the task swapping mechanism.
		 */
		act->swap_state |= TH_SW_TASK_SWAPPING;
		/* assert(act->suspend_count == 0); XXX ? */
		if (thread)
			thread_unlock(thread);
		if (act->suspend_count++ == 0)	/* inline thread_hold */
			install_special_handler(act);
		/* self->state |= TH_HALTED; */
		thread_ast_clear(act, AST_SWAPOUT);
		/*
		 * Initialize the swap_queue fields to allow an extra
		 * queue_remove() in task_swapin if we lose the race
		 * (task_swapin can be called before we complete
		 * thread_swapout_enqueue).
		 */
		queue_init((queue_t) &act->swap_queue);
		splx(s);
		act_unlock_thread(act);
		/* this must be called at normal interrupt level */
		thread_swapout_enqueue(act);
	} else {
		/* thread isn't swappable; continue running */
		assert(act->swap_state == TH_SW_UNSWAPPABLE);
		if (thread)
			thread_unlock(thread);
		thread_ast_clear(act, AST_SWAPOUT);
		splx(s);
		act_unlock_thread(act);
	}
}

#endif	/* TASK_SWAPPER */