/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 *	File:	kern/task_swap.c
 *
 *	Task residency management primitives implementation.
 */
#include <mach_assert.h>
#include <task_swapper.h>

#include <kern/spl.h>
#include <kern/lock.h>
#include <kern/queue.h>
#include <kern/host.h>
#include <kern/task.h>
#include <kern/task_swap.h>
#include <kern/thread.h>
#include <kern/thread_swap.h>
#include <kern/host_statistics.h>
#include <kern/misc_protos.h>
#include <kern/assert.h>
#include <mach/policy.h>

#include <ipc/ipc_port.h>	/* We use something from in here */

/*
 * Note: if TASK_SWAPPER is disabled, then this file defines only
 * a stub version of task_swappable(), so that the service can always
 * be defined, even if swapping has been configured out of the kernel.
 */
#if TASK_SWAPPER

/* temporary debug flags */
#define TASK_SW_DEBUG 1
#define TASK_SW_STATS 1

int task_swap_debug = 0;
int task_swap_stats = 0;
int task_swap_enable = 1;
int task_swap_on = 1;
queue_head_t swapped_tasks;	/* completely swapped out tasks */
queue_head_t swapout_thread_q;	/* threads to be swapped out */
mutex_t task_swapper_lock;	/* protects both queues above */

#define task_swapper_lock()	mutex_lock(&task_swapper_lock)
#define task_swapper_unlock()	mutex_unlock(&task_swapper_lock)
#define task_swapper_wakeup()	thread_wakeup((event_t)&swapout_thread_q)
#define task_swapper_sleep()	thread_sleep_mutex((event_t)&swapout_thread_q, \
					&task_swapper_lock, \
					THREAD_UNINT)

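/*
 * Overview of the task swap-state machine driven below (summary added
 * for clarity; the transitions are the ones this file actually takes):
 *
 *	TASK_SW_IN            --task_swapout()-------------> TASK_SW_GOING_OUT
 *	TASK_SW_GOING_OUT     --last AST_SWAPOUT processed--> TASK_SW_OUT
 *	TASK_SW_OUT/GOING_OUT --task_swapin()---------------> TASK_SW_COMING_IN
 *	TASK_SW_COMING_IN     --task_swapin() completes-----> TASK_SW_IN
 *	TASK_SW_IN <---------- task_swappable() ------------> TASK_SW_UNSWAPPABLE
 */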

queue_head_t eligible_tasks;	/* tasks eligible for swapout */
mutex_t task_swapout_list_lock;	/* protects above queue */
#define task_swapout_lock()	mutex_lock(&task_swapout_list_lock)
#define task_swapout_unlock()	mutex_unlock(&task_swapout_list_lock)

/*
 * The next section of constants and globals are tunable parameters
 * used in making swapping decisions. They may be changed dynamically
 * without adversely affecting the robustness of the system; however,
 * the policy will change, one way or the other.
 */

#define SHORT_AVG_INTERVAL	5	/* in seconds */
#define LONG_AVG_INTERVAL	30	/* in seconds */
#define AVE_SCALE		1024

unsigned int short_avg_interval = SHORT_AVG_INTERVAL;
unsigned int long_avg_interval = LONG_AVG_INTERVAL;

#ifndef MIN_SWAP_PAGEOUT_RATE
#define MIN_SWAP_PAGEOUT_RATE	10
#endif

/*
 * The following are all stored in fixed-point representation (the actual
 * value times AVE_SCALE), to allow more accurate computing of decaying
 * averages. So all variables that end with "avg" must be divided by
 * AVE_SCALE to convert them or compare them to ints.
 */
unsigned int vm_grab_rate_avg;
unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_page_free_avg;		/* average free pages over short_avg_interval */
unsigned int vm_page_free_longavg;	/* avg free pages over long_avg_interval */

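/*
 * Worked example of the fixed-point convention (added for clarity):
 * with AVE_SCALE == 1024, a true average of 100 free pages is stored
 * as vm_page_free_avg == 102400 and is compared against page counts
 * as (vm_page_free_avg / AVE_SCALE) >= vm_page_free_target.
 */
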
/*
 * Trigger task swapping when paging activity reaches
 * SWAP_PAGEOUT_HIGH_WATER_MARK per cent of the maximum paging activity
 * ever observed. Turn off task swapping when paging activity goes back
 * down to below SWAP_PAGEOUT_LOW_WATER_MARK per cent of the maximum.
 * These numbers have been found empirically and might need some tuning...
 */
#ifndef SWAP_PAGEOUT_HIGH_WATER_MARK
#define SWAP_PAGEOUT_HIGH_WATER_MARK	30
#endif
#ifndef SWAP_PAGEOUT_LOW_WATER_MARK
#define SWAP_PAGEOUT_LOW_WATER_MARK	10
#endif

#ifndef MAX_GRAB_RATE
#define MAX_GRAB_RATE	((unsigned int) -1)	/* XXX no maximum */
#endif

/*
 * swap_{start,stop}_pageout_rate start at the minimum value, then increase
 * to adjust to the hardware's performance, following the paging rate peaks.
 */
unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK;
unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK;
unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_LOW_WATER_MARK / 100;
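
/*
 * Worked example (added for clarity): with the defaults above, the
 * swapper initially starts when the short-term pageout rate average
 * exceeds 10 * 30% == 3 pages/sec (stored as 3 * AVE_SCALE) and stops
 * below 10 * 10% == 1 page/sec, until observed paging peaks raise
 * both thresholds.
 */
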
#if TASK_SW_DEBUG
unsigned int fixed_swap_start_pageout_rate = 0;	/* for testing purposes only */
unsigned int fixed_swap_stop_pageout_rate = 0;	/* for testing purposes only */
#endif	/* TASK_SW_DEBUG */
unsigned int max_grab_rate = MAX_GRAB_RATE;

#ifndef MIN_SWAP_TIME
#define MIN_SWAP_TIME	1
#endif

int min_swap_time = MIN_SWAP_TIME;	/* in seconds */

#ifndef MIN_RES_TIME
#define MIN_RES_TIME	6
#endif

int min_res_time = MIN_RES_TIME;	/* in seconds */

#ifndef MIN_ACTIVE_TASKS
#define MIN_ACTIVE_TASKS	4
#endif

int min_active_tasks = MIN_ACTIVE_TASKS;

#ifndef TASK_SWAP_CYCLE_TIME
#define TASK_SWAP_CYCLE_TIME	2
#endif

int task_swap_cycle_time = TASK_SWAP_CYCLE_TIME;	/* in seconds */

int last_task_swap_cycle = 0;

/* temporary statistics */
int task_swapouts = 0;
int task_swapins = 0;
int task_swaprss_out = 0;	/* total rss at swapout time */
int task_swaprss_in = 0;	/* total rss at swapin time */
int task_swap_total_time = 0;	/* total time spent swapped out */
int tasks_swapped_out = 0;	/* number of tasks swapped out now */

#ifdef TASK_SW_STATS
#define TASK_STATS_INCR(cnt)	(cnt)++
#else
#define TASK_STATS_INCR(cnt)
#endif	/* TASK_SW_STATS */

#if TASK_SW_DEBUG
boolean_t on_swapped_list(task_t task);	/* forward */
/*
 * Debug function to determine if a task is already on the
 * swapped out tasks list. It also checks for tasks on the list
 * that are in an illegal state (i.e. swapped in).
 */
boolean_t
on_swapped_list(task_t task)
{
	task_t ltask;
	/* task_swapper_lock is locked. */

	if (queue_empty(&swapped_tasks)) {
		return(FALSE);
	}
	ltask = (task_t)queue_first(&swapped_tasks);
	while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) {
		/* check for illegal state */
		if (ltask->swap_state == TASK_SW_IN) {
			printf("on_swapped_list and in: 0x%X\n", ltask);
			Debugger("");
		}
		if (ltask == task)
			return(TRUE);
		ltask = (task_t)queue_next(&ltask->swapped_tasks);
	}
	return(FALSE);
}
#endif	/* TASK_SW_DEBUG */

/*
 * task_swapper_init: [exported]
 */
void
task_swapper_init()
{
	queue_init(&swapped_tasks);
	queue_init(&eligible_tasks);
	queue_init(&swapout_thread_q);
	mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP);
	mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT);
	vm_page_free_avg = vm_page_free_count * AVE_SCALE;
	vm_page_free_longavg = vm_page_free_count * AVE_SCALE;
}

#endif	/* TASK_SWAPPER */

/*
 * task_swappable: [exported]
 *
 * Make a task swappable or non-swappable. If made non-swappable,
 * it will be swapped in.
 *
 * Locking: task_swapout_lock is taken before task lock.
 */
kern_return_t
task_swappable(
	host_priv_t host_priv,
	task_t task,
	boolean_t make_swappable)
{
	if (host_priv == HOST_PRIV_NULL)
		return(KERN_INVALID_ARGUMENT);

	if (task == TASK_NULL)
		return(KERN_INVALID_ARGUMENT);

#if !TASK_SWAPPER

	/*
	 * If we don't support swapping, this call is purely advisory.
	 */
	return(KERN_SUCCESS);

#else	/* TASK_SWAPPER */

	task_lock(task);
	if (make_swappable) {
		/* make task swappable */
		if (task->swap_state == TASK_SW_UNSWAPPABLE) {
			task->swap_state = TASK_SW_IN;
			task_unlock(task);
			task_swapout_eligible(task);
		} else
			task_unlock(task);	/* state unchanged; drop the lock */
	} else {
		switch (task->swap_state) {
		case TASK_SW_IN:
			task->swap_state = TASK_SW_UNSWAPPABLE;
			task_unlock(task);
			task_swapout_ineligible(task);
			break;
		case TASK_SW_UNSWAPPABLE:
			task_unlock(task);
			break;
		default:
			/*
			 * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT,
			 * or TASK_SW_COMING_IN. task_swapin handles all
			 * three, and its default case will catch any bad
			 * states.
			 */
			task_unlock(task);
			task_swapin(task, TRUE);
			break;
		}
	}
	return(KERN_SUCCESS);

#endif	/* TASK_SWAPPER */

}
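
/*
 * Example (illustrative only, not part of the original source): a
 * holder of the privileged host port can pin the current task in
 * memory with
 *
 *	(void) task_swappable(host_priv_self(), current_task(), FALSE);
 *
 * which swaps the task in if necessary and leaves it
 * TASK_SW_UNSWAPPABLE until a matching call with TRUE.
 */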

#if TASK_SWAPPER

/*
 * task_swapout:
 * A reference to the task must be held.
 *
 * Start swapping out a task by sending an AST_SWAPOUT to each thread.
 * When the threads reach a clean point, they queue themselves up on the
 * swapout_thread_q to be swapped out by the task_swap_swapout_thread.
 * The task can be swapped in at any point in this process.
 *
 * A task will not be fully swapped out (i.e. its map residence count
 * at zero) until all of its currently-resident threads run and reach
 * a clean point, at which time they are enqueued for swapout,
 * decrementing the swap_ast_waiting count on the task.
 *
 * Locking: no locks held upon entry and exit.
 * Task_lock is held throughout this function.
 */
kern_return_t
task_swapout(task_t task)
{
	thread_act_t thr_act;
	thread_t thread;
	queue_head_t *list;
	int s;

	task_swapout_lock();
	task_lock(task);
	/*
	 * NOTE: look into turning these into assertions if they
	 * are invariants.
	 */
	if ((task->swap_state != TASK_SW_IN) || (!task->active)) {
		task_unlock(task);
		task_swapout_unlock();
		return(KERN_FAILURE);
	}
	if (task->swap_flags & TASK_SW_ELIGIBLE) {
		queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags &= ~TASK_SW_ELIGIBLE;
	}
	task_swapout_unlock();

	/* set state to avoid races with task_swappable(FALSE) */
	task->swap_state = TASK_SW_GOING_OUT;
	task->swap_rss = pmap_resident_count(task->map->pmap);
	task_swaprss_out += task->swap_rss;
	task->swap_ast_waiting = task->thr_act_count;

	/*
	 * halt all threads in this task:
	 * We don't need the thread list lock for traversal.
	 */
	list = &task->thr_acts;
	thr_act = (thread_act_t) queue_first(list);
	while (!queue_end(list, (queue_entry_t) thr_act)) {
		boolean_t swappable;
		thread_act_t ract;

		thread = act_lock_thread(thr_act);
		s = splsched();
		if (!thread)
			swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE);
		else {
			thread_lock(thread);
			swappable = TRUE;
			for (ract = thread->top_act; ract; ract = ract->lower)
				if (ract->swap_state == TH_SW_UNSWAPPABLE) {
					swappable = FALSE;
					break;
				}
		}
		if (swappable)
			thread_ast_set(thr_act, AST_SWAPOUT);
		if (thread)
			thread_unlock(thread);
		splx(s);
		assert((thr_act->ast & AST_TERMINATE) == 0);
		act_unlock_thread(thr_act);
		thr_act = (thread_act_t) queue_next(&thr_act->thr_acts);
	}

	task->swap_stamp = sched_tick;
	task->swap_nswap++;
	assert((task->swap_flags & TASK_SW_WANT_IN) == 0);
	/* put task on the queue of swapped out tasks */
	task_swapper_lock();
#if TASK_SW_DEBUG
	if (task_swap_debug && on_swapped_list(task)) {
		printf("task 0x%X already on list\n", task);
		Debugger("");
	}
#endif	/* TASK_SW_DEBUG */
	queue_enter(&swapped_tasks, task, task_t, swapped_tasks);
	tasks_swapped_out++;
	task_swapouts++;
	task_swapper_unlock();
	task_unlock(task);

	return(KERN_SUCCESS);
}

#ifdef TASK_SW_STATS
int task_sw_race_in = 0;
int task_sw_race_coming_in = 0;
int task_sw_race_going_out = 0;
int task_sw_before_ast = 0;
int task_sw_before_swap = 0;
int task_sw_after_swap = 0;
int task_sw_race_in_won = 0;
int task_sw_unswappable = 0;
int task_sw_act_inactive = 0;
#endif	/* TASK_SW_STATS */

/*
 * thread_swapout_enqueue is called by thread_halt_self when it
 * processes AST_SWAPOUT to enqueue threads to be swapped out.
 * It must be called at normal interrupt priority for the
 * sake of the task_swapper_lock.
 *
 * There can be races with task swapin here.
 * First lock task and decrement swap_ast_waiting count, and if
 * it's 0, we can decrement the residence count on the task's map
 * and set the task's swap state to TASK_SW_OUT.
 */
void
thread_swapout_enqueue(thread_act_t thr_act)
{
	task_t task = thr_act->task;
	task_lock(task);
	/*
	 * If the swap_state is not TASK_SW_GOING_OUT, then
	 * task_swapin has beaten us to this operation, and
	 * we have nothing to do.
	 */
	if (task->swap_state != TASK_SW_GOING_OUT) {
		task_unlock(task);
		return;
	}
	if (--task->swap_ast_waiting == 0) {
		vm_map_t map = task->map;
		task->swap_state = TASK_SW_OUT;
		task_unlock(task);
		mutex_lock(&map->s_lock);
		vm_map_res_deallocate(map);
		mutex_unlock(&map->s_lock);
	} else
		task_unlock(task);

	task_swapper_lock();
	act_lock(thr_act);
	if (! (thr_act->swap_state & TH_SW_TASK_SWAPPING)) {
		/*
		 * We lost a race with task_swapin(): don't enqueue.
		 */
	} else {
		queue_enter(&swapout_thread_q, thr_act,
			    thread_act_t, swap_queue);
		task_swapper_wakeup();
	}
	act_unlock(thr_act);
	task_swapper_unlock();
}

/*
 * task_swap_swapout_thread: [exported]
 *
 * Executes as a separate kernel thread.
 * Its job is to swap out threads that have been halted by AST_SWAPOUT.
 */
void
task_swap_swapout_thread(void)
{
	thread_act_t thr_act;
	thread_t thread, nthread;
	task_t task;
	int s;

	thread_swappable(current_act(), FALSE);
	stack_privilege(current_thread());

	spllo();

	task_swapper_lock();
	while (TRUE) {
		while (! queue_empty(&swapout_thread_q)) {

			queue_remove_first(&swapout_thread_q, thr_act,
					   thread_act_t, swap_queue);
			/*
			 * If we're racing with task_swapin, we need
			 * to make it safe for it to do remque on the
			 * thread, so make its links point to itself.
			 * Allowing this ugliness is cheaper than
			 * making task_swapin search the entire queue.
			 */
			act_lock(thr_act);
			queue_init((queue_t) &thr_act->swap_queue);
			act_unlock(thr_act);
			task_swapper_unlock();
			/*
			 * Wait for thread's RUN bit to be deasserted.
			 */
			thread = act_lock_thread(thr_act);
			if (thread == THREAD_NULL)
				act_unlock_thread(thr_act);
			else {
				boolean_t r;

				thread_reference(thread);
				thread_hold(thr_act);
				act_unlock_thread(thr_act);
				r = thread_stop_wait(thread);
				nthread = act_lock_thread(thr_act);
				thread_release(thr_act);
				thread_deallocate(thread);
				act_unlock_thread(thr_act);
				if (!r || nthread != thread) {
					task_swapper_lock();
					continue;
				}
			}
			task = thr_act->task;
			task_lock(task);
			/*
			 * we can race with swapin, which would set the
			 * state to TASK_SW_IN.
			 */
			if ((task->swap_state != TASK_SW_OUT) &&
			    (task->swap_state != TASK_SW_GOING_OUT)) {
				task_unlock(task);
				task_swapper_lock();
				TASK_STATS_INCR(task_sw_race_in_won);
				if (thread != THREAD_NULL)
					thread_unstop(thread);
				continue;
			}
			nthread = act_lock_thread(thr_act);
			if (nthread != thread || thr_act->active == FALSE) {
				act_unlock_thread(thr_act);
				task_unlock(task);
				task_swapper_lock();
				TASK_STATS_INCR(task_sw_act_inactive);
				if (thread != THREAD_NULL)
					thread_unstop(thread);
				continue;
			}
			s = splsched();
			if (thread != THREAD_NULL)
				thread_lock(thread);
			/*
			 * Thread cannot have been swapped out yet because
			 * TH_SW_TASK_SWAPPING was set in AST. If task_swapin
			 * beat us here, we either wouldn't have found it on
			 * the queue, or the task->swap_state would have
			 * changed. The synchronization is on the
			 * task's swap_state and the task_lock.
			 * The thread can't be swapped in any other way
			 * because its task has been swapped.
			 */
			assert(thr_act->swap_state & TH_SW_TASK_SWAPPING);
			assert(thread == THREAD_NULL ||
			       !(thread->state & (TH_SWAPPED_OUT|TH_RUN)));
			assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN);
			/* assert(thread->state & TH_HALTED); */
			/* this also clears TH_SW_TASK_SWAPPING flag */
			thr_act->swap_state = TH_SW_GOING_OUT;
			if (thread != THREAD_NULL) {
				if (thread->top_act == thr_act) {
					thread->state |= TH_SWAPPED_OUT;
					/*
					 * Once we unlock the task, things can happen
					 * to the thread, so make sure it's consistent
					 * for thread_swapout.
					 */
				}
				thread->ref_count++;
				thread_unlock(thread);
				thread_unstop(thread);
			}
			splx(s);
			act_locked_act_reference(thr_act);
			act_unlock_thread(thr_act);
			task_unlock(task);

			thread_swapout(thr_act);	/* do the work */

			if (thread != THREAD_NULL)
				thread_deallocate(thread);
			act_deallocate(thr_act);
			task_swapper_lock();
		}
		task_swapper_sleep();
	}
}

/*
 * task_swapin:
 *
 * Make a task resident.
 * Performs all of the work to make a task resident and possibly
 * non-swappable. If we race with a competing task_swapin call,
 * we wait for its completion, then return.
 *
 * Locking: no locks held upon entry and exit.
 *
 * Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the
 * state is TASK_SW_COMING_IN.
 */

kern_return_t
task_swapin(task_t task, boolean_t make_unswappable)
{
	register queue_head_t *list;
	register thread_act_t thr_act, next;
	thread_t thread;
	int s;
	boolean_t swappable = TRUE;

	task_lock(task);
	switch (task->swap_state) {
	case TASK_SW_OUT:
	    {
		vm_map_t map = task->map;
		/*
		 * Task has made it all the way out, which means
		 * that vm_map_res_deallocate has been done; set
		 * state to TASK_SW_COMING_IN, then bring map
		 * back in. We could actually be racing with
		 * the thread_swapout_enqueue, which does the
		 * vm_map_res_deallocate, but that race is covered.
		 */
		task->swap_state = TASK_SW_COMING_IN;
		assert(task->swap_ast_waiting == 0);
		assert(map->res_count >= 0);
		task_unlock(task);
		mutex_lock(&map->s_lock);
		vm_map_res_reference(map);
		mutex_unlock(&map->s_lock);
		task_lock(task);
		assert(task->swap_state == TASK_SW_COMING_IN);
	    }
		break;

	case TASK_SW_GOING_OUT:
		/*
		 * Task isn't all the way out yet. There is
		 * still at least one thread not swapped, and
		 * vm_map_res_deallocate has not been done.
		 */
		task->swap_state = TASK_SW_COMING_IN;
		assert(task->swap_ast_waiting > 0 ||
		       (task->swap_ast_waiting == 0 &&
			task->thr_act_count == 0));
		assert(task->map->res_count > 0);
		TASK_STATS_INCR(task_sw_race_going_out);
		break;
	case TASK_SW_IN:
		assert(task->map->res_count > 0);
#if TASK_SW_DEBUG
		task_swapper_lock();
		if (task_swap_debug && on_swapped_list(task)) {
			printf("task 0x%X on list, state is SW_IN\n",
			       task);
			Debugger("");
		}
		task_swapper_unlock();
#endif	/* TASK_SW_DEBUG */
		TASK_STATS_INCR(task_sw_race_in);
		if (make_unswappable) {
			task->swap_state = TASK_SW_UNSWAPPABLE;
			task_unlock(task);
			task_swapout_ineligible(task);
		} else
			task_unlock(task);
		return(KERN_SUCCESS);
	case TASK_SW_COMING_IN:
		/*
		 * Raced with another task_swapin and lost;
		 * wait for other one to complete first
		 */
		assert(task->map->res_count >= 0);
		/*
		 * set MAKE_UNSWAPPABLE so that whoever is swapping
		 * the task in will make it unswappable, and return
		 */
		if (make_unswappable)
			task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
		task->swap_flags |= TASK_SW_WANT_IN;
		assert_wait((event_t)&task->swap_state, THREAD_UNINT);
		task_unlock(task);
		thread_block(THREAD_CONTINUE_NULL);
		TASK_STATS_INCR(task_sw_race_coming_in);
		return(KERN_SUCCESS);
	case TASK_SW_UNSWAPPABLE:
		/*
		 * This can happen, since task_terminate
		 * unconditionally calls task_swapin.
		 */
		task_unlock(task);
		return(KERN_SUCCESS);
	default:
		panic("task_swapin bad state");
		break;
	}
	if (make_unswappable)
		task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
	assert(task->swap_state == TASK_SW_COMING_IN);
	task_swapper_lock();
#if TASK_SW_DEBUG
	if (task_swap_debug && !on_swapped_list(task)) {
		printf("task 0x%X not on list\n", task);
		Debugger("");
	}
#endif	/* TASK_SW_DEBUG */
	queue_remove(&swapped_tasks, task, task_t, swapped_tasks);
	tasks_swapped_out--;
	task_swapins++;
	task_swapper_unlock();

	/*
	 * Iterate through all threads for this task and
	 * release them, as required. They may not have been swapped
	 * out yet. The task remains locked throughout.
	 */
	list = &task->thr_acts;
	thr_act = (thread_act_t) queue_first(list);
	while (!queue_end(list, (queue_entry_t) thr_act)) {
		boolean_t need_to_release;
		next = (thread_act_t) queue_next(&thr_act->thr_acts);
		/*
		 * Keep task_swapper_lock across thread handling
		 * to synchronize with task_swap_swapout_thread
		 */
		task_swapper_lock();
		thread = act_lock_thread(thr_act);
		s = splsched();
		if (thr_act->ast & AST_SWAPOUT) {
			/* thread hasn't gotten the AST yet, just clear it */
			thread_ast_clear(thr_act, AST_SWAPOUT);
			need_to_release = FALSE;
			TASK_STATS_INCR(task_sw_before_ast);
			splx(s);
			act_unlock_thread(thr_act);
		} else {
			/*
			 * If AST_SWAPOUT was cleared, then thread_hold,
			 * or equivalent was done.
			 */
			need_to_release = TRUE;
			/*
			 * Thread has hit AST, but it may not have
			 * been dequeued yet, so we need to check.
			 * NOTE: the thread may have been dequeued, but
			 * has not yet been swapped (the task_swapper_lock
			 * has been dropped, but the thread is not yet
			 * locked), and the TH_SW_TASK_SWAPPING flag may
			 * not have been cleared. In this case, we will do
			 * an extra remque, which the task_swap_swapout_thread
			 * has made safe, and clear the flag, which is also
			 * checked by the t_s_s_t before doing the swapout.
			 */
			if (thread)
				thread_lock(thread);
			if (thr_act->swap_state & TH_SW_TASK_SWAPPING) {
				/*
				 * hasn't yet been dequeued for swapout,
				 * so clear flags and dequeue it first.
				 */
				thr_act->swap_state &= ~TH_SW_TASK_SWAPPING;
				assert(thr_act->thread == THREAD_NULL ||
				       !(thr_act->thread->state &
					 TH_SWAPPED_OUT));
				queue_remove(&swapout_thread_q, thr_act,
					     thread_act_t, swap_queue);
				TASK_STATS_INCR(task_sw_before_swap);
			} else {
				TASK_STATS_INCR(task_sw_after_swap);
				/*
				 * It's possible that the thread was
				 * made unswappable before hitting the
				 * AST, in which case it's still running.
				 */
				if (thr_act->swap_state == TH_SW_UNSWAPPABLE) {
					need_to_release = FALSE;
					TASK_STATS_INCR(task_sw_unswappable);
				}
			}
			if (thread)
				thread_unlock(thread);
			splx(s);
			act_unlock_thread(thr_act);
		}
		task_swapper_unlock();

		/*
		 * thread_release will swap in the thread if it's been
		 * swapped out.
		 */
		if (need_to_release) {
			act_lock_thread(thr_act);
			thread_release(thr_act);
			act_unlock_thread(thr_act);
		}
		thr_act = next;
	}

	if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) {
		task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE;
		task->swap_state = TASK_SW_UNSWAPPABLE;
		swappable = FALSE;
	} else {
		task->swap_state = TASK_SW_IN;
	}

	task_swaprss_in += pmap_resident_count(task->map->pmap);
	task_swap_total_time += sched_tick - task->swap_stamp;
	/* note when task came back in */
	task->swap_stamp = sched_tick;
	if (task->swap_flags & TASK_SW_WANT_IN) {
		task->swap_flags &= ~TASK_SW_WANT_IN;
		thread_wakeup((event_t)&task->swap_state);
	}
	assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0);
	task_unlock(task);
#if TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("task 0x%X on list at end of swap in\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif	/* TASK_SW_DEBUG */
	/*
	 * Make the task eligible to be swapped again
	 */
	if (swappable)
		task_swapout_eligible(task);
	return(KERN_SUCCESS);
}

void wake_task_swapper(boolean_t now);	/* forward */

/*
 * wake_task_swapper: [exported]
 *
 * Wakes up the task swapper if now == TRUE or if at least
 * task_swap_cycle_time has elapsed since the last call.
 *
 * NOTE: this function is not thread-safe, so if there can be
 * more than one caller, it must be modified.
 */
void
wake_task_swapper(boolean_t now)
{
	/* last_task_swap_cycle may require locking */
	if (now ||
	    (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) {
		last_task_swap_cycle = sched_tick;
		if (task_swap_debug)
			printf("wake_task_swapper: waking swapper\n");
		thread_wakeup((event_t)&swapped_tasks);	/* poke swapper */
	}
}

task_t pick_intask(void);	/* forward */
/*
 * pick_intask:
 * returns a task to be swapped in, or TASK_NULL if nothing suitable is found.
 *
 * current algorithm: Return the task that has been swapped out the
 * longest, as long as it is > min_swap_time. It will be dequeued
 * if actually swapped in.
 *
 * NOTE:**********************************************
 * task->swap_rss (the size when the task was swapped out) could be used to
 * further refine the selection. Another possibility would be to look at
 * the state of the thread(s) to see if the task/threads would run if they
 * were swapped in.
 * ***************************************************
 *
 * Locking: no locks held upon entry and exit.
 */
task_t
pick_intask(void)
{
	register task_t task = TASK_NULL;

	task_swapper_lock();
	/* the oldest task is the first one */
	if (!queue_empty(&swapped_tasks)) {
		task = (task_t) queue_first(&swapped_tasks);
		assert(task != TASK_NULL);
		/* Make sure it's been out min_swap_time */
		if ((sched_tick - task->swap_stamp) < min_swap_time)
			task = TASK_NULL;
	}
	task_swapper_unlock();
	return(task);
#if 0
	/*
	 * This code looks at the entire list of swapped tasks, but since
	 * it does not yet do anything but look at time swapped, we
	 * can simply use the fact that the queue is ordered, and take
	 * the first one off the queue.
	 */
	task = (task_t)queue_first(&swapped_tasks);
	while (!queue_end(&swapped_tasks, (queue_entry_t)task)) {
		task_lock(task);
		tmp_time = sched_tick - task->swap_stamp;
		if (tmp_time > min_swap_time && tmp_time > time_swapped) {
			target_task = task;
			time_swapped = tmp_time;
		}
		task_unlock(task);
		task = (task_t)queue_next(&task->swapped_tasks);
	}
	task_swapper_unlock();
	return(target_task);
#endif
}

task_t pick_outtask(void);	/* forward */
/*
 * pick_outtask:
 * returns a task to be swapped out, with a reference on the task,
 * or NULL if no suitable task is found.
 *
 * current algorithm:
 *
 * Examine all eligible tasks. While looking, use the first thread in
 * each task as an indication of the task's activity. Count up
 * "active" threads (those either runnable or sleeping). If the task
 * is active (by these criteria), swapped in, and resident
 * for at least min_res_time, then select the task with the largest
 * number of pages in memory. If there are fewer
 * than min_active_tasks active tasks in the system, then don't
 * swap anything out (this avoids swapping out the only running task
 * in the system, for example).
 *
 * NOTE: the task selected will not be removed from the eligible list.
 * This means that it will be selected again if it is not swapped
 * out, at which point it is removed from the list.
 *
 * Locking: no locks held upon entry and exit. Task_swapout_lock must be
 * taken before task locks.
 *
 * ***************************************************
 * TBD:
 * This algorithm only examines the first thread in the task. Currently, since
 * most swappable tasks in the system are single-threaded, this generalization
 * works reasonably well. However, the algorithm should be changed
 * to consider all threads in the task if multi-threaded tasks become
 * more common.
 * ***************************************************
 */

#ifdef TASK_SW_STATS
int inactive_task_count = 0;
int empty_task_count = 0;
#endif	/* TASK_SW_STATS */

task_t
pick_outtask(void)
{
	register task_t task;
	register task_t target_task = TASK_NULL;
	unsigned long task_rss;
	unsigned long target_rss = 0;
	boolean_t wired;
	boolean_t active;
	int nactive = 0;

	task_swapout_lock();
	if (queue_empty(&eligible_tasks)) {
		/* not likely to happen */
		task_swapout_unlock();
		return(TASK_NULL);
	}
	task = (task_t)queue_first(&eligible_tasks);
	while (!queue_end(&eligible_tasks, (queue_entry_t)task)) {
		int s;
		register thread_act_t thr_act;
		thread_t th;

		task_lock(task);
		/*
		 * Don't swap real-time tasks.
		 * XXX Should we enforce that or can we let really critical
		 * tasks use task_swappable() to make sure they never end up
		 * on the eligible list?
		 */
		if (task->policy & POLICYCLASS_FIXEDPRI) {
			goto tryagain;
		}
		if (!task->active) {
			TASK_STATS_INCR(inactive_task_count);
			goto tryagain;
		}
		if (task->res_act_count == 0) {
			TASK_STATS_INCR(empty_task_count);
			goto tryagain;
		}
		assert(!queue_empty(&task->thr_acts));
		thr_act = (thread_act_t)queue_first(&task->thr_acts);
		active = FALSE;
		th = act_lock_thread(thr_act);
		s = splsched();
		if (th != THREAD_NULL)
			thread_lock(th);
		if ((th == THREAD_NULL) ||
		    (th->state == TH_RUN) ||
		    (th->state & TH_WAIT)) {
			/*
			 * thread is "active": either runnable
			 * or sleeping. Count it and examine
			 * it further below.
			 */
			nactive++;
			active = TRUE;
		}
		if (th != THREAD_NULL)
			thread_unlock(th);
		splx(s);
		act_unlock_thread(thr_act);
		if (active &&
		    (task->swap_state == TASK_SW_IN) &&
		    ((sched_tick - task->swap_stamp) > min_res_time)) {
			long rescount = pmap_resident_count(task->map->pmap);
			/*
			 * thread must be "active", task must be swapped
			 * in and resident for at least min_res_time
			 */
#if 0
			/* DEBUG Test round-robin strategy. Picking biggest task could cause extreme
			 * unfairness to such large interactive programs as xterm. Instead, pick the
			 * first task that has any pages resident:
			 */
			if (rescount > 1) {
				task->ref_count++;
				target_task = task;
				task_unlock(task);
				task_swapout_unlock();
				return(target_task);
			}
#else
			if (rescount > target_rss) {
				/*
				 * task is not swapped, and it has the
				 * largest rss seen so far.
				 */
				task->ref_count++;
				target_rss = rescount;
				assert(target_task != task);
				if (target_task != TASK_NULL)
					task_deallocate(target_task);
				target_task = task;
			}
#endif
		}
	tryagain:
		task_unlock(task);
		task = (task_t)queue_next(&task->swapped_tasks);
	}
	task_swapout_unlock();
	/* only swap out if there are at least min_active_tasks */
	if (nactive < min_active_tasks) {
		if (target_task != TASK_NULL) {
			task_deallocate(target_task);
			target_task = TASK_NULL;
		}
	}
	return(target_task);
}
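
/*
 * Usage sketch (illustrative; this is what task_swapper below does):
 * every successful pick_outtask() must be paired with a
 * task_deallocate(), since the task is returned with an extra
 * reference:
 *
 *	if ((task = pick_outtask()) != TASK_NULL) {
 *		(void) task_swapout(task);
 *		task_deallocate(task);
 *	}
 */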

#if TASK_SW_DEBUG
void print_pid(task_t task, unsigned long n1, unsigned long n2,
	       const char *comp, const char *inout);	/* forward */
void
print_pid(
	task_t task,
	unsigned long n1,
	unsigned long n2,
	const char *comp,
	const char *inout)
{
	long rescount;
	task_lock(task);
	rescount = pmap_resident_count(task->map->pmap);
	task_unlock(task);
	printf("task_swapper: swapped %s task %x; %d %s %d; res=%d\n",
	       inout, task, n1, comp, n2, rescount);
}
#endif

/*
 * task_swapper: [exported]
 *
 * Executes as a separate kernel thread.
 */
#define MAX_LOOP 3
void
task_swapper(void)
{
	task_t	outtask, intask;
	int	timeout;
	int	loopcnt = 0;
	boolean_t start_swapping;
	boolean_t stop_swapping;
	int local_page_free_avg;
	extern int hz;

	thread_swappable(current_act(), FALSE);
	stack_privilege(current_thread());

	spllo();

	for (;;) {
	local_page_free_avg = vm_page_free_avg;
	while (TRUE) {
#if 0
		if (task_swap_debug)
			printf("task_swapper: top of loop; cnt = %d\n", loopcnt);
#endif
		intask = pick_intask();

		start_swapping = ((vm_pageout_rate_avg > swap_start_pageout_rate) ||
				  (vm_grab_rate_avg > max_grab_rate));
		stop_swapping = (vm_pageout_rate_avg < swap_stop_pageout_rate);

		/*
		 * If a lot of paging is going on, or another task should come
		 * in but memory is tight, find something to swap out and start
		 * it. Don't swap any task out if task swapping is disabled.
		 * vm_page_queue_free_lock protects the vm globals.
		 */
		outtask = TASK_NULL;
		if (start_swapping ||
		    (!stop_swapping && intask &&
		     ((local_page_free_avg / AVE_SCALE) < vm_page_free_target))
		   ) {
			if (task_swap_enable &&
			    (outtask = pick_outtask()) &&
			    (task_swapout(outtask) == KERN_SUCCESS)) {
				unsigned long rss;
#if TASK_SW_DEBUG
				if (task_swap_debug)
					print_pid(outtask, local_page_free_avg / AVE_SCALE,
						  vm_page_free_target, "<",
						  "out");
#endif
				rss = outtask->swap_rss;
				if (outtask->swap_nswap == 1)
					rss /= 2;	/* divide by 2 if never out */
				local_page_free_avg += (rss/short_avg_interval) * AVE_SCALE;
			}
			if (outtask != TASK_NULL)
				task_deallocate(outtask);
		}

		/*
		 * If there is an eligible task to bring in and there are at
		 * least vm_page_free_target free pages, swap it in. If task
		 * swapping has been disabled, bring the task in anyway.
		 */
		if (intask && ((local_page_free_avg / AVE_SCALE) >=
							vm_page_free_target ||
				stop_swapping || !task_swap_enable)) {
			if (task_swapin(intask, FALSE) == KERN_SUCCESS) {
				unsigned long rss;
#if TASK_SW_DEBUG
				if (task_swap_debug)
					print_pid(intask, local_page_free_avg / AVE_SCALE,
						  vm_page_free_target, ">=",
						  "in");
#endif
				rss = intask->swap_rss;
				if (intask->swap_nswap == 1)
					rss /= 2;	/* divide by 2 if never out */
				local_page_free_avg -= (rss/short_avg_interval) * AVE_SCALE;
			}
		}
		/*
		 * XXX
		 * Here we have to decide whether to continue swapping
		 * in and/or out before sleeping. The decision should
		 * be made based on the previous action (swapin/out) and
		 * current system parameters, such as paging rates and
		 * demand.
		 * The function, compute_vm_averages, which does these
		 * calculations, depends on being called every second,
		 * so we can't just do the same thing.
		 */
		if (++loopcnt < MAX_LOOP)
			continue;

		/*
		 * Arrange to be awakened if paging is still heavy or there are
		 * any tasks partially or completely swapped out. (Otherwise,
		 * the wakeup will come from the external trigger(s).)
		 */
		timeout = 0;
		if (start_swapping)
			timeout = task_swap_cycle_time;
		else {
			task_swapper_lock();
			if (!queue_empty(&swapped_tasks))
				timeout = min_swap_time;
			task_swapper_unlock();
		}
		assert_wait((event_t)&swapped_tasks, THREAD_UNINT);
		if (timeout) {
			if (task_swap_debug)
				printf("task_swapper: set timeout of %d\n",
				       timeout);
			thread_set_timeout(timeout, NSEC_PER_SEC);
		}
		if (task_swap_debug)
			printf("task_swapper: blocking\n");
		thread_block(THREAD_CONTINUE_NULL);
		if (timeout) {
			thread_cancel_timeout(current_thread());
		}
		/* reset locals */
		loopcnt = 0;
		local_page_free_avg = vm_page_free_avg;
	}
	}
}

/* from BSD */
#define ave(smooth, cnt, time) \
	smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time)
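
/*
 * Worked example (added for clarity): with AVE_SCALE == 1024 and
 * time == short_avg_interval == 5, each call keeps 4/5 of the old
 * average and blends in 1/5 of the new sample, so a steady cnt of
 * 100 drives the average toward 100 * AVE_SCALE == 102400, i.e. an
 * exponentially decaying average in fixed point.
 */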

/*
 * We estimate the system paging load in more than one metric:
 * 1) the total number of calls into the function, vm_page_grab,
 *    which allocates all page frames for real pages.
 * 2) the total number of pages paged in and out of paging files.
 *    This is a measure of page cleaning and faulting from backing
 *    store.
 *
 * When either metric passes a threshold, tasks are swapped out.
 */
long last_grab_count = 0;
long last_pageout_count = 0;

/*
 * compute_vm_averages: [exported]
 *
 * This function is to be called once a second to calculate average paging
 * demand and average numbers of free pages for use by the task swapper.
 * Can also be used to wake up task swapper at desired thresholds.
 *
 * NOTE: this function is single-threaded, and requires locking if
 * ever there are multiple callers.
 */
void
compute_vm_averages(void)
{
	extern unsigned long vm_page_grab_count;
	long grab_count, pageout_count;
	int i;

	ave(vm_page_free_avg, vm_page_free_count, short_avg_interval);
	ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval);

	/*
	 * NOTE: the vm_page_grab_count and vm_stat structure are
	 * under control of vm_page_queue_free_lock. We're simply reading
	 * memory here, and the numbers don't depend on each other, so
	 * no lock is taken.
	 */

	grab_count = vm_page_grab_count;
	pageout_count = 0;
	for (i = 0; i < NCPUS; i++) {
		pageout_count += vm_stat[i].pageouts;
	}

	ave(vm_pageout_rate_avg, pageout_count - last_pageout_count,
	    short_avg_interval);
	ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count,
	    long_avg_interval);
	ave(vm_grab_rate_avg, grab_count - last_grab_count,
	    short_avg_interval);
	last_grab_count = grab_count;
	last_pageout_count = pageout_count;

	/*
	 * Adjust swap_{start,stop}_pageout_rate to the paging rate peak.
	 * This is an attempt to find the optimum paging rates at which
	 * to trigger task swapping on or off to regulate paging activity,
	 * depending on the hardware capacity.
	 */
	if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) {
		vm_pageout_rate_peakavg = vm_pageout_rate_avg;
		swap_start_pageout_rate =
			vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100;
		swap_stop_pageout_rate =
			vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100;
	}
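
	/*
	 * Worked example (added for clarity): if the paging rate peak
	 * settles at 2000 pages/sec (vm_pageout_rate_peakavg ==
	 * 2000 * AVE_SCALE), the default 30/10 water marks put
	 * swap_start_pageout_rate at 600 pages/sec and
	 * swap_stop_pageout_rate at 200 pages/sec.
	 */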

#if TASK_SW_DEBUG
	/*
	 * For measurements, allow fixed values.
	 */
	if (fixed_swap_start_pageout_rate)
		swap_start_pageout_rate = fixed_swap_start_pageout_rate;
	if (fixed_swap_stop_pageout_rate)
		swap_stop_pageout_rate = fixed_swap_stop_pageout_rate;
#endif	/* TASK_SW_DEBUG */

#if TASK_SW_DEBUG
	if (task_swap_stats)
		printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n",
		       vm_pageout_rate_avg / AVE_SCALE,
		       vm_pageout_rate_longavg / AVE_SCALE,
		       swap_start_pageout_rate / AVE_SCALE,
		       swap_stop_pageout_rate / AVE_SCALE,
		       vm_page_free_avg / AVE_SCALE,
		       vm_page_free_longavg / AVE_SCALE,
		       vm_page_free_target);
#endif	/* TASK_SW_DEBUG */

	if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) {
		if (task_swap_on) {
			/*
			 * The following is a delicate attempt to balance the
			 * need for reasonably rapid response to system
			 * thrashing, with the equally important desire to
			 * prevent the onset of swapping simply because of a
			 * short burst of paging activity.
			 */
			if (((vm_pageout_rate_longavg > swap_stop_pageout_rate) &&
			     (vm_pageout_rate_avg > swap_start_pageout_rate)) ||
			    (vm_pageout_rate_avg > vm_pageout_rate_peakavg) ||
			    (vm_grab_rate_avg > max_grab_rate))
				wake_task_swapper(FALSE);
		}
	} else /* page demand is low; should consider swapin */ {
		if (tasks_swapped_out != 0)
			wake_task_swapper(TRUE);
	}
}

void
task_swapout_eligible(task_t task)
{
#if TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("swapout_eligible: task 0x%X on swapped list\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif
	task_swapout_lock();
	task_lock(task);
#if TASK_SW_DEBUG
	if (task->swap_flags & TASK_SW_ELIGIBLE) {
		printf("swapout_eligible: task 0x%X already eligible\n", task);
	}
#endif	/* TASK_SW_DEBUG */
	if ((task->swap_state == TASK_SW_IN) &&
	    ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) {
		queue_enter(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags |= TASK_SW_ELIGIBLE;
	}
	task_unlock(task);
	task_swapout_unlock();
}

void
task_swapout_ineligible(task_t task)
{
#if TASK_SW_DEBUG
	task_swapper_lock();
	if (task_swap_debug && on_swapped_list(task)) {
		printf("swapout_ineligible: task 0x%X on swapped list\n", task);
		Debugger("");
	}
	task_swapper_unlock();
#endif
	task_swapout_lock();
	task_lock(task);
#if TASK_SW_DEBUG
	if (!(task->swap_flags & TASK_SW_ELIGIBLE))
		printf("swapout_ineligible: task 0x%X already inel.\n", task);
#endif	/* TASK_SW_DEBUG */
	if ((task->swap_state != TASK_SW_IN) &&
	    (task->swap_flags & TASK_SW_ELIGIBLE)) {
		queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
		task->swap_flags &= ~TASK_SW_ELIGIBLE;
	}
	task_unlock(task);
	task_swapout_unlock();
}

int task_swap_ast_aborted = 0;

/*
 * Process an AST_SWAPOUT.
 */
void
swapout_ast()
{
	spl_t s;
	thread_act_t act;
	thread_t thread;

	act = current_act();

	/*
	 * Task is being swapped out. First mark it as suspended
	 * and halted, then call thread_swapout_enqueue to put
	 * the thread on the queue for task_swap_swapout_thread
	 * to swap out the thread.
	 */
	/*
	 * Don't swap unswappable threads.
	 */
	thread = act_lock_thread(act);
	s = splsched();
	if (thread)
		thread_lock(thread);
	if ((act->ast & AST_SWAPOUT) == 0) {
		/*
		 * Race with task_swapin. Abort swapout.
		 */
		task_swap_ast_aborted++;	/* not locked XXX */
		if (thread)
			thread_unlock(thread);
		splx(s);
		act_unlock_thread(act);
	} else if (act->swap_state == TH_SW_IN) {
		/*
		 * Mark swap_state as TH_SW_TASK_SWAPPING to avoid
		 * race with thread swapper, which will only
		 * swap thread if swap_state is TH_SW_IN.
		 * This way, the thread can only be swapped by
		 * the task swapping mechanism.
		 */
		act->swap_state |= TH_SW_TASK_SWAPPING;
		/* assert(act->suspend_count == 0); XXX ? */
		if (thread)
			thread_unlock(thread);
		if (act->suspend_count++ == 0)	/* inline thread_hold */
			install_special_handler(act);
		/* self->state |= TH_HALTED; */
		thread_ast_clear(act, AST_SWAPOUT);
		/*
		 * Initialize the swap_queue fields to allow an extra
		 * queue_remove() in task_swapin if we lose the race
		 * (task_swapin can be called before we complete
		 * thread_swapout_enqueue).
		 */
		queue_init((queue_t) &act->swap_queue);
		splx(s);
		act_unlock_thread(act);
		/* this must be called at normal interrupt level */
		thread_swapout_enqueue(act);
	} else {
		/* thread isn't swappable; continue running */
		assert(act->swap_state == TH_SW_UNSWAPPABLE);
		if (thread)
			thread_unlock(thread);
		thread_ast_clear(act, AST_SWAPOUT);
		splx(s);
		act_unlock_thread(act);
	}
}

#endif	/* TASK_SWAPPER */