1 /*
2 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
7 *
8 * This file contains Original Code and/or Modifications of Original Code
9 * as defined in and that are subject to the Apple Public Source License
10 * Version 2.0 (the 'License'). You may not use this file except in
11 * compliance with the License. Please obtain a copy of the License at
12 * http://www.opensource.apple.com/apsl/ and read it before using this
13 * file.
14 *
15 * The Original Code and all software distributed under the License are
16 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
17 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
18 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
20 * Please see the License for the specific language governing rights and
21 * limitations under the License.
22 *
23 * @APPLE_LICENSE_HEADER_END@
24 */
25 /*
26 * @OSF_COPYRIGHT@
27 */
28 /*
29 * File: kern/task_swap.c
30 *
31 * Task residency management primitives implementation.
32 */
33 #include <mach_assert.h>
34 #include <task_swapper.h>
35
36 #include <kern/spl.h>
37 #include <kern/lock.h>
38 #include <kern/queue.h>
39 #include <kern/host.h>
40 #include <kern/task.h>
41 #include <kern/task_swap.h>
42 #include <kern/thread.h>
43 #include <kern/thread_swap.h>
44 #include <kern/host_statistics.h>
45 #include <kern/misc_protos.h>
46 #include <kern/assert.h>
47 #include <mach/policy.h>
48
49 #include <ipc/ipc_port.h> /* We use something from in here */
50
51 /*
52 * Note: if TASK_SWAPPER is disabled, then this file defines only
53 * a stub version of task_swappable(), so that the service can always
54 * be defined, even if swapping has been configured out of the kernel.
55 */
56 #if TASK_SWAPPER
57
58 /* temporary debug flags */
59 #define TASK_SW_DEBUG 1
60 #define TASK_SW_STATS 1
61
62 int task_swap_debug = 0;
63 int task_swap_stats = 0;
64 int task_swap_enable = 1;
65 int task_swap_on = 1;
66
67 queue_head_t swapped_tasks; /* completely swapped out tasks */
68 queue_head_t swapout_thread_q; /* threads to be swapped out */
69 mutex_t task_swapper_lock; /* protects above queue */
70
71 #define task_swapper_lock() mutex_lock(&task_swapper_lock)
72 #define task_swapper_unlock() mutex_unlock(&task_swapper_lock)
73 #define task_swapper_wakeup() thread_wakeup((event_t)&swapout_thread_q)
74 #define task_swapper_sleep() thread_sleep_mutex((event_t)&swapout_thread_q, \
75 &task_swapper_lock, \
76 THREAD_UNINT)
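
/*
 * Protocol sketch (editorial addition, not in the original source):
 * producers enqueue a thread_act on swapout_thread_q while holding
 * task_swapper_lock and then call task_swapper_wakeup(); the consumer
 * (task_swap_swapout_thread, below) drains the queue under the same
 * lock and calls task_swapper_sleep() when it is empty, which blocks
 * and releases task_swapper_lock via thread_sleep_mutex().
 */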
77
78
79 queue_head_t eligible_tasks; /* tasks eligible for swapout */
80 mutex_t task_swapout_list_lock; /* protects above queue */
81 #define task_swapout_lock() mutex_lock(&task_swapout_list_lock)
82 #define task_swapout_unlock() mutex_unlock(&task_swapout_list_lock)
83
84 /*
85 * The next section of constants and globals are tunable parameters
86 * used in making swapping decisions. They may be changed dynamically
87 * without adversely affecting the robustness of the system; however,
88 * changing them does shift the swapping policy one way or the other.
89 */
90
91 #define SHORT_AVG_INTERVAL 5 /* in seconds */
92 #define LONG_AVG_INTERVAL 30 /* in seconds */
93 #define AVE_SCALE 1024
94
95 unsigned int short_avg_interval = SHORT_AVG_INTERVAL;
96 unsigned int long_avg_interval = LONG_AVG_INTERVAL;
97
98 #ifndef MIN_SWAP_PAGEOUT_RATE
99 #define MIN_SWAP_PAGEOUT_RATE 10
100 #endif
101
102 /*
103 * The following are all stored in fixed-point representation (the actual
104 * value times AVE_SCALE), to allow more accurate computing of decaying
105 * averages. So all variables that end with "avg" must be divided by
106 * AVE_SCALE to convert them or compare them to ints.
107 */
108 unsigned int vm_grab_rate_avg;
109 unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
110 unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
111 unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
112 unsigned int vm_page_free_avg; /* average free pages over short_avg_interval */
113 unsigned int vm_page_free_longavg; /* avg free pages over long_avg_interval */
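
/*
 * Worked example (editorial addition, not in the original source):
 * with AVE_SCALE == 1024, a stored vm_page_free_avg of 20480 represents
 * 20480 / 1024 == 20 free pages, and an integer count n is stored as
 * n * AVE_SCALE. A typical comparison therefore reads:
 *
 *	if (vm_page_free_avg / AVE_SCALE < vm_page_free_target)
 *		... free-page average is below target ...
 */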
114
115 /*
116 * Trigger task swapping when paging activity reaches
117 * SWAP_PAGEOUT_HIGH_WATER_MARK percent of the maximum paging activity ever
118 * observed. Turn off task swapping when paging activity goes back down
119 * below SWAP_PAGEOUT_LOW_WATER_MARK percent of the maximum.
120 * These numbers have been found empirically and might need some tuning...
121 */
122 #ifndef SWAP_PAGEOUT_HIGH_WATER_MARK
123 #define SWAP_PAGEOUT_HIGH_WATER_MARK 30
124 #endif
125 #ifndef SWAP_PAGEOUT_LOW_WATER_MARK
126 #define SWAP_PAGEOUT_LOW_WATER_MARK 10
127 #endif
128
129 #ifndef MAX_GRAB_RATE
130 #define MAX_GRAB_RATE ((unsigned int) -1) /* XXX no maximum */
131 #endif
132
133 /*
134 * swap_{start,stop}_pageout_rate start at the minimum value, then increase
135 * to adjust to the hardware's performance, following the paging rate peaks.
136 */
137 unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK;
138 unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK;
139 unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
140 SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
141 unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
142 SWAP_PAGEOUT_LOW_WATER_MARK / 100;
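
/*
 * Worked example (editorial addition, not in the original source):
 * with the defaults above, swap_start_pageout_rate starts at
 * 10 * 1024 * 30 / 100 == 3072 (3 pages/sec in fixed point) and
 * swap_stop_pageout_rate at 10 * 1024 * 10 / 100 == 1024 (1 page/sec);
 * both are raised as vm_pageout_rate_peakavg grows (see
 * compute_vm_averages, below).
 */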
143 #if TASK_SW_DEBUG
144 unsigned int fixed_swap_start_pageout_rate = 0; /* for testing purposes only */
145 unsigned int fixed_swap_stop_pageout_rate = 0; /* for testing purposes only */
146 #endif /* TASK_SW_DEBUG */
147 unsigned int max_grab_rate = MAX_GRAB_RATE;
148
149 #ifndef MIN_SWAP_TIME
150 #define MIN_SWAP_TIME 1
151 #endif
152
153 int min_swap_time = MIN_SWAP_TIME; /* in seconds */
154
155 #ifndef MIN_RES_TIME
156 #define MIN_RES_TIME 6
157 #endif
158
159 int min_res_time = MIN_RES_TIME; /* in seconds */
160
161 #ifndef MIN_ACTIVE_TASKS
162 #define MIN_ACTIVE_TASKS 4
163 #endif
164
165 int min_active_tasks = MIN_ACTIVE_TASKS;
166
167 #ifndef TASK_SWAP_CYCLE_TIME
168 #define TASK_SWAP_CYCLE_TIME 2
169 #endif
170
171 int task_swap_cycle_time = TASK_SWAP_CYCLE_TIME; /* in seconds */
172
173 int last_task_swap_cycle = 0;
174
175 /* temporary statistics */
176 int task_swapouts = 0;
177 int task_swapins = 0;
178 int task_swaprss_out = 0; /* total rss at swapout time */
179 int task_swaprss_in = 0; /* total rss at swapin time */
180 int task_swap_total_time = 0; /* total time spent swapped out */
181 int tasks_swapped_out = 0; /* number of tasks swapped out now */
182
183 #ifdef TASK_SW_STATS
184 #define TASK_STATS_INCR(cnt) (cnt)++
185 #else
186 #define TASK_STATS_INCR(cnt)
187 #endif /* TASK_SW_STATS */
188
189 #if TASK_SW_DEBUG
190 boolean_t on_swapped_list(task_t task); /* forward */
191 /*
192 * Debug function to determine if a task is already on the
193 * swapped out tasks list. It also checks for tasks on the list
194 * that are in an illegal state (i.e. swapped in).
195 */
196 boolean_t
197 on_swapped_list(task_t task)
198 {
199 task_t ltask;
200 /* task_swapper_lock is locked. */
201
202 if (queue_empty(&swapped_tasks)) {
203 return(FALSE);
204 }
205 ltask = (task_t)queue_first(&swapped_tasks);
206 while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) {
207 /* check for illegal state */
208 if (ltask->swap_state == TASK_SW_IN) {
209 printf("on_swapped_list and in: 0x%X\n",ltask);
210 Debugger("");
211 }
212 if (ltask == task)
213 return(TRUE);
214 ltask = (task_t)queue_next(&ltask->swapped_tasks);
215 }
216 return(FALSE);
217 }
218 #endif /* TASK_SW_DEBUG */
219
220 /*
221 * task_swapper_init: [exported]
222 */
223 void
224 task_swapper_init()
225 {
226 queue_init(&swapped_tasks);
227 queue_init(&eligible_tasks);
228 queue_init(&swapout_thread_q);
229 mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP);
230 mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT);
231 vm_page_free_avg = vm_page_free_count * AVE_SCALE;
232 vm_page_free_longavg = vm_page_free_count * AVE_SCALE;
233 }
234
235 #endif /* TASK_SWAPPER */
236
237 /*
238 * task_swappable: [exported]
239 *
240 * Make a task swappable or non-swappable. If made non-swappable,
241 * it will be swapped in.
242 *
243 * Locking: task_swapout_lock is taken before task lock.
244 */
245 kern_return_t
246 task_swappable(
247 host_priv_t host_priv,
248 task_t task,
249 boolean_t make_swappable)
250 {
251 if (host_priv == HOST_PRIV_NULL)
252 return(KERN_INVALID_ARGUMENT);
253
254 if (task == TASK_NULL)
255 return(KERN_INVALID_ARGUMENT);
256
257 #if !TASK_SWAPPER
258
259 /*
260 * If we don't support swapping, this call is purely advisory.
261 */
262 return(KERN_SUCCESS);
263
264 #else /* TASK_SWAPPER */
265
266 task_lock(task);
267 if (make_swappable) {
268 /* make task swappable */
269 if (task->swap_state == TASK_SW_UNSWAPPABLE) {
270 task->swap_state = TASK_SW_IN;
271 task_unlock(task);
272 task_swapout_eligible(task);
273 }
274 } else {
275 switch (task->swap_state) {
276 case TASK_SW_IN:
277 task->swap_state = TASK_SW_UNSWAPPABLE;
278 task_unlock(task);
279 task_swapout_ineligible(task);
280 break;
281 case TASK_SW_UNSWAPPABLE:
282 task_unlock(task);
283 break;
284 default:
285 /*
286 * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT,
287 * or TASK_SW_COMING_IN. task_swapin handles all
288 * three, and its default case will catch any bad
289 * states.
290 */
291 task_unlock(task);
292 task_swapin(task, TRUE);
293 break;
294 }
295 }
296 return(KERN_SUCCESS);
297
298 #endif /* TASK_SWAPPER */
299
300 }
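
#if 0	/*
	 * Usage sketch (editorial addition, not in the original source):
	 * a privileged client pins a task resident and later releases it.
	 * host_priv and a_task are assumed here to be valid rights held
	 * by the caller.
	 */
	kern_return_t kr;

	kr = task_swappable(host_priv, a_task, FALSE);	/* swap in, pin resident */
	assert(kr == KERN_SUCCESS);
	kr = task_swappable(host_priv, a_task, TRUE);	/* allow swapping again */
#endif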
301
302 #if TASK_SWAPPER
303
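/*
 * Overview sketch (editorial addition, not in the original source):
 * the task swap_state transitions implemented in this file.
 *
 *	TASK_SW_IN        --task_swapout()-->              TASK_SW_GOING_OUT
 *	TASK_SW_GOING_OUT --last thread_swapout_enqueue--> TASK_SW_OUT
 *	TASK_SW_OUT or TASK_SW_GOING_OUT --task_swapin()-> TASK_SW_COMING_IN
 *	TASK_SW_COMING_IN --task_swapin() completes-->     TASK_SW_IN
 *	    (or TASK_SW_UNSWAPPABLE if TASK_SW_MAKE_UNSWAPPABLE was requested)
 */
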
304 /*
305 * task_swapout:
306 * A reference to the task must be held.
307 *
308 * Start swapping out a task by sending an AST_SWAPOUT to each thread.
309 * When the threads reach a clean point, they queue themselves up on the
310 * swapout_thread_q to be swapped out by the task_swap_swapout_thread.
311 * The task can be swapped in at any point in this process.
312 *
313 * A task will not be fully swapped out (i.e. its map residence count
314 * at zero) until all currently-swapped threads run and reach
315 * a clean point, at which time they will be swapped again,
316 * decrementing the swap_ast_waiting count on the task.
317 *
318 * Locking: no locks held upon entry and exit.
319 * Task_lock is held throughout this function.
320 */
321 kern_return_t
322 task_swapout(task_t task)
323 {
324 thread_act_t thr_act;
325 thread_t thread;
326 queue_head_t *list;
327 int s;
328
329 task_swapout_lock();
330 task_lock(task);
331 /*
332 * NOTE: look into turning these into assertions if they
333 * are invariants.
334 */
335 if ((task->swap_state != TASK_SW_IN) || (!task->active)) {
336 task_unlock(task);
337 task_swapout_unlock();
338 return(KERN_FAILURE);
339 }
340 if (task->swap_flags & TASK_SW_ELIGIBLE) {
341 queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
342 task->swap_flags &= ~TASK_SW_ELIGIBLE;
343 }
344 task_swapout_unlock();
345
346 /* set state to avoid races with task_swappable(FALSE) */
347 task->swap_state = TASK_SW_GOING_OUT;
348 task->swap_rss = pmap_resident_count(task->map->pmap);
349 task_swaprss_out += task->swap_rss;
350 task->swap_ast_waiting = task->thr_act_count;
351
352 /*
353 * halt all threads in this task:
354 * We don't need the thread list lock for traversal.
355 */
356 list = &task->thr_acts;
357 thr_act = (thread_act_t) queue_first(list);
358 while (!queue_end(list, (queue_entry_t) thr_act)) {
359 boolean_t swappable;
360 thread_act_t ract;
361
362 thread = act_lock_thread(thr_act);
363 s = splsched();
364 if (!thread)
365 swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE);
366 else {
367 thread_lock(thread);
368 swappable = TRUE;
369 for (ract = thread->top_act; ract; ract = ract->lower)
370 if (ract->swap_state == TH_SW_UNSWAPPABLE) {
371 swappable = FALSE;
372 break;
373 }
374 }
375 if (swappable)
376 thread_ast_set(thr_act, AST_SWAPOUT);
377 if (thread)
378 thread_unlock(thread);
379 splx(s);
380 assert((thr_act->ast & AST_TERMINATE) == 0);
381 act_unlock_thread(thr_act);
382 thr_act = (thread_act_t) queue_next(&thr_act->thr_acts);
383 }
384
385 task->swap_stamp = sched_tick;
386 task->swap_nswap++;
387 assert((task->swap_flags&TASK_SW_WANT_IN) == 0);
388 /* put task on the queue of swapped out tasks */
389 task_swapper_lock();
390 #if TASK_SW_DEBUG
391 if (task_swap_debug && on_swapped_list(task)) {
392 printf("task 0x%X already on list\n", task);
393 Debugger("");
394 }
395 #endif /* TASK_SW_DEBUG */
396 queue_enter(&swapped_tasks, task, task_t, swapped_tasks);
397 tasks_swapped_out++;
398 task_swapouts++;
399 task_swapper_unlock();
400 task_unlock(task);
401
402 return(KERN_SUCCESS);
403 }
404
405 #ifdef TASK_SW_STATS
406 int task_sw_race_in = 0;
407 int task_sw_race_coming_in = 0;
408 int task_sw_race_going_out = 0;
409 int task_sw_before_ast = 0;
410 int task_sw_before_swap = 0;
411 int task_sw_after_swap = 0;
412 int task_sw_race_in_won = 0;
413 int task_sw_unswappable = 0;
414 int task_sw_act_inactive = 0;
415 #endif /* TASK_SW_STATS */
416
417 /*
418 * thread_swapout_enqueue is called by thread_halt_self when it
419 * processes AST_SWAPOUT to enqueue threads to be swapped out.
420 * It must be called at normal interrupt priority for the
421 * sake of the task_swapper_lock.
422 *
423 * There can be races with task swapin here.
424 * First lock task and decrement swap_ast_waiting count, and if
425 * it's 0, we can decrement the residence count on the task's map
426 * and set the task's swap state to TASK_SW_OUT.
427 */
428 void
429 thread_swapout_enqueue(thread_act_t thr_act)
430 {
431 task_t task = thr_act->task;
432 task_lock(task);
433 /*
434 * If the swap_state is not TASK_SW_GOING_OUT, then
435 * task_swapin has beaten us to this operation, and
436 * we have nothing to do.
437 */
438 if (task->swap_state != TASK_SW_GOING_OUT) {
439 task_unlock(task);
440 return;
441 }
442 if (--task->swap_ast_waiting == 0) {
443 vm_map_t map = task->map;
444 task->swap_state = TASK_SW_OUT;
445 task_unlock(task);
446 mutex_lock(&map->s_lock);
447 vm_map_res_deallocate(map);
448 mutex_unlock(&map->s_lock);
449 } else
450 task_unlock(task);
451
452 task_swapper_lock();
453 act_lock(thr_act);
454 if (! (thr_act->swap_state & TH_SW_TASK_SWAPPING)) {
455 /*
456 * We lost a race with task_swapin(): don't enqueue.
457 */
458 } else {
459 queue_enter(&swapout_thread_q, thr_act,
460 thread_act_t, swap_queue);
461 task_swapper_wakeup();
462 }
463 act_unlock(thr_act);
464 task_swapper_unlock();
465 }
466
467 /*
468 * task_swap_swapout_thread: [exported]
469 *
470 * Executes as a separate kernel thread.
471 * Its job is to swap out threads that have been halted by AST_SWAPOUT.
472 */
473 void
474 task_swap_swapout_thread(void)
475 {
476 thread_act_t thr_act;
477 thread_t thread, nthread;
478 task_t task;
479 int s;
480
481 thread_swappable(current_act(), FALSE);
482 stack_privilege(current_thread());
483
484 spllo();
485
486 task_swapper_lock();
487 while (TRUE) {
488 while (! queue_empty(&swapout_thread_q)) {
489
490 queue_remove_first(&swapout_thread_q, thr_act,
491 thread_act_t, swap_queue);
492 /*
493 * If we're racing with task_swapin, we need
494 * to make it safe for it to do remque on the
495 * thread, so make its links point to itself.
496 * Allowing this ugliness is cheaper than
497 * making task_swapin search the entire queue.
498 */
499 act_lock(thr_act);
500 queue_init((queue_t) &thr_act->swap_queue);
501 act_unlock(thr_act);
502 task_swapper_unlock();
503 /*
504 * Wait for thread's RUN bit to be deasserted.
505 */
506 thread = act_lock_thread(thr_act);
507 if (thread == THREAD_NULL)
508 act_unlock_thread(thr_act);
509 else {
510 boolean_t r;
511
512 thread_reference(thread);
513 thread_hold(thr_act);
514 act_unlock_thread(thr_act);
515 r = thread_stop_wait(thread);
516 nthread = act_lock_thread(thr_act);
517 thread_release(thr_act);
518 thread_deallocate(thread);
519 act_unlock_thread(thr_act);
520 if (!r || nthread != thread) {
521 task_swapper_lock();
522 continue;
523 }
524 }
525 task = thr_act->task;
526 task_lock(task);
527 /*
528 * we can race with swapin, which would set the
529 * state to TASK_SW_IN.
530 */
531 if ((task->swap_state != TASK_SW_OUT) &&
532 (task->swap_state != TASK_SW_GOING_OUT)) {
533 task_unlock(task);
534 task_swapper_lock();
535 TASK_STATS_INCR(task_sw_race_in_won);
536 if (thread != THREAD_NULL)
537 thread_unstop(thread);
538 continue;
539 }
540 nthread = act_lock_thread(thr_act);
541 if (nthread != thread || thr_act->active == FALSE) {
542 act_unlock_thread(thr_act);
543 task_unlock(task);
544 task_swapper_lock();
545 TASK_STATS_INCR(task_sw_act_inactive);
546 if (thread != THREAD_NULL)
547 thread_unstop(thread);
548 continue;
549 }
550 s = splsched();
551 if (thread != THREAD_NULL)
552 thread_lock(thread);
553 /*
554 * Thread cannot have been swapped out yet because
555 * TH_SW_TASK_SWAPPING was set in AST. If task_swapin
556 * beat us here, we either wouldn't have found it on
557 * the queue, or the task->swap_state would have
558 * changed. The synchronization is on the
559 * task's swap_state and the task_lock.
560 * The thread can't be swapped in any other way
561 * because its task has been swapped.
562 */
563 assert(thr_act->swap_state & TH_SW_TASK_SWAPPING);
564 assert(thread == THREAD_NULL ||
565 !(thread->state & (TH_SWAPPED_OUT|TH_RUN)));
566 assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN);
567 /* assert(thread->state & TH_HALTED); */
568 /* this also clears TH_SW_TASK_SWAPPING flag */
569 thr_act->swap_state = TH_SW_GOING_OUT;
570 if (thread != THREAD_NULL) {
571 if (thread->top_act == thr_act) {
572 thread->state |= TH_SWAPPED_OUT;
573 /*
574 * Once we unlock the task, things can happen
575 * to the thread, so make sure it's consistent
576 * for thread_swapout.
577 */
578 }
579 thread->ref_count++;
580 thread_unlock(thread);
581 thread_unstop(thread);
582 }
583 splx(s);
584 act_locked_act_reference(thr_act);
585 act_unlock_thread(thr_act);
586 task_unlock(task);
587
588 thread_swapout(thr_act); /* do the work */
589
590 if (thread != THREAD_NULL)
591 thread_deallocate(thread);
592 act_deallocate(thr_act);
593 task_swapper_lock();
594 }
595 task_swapper_sleep();
596 }
597 }
598
599 /*
600 * task_swapin:
601 *
602 * Make a task resident.
603 * Performs all of the work to make a task resident and possibly
604 * non-swappable. If we race with a competing task_swapin call,
605 * we wait for its completion, then return.
606 *
607 * Locking: no locks held upon entry and exit.
608 *
609 * Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the
610 * state is TASK_SW_COMING_IN.
611 */
612
613 kern_return_t
614 task_swapin(task_t task, boolean_t make_unswappable)
615 {
616 register queue_head_t *list;
617 register thread_act_t thr_act, next;
618 thread_t thread;
619 int s;
620 boolean_t swappable = TRUE;
621
622 task_lock(task);
623 switch (task->swap_state) {
624 case TASK_SW_OUT:
625 {
626 vm_map_t map = task->map;
627 /*
628 * Task has made it all the way out, which means
629 * that vm_map_res_deallocate has been done; set
630 * state to TASK_SW_COMING_IN, then bring map
631 * back in. We could actually be racing with
632 * the thread_swapout_enqueue, which does the
633 * vm_map_res_deallocate, but that race is covered.
634 */
635 task->swap_state = TASK_SW_COMING_IN;
636 assert(task->swap_ast_waiting == 0);
637 assert(map->res_count >= 0);
638 task_unlock(task);
639 mutex_lock(&map->s_lock);
640 vm_map_res_reference(map);
641 mutex_unlock(&map->s_lock);
642 task_lock(task);
643 assert(task->swap_state == TASK_SW_COMING_IN);
644 }
645 break;
646
647 case TASK_SW_GOING_OUT:
648 /*
649 * Task isn't all the way out yet. There is
650 * still at least one thread not swapped, and
651 * vm_map_res_deallocate has not been done.
652 */
653 task->swap_state = TASK_SW_COMING_IN;
654 assert(task->swap_ast_waiting > 0 ||
655 (task->swap_ast_waiting == 0 &&
656 task->thr_act_count == 0));
657 assert(task->map->res_count > 0);
658 TASK_STATS_INCR(task_sw_race_going_out);
659 break;
660 case TASK_SW_IN:
661 assert(task->map->res_count > 0);
662 #if TASK_SW_DEBUG
663 task_swapper_lock();
664 if (task_swap_debug && on_swapped_list(task)) {
665 printf("task 0x%X on list, state is SW_IN\n",
666 task);
667 Debugger("");
668 }
669 task_swapper_unlock();
670 #endif /* TASK_SW_DEBUG */
671 TASK_STATS_INCR(task_sw_race_in);
672 if (make_unswappable) {
673 task->swap_state = TASK_SW_UNSWAPPABLE;
674 task_unlock(task);
675 task_swapout_ineligible(task);
676 } else
677 task_unlock(task);
678 return(KERN_SUCCESS);
679 case TASK_SW_COMING_IN:
680 /*
681 * Raced with another task_swapin and lost;
682 * wait for other one to complete first
683 */
684 assert(task->map->res_count >= 0);
685 /*
686 * set MAKE_UNSWAPPABLE so that whoever is swapping
687 * the task in will make it unswappable, and return
688 */
689 if (make_unswappable)
690 task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
691 task->swap_flags |= TASK_SW_WANT_IN;
692 assert_wait((event_t)&task->swap_state, THREAD_UNINT);
693 task_unlock(task);
694 thread_block(THREAD_CONTINUE_NULL);
695 TASK_STATS_INCR(task_sw_race_coming_in);
696 return(KERN_SUCCESS);
697 case TASK_SW_UNSWAPPABLE:
698 /*
699 * This can happen, since task_terminate
700 * unconditionally calls task_swapin.
701 */
702 task_unlock(task);
703 return(KERN_SUCCESS);
704 default:
705 panic("task_swapin bad state");
706 break;
707 }
708 if (make_unswappable)
709 task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
710 assert(task->swap_state == TASK_SW_COMING_IN);
711 task_swapper_lock();
712 #if TASK_SW_DEBUG
713 if (task_swap_debug && !on_swapped_list(task)) {
714 printf("task 0x%X not on list\n", task);
715 Debugger("");
716 }
717 #endif /* TASK_SW_DEBUG */
718 queue_remove(&swapped_tasks, task, task_t, swapped_tasks);
719 tasks_swapped_out--;
720 task_swapins++;
721 task_swapper_unlock();
722
723 /*
724 * Iterate through all threads for this task and
725 * release them, as required. They may not have been swapped
726 * out yet. The task remains locked throughout.
727 */
728 list = &task->thr_acts;
729 thr_act = (thread_act_t) queue_first(list);
730 while (!queue_end(list, (queue_entry_t) thr_act)) {
731 boolean_t need_to_release;
732 next = (thread_act_t) queue_next(&thr_act->thr_acts);
733 /*
734 * Keep task_swapper_lock across thread handling
735 * to synchronize with task_swap_swapout_thread
736 */
737 task_swapper_lock();
738 thread = act_lock_thread(thr_act);
739 s = splsched();
740 if (thr_act->ast & AST_SWAPOUT) {
741 /* thread hasn't gotten the AST yet, just clear it */
742 thread_ast_clear(thr_act, AST_SWAPOUT);
743 need_to_release = FALSE;
744 TASK_STATS_INCR(task_sw_before_ast);
745 splx(s);
746 act_unlock_thread(thr_act);
747 } else {
748 /*
749 * If AST_SWAPOUT was cleared, then thread_hold,
750 * or equivalent was done.
751 */
752 need_to_release = TRUE;
753 /*
754 * Thread has hit AST, but it may not have
755 * been dequeued yet, so we need to check.
756 * NOTE: the thread may have been dequeued, but
757 * has not yet been swapped (the task_swapper_lock
758 * has been dropped, but the thread is not yet
759 * locked), and the TH_SW_TASK_SWAPPING flag may
760 * not have been cleared. In this case, we will do
761 * an extra remque, which the task_swap_swapout_thread
762 * has made safe, and clear the flag, which is also
763 * checked by task_swap_swapout_thread before doing the swapout.
764 */
765 if (thread)
766 thread_lock(thread);
767 if (thr_act->swap_state & TH_SW_TASK_SWAPPING) {
768 /*
769 * hasn't yet been dequeued for swapout,
770 * so clear flags and dequeue it first.
771 */
772 thr_act->swap_state &= ~TH_SW_TASK_SWAPPING;
773 assert(thr_act->thread == THREAD_NULL ||
774 !(thr_act->thread->state &
775 TH_SWAPPED_OUT));
776 queue_remove(&swapout_thread_q, thr_act,
777 thread_act_t, swap_queue);
778 TASK_STATS_INCR(task_sw_before_swap);
779 } else {
780 TASK_STATS_INCR(task_sw_after_swap);
781 /*
782 * It's possible that the thread was
783 * made unswappable before hitting the
784 * AST, in which case it's still running.
785 */
786 if (thr_act->swap_state == TH_SW_UNSWAPPABLE) {
787 need_to_release = FALSE;
788 TASK_STATS_INCR(task_sw_unswappable);
789 }
790 }
791 if (thread)
792 thread_unlock(thread);
793 splx(s);
794 act_unlock_thread(thr_act);
795 }
796 task_swapper_unlock();
797
798 /*
799 * thread_release will swap in the thread if it's been
800 * swapped out.
801 */
802 if (need_to_release) {
803 act_lock_thread(thr_act);
804 thread_release(thr_act);
805 act_unlock_thread(thr_act);
806 }
807 thr_act = next;
808 }
809
810 if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) {
811 task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE;
812 task->swap_state = TASK_SW_UNSWAPPABLE;
813 swappable = FALSE;
814 } else {
815 task->swap_state = TASK_SW_IN;
816 }
817
818 task_swaprss_in += pmap_resident_count(task->map->pmap);
819 task_swap_total_time += sched_tick - task->swap_stamp;
820 /* note when task came back in */
821 task->swap_stamp = sched_tick;
822 if (task->swap_flags & TASK_SW_WANT_IN) {
823 task->swap_flags &= ~TASK_SW_WANT_IN;
824 thread_wakeup((event_t)&task->swap_state);
825 }
826 assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0);
827 task_unlock(task);
828 #if TASK_SW_DEBUG
829 task_swapper_lock();
830 if (task_swap_debug && on_swapped_list(task)) {
831 printf("task 0x%X on list at end of swap in\n", task);
832 Debugger("");
833 }
834 task_swapper_unlock();
835 #endif /* TASK_SW_DEBUG */
836 /*
837 * Make the task eligible to be swapped again
838 */
839 if (swappable)
840 task_swapout_eligible(task);
841 return(KERN_SUCCESS);
842 }
843
844 void wake_task_swapper(boolean_t now); /* forward */
845
846 /*
847 * wake_task_swapper: [exported]
848 *
849 * Wakes up task swapper if now == TRUE or if at least
850 * task_swap_cycle_time has elapsed since the last call.
851 *
852 * NOTE: this function is not thread-safe, so if there is
853 * ever more than one caller, it must be modified.
854 */
855 void
856 wake_task_swapper(boolean_t now)
857 {
858 /* last_task_swap_cycle may require locking */
859 if (now ||
860 (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) {
861 last_task_swap_cycle = sched_tick;
862 if (task_swap_debug)
863 printf("wake_task_swapper: waking swapper\n");
864 thread_wakeup((event_t)&swapped_tasks); /* poke swapper */
865 }
866 }
867
868 task_t pick_intask(void); /* forward */
869 /*
870 * pick_intask:
871 * returns a task to be swapped in, or TASK_NULL if nothing suitable is found.
872 *
873 * current algorithm: Return the task that has been swapped out the
874 * longest, as long as it is > min_swap_time. It will be dequeued
875 * if actually swapped in.
876 *
877 * NOTE:**********************************************
878 * task->swap_rss (the size when the task was swapped out) could be used to
879 * further refine the selection. Another possibility would be to look at
880 * the state of the thread(s) to see if the task/threads would run if they
881 * were swapped in.
882 * ***************************************************
883 *
884 * Locking: no locks held upon entry and exit.
885 */
886 task_t
887 pick_intask(void)
888 {
889 register task_t task = TASK_NULL;
890
891 task_swapper_lock();
892 /* the oldest task is the first one */
893 if (!queue_empty(&swapped_tasks)) {
894 task = (task_t) queue_first(&swapped_tasks);
895 assert(task != TASK_NULL);
896 /* Make sure it's been out min_swap_time */
897 if ((sched_tick - task->swap_stamp) < min_swap_time)
898 task = TASK_NULL;
899 }
900 task_swapper_unlock();
901 return(task);
902 #if 0
903 /*
904 * This code looks at the entire list of swapped tasks, but since
905 * it does not yet do anything but look at time swapped, we
906 * can simply use the fact that the queue is ordered, and take
907 * the first one off the queue.
908 */
909 task = (task_t)queue_first(&swapped_tasks);
910 while (!queue_end(&swapped_tasks, (queue_entry_t)task)) {
911 task_lock(task);
912 tmp_time = sched_tick - task->swap_stamp;
913 if (tmp_time > min_swap_time && tmp_time > time_swapped) {
914 target_task = task;
915 time_swapped = tmp_time;
916 }
917 task_unlock(task);
918 task = (task_t)queue_next(&task->swapped_tasks);
919 }
920 task_swapper_unlock();
921 return(target_task);
922 #endif
923 }
924
925 task_t pick_outtask(void); /* forward */
926 /*
927 * pick_outtask:
928 * returns a task to be swapped out, with a reference on the task,
929 * or NULL if no suitable task is found.
930 *
931 * current algorithm:
932 *
933 * Examine all eligible tasks. While looking, use the first thread in
934 * each task as an indication of the task's activity. Count up
935 * "active" threads (those either runnable or sleeping). If the task
936 * is active (by these criteria), swapped in, and resident
937 * for at least min_res_time, then select the task with the largest
938 * number of pages in memory. If there are fewer
939 * than min_active_tasks active tasks in the system, then don't
940 * swap anything out (this avoids swapping out the only running task
941 * in the system, for example).
942 *
943 * NOTE: the selected task is not removed from the eligible list here;
944 * it is removed from the list only when it is actually swapped out,
945 * so it may be selected again until then.
946 *
947 * Locking: no locks held upon entry and exit. Task_swapout_lock must be
948 * taken before task locks.
949 *
950 * ***************************************************
951 * TBD:
952 * This algorithm examines only the first thread in the task. Since most
953 * swappable tasks in the system are currently single-threaded, this works
954 * reasonably well. However, the algorithm should be changed to consider
955 * all threads in the task if multi-threaded tasks become common.
956 * ***************************************************
957 */
958
959 #ifdef TASK_SW_STATS
960 int inactive_task_count = 0;
961 int empty_task_count = 0;
962 #endif /* TASK_SW_STATS */
963
964 task_t
965 pick_outtask(void)
966 {
967 register task_t task;
968 register task_t target_task = TASK_NULL;
969 unsigned long task_rss;
970 unsigned long target_rss = 0;
971 boolean_t wired;
972 boolean_t active;
973 int nactive = 0;
974
975 task_swapout_lock();
976 if (queue_empty(&eligible_tasks)) {
977 /* not likely to happen */
978 task_swapout_unlock();
979 return(TASK_NULL);
980 }
981 task = (task_t)queue_first(&eligible_tasks);
982 while (!queue_end(&eligible_tasks, (queue_entry_t)task)) {
983 int s;
984 register thread_act_t thr_act;
985 thread_t th;
986
987
988 task_lock(task);
989 /*
990 * Don't swap real-time tasks.
991 * XXX Should we enforce that or can we let really critical
992 * tasks use task_swappable() to make sure they never end up
993 * on the eligible list?
994 */
995 if (task->policy & POLICYCLASS_FIXEDPRI) {
996 goto tryagain;
997 }
998 if (!task->active) {
999 TASK_STATS_INCR(inactive_task_count);
1000 goto tryagain;
1001 }
1002 if (task->res_act_count == 0) {
1003 TASK_STATS_INCR(empty_task_count);
1004 goto tryagain;
1005 }
1006 assert(!queue_empty(&task->thr_acts));
1007 thr_act = (thread_act_t)queue_first(&task->thr_acts);
1008 active = FALSE;
1009 th = act_lock_thread(thr_act);
1010 s = splsched();
1011 if (th != THREAD_NULL)
1012 thread_lock(th);
1013 if ((th == THREAD_NULL) ||
1014 (th->state == TH_RUN) ||
1015 (th->state & TH_WAIT)) {
1016 /*
1017 * thread is "active": either runnable
1018 * or sleeping. Count it and examine
1019 * it further below.
1020 */
1021 nactive++;
1022 active = TRUE;
1023 }
1024 if (th != THREAD_NULL)
1025 thread_unlock(th);
1026 splx(s);
1027 act_unlock_thread(thr_act);
1028 if (active &&
1029 (task->swap_state == TASK_SW_IN) &&
1030 ((sched_tick - task->swap_stamp) > min_res_time)) {
1031 long rescount = pmap_resident_count(task->map->pmap);
1032 /*
1033 * thread must be "active", task must be swapped
1034 * in and resident for at least min_res_time
1035 */
1036 #if 0
1037 /* DEBUG Test round-robin strategy. Picking biggest task could cause extreme
1038 * unfairness to such large interactive programs as xterm. Instead, pick the
1039 * first task that has any pages resident:
1040 */
1041 if (rescount > 1) {
1042 task->ref_count++;
1043 target_task = task;
1044 task_unlock(task);
1045 task_swapout_unlock();
1046 return(target_task);
1047 }
1048 #else
1049 if (rescount > target_rss) {
1050 /*
1051 * task is not swapped, and it has the
1052 * largest rss seen so far.
1053 */
1054 task->ref_count++;
1055 target_rss = rescount;
1056 assert(target_task != task);
1057 if (target_task != TASK_NULL)
1058 task_deallocate(target_task);
1059 target_task = task;
1060 }
1061 #endif
1062 }
1063 tryagain:
1064 task_unlock(task);
1065 task = (task_t)queue_next(&task->swapped_tasks);
1066 }
1067 task_swapout_unlock();
1068 /* only swap out if there are at least min_active_tasks */
1069 if (nactive < min_active_tasks) {
1070 if (target_task != TASK_NULL) {
1071 task_deallocate(target_task);
1072 target_task = TASK_NULL;
1073 }
1074 }
1075 return(target_task);
1076 }
1077
1078 #if TASK_SW_DEBUG
1079 void print_pid(task_t task, unsigned long n1, unsigned long n2,
1080 const char *comp, const char *inout); /* forward */
1081 void
1082 print_pid(
1083 task_t task,
1084 unsigned long n1,
1085 unsigned long n2,
1086 const char *comp,
1087 const char *inout)
1088 {
1089 long rescount;
1090 task_lock(task);
1091 rescount = pmap_resident_count(task->map->pmap);
1092 task_unlock(task);
1093 printf("task_swapper: swapped %s task %x; %d %s %d; res=%d\n",
1094 inout, task, n1, comp, n2, rescount);
1095 }
1096 #endif
1097
1098 /*
1099 * task_swapper: [exported]
1100 *
1101 * Executes as a separate kernel thread.
1102 */
1103 #define MAX_LOOP 3
1104 void
1105 task_swapper(void)
1106 {
1107 task_t outtask, intask;
1108 int timeout;
1109 int loopcnt = 0;
1110 boolean_t start_swapping;
1111 boolean_t stop_swapping;
1112 int local_page_free_avg;
1113 extern int hz;
1114
1115 thread_swappable(current_act(), FALSE);
1116 stack_privilege(current_thread());
1117
1118 spllo();
1119
1120 for (;;) {
1121 local_page_free_avg = vm_page_free_avg;
1122 while (TRUE) {
1123 #if 0
1124 if (task_swap_debug)
1125 printf("task_swapper: top of loop; cnt = %d\n",loopcnt);
1126 #endif
1127 intask = pick_intask();
1128
1129 start_swapping = ((vm_pageout_rate_avg > swap_start_pageout_rate) ||
1130 (vm_grab_rate_avg > max_grab_rate));
1131 stop_swapping = (vm_pageout_rate_avg < swap_stop_pageout_rate);
1132
1133 /*
1134 * If a lot of paging is going on, or another task should come
1135 * in but memory is tight, find something to swap out and start
1136 * it. Don't swap any task out if task swapping is disabled.
1137 * vm_page_queue_free_lock protects the vm globals.
1138 */
1139 outtask = TASK_NULL;
1140 if (start_swapping ||
1141 (!stop_swapping && intask &&
1142 ((local_page_free_avg / AVE_SCALE) < vm_page_free_target))
1143 ) {
1144 if (task_swap_enable &&
1145 (outtask = pick_outtask()) &&
1146 (task_swapout(outtask) == KERN_SUCCESS)) {
1147 unsigned long rss;
1148 #if TASK_SW_DEBUG
1149 if (task_swap_debug)
1150 print_pid(outtask, local_page_free_avg / AVE_SCALE,
1151 vm_page_free_target, "<",
1152 "out");
1153 #endif
1154 rss = outtask->swap_rss;
1155 if (outtask->swap_nswap == 1)
1156 rss /= 2; /* divide by 2 if never out */
1157 local_page_free_avg += (rss/short_avg_interval) * AVE_SCALE;
1158 }
1159 if (outtask != TASK_NULL)
1160 task_deallocate(outtask);
1161 }
1162
1163 /*
1164 * If there is an eligible task to bring in and there are at
1165 * least vm_page_free_target free pages, swap it in. If task
1166 * swapping has been disabled, bring the task in anyway.
1167 */
1168 if (intask && ((local_page_free_avg / AVE_SCALE) >=
1169 vm_page_free_target ||
1170 stop_swapping || !task_swap_enable)) {
1171 if (task_swapin(intask, FALSE) == KERN_SUCCESS) {
1172 unsigned long rss;
1173 #if TASK_SW_DEBUG
1174 if (task_swap_debug)
1175 print_pid(intask, local_page_free_avg / AVE_SCALE,
1176 vm_page_free_target, ">=",
1177 "in");
1178 #endif
1179 rss = intask->swap_rss;
1180 if (intask->swap_nswap == 1)
1181 rss /= 2; /* divide by 2 if never out */
1182 local_page_free_avg -= (rss/short_avg_interval) * AVE_SCALE;
1183 }
1184 }
1185 /*
1186 * XXX
1187 * Here we have to decide whether to continue swapping
1188 * in and/or out before sleeping. The decision should
1189 * be made based on the previous action (swapin/out) and
1190 * current system parameters, such as paging rates and
1191 * demand.
1192 * The function, compute_vm_averages, which does these
1193 * calculations, depends on being called every second,
1194 * so we can't just do the same thing.
1195 */
1196 if (++loopcnt < MAX_LOOP)
1197 continue;
1198
1199 /*
1200 * Arrange to be awakened if paging is still heavy or there are
1201 * any tasks partially or completely swapped out. (Otherwise,
1202 * the wakeup will come from the external trigger(s).)
1203 */
1204 timeout = 0;
1205 if (start_swapping)
1206 timeout = task_swap_cycle_time;
1207 else {
1208 task_swapper_lock();
1209 if (!queue_empty(&swapped_tasks))
1210 timeout = min_swap_time;
1211 task_swapper_unlock();
1212 }
1213 assert_wait((event_t)&swapped_tasks, THREAD_UNINT);
1214 if (timeout) {
1215 if (task_swap_debug)
1216 printf("task_swapper: set timeout of %d\n",
1217 timeout);
1218 thread_set_timeout(timeout, NSEC_PER_SEC);
1219 }
1220 if (task_swap_debug)
1221 printf("task_swapper: blocking\n");
1222 thread_block(THREAD_CONTINUE_NULL);
1223 if (timeout) {
1224 thread_cancel_timeout(current_thread());
1225 }
1226 /* reset locals */
1227 loopcnt = 0;
1228 local_page_free_avg = vm_page_free_avg;
1229 }
1230 }
1231 }
1232
1233 /* from BSD */
1234 #define ave(smooth, cnt, time) \
1235 smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time)
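
/*
 * Worked example (editorial addition, not in the original source):
 * with time == 5 (short_avg_interval) this computes
 * smooth = (4*smooth + cnt*AVE_SCALE) / 5, an exponentially decaying
 * average that gives ~1/5 weight to the newest sample.  Starting from
 * smooth == 0 with a steady cnt of 100, successive values are
 * 20480, 36864, 49971, ..., approaching 100 * AVE_SCALE == 102400.
 */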
1236
1237 /*
1238 * We estimate the system paging load in more than one metric:
1239 * 1) the total number of calls into the function, vm_page_grab,
1240 * which allocates all page frames for real pages.
1241 * 2) the total number of pages paged in and out of paging files.
1242 * This is a measure of page cleaning and faulting from backing
1243 * store.
1244 *
1245 * When either metric passes a threshold, tasks are swapped out.
1246 */
1247 long last_grab_count = 0;
1248 long last_pageout_count = 0;
1249
1250 /*
1251 * compute_vm_averages: [exported]
1252 *
1253 * This function is to be called once a second to calculate average paging
1254 * demand and average numbers of free pages for use by the task swapper.
1255 * Can also be used to wake up task swapper at desired thresholds.
1256 *
1257 * NOTE: this function is single-threaded, and requires locking if
1258 * ever there are multiple callers.
1259 */
1260 void
1261 compute_vm_averages(void)
1262 {
1263 extern unsigned long vm_page_grab_count;
1264 long grab_count, pageout_count;
1265 int i;
1266
1267 ave(vm_page_free_avg, vm_page_free_count, short_avg_interval);
1268 ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval);
1269
1270 /*
1271 * NOTE: the vm_page_grab_count and vm_stat structure are
1272 * under control of vm_page_queue_free_lock. We're simply reading
1273 * memory here, and the numbers don't depend on each other, so
1274 * no lock is taken.
1275 */
1276
1277 grab_count = vm_page_grab_count;
1278 pageout_count = 0;
1279 for (i = 0; i < NCPUS; i++) {
1280 pageout_count += vm_stat[i].pageouts;
1281 }
1282
1283 ave(vm_pageout_rate_avg, pageout_count - last_pageout_count,
1284 short_avg_interval);
1285 ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count,
1286 long_avg_interval);
1287 ave(vm_grab_rate_avg, grab_count - last_grab_count,
1288 short_avg_interval);
1289 last_grab_count = grab_count;
1290 last_pageout_count = pageout_count;
1291
1292 /*
1293 * Adjust swap_{start,stop}_pageout_rate to the paging rate peak.
1294 * This is an attempt to find the optimum paging rates at which
1295 * to trigger task swapping on or off to regulate paging activity,
1296 * depending on the hardware capacity.
1297 */
1298 if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) {
1301 vm_pageout_rate_peakavg = vm_pageout_rate_avg;
1302 swap_start_pageout_rate =
1303 vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100;
1304 swap_stop_pageout_rate =
1305 vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100;
1306 }
1307
1308 #if TASK_SW_DEBUG
1309 /*
1310 * For measurements, allow fixed values.
1311 */
1312 if (fixed_swap_start_pageout_rate)
1313 swap_start_pageout_rate = fixed_swap_start_pageout_rate;
1314 if (fixed_swap_stop_pageout_rate)
1315 swap_stop_pageout_rate = fixed_swap_stop_pageout_rate;
1316 #endif /* TASK_SW_DEBUG */
1317
1318 #if TASK_SW_DEBUG
1319 if (task_swap_stats)
1320 printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n",
1321 vm_pageout_rate_avg / AVE_SCALE,
1322 vm_pageout_rate_longavg / AVE_SCALE,
1323 swap_start_pageout_rate / AVE_SCALE,
1324 swap_stop_pageout_rate / AVE_SCALE,
1325 vm_page_free_avg / AVE_SCALE,
1326 vm_page_free_longavg / AVE_SCALE,
1327 vm_page_free_target);
1328 #endif /* TASK_SW_DEBUG */
1329
1330 if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) {
1331 if (task_swap_on) {
1332 /* The following is a delicate attempt to balance the
1333 * need for reasonably rapid response to system
1334 * thrashing, with the equally important desire to
1335 * prevent the onset of swapping simply because of a
1336 * short burst of paging activity.
1337 */
1338 if (((vm_pageout_rate_longavg > swap_stop_pageout_rate) &&
1339 (vm_pageout_rate_avg > swap_start_pageout_rate)) ||
1340 (vm_pageout_rate_avg > vm_pageout_rate_peakavg) ||
1341 (vm_grab_rate_avg > max_grab_rate))
1342 wake_task_swapper(FALSE);
1343 }
1344 } else /* page demand is low; should consider swapin */ {
1345 if (tasks_swapped_out != 0)
1346 wake_task_swapper(TRUE);
1347 }
1348 }
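
/*
 * Usage note (editorial addition, not in the original source): the
 * decaying averages above assume a fixed one-second sampling period,
 * so compute_vm_averages() is expected to be driven from a
 * once-per-second tick, e.g.:
 *
 *	once per second:
 *		compute_vm_averages();
 */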
1349
1350 void
1351 task_swapout_eligible(task_t task)
1352 {
1353 #if TASK_SW_DEBUG
1354 task_swapper_lock();
1355 if (task_swap_debug && on_swapped_list(task)) {
1356 printf("swapout_eligible: task 0x%X on swapped list\n", task);
1357 Debugger("");
1358 }
1359 task_swapper_unlock();
1360 #endif
1361 task_swapout_lock();
1362 task_lock(task);
1363 #if TASK_SW_DEBUG
1364 if (task->swap_flags & TASK_SW_ELIGIBLE) {
1365 printf("swapout_eligible: task 0x%X already eligible\n", task);
1366 }
1367 #endif /* TASK_SW_DEBUG */
1368 if ((task->swap_state == TASK_SW_IN) &&
1369 ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) {
1370 queue_enter(&eligible_tasks,task,task_t,swapped_tasks);
1371 task->swap_flags |= TASK_SW_ELIGIBLE;
1372 }
1373 task_unlock(task);
1374 task_swapout_unlock();
1375 }
1376
1377 void
1378 task_swapout_ineligible(task_t task)
1379 {
1380 #if TASK_SW_DEBUG
1381 task_swapper_lock();
1382 if (task_swap_debug && on_swapped_list(task)) {
1383 printf("swapout_ineligible: task 0x%X on swapped list\n", task);
1384 Debugger("");
1385 }
1386 task_swapper_unlock();
1387 #endif
1388 task_swapout_lock();
1389 task_lock(task);
1390 #if TASK_SW_DEBUG
1391 if (!(task->swap_flags & TASK_SW_ELIGIBLE))
1392 printf("swapout_ineligible: task 0x%X already inel.\n", task);
1393 #endif /* TASK_SW_DEBUG */
1394 if ((task->swap_state != TASK_SW_IN) &&
1395 (task->swap_flags & TASK_SW_ELIGIBLE)) {
1396 queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
1397 task->swap_flags &= ~TASK_SW_ELIGIBLE;
1398 }
1399 task_unlock(task);
1400 task_swapout_unlock();
1401 }
1402
1403 int task_swap_ast_aborted = 0;
1404
1405 /*
1406 * Process an AST_SWAPOUT.
1407 */
1408 void
1409 swapout_ast()
1410 {
1411 spl_t s;
1412 thread_act_t act;
1413 thread_t thread;
1414
1415 act = current_act();
1416
1417 /*
1418 * Task is being swapped out. First mark it as suspended
1419 * and halted, then call thread_swapout_enqueue to put
1420 * the thread on the queue for task_swap_swapout_thread
1421 * to swap out the thread.
1422 */
1423 /*
1424 * Don't swap unswappable threads
1425 */
1426 thread = act_lock_thread(act);
1427 s = splsched();
1428 if (thread)
1429 thread_lock(thread);
1430 if ((act->ast & AST_SWAPOUT) == 0) {
1431 /*
1432 * Race with task_swapin. Abort swapout.
1433 */
1434 task_swap_ast_aborted++; /* not locked XXX */
1435 if (thread)
1436 thread_unlock(thread);
1437 splx(s);
1438 act_unlock_thread(act);
1439 } else if (act->swap_state == TH_SW_IN) {
1440 /*
1441 * Mark swap_state as TH_SW_TASK_SWAPPING to avoid
1442 * race with thread swapper, which will only
1443 * swap thread if swap_state is TH_SW_IN.
1444 * This way, the thread can only be swapped by
1445 * the task swapping mechanism.
1446 */
1447 act->swap_state |= TH_SW_TASK_SWAPPING;
1448 /* assert(act->suspend_count == 0); XXX ? */
1449 if (thread)
1450 thread_unlock(thread);
1451 if (act->suspend_count++ == 0) /* inline thread_hold */
1452 install_special_handler(act);
1453 /* self->state |= TH_HALTED; */
1454 thread_ast_clear(act, AST_SWAPOUT);
1455 /*
1456 * Initialize the swap_queue fields to allow an extra
1457 * queue_remove() in task_swapin if we lose the race
1458 * (task_swapin can be called before we complete
1459 * thread_swapout_enqueue).
1460 */
1461 queue_init((queue_t) &act->swap_queue);
1462 splx(s);
1463 act_unlock_thread(act);
1464 /* this must be called at normal interrupt level */
1465 thread_swapout_enqueue(act);
1466 } else {
1467 /* thread isn't swappable; continue running */
1468 assert(act->swap_state == TH_SW_UNSWAPPABLE);
1469 if (thread)
1470 thread_unlock(thread);
1471 thread_ast_clear(act, AST_SWAPOUT);
1472 splx(s);
1473 act_unlock_thread(act);
1474 }
1475 }
1476
1477 #endif /* TASK_SWAPPER */