#include <ipc/ipc_port.h> /* We use something from in here */
-/*
- * Note: if TASK_SWAPPER is disabled, then this file defines only
- * a stub version of task_swappable(), so that the service can always
- * be defined, even if swapping has been configured out of the kernel.
- */
-#if TASK_SWAPPER
-
-/* temporary debug flags */
-#define TASK_SW_DEBUG 1
-#define TASK_SW_STATS 1
-
-int task_swap_debug = 0;
-int task_swap_stats = 0;
-int task_swap_enable = 1;
-int task_swap_on = 1;
-
-queue_head_t swapped_tasks; /* completely swapped out tasks */
-queue_head_t swapout_thread_q; /* threads to be swapped out */
-mutex_t task_swapper_lock; /* protects above queue */
-
-#define task_swapper_lock() mutex_lock(&task_swapper_lock)
-#define task_swapper_unlock() mutex_unlock(&task_swapper_lock)
-#define task_swapper_wakeup() thread_wakeup((event_t)&swapout_thread_q)
-#define task_swapper_sleep() thread_sleep_mutex((event_t)&swapout_thread_q, \
- &task_swapper_lock, \
- THREAD_UNINT)
-
-
-queue_head_t eligible_tasks; /* tasks eligible for swapout */
-mutex_t task_swapout_list_lock; /* protects above queue */
-#define task_swapout_lock() mutex_lock(&task_swapout_list_lock)
-#define task_swapout_unlock() mutex_unlock(&task_swapout_list_lock)
-
-/*
- * The next section of constants and globals are tunable parameters
- * used in making swapping decisions. They may be changed dynamically
- * without adversely affecting the robustness of the system; however,
- * the policy will change, one way or the other.
- */
-
-#define SHORT_AVG_INTERVAL 5 /* in seconds */
-#define LONG_AVG_INTERVAL 30 /* in seconds */
-#define AVE_SCALE 1024
-
-unsigned int short_avg_interval = SHORT_AVG_INTERVAL;
-unsigned int long_avg_interval = LONG_AVG_INTERVAL;
-
-#ifndef MIN_SWAP_PAGEOUT_RATE
-#define MIN_SWAP_PAGEOUT_RATE 10
-#endif
-
-/*
- * The following are all stored in fixed-point representation (the actual
- * value times AVE_SCALE), to allow more accurate computing of decaying
- * averages. So all variables that end with "avg" must be divided by
- * AVE_SCALE to convert them or compare them to ints.
- */
-unsigned int vm_grab_rate_avg;
-unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
-unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
-unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
-unsigned int vm_page_free_avg; /* average free pages over short_avg_interval */
-unsigned int vm_page_free_longavg; /* avg free pages over long_avg_interval */
-
-/*
- * Trigger task swapping when paging activity reaches
- * SWAP_PAGEOUT_HIGH_WATER_MARK per cent of the maximum paging activity ever observed.
- * Turn off task swapping when paging activity goes back down to below
- * SWAP_PAGEOUT_LOW_WATER_MARK per cent of the maximum.
- * These numbers have been found empirically and might need some tuning...
- */
-#ifndef SWAP_PAGEOUT_HIGH_WATER_MARK
-#define SWAP_PAGEOUT_HIGH_WATER_MARK 30
-#endif
-#ifndef SWAP_PAGEOUT_LOW_WATER_MARK
-#define SWAP_PAGEOUT_LOW_WATER_MARK 10
-#endif
-
-#ifndef MAX_GRAB_RATE
-#define MAX_GRAB_RATE ((unsigned int) -1) /* XXX no maximum */
-#endif
-
-/*
- * swap_{start,stop}_pageout_rate start at the minimum value, then increase
- * to adjust to the hardware's performance, following the paging rate peaks.
- */
-unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK;
-unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK;
-unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
- SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
-unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
- SWAP_PAGEOUT_LOW_WATER_MARK / 100;
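
For a concrete sense of the numbers, here is a minimal standalone sketch (plain C with illustrative values, not part of this file) of what these fixed-point thresholds come to with the defaults above, and how they rescale when the observed paging peak rises, mirroring the adjustment made later in compute_vm_averages():

#include <stdio.h>

#define AVE_SCALE                    1024
#define MIN_SWAP_PAGEOUT_RATE        10    /* pages/sec */
#define SWAP_PAGEOUT_HIGH_WATER_MARK 30    /* per cent of observed peak */
#define SWAP_PAGEOUT_LOW_WATER_MARK  10    /* per cent of observed peak */

int
main(void)
{
	/* At boot the peak is just the configured minimum, in fixed point. */
	unsigned int peak  = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
	unsigned int start = peak * SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
	unsigned int stop  = peak * SWAP_PAGEOUT_LOW_WATER_MARK / 100;

	printf("boot: start=%u stop=%u (%u / %u pages/sec)\n",
	    start, stop, start / AVE_SCALE, stop / AVE_SCALE);

	/*
	 * If the observed pageout-rate average later peaks at, say,
	 * 200 pages/sec, both thresholds are rescaled from that peak.
	 */
	peak  = 200 * AVE_SCALE;
	start = peak * SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
	stop  = peak * SWAP_PAGEOUT_LOW_WATER_MARK / 100;
	printf("peak=200: start=%u stop=%u (%u / %u pages/sec)\n",
	    start, stop, start / AVE_SCALE, stop / AVE_SCALE);
	return 0;
}
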
-#if TASK_SW_DEBUG
-unsigned int fixed_swap_start_pageout_rate = 0; /* only for testing purpose */
-unsigned int fixed_swap_stop_pageout_rate = 0; /* only for testing purpose */
-#endif /* TASK_SW_DEBUG */
-unsigned int max_grab_rate = MAX_GRAB_RATE;
-
-#ifndef MIN_SWAP_TIME
-#define MIN_SWAP_TIME 1
-#endif
-
-int min_swap_time = MIN_SWAP_TIME; /* in seconds */
-
-#ifndef MIN_RES_TIME
-#define MIN_RES_TIME 6
-#endif
-
-int min_res_time = MIN_RES_TIME; /* in seconds */
-
-#ifndef MIN_ACTIVE_TASKS
-#define MIN_ACTIVE_TASKS 4
-#endif
-
-int min_active_tasks = MIN_ACTIVE_TASKS;
-
-#ifndef TASK_SWAP_CYCLE_TIME
-#define TASK_SWAP_CYCLE_TIME 2
-#endif
-
-int task_swap_cycle_time = TASK_SWAP_CYCLE_TIME; /* in seconds */
-
-int last_task_swap_cycle = 0;
-
-/* temporary statistics */
-int task_swapouts = 0;
-int task_swapins = 0;
-int task_swaprss_out = 0; /* total rss at swapout time */
-int task_swaprss_in = 0; /* total rss at swapin time */
-int task_swap_total_time = 0; /* total time spent swapped out */
-int tasks_swapped_out = 0; /* number of tasks swapped out now */
-
-#ifdef TASK_SW_STATS
-#define TASK_STATS_INCR(cnt) (cnt)++
-#else
-#define TASK_STATS_INCR(cnt)
-#endif /* TASK_SW_STATS */
-
-#if TASK_SW_DEBUG
-boolean_t on_swapped_list(task_t task); /* forward */
-/*
- * Debug function to determine if a task is already on the
- * swapped out tasks list. It also checks for tasks on the list
- * that are in an illegal state (i.e. swapped in).
- */
-boolean_t
-on_swapped_list(task_t task)
-{
- task_t ltask;
- /* task_swapper_lock is locked. */
-
- if (queue_empty(&swapped_tasks)) {
- return(FALSE);
- }
- ltask = (task_t)queue_first(&swapped_tasks);
- while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) {
- /* check for illegal state */
- if (ltask->swap_state == TASK_SW_IN) {
- printf("on_swapped_list and in: 0x%X\n",ltask);
- Debugger("");
- }
- if (ltask == task)
- return(TRUE);
-		ltask = (task_t)queue_next(&ltask->swapped_tasks);
- }
- return(FALSE);
-}
-#endif /* TASK_SW_DEBUG */
-
-/*
- * task_swapper_init: [exported]
- */
-void
-task_swapper_init()
-{
- queue_init(&swapped_tasks);
- queue_init(&eligible_tasks);
- queue_init(&swapout_thread_q);
- mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP);
- mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT);
- vm_page_free_avg = vm_page_free_count * AVE_SCALE;
- vm_page_free_longavg = vm_page_free_count * AVE_SCALE;
-}
-
-#endif /* TASK_SWAPPER */
-
/*
* task_swappable: [exported]
*
* Make a task swappable or non-swappable. If made non-swappable,
* it will be swapped in.
- *
- * Locking: task_swapout_lock is taken before task lock.
*/
kern_return_t
task_swappable(
	host_priv_t host_priv,
	task_t task,
	boolean_t make_swappable)
{
if (host_priv == HOST_PRIV_NULL)
- return(KERN_INVALID_ARGUMENT);
+ return (KERN_INVALID_ARGUMENT);
if (task == TASK_NULL)
- return(KERN_INVALID_ARGUMENT);
-
-#if !TASK_SWAPPER
-
- /*
- * If we don't support swapping, this call is purely advisory.
- */
- return(KERN_SUCCESS);
+ return (KERN_INVALID_ARGUMENT);
-#else /* TASK_SWAPPER */
-
- task_lock(task);
- if (make_swappable) {
- /* make task swappable */
- if (task->swap_state == TASK_SW_UNSWAPPABLE) {
- task->swap_state = TASK_SW_IN;
- task_unlock(task);
- task_swapout_eligible(task);
- }
- } else {
- switch (task->swap_state) {
- case TASK_SW_IN:
- task->swap_state = TASK_SW_UNSWAPPABLE;
- task_unlock(task);
- task_swapout_ineligible(task);
- break;
- case TASK_SW_UNSWAPPABLE:
- task_unlock(task);
- break;
- default:
- /*
- * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT,
- * or TASK_SW_COMING_IN. task_swapin handles all
- * three, and its default case will catch any bad
- * states.
- */
- task_unlock(task);
- task_swapin(task, TRUE);
- break;
- }
- }
- return(KERN_SUCCESS);
-
-#endif /* TASK_SWAPPER */
-
-}
-
-#if TASK_SWAPPER
-
-/*
- * task_swapout:
- * A reference to the task must be held.
- *
- * Start swapping out a task by sending an AST_SWAPOUT to each thread.
- * When the threads reach a clean point, they queue themselves up on the
- * swapout_thread_q to be swapped out by the task_swap_swapout_thread.
- * The task can be swapped in at any point in this process.
- *
- * A task will not be fully swapped out (i.e. its map residence count
- * at zero) until all currently-swapped threads run and reach
- * a clean point, at which time they will be swapped again,
- * decrementing the swap_ast_waiting count on the task.
- *
- * Locking: no locks held upon entry and exit.
- * Task_lock is held throughout this function.
- */
-kern_return_t
-task_swapout(task_t task)
-{
- thread_act_t thr_act;
- thread_t thread;
- queue_head_t *list;
- int s;
-
- task_swapout_lock();
- task_lock(task);
/*
- * NOTE: look into turning these into assertions if they
- * are invariants.
+	 * We don't support swapping, so this call is purely advisory.
*/
- if ((task->swap_state != TASK_SW_IN) || (!task->active)) {
- task_unlock(task);
- task_swapout_unlock();
- return(KERN_FAILURE);
- }
- if (task->swap_flags & TASK_SW_ELIGIBLE) {
- queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
- task->swap_flags &= ~TASK_SW_ELIGIBLE;
- }
- task_swapout_unlock();
-
- /* set state to avoid races with task_swappable(FALSE) */
- task->swap_state = TASK_SW_GOING_OUT;
- task->swap_rss = pmap_resident_count(task->map->pmap);
- task_swaprss_out += task->swap_rss;
- task->swap_ast_waiting = task->thr_act_count;
-
- /*
- * halt all threads in this task:
- * We don't need the thread list lock for traversal.
- */
- list = &task->thr_acts;
- thr_act = (thread_act_t) queue_first(list);
- while (!queue_end(list, (queue_entry_t) thr_act)) {
- boolean_t swappable;
- thread_act_t ract;
-
- thread = act_lock_thread(thr_act);
- s = splsched();
- if (!thread)
- swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE);
- else {
- thread_lock(thread);
- swappable = TRUE;
- for (ract = thread->top_act; ract; ract = ract->lower)
- if (ract->swap_state == TH_SW_UNSWAPPABLE) {
- swappable = FALSE;
- break;
- }
- }
- if (swappable)
- thread_ast_set(thr_act, AST_SWAPOUT);
- if (thread)
- thread_unlock(thread);
- splx(s);
- assert((thr_act->ast & AST_TERMINATE) == 0);
- act_unlock_thread(thr_act);
- thr_act = (thread_act_t) queue_next(&thr_act->thr_acts);
- }
-
- task->swap_stamp = sched_tick;
- task->swap_nswap++;
- assert((task->swap_flags&TASK_SW_WANT_IN) == 0);
- /* put task on the queue of swapped out tasks */
- task_swapper_lock();
-#if TASK_SW_DEBUG
- if (task_swap_debug && on_swapped_list(task)) {
- printf("task 0x%X already on list\n", task);
- Debugger("");
- }
-#endif /* TASK_SW_DEBUG */
- queue_enter(&swapped_tasks, task, task_t, swapped_tasks);
- tasks_swapped_out++;
- task_swapouts++;
- task_swapper_unlock();
- task_unlock(task);
-
- return(KERN_SUCCESS);
+ return (KERN_SUCCESS);
}
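
task_swappable() stays exported through the host_priv interface even though it no longer changes any swapping state. A hedged user-space sketch of poking it is below; it assumes the MIG prototype comes in via <mach/mach.h> on systems that provide this interface, and since mach_host_self() is only the unprivileged host name port, the exact result for a non-privileged caller depends on the kernel's port conversion:

#include <mach/mach.h>
#include <mach/mach_error.h>
#include <stdio.h>

int
main(void)
{
	kern_return_t kr;

	/*
	 * A caller that really holds the host-privilege port would get the
	 * advisory KERN_SUCCESS; anyone else should be rejected by the
	 * HOST_PRIV_NULL check in the stub above.
	 */
	kr = task_swappable(mach_host_self(), mach_task_self(), FALSE);
	printf("task_swappable returned %d (%s)\n", kr, mach_error_string(kr));
	return 0;
}
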
-
-#ifdef TASK_SW_STATS
-int task_sw_race_in = 0;
-int task_sw_race_coming_in = 0;
-int task_sw_race_going_out = 0;
-int task_sw_before_ast = 0;
-int task_sw_before_swap = 0;
-int task_sw_after_swap = 0;
-int task_sw_race_in_won = 0;
-int task_sw_unswappable = 0;
-int task_sw_act_inactive = 0;
-#endif /* TASK_SW_STATS */
-
-/*
- * thread_swapout_enqueue is called by thread_halt_self when it
- * processes AST_SWAPOUT to enqueue threads to be swapped out.
- * It must be called at normal interrupt priority for the
- * sake of the task_swapper_lock.
- *
- * There can be races with task swapin here.
- * First lock task and decrement swap_ast_waiting count, and if
- * it's 0, we can decrement the residence count on the task's map
- * and set the task's swap state to TASK_SW_OUT.
- */
-void
-thread_swapout_enqueue(thread_act_t thr_act)
-{
- task_t task = thr_act->task;
- task_lock(task);
- /*
- * If the swap_state is not TASK_SW_GOING_OUT, then
- * task_swapin has beaten us to this operation, and
- * we have nothing to do.
- */
- if (task->swap_state != TASK_SW_GOING_OUT) {
- task_unlock(task);
- return;
- }
- if (--task->swap_ast_waiting == 0) {
- vm_map_t map = task->map;
- task->swap_state = TASK_SW_OUT;
- task_unlock(task);
- mutex_lock(&map->s_lock);
- vm_map_res_deallocate(map);
- mutex_unlock(&map->s_lock);
- } else
- task_unlock(task);
-
- task_swapper_lock();
- act_lock(thr_act);
- if (! (thr_act->swap_state & TH_SW_TASK_SWAPPING)) {
- /*
- * We lost a race with task_swapin(): don't enqueue.
- */
- } else {
- queue_enter(&swapout_thread_q, thr_act,
- thread_act_t, swap_queue);
- task_swapper_wakeup();
- }
- act_unlock(thr_act);
- task_swapper_unlock();
-}
-
-/*
- * task_swap_swapout_thread: [exported]
- *
- * Executes as a separate kernel thread.
- * Its job is to swap out threads that have been halted by AST_SWAPOUT.
- */
-void
-task_swap_swapout_thread(void)
-{
- thread_act_t thr_act;
- thread_t thread, nthread;
- task_t task;
- int s;
-
- thread_swappable(current_act(), FALSE);
- stack_privilege(current_thread());
-
- spllo();
-
- task_swapper_lock();
- while (TRUE) {
- while (! queue_empty(&swapout_thread_q)) {
-
- queue_remove_first(&swapout_thread_q, thr_act,
- thread_act_t, swap_queue);
- /*
- * If we're racing with task_swapin, we need
- * to make it safe for it to do remque on the
- * thread, so make its links point to itself.
- * Allowing this ugliness is cheaper than
- * making task_swapin search the entire queue.
- */
- act_lock(thr_act);
- queue_init((queue_t) &thr_act->swap_queue);
- act_unlock(thr_act);
- task_swapper_unlock();
- /*
- * Wait for thread's RUN bit to be deasserted.
- */
- thread = act_lock_thread(thr_act);
- if (thread == THREAD_NULL)
- act_unlock_thread(thr_act);
- else {
- boolean_t r;
-
- thread_reference(thread);
- thread_hold(thr_act);
- act_unlock_thread(thr_act);
- r = thread_stop_wait(thread);
- nthread = act_lock_thread(thr_act);
- thread_release(thr_act);
- thread_deallocate(thread);
- act_unlock_thread(thr_act);
- if (!r || nthread != thread) {
- task_swapper_lock();
- continue;
- }
- }
- task = thr_act->task;
- task_lock(task);
- /*
- * we can race with swapin, which would set the
- * state to TASK_SW_IN.
- */
- if ((task->swap_state != TASK_SW_OUT) &&
- (task->swap_state != TASK_SW_GOING_OUT)) {
- task_unlock(task);
- task_swapper_lock();
- TASK_STATS_INCR(task_sw_race_in_won);
- if (thread != THREAD_NULL)
- thread_unstop(thread);
- continue;
- }
- nthread = act_lock_thread(thr_act);
- if (nthread != thread || thr_act->active == FALSE) {
- act_unlock_thread(thr_act);
- task_unlock(task);
- task_swapper_lock();
- TASK_STATS_INCR(task_sw_act_inactive);
- if (thread != THREAD_NULL)
- thread_unstop(thread);
- continue;
- }
- s = splsched();
- if (thread != THREAD_NULL)
- thread_lock(thread);
- /*
- * Thread cannot have been swapped out yet because
- * TH_SW_TASK_SWAPPING was set in AST. If task_swapin
- * beat us here, we either wouldn't have found it on
- * the queue, or the task->swap_state would have
- * changed. The synchronization is on the
- * task's swap_state and the task_lock.
- * The thread can't be swapped in any other way
- * because its task has been swapped.
- */
- assert(thr_act->swap_state & TH_SW_TASK_SWAPPING);
- assert(thread == THREAD_NULL ||
- !(thread->state & (TH_SWAPPED_OUT|TH_RUN)));
- assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN);
- /* assert(thread->state & TH_HALTED); */
- /* this also clears TH_SW_TASK_SWAPPING flag */
- thr_act->swap_state = TH_SW_GOING_OUT;
- if (thread != THREAD_NULL) {
- if (thread->top_act == thr_act) {
- thread->state |= TH_SWAPPED_OUT;
- /*
- * Once we unlock the task, things can happen
- * to the thread, so make sure it's consistent
- * for thread_swapout.
- */
- }
- thread->ref_count++;
- thread_unlock(thread);
- thread_unstop(thread);
- }
- splx(s);
- act_locked_act_reference(thr_act);
- act_unlock_thread(thr_act);
- task_unlock(task);
-
- thread_swapout(thr_act); /* do the work */
-
- if (thread != THREAD_NULL)
- thread_deallocate(thread);
- act_deallocate(thr_act);
- task_swapper_lock();
- }
- task_swapper_sleep();
- }
-}
-
-/*
- * task_swapin:
- *
- * Make a task resident.
- * Performs all of the work to make a task resident and possibly
- * non-swappable. If we race with a competing task_swapin call,
- * we wait for its completion, then return.
- *
- * Locking: no locks held upon entry and exit.
- *
- * Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the
- * state is TASK_SW_COMING_IN.
- */
-
-kern_return_t
-task_swapin(task_t task, boolean_t make_unswappable)
-{
- register queue_head_t *list;
- register thread_act_t thr_act, next;
- thread_t thread;
- int s;
- boolean_t swappable = TRUE;
-
- task_lock(task);
- switch (task->swap_state) {
- case TASK_SW_OUT:
- {
- vm_map_t map = task->map;
- /*
- * Task has made it all the way out, which means
- * that vm_map_res_deallocate has been done; set
- * state to TASK_SW_COMING_IN, then bring map
- * back in. We could actually be racing with
- * the thread_swapout_enqueue, which does the
- * vm_map_res_deallocate, but that race is covered.
- */
- task->swap_state = TASK_SW_COMING_IN;
- assert(task->swap_ast_waiting == 0);
- assert(map->res_count >= 0);
- task_unlock(task);
- mutex_lock(&map->s_lock);
- vm_map_res_reference(map);
- mutex_unlock(&map->s_lock);
- task_lock(task);
- assert(task->swap_state == TASK_SW_COMING_IN);
- }
- break;
-
- case TASK_SW_GOING_OUT:
- /*
- * Task isn't all the way out yet. There is
- * still at least one thread not swapped, and
- * vm_map_res_deallocate has not been done.
- */
- task->swap_state = TASK_SW_COMING_IN;
- assert(task->swap_ast_waiting > 0 ||
- (task->swap_ast_waiting == 0 &&
- task->thr_act_count == 0));
- assert(task->map->res_count > 0);
- TASK_STATS_INCR(task_sw_race_going_out);
- break;
- case TASK_SW_IN:
- assert(task->map->res_count > 0);
-#if TASK_SW_DEBUG
- task_swapper_lock();
- if (task_swap_debug && on_swapped_list(task)) {
- printf("task 0x%X on list, state is SW_IN\n",
- task);
- Debugger("");
- }
- task_swapper_unlock();
-#endif /* TASK_SW_DEBUG */
- TASK_STATS_INCR(task_sw_race_in);
- if (make_unswappable) {
- task->swap_state = TASK_SW_UNSWAPPABLE;
- task_unlock(task);
- task_swapout_ineligible(task);
- } else
- task_unlock(task);
- return(KERN_SUCCESS);
- case TASK_SW_COMING_IN:
- /*
- * Raced with another task_swapin and lost;
- * wait for other one to complete first
- */
- assert(task->map->res_count >= 0);
- /*
- * set MAKE_UNSWAPPABLE so that whoever is swapping
- * the task in will make it unswappable, and return
- */
- if (make_unswappable)
- task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
- task->swap_flags |= TASK_SW_WANT_IN;
- assert_wait((event_t)&task->swap_state, THREAD_UNINT);
- task_unlock(task);
- thread_block(THREAD_CONTINUE_NULL);
- TASK_STATS_INCR(task_sw_race_coming_in);
- return(KERN_SUCCESS);
- case TASK_SW_UNSWAPPABLE:
- /*
- * This can happen, since task_terminate
- * unconditionally calls task_swapin.
- */
- task_unlock(task);
- return(KERN_SUCCESS);
- default:
- panic("task_swapin bad state");
- break;
- }
- if (make_unswappable)
- task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE;
- assert(task->swap_state == TASK_SW_COMING_IN);
- task_swapper_lock();
-#if TASK_SW_DEBUG
- if (task_swap_debug && !on_swapped_list(task)) {
- printf("task 0x%X not on list\n", task);
- Debugger("");
- }
-#endif /* TASK_SW_DEBUG */
- queue_remove(&swapped_tasks, task, task_t, swapped_tasks);
- tasks_swapped_out--;
- task_swapins++;
- task_swapper_unlock();
-
- /*
- * Iterate through all threads for this task and
- * release them, as required. They may not have been swapped
- * out yet. The task remains locked throughout.
- */
- list = &task->thr_acts;
- thr_act = (thread_act_t) queue_first(list);
- while (!queue_end(list, (queue_entry_t) thr_act)) {
- boolean_t need_to_release;
- next = (thread_act_t) queue_next(&thr_act->thr_acts);
- /*
- * Keep task_swapper_lock across thread handling
- * to synchronize with task_swap_swapout_thread
- */
- task_swapper_lock();
- thread = act_lock_thread(thr_act);
- s = splsched();
- if (thr_act->ast & AST_SWAPOUT) {
- /* thread hasn't gotten the AST yet, just clear it */
- thread_ast_clear(thr_act, AST_SWAPOUT);
- need_to_release = FALSE;
- TASK_STATS_INCR(task_sw_before_ast);
- splx(s);
- act_unlock_thread(thr_act);
- } else {
- /*
- * If AST_SWAPOUT was cleared, then thread_hold,
- * or equivalent was done.
- */
- need_to_release = TRUE;
- /*
- * Thread has hit AST, but it may not have
- * been dequeued yet, so we need to check.
- * NOTE: the thread may have been dequeued, but
- * has not yet been swapped (the task_swapper_lock
- * has been dropped, but the thread is not yet
- * locked), and the TH_SW_TASK_SWAPPING flag may
- * not have been cleared. In this case, we will do
- * an extra remque, which the task_swap_swapout_thread
- * has made safe, and clear the flag, which is also
- * checked by the t_s_s_t before doing the swapout.
- */
- if (thread)
- thread_lock(thread);
- if (thr_act->swap_state & TH_SW_TASK_SWAPPING) {
- /*
- * hasn't yet been dequeued for swapout,
- * so clear flags and dequeue it first.
- */
- thr_act->swap_state &= ~TH_SW_TASK_SWAPPING;
- assert(thr_act->thread == THREAD_NULL ||
- !(thr_act->thread->state &
- TH_SWAPPED_OUT));
- queue_remove(&swapout_thread_q, thr_act,
- thread_act_t, swap_queue);
- TASK_STATS_INCR(task_sw_before_swap);
- } else {
- TASK_STATS_INCR(task_sw_after_swap);
- /*
- * It's possible that the thread was
- * made unswappable before hitting the
- * AST, in which case it's still running.
- */
- if (thr_act->swap_state == TH_SW_UNSWAPPABLE) {
- need_to_release = FALSE;
- TASK_STATS_INCR(task_sw_unswappable);
- }
- }
- if (thread)
- thread_unlock(thread);
- splx(s);
- act_unlock_thread(thr_act);
- }
- task_swapper_unlock();
-
- /*
- * thread_release will swap in the thread if it's been
- * swapped out.
- */
- if (need_to_release) {
- act_lock_thread(thr_act);
- thread_release(thr_act);
- act_unlock_thread(thr_act);
- }
- thr_act = next;
- }
-
- if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) {
- task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE;
- task->swap_state = TASK_SW_UNSWAPPABLE;
- swappable = FALSE;
- } else {
- task->swap_state = TASK_SW_IN;
- }
-
- task_swaprss_in += pmap_resident_count(task->map->pmap);
- task_swap_total_time += sched_tick - task->swap_stamp;
- /* note when task came back in */
- task->swap_stamp = sched_tick;
- if (task->swap_flags & TASK_SW_WANT_IN) {
- task->swap_flags &= ~TASK_SW_WANT_IN;
- thread_wakeup((event_t)&task->swap_state);
- }
- assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0);
- task_unlock(task);
-#if TASK_SW_DEBUG
- task_swapper_lock();
- if (task_swap_debug && on_swapped_list(task)) {
- printf("task 0x%X on list at end of swap in\n", task);
- Debugger("");
- }
- task_swapper_unlock();
-#endif /* TASK_SW_DEBUG */
- /*
- * Make the task eligible to be swapped again
- */
- if (swappable)
- task_swapout_eligible(task);
- return(KERN_SUCCESS);
-}
-
-void wake_task_swapper(boolean_t now); /* forward */
-
-/*
- * wake_task_swapper: [exported]
- *
- * Wakes up task swapper if now == TRUE or if at least
- * task_swap_cycle_time has elapsed since the last call.
- *
- * NOTE: this function is not multithreaded, so if there is
- * more than one caller, it must be modified.
- */
-void
-wake_task_swapper(boolean_t now)
-{
- /* last_task_swap_cycle may require locking */
- if (now ||
- (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) {
- last_task_swap_cycle = sched_tick;
- if (task_swap_debug)
- printf("wake_task_swapper: waking swapper\n");
- thread_wakeup((event_t)&swapped_tasks); /* poke swapper */
- }
-}
-
-task_t pick_intask(void); /* forward */
-/*
- * pick_intask:
- * returns a task to be swapped in, or TASK_NULL if nothing suitable is found.
- *
- * current algorithm: Return the task that has been swapped out the
- * longest, as long as it is > min_swap_time. It will be dequeued
- * if actually swapped in.
- *
- * NOTE:**********************************************
- * task->swap_rss (the size when the task was swapped out) could be used to
- * further refine the selection. Another possibility would be to look at
- * the state of the thread(s) to see if the task/threads would run if they
- * were swapped in.
- * ***************************************************
- *
- * Locking: no locks held upon entry and exit.
- */
-task_t
-pick_intask(void)
-{
- register task_t task = TASK_NULL;
-
- task_swapper_lock();
- /* the oldest task is the first one */
- if (!queue_empty(&swapped_tasks)) {
- task = (task_t) queue_first(&swapped_tasks);
- assert(task != TASK_NULL);
- /* Make sure it's been out min_swap_time */
- if ((sched_tick - task->swap_stamp) < min_swap_time)
- task = TASK_NULL;
- }
- task_swapper_unlock();
- return(task);
-#if 0
- /*
- * This code looks at the entire list of swapped tasks, but since
- * it does not yet do anything but look at time swapped, we
- * can simply use the fact that the queue is ordered, and take
- * the first one off the queue.
- */
- task = (task_t)queue_first(&swapped_tasks);
- while (!queue_end(&swapped_tasks, (queue_entry_t)task)) {
- task_lock(task);
- tmp_time = sched_tick - task->swap_stamp;
- if (tmp_time > min_swap_time && tmp_time > time_swapped) {
- target_task = task;
- time_swapped = tmp_time;
- }
- task_unlock(task);
- task = (task_t)queue_next(&task->swapped_tasks);
- }
- task_swapper_unlock();
- return(target_task);
-#endif
-}
-
-task_t pick_outtask(void); /* forward */
-/*
- * pick_outtask:
- * returns a task to be swapped out, with a reference on the task,
- * or NULL if no suitable task is found.
- *
- * current algorithm:
- *
- * Examine all eligible tasks. While looking, use the first thread in
- * each task as an indication of the task's activity. Count up
- * "active" threads (those either runnable or sleeping). If the task
- * is active (by these criteria), swapped in, and resident
- * for at least min_res_time, then select the task with the largest
- * number of pages in memory. If there are less
- * than min_active_tasks active tasks in the system, then don't
- * swap anything out (this avoids swapping out the only running task
- * in the system, for example).
- *
- * NOTE: the task selected will not be removed from the eligible list.
- * This means that it will be selected again if it is not swapped
- * out, where it is removed from the list.
- *
- * Locking: no locks held upon entry and exit. Task_swapout_lock must be
- * taken before task locks.
- *
- * ***************************************************
- * TBD:
- * This algorithm only examines the first thread in the task. Currently, since
- * most swappable tasks in the system are single-threaded, this generalization
- * works reasonably well. However, the algorithm should be changed
- * to consider all threads in the task if more multi-threaded tasks were used.
- * ***************************************************
- */
-
-#ifdef TASK_SW_STATS
-int inactive_task_count = 0;
-int empty_task_count = 0;
-#endif /* TASK_SW_STATS */
-
-task_t
-pick_outtask(void)
-{
- register task_t task;
- register task_t target_task = TASK_NULL;
- unsigned long task_rss;
- unsigned long target_rss = 0;
- boolean_t wired;
- boolean_t active;
- int nactive = 0;
-
- task_swapout_lock();
- if (queue_empty(&eligible_tasks)) {
- /* not likely to happen */
- task_swapout_unlock();
- return(TASK_NULL);
- }
- task = (task_t)queue_first(&eligible_tasks);
- while (!queue_end(&eligible_tasks, (queue_entry_t)task)) {
- int s;
- register thread_act_t thr_act;
- thread_t th;
-
-
- task_lock(task);
- /*
- * Don't swap real-time tasks.
- * XXX Should we enforce that or can we let really critical
- * tasks use task_swappable() to make sure they never end up
-		 * on the eligible list?
- */
- if (task->policy & POLICYCLASS_FIXEDPRI) {
- goto tryagain;
- }
- if (!task->active) {
- TASK_STATS_INCR(inactive_task_count);
- goto tryagain;
- }
- if (task->res_act_count == 0) {
- TASK_STATS_INCR(empty_task_count);
- goto tryagain;
- }
- assert(!queue_empty(&task->thr_acts));
- thr_act = (thread_act_t)queue_first(&task->thr_acts);
- active = FALSE;
- th = act_lock_thread(thr_act);
- s = splsched();
- if (th != THREAD_NULL)
- thread_lock(th);
- if ((th == THREAD_NULL) ||
- (th->state == TH_RUN) ||
- (th->state & TH_WAIT)) {
- /*
- * thread is "active": either runnable
- * or sleeping. Count it and examine
- * it further below.
- */
- nactive++;
- active = TRUE;
- }
- if (th != THREAD_NULL)
- thread_unlock(th);
- splx(s);
- act_unlock_thread(thr_act);
- if (active &&
- (task->swap_state == TASK_SW_IN) &&
- ((sched_tick - task->swap_stamp) > min_res_time)) {
- long rescount = pmap_resident_count(task->map->pmap);
- /*
- * thread must be "active", task must be swapped
- * in and resident for at least min_res_time
- */
-#if 0
-/* DEBUG Test round-robin strategy. Picking biggest task could cause extreme
- * unfairness to such large interactive programs as xterm. Instead, pick the
- * first task that has any pages resident:
- */
- if (rescount > 1) {
- task->ref_count++;
- target_task = task;
- task_unlock(task);
- task_swapout_unlock();
- return(target_task);
- }
-#else
- if (rescount > target_rss) {
- /*
- * task is not swapped, and it has the
- * largest rss seen so far.
- */
- task->ref_count++;
- target_rss = rescount;
- assert(target_task != task);
- if (target_task != TASK_NULL)
- task_deallocate(target_task);
- target_task = task;
- }
-#endif
- }
-tryagain:
- task_unlock(task);
- task = (task_t)queue_next(&task->swapped_tasks);
- }
- task_swapout_unlock();
- /* only swap out if there are at least min_active_tasks */
- if (nactive < min_active_tasks) {
- if (target_task != TASK_NULL) {
- task_deallocate(target_task);
- target_task = TASK_NULL;
- }
- }
- return(target_task);
-}
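
The selection policy documented above (largest resident task among the active, swapped-in, long-enough-resident candidates, with a floor on the number of active tasks) can be shown in isolation. Below is a minimal user-space sketch over hypothetical task records; struct task_info, its field names, and pick_outtask_model() are invented for illustration and deliberately ignore locking, reference counting, and the real-time-policy check:

#include <stddef.h>
#include <stdio.h>

/* Hypothetical user-space model of the fields the policy looks at. */
struct task_info {
	int           active;        /* first thread runnable or sleeping */
	int           swapped_in;    /* swap_state == TASK_SW_IN          */
	unsigned long resident_time; /* ticks since last swap-in          */
	unsigned long rss;           /* resident page count               */
};

#define MIN_RES_TIME     6
#define MIN_ACTIVE_TASKS 4

/*
 * Return the index of the task to swap out, or -1 if none qualifies:
 * pick the largest-RSS task that is active, swapped in, and resident
 * long enough, but refuse to pick anything when fewer than
 * MIN_ACTIVE_TASKS tasks are active.
 */
static int
pick_outtask_model(const struct task_info *tasks, size_t ntasks)
{
	size_t i;
	int target = -1;
	int nactive = 0;
	unsigned long target_rss = 0;

	for (i = 0; i < ntasks; i++) {
		if (!tasks[i].active)
			continue;
		nactive++;
		if (tasks[i].swapped_in &&
		    tasks[i].resident_time > MIN_RES_TIME &&
		    tasks[i].rss > target_rss) {
			target = (int)i;
			target_rss = tasks[i].rss;
		}
	}
	return (nactive < MIN_ACTIVE_TASKS) ? -1 : target;
}

int
main(void)
{
	struct task_info tasks[] = {
		{ 1, 1, 10, 500 },	/* active, resident long enough, large   */
		{ 1, 1,  2, 900 },	/* largest, but not resident long enough */
		{ 1, 0, 50, 300 },	/* already swapped out                   */
		{ 1, 1, 20, 120 },
		{ 0, 1, 99, 800 },	/* idle: never counted as active         */
	};
	int pick = pick_outtask_model(tasks, sizeof tasks / sizeof tasks[0]);

	printf("would swap out task index %d\n", pick);	/* prints 0 */
	return 0;
}
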
-
-#if TASK_SW_DEBUG
-void print_pid(task_t task, unsigned long n1, unsigned long n2,
- const char *comp, const char *inout); /* forward */
-void
-print_pid(
- task_t task,
- unsigned long n1,
- unsigned long n2,
- const char *comp,
- const char *inout)
-{
- long rescount;
- task_lock(task);
- rescount = pmap_resident_count(task->map->pmap);
- task_unlock(task);
- printf("task_swapper: swapped %s task %x; %d %s %d; res=%d\n",
- inout, task, n1, comp, n2, rescount);
-}
-#endif
-
-/*
- * task_swapper: [exported]
- *
- * Executes as a separate kernel thread.
- */
-#define MAX_LOOP 3
-void
-task_swapper(void)
-{
- task_t outtask, intask;
- int timeout;
- int loopcnt = 0;
- boolean_t start_swapping;
- boolean_t stop_swapping;
- int local_page_free_avg;
- extern int hz;
-
- thread_swappable(current_act(), FALSE);
- stack_privilege(current_thread());
-
- spllo();
-
- for (;;) {
- local_page_free_avg = vm_page_free_avg;
- while (TRUE) {
-#if 0
- if (task_swap_debug)
- printf("task_swapper: top of loop; cnt = %d\n",loopcnt);
-#endif
- intask = pick_intask();
-
- start_swapping = ((vm_pageout_rate_avg > swap_start_pageout_rate) ||
- (vm_grab_rate_avg > max_grab_rate));
- stop_swapping = (vm_pageout_rate_avg < swap_stop_pageout_rate);
-
- /*
- * If a lot of paging is going on, or another task should come
- * in but memory is tight, find something to swap out and start
- * it. Don't swap any task out if task swapping is disabled.
- * vm_page_queue_free_lock protects the vm globals.
- */
- outtask = TASK_NULL;
- if (start_swapping ||
- (!stop_swapping && intask &&
- ((local_page_free_avg / AVE_SCALE) < vm_page_free_target))
- ) {
- if (task_swap_enable &&
- (outtask = pick_outtask()) &&
- (task_swapout(outtask) == KERN_SUCCESS)) {
- unsigned long rss;
-#if TASK_SW_DEBUG
- if (task_swap_debug)
- print_pid(outtask, local_page_free_avg / AVE_SCALE,
- vm_page_free_target, "<",
- "out");
-#endif
- rss = outtask->swap_rss;
- if (outtask->swap_nswap == 1)
- rss /= 2; /* divide by 2 if never out */
- local_page_free_avg += (rss/short_avg_interval) * AVE_SCALE;
- }
- if (outtask != TASK_NULL)
- task_deallocate(outtask);
- }
-
- /*
- * If there is an eligible task to bring in and there are at
- * least vm_page_free_target free pages, swap it in. If task
- * swapping has been disabled, bring the task in anyway.
- */
- if (intask && ((local_page_free_avg / AVE_SCALE) >=
- vm_page_free_target ||
- stop_swapping || !task_swap_enable)) {
- if (task_swapin(intask, FALSE) == KERN_SUCCESS) {
- unsigned long rss;
-#if TASK_SW_DEBUG
- if (task_swap_debug)
- print_pid(intask, local_page_free_avg / AVE_SCALE,
- vm_page_free_target, ">=",
- "in");
-#endif
- rss = intask->swap_rss;
- if (intask->swap_nswap == 1)
- rss /= 2; /* divide by 2 if never out */
- local_page_free_avg -= (rss/short_avg_interval) * AVE_SCALE;
- }
- }
- /*
- * XXX
- * Here we have to decide whether to continue swapping
- * in and/or out before sleeping. The decision should
- * be made based on the previous action (swapin/out) and
- * current system parameters, such as paging rates and
- * demand.
- * The function, compute_vm_averages, which does these
- * calculations, depends on being called every second,
- * so we can't just do the same thing.
- */
- if (++loopcnt < MAX_LOOP)
- continue;
-
- /*
- * Arrange to be awakened if paging is still heavy or there are
- * any tasks partially or completely swapped out. (Otherwise,
- * the wakeup will come from the external trigger(s).)
- */
- timeout = 0;
- if (start_swapping)
- timeout = task_swap_cycle_time;
- else {
- task_swapper_lock();
- if (!queue_empty(&swapped_tasks))
- timeout = min_swap_time;
- task_swapper_unlock();
- }
- assert_wait((event_t)&swapped_tasks, THREAD_UNINT);
- if (timeout) {
- if (task_swap_debug)
- printf("task_swapper: set timeout of %d\n",
- timeout);
- thread_set_timeout(timeout, NSEC_PER_SEC);
- }
- if (task_swap_debug)
- printf("task_swapper: blocking\n");
- thread_block(THREAD_CONTINUE_NULL);
- if (timeout) {
- thread_cancel_timeout(current_thread());
- }
- /* reset locals */
- loopcnt = 0;
- local_page_free_avg = vm_page_free_avg;
- }
- }
-}
-
-/* from BSD */
-#define ave(smooth, cnt, time) \
- smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time)
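
ave() keeps an exponentially decaying average in fixed point (the true value times AVE_SCALE). A standalone sketch with made-up inputs, showing how a 5-second average converges on a steady 50 pageouts/sec:

#include <stdio.h>

#define AVE_SCALE 1024

/* Same decaying-average form as the kernel's ave() macro above. */
#define ave(smooth, cnt, time) \
	smooth = ((time - 1) * (smooth) + ((cnt) * AVE_SCALE)) / (time)

int
main(void)
{
	unsigned int avg = 0;	/* fixed point: value * AVE_SCALE */
	int second;

	/*
	 * Feed a constant 50 pageouts/sec into a 5-second average and
	 * watch it converge toward 50 * AVE_SCALE.
	 */
	for (second = 1; second <= 15; second++) {
		ave(avg, 50, 5);
		printf("t=%2d  avg=%u (~%u pages/sec)\n",
		    second, avg, avg / AVE_SCALE);
	}
	return 0;
}
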
-
-/*
- * We estimate the system paging load in more than one metric:
- * 1) the total number of calls into the function, vm_page_grab,
- * which allocates all page frames for real pages.
- * 2) the total number of pages paged in and out of paging files.
- * This is a measure of page cleaning and faulting from backing
- * store.
- *
- * When either metric passes a threshold, tasks are swapped out.
- */
-long last_grab_count = 0;
-long last_pageout_count = 0;
-
-/*
- * compute_vm_averages: [exported]
- *
- * This function is to be called once a second to calculate average paging
- * demand and average numbers of free pages for use by the task swapper.
- * Can also be used to wake up task swapper at desired thresholds.
- *
- * NOTE: this function is single-threaded, and requires locking if
- * ever there are multiple callers.
- */
-void
-compute_vm_averages(void)
-{
- extern unsigned long vm_page_grab_count;
- long grab_count, pageout_count;
- int i;
-
- ave(vm_page_free_avg, vm_page_free_count, short_avg_interval);
- ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval);
-
- /*
- * NOTE: the vm_page_grab_count and vm_stat structure are
- * under control of vm_page_queue_free_lock. We're simply reading
- * memory here, and the numbers don't depend on each other, so
- * no lock is taken.
- */
-
- grab_count = vm_page_grab_count;
- pageout_count = 0;
- for (i = 0; i < NCPUS; i++) {
- pageout_count += vm_stat[i].pageouts;
- }
-
- ave(vm_pageout_rate_avg, pageout_count - last_pageout_count,
- short_avg_interval);
- ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count,
- long_avg_interval);
- ave(vm_grab_rate_avg, grab_count - last_grab_count,
- short_avg_interval);
- last_grab_count = grab_count;
- last_pageout_count = pageout_count;
-
- /*
- * Adjust swap_{start,stop}_pageout_rate to the paging rate peak.
- * This is an attempt to find the optimum paging rates at which
- * to trigger task swapping on or off to regulate paging activity,
- * depending on the hardware capacity.
- */
- if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) {
- unsigned int desired_max;
-
- vm_pageout_rate_peakavg = vm_pageout_rate_avg;
- swap_start_pageout_rate =
- vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100;
- swap_stop_pageout_rate =
- vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100;
- }
-
-#if TASK_SW_DEBUG
- /*
- * For measurements, allow fixed values.
- */
- if (fixed_swap_start_pageout_rate)
- swap_start_pageout_rate = fixed_swap_start_pageout_rate;
- if (fixed_swap_stop_pageout_rate)
- swap_stop_pageout_rate = fixed_swap_stop_pageout_rate;
-#endif /* TASK_SW_DEBUG */
-
-#if TASK_SW_DEBUG
- if (task_swap_stats)
- printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n",
- vm_pageout_rate_avg / AVE_SCALE,
- vm_pageout_rate_longavg / AVE_SCALE,
- swap_start_pageout_rate / AVE_SCALE,
- swap_stop_pageout_rate / AVE_SCALE,
- vm_page_free_avg / AVE_SCALE,
- vm_page_free_longavg / AVE_SCALE,
- vm_page_free_target);
-#endif /* TASK_SW_DEBUG */
-
- if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) {
- if (task_swap_on) {
- /* The following is a delicate attempt to balance the
- * need for reasonably rapid response to system
- * thrashing, with the equally important desire to
- * prevent the onset of swapping simply because of a
- * short burst of paging activity.
- */
- if ((vm_pageout_rate_longavg > swap_stop_pageout_rate) &&
- (vm_pageout_rate_avg > swap_start_pageout_rate) ||
- (vm_pageout_rate_avg > vm_pageout_rate_peakavg) ||
- (vm_grab_rate_avg > max_grab_rate))
- wake_task_swapper(FALSE);
- }
- } else /* page demand is low; should consider swapin */ {
- if (tasks_swapped_out != 0)
- wake_task_swapper(TRUE);
- }
-}
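
The start/stop threshold pair gives the swapper hysteresis: swapping turns on only above the high threshold and turns off only below the low one, so a paging rate that wanders between the two does not cause flapping. A minimal standalone sketch with made-up rates and thresholds:

#include <stdio.h>

/* Illustrative thresholds in pages/sec (already divided by AVE_SCALE). */
#define START_RATE 60	/* e.g. 30% of a 200 pages/sec peak */
#define STOP_RATE  20	/* e.g. 10% of the same peak        */

int
main(void)
{
	/* A made-up trace of the short-term pageout-rate average. */
	int rate[] = { 5, 25, 70, 90, 65, 40, 25, 15, 10, 30 };
	int swapping = 0;	/* is task swapping currently active? */
	int i;

	for (i = 0; i < (int)(sizeof rate / sizeof rate[0]); i++) {
		if (!swapping && rate[i] > START_RATE)
			swapping = 1;	/* heavy paging: start swapping out */
		else if (swapping && rate[i] < STOP_RATE)
			swapping = 0;	/* paging calmed down: stop          */
		printf("rate=%3d  swapping=%s\n",
		    rate[i], swapping ? "yes" : "no");
	}
	return 0;
}
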
-
-void
-task_swapout_eligible(task_t task)
-{
-#if TASK_SW_DEBUG
- task_swapper_lock();
- if (task_swap_debug && on_swapped_list(task)) {
- printf("swapout_eligible: task 0x%X on swapped list\n", task);
- Debugger("");
- }
- task_swapper_unlock();
-#endif
- task_swapout_lock();
- task_lock(task);
-#if TASK_SW_DEBUG
- if (task->swap_flags & TASK_SW_ELIGIBLE) {
- printf("swapout_eligible: task 0x%X already eligible\n", task);
- }
-#endif /* TASK_SW_DEBUG */
- if ((task->swap_state == TASK_SW_IN) &&
- ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) {
- queue_enter(&eligible_tasks,task,task_t,swapped_tasks);
- task->swap_flags |= TASK_SW_ELIGIBLE;
- }
- task_unlock(task);
- task_swapout_unlock();
-}
-
-void
-task_swapout_ineligible(task_t task)
-{
-#if TASK_SW_DEBUG
- task_swapper_lock();
- if (task_swap_debug && on_swapped_list(task)) {
- printf("swapout_ineligible: task 0x%X on swapped list\n", task);
- Debugger("");
- }
- task_swapper_unlock();
-#endif
- task_swapout_lock();
- task_lock(task);
-#if TASK_SW_DEBUG
- if (!(task->swap_flags & TASK_SW_ELIGIBLE))
- printf("swapout_ineligible: task 0x%X already inel.\n", task);
-#endif /* TASK_SW_DEBUG */
- if ((task->swap_state != TASK_SW_IN) &&
- (task->swap_flags & TASK_SW_ELIGIBLE)) {
- queue_remove(&eligible_tasks, task, task_t, swapped_tasks);
- task->swap_flags &= ~TASK_SW_ELIGIBLE;
- }
- task_unlock(task);
- task_swapout_unlock();
-}
-
-int task_swap_ast_aborted = 0;
-
-/*
- * Process an AST_SWAPOUT.
- */
-void
-swapout_ast()
-{
- spl_t s;
- thread_act_t act;
- thread_t thread;
-
- act = current_act();
-
- /*
- * Task is being swapped out. First mark it as suspended
- * and halted, then call thread_swapout_enqueue to put
-	 * the thread on the queue for task_swap_swapout_thread
- * to swap out the thread.
- */
- /*
- * Don't swap unswappable threads
- */
- thread = act_lock_thread(act);
- s = splsched();
- if (thread)
- thread_lock(thread);
- if ((act->ast & AST_SWAPOUT) == 0) {
- /*
- * Race with task_swapin. Abort swapout.
- */
- task_swap_ast_aborted++; /* not locked XXX */
- if (thread)
- thread_unlock(thread);
- splx(s);
- act_unlock_thread(act);
- } else if (act->swap_state == TH_SW_IN) {
- /*
- * Mark swap_state as TH_SW_TASK_SWAPPING to avoid
- * race with thread swapper, which will only
- * swap thread if swap_state is TH_SW_IN.
- * This way, the thread can only be swapped by
- * the task swapping mechanism.
- */
- act->swap_state |= TH_SW_TASK_SWAPPING;
- /* assert(act->suspend_count == 0); XXX ? */
- if (thread)
- thread_unlock(thread);
- if (act->suspend_count++ == 0) /* inline thread_hold */
- install_special_handler(act);
- /* self->state |= TH_HALTED; */
- thread_ast_clear(act, AST_SWAPOUT);
- /*
- * Initialize the swap_queue fields to allow an extra
- * queue_remove() in task_swapin if we lose the race
- * (task_swapin can be called before we complete
- * thread_swapout_enqueue).
- */
- queue_init((queue_t) &act->swap_queue);
- splx(s);
- act_unlock_thread(act);
- /* this must be called at normal interrupt level */
- thread_swapout_enqueue(act);
- } else {
- /* thread isn't swappable; continue running */
- assert(act->swap_state == TH_SW_UNSWAPPABLE);
- if (thread)
- thread_unlock(thread);
- thread_ast_clear(act, AST_SWAPOUT);
- splx(s);
- act_unlock_thread(act);
- }
-}
-
-#endif /* TASK_SWAPPER */