]>
Commit | Line | Data |
---|---|---|
1c79356b A |
1 | /* |
2 | * Copyright (c) 2000 Apple Computer, Inc. All rights reserved. | |
3 | * | |
4 | * @APPLE_LICENSE_HEADER_START@ | |
5 | * | |
6 | * The contents of this file constitute Original Code as defined in and | |
7 | * are subject to the Apple Public Source License Version 1.1 (the | |
8 | * "License"). You may not use this file except in compliance with the | |
9 | * License. Please obtain a copy of the License at | |
10 | * http://www.apple.com/publicsource and read it before using this file. | |
11 | * | |
12 | * This Original Code and all software distributed under the License are | |
13 | * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER | |
14 | * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, | |
15 | * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, | |
16 | * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the | |
17 | * License for the specific language governing rights and limitations | |
18 | * under the License. | |
19 | * | |
20 | * @APPLE_LICENSE_HEADER_END@ | |
21 | */ | |
22 | /* | |
23 | * @OSF_COPYRIGHT@ | |
24 | */ | |
25 | /* | |
26 | * File: kern/task_swap.c | |
27 | * | |
28 | * Task residency management primitives implementation. | |
29 | */ | |
30 | #include <mach_assert.h> | |
31 | #include <task_swapper.h> | |
32 | ||
33 | #include <kern/spl.h> | |
34 | #include <kern/lock.h> | |
35 | #include <kern/queue.h> | |
36 | #include <kern/host.h> | |
37 | #include <kern/task.h> | |
38 | #include <kern/task_swap.h> | |
39 | #include <kern/thread.h> | |
40 | #include <kern/thread_swap.h> | |
41 | #include <kern/host_statistics.h> | |
42 | #include <kern/misc_protos.h> | |
43 | #include <kern/assert.h> | |
44 | #include <mach/policy.h> | |
45 | ||
46 | #include <ipc/ipc_port.h> /* We use something from in here */ | |
47 | ||
48 | /* | |
49 | * Note: if TASK_SWAPPER is disabled, then this file defines only | |
50 | * a stub version of task_swappable(), so that the service can always | |
51 | * be defined, even if swapping has been configured out of the kernel. | |
52 | */ | |
53 | #if TASK_SWAPPER | |
54 | ||
55 | /* temporary debug flags */ | |
56 | #define TASK_SW_DEBUG 1 | |
57 | #define TASK_SW_STATS 1 | |
58 | ||
59 | int task_swap_debug = 0; | |
60 | int task_swap_stats = 0; | |
61 | int task_swap_enable = 1; | |
62 | int task_swap_on = 1; | |
63 | ||
64 | queue_head_t swapped_tasks; /* completely swapped out tasks */ | |
65 | queue_head_t swapout_thread_q; /* threads to be swapped out */ | |
66 | mutex_t task_swapper_lock; /* protects above queue */ | |
67 | ||
68 | #define task_swapper_lock() mutex_lock(&task_swapper_lock) | |
69 | #define task_swapper_unlock() mutex_unlock(&task_swapper_lock) | |
9bccf70c A |
70 | #define task_swapper_wakeup() thread_wakeup((event_t)&swapout_thread_q) |
71 | #define task_swapper_sleep() thread_sleep_mutex((event_t)&swapout_thread_q, \ | |
72 | &task_swapper_lock, \ | |
73 | THREAD_UNINT) | |
74 | ||
1c79356b A |
75 | |
76 | queue_head_t eligible_tasks; /* tasks eligible for swapout */ | |
77 | mutex_t task_swapout_list_lock; /* protects above queue */ | |
78 | #define task_swapout_lock() mutex_lock(&task_swapout_list_lock) | |
79 | #define task_swapout_unlock() mutex_unlock(&task_swapout_list_lock) | |
80 | ||
/*
 * The next section of constants and globals are tunable parameters
 * used in making swapping decisions.  They may be changed dynamically
 * without adversely affecting the robustness of the system; however,
 * the policy will change, one way or the other.
 */

#define	SHORT_AVG_INTERVAL	5	/* in seconds */
#define	LONG_AVG_INTERVAL	30	/* in seconds */
#define	AVE_SCALE		1024	/* fixed-point scale for "avg" values */

unsigned int short_avg_interval = SHORT_AVG_INTERVAL;
unsigned int long_avg_interval = LONG_AVG_INTERVAL;

#ifndef	MIN_SWAP_PAGEOUT_RATE
#define	MIN_SWAP_PAGEOUT_RATE	10
#endif

/*
 * The following are all stored in fixed-point representation (the actual
 * value times AVE_SCALE), to allow more accurate computing of decaying
 * averages.  So all variables that end with "avg" must be divided by
 * AVE_SCALE to convert them or compare them to ints.
 */
unsigned int vm_grab_rate_avg;
unsigned int vm_pageout_rate_avg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_longavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_pageout_rate_peakavg = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE;
unsigned int vm_page_free_avg;		/* average free pages over short_avg_interval */
unsigned int vm_page_free_longavg;	/* avg free pages over long_avg_interval */

/*
 * Trigger task swapping when paging activity reaches
 * SWAP_PAGEOUT_HIGH_WATER_MARK per cent of the maximum paging activity
 * ever observed.  Turn off task swapping when paging activity goes back
 * down to below SWAP_PAGEOUT_LOW_WATER_MARK per cent of the maximum.
 * These numbers have been found empirically and might need some tuning...
 */
#ifndef	SWAP_PAGEOUT_HIGH_WATER_MARK
#define	SWAP_PAGEOUT_HIGH_WATER_MARK	30
#endif
#ifndef	SWAP_PAGEOUT_LOW_WATER_MARK
#define	SWAP_PAGEOUT_LOW_WATER_MARK	10
#endif

#ifndef	MAX_GRAB_RATE
#define	MAX_GRAB_RATE	((unsigned int) -1)	/* XXX no maximum */
#endif

/*
 * swap_{start,stop}_pageout_rate start at the minimum value, then increase
 * to adjust to the hardware's performance, following the paging rate peaks.
 * Both are fixed-point values (scaled by AVE_SCALE).
 */
unsigned int swap_pageout_high_water_mark = SWAP_PAGEOUT_HIGH_WATER_MARK;
unsigned int swap_pageout_low_water_mark = SWAP_PAGEOUT_LOW_WATER_MARK;
unsigned int swap_start_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_HIGH_WATER_MARK / 100;
unsigned int swap_stop_pageout_rate = MIN_SWAP_PAGEOUT_RATE * AVE_SCALE *
					SWAP_PAGEOUT_LOW_WATER_MARK / 100;
#if	TASK_SW_DEBUG
unsigned int fixed_swap_start_pageout_rate = 0;	/* only for testing purpose */
unsigned int fixed_swap_stop_pageout_rate = 0;	/* only for testing purpose */
#endif	/* TASK_SW_DEBUG */
unsigned int max_grab_rate = MAX_GRAB_RATE;

#ifndef	MIN_SWAP_TIME
#define	MIN_SWAP_TIME	1
#endif

int	min_swap_time = MIN_SWAP_TIME;			/* in seconds */

#ifndef	MIN_RES_TIME
#define	MIN_RES_TIME	6
#endif

int	min_res_time = MIN_RES_TIME;			/* in seconds */

#ifndef	MIN_ACTIVE_TASKS
#define	MIN_ACTIVE_TASKS	4
#endif

int	min_active_tasks = MIN_ACTIVE_TASKS;

#ifndef	TASK_SWAP_CYCLE_TIME
#define	TASK_SWAP_CYCLE_TIME	2
#endif

int	task_swap_cycle_time = TASK_SWAP_CYCLE_TIME;	/* in seconds */

int	last_task_swap_cycle = 0;

/* temporary statistics */
int	task_swapouts = 0;
int	task_swapins = 0;
int	task_swaprss_out = 0;	/* total rss at swapout time */
int	task_swaprss_in = 0;	/* total rss at swapin time */
int	task_swap_total_time = 0;	/* total time spent swapped out */
int	tasks_swapped_out = 0;	/* number of tasks swapped out now */

/*
 * TASK_STATS_INCR bumps a statistics counter when TASK_SW_STATS is
 * non-zero.  The switch is tested with #if (like TASK_SW_DEBUG) so that
 * defining it as 0 really turns the counters off; the disabled form
 * still expands to a valid expression statement.
 */
#if	TASK_SW_STATS
#define	TASK_STATS_INCR(cnt)	(cnt)++
#else
#define	TASK_STATS_INCR(cnt)	((void)0)
#endif	/* TASK_SW_STATS */
185 | ||
186 | #if TASK_SW_DEBUG | |
187 | boolean_t on_swapped_list(task_t task); /* forward */ | |
188 | /* | |
189 | * Debug function to determine if a task is already on the | |
190 | * swapped out tasks list. It also checks for tasks on the list | |
191 | * that are in an illegal state (i.e. swapped in). | |
192 | */ | |
193 | boolean_t | |
194 | on_swapped_list(task_t task) | |
195 | { | |
196 | task_t ltask; | |
197 | /* task_swapper_lock is locked. */ | |
198 | ||
199 | if (queue_empty(&swapped_tasks)) { | |
200 | return(FALSE); | |
201 | } | |
202 | ltask = (task_t)queue_first(&swapped_tasks); | |
203 | while (!queue_end(&swapped_tasks, (queue_entry_t)ltask)) { | |
204 | /* check for illegal state */ | |
205 | if (ltask->swap_state == TASK_SW_IN) { | |
206 | printf("on_swapped_list and in: 0x%X\n",ltask); | |
207 | Debugger(""); | |
208 | } | |
209 | if (ltask == task) | |
210 | return(TRUE); | |
211 | ltask = (task_t)queue_next(<ask->swapped_tasks); | |
212 | } | |
213 | return(FALSE); | |
214 | } | |
215 | #endif /* TASK_SW_DEBUG */ | |
216 | ||
217 | /* | |
218 | * task_swapper_init: [exported] | |
219 | */ | |
220 | void | |
221 | task_swapper_init() | |
222 | { | |
223 | queue_init(&swapped_tasks); | |
224 | queue_init(&eligible_tasks); | |
225 | queue_init(&swapout_thread_q); | |
226 | mutex_init(&task_swapper_lock, ETAP_THREAD_TASK_SWAP); | |
227 | mutex_init(&task_swapout_list_lock, ETAP_THREAD_TASK_SWAPOUT); | |
228 | vm_page_free_avg = vm_page_free_count * AVE_SCALE; | |
229 | vm_page_free_longavg = vm_page_free_count * AVE_SCALE; | |
230 | } | |
231 | ||
232 | #endif /* TASK_SWAPPER */ | |
233 | ||
234 | /* | |
235 | * task_swappable: [exported] | |
236 | * | |
237 | * Make a task swappable or non-swappable. If made non-swappable, | |
238 | * it will be swapped in. | |
239 | * | |
240 | * Locking: task_swapout_lock is taken before task lock. | |
241 | */ | |
242 | kern_return_t | |
243 | task_swappable( | |
244 | host_priv_t host_priv, | |
245 | task_t task, | |
246 | boolean_t make_swappable) | |
247 | { | |
248 | if (host_priv == HOST_PRIV_NULL) | |
249 | return(KERN_INVALID_ARGUMENT); | |
250 | ||
251 | if (task == TASK_NULL) | |
252 | return(KERN_INVALID_ARGUMENT); | |
253 | ||
254 | #if !TASK_SWAPPER | |
255 | ||
256 | /* | |
257 | * If we don't support swapping, this call is purely advisory. | |
258 | */ | |
259 | return(KERN_SUCCESS); | |
260 | ||
261 | #else /* TASK_SWAPPER */ | |
262 | ||
263 | task_lock(task); | |
264 | if (make_swappable) { | |
265 | /* make task swappable */ | |
266 | if (task->swap_state == TASK_SW_UNSWAPPABLE) { | |
267 | task->swap_state = TASK_SW_IN; | |
268 | task_unlock(task); | |
269 | task_swapout_eligible(task); | |
270 | } | |
271 | } else { | |
272 | switch (task->swap_state) { | |
273 | case TASK_SW_IN: | |
274 | task->swap_state = TASK_SW_UNSWAPPABLE; | |
275 | task_unlock(task); | |
276 | task_swapout_ineligible(task); | |
277 | break; | |
278 | case TASK_SW_UNSWAPPABLE: | |
279 | task_unlock(task); | |
280 | break; | |
281 | default: | |
282 | /* | |
283 | * swap_state could be TASK_SW_OUT, TASK_SW_GOING_OUT, | |
284 | * or TASK_SW_COMING_IN. task_swapin handles all | |
285 | * three, and its default case will catch any bad | |
286 | * states. | |
287 | */ | |
288 | task_unlock(task); | |
289 | task_swapin(task, TRUE); | |
290 | break; | |
291 | } | |
292 | } | |
293 | return(KERN_SUCCESS); | |
294 | ||
295 | #endif /* TASK_SWAPPER */ | |
296 | ||
297 | } | |
298 | ||
299 | #if TASK_SWAPPER | |
300 | ||
301 | /* | |
302 | * task_swapout: | |
303 | * A reference to the task must be held. | |
304 | * | |
305 | * Start swapping out a task by sending an AST_SWAPOUT to each thread. | |
306 | * When the threads reach a clean point, they queue themselves up on the | |
307 | * swapout_thread_q to be swapped out by the task_swap_swapout_thread. | |
308 | * The task can be swapped in at any point in this process. | |
309 | * | |
310 | * A task will not be fully swapped out (i.e. its map residence count | |
311 | * at zero) until all currently-swapped threads run and reach | |
312 | * a clean point, at which time they will be swapped again, | |
313 | * decrementing the swap_ast_waiting count on the task. | |
314 | * | |
315 | * Locking: no locks held upon entry and exit. | |
316 | * Task_lock is held throughout this function. | |
317 | */ | |
318 | kern_return_t | |
319 | task_swapout(task_t task) | |
320 | { | |
321 | thread_act_t thr_act; | |
322 | thread_t thread; | |
323 | queue_head_t *list; | |
324 | int s; | |
325 | ||
326 | task_swapout_lock(); | |
327 | task_lock(task); | |
328 | /* | |
329 | * NOTE: look into turning these into assertions if they | |
330 | * are invariants. | |
331 | */ | |
332 | if ((task->swap_state != TASK_SW_IN) || (!task->active)) { | |
333 | task_unlock(task); | |
334 | task_swapout_unlock(); | |
335 | return(KERN_FAILURE); | |
336 | } | |
337 | if (task->swap_flags & TASK_SW_ELIGIBLE) { | |
338 | queue_remove(&eligible_tasks, task, task_t, swapped_tasks); | |
339 | task->swap_flags &= ~TASK_SW_ELIGIBLE; | |
340 | } | |
341 | task_swapout_unlock(); | |
342 | ||
343 | /* set state to avoid races with task_swappable(FALSE) */ | |
344 | task->swap_state = TASK_SW_GOING_OUT; | |
345 | task->swap_rss = pmap_resident_count(task->map->pmap); | |
346 | task_swaprss_out += task->swap_rss; | |
347 | task->swap_ast_waiting = task->thr_act_count; | |
348 | ||
349 | /* | |
350 | * halt all threads in this task: | |
351 | * We don't need the thread list lock for traversal. | |
352 | */ | |
353 | list = &task->thr_acts; | |
354 | thr_act = (thread_act_t) queue_first(list); | |
355 | while (!queue_end(list, (queue_entry_t) thr_act)) { | |
356 | boolean_t swappable; | |
357 | thread_act_t ract; | |
358 | ||
359 | thread = act_lock_thread(thr_act); | |
360 | s = splsched(); | |
361 | if (!thread) | |
362 | swappable = (thr_act->swap_state != TH_SW_UNSWAPPABLE); | |
363 | else { | |
364 | thread_lock(thread); | |
365 | swappable = TRUE; | |
366 | for (ract = thread->top_act; ract; ract = ract->lower) | |
367 | if (ract->swap_state == TH_SW_UNSWAPPABLE) { | |
368 | swappable = FALSE; | |
369 | break; | |
370 | } | |
371 | } | |
372 | if (swappable) | |
373 | thread_ast_set(thr_act, AST_SWAPOUT); | |
374 | if (thread) | |
375 | thread_unlock(thread); | |
376 | splx(s); | |
377 | assert((thr_act->ast & AST_TERMINATE) == 0); | |
378 | act_unlock_thread(thr_act); | |
379 | thr_act = (thread_act_t) queue_next(&thr_act->thr_acts); | |
380 | } | |
381 | ||
382 | task->swap_stamp = sched_tick; | |
383 | task->swap_nswap++; | |
384 | assert((task->swap_flags&TASK_SW_WANT_IN) == 0); | |
385 | /* put task on the queue of swapped out tasks */ | |
386 | task_swapper_lock(); | |
387 | #if TASK_SW_DEBUG | |
388 | if (task_swap_debug && on_swapped_list(task)) { | |
389 | printf("task 0x%X already on list\n", task); | |
390 | Debugger(""); | |
391 | } | |
392 | #endif /* TASK_SW_DEBUG */ | |
393 | queue_enter(&swapped_tasks, task, task_t, swapped_tasks); | |
394 | tasks_swapped_out++; | |
395 | task_swapouts++; | |
396 | task_swapper_unlock(); | |
397 | task_unlock(task); | |
398 | ||
399 | return(KERN_SUCCESS); | |
400 | } | |
401 | ||
#ifdef	TASK_SW_STATS
/*
 * Race/outcome counters, bumped through TASK_STATS_INCR.
 */

/* task_swapin: what state the task was found in */
int	task_sw_race_in = 0;		/* already TASK_SW_IN */
int	task_sw_race_coming_in = 0;	/* another swapin was in progress */
int	task_sw_race_going_out = 0;	/* still TASK_SW_GOING_OUT */

/* task_swapin: how far each thread had gotten */
int	task_sw_before_ast = 0;		/* AST_SWAPOUT was still pending */
int	task_sw_before_swap = 0;	/* TH_SW_TASK_SWAPPING was still set */
int	task_sw_after_swap = 0;		/* TH_SW_TASK_SWAPPING already clear */

/* task_swap_swapout_thread: task state changed under it */
int	task_sw_race_in_won = 0;

/* task_swapin: thread was TH_SW_UNSWAPPABLE */
int	task_sw_unswappable = 0;

/* task_swap_swapout_thread: activation inactive or thread changed */
int	task_sw_act_inactive = 0;
#endif	/* TASK_SW_STATS */
413 | ||
414 | /* | |
415 | * thread_swapout_enqueue is called by thread_halt_self when it | |
416 | * processes AST_SWAPOUT to enqueue threads to be swapped out. | |
417 | * It must be called at normal interrupt priority for the | |
418 | * sake of the task_swapper_lock. | |
419 | * | |
420 | * There can be races with task swapin here. | |
421 | * First lock task and decrement swap_ast_waiting count, and if | |
422 | * it's 0, we can decrement the residence count on the task's map | |
423 | * and set the task's swap state to TASK_SW_OUT. | |
424 | */ | |
425 | void | |
426 | thread_swapout_enqueue(thread_act_t thr_act) | |
427 | { | |
428 | task_t task = thr_act->task; | |
429 | task_lock(task); | |
430 | /* | |
431 | * If the swap_state is not TASK_SW_GOING_OUT, then | |
432 | * task_swapin has beaten us to this operation, and | |
433 | * we have nothing to do. | |
434 | */ | |
435 | if (task->swap_state != TASK_SW_GOING_OUT) { | |
436 | task_unlock(task); | |
437 | return; | |
438 | } | |
439 | if (--task->swap_ast_waiting == 0) { | |
440 | vm_map_t map = task->map; | |
441 | task->swap_state = TASK_SW_OUT; | |
442 | task_unlock(task); | |
443 | mutex_lock(&map->s_lock); | |
444 | vm_map_res_deallocate(map); | |
445 | mutex_unlock(&map->s_lock); | |
446 | } else | |
447 | task_unlock(task); | |
448 | ||
449 | task_swapper_lock(); | |
450 | act_lock(thr_act); | |
451 | if (! (thr_act->swap_state & TH_SW_TASK_SWAPPING)) { | |
452 | /* | |
453 | * We lost a race with task_swapin(): don't enqueue. | |
454 | */ | |
455 | } else { | |
456 | queue_enter(&swapout_thread_q, thr_act, | |
457 | thread_act_t, swap_queue); | |
9bccf70c | 458 | task_swapper_wakeup(); |
1c79356b A |
459 | } |
460 | act_unlock(thr_act); | |
461 | task_swapper_unlock(); | |
462 | } | |
463 | ||
464 | /* | |
465 | * task_swap_swapout_thread: [exported] | |
466 | * | |
467 | * Executes as a separate kernel thread. | |
468 | * Its job is to swap out threads that have been halted by AST_SWAPOUT. | |
469 | */ | |
470 | void | |
471 | task_swap_swapout_thread(void) | |
472 | { | |
473 | thread_act_t thr_act; | |
474 | thread_t thread, nthread; | |
475 | task_t task; | |
476 | int s; | |
477 | ||
478 | thread_swappable(current_act(), FALSE); | |
479 | stack_privilege(current_thread()); | |
480 | ||
481 | spllo(); | |
482 | ||
9bccf70c | 483 | task_swapper_lock(); |
1c79356b | 484 | while (TRUE) { |
1c79356b A |
485 | while (! queue_empty(&swapout_thread_q)) { |
486 | ||
487 | queue_remove_first(&swapout_thread_q, thr_act, | |
488 | thread_act_t, swap_queue); | |
489 | /* | |
490 | * If we're racing with task_swapin, we need | |
491 | * to make it safe for it to do remque on the | |
492 | * thread, so make its links point to itself. | |
493 | * Allowing this ugliness is cheaper than | |
494 | * making task_swapin search the entire queue. | |
495 | */ | |
496 | act_lock(thr_act); | |
497 | queue_init((queue_t) &thr_act->swap_queue); | |
498 | act_unlock(thr_act); | |
499 | task_swapper_unlock(); | |
500 | /* | |
501 | * Wait for thread's RUN bit to be deasserted. | |
502 | */ | |
503 | thread = act_lock_thread(thr_act); | |
504 | if (thread == THREAD_NULL) | |
505 | act_unlock_thread(thr_act); | |
506 | else { | |
507 | boolean_t r; | |
508 | ||
509 | thread_reference(thread); | |
510 | thread_hold(thr_act); | |
511 | act_unlock_thread(thr_act); | |
512 | r = thread_stop_wait(thread); | |
513 | nthread = act_lock_thread(thr_act); | |
514 | thread_release(thr_act); | |
515 | thread_deallocate(thread); | |
516 | act_unlock_thread(thr_act); | |
517 | if (!r || nthread != thread) { | |
518 | task_swapper_lock(); | |
519 | continue; | |
520 | } | |
521 | } | |
522 | task = thr_act->task; | |
523 | task_lock(task); | |
524 | /* | |
525 | * we can race with swapin, which would set the | |
526 | * state to TASK_SW_IN. | |
527 | */ | |
528 | if ((task->swap_state != TASK_SW_OUT) && | |
529 | (task->swap_state != TASK_SW_GOING_OUT)) { | |
530 | task_unlock(task); | |
531 | task_swapper_lock(); | |
532 | TASK_STATS_INCR(task_sw_race_in_won); | |
533 | if (thread != THREAD_NULL) | |
534 | thread_unstop(thread); | |
535 | continue; | |
536 | } | |
537 | nthread = act_lock_thread(thr_act); | |
538 | if (nthread != thread || thr_act->active == FALSE) { | |
539 | act_unlock_thread(thr_act); | |
540 | task_unlock(task); | |
541 | task_swapper_lock(); | |
542 | TASK_STATS_INCR(task_sw_act_inactive); | |
543 | if (thread != THREAD_NULL) | |
544 | thread_unstop(thread); | |
545 | continue; | |
546 | } | |
547 | s = splsched(); | |
548 | if (thread != THREAD_NULL) | |
549 | thread_lock(thread); | |
550 | /* | |
551 | * Thread cannot have been swapped out yet because | |
552 | * TH_SW_TASK_SWAPPING was set in AST. If task_swapin | |
553 | * beat us here, we either wouldn't have found it on | |
554 | * the queue, or the task->swap_state would have | |
555 | * changed. The synchronization is on the | |
556 | * task's swap_state and the task_lock. | |
557 | * The thread can't be swapped in any other way | |
558 | * because its task has been swapped. | |
559 | */ | |
560 | assert(thr_act->swap_state & TH_SW_TASK_SWAPPING); | |
561 | assert(thread == THREAD_NULL || | |
562 | !(thread->state & (TH_SWAPPED_OUT|TH_RUN))); | |
563 | assert((thr_act->swap_state & TH_SW_STATE) == TH_SW_IN); | |
564 | /* assert(thread->state & TH_HALTED); */ | |
565 | /* this also clears TH_SW_TASK_SWAPPING flag */ | |
566 | thr_act->swap_state = TH_SW_GOING_OUT; | |
567 | if (thread != THREAD_NULL) { | |
568 | if (thread->top_act == thr_act) { | |
569 | thread->state |= TH_SWAPPED_OUT; | |
570 | /* | |
571 | * Once we unlock the task, things can happen | |
572 | * to the thread, so make sure it's consistent | |
573 | * for thread_swapout. | |
574 | */ | |
575 | } | |
576 | thread->ref_count++; | |
577 | thread_unlock(thread); | |
578 | thread_unstop(thread); | |
579 | } | |
580 | splx(s); | |
581 | act_locked_act_reference(thr_act); | |
582 | act_unlock_thread(thr_act); | |
583 | task_unlock(task); | |
584 | ||
585 | thread_swapout(thr_act); /* do the work */ | |
586 | ||
587 | if (thread != THREAD_NULL) | |
588 | thread_deallocate(thread); | |
589 | act_deallocate(thr_act); | |
590 | task_swapper_lock(); | |
591 | } | |
9bccf70c | 592 | task_swapper_sleep(); |
1c79356b A |
593 | } |
594 | } | |
595 | ||
596 | /* | |
597 | * task_swapin: | |
598 | * | |
599 | * Make a task resident. | |
600 | * Performs all of the work to make a task resident and possibly | |
601 | * non-swappable. If we race with a competing task_swapin call, | |
602 | * we wait for its completion, then return. | |
603 | * | |
604 | * Locking: no locks held upon entry and exit. | |
605 | * | |
606 | * Note that TASK_SW_MAKE_UNSWAPPABLE can only be set when the | |
607 | * state is TASK_SW_COMING_IN. | |
608 | */ | |
609 | ||
610 | kern_return_t | |
611 | task_swapin(task_t task, boolean_t make_unswappable) | |
612 | { | |
613 | register queue_head_t *list; | |
614 | register thread_act_t thr_act, next; | |
615 | thread_t thread; | |
616 | int s; | |
617 | boolean_t swappable = TRUE; | |
618 | ||
619 | task_lock(task); | |
620 | switch (task->swap_state) { | |
621 | case TASK_SW_OUT: | |
622 | { | |
623 | vm_map_t map = task->map; | |
624 | /* | |
625 | * Task has made it all the way out, which means | |
626 | * that vm_map_res_deallocate has been done; set | |
627 | * state to TASK_SW_COMING_IN, then bring map | |
628 | * back in. We could actually be racing with | |
629 | * the thread_swapout_enqueue, which does the | |
630 | * vm_map_res_deallocate, but that race is covered. | |
631 | */ | |
632 | task->swap_state = TASK_SW_COMING_IN; | |
633 | assert(task->swap_ast_waiting == 0); | |
634 | assert(map->res_count >= 0); | |
635 | task_unlock(task); | |
636 | mutex_lock(&map->s_lock); | |
637 | vm_map_res_reference(map); | |
638 | mutex_unlock(&map->s_lock); | |
639 | task_lock(task); | |
640 | assert(task->swap_state == TASK_SW_COMING_IN); | |
641 | } | |
642 | break; | |
643 | ||
644 | case TASK_SW_GOING_OUT: | |
645 | /* | |
646 | * Task isn't all the way out yet. There is | |
647 | * still at least one thread not swapped, and | |
648 | * vm_map_res_deallocate has not been done. | |
649 | */ | |
650 | task->swap_state = TASK_SW_COMING_IN; | |
651 | assert(task->swap_ast_waiting > 0 || | |
652 | (task->swap_ast_waiting == 0 && | |
653 | task->thr_act_count == 0)); | |
654 | assert(task->map->res_count > 0); | |
655 | TASK_STATS_INCR(task_sw_race_going_out); | |
656 | break; | |
657 | case TASK_SW_IN: | |
658 | assert(task->map->res_count > 0); | |
659 | #if TASK_SW_DEBUG | |
660 | task_swapper_lock(); | |
661 | if (task_swap_debug && on_swapped_list(task)) { | |
662 | printf("task 0x%X on list, state is SW_IN\n", | |
663 | task); | |
664 | Debugger(""); | |
665 | } | |
666 | task_swapper_unlock(); | |
667 | #endif /* TASK_SW_DEBUG */ | |
668 | TASK_STATS_INCR(task_sw_race_in); | |
669 | if (make_unswappable) { | |
670 | task->swap_state = TASK_SW_UNSWAPPABLE; | |
671 | task_unlock(task); | |
672 | task_swapout_ineligible(task); | |
673 | } else | |
674 | task_unlock(task); | |
675 | return(KERN_SUCCESS); | |
676 | case TASK_SW_COMING_IN: | |
677 | /* | |
678 | * Raced with another task_swapin and lost; | |
679 | * wait for other one to complete first | |
680 | */ | |
681 | assert(task->map->res_count >= 0); | |
682 | /* | |
683 | * set MAKE_UNSWAPPABLE so that whoever is swapping | |
684 | * the task in will make it unswappable, and return | |
685 | */ | |
686 | if (make_unswappable) | |
687 | task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE; | |
688 | task->swap_flags |= TASK_SW_WANT_IN; | |
689 | assert_wait((event_t)&task->swap_state, THREAD_UNINT); | |
690 | task_unlock(task); | |
9bccf70c | 691 | thread_block(THREAD_CONTINUE_NULL); |
1c79356b A |
692 | TASK_STATS_INCR(task_sw_race_coming_in); |
693 | return(KERN_SUCCESS); | |
694 | case TASK_SW_UNSWAPPABLE: | |
695 | /* | |
696 | * This can happen, since task_terminate | |
697 | * unconditionally calls task_swapin. | |
698 | */ | |
699 | task_unlock(task); | |
700 | return(KERN_SUCCESS); | |
701 | default: | |
702 | panic("task_swapin bad state"); | |
703 | break; | |
704 | } | |
705 | if (make_unswappable) | |
706 | task->swap_flags |= TASK_SW_MAKE_UNSWAPPABLE; | |
707 | assert(task->swap_state == TASK_SW_COMING_IN); | |
708 | task_swapper_lock(); | |
709 | #if TASK_SW_DEBUG | |
710 | if (task_swap_debug && !on_swapped_list(task)) { | |
711 | printf("task 0x%X not on list\n", task); | |
712 | Debugger(""); | |
713 | } | |
714 | #endif /* TASK_SW_DEBUG */ | |
715 | queue_remove(&swapped_tasks, task, task_t, swapped_tasks); | |
716 | tasks_swapped_out--; | |
717 | task_swapins++; | |
718 | task_swapper_unlock(); | |
719 | ||
720 | /* | |
721 | * Iterate through all threads for this task and | |
722 | * release them, as required. They may not have been swapped | |
723 | * out yet. The task remains locked throughout. | |
724 | */ | |
725 | list = &task->thr_acts; | |
726 | thr_act = (thread_act_t) queue_first(list); | |
727 | while (!queue_end(list, (queue_entry_t) thr_act)) { | |
728 | boolean_t need_to_release; | |
729 | next = (thread_act_t) queue_next(&thr_act->thr_acts); | |
730 | /* | |
731 | * Keep task_swapper_lock across thread handling | |
732 | * to synchronize with task_swap_swapout_thread | |
733 | */ | |
734 | task_swapper_lock(); | |
735 | thread = act_lock_thread(thr_act); | |
736 | s = splsched(); | |
737 | if (thr_act->ast & AST_SWAPOUT) { | |
738 | /* thread hasn't gotten the AST yet, just clear it */ | |
739 | thread_ast_clear(thr_act, AST_SWAPOUT); | |
740 | need_to_release = FALSE; | |
741 | TASK_STATS_INCR(task_sw_before_ast); | |
742 | splx(s); | |
743 | act_unlock_thread(thr_act); | |
744 | } else { | |
745 | /* | |
746 | * If AST_SWAPOUT was cleared, then thread_hold, | |
747 | * or equivalent was done. | |
748 | */ | |
749 | need_to_release = TRUE; | |
750 | /* | |
751 | * Thread has hit AST, but it may not have | |
752 | * been dequeued yet, so we need to check. | |
753 | * NOTE: the thread may have been dequeued, but | |
754 | * has not yet been swapped (the task_swapper_lock | |
755 | * has been dropped, but the thread is not yet | |
756 | * locked), and the TH_SW_TASK_SWAPPING flag may | |
757 | * not have been cleared. In this case, we will do | |
758 | * an extra remque, which the task_swap_swapout_thread | |
759 | * has made safe, and clear the flag, which is also | |
760 | * checked by the t_s_s_t before doing the swapout. | |
761 | */ | |
762 | if (thread) | |
763 | thread_lock(thread); | |
764 | if (thr_act->swap_state & TH_SW_TASK_SWAPPING) { | |
765 | /* | |
766 | * hasn't yet been dequeued for swapout, | |
767 | * so clear flags and dequeue it first. | |
768 | */ | |
769 | thr_act->swap_state &= ~TH_SW_TASK_SWAPPING; | |
770 | assert(thr_act->thread == THREAD_NULL || | |
771 | !(thr_act->thread->state & | |
772 | TH_SWAPPED_OUT)); | |
773 | queue_remove(&swapout_thread_q, thr_act, | |
774 | thread_act_t, swap_queue); | |
775 | TASK_STATS_INCR(task_sw_before_swap); | |
776 | } else { | |
777 | TASK_STATS_INCR(task_sw_after_swap); | |
778 | /* | |
779 | * It's possible that the thread was | |
780 | * made unswappable before hitting the | |
781 | * AST, in which case it's still running. | |
782 | */ | |
783 | if (thr_act->swap_state == TH_SW_UNSWAPPABLE) { | |
784 | need_to_release = FALSE; | |
785 | TASK_STATS_INCR(task_sw_unswappable); | |
786 | } | |
787 | } | |
788 | if (thread) | |
789 | thread_unlock(thread); | |
790 | splx(s); | |
791 | act_unlock_thread(thr_act); | |
792 | } | |
793 | task_swapper_unlock(); | |
794 | ||
795 | /* | |
796 | * thread_release will swap in the thread if it's been | |
797 | * swapped out. | |
798 | */ | |
799 | if (need_to_release) { | |
800 | act_lock_thread(thr_act); | |
801 | thread_release(thr_act); | |
802 | act_unlock_thread(thr_act); | |
803 | } | |
804 | thr_act = next; | |
805 | } | |
806 | ||
807 | if (task->swap_flags & TASK_SW_MAKE_UNSWAPPABLE) { | |
808 | task->swap_flags &= ~TASK_SW_MAKE_UNSWAPPABLE; | |
809 | task->swap_state = TASK_SW_UNSWAPPABLE; | |
810 | swappable = FALSE; | |
811 | } else { | |
812 | task->swap_state = TASK_SW_IN; | |
813 | } | |
814 | ||
815 | task_swaprss_in += pmap_resident_count(task->map->pmap); | |
816 | task_swap_total_time += sched_tick - task->swap_stamp; | |
817 | /* note when task came back in */ | |
818 | task->swap_stamp = sched_tick; | |
819 | if (task->swap_flags & TASK_SW_WANT_IN) { | |
820 | task->swap_flags &= ~TASK_SW_WANT_IN; | |
821 | thread_wakeup((event_t)&task->swap_state); | |
822 | } | |
823 | assert((task->swap_flags & TASK_SW_ELIGIBLE) == 0); | |
824 | task_unlock(task); | |
825 | #if TASK_SW_DEBUG | |
826 | task_swapper_lock(); | |
827 | if (task_swap_debug && on_swapped_list(task)) { | |
828 | printf("task 0x%X on list at end of swap in\n", task); | |
829 | Debugger(""); | |
830 | } | |
831 | task_swapper_unlock(); | |
832 | #endif /* TASK_SW_DEBUG */ | |
833 | /* | |
834 | * Make the task eligible to be swapped again | |
835 | */ | |
836 | if (swappable) | |
837 | task_swapout_eligible(task); | |
838 | return(KERN_SUCCESS); | |
839 | } | |
840 | ||
841 | void wake_task_swapper(boolean_t now); /* forward */ | |
842 | ||
843 | /* | |
844 | * wake_task_swapper: [exported] | |
845 | * | |
846 | * Wakes up task swapper if now == TRUE or if at least | |
847 | * task_swap_cycle_time has elapsed since the last call. | |
848 | * | |
849 | * NOTE: this function is not multithreaded, so if there is | |
850 | * more than one caller, it must be modified. | |
851 | */ | |
852 | void | |
853 | wake_task_swapper(boolean_t now) | |
854 | { | |
855 | /* last_task_swap_cycle may require locking */ | |
856 | if (now || | |
857 | (sched_tick > (last_task_swap_cycle + task_swap_cycle_time))) { | |
858 | last_task_swap_cycle = sched_tick; | |
859 | if (task_swap_debug) | |
860 | printf("wake_task_swapper: waking swapper\n"); | |
861 | thread_wakeup((event_t)&swapped_tasks); /* poke swapper */ | |
862 | } | |
863 | } | |
864 | ||
865 | task_t pick_intask(void); /* forward */ | |
866 | /* | |
867 | * pick_intask: | |
868 | * returns a task to be swapped in, or TASK_NULL if nothing suitable is found. | |
869 | * | |
870 | * current algorithm: Return the task that has been swapped out the | |
871 | * longest, as long as it is > min_swap_time. It will be dequeued | |
872 | * if actually swapped in. | |
873 | * | |
874 | * NOTE:********************************************** | |
875 | * task->swap_rss (the size when the task was swapped out) could be used to | |
876 | * further refine the selection. Another possibility would be to look at | |
877 | * the state of the thread(s) to see if the task/threads would run if they | |
878 | * were swapped in. | |
879 | * *************************************************** | |
880 | * | |
881 | * Locking: no locks held upon entry and exit. | |
882 | */ | |
883 | task_t | |
884 | pick_intask(void) | |
885 | { | |
886 | register task_t task = TASK_NULL; | |
887 | ||
888 | task_swapper_lock(); | |
889 | /* the oldest task is the first one */ | |
890 | if (!queue_empty(&swapped_tasks)) { | |
891 | task = (task_t) queue_first(&swapped_tasks); | |
892 | assert(task != TASK_NULL); | |
893 | /* Make sure it's been out min_swap_time */ | |
894 | if ((sched_tick - task->swap_stamp) < min_swap_time) | |
895 | task = TASK_NULL; | |
896 | } | |
897 | task_swapper_unlock(); | |
898 | return(task); | |
899 | #if 0 | |
900 | /* | |
901 | * This code looks at the entire list of swapped tasks, but since | |
902 | * it does not yet do anything but look at time swapped, we | |
903 | * can simply use the fact that the queue is ordered, and take | |
904 | * the first one off the queue. | |
905 | */ | |
906 | task = (task_t)queue_first(&swapped_tasks); | |
907 | while (!queue_end(&swapped_tasks, (queue_entry_t)task)) { | |
908 | task_lock(task); | |
909 | tmp_time = sched_tick - task->swap_stamp; | |
910 | if (tmp_time > min_swap_time && tmp_time > time_swapped) { | |
911 | target_task = task; | |
912 | time_swapped = tmp_time; | |
913 | } | |
914 | task_unlock(task); | |
915 | task = (task_t)queue_next(&task->swapped_tasks); | |
916 | } | |
917 | task_swapper_unlock(); | |
918 | return(target_task); | |
919 | #endif | |
920 | } | |
921 | ||
922 | task_t pick_outtask(void); /* forward */ | |
923 | /* | |
924 | * pick_outtask: | |
925 | * returns a task to be swapped out, with a reference on the task, | |
926 | * or NULL if no suitable task is found. | |
927 | * | |
928 | * current algorithm: | |
929 | * | |
930 | * Examine all eligible tasks. While looking, use the first thread in | |
931 | * each task as an indication of the task's activity. Count up | |
932 | * "active" threads (those either runnable or sleeping). If the task | |
933 | * is active (by these criteria), swapped in, and resident | |
934 | * for at least min_res_time, then select the task with the largest | |
935 | * number of pages in memory. If there are less | |
936 | * than min_active_tasks active tasks in the system, then don't | |
937 | * swap anything out (this avoids swapping out the only running task | |
938 | * in the system, for example). | |
939 | * | |
940 | * NOTE: the task selected will not be removed from the eligible list. | |
941 | * This means that it will be selected again if it is not swapped | |
942 | * out, where it is removed from the list. | |
943 | * | |
944 | * Locking: no locks held upon entry and exit. Task_swapout_lock must be | |
945 | * taken before task locks. | |
946 | * | |
947 | * *************************************************** | |
948 | * TBD: | |
949 | * This algorithm only examines the first thread in the task. Currently, since | |
950 | * most swappable tasks in the system are single-threaded, this generalization | |
951 | * works reasonably well. However, the algorithm should be changed | |
952 | * to consider all threads in the task if more multi-threaded tasks were used. | |
953 | * *************************************************** | |
954 | */ | |
955 | ||
956 | #ifdef TASK_SW_STATS | |
957 | int inactive_task_count = 0; | |
958 | int empty_task_count = 0; | |
959 | #endif /* TASK_SW_STATS */ | |
960 | ||
961 | task_t | |
962 | pick_outtask(void) | |
963 | { | |
964 | register task_t task; | |
965 | register task_t target_task = TASK_NULL; | |
966 | unsigned long task_rss; | |
967 | unsigned long target_rss = 0; | |
968 | boolean_t wired; | |
969 | boolean_t active; | |
970 | int nactive = 0; | |
971 | ||
972 | task_swapout_lock(); | |
973 | if (queue_empty(&eligible_tasks)) { | |
974 | /* not likely to happen */ | |
975 | task_swapout_unlock(); | |
976 | return(TASK_NULL); | |
977 | } | |
978 | task = (task_t)queue_first(&eligible_tasks); | |
979 | while (!queue_end(&eligible_tasks, (queue_entry_t)task)) { | |
980 | int s; | |
981 | register thread_act_t thr_act; | |
982 | thread_t th; | |
983 | ||
984 | ||
985 | task_lock(task); | |
986 | /* | |
987 | * Don't swap real-time tasks. | |
988 | * XXX Should we enforce that or can we let really critical | |
989 | * tasks use task_swappable() to make sure they never end up | |
990 | * n the eligible list ? | |
991 | */ | |
992 | if (task->policy & POLICYCLASS_FIXEDPRI) { | |
993 | goto tryagain; | |
994 | } | |
995 | if (!task->active) { | |
996 | TASK_STATS_INCR(inactive_task_count); | |
997 | goto tryagain; | |
998 | } | |
999 | if (task->res_act_count == 0) { | |
1000 | TASK_STATS_INCR(empty_task_count); | |
1001 | goto tryagain; | |
1002 | } | |
1003 | assert(!queue_empty(&task->thr_acts)); | |
1004 | thr_act = (thread_act_t)queue_first(&task->thr_acts); | |
1005 | active = FALSE; | |
1006 | th = act_lock_thread(thr_act); | |
1007 | s = splsched(); | |
1008 | if (th != THREAD_NULL) | |
1009 | thread_lock(th); | |
1010 | if ((th == THREAD_NULL) || | |
1011 | (th->state == TH_RUN) || | |
1012 | (th->state & TH_WAIT)) { | |
1013 | /* | |
1014 | * thread is "active": either runnable | |
1015 | * or sleeping. Count it and examine | |
1016 | * it further below. | |
1017 | */ | |
1018 | nactive++; | |
1019 | active = TRUE; | |
1020 | } | |
1021 | if (th != THREAD_NULL) | |
1022 | thread_unlock(th); | |
1023 | splx(s); | |
1024 | act_unlock_thread(thr_act); | |
1025 | if (active && | |
1026 | (task->swap_state == TASK_SW_IN) && | |
1027 | ((sched_tick - task->swap_stamp) > min_res_time)) { | |
1028 | long rescount = pmap_resident_count(task->map->pmap); | |
1029 | /* | |
1030 | * thread must be "active", task must be swapped | |
1031 | * in and resident for at least min_res_time | |
1032 | */ | |
1033 | #if 0 | |
1034 | /* DEBUG Test round-robin strategy. Picking biggest task could cause extreme | |
1035 | * unfairness to such large interactive programs as xterm. Instead, pick the | |
1036 | * first task that has any pages resident: | |
1037 | */ | |
1038 | if (rescount > 1) { | |
1039 | task->ref_count++; | |
1040 | target_task = task; | |
1041 | task_unlock(task); | |
1042 | task_swapout_unlock(); | |
1043 | return(target_task); | |
1044 | } | |
1045 | #else | |
1046 | if (rescount > target_rss) { | |
1047 | /* | |
1048 | * task is not swapped, and it has the | |
1049 | * largest rss seen so far. | |
1050 | */ | |
1051 | task->ref_count++; | |
1052 | target_rss = rescount; | |
1053 | assert(target_task != task); | |
1054 | if (target_task != TASK_NULL) | |
1055 | task_deallocate(target_task); | |
1056 | target_task = task; | |
1057 | } | |
1058 | #endif | |
1059 | } | |
1060 | tryagain: | |
1061 | task_unlock(task); | |
1062 | task = (task_t)queue_next(&task->swapped_tasks); | |
1063 | } | |
1064 | task_swapout_unlock(); | |
1065 | /* only swap out if there are at least min_active_tasks */ | |
1066 | if (nactive < min_active_tasks) { | |
1067 | if (target_task != TASK_NULL) { | |
1068 | task_deallocate(target_task); | |
1069 | target_task = TASK_NULL; | |
1070 | } | |
1071 | } | |
1072 | return(target_task); | |
1073 | } | |
1074 | ||
#if	TASK_SW_DEBUG
void print_pid(task_t task, unsigned long n1, unsigned long n2,
	       const char *comp, const char *inout);	/* forward */
/*
 *	print_pid:
 *
 *	Debug helper: prints one line reporting that "task" was swapped
 *	"inout" ("in" or "out" as passed by the caller), the two values
 *	n1 and n2 with the comparison string "comp" between them, and the
 *	task's current pmap resident page count.  The resident count is
 *	read with the task locked.
 */
void
print_pid(
	task_t task,
	unsigned long n1,
	unsigned long n2,
	const char *comp,
	const char *inout)
{
	long rescount;

	task_lock(task);
	rescount = pmap_resident_count(task->map->pmap);
	task_unlock(task);
	/* conversion specifiers match the (unsigned) long arguments */
	printf("task_swapper: swapped %s task %lx; %lu %s %lu; res=%ld\n",
	       inout, (unsigned long)task, n1, comp, n2, rescount);
}
#endif
1094 | ||
1095 | /* | |
1096 | * task_swapper: [exported] | |
1097 | * | |
1098 | * Executes as a separate kernel thread. | |
1099 | */ | |
1100 | #define MAX_LOOP 3 | |
1101 | void | |
1102 | task_swapper(void) | |
1103 | { | |
1104 | task_t outtask, intask; | |
1105 | int timeout; | |
1106 | int loopcnt = 0; | |
1107 | boolean_t start_swapping; | |
1108 | boolean_t stop_swapping; | |
1109 | int local_page_free_avg; | |
1110 | extern int hz; | |
1111 | ||
1112 | thread_swappable(current_act(), FALSE); | |
1113 | stack_privilege(current_thread()); | |
1114 | ||
1115 | spllo(); | |
1116 | ||
1117 | for (;;) { | |
1118 | local_page_free_avg = vm_page_free_avg; | |
1119 | while (TRUE) { | |
1120 | #if 0 | |
1121 | if (task_swap_debug) | |
1122 | printf("task_swapper: top of loop; cnt = %d\n",loopcnt); | |
1123 | #endif | |
1124 | intask = pick_intask(); | |
1125 | ||
1126 | start_swapping = ((vm_pageout_rate_avg > swap_start_pageout_rate) || | |
1127 | (vm_grab_rate_avg > max_grab_rate)); | |
1128 | stop_swapping = (vm_pageout_rate_avg < swap_stop_pageout_rate); | |
1129 | ||
1130 | /* | |
1131 | * If a lot of paging is going on, or another task should come | |
1132 | * in but memory is tight, find something to swap out and start | |
1133 | * it. Don't swap any task out if task swapping is disabled. | |
1134 | * vm_page_queue_free_lock protects the vm globals. | |
1135 | */ | |
1136 | outtask = TASK_NULL; | |
1137 | if (start_swapping || | |
1138 | (!stop_swapping && intask && | |
1139 | ((local_page_free_avg / AVE_SCALE) < vm_page_free_target)) | |
1140 | ) { | |
1141 | if (task_swap_enable && | |
1142 | (outtask = pick_outtask()) && | |
1143 | (task_swapout(outtask) == KERN_SUCCESS)) { | |
1144 | unsigned long rss; | |
1145 | #if TASK_SW_DEBUG | |
1146 | if (task_swap_debug) | |
1147 | print_pid(outtask, local_page_free_avg / AVE_SCALE, | |
1148 | vm_page_free_target, "<", | |
1149 | "out"); | |
1150 | #endif | |
1151 | rss = outtask->swap_rss; | |
1152 | if (outtask->swap_nswap == 1) | |
1153 | rss /= 2; /* divide by 2 if never out */ | |
1154 | local_page_free_avg += (rss/short_avg_interval) * AVE_SCALE; | |
1155 | } | |
1156 | if (outtask != TASK_NULL) | |
1157 | task_deallocate(outtask); | |
1158 | } | |
1159 | ||
1160 | /* | |
1161 | * If there is an eligible task to bring in and there are at | |
1162 | * least vm_page_free_target free pages, swap it in. If task | |
1163 | * swapping has been disabled, bring the task in anyway. | |
1164 | */ | |
1165 | if (intask && ((local_page_free_avg / AVE_SCALE) >= | |
1166 | vm_page_free_target || | |
1167 | stop_swapping || !task_swap_enable)) { | |
1168 | if (task_swapin(intask, FALSE) == KERN_SUCCESS) { | |
1169 | unsigned long rss; | |
1170 | #if TASK_SW_DEBUG | |
1171 | if (task_swap_debug) | |
1172 | print_pid(intask, local_page_free_avg / AVE_SCALE, | |
1173 | vm_page_free_target, ">=", | |
1174 | "in"); | |
1175 | #endif | |
1176 | rss = intask->swap_rss; | |
1177 | if (intask->swap_nswap == 1) | |
1178 | rss /= 2; /* divide by 2 if never out */ | |
1179 | local_page_free_avg -= (rss/short_avg_interval) * AVE_SCALE; | |
1180 | } | |
1181 | } | |
1182 | /* | |
1183 | * XXX | |
1184 | * Here we have to decide whether to continue swapping | |
1185 | * in and/or out before sleeping. The decision should | |
1186 | * be made based on the previous action (swapin/out) and | |
1187 | * current system parameters, such as paging rates and | |
1188 | * demand. | |
1189 | * The function, compute_vm_averages, which does these | |
1190 | * calculations, depends on being called every second, | |
1191 | * so we can't just do the same thing. | |
1192 | */ | |
1193 | if (++loopcnt < MAX_LOOP) | |
1194 | continue; | |
1195 | ||
1196 | /* | |
1197 | * Arrange to be awakened if paging is still heavy or there are | |
1198 | * any tasks partially or completely swapped out. (Otherwise, | |
1199 | * the wakeup will come from the external trigger(s).) | |
1200 | */ | |
1201 | timeout = 0; | |
1202 | if (start_swapping) | |
1203 | timeout = task_swap_cycle_time; | |
1204 | else { | |
1205 | task_swapper_lock(); | |
1206 | if (!queue_empty(&swapped_tasks)) | |
1207 | timeout = min_swap_time; | |
1208 | task_swapper_unlock(); | |
1209 | } | |
1210 | assert_wait((event_t)&swapped_tasks, THREAD_UNINT); | |
1211 | if (timeout) { | |
1212 | if (task_swap_debug) | |
1213 | printf("task_swapper: set timeout of %d\n", | |
1214 | timeout); | |
1215 | thread_set_timeout(timeout, NSEC_PER_SEC); | |
1216 | } | |
1217 | if (task_swap_debug) | |
1218 | printf("task_swapper: blocking\n"); | |
9bccf70c | 1219 | thread_block(THREAD_CONTINUE_NULL); |
1c79356b A |
1220 | if (timeout) { |
1221 | thread_cancel_timeout(current_thread()); | |
1222 | } | |
1223 | /* reset locals */ | |
1224 | loopcnt = 0; | |
1225 | local_page_free_avg = vm_page_free_avg; | |
1226 | } | |
1227 | } | |
1228 | } | |
1229 | ||
/* from BSD */
/*
 *	ave:
 *
 *	Folds a new sample "cnt" into the scaled running average "smooth"
 *	over a window of "time" samples:
 *
 *		smooth = ((time - 1) * smooth + cnt * AVE_SCALE) / time
 *
 *	"smooth" must be a modifiable lvalue.  Every argument is
 *	parenthesized in the expansion so that expression arguments keep
 *	their own precedence; note that "smooth" and "time" are each
 *	evaluated twice, so they must not have side effects.
 */
#define	ave(smooth, cnt, time) \
	((smooth) = ((((time) - 1) * (smooth)) + ((cnt) * AVE_SCALE)) / (time))
1233 | ||
1234 | /* | |
1235 | * We estimate the system paging load in more than one metric: | |
1236 | * 1) the total number of calls into the function, vm_page_grab, | |
1237 | * which allocates all page frames for real pages. | |
1238 | * 2) the total number of pages paged in and out of paging files. | |
1239 | * This is a measure of page cleaning and faulting from backing | |
1240 | * store. | |
1241 | * | |
1242 | * When either metric passes a threshold, tasks are swapped out. | |
1243 | */ | |
1244 | long last_grab_count = 0; | |
1245 | long last_pageout_count = 0; | |
1246 | ||
1247 | /* | |
1248 | * compute_vm_averages: [exported] | |
1249 | * | |
1250 | * This function is to be called once a second to calculate average paging | |
1251 | * demand and average numbers of free pages for use by the task swapper. | |
1252 | * Can also be used to wake up task swapper at desired thresholds. | |
1253 | * | |
1254 | * NOTE: this function is single-threaded, and requires locking if | |
1255 | * ever there are multiple callers. | |
1256 | */ | |
1257 | void | |
1258 | compute_vm_averages(void) | |
1259 | { | |
1260 | extern unsigned long vm_page_grab_count; | |
1261 | long grab_count, pageout_count; | |
1262 | int i; | |
1263 | ||
1264 | ave(vm_page_free_avg, vm_page_free_count, short_avg_interval); | |
1265 | ave(vm_page_free_longavg, vm_page_free_count, long_avg_interval); | |
1266 | ||
1267 | /* | |
1268 | * NOTE: the vm_page_grab_count and vm_stat structure are | |
1269 | * under control of vm_page_queue_free_lock. We're simply reading | |
1270 | * memory here, and the numbers don't depend on each other, so | |
1271 | * no lock is taken. | |
1272 | */ | |
1273 | ||
1274 | grab_count = vm_page_grab_count; | |
1275 | pageout_count = 0; | |
1276 | for (i = 0; i < NCPUS; i++) { | |
1277 | pageout_count += vm_stat[i].pageouts; | |
1278 | } | |
1279 | ||
1280 | ave(vm_pageout_rate_avg, pageout_count - last_pageout_count, | |
1281 | short_avg_interval); | |
1282 | ave(vm_pageout_rate_longavg, pageout_count - last_pageout_count, | |
1283 | long_avg_interval); | |
1284 | ave(vm_grab_rate_avg, grab_count - last_grab_count, | |
1285 | short_avg_interval); | |
1286 | last_grab_count = grab_count; | |
1287 | last_pageout_count = pageout_count; | |
1288 | ||
1289 | /* | |
1290 | * Adjust swap_{start,stop}_pageout_rate to the paging rate peak. | |
1291 | * This is an attempt to find the optimum paging rates at which | |
1292 | * to trigger task swapping on or off to regulate paging activity, | |
1293 | * depending on the hardware capacity. | |
1294 | */ | |
1295 | if (vm_pageout_rate_avg > vm_pageout_rate_peakavg) { | |
1296 | unsigned int desired_max; | |
1297 | ||
1298 | vm_pageout_rate_peakavg = vm_pageout_rate_avg; | |
1299 | swap_start_pageout_rate = | |
1300 | vm_pageout_rate_peakavg * swap_pageout_high_water_mark / 100; | |
1301 | swap_stop_pageout_rate = | |
1302 | vm_pageout_rate_peakavg * swap_pageout_low_water_mark / 100; | |
1303 | } | |
1304 | ||
1305 | #if TASK_SW_DEBUG | |
1306 | /* | |
1307 | * For measurements, allow fixed values. | |
1308 | */ | |
1309 | if (fixed_swap_start_pageout_rate) | |
1310 | swap_start_pageout_rate = fixed_swap_start_pageout_rate; | |
1311 | if (fixed_swap_stop_pageout_rate) | |
1312 | swap_stop_pageout_rate = fixed_swap_stop_pageout_rate; | |
1313 | #endif /* TASK_SW_DEBUG */ | |
1314 | ||
1315 | #if TASK_SW_DEBUG | |
1316 | if (task_swap_stats) | |
1317 | printf("vm_avgs: pageout_rate: %d %d (on/off: %d/%d); page_free: %d %d (tgt: %d)\n", | |
1318 | vm_pageout_rate_avg / AVE_SCALE, | |
1319 | vm_pageout_rate_longavg / AVE_SCALE, | |
1320 | swap_start_pageout_rate / AVE_SCALE, | |
1321 | swap_stop_pageout_rate / AVE_SCALE, | |
1322 | vm_page_free_avg / AVE_SCALE, | |
1323 | vm_page_free_longavg / AVE_SCALE, | |
1324 | vm_page_free_target); | |
1325 | #endif /* TASK_SW_DEBUG */ | |
1326 | ||
1327 | if (vm_page_free_avg / AVE_SCALE <= vm_page_free_target) { | |
1328 | if (task_swap_on) { | |
1329 | /* The following is a delicate attempt to balance the | |
1330 | * need for reasonably rapid response to system | |
1331 | * thrashing, with the equally important desire to | |
1332 | * prevent the onset of swapping simply because of a | |
1333 | * short burst of paging activity. | |
1334 | */ | |
1335 | if ((vm_pageout_rate_longavg > swap_stop_pageout_rate) && | |
1336 | (vm_pageout_rate_avg > swap_start_pageout_rate) || | |
1337 | (vm_pageout_rate_avg > vm_pageout_rate_peakavg) || | |
1338 | (vm_grab_rate_avg > max_grab_rate)) | |
1339 | wake_task_swapper(FALSE); | |
1340 | } | |
1341 | } else /* page demand is low; should consider swapin */ { | |
1342 | if (tasks_swapped_out != 0) | |
1343 | wake_task_swapper(TRUE); | |
1344 | } | |
1345 | } | |
1346 | ||
1347 | void | |
1348 | task_swapout_eligible(task_t task) | |
1349 | { | |
1350 | #if TASK_SW_DEBUG | |
1351 | task_swapper_lock(); | |
1352 | if (task_swap_debug && on_swapped_list(task)) { | |
1353 | printf("swapout_eligible: task 0x%X on swapped list\n", task); | |
1354 | Debugger(""); | |
1355 | } | |
1356 | task_swapper_unlock(); | |
1357 | #endif | |
1358 | task_swapout_lock(); | |
1359 | task_lock(task); | |
1360 | #if TASK_SW_DEBUG | |
1361 | if (task->swap_flags & TASK_SW_ELIGIBLE) { | |
1362 | printf("swapout_eligible: task 0x%X already eligible\n", task); | |
1363 | } | |
1364 | #endif /* TASK_SW_DEBUG */ | |
1365 | if ((task->swap_state == TASK_SW_IN) && | |
1366 | ((task->swap_flags & TASK_SW_ELIGIBLE) == 0)) { | |
1367 | queue_enter(&eligible_tasks,task,task_t,swapped_tasks); | |
1368 | task->swap_flags |= TASK_SW_ELIGIBLE; | |
1369 | } | |
1370 | task_unlock(task); | |
1371 | task_swapout_unlock(); | |
1372 | } | |
1373 | ||
1374 | void | |
1375 | task_swapout_ineligible(task_t task) | |
1376 | { | |
1377 | #if TASK_SW_DEBUG | |
1378 | task_swapper_lock(); | |
1379 | if (task_swap_debug && on_swapped_list(task)) { | |
1380 | printf("swapout_ineligible: task 0x%X on swapped list\n", task); | |
1381 | Debugger(""); | |
1382 | } | |
1383 | task_swapper_unlock(); | |
1384 | #endif | |
1385 | task_swapout_lock(); | |
1386 | task_lock(task); | |
1387 | #if TASK_SW_DEBUG | |
1388 | if (!(task->swap_flags & TASK_SW_ELIGIBLE)) | |
1389 | printf("swapout_ineligible: task 0x%X already inel.\n", task); | |
1390 | #endif /* TASK_SW_DEBUG */ | |
1391 | if ((task->swap_state != TASK_SW_IN) && | |
1392 | (task->swap_flags & TASK_SW_ELIGIBLE)) { | |
1393 | queue_remove(&eligible_tasks, task, task_t, swapped_tasks); | |
1394 | task->swap_flags &= ~TASK_SW_ELIGIBLE; | |
1395 | } | |
1396 | task_unlock(task); | |
1397 | task_swapout_unlock(); | |
1398 | } | |
1399 | ||
1400 | int task_swap_ast_aborted = 0; | |
1401 | ||
1402 | /* | |
1403 | * Process an AST_SWAPOUT. | |
1404 | */ | |
1405 | void | |
1406 | swapout_ast() | |
1407 | { | |
1408 | spl_t s; | |
1409 | thread_act_t act; | |
1410 | thread_t thread; | |
1411 | ||
1412 | act = current_act(); | |
1413 | ||
1414 | /* | |
1415 | * Task is being swapped out. First mark it as suspended | |
1416 | * and halted, then call thread_swapout_enqueue to put | |
1417 | * the thread on the queue for task_swap_swapout_threads | |
1418 | * to swap out the thread. | |
1419 | */ | |
1420 | /* | |
1421 | * Don't swap unswappable threads | |
1422 | */ | |
1423 | thread = act_lock_thread(act); | |
1424 | s = splsched(); | |
1425 | if (thread) | |
1426 | thread_lock(thread); | |
1427 | if ((act->ast & AST_SWAPOUT) == 0) { | |
1428 | /* | |
1429 | * Race with task_swapin. Abort swapout. | |
1430 | */ | |
1431 | task_swap_ast_aborted++; /* not locked XXX */ | |
1432 | if (thread) | |
1433 | thread_unlock(thread); | |
1434 | splx(s); | |
1435 | act_unlock_thread(act); | |
1436 | } else if (act->swap_state == TH_SW_IN) { | |
1437 | /* | |
1438 | * Mark swap_state as TH_SW_TASK_SWAPPING to avoid | |
1439 | * race with thread swapper, which will only | |
1440 | * swap thread if swap_state is TH_SW_IN. | |
1441 | * This way, the thread can only be swapped by | |
1442 | * the task swapping mechanism. | |
1443 | */ | |
1444 | act->swap_state |= TH_SW_TASK_SWAPPING; | |
1445 | /* assert(act->suspend_count == 0); XXX ? */ | |
1446 | if (thread) | |
1447 | thread_unlock(thread); | |
1448 | if (act->suspend_count++ == 0) /* inline thread_hold */ | |
1449 | install_special_handler(act); | |
1450 | /* self->state |= TH_HALTED; */ | |
1451 | thread_ast_clear(act, AST_SWAPOUT); | |
1452 | /* | |
1453 | * Initialize the swap_queue fields to allow an extra | |
1454 | * queue_remove() in task_swapin if we lose the race | |
1455 | * (task_swapin can be called before we complete | |
1456 | * thread_swapout_enqueue). | |
1457 | */ | |
1458 | queue_init((queue_t) &act->swap_queue); | |
1459 | splx(s); | |
1460 | act_unlock_thread(act); | |
1461 | /* this must be called at normal interrupt level */ | |
1462 | thread_swapout_enqueue(act); | |
1463 | } else { | |
1464 | /* thread isn't swappable; continue running */ | |
1465 | assert(act->swap_state == TH_SW_UNSWAPPABLE); | |
1466 | if (thread) | |
1467 | thread_unlock(thread); | |
1468 | thread_ast_clear(act, AST_SWAPOUT); | |
1469 | splx(s); | |
1470 | act_unlock_thread(act); | |
1471 | } | |
1472 | } | |
1473 | ||
1474 | #endif /* TASK_SWAPPER */ |