2 * Copyright (c) 2009 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
29 #include <mach/mach_types.h>
30 #include <mach/machine.h>
31 #include <mach/policy.h>
32 #include <mach/sync_policy.h>
33 #include <mach/thread_act.h>
35 #include <machine/machine_routines.h>
36 #include <machine/sched_param.h>
37 #include <machine/machine_cpu.h>
39 #include <kern/kern_types.h>
40 #include <kern/clock.h>
41 #include <kern/counters.h>
42 #include <kern/cpu_number.h>
43 #include <kern/cpu_data.h>
44 #include <kern/debug.h>
45 #include <kern/lock.h>
46 #include <kern/macro_help.h>
47 #include <kern/machine.h>
48 #include <kern/misc_protos.h>
49 #include <kern/processor.h>
50 #include <kern/queue.h>
51 #include <kern/sched.h>
52 #include <kern/sched_prim.h>
53 #include <kern/syscall_subr.h>
54 #include <kern/task.h>
55 #include <kern/thread.h>
56 #include <kern/wait_queue.h>
59 #include <vm/vm_kern.h>
60 #include <vm/vm_map.h>
64 #include <sys/kdebug.h>
66 #if defined(CONFIG_SCHED_GRRR_CORE)
69 grrr_priority_mapping_init(void);
87 grrr_sorted_list_insert_group(grrr_run_queue_t rq
,
91 grrr_rescale_work(grrr_run_queue_t rq
);
94 grrr_runqueue_init(grrr_run_queue_t runq
);
96 /* Map Mach priorities to ones suitable for proportional sharing */
97 static grrr_proportional_priority_t grrr_priority_mapping
[NRQS
];
99 /* Map each proportional priority to its group */
100 static grrr_group_index_t grrr_group_mapping
[NUM_GRRR_PROPORTIONAL_PRIORITIES
];
102 uint32_t grrr_rescale_tick
;
104 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
106 #if defined(CONFIG_SCHED_GRRR)
109 sched_grrr_init(void);
112 sched_grrr_timebase_init(void);
115 sched_grrr_processor_init(processor_t processor
);
118 sched_grrr_pset_init(processor_set_t pset
);
121 sched_grrr_maintenance_continuation(void);
124 sched_grrr_choose_thread(processor_t processor
,
128 sched_grrr_steal_thread(processor_set_t pset
);
131 sched_grrr_compute_priority(thread_t thread
,
132 boolean_t override_depress
);
135 sched_grrr_choose_processor( processor_set_t pset
,
136 processor_t processor
,
140 sched_grrr_processor_enqueue(
141 processor_t processor
,
146 sched_grrr_processor_queue_shutdown(
147 processor_t processor
);
150 sched_grrr_processor_queue_remove(
151 processor_t processor
,
155 sched_grrr_processor_queue_empty(processor_t processor
);
158 sched_grrr_processor_queue_has_priority(processor_t processor
,
163 sched_grrr_priority_is_urgent(int priority
);
166 sched_grrr_processor_csw_check(processor_t processor
);
169 sched_grrr_initial_quantum_size(thread_t thread
);
172 sched_grrr_initial_thread_sched_mode(task_t parent_task
);
175 sched_grrr_supports_timeshare_mode(void);
178 sched_grrr_can_update_priority(thread_t thread
);
181 sched_grrr_update_priority(thread_t thread
);
184 sched_grrr_lightweight_update_priority(thread_t thread
);
187 sched_grrr_quantum_expire(thread_t thread
);
190 sched_grrr_should_current_thread_rechoose_processor(processor_t processor
);
193 sched_grrr_processor_runq_count(processor_t processor
);
196 sched_grrr_processor_runq_stats_count_sum(processor_t processor
);
198 const struct sched_dispatch_table sched_grrr_dispatch
= {
200 sched_grrr_timebase_init
,
201 sched_grrr_processor_init
,
202 sched_grrr_pset_init
,
203 sched_grrr_maintenance_continuation
,
204 sched_grrr_choose_thread
,
205 sched_grrr_steal_thread
,
206 sched_grrr_compute_priority
,
207 sched_grrr_choose_processor
,
208 sched_grrr_processor_enqueue
,
209 sched_grrr_processor_queue_shutdown
,
210 sched_grrr_processor_queue_remove
,
211 sched_grrr_processor_queue_empty
,
212 sched_grrr_priority_is_urgent
,
213 sched_grrr_processor_csw_check
,
214 sched_grrr_processor_queue_has_priority
,
215 sched_grrr_initial_quantum_size
,
216 sched_grrr_initial_thread_sched_mode
,
217 sched_grrr_supports_timeshare_mode
,
218 sched_grrr_can_update_priority
,
219 sched_grrr_update_priority
,
220 sched_grrr_lightweight_update_priority
,
221 sched_grrr_quantum_expire
,
222 sched_grrr_should_current_thread_rechoose_processor
,
223 sched_grrr_processor_runq_count
,
224 sched_grrr_processor_runq_stats_count_sum
,
225 sched_grrr_fairshare_init
,
226 sched_grrr_fairshare_runq_count
,
227 sched_grrr_fairshare_runq_stats_count_sum
,
228 sched_grrr_fairshare_enqueue
,
229 sched_grrr_fairshare_dequeue
,
230 sched_grrr_fairshare_queue_remove
,
231 TRUE
/* direct_dispatch_to_idle_processors */
234 extern int default_preemption_rate
;
235 extern int max_unsafe_quanta
;
237 static uint32_t grrr_quantum_us
;
238 static uint32_t grrr_quantum
;
240 static uint64_t sched_grrr_tick_deadline
;
243 sched_grrr_init(void)
245 if (default_preemption_rate
< 1)
246 default_preemption_rate
= 100;
247 grrr_quantum_us
= (1000 * 1000) / default_preemption_rate
;
249 printf("standard grrr timeslicing quantum is %d us\n", grrr_quantum_us
);
251 grrr_priority_mapping_init();
255 sched_grrr_timebase_init(void)
259 /* standard timeslicing quantum */
260 clock_interval_to_absolutetime_interval(
261 grrr_quantum_us
, NSEC_PER_USEC
, &abstime
);
262 assert((abstime
>> 32) == 0 && (uint32_t)abstime
!= 0);
263 grrr_quantum
= (uint32_t)abstime
;
265 thread_depress_time
= 1 * grrr_quantum
;
266 default_timeshare_computation
= grrr_quantum
/ 2;
267 default_timeshare_constraint
= grrr_quantum
;
269 max_unsafe_computation
= max_unsafe_quanta
* grrr_quantum
;
270 sched_safe_duration
= 2 * max_unsafe_quanta
* grrr_quantum
;
275 sched_grrr_processor_init(processor_t processor
)
277 grrr_runqueue_init(&processor
->grrr_runq
);
281 sched_grrr_pset_init(processor_set_t pset __unused
)
286 sched_grrr_maintenance_continuation(void)
288 uint64_t abstime
= mach_absolute_time();
293 * Compute various averages.
297 if (sched_grrr_tick_deadline
== 0)
298 sched_grrr_tick_deadline
= abstime
;
300 clock_deadline_for_periodic_event(10*sched_one_second_interval
, abstime
,
301 &sched_grrr_tick_deadline
);
303 assert_wait_deadline((event_t
)sched_grrr_maintenance_continuation
, THREAD_UNINT
, sched_grrr_tick_deadline
);
304 thread_block((thread_continue_t
)sched_grrr_maintenance_continuation
);
310 sched_grrr_choose_thread(processor_t processor
,
311 int priority __unused
)
313 grrr_run_queue_t rq
= &processor
->grrr_runq
;
315 return grrr_select(rq
);
319 sched_grrr_steal_thread(processor_set_t pset
)
323 return (THREAD_NULL
);
328 sched_grrr_compute_priority(thread_t thread
,
329 boolean_t override_depress __unused
)
331 set_sched_pri(thread
, thread
->priority
);
335 sched_grrr_choose_processor( processor_set_t pset
,
336 processor_t processor
,
339 return choose_processor(pset
, processor
, thread
);
343 sched_grrr_processor_enqueue(
344 processor_t processor
,
346 integer_t options __unused
)
348 grrr_run_queue_t rq
= &processor
->grrr_runq
;
351 result
= grrr_enqueue(rq
, thread
);
353 thread
->runq
= processor
;
359 sched_grrr_processor_queue_shutdown(
360 processor_t processor
)
362 processor_set_t pset
= processor
->processor_set
;
364 queue_head_t tqueue
, bqueue
;
369 while ((thread
= sched_grrr_choose_thread(processor
, IDLEPRI
)) != THREAD_NULL
) {
370 if (thread
->bound_processor
== PROCESSOR_NULL
) {
371 enqueue_tail(&tqueue
, (queue_entry_t
)thread
);
373 enqueue_tail(&bqueue
, (queue_entry_t
)thread
);
377 while ((thread
= (thread_t
)dequeue_head(&bqueue
)) != THREAD_NULL
) {
378 sched_grrr_processor_enqueue(processor
, thread
, SCHED_TAILQ
);
383 while ((thread
= (thread_t
)dequeue_head(&tqueue
)) != THREAD_NULL
) {
386 thread_setrun(thread
, SCHED_TAILQ
);
388 thread_unlock(thread
);
393 sched_grrr_processor_queue_remove(
394 processor_t processor
,
399 rqlock
= &processor
->processor_set
->sched_lock
;
402 if (processor
== thread
->runq
) {
404 * Thread is on a run queue and we have a lock on
407 grrr_run_queue_t rq
= &processor
->grrr_runq
;
409 grrr_remove(rq
, thread
);
412 * The thread left the run queue before we could
413 * lock the run queue.
415 assert(thread
->runq
== PROCESSOR_NULL
);
416 processor
= PROCESSOR_NULL
;
419 simple_unlock(rqlock
);
421 return (processor
!= PROCESSOR_NULL
);
425 sched_grrr_processor_queue_empty(processor_t processor __unused
)
429 result
= (processor
->grrr_runq
.count
== 0);
435 sched_grrr_processor_queue_has_priority(processor_t processor
,
437 boolean_t gte __unused
)
439 grrr_run_queue_t rq
= &processor
->grrr_runq
;
442 i
= grrr_group_mapping
[grrr_priority_mapping
[priority
]];
443 for ( ; i
< NUM_GRRR_GROUPS
; i
++) {
444 if (rq
->groups
[i
].count
> 0)
451 /* Implement sched_preempt_pri in code */
453 sched_grrr_priority_is_urgent(int priority
)
455 if (priority
<= BASEPRI_FOREGROUND
)
458 if (priority
< MINPRI_KERNEL
)
461 if (priority
>= BASEPRI_PREEMPT
)
468 sched_grrr_processor_csw_check(processor_t processor
)
472 count
= sched_grrr_processor_runq_count(processor
);
483 sched_grrr_initial_quantum_size(thread_t thread __unused
)
489 sched_grrr_initial_thread_sched_mode(task_t parent_task
)
491 if (parent_task
== kernel_task
)
492 return TH_MODE_FIXED
;
494 return TH_MODE_TIMESHARE
;
498 sched_grrr_supports_timeshare_mode(void)
504 sched_grrr_can_update_priority(thread_t thread __unused
)
510 sched_grrr_update_priority(thread_t thread __unused
)
516 sched_grrr_lightweight_update_priority(thread_t thread __unused
)
522 sched_grrr_quantum_expire(
523 thread_t thread __unused
)
529 sched_grrr_should_current_thread_rechoose_processor(processor_t processor __unused
)
535 sched_grrr_processor_runq_count(processor_t processor
)
537 return processor
->grrr_runq
.count
;
541 sched_grrr_processor_runq_stats_count_sum(processor_t processor
)
543 return processor
->grrr_runq
.runq_stats
.count_sum
;
546 #endif /* defined(CONFIG_SCHED_GRRR) */
548 #if defined(CONFIG_SCHED_GRRR_CORE)
551 grrr_priority_mapping_init(void)
555 /* Map 0->0 up to 10->20 */
556 for (i
=0; i
<= 10; i
++) {
557 grrr_priority_mapping
[i
] = 2*i
;
560 /* Map user priorities 11->33 up to 51 -> 153 */
561 for (i
=11; i
<= 51; i
++) {
562 grrr_priority_mapping
[i
] = 3*i
;
565 /* Map high priorities 52->180 up to 127->255 */
566 for (i
=52; i
<= 127; i
++) {
567 grrr_priority_mapping
[i
] = 128 + i
;
570 for (i
= 0; i
< NUM_GRRR_PROPORTIONAL_PRIORITIES
; i
++) {
574 /* Calculate log(i); */
575 for (j
=0, k
=1; k
<= i
; j
++, k
*= 2);
579 grrr_group_mapping
[i
] = i
>> 2;
585 grrr_intragroup_schedule(grrr_group_t group
)
589 if (group
->count
== 0) {
593 thread
= group
->current_client
;
594 if (thread
== THREAD_NULL
) {
595 thread
= (thread_t
)queue_first(&group
->clients
);
598 if (1 /* deficit */) {
599 group
->current_client
= (thread_t
)queue_next((queue_entry_t
)thread
);
600 if (queue_end(&group
->clients
, (queue_entry_t
)group
->current_client
)) {
601 group
->current_client
= (thread_t
)queue_first(&group
->clients
);
604 thread
= group
->current_client
;
611 grrr_intergroup_schedule(grrr_run_queue_t rq
)
616 if (rq
->count
== 0) {
620 group
= rq
->current_group
;
622 if (group
== GRRR_GROUP_NULL
) {
623 group
= (grrr_group_t
)queue_first(&rq
->sorted_group_list
);
626 thread
= grrr_intragroup_schedule(group
);
628 if ((group
->work
>= (UINT32_MAX
-256)) || (rq
->last_rescale_tick
!= grrr_rescale_tick
)) {
629 grrr_rescale_work(rq
);
633 if (queue_end(&rq
->sorted_group_list
, queue_next((queue_entry_t
)group
))) {
634 /* last group, go back to beginning */
635 group
= (grrr_group_t
)queue_first(&rq
->sorted_group_list
);
637 grrr_group_t nextgroup
= (grrr_group_t
)queue_next((queue_entry_t
)group
);
638 uint64_t orderleft
, orderright
;
641 * The well-ordering condition for intergroup selection is:
643 * (group->work+1) / (nextgroup->work+1) > (group->weight) / (nextgroup->weight)
645 * Multiply both sides by their denominators to avoid division
648 orderleft
= (group
->work
+ 1) * ((uint64_t)nextgroup
->weight
);
649 orderright
= (nextgroup
->work
+ 1) * ((uint64_t)group
->weight
);
650 if (orderleft
> orderright
) {
653 group
= (grrr_group_t
)queue_first(&rq
->sorted_group_list
);
657 rq
->current_group
= group
;
663 grrr_runqueue_init(grrr_run_queue_t runq
)
665 grrr_group_index_t index
;
669 for (index
= 0; index
< NUM_GRRR_GROUPS
; index
++) {
670 unsigned int prisearch
;
673 prisearch
< NUM_GRRR_PROPORTIONAL_PRIORITIES
;
675 if (grrr_group_mapping
[prisearch
] == index
) {
676 runq
->groups
[index
].minpriority
= (grrr_proportional_priority_t
)prisearch
;
681 runq
->groups
[index
].index
= index
;
683 queue_init(&runq
->groups
[index
].clients
);
684 runq
->groups
[index
].count
= 0;
685 runq
->groups
[index
].weight
= 0;
686 runq
->groups
[index
].work
= 0;
687 runq
->groups
[index
].current_client
= THREAD_NULL
;
690 queue_init(&runq
->sorted_group_list
);
692 runq
->current_group
= GRRR_GROUP_NULL
;
696 grrr_rescale_work(grrr_run_queue_t rq
)
698 grrr_group_index_t index
;
700 /* avoid overflow by scaling by 1/8th */
701 for (index
= 0; index
< NUM_GRRR_GROUPS
; index
++) {
702 rq
->groups
[index
].work
>>= 3;
705 rq
->last_rescale_tick
= grrr_rescale_tick
;
713 grrr_proportional_priority_t gpriority
;
714 grrr_group_index_t gindex
;
717 gpriority
= grrr_priority_mapping
[thread
->sched_pri
];
718 gindex
= grrr_group_mapping
[gpriority
];
719 group
= &rq
->groups
[gindex
];
722 thread
->grrr_deficit
= 0;
725 if (group
->count
== 0) {
726 /* Empty group, this is the first client */
727 enqueue_tail(&group
->clients
, (queue_entry_t
)thread
);
729 group
->weight
= gpriority
;
730 group
->current_client
= thread
;
732 /* Insert before the current client */
733 if (group
->current_client
== THREAD_NULL
||
734 queue_first(&group
->clients
) == (queue_entry_t
)group
->current_client
) {
735 enqueue_head(&group
->clients
, (queue_entry_t
)thread
);
737 insque((queue_entry_t
)thread
, queue_prev((queue_entry_t
)group
->current_client
));
739 SCHED_STATS_RUNQ_CHANGE(&rq
->runq_stats
, rq
->count
);
741 group
->weight
+= gpriority
;
743 /* Since there was already a client, this is on the per-processor sorted list already */
744 remqueue((queue_entry_t
)group
);
747 grrr_sorted_list_insert_group(rq
, group
);
750 rq
->weight
+= gpriority
;
756 grrr_select(grrr_run_queue_t rq
)
760 thread
= grrr_intergroup_schedule(rq
);
761 if (thread
!= THREAD_NULL
) {
762 grrr_proportional_priority_t gpriority
;
763 grrr_group_index_t gindex
;
766 gpriority
= grrr_priority_mapping
[thread
->sched_pri
];
767 gindex
= grrr_group_mapping
[gpriority
];
768 group
= &rq
->groups
[gindex
];
770 remqueue((queue_entry_t
)thread
);
771 SCHED_STATS_RUNQ_CHANGE(&rq
->runq_stats
, rq
->count
);
773 group
->weight
-= gpriority
;
774 if (group
->current_client
== thread
) {
775 group
->current_client
= THREAD_NULL
;
778 remqueue((queue_entry_t
)group
);
779 if (group
->count
== 0) {
780 if (rq
->current_group
== group
) {
781 rq
->current_group
= GRRR_GROUP_NULL
;
784 /* Need to re-insert in sorted location */
785 grrr_sorted_list_insert_group(rq
, group
);
789 rq
->weight
-= gpriority
;
791 thread
->runq
= PROCESSOR_NULL
;
803 grrr_proportional_priority_t gpriority
;
804 grrr_group_index_t gindex
;
807 gpriority
= grrr_priority_mapping
[thread
->sched_pri
];
808 gindex
= grrr_group_mapping
[gpriority
];
809 group
= &rq
->groups
[gindex
];
811 remqueue((queue_entry_t
)thread
);
812 SCHED_STATS_RUNQ_CHANGE(&rq
->runq_stats
, rq
->count
);
814 group
->weight
-= gpriority
;
815 if (group
->current_client
== thread
) {
816 group
->current_client
= THREAD_NULL
;
819 remqueue((queue_entry_t
)group
);
820 if (group
->count
== 0) {
821 if (rq
->current_group
== group
) {
822 rq
->current_group
= GRRR_GROUP_NULL
;
825 /* Need to re-insert in sorted location */
826 grrr_sorted_list_insert_group(rq
, group
);
830 rq
->weight
-= gpriority
;
832 thread
->runq
= PROCESSOR_NULL
;
836 grrr_sorted_list_insert_group(grrr_run_queue_t rq
,
839 /* Simple insertion sort */
840 if (queue_empty(&rq
->sorted_group_list
)) {
841 enqueue_tail(&rq
->sorted_group_list
, (queue_entry_t
)group
);
843 grrr_group_t search_group
;
845 /* Start searching from the head (heaviest weight) for the first
846 * element less than us, so we can insert before it
848 search_group
= (grrr_group_t
)queue_first(&rq
->sorted_group_list
);
849 while (!queue_end(&rq
->sorted_group_list
, (queue_entry_t
)search_group
) ) {
851 if (search_group
->weight
< group
->weight
) {
852 /* we should be before this */
853 search_group
= (grrr_group_t
)queue_prev((queue_entry_t
)search_group
);
855 } if (search_group
->weight
== group
->weight
) {
856 /* Use group index as a tie breaker */
857 if (search_group
->index
< group
->index
) {
858 search_group
= (grrr_group_t
)queue_prev((queue_entry_t
)search_group
);
863 /* otherwise, our weight is too small, keep going */
864 search_group
= (grrr_group_t
)queue_next((queue_entry_t
)search_group
);
867 if (queue_end(&rq
->sorted_group_list
, (queue_entry_t
)search_group
)) {
868 enqueue_tail(&rq
->sorted_group_list
, (queue_entry_t
)group
);
870 insque((queue_entry_t
)group
, (queue_entry_t
)search_group
);
875 #endif /* defined(CONFIG_SCHED_GRRR_CORE) */
877 #if defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY)
879 static struct grrr_run_queue fs_grrr_runq
;
880 #define FS_GRRR_RUNQ ((processor_t)-2)
881 decl_simple_lock_data(static,fs_grrr_lock
);
884 sched_grrr_fairshare_init(void)
886 grrr_priority_mapping_init();
888 simple_lock_init(&fs_grrr_lock
, 0);
889 grrr_runqueue_init(&fs_grrr_runq
);
894 sched_grrr_fairshare_runq_count(void)
896 return fs_grrr_runq
.count
;
900 sched_grrr_fairshare_runq_stats_count_sum(void)
902 return fs_grrr_runq
.runq_stats
.count_sum
;
906 sched_grrr_fairshare_enqueue(thread_t thread
)
908 simple_lock(&fs_grrr_lock
);
910 (void)grrr_enqueue(&fs_grrr_runq
, thread
);
912 thread
->runq
= FS_GRRR_RUNQ
;
914 simple_unlock(&fs_grrr_lock
);
917 thread_t
sched_grrr_fairshare_dequeue(void)
921 simple_lock(&fs_grrr_lock
);
922 if (fs_grrr_runq
.count
> 0) {
923 thread
= grrr_select(&fs_grrr_runq
);
925 simple_unlock(&fs_grrr_lock
);
929 simple_unlock(&fs_grrr_lock
);
934 boolean_t
sched_grrr_fairshare_queue_remove(thread_t thread
)
937 simple_lock(&fs_grrr_lock
);
939 if (FS_GRRR_RUNQ
== thread
->runq
) {
940 grrr_remove(&fs_grrr_runq
, thread
);
942 simple_unlock(&fs_grrr_lock
);
947 * The thread left the run queue before we could
948 * lock the run queue.
950 assert(thread
->runq
== PROCESSOR_NULL
);
951 simple_unlock(&fs_grrr_lock
);
956 #endif /* defined(CONFIG_SCHED_GRRR) || defined(CONFIG_SCHED_FIXEDPRIORITY) */