/*
 * Copyright (c) 2009-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach/mach_types.h>
#include <mach/machine.h>
#include <mach/policy.h>
#include <mach/sync_policy.h>
#include <mach/thread_act.h>

#include <machine/machine_routines.h>
#include <machine/sched_param.h>
#include <machine/machine_cpu.h>

#include <kern/kern_types.h>
#include <kern/clock.h>
#include <kern/counters.h>
#include <kern/cpu_number.h>
#include <kern/cpu_data.h>
#include <kern/debug.h>
#include <kern/macro_help.h>
#include <kern/machine.h>
#include <kern/misc_protos.h>
#include <kern/processor.h>
#include <kern/queue.h>
#include <kern/sched.h>
#include <kern/sched_prim.h>
#include <kern/syscall_subr.h>
#include <kern/task.h>
#include <kern/thread.h>

#include <vm/vm_kern.h>
#include <vm/vm_map.h>

#include <mach/sdt.h>

#include <sys/kdebug.h>
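/*
 * GRRR: group ratio round-robin scheduling (overview, inferred from the
 * code below).
 *
 * Threads are mapped to a proportional-share priority (see
 * grrr_priority_mapping_init()) and hashed into groups of four adjacent
 * proportional priorities.  Each group is weighted by the sum of its
 * clients' proportional priorities; groups are visited in a
 * work-versus-weight ratio round-robin (grrr_intergroup_schedule()), and
 * clients within a group are serviced round-robin
 * (grrr_intragroup_schedule()).
 */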
#if defined(CONFIG_SCHED_GRRR_CORE)

static void
grrr_priority_mapping_init(void);

static boolean_t
grrr_enqueue(grrr_run_queue_t rq,
    thread_t thread);

static thread_t
grrr_select(grrr_run_queue_t rq);

static void
grrr_remove(grrr_run_queue_t rq,
    thread_t thread);

static void
grrr_sorted_list_insert_group(grrr_run_queue_t rq,
    grrr_group_t group);

static void
grrr_rescale_work(grrr_run_queue_t rq);

static void
grrr_runqueue_init(grrr_run_queue_t runq);

/* Map Mach priorities to ones suitable for proportional sharing */
static grrr_proportional_priority_t grrr_priority_mapping[NRQS];

/* Map each proportional priority to its group */
static grrr_group_index_t grrr_group_mapping[NUM_GRRR_PROPORTIONAL_PRIORITIES];

uint32_t grrr_rescale_tick;

#endif /* defined(CONFIG_SCHED_GRRR_CORE) */
#if defined(CONFIG_SCHED_GRRR)

static void
sched_grrr_init(void);

static void
sched_grrr_timebase_init(void);

static void
sched_grrr_processor_init(processor_t processor);

static void
sched_grrr_pset_init(processor_set_t pset);

static void
sched_grrr_maintenance_continuation(void);

static thread_t
sched_grrr_choose_thread(processor_t processor,
    int priority,
    ast_t reason);

static thread_t
sched_grrr_steal_thread(processor_set_t pset);

static int
sched_grrr_compute_priority(thread_t thread);

static processor_t
sched_grrr_choose_processor(processor_set_t pset,
    processor_t processor,
    thread_t thread);

static boolean_t
sched_grrr_processor_enqueue(
	processor_t processor,
	thread_t thread,
	sched_options_t options);

static void
sched_grrr_processor_queue_shutdown(
	processor_t processor);

static boolean_t
sched_grrr_processor_queue_remove(
	processor_t processor,
	thread_t thread);

static boolean_t
sched_grrr_processor_queue_empty(processor_t processor);

static boolean_t
sched_grrr_processor_queue_has_priority(processor_t processor,
    int priority,
    boolean_t gte);

static boolean_t
sched_grrr_priority_is_urgent(int priority);

static ast_t
sched_grrr_processor_csw_check(processor_t processor);

static uint32_t
sched_grrr_initial_quantum_size(thread_t thread);

static sched_mode_t
sched_grrr_initial_thread_sched_mode(task_t parent_task);

static boolean_t
sched_grrr_can_update_priority(thread_t thread);

static void
sched_grrr_update_priority(thread_t thread);

static void
sched_grrr_lightweight_update_priority(thread_t thread);

static int
sched_grrr_processor_runq_count(processor_t processor);

static uint64_t
sched_grrr_processor_runq_stats_count_sum(processor_t processor);

static int
sched_grrr_processor_bound_count(processor_t processor);

static void
sched_grrr_thread_update_scan(sched_update_scan_context_t scan_context);
const struct sched_dispatch_table sched_grrr_dispatch = {
	.sched_name                     = "grrr",
	.init                           = sched_grrr_init,
	.timebase_init                  = sched_grrr_timebase_init,
	.processor_init                 = sched_grrr_processor_init,
	.pset_init                      = sched_grrr_pset_init,
	.maintenance_continuation       = sched_grrr_maintenance_continuation,
	.choose_thread                  = sched_grrr_choose_thread,
	.steal_thread_enabled           = sched_steal_thread_DISABLED,
	.steal_thread                   = sched_grrr_steal_thread,
	.compute_timeshare_priority     = sched_grrr_compute_priority,
	.choose_node                    = sched_choose_node,
	.choose_processor               = sched_grrr_choose_processor,
	.processor_enqueue              = sched_grrr_processor_enqueue,
	.processor_queue_shutdown       = sched_grrr_processor_queue_shutdown,
	.processor_queue_remove         = sched_grrr_processor_queue_remove,
	.processor_queue_empty          = sched_grrr_processor_queue_empty,
	.priority_is_urgent             = sched_grrr_priority_is_urgent,
	.processor_csw_check            = sched_grrr_processor_csw_check,
	.processor_queue_has_priority   = sched_grrr_processor_queue_has_priority,
	.initial_quantum_size           = sched_grrr_initial_quantum_size,
	.initial_thread_sched_mode      = sched_grrr_initial_thread_sched_mode,
	.can_update_priority            = sched_grrr_can_update_priority,
	.update_priority                = sched_grrr_update_priority,
	.lightweight_update_priority    = sched_grrr_lightweight_update_priority,
	.quantum_expire                 = sched_default_quantum_expire,
	.processor_runq_count           = sched_grrr_processor_runq_count,
	.processor_runq_stats_count_sum = sched_grrr_processor_runq_stats_count_sum,
	.processor_bound_count          = sched_grrr_processor_bound_count,
	.thread_update_scan             = sched_grrr_thread_update_scan,
	.multiple_psets_enabled         = TRUE,
	.sched_groups_enabled           = FALSE,
	.avoid_processor_enabled        = FALSE,
	.thread_avoid_processor         = NULL,
	.processor_balance              = sched_SMT_balance,

	.rt_runq                        = sched_rtlocal_runq,
	.rt_init                        = sched_rtlocal_init,
	.rt_queue_shutdown              = sched_rtlocal_queue_shutdown,
	.rt_runq_scan                   = sched_rtlocal_runq_scan,
	.rt_runq_count_sum              = sched_rtlocal_runq_count_sum,

	.qos_max_parallelism            = sched_qos_max_parallelism,
	.check_spill                    = sched_check_spill,
	.ipi_policy                     = sched_ipi_policy,
	.thread_should_yield            = sched_thread_should_yield,
	.run_count_incr                 = sched_run_incr,
	.run_count_decr                 = sched_run_decr,
	.update_thread_bucket           = sched_update_thread_bucket,
	.pset_made_schedulable          = sched_pset_made_schedulable,
};
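/*
 * Entries above that are not GRRR-specific (the rt_* realtime-queue
 * callbacks, QoS parallelism, spill/IPI policy, and the run-count and
 * thread-bucket bookkeeping) delegate to the stock sched_* helpers, so
 * this policy only specializes per-processor timeshare queueing.
 */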
extern int max_unsafe_quanta;

static uint32_t grrr_quantum_us;
static uint32_t grrr_quantum;

static uint64_t sched_grrr_tick_deadline;
static void
sched_grrr_init(void)
{
	if (default_preemption_rate < 1) {
		default_preemption_rate = 100;
	}
	grrr_quantum_us = (1000 * 1000) / default_preemption_rate;

	printf("standard grrr timeslicing quantum is %d us\n", grrr_quantum_us);

	grrr_priority_mapping_init();
}
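/*
 * For example, with the default preemption rate of 100, the standard
 * quantum computed above is (1000 * 1000) / 100 = 10,000 us, i.e. a
 * 10 ms timeslice.
 */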
static void
sched_grrr_timebase_init(void)
{
	uint64_t abstime;

	/* standard timeslicing quantum */
	clock_interval_to_absolutetime_interval(
		grrr_quantum_us, NSEC_PER_USEC, &abstime);
	assert((abstime >> 32) == 0 && (uint32_t)abstime != 0);
	grrr_quantum = (uint32_t)abstime;

	thread_depress_time = 1 * grrr_quantum;
	default_timeshare_computation = grrr_quantum / 2;
	default_timeshare_constraint = grrr_quantum;

	max_unsafe_computation = max_unsafe_quanta * grrr_quantum;
	sched_safe_duration = 2 * max_unsafe_quanta * grrr_quantum;
}
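/*
 * Continuing the 10 ms example: a depressed thread yields for one
 * quantum, the default timeshare computation is half a quantum (5 ms),
 * the timeshare constraint is a full quantum, and the
 * unsafe-computation window is max_unsafe_quanta quanta, with double
 * that as sched_safe_duration.
 */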
static void
sched_grrr_processor_init(processor_t processor)
{
	grrr_runqueue_init(&processor->grrr_runq);
}

static void
sched_grrr_pset_init(processor_set_t pset __unused)
{
}
static void
sched_grrr_maintenance_continuation(void)
{
	uint64_t abstime = mach_absolute_time();

	grrr_rescale_tick++;

	/*
	 * Compute various averages.
	 */
	compute_averages(1);

	if (sched_grrr_tick_deadline == 0) {
		sched_grrr_tick_deadline = abstime;
	}

	clock_deadline_for_periodic_event(10 * sched_one_second_interval, abstime,
	    &sched_grrr_tick_deadline);

	assert_wait_deadline((event_t)sched_grrr_maintenance_continuation, THREAD_UNINT, sched_grrr_tick_deadline);
	thread_block((thread_continue_t)sched_grrr_maintenance_continuation);
	/*NOTREACHED*/
}
static thread_t
sched_grrr_choose_thread(processor_t processor,
    int priority __unused,
    ast_t reason __unused)
{
	grrr_run_queue_t rq = &processor->grrr_runq;

	return grrr_select(rq);
}
static thread_t
sched_grrr_steal_thread(processor_set_t pset)
{
	pset_unlock(pset);

	return THREAD_NULL;
}
static int
sched_grrr_compute_priority(thread_t thread)
{
	return thread->base_pri;
}
static processor_t
sched_grrr_choose_processor(processor_set_t pset,
    processor_t processor,
    thread_t thread)
{
	return choose_processor(pset, processor, thread);
}
static boolean_t
sched_grrr_processor_enqueue(
	processor_t processor,
	thread_t thread,
	sched_options_t options __unused)
{
	grrr_run_queue_t rq = &processor->grrr_runq;
	boolean_t result;

	result = grrr_enqueue(rq, thread);

	thread->runq = processor;

	return result;
}
static void
sched_grrr_processor_queue_shutdown(
	processor_t processor)
{
	processor_set_t pset = processor->processor_set;
	thread_t thread;
	queue_head_t tqueue, bqueue;

	queue_init(&tqueue);
	queue_init(&bqueue);

	while ((thread = sched_grrr_choose_thread(processor, IDLEPRI, AST_NONE)) != THREAD_NULL) {
		if (thread->bound_processor == PROCESSOR_NULL) {
			enqueue_tail(&tqueue, (queue_entry_t)thread);
		} else {
			enqueue_tail(&bqueue, (queue_entry_t)thread);
		}
	}

	while ((thread = (thread_t)(void *)dequeue_head(&bqueue)) != THREAD_NULL) {
		sched_grrr_processor_enqueue(processor, thread, SCHED_TAILQ);
	}

	pset_unlock(pset);

	while ((thread = (thread_t)(void *)dequeue_head(&tqueue)) != THREAD_NULL) {
		thread_lock(thread);

		thread_setrun(thread, SCHED_TAILQ);

		thread_unlock(thread);
	}
}
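/*
 * Shutdown drains the dying processor's run queue in two passes: bound
 * threads (bqueue) go back onto this processor's queue, since they
 * cannot migrate, while unbound threads (tqueue) are handed to
 * thread_setrun() to be placed on some other processor.
 */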
static boolean_t
sched_grrr_processor_queue_remove(
	processor_t processor,
	thread_t thread)
{
	processor_set_t pset = processor->processor_set;

	pset_lock(pset);

	if (processor == thread->runq) {
		/*
		 * Thread is on a run queue and we have a lock on
		 * that run queue.
		 */
		grrr_run_queue_t rq = &processor->grrr_runq;

		grrr_remove(rq, thread);
	} else {
		/*
		 * The thread left the run queue before we could
		 * lock the run queue.
		 */
		assert(thread->runq == PROCESSOR_NULL);
		processor = PROCESSOR_NULL;
	}

	pset_unlock(pset);

	return processor != PROCESSOR_NULL;
}
static boolean_t
sched_grrr_processor_queue_empty(processor_t processor)
{
	boolean_t result;

	result = (processor->grrr_runq.count == 0);

	return result;
}
static boolean_t
sched_grrr_processor_queue_has_priority(processor_t processor,
    int priority,
    boolean_t gte __unused)
{
	grrr_run_queue_t rq = &processor->grrr_runq;
	unsigned int i;

	i = grrr_group_mapping[grrr_priority_mapping[priority]];
	for (; i < NUM_GRRR_GROUPS; i++) {
		if (rq->groups[i].count > 0) {
			return TRUE;
		}
	}

	return FALSE;
}
/* Implement sched_preempt_pri in code */
static boolean_t
sched_grrr_priority_is_urgent(int priority)
{
	if (priority <= BASEPRI_FOREGROUND) {
		return FALSE;
	}

	if (priority < MINPRI_KERNEL) {
		return TRUE;
	}

	if (priority >= BASEPRI_PREEMPT) {
		return TRUE;
	}

	return FALSE;
}
static ast_t
sched_grrr_processor_csw_check(processor_t processor)
{
	int count;

	count = sched_grrr_processor_runq_count(processor);

	if (count > 0) {
		return AST_PREEMPT;
	}

	return AST_NONE;
}
static uint32_t
sched_grrr_initial_quantum_size(thread_t thread __unused)
{
	return grrr_quantum;
}
static sched_mode_t
sched_grrr_initial_thread_sched_mode(task_t parent_task)
{
	if (parent_task == kernel_task) {
		return TH_MODE_FIXED;
	} else {
		return TH_MODE_TIMESHARE;
	}
}
static boolean_t
sched_grrr_can_update_priority(thread_t thread __unused)
{
	return FALSE;
}

static void
sched_grrr_update_priority(thread_t thread __unused)
{
	return;
}

static void
sched_grrr_lightweight_update_priority(thread_t thread __unused)
{
	return;
}
static int
sched_grrr_processor_runq_count(processor_t processor)
{
	return processor->grrr_runq.count;
}

static uint64_t
sched_grrr_processor_runq_stats_count_sum(processor_t processor)
{
	return processor->grrr_runq.runq_stats.count_sum;
}

static int
sched_grrr_processor_bound_count(__unused processor_t processor)
{
	return 0;
}

static void
sched_grrr_thread_update_scan(__unused sched_update_scan_context_t scan_context)
{
	return;
}
#endif /* defined(CONFIG_SCHED_GRRR) */

#if defined(CONFIG_SCHED_GRRR_CORE)
static void
grrr_priority_mapping_init(void)
{
	unsigned int i;

	/* Map 0->0 up to 10->20 */
	for (i = 0; i <= 10; i++) {
		grrr_priority_mapping[i] = (grrr_proportional_priority_t)(2 * i);
	}

	/* Map user priorities 11->33 up to 51->153 */
	for (i = 11; i <= 51; i++) {
		grrr_priority_mapping[i] = (grrr_proportional_priority_t)(3 * i);
	}

	/* Map high priorities 52->180 up to 127->255 */
	for (i = 52; i <= 127; i++) {
		grrr_priority_mapping[i] = (grrr_proportional_priority_t)(128 + i);
	}

	for (i = 0; i < NUM_GRRR_PROPORTIONAL_PRIORITIES; i++) {
#if 0
		unsigned int j, k;

		/* Calculate log(i); */
		for (j = 0, k = 1; k <= i; j++, k *= 2) {
			;
		}
#endif

		/* Groups of 4 */
		grrr_group_mapping[i] = (grrr_group_index_t)(i >> 2);
	}
}
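/*
 * Worked examples: the default user priority 31 falls in the 11..51
 * band and maps to proportional priority 3 * 31 = 93, which lands in
 * group 93 >> 2 = 23; kernel priority 80 maps to 128 + 80 = 208,
 * group 52.
 */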
static thread_t
grrr_intragroup_schedule(grrr_group_t group)
{
	thread_t thread;

	if (group->count == 0) {
		return THREAD_NULL;
	}

	thread = group->current_client;
	if (thread == THREAD_NULL) {
		thread = (thread_t)(void *)queue_first(&group->clients);
	}

	if (1 /* deficit */) {
		group->current_client = (thread_t)(void *)queue_next((queue_entry_t)thread);
		if (queue_end(&group->clients, (queue_entry_t)group->current_client)) {
			group->current_client = (thread_t)(void *)queue_first(&group->clients);
		}

		thread = group->current_client;
	}

	return thread;
}
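/*
 * Intragroup selection is plain round-robin over the group's client
 * queue: current_client advances one position, wrapping back to the
 * head at the end of the queue.  The always-true "deficit" test above
 * looks like a stub for a deficit round-robin variant.
 */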
static thread_t
grrr_intergroup_schedule(grrr_run_queue_t rq)
{
	thread_t thread;
	grrr_group_t group;

	if (rq->count == 0) {
		return THREAD_NULL;
	}

	group = rq->current_group;

	if (group == GRRR_GROUP_NULL) {
		group = (grrr_group_t)queue_first(&rq->sorted_group_list);
	}

	thread = grrr_intragroup_schedule(group);

	if ((group->work >= (UINT32_MAX - 256)) || (rq->last_rescale_tick != grrr_rescale_tick)) {
		grrr_rescale_work(rq);
	}
	group->work++;

	if (queue_end(&rq->sorted_group_list, queue_next((queue_entry_t)group))) {
		/* last group, go back to beginning */
		group = (grrr_group_t)queue_first(&rq->sorted_group_list);
	} else {
		grrr_group_t nextgroup = (grrr_group_t)queue_next((queue_entry_t)group);
		uint64_t orderleft, orderright;

		/*
		 * The well-ordering condition for intergroup selection is:
		 *
		 * (group->work+1) / (nextgroup->work+1) > (group->weight) / (nextgroup->weight)
		 *
		 * Multiply both sides by their denominators to avoid division.
		 */
		orderleft = (group->work + 1) * ((uint64_t)nextgroup->weight);
		orderright = (nextgroup->work + 1) * ((uint64_t)group->weight);
		if (orderleft > orderright) {
			group = nextgroup;
		} else {
			group = (grrr_group_t)queue_first(&rq->sorted_group_list);
		}
	}

	rq->current_group = group;

	return thread;
}
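/*
 * Example of the cross-multiplied test: a current group of weight 10
 * with work 4 and a next group of weight 5 with work 1 give
 * orderleft = (4 + 1) * 5 = 25 and orderright = (1 + 1) * 10 = 20.
 * Since 25 > 20, the current group has already received more than its
 * weight ratio relative to its neighbor, so selection advances to the
 * next group; otherwise it restarts at the head of the sorted list.
 */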
static void
grrr_runqueue_init(grrr_run_queue_t runq)
{
	grrr_group_index_t index;

	runq->count = 0;

	for (index = 0; index < NUM_GRRR_GROUPS; index++) {
		unsigned int prisearch;

		for (prisearch = 0;
		    prisearch < NUM_GRRR_PROPORTIONAL_PRIORITIES;
		    prisearch++) {
			if (grrr_group_mapping[prisearch] == index) {
				runq->groups[index].minpriority = (grrr_proportional_priority_t)prisearch;
				break;
			}
		}

		runq->groups[index].index = index;

		queue_init(&runq->groups[index].clients);
		runq->groups[index].count = 0;
		runq->groups[index].weight = 0;
		runq->groups[index].work = 0;
		runq->groups[index].current_client = THREAD_NULL;
	}

	queue_init(&runq->sorted_group_list);

	runq->current_group = GRRR_GROUP_NULL;
}
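/*
 * With the i >> 2 grouping, group g covers proportional priorities 4g
 * through 4g + 3, so the minpriority search above records 4 * index as
 * the lowest proportional priority mapping into each group.
 */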
static void
grrr_rescale_work(grrr_run_queue_t rq)
{
	grrr_group_index_t index;

	/* avoid overflow by scaling by 1/8th */
	for (index = 0; index < NUM_GRRR_GROUPS; index++) {
		rq->groups[index].work >>= 3;
	}

	rq->last_rescale_tick = grrr_rescale_tick;
}
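/*
 * Dividing every group's work counter by 8 approximately preserves the
 * work ratios that the intergroup ordering test consumes, while keeping
 * the counters well clear of the UINT32_MAX - 256 overflow guard in
 * grrr_intergroup_schedule().
 */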
static boolean_t
grrr_enqueue(grrr_run_queue_t rq,
    thread_t thread)
{
	grrr_proportional_priority_t gpriority;
	grrr_group_index_t gindex;
	grrr_group_t group;

	gpriority = grrr_priority_mapping[thread->sched_pri];
	gindex = grrr_group_mapping[gpriority];
	group = &rq->groups[gindex];

	if (group->count == 0) {
		/* Empty group, this is the first client */
		enqueue_tail(&group->clients, (queue_entry_t)thread);

		group->count = 1;
		group->weight = gpriority;
		group->current_client = thread;
	} else {
		/* Insert before the current client */
		if (group->current_client == THREAD_NULL ||
		    queue_first(&group->clients) == (queue_entry_t)group->current_client) {
			enqueue_head(&group->clients, (queue_entry_t)thread);
		} else {
			insque((queue_entry_t)thread, queue_prev((queue_entry_t)group->current_client));
		}

		SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
		group->count++;
		group->weight += gpriority;

		/* Since there was already a client, this is on the per-processor sorted list already */
		remqueue((queue_entry_t)group);
	}

	grrr_sorted_list_insert_group(rq, group);

	rq->count++;
	rq->weight += gpriority;

	return FALSE;
}
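/*
 * After the group's weight changes, the group must be (re-)inserted
 * into the weight-sorted group list; if it already had clients it is
 * first removed, since the new weight may change its rank.  The same
 * pattern recurs in grrr_select() and grrr_remove() below.
 */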
static thread_t
grrr_select(grrr_run_queue_t rq)
{
	thread_t thread;

	thread = grrr_intergroup_schedule(rq);
	if (thread != THREAD_NULL) {
		grrr_proportional_priority_t gpriority;
		grrr_group_index_t gindex;
		grrr_group_t group;

		gpriority = grrr_priority_mapping[thread->sched_pri];
		gindex = grrr_group_mapping[gpriority];
		group = &rq->groups[gindex];

		remqueue((queue_entry_t)thread);
		SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
		group->count--;
		group->weight -= gpriority;
		if (group->current_client == thread) {
			group->current_client = THREAD_NULL;
		}

		remqueue((queue_entry_t)group);
		if (group->count == 0) {
			if (rq->current_group == group) {
				rq->current_group = GRRR_GROUP_NULL;
			}
		} else {
			/* Need to re-insert in sorted location */
			grrr_sorted_list_insert_group(rq, group);
		}

		rq->count--;
		rq->weight -= gpriority;

		thread->runq = PROCESSOR_NULL;
	}

	return thread;
}
static void
grrr_remove(grrr_run_queue_t rq,
    thread_t thread)
{
	grrr_proportional_priority_t gpriority;
	grrr_group_index_t gindex;
	grrr_group_t group;

	gpriority = grrr_priority_mapping[thread->sched_pri];
	gindex = grrr_group_mapping[gpriority];
	group = &rq->groups[gindex];

	remqueue((queue_entry_t)thread);
	SCHED_STATS_RUNQ_CHANGE(&rq->runq_stats, rq->count);
	group->count--;
	group->weight -= gpriority;
	if (group->current_client == thread) {
		group->current_client = THREAD_NULL;
	}

	remqueue((queue_entry_t)group);
	if (group->count == 0) {
		if (rq->current_group == group) {
			rq->current_group = GRRR_GROUP_NULL;
		}
	} else {
		/* Need to re-insert in sorted location */
		grrr_sorted_list_insert_group(rq, group);
	}

	rq->count--;
	rq->weight -= gpriority;

	thread->runq = PROCESSOR_NULL;
}
static void
grrr_sorted_list_insert_group(grrr_run_queue_t rq,
    grrr_group_t group)
{
	/* Simple insertion sort */
	if (queue_empty(&rq->sorted_group_list)) {
		enqueue_tail(&rq->sorted_group_list, (queue_entry_t)group);
	} else {
		grrr_group_t search_group;

		/*
		 * Start searching from the head (heaviest weight) for the first
		 * element less than us, so we can insert before it.
		 */
		search_group = (grrr_group_t)queue_first(&rq->sorted_group_list);
		while (!queue_end(&rq->sorted_group_list, (queue_entry_t)search_group)) {
			if (search_group->weight < group->weight) {
				/* we should be before this */
				search_group = (grrr_group_t)queue_prev((queue_entry_t)search_group);
				break;
			}
			if (search_group->weight == group->weight) {
				/* Use group index as a tie breaker */
				if (search_group->index < group->index) {
					search_group = (grrr_group_t)queue_prev((queue_entry_t)search_group);
					break;
				}
			}

			/* otherwise, our weight is too small, keep going */
			search_group = (grrr_group_t)queue_next((queue_entry_t)search_group);
		}

		if (queue_end(&rq->sorted_group_list, (queue_entry_t)search_group)) {
			enqueue_tail(&rq->sorted_group_list, (queue_entry_t)group);
		} else {
			insque((queue_entry_t)group, (queue_entry_t)search_group);
		}
	}
}
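/*
 * The result is a list sorted by descending group weight, with
 * equal-weight groups ordered so that the larger group index (the
 * higher proportional-priority band) comes first.  A scan in
 * grrr_intergroup_schedule() that begins at queue_first() therefore
 * starts from the heaviest group.
 */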
#endif /* defined(CONFIG_SCHED_GRRR_CORE) */