/*
 * Copyright (c) 2007-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <arm/machine_cpu.h>
#include <arm/cpu_internal.h>
#include <arm/cpuid.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/misc_protos.h>
#include <arm/machdep_call.h>
#include <arm/machine_routines.h>
#include <arm/rtclock.h>
#include <kern/machine.h>
#include <kern/thread.h>
#include <kern/thread_group.h>
#include <kern/policy_internal.h>
#include <kern/startup.h>
#include <machine/config.h>
#include <machine/atomic.h>
#include <pexpert/pexpert.h>

#if MONOTONIC
#include <kern/monotonic.h>
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <mach/machine.h>

#if !HAS_CONTINUOUS_HWCLOCK
extern uint64_t mach_absolutetime_asleep;
#else
extern uint64_t wake_abstime;
static uint64_t wake_conttime = UINT64_MAX;
#endif

extern volatile uint32_t debug_enabled;

static int max_cpus_initialized = 0;
#define MAX_CPUS_SET    0x1
#define MAX_CPUS_WAIT   0x2

LCK_GRP_DECLARE(max_cpus_grp, "max_cpus");
LCK_MTX_DECLARE(max_cpus_lock, &max_cpus_grp);
uint32_t lockdown_done = 0;
boolean_t is_clock_configured = FALSE;

static void
sched_perfcontrol_oncore_default(perfcontrol_state_t new_thread_state __unused, going_on_core_t on __unused)
{
}

static void
sched_perfcontrol_switch_default(perfcontrol_state_t old_thread_state __unused, perfcontrol_state_t new_thread_state __unused)
{
}

static void
sched_perfcontrol_offcore_default(perfcontrol_state_t old_thread_state __unused, going_off_core_t off __unused, boolean_t thread_terminating __unused)
{
}

static void
sched_perfcontrol_thread_group_default(thread_group_data_t data __unused)
{
}

static void
sched_perfcontrol_max_runnable_latency_default(perfcontrol_max_runnable_latency_t latencies __unused)
{
}

static void
sched_perfcontrol_work_interval_notify_default(perfcontrol_state_t thread_state __unused,
    perfcontrol_work_interval_t work_interval __unused)
{
}

static void
sched_perfcontrol_work_interval_ctl_default(perfcontrol_state_t thread_state __unused,
    perfcontrol_work_interval_instance_t instance __unused)
{
}

static void
sched_perfcontrol_deadline_passed_default(__unused uint64_t deadline)
{
}

static void
sched_perfcontrol_csw_default(
    __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
    __unused uint32_t flags, __unused struct perfcontrol_thread_data *offcore,
    __unused struct perfcontrol_thread_data *oncore,
    __unused struct perfcontrol_cpu_counters *cpu_counters, __unused void *unused)
{
}

static void
sched_perfcontrol_state_update_default(
    __unused perfcontrol_event event, __unused uint32_t cpu_id, __unused uint64_t timestamp,
    __unused uint32_t flags, __unused struct perfcontrol_thread_data *thr_data,
    __unused void *unused)
{
}

static void
sched_perfcontrol_thread_group_blocked_default(
    __unused thread_group_data_t blocked_tg, __unused thread_group_data_t blocking_tg,
    __unused uint32_t flags, __unused perfcontrol_state_t blocked_thr_state)
{
}

static void
sched_perfcontrol_thread_group_unblocked_default(
    __unused thread_group_data_t unblocked_tg, __unused thread_group_data_t unblocking_tg,
    __unused uint32_t flags, __unused perfcontrol_state_t unblocked_thr_state)
{
}

sched_perfcontrol_offcore_t sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
sched_perfcontrol_context_switch_t sched_perfcontrol_switch = sched_perfcontrol_switch_default;
sched_perfcontrol_oncore_t sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
sched_perfcontrol_thread_group_init_t sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
sched_perfcontrol_thread_group_deinit_t sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
sched_perfcontrol_thread_group_flags_update_t sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
sched_perfcontrol_max_runnable_latency_t sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
sched_perfcontrol_work_interval_notify_t sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
sched_perfcontrol_work_interval_ctl_t sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
sched_perfcontrol_deadline_passed_t sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
sched_perfcontrol_csw_t sched_perfcontrol_csw = sched_perfcontrol_csw_default;
sched_perfcontrol_state_update_t sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
sched_perfcontrol_thread_group_blocked_t sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
sched_perfcontrol_thread_group_unblocked_t sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
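
/*
 * Register (or, when callbacks == NULL, unregister) the performance-controller
 * callouts. Each callback slot falls back to its no-op default when the supplied
 * callback table omits it or is too old a version to contain it.
 */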
void
sched_perfcontrol_register_callbacks(sched_perfcontrol_callbacks_t callbacks, unsigned long size_of_state)
{
    assert(callbacks == NULL || callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_2);

    if (size_of_state > sizeof(struct perfcontrol_state)) {
        panic("%s: Invalid required state size %lu", __FUNCTION__, size_of_state);
    }

    if (callbacks) {
#if CONFIG_THREAD_GROUPS
        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_3) {
            if (callbacks->thread_group_init != NULL) {
                sched_perfcontrol_thread_group_init = callbacks->thread_group_init;
            } else {
                sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
            }
            if (callbacks->thread_group_deinit != NULL) {
                sched_perfcontrol_thread_group_deinit = callbacks->thread_group_deinit;
            } else {
                sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
            }
            // tell CLPC about existing thread groups
            thread_group_resync(TRUE);
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_6) {
            if (callbacks->thread_group_flags_update != NULL) {
                sched_perfcontrol_thread_group_flags_update = callbacks->thread_group_flags_update;
            } else {
                sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_8) {
            if (callbacks->thread_group_blocked != NULL) {
                sched_perfcontrol_thread_group_blocked = callbacks->thread_group_blocked;
            } else {
                sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
            }

            if (callbacks->thread_group_unblocked != NULL) {
                sched_perfcontrol_thread_group_unblocked = callbacks->thread_group_unblocked;
            } else {
                sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
            }
        }
#endif /* CONFIG_THREAD_GROUPS */

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_7) {
            if (callbacks->work_interval_ctl != NULL) {
                sched_perfcontrol_work_interval_ctl = callbacks->work_interval_ctl;
            } else {
                sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_5) {
            if (callbacks->csw != NULL) {
                sched_perfcontrol_csw = callbacks->csw;
            } else {
                sched_perfcontrol_csw = sched_perfcontrol_csw_default;
            }

            if (callbacks->state_update != NULL) {
                sched_perfcontrol_state_update = callbacks->state_update;
            } else {
                sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
            }
        }

        if (callbacks->version >= SCHED_PERFCONTROL_CALLBACKS_VERSION_4) {
            if (callbacks->deadline_passed != NULL) {
                sched_perfcontrol_deadline_passed = callbacks->deadline_passed;
            } else {
                sched_perfcontrol_deadline_passed = sched_perfcontrol_deadline_passed_default;
            }
        }

        if (callbacks->offcore != NULL) {
            sched_perfcontrol_offcore = callbacks->offcore;
        } else {
            sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
        }

        if (callbacks->context_switch != NULL) {
            sched_perfcontrol_switch = callbacks->context_switch;
        } else {
            sched_perfcontrol_switch = sched_perfcontrol_switch_default;
        }

        if (callbacks->oncore != NULL) {
            sched_perfcontrol_oncore = callbacks->oncore;
        } else {
            sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
        }

        if (callbacks->max_runnable_latency != NULL) {
            sched_perfcontrol_max_runnable_latency = callbacks->max_runnable_latency;
        } else {
            sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
        }

        if (callbacks->work_interval_notify != NULL) {
            sched_perfcontrol_work_interval_notify = callbacks->work_interval_notify;
        } else {
            sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
        }
    } else {
        /* reset to defaults */
#if CONFIG_THREAD_GROUPS
        thread_group_resync(FALSE);
#endif
        sched_perfcontrol_offcore = sched_perfcontrol_offcore_default;
        sched_perfcontrol_switch = sched_perfcontrol_switch_default;
        sched_perfcontrol_oncore = sched_perfcontrol_oncore_default;
        sched_perfcontrol_thread_group_init = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_thread_group_deinit = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_thread_group_flags_update = sched_perfcontrol_thread_group_default;
        sched_perfcontrol_max_runnable_latency = sched_perfcontrol_max_runnable_latency_default;
        sched_perfcontrol_work_interval_notify = sched_perfcontrol_work_interval_notify_default;
        sched_perfcontrol_work_interval_ctl = sched_perfcontrol_work_interval_ctl_default;
        sched_perfcontrol_csw = sched_perfcontrol_csw_default;
        sched_perfcontrol_state_update = sched_perfcontrol_state_update_default;
        sched_perfcontrol_thread_group_blocked = sched_perfcontrol_thread_group_blocked_default;
        sched_perfcontrol_thread_group_unblocked = sched_perfcontrol_thread_group_unblocked_default;
    }
}
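
/*
 * Fill in the per-thread data block handed to the performance controller:
 * perfcontrol class, thread ID, thread-group information (when
 * CONFIG_THREAD_GROUPS), and the scheduling latency observed at the same
 * base priority.
 */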
static void
machine_switch_populate_perfcontrol_thread_data(struct perfcontrol_thread_data *data,
    thread_t thread,
    uint64_t same_pri_latency)
{
    bzero(data, sizeof(struct perfcontrol_thread_data));
    data->perfctl_class = thread_get_perfcontrol_class(thread);
    data->energy_estimate_nj = 0;
    data->thread_id = thread->thread_id;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(thread);
    data->thread_group_id = thread_group_get_id(tg);
    data->thread_group_data = thread_group_get_machine_data(tg);
#endif
    data->scheduling_latency_at_same_basepri = same_pri_latency;
    data->perfctl_state = FIND_PERFCONTROL_STATE(thread);
}

static void
machine_switch_populate_perfcontrol_cpu_counters(struct perfcontrol_cpu_counters *cpu_counters)
{
#if MONOTONIC
    mt_perfcontrol(&cpu_counters->instructions, &cpu_counters->cycles);
#else /* MONOTONIC */
    cpu_counters->instructions = 0;
    cpu_counters->cycles = 0;
#endif /* !MONOTONIC */
}
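
/*
 * Optional instrumentation: accumulate the cycle/instruction cost of each class
 * of performance-controller callout so that average overheads can be queried.
 */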
int perfcontrol_callout_stats_enabled = 0;
static _Atomic uint64_t perfcontrol_callout_stats[PERFCONTROL_CALLOUT_MAX][PERFCONTROL_STAT_MAX];
static _Atomic uint64_t perfcontrol_callout_count[PERFCONTROL_CALLOUT_MAX];

#if MONOTONIC
static inline bool
perfcontrol_callout_counters_begin(uint64_t *counters)
{
    if (!perfcontrol_callout_stats_enabled) {
        return false;
    }
    mt_fixed_counts(counters);
    return true;
}

static inline void
perfcontrol_callout_counters_end(uint64_t *start_counters,
    perfcontrol_callout_type_t type)
{
    uint64_t end_counters[MT_CORE_NFIXED];
    mt_fixed_counts(end_counters);
    os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_CYCLES],
        end_counters[MT_CORE_CYCLES] - start_counters[MT_CORE_CYCLES], relaxed);
#ifdef MT_CORE_INSTRS
    os_atomic_add(&perfcontrol_callout_stats[type][PERFCONTROL_STAT_INSTRS],
        end_counters[MT_CORE_INSTRS] - start_counters[MT_CORE_INSTRS], relaxed);
#endif /* defined(MT_CORE_INSTRS) */
    os_atomic_inc(&perfcontrol_callout_count[type], relaxed);
}
#endif /* MONOTONIC */

uint64_t
perfcontrol_callout_stat_avg(perfcontrol_callout_type_t type,
    perfcontrol_callout_stat_t stat)
{
    if (!perfcontrol_callout_stats_enabled) {
        return 0;
    }
    return os_atomic_load_wide(&perfcontrol_callout_stats[type][stat], relaxed) /
           os_atomic_load_wide(&perfcontrol_callout_count[type], relaxed);
}
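
/*
 * Context-switch callout: invoked on every switch to give the performance
 * controller both the legacy switch notification and the richer csw callout
 * with per-thread data and CPU counters.
 */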
void
machine_switch_perfcontrol_context(perfcontrol_event event,
    uint64_t timestamp,
    uint32_t flags,
    uint64_t new_thread_same_pri_latency,
    thread_t old,
    thread_t new)
{
    if (sched_perfcontrol_switch != sched_perfcontrol_switch_default) {
        perfcontrol_state_t old_perfcontrol_state = FIND_PERFCONTROL_STATE(old);
        perfcontrol_state_t new_perfcontrol_state = FIND_PERFCONTROL_STATE(new);
        sched_perfcontrol_switch(old_perfcontrol_state, new_perfcontrol_state);
    }

    if (sched_perfcontrol_csw != sched_perfcontrol_csw_default) {
        uint32_t cpu_id = (uint32_t)cpu_number();
        struct perfcontrol_cpu_counters cpu_counters;
        struct perfcontrol_thread_data offcore, oncore;
        machine_switch_populate_perfcontrol_thread_data(&offcore, old, 0);
        machine_switch_populate_perfcontrol_thread_data(&oncore, new,
            new_thread_same_pri_latency);
        machine_switch_populate_perfcontrol_cpu_counters(&cpu_counters);

#if MONOTONIC
        uint64_t counters[MT_CORE_NFIXED];
        bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
        sched_perfcontrol_csw(event, cpu_id, timestamp, flags,
            &offcore, &oncore, &cpu_counters, NULL);
#if MONOTONIC
        if (ctrs_enabled) {
            perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_CONTEXT);
        }
#endif /* MONOTONIC */

        old->machine.energy_estimate_nj += offcore.energy_estimate_nj;
        new->machine.energy_estimate_nj += oncore.energy_estimate_nj;
    }
}
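
/*
 * State-update callout: reports a scheduler event for a single thread to the
 * performance controller.
 */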
void
machine_switch_perfcontrol_state_update(perfcontrol_event event,
    uint64_t timestamp,
    uint32_t flags,
    thread_t thread)
{
    if (sched_perfcontrol_state_update == sched_perfcontrol_state_update_default) {
        return;
    }
    uint32_t cpu_id = (uint32_t)cpu_number();
    struct perfcontrol_thread_data data;
    machine_switch_populate_perfcontrol_thread_data(&data, thread, 0);

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_state_update(event, cpu_id, timestamp, flags,
        &data, NULL);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_STATE_UPDATE);
    }
#endif /* MONOTONIC */

    thread->machine.energy_estimate_nj += data.energy_estimate_nj;
}
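
/*
 * Notify the performance controller that a thread is about to go on core,
 * including its QoS class, urgency, and observed scheduling latencies.
 */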
void
machine_thread_going_on_core(thread_t new_thread,
    thread_urgency_t urgency,
    uint64_t sched_latency,
    uint64_t same_pri_latency,
    uint64_t timestamp)
{
    if (sched_perfcontrol_oncore == sched_perfcontrol_oncore_default) {
        return;
    }
    struct going_on_core on_core;
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(new_thread);

    on_core.thread_id = new_thread->thread_id;
    on_core.energy_estimate_nj = 0;
    on_core.qos_class = (uint16_t)proc_get_effective_thread_policy(new_thread, TASK_POLICY_QOS);
    on_core.urgency = (uint16_t)urgency;
    on_core.is_32_bit = thread_is_64bit_data(new_thread) ? FALSE : TRUE;
    on_core.is_kernel_thread = new_thread->task == kernel_task;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(new_thread);
    on_core.thread_group_id = thread_group_get_id(tg);
    on_core.thread_group_data = thread_group_get_machine_data(tg);
#endif
    on_core.scheduling_latency = sched_latency;
    on_core.start_time = timestamp;
    on_core.scheduling_latency_at_same_basepri = same_pri_latency;

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_oncore(state, &on_core);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_ON_CORE);
    }
#endif /* MONOTONIC */

    new_thread->machine.energy_estimate_nj += on_core.energy_estimate_nj;
}
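
/*
 * Notify the performance controller that a thread is coming off core, passing
 * the dispatch end time and whether the thread is terminating.
 */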
void
machine_thread_going_off_core(thread_t old_thread, boolean_t thread_terminating,
    uint64_t last_dispatch, __unused boolean_t thread_runnable)
{
    if (sched_perfcontrol_offcore == sched_perfcontrol_offcore_default) {
        return;
    }
    struct going_off_core off_core;
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(old_thread);

    off_core.thread_id = old_thread->thread_id;
    off_core.energy_estimate_nj = 0;
    off_core.end_time = last_dispatch;
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg = thread_group_get(old_thread);
    off_core.thread_group_id = thread_group_get_id(tg);
    off_core.thread_group_data = thread_group_get_machine_data(tg);
#endif

#if MONOTONIC
    uint64_t counters[MT_CORE_NFIXED];
    bool ctrs_enabled = perfcontrol_callout_counters_begin(counters);
#endif /* MONOTONIC */
    sched_perfcontrol_offcore(state, &off_core, thread_terminating);
#if MONOTONIC
    if (ctrs_enabled) {
        perfcontrol_callout_counters_end(counters, PERFCONTROL_CALLOUT_OFF_CORE);
    }
#endif /* MONOTONIC */

    old_thread->machine.energy_estimate_nj += off_core.energy_estimate_nj;
}
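
/*
 * Thread-group lifecycle callouts: tell the performance controller when thread
 * groups are created, destroyed, or change flags.
 */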
#if CONFIG_THREAD_GROUPS
void
machine_thread_group_init(struct thread_group *tg)
{
    if (sched_perfcontrol_thread_group_init == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = thread_group_get_flags(tg);
    sched_perfcontrol_thread_group_init(&data);
}

void
machine_thread_group_deinit(struct thread_group *tg)
{
    if (sched_perfcontrol_thread_group_deinit == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = thread_group_get_flags(tg);
    sched_perfcontrol_thread_group_deinit(&data);
}

void
machine_thread_group_flags_update(struct thread_group *tg, uint32_t flags)
{
    if (sched_perfcontrol_thread_group_flags_update == sched_perfcontrol_thread_group_default) {
        return;
    }
    struct thread_group_data data;
    data.thread_group_id = thread_group_get_id(tg);
    data.thread_group_data = thread_group_get_machine_data(tg);
    data.thread_group_size = thread_group_machine_data_size();
    data.thread_group_flags = flags;
    sched_perfcontrol_thread_group_flags_update(&data);
}
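
/*
 * Blocking-relationship callouts: report which thread group a blocked thread
 * is waiting on so the performance controller can account for the dependency.
 */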
void
machine_thread_group_blocked(struct thread_group *blocked_tg,
    struct thread_group *blocking_tg,
    uint32_t flags,
    thread_t blocked_thread)
{
    if (sched_perfcontrol_thread_group_blocked == sched_perfcontrol_thread_group_blocked_default) {
        return;
    }

    spl_t s = splsched();

    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(blocked_thread);
    struct thread_group_data blocked_data;
    assert(blocked_tg != NULL);

    blocked_data.thread_group_id = thread_group_get_id(blocked_tg);
    blocked_data.thread_group_data = thread_group_get_machine_data(blocked_tg);
    blocked_data.thread_group_size = thread_group_machine_data_size();

    if (blocking_tg == NULL) {
        /*
         * For special cases such as the render server, the blocking TG is a
         * well-known TG. Only in that case should blocking_tg be NULL.
         */
        assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
        sched_perfcontrol_thread_group_blocked(&blocked_data, NULL, flags, state);
    } else {
        struct thread_group_data blocking_data;
        blocking_data.thread_group_id = thread_group_get_id(blocking_tg);
        blocking_data.thread_group_data = thread_group_get_machine_data(blocking_tg);
        blocking_data.thread_group_size = thread_group_machine_data_size();
        sched_perfcontrol_thread_group_blocked(&blocked_data, &blocking_data, flags, state);
    }
    KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_START,
        thread_tid(blocked_thread), thread_group_get_id(blocked_tg),
        blocking_tg ? thread_group_get_id(blocking_tg) : THREAD_GROUP_INVALID,
        flags);

    splx(s);
}

void
machine_thread_group_unblocked(struct thread_group *unblocked_tg,
    struct thread_group *unblocking_tg,
    uint32_t flags,
    thread_t unblocked_thread)
{
    if (sched_perfcontrol_thread_group_unblocked == sched_perfcontrol_thread_group_unblocked_default) {
        return;
    }

    spl_t s = splsched();

    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(unblocked_thread);
    struct thread_group_data unblocked_data;
    assert(unblocked_tg != NULL);

    unblocked_data.thread_group_id = thread_group_get_id(unblocked_tg);
    unblocked_data.thread_group_data = thread_group_get_machine_data(unblocked_tg);
    unblocked_data.thread_group_size = thread_group_machine_data_size();

    if (unblocking_tg == NULL) {
        /*
         * For special cases such as the render server, the unblocking TG is a
         * well-known TG. Only in that case should unblocking_tg be NULL.
         */
        assert(flags & PERFCONTROL_CALLOUT_BLOCKING_TG_RENDER_SERVER);
        sched_perfcontrol_thread_group_unblocked(&unblocked_data, NULL, flags, state);
    } else {
        struct thread_group_data unblocking_data;
        unblocking_data.thread_group_id = thread_group_get_id(unblocking_tg);
        unblocking_data.thread_group_data = thread_group_get_machine_data(unblocking_tg);
        unblocking_data.thread_group_size = thread_group_machine_data_size();
        sched_perfcontrol_thread_group_unblocked(&unblocked_data, &unblocking_data, flags, state);
    }
    KDBG(MACHDBG_CODE(DBG_MACH_THREAD_GROUP, MACH_THREAD_GROUP_BLOCK) | DBG_FUNC_END,
        thread_tid(unblocked_thread), thread_group_get_id(unblocked_tg),
        unblocking_tg ? thread_group_get_id(unblocking_tg) : THREAD_GROUP_INVALID,
        flags);

    splx(s);
}
#endif /* CONFIG_THREAD_GROUPS */
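
/*
 * Report the maximum runnable (pending) latency observed per urgency band to
 * the performance controller.
 */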
void
machine_max_runnable_latency(uint64_t bg_max_latency,
    uint64_t default_max_latency,
    uint64_t realtime_max_latency)
{
    if (sched_perfcontrol_max_runnable_latency == sched_perfcontrol_max_runnable_latency_default) {
        return;
    }
    struct perfcontrol_max_runnable_latency latencies = {
        .max_scheduling_latencies = {
            [THREAD_URGENCY_NONE] = 0,
            [THREAD_URGENCY_BACKGROUND] = bg_max_latency,
            [THREAD_URGENCY_NORMAL] = default_max_latency,
            [THREAD_URGENCY_REAL_TIME] = realtime_max_latency
        }
    };

    sched_perfcontrol_max_runnable_latency(&latencies);
}
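
/*
 * Forward a thread's work-interval start/finish/deadline information to the
 * performance controller.
 */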
void
machine_work_interval_notify(thread_t thread,
    struct kern_work_interval_args *kwi_args)
{
    if (sched_perfcontrol_work_interval_notify == sched_perfcontrol_work_interval_notify_default) {
        return;
    }
    perfcontrol_state_t state = FIND_PERFCONTROL_STATE(thread);
    struct perfcontrol_work_interval work_interval = {
        .thread_id = thread->thread_id,
        .qos_class = (uint16_t)proc_get_effective_thread_policy(thread, TASK_POLICY_QOS),
        .urgency = kwi_args->urgency,
        .flags = kwi_args->notify_flags,
        .work_interval_id = kwi_args->work_interval_id,
        .start = kwi_args->start,
        .finish = kwi_args->finish,
        .deadline = kwi_args->deadline,
        .next_start = kwi_args->next_start,
        .create_flags = kwi_args->create_flags,
    };
#if CONFIG_THREAD_GROUPS
    struct thread_group *tg;
    tg = thread_group_get(thread);
    work_interval.thread_group_id = thread_group_get_id(tg);
    work_interval.thread_group_data = thread_group_get_machine_data(tg);
#endif
    sched_perfcontrol_work_interval_notify(state, &work_interval);
}
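
/*
 * Tell the performance controller that a previously requested deadline has
 * passed.
 */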
void
machine_perfcontrol_deadline_passed(uint64_t deadline)
{
    if (sched_perfcontrol_deadline_passed != sched_perfcontrol_deadline_passed_default) {
        sched_perfcontrol_deadline_passed(deadline);
    }
}

#if INTERRUPT_MASKED_DEBUG
/*
 * ml_spin_debug_reset()
 * Reset the timestamp on a thread that has been unscheduled
 * to avoid false alarms. Alarm will go off if interrupts are held
 * disabled for too long, starting from now.
 *
 * Call ml_get_timebase() directly to prevent extra overhead on newer
 * platforms that's enabled in DEVELOPMENT kernel configurations.
 */
void
ml_spin_debug_reset(thread_t thread)
{
    if (thread->machine.intmask_timestamp) {
        thread->machine.intmask_timestamp = ml_get_timebase();
    }
}

/*
 * ml_spin_debug_clear()
 * Clear the timestamp on a thread that has been unscheduled
 * to avoid false alarms.
 */
void
ml_spin_debug_clear(thread_t thread)
{
    thread->machine.intmask_timestamp = 0;
}

/*
 * ml_spin_debug_clear_self()
 * Clear the timestamp on the current thread to prevent false alarms.
 */
void
ml_spin_debug_clear_self(void)
{
    ml_spin_debug_clear(current_thread());
}

static inline void
__ml_check_interrupts_disabled_duration(thread_t thread, uint64_t timeout, bool is_int_handler)
{
    uint64_t start;
    uint64_t now;

    start = is_int_handler ? thread->machine.inthandler_timestamp : thread->machine.intmask_timestamp;

    now = ml_get_timebase();

    if ((now - start) > timeout * debug_cpu_performance_degradation_factor) {
        mach_timebase_info_data_t timebase;
        clock_timebase_info(&timebase);

#ifndef KASAN
        /*
         * Disable the actual panic for KASAN due to the overhead of KASAN itself; leave the rest of the
         * mechanism enabled so that KASAN can catch any bugs in the mechanism itself.
         */
        if (is_int_handler) {
            panic("Processing of an interrupt (type = %u, handler address = %p, vector = %p) took %llu nanoseconds (timeout = %llu ns)",
                thread->machine.int_type, (void *)thread->machine.int_handler_addr, (void *)thread->machine.int_vector,
                (((now - start) * timebase.numer) / timebase.denom),
                ((timeout * debug_cpu_performance_degradation_factor) * timebase.numer) / timebase.denom);
        } else {
            panic("Interrupts held disabled for %llu nanoseconds (timeout = %llu ns)",
                (((now - start) * timebase.numer) / timebase.denom),
                ((timeout * debug_cpu_performance_degradation_factor) * timebase.numer) / timebase.denom);
        }
#endif
    }
}

void
ml_check_interrupts_disabled_duration(thread_t thread)
{
    __ml_check_interrupts_disabled_duration(thread, interrupt_masked_timeout, false);
}

void
ml_check_stackshot_interrupt_disabled_duration(thread_t thread)
{
    /* Use MAX() to let the user bump the timeout further if needed */
    __ml_check_interrupts_disabled_duration(thread, MAX(stackshot_interrupt_masked_timeout, interrupt_masked_timeout), false);
}

void
ml_check_interrupt_handler_duration(thread_t thread)
{
    __ml_check_interrupts_disabled_duration(thread, interrupt_masked_timeout, true);
}

void
ml_irq_debug_start(uintptr_t handler, uintptr_t vector)
{
    INTERRUPT_MASKED_DEBUG_START(handler, DBG_INTR_TYPE_OTHER);
    current_thread()->machine.int_vector = (uintptr_t)VM_KERNEL_STRIP_PTR(vector);
}

void
ml_irq_debug_end(void)
{
    INTERRUPT_MASKED_DEBUG_END();
}
#endif // INTERRUPT_MASKED_DEBUG
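
/*
 * Enable or disable IRQ/FIQ delivery on the current CPU, returning whether
 * interrupts were previously enabled. On enable, pending urgent ASTs are
 * handled first and the interrupts-masked watchdog (if configured) is checked.
 */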
boolean_t
ml_set_interrupts_enabled(boolean_t enable)
{
    thread_t thread;
    uint64_t state;

#if __arm__
#define INTERRUPT_MASK PSR_IRQF
    state = __builtin_arm_rsr("cpsr");
#else
#define INTERRUPT_MASK DAIF_IRQF
    state = __builtin_arm_rsr("DAIF");
#endif
    if (enable && (state & INTERRUPT_MASK)) {
        assert(getCpuDatap()->cpu_int_state == NULL); // Make sure we're not enabling interrupts from primary interrupt context
#if INTERRUPT_MASKED_DEBUG
        if (interrupt_masked_debug) {
            // Interrupts are currently masked, we will enable them (after finishing this check)
            thread = current_thread();
            if (stackshot_active()) {
                ml_check_stackshot_interrupt_disabled_duration(thread);
            } else {
                ml_check_interrupts_disabled_duration(thread);
            }
            thread->machine.intmask_timestamp = 0;
        }
#endif // INTERRUPT_MASKED_DEBUG
        if (get_preemption_level() == 0) {
            thread = current_thread();
            while (thread->machine.CpuDatap->cpu_pending_ast & AST_URGENT) {
#if __ARM_USER_PROTECT__
                uintptr_t up = arm_user_protect_begin(thread);
#endif
                ast_taken_kernel();
#if __ARM_USER_PROTECT__
                arm_user_protect_end(thread, up, FALSE);
#endif
            }
        }
#if __arm__
        __asm__ volatile ("cpsie if" ::: "memory"); // Enable IRQ FIQ
#else
        __builtin_arm_wsr("DAIFClr", DAIFSC_STANDARD_DISABLE);
#endif
    } else if (!enable && ((state & INTERRUPT_MASK) == 0)) {
#if __arm__
        __asm__ volatile ("cpsid if" ::: "memory"); // Mask IRQ FIQ
#else
        __builtin_arm_wsr("DAIFSet", DAIFSC_STANDARD_DISABLE);
#endif
#if INTERRUPT_MASKED_DEBUG
        if (interrupt_masked_debug) {
            // Interrupts were enabled, we just masked them
            current_thread()->machine.intmask_timestamp = ml_get_timebase();
        }
#endif
    }
    return (state & INTERRUPT_MASK) == 0;
}

boolean_t
ml_early_set_interrupts_enabled(boolean_t enable)
{
    return ml_set_interrupts_enabled(enable);
}

/*
 * Routine:   ml_at_interrupt_context
 * Function:  Check if running at interrupt context
 */
boolean_t
ml_at_interrupt_context(void)
{
    /* Do not use a stack-based check here, as the top-level exception handler
     * is free to use some other stack besides the per-CPU interrupt stack.
     * Interrupts should always be disabled if we're at interrupt context.
     * Check that first, as we may be in a preemptible non-interrupt context, in
     * which case we could be migrated to a different CPU between obtaining
     * the per-cpu data pointer and loading cpu_int_state. We then might end
     * up checking the interrupt state of a different CPU, resulting in a false
     * positive. But if interrupts are disabled, we also know we cannot be
     * preempted. */
    return !ml_get_interrupts_enabled() && (getCpuDatap()->cpu_int_state != NULL);
}
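
/*
 * Return the number of bytes remaining on the current stack, using the
 * interrupt stack bounds if the stack pointer currently lies within them.
 */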
vm_offset_t
ml_stack_remaining(void)
{
    uintptr_t local = (uintptr_t)&local;
    vm_offset_t intstack_top_ptr;

    /* Since this is a stack-based check, we don't need to worry about
     * preemption as we do in ml_at_interrupt_context(). If we are preemptible,
     * then the sp should never be within any CPU's interrupt stack unless
     * something has gone horribly wrong. */
    intstack_top_ptr = getCpuDatap()->intstack_top;
    if ((local < intstack_top_ptr) && (local > intstack_top_ptr - INTSTACK_SIZE)) {
        return local - (getCpuDatap()->intstack_top - INTSTACK_SIZE);
    } else {
        return local - current_thread()->kernel_stack;
    }
}

static boolean_t ml_quiescing = FALSE;

void
ml_set_is_quiescing(boolean_t quiescing)
{
    ml_quiescing = quiescing;
    os_atomic_thread_fence(release);
}

boolean_t
ml_is_quiescing(void)
{
    os_atomic_thread_fence(acquire);
    return ml_quiescing;
}

uint64_t
ml_get_booter_memory_size(void)
{
    uint64_t size;
    uint64_t roundsize = 512 * 1024 * 1024ULL;
    size = BootArgs->memSizeActual;
    if (!size) {
        size = BootArgs->memSize;
        if (size < (2 * roundsize)) {
            roundsize >>= 2;
        }
        size = (size + roundsize - 1) & ~(roundsize - 1);
    }

    size -= BootArgs->memSize;

    return size;
}

uint64_t
ml_get_abstime_offset(void)
{
    return rtclock_base_abstime;
}

uint64_t
ml_get_conttime_offset(void)
{
#if HIBERNATION && HAS_CONTINUOUS_HWCLOCK
    return hwclock_conttime_offset;
#elif HAS_CONTINUOUS_HWCLOCK
    return 0;
#else
    return rtclock_base_abstime + mach_absolutetime_asleep;
#endif
}

uint64_t
ml_get_time_since_reset(void)
{
#if HAS_CONTINUOUS_HWCLOCK
    if (wake_conttime == UINT64_MAX) {
        return UINT64_MAX;
    } else {
        return mach_continuous_time() - wake_conttime;
    }
#else
    /* The timebase resets across S2R, so just return the raw value. */
    return ml_get_hwclock();
#endif
}

void
ml_set_reset_time(__unused uint64_t wake_time)
{
#if HAS_CONTINUOUS_HWCLOCK
    wake_conttime = wake_time;
#endif
}

uint64_t
ml_get_conttime_wake_time(void)
{
#if HAS_CONTINUOUS_HWCLOCK
    /*
     * For now, we will reconstitute the timebase value from
     * cpu_timebase_init and use it as the wake time.
     */
    return wake_abstime - ml_get_abstime_offset();
#else /* HAS_CONTINUOUS_HWCLOCK */
    /* The wake time is simply our continuous time offset. */
    return ml_get_conttime_offset();
#endif /* HAS_CONTINUOUS_HWCLOCK */
}

/*
 * ml_snoop_thread_is_on_core(thread_t thread)
 * Check if the given thread is currently on core. This function does not take
 * locks, disable preemption, or otherwise guarantee synchronization. The
 * result should be considered advisory.
 */
bool
ml_snoop_thread_is_on_core(thread_t thread)
{
    unsigned int cur_cpu_num = 0;
    const unsigned int max_cpu_id = ml_get_max_cpu_number();

    for (cur_cpu_num = 0; cur_cpu_num <= max_cpu_id; cur_cpu_num++) {
        if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr) {
            if (CpuDataEntries[cur_cpu_num].cpu_data_vaddr->cpu_active_thread == thread) {
                return true;
            }
        }
    }

    return false;
}

unsigned int
ml_early_cpu_max_number(void)
{
    assert(startup_phase >= STARTUP_SUB_TUNABLES);
    return ml_get_max_cpu_number();
}
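
/*
 * Publish the maximum CPU count and wake anyone blocked in ml_wait_max_cpus().
 */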
void
ml_set_max_cpus(unsigned int max_cpus __unused)
{
    lck_mtx_lock(&max_cpus_lock);
    if (max_cpus_initialized != MAX_CPUS_SET) {
        if (max_cpus_initialized == MAX_CPUS_WAIT) {
            thread_wakeup((event_t)&max_cpus_initialized);
        }
        max_cpus_initialized = MAX_CPUS_SET;
    }
    lck_mtx_unlock(&max_cpus_lock);
}

unsigned int
ml_wait_max_cpus(void)
{
    assert(lockdown_done);
    lck_mtx_lock(&max_cpus_lock);
    while (max_cpus_initialized != MAX_CPUS_SET) {
        max_cpus_initialized = MAX_CPUS_WAIT;
        lck_mtx_sleep(&max_cpus_lock, LCK_SLEEP_DEFAULT, &max_cpus_initialized, THREAD_UNINT);
    }
    lck_mtx_unlock(&max_cpus_lock);
    return machine_info.max_cpus;
}

void
machine_conf(void)
{
    /*
     * This is known to be inaccurate. mem_size should always be capped at 2 GB
     */
    machine_info.memory_size = (uint32_t)mem_size;

    // rdar://problem/58285685: Userland expects _COMM_PAGE_LOGICAL_CPUS to report
    // (max_cpu_id+1) rather than a literal *count* of logical CPUs.
    unsigned int num_cpus = ml_get_topology_info()->max_cpu_id + 1;
    machine_info.max_cpus = num_cpus;
    machine_info.physical_cpu_max = num_cpus;
    machine_info.logical_cpu_max = num_cpus;
}

void
machine_init(void)
{
    debug_log_init();
    clock_config();
    is_clock_configured = TRUE;
    if (debug_enabled) {