]> git.saurik.com Git - apple/system_cmds.git/blob - KDBG/CPUSummary.hpp
eaafb6503f9ab49bd9a564eee958c7b2f9985023
[apple/system_cmds.git] / KDBG / CPUSummary.hpp
1 //
2 // CPUSummary.hpp
3 // KDBG
4 //
5 // Created by James McIlree on 4/22/13.
6 // Copyright (c) 2014 Apple. All rights reserved.
7 //
8
9 #ifndef kdprof_CPUSummary_hpp
10 #define kdprof_CPUSummary_hpp

#include <utility>

12 template <typename SIZE>
13 class MachineCPU;
14
15 template <typename SIZE>
16 class CPUSummary {
17 private:
18 // Disallow copy constructor to make sure that the compiler
19 // is moving these, instead of copying them when we pass around
20 CPUSummary(const CPUSummary& that) = delete;
21 CPUSummary& operator=(const CPUSummary& other) = delete;
22
23 friend class Machine<SIZE>;
24
25 public:
26 typedef std::unordered_set<ProcessSummary<SIZE>, ProcessSummaryHash<SIZE>, ProcessSummaryEqualTo<SIZE>> ProcessSummarySet;
27 typedef std::unordered_set<const MachineCPU<SIZE>*> CPUSummaryMachineCPUSet;
28
29 protected:
30 AbsTime _total_unknown_time;
31 AbsTime _total_run_time;
32 AbsTime _total_idle_time;
33 AbsTime _total_intr_time;
34 AbsTime _total_future_run_time;
35 AbsTime _total_wallclock_run_time;
36 AbsTime _total_all_cpus_idle_time;
37 AbsTime _total_vm_fault_time;
38 AbsTime _total_io_time;
39 AbsTime _total_jetsam_time;
40
41 uint32_t _context_switch_count;
42 uint32_t _count_idle_events;
43 uint32_t _count_intr_events;
44 uint32_t _count_vm_fault_events;
45 uint32_t _count_io_events;
46 uint32_t _count_processes_jetsamed;
47 uint32_t _active_cpus;
48
49 uint64_t _io_bytes_completed;
50
51 CPUSummaryMachineCPUSet _cpus;
52 ProcessSummarySet _process_summaries;
53
54 std::vector<AbsInterval> _wallclock_run_intervals; // This is the actual wallclock run interval data.
55 std::vector<AbsInterval> _per_cpu_wallclock_run_intervals; // We need to accumulate intervals during summary generation, this is a temp buffer.
56
57 bool _should_merge_all_cpus_idle_intervals;
58 std::vector<AbsInterval> _all_cpus_idle_intervals;
59 std::vector<AbsInterval> _per_cpu_all_cpus_idle_intervals;
60
61 void add_unknown_time(AbsTime time) { _total_unknown_time += time; }
62 void add_run_time(AbsTime time) { _total_run_time += time; }
63 void add_idle_time(AbsTime time) { _total_idle_time += time; _count_idle_events++; }
64 void add_intr_time(AbsTime time) { _total_intr_time += time; _count_intr_events++; }
65 void add_future_run_time(AbsTime time) { _total_future_run_time += time; }
66 void add_vm_fault_time(AbsTime time) { _total_vm_fault_time += time; _count_vm_fault_events++; }
67 void add_io_time(AbsTime time) { _total_io_time += time; _count_io_events++; } // We want to bump the event count on all IO activity, not just on completion
68 void add_jetsam_time(AbsTime time) { _total_jetsam_time += time; }
69
70 void add_io_bytes_completed(typename SIZE::ptr_t bytes) { _io_bytes_completed += bytes; }
71
72 void increment_processes_jetsamed() { _count_processes_jetsamed++; }
73
74 //
75 // NOTE! Why are the various interval(s) accumulated one cpu at a time,
76 // instead of storing them all in a single vector, sorting it and processing
77 // once at the end?
78 //
79 // The single vector, sort and postprocess would work for wallclock time
80 // calculation, because wallclock times involve "union" operations where
81 // the number of cpu(s) don't matter.
82 //
83 // However, for the all-idle and idle-while-wating-on-IO calculations, we
84 // need "intersects" operations, I.E. all 16 cores need to be idle to count
85 // as "all-idle". In this mode, the number of cores matters, an intersection
86 // requires all 16 cores to simultaneously be the same state. This is difficult
87 // to calculate with more than 2 sources. By calculating one at a time,
88 // that is avoided, the state remains sanity-checkable throughout.
89 //
90
91
92 //
93 // Wallclock run intervals are added as each cpu timeline is walked.
94 // Between cpu(s), the results are accumulated to a single buffer
95 // After all cpus have been processed, the single buffer is summarized
96 //
97 // wallclock run time is the *union* of cpu run intervals.
98 //
99 void add_wallclock_run_interval(AbsInterval interval);
100 void accumulate_wallclock_run_intervals();
101 void summarize_wallclock_run_intervals();
102
103 //
104 // all cpus idle intervals are added as each cpu timeline is walked.
105 // Between cpu(s), the results are accumulated to a single buffer
106 // After all cpus have been processed, the single buffer is summarized.
107 //
108 // all cpus idle time is the *intersection* of cpu idle intervals
109 //
110 void add_all_cpus_idle_interval(AbsInterval interval);
111 void accumulate_all_cpus_idle_intervals();
112 void summarize_all_cpus_idle_intervals();
113
114 void incr_context_switches() { _context_switch_count++; }
115 void incr_active_cpus() { _active_cpus++; }
116
117 // These bracket individual cpu timeline walks
118 void begin_cpu_timeline_walk(const MachineCPU<SIZE>* cpu);
119 void end_cpu_timeline_walk(const MachineCPU<SIZE>* cpu);
120
121 // These bracket all cpu timeline walks
122 void begin_cpu_timeline_walks(void);
123 void end_cpu_timeline_walks(void);
124
125 ProcessSummary<SIZE>* mutable_process_summary(const MachineProcess<SIZE>* process) {
126 auto it = _process_summaries.find(process);
127 if (it == _process_summaries.end()) {
128 // We create any process summary that is missing.
129 auto insert_result = _process_summaries.emplace(process);
130 ASSERT(insert_result.second, "Sanity");
131 it = insert_result.first;
132 }
133
134 // NOTE! Because we are using a Set instead of a Map, STL wants
135 // the objects to be immutable. "it" refers to a const Record, to
136 // prevent us from changing the hash or equality of the Set. We
137 // know that the allowed set of mutations will not change these,
138 // and so we evil hack(tm) and cast away the const'ness.
139 return const_cast<ProcessSummary<SIZE>*>(&*it);
140 }
141
142 ProcessSummarySet& mutable_process_summaries() { return _process_summaries; }
143
144 public:
145 CPUSummary() :
146 _context_switch_count(0),
147 _count_idle_events(0),
148 _count_intr_events(0),
149 _count_vm_fault_events(0),
150 _count_io_events(0),
151 _count_processes_jetsamed(0),
152 _active_cpus(0),
153 _io_bytes_completed(0),
154 _should_merge_all_cpus_idle_intervals(false)
155 {
156 }
157
158 CPUSummary (CPUSummary&& rhs) noexcept :
159 _total_unknown_time(rhs._total_unknown_time),
160 _total_run_time(rhs._total_run_time),
161 _total_idle_time(rhs._total_idle_time),
162 _total_intr_time(rhs._total_intr_time),
163 _total_future_run_time(rhs._total_future_run_time),
164 _total_wallclock_run_time(rhs._total_wallclock_run_time),
165 _total_all_cpus_idle_time(rhs._total_all_cpus_idle_time),
166 _total_vm_fault_time(rhs._total_vm_fault_time),
167 _total_io_time(rhs._total_io_time),
168 _context_switch_count(rhs._context_switch_count),
169 _count_idle_events(rhs._count_idle_events),
170 _count_intr_events(rhs._count_intr_events),
171 _count_vm_fault_events(rhs._count_vm_fault_events),
172 _count_io_events(rhs._count_io_events),
173 _count_processes_jetsamed(rhs._count_processes_jetsamed),
174 _active_cpus(rhs._active_cpus),
175 _io_bytes_completed(rhs._io_bytes_completed),
176 _cpus(rhs._cpus),
177 _process_summaries(rhs._process_summaries),
178 // _wallclock_run_intervals
179 // _per_cpu_wallclock_run_intervals
180 _should_merge_all_cpus_idle_intervals(false)
181 // _all_cpus_idle_intervals
182 // _per_cpu_all_cpus_idle_intervals
183 // _wallclock_vm_fault_intervals
184 // _wallclock_pgin_intervals
185 // _wallclock_disk_read_intervals
186 {
187 ASSERT(rhs._all_cpus_idle_intervals.empty(), "Sanity");
188 ASSERT(rhs._per_cpu_all_cpus_idle_intervals.empty(), "Sanity");
189 ASSERT(rhs._wallclock_run_intervals.empty(), "Sanity");
190 ASSERT(rhs._per_cpu_wallclock_run_intervals.empty(), "Sanity");
191 ASSERT(rhs._should_merge_all_cpus_idle_intervals == false, "Sanity");
192 }
193
194 AbsTime total_time() const { return _total_unknown_time + _total_run_time + _total_idle_time + _total_intr_time; }
195
196 AbsTime total_unknown_time() const { return _total_unknown_time; }
197 AbsTime total_run_time() const { return _total_run_time; }
198 AbsTime total_idle_time() const { return _total_idle_time; }
199 AbsTime total_intr_time() const { return _total_intr_time; }
200 AbsTime total_future_run_time() const { return _total_future_run_time; }
201 AbsTime total_wallclock_run_time() const { return _total_wallclock_run_time; }
202 AbsTime total_all_cpus_idle_time() const { return _total_all_cpus_idle_time; }
203 AbsTime total_vm_fault_time() const { return _total_vm_fault_time; }
204 AbsTime total_io_time() const { return _total_io_time; }
205 AbsTime total_jetsam_time() const { return _total_jetsam_time; }
206
207 AbsTime avg_on_cpu_time() const { return _total_run_time / _context_switch_count; }
208
209 uint32_t context_switches() const { return _context_switch_count; }
210 uint32_t num_idle_events() const { return _count_idle_events; }
211 uint32_t num_intr_events() const { return _count_intr_events; }
212 uint32_t num_vm_fault_events() const { return _count_vm_fault_events; }
213 uint32_t num_io_events() const { return _count_io_events; }
214 uint32_t num_processes_jetsammed() const { return _count_processes_jetsamed; }
215
216 uint32_t active_cpus() const { return _active_cpus; }
217
218 uint64_t io_bytes_completed() const { return _io_bytes_completed; }
219
220
221 // A CPUSummary may be a summary of one or more CPUs.
222 // The cpus set are the MachineCPU(s) that were used to
223 // construct this summary.
224 const CPUSummaryMachineCPUSet& cpus() const { return _cpus; }
225
226 const ProcessSummarySet& process_summaries() const { return _process_summaries; }
227 const ProcessSummary<SIZE>* process_summary(const MachineProcess<SIZE>* process) const {
228 auto it = _process_summaries.find(process);
229 return (it == _process_summaries.end()) ? NULL : &*it;
230 }
231
232 DEBUG_ONLY(void validate() const;)
233 };
234
235 template <typename SIZE>
236 void CPUSummary<SIZE>::begin_cpu_timeline_walks() {
237 _should_merge_all_cpus_idle_intervals = true;
238 }
239
240 template <typename SIZE>
241 void CPUSummary<SIZE>::begin_cpu_timeline_walk(const MachineCPU<SIZE>* cpu) {
242 ASSERT(cpu, "Sanity");
243 _cpus.emplace(cpu);
244 }
245
246 template <typename SIZE>
247 void CPUSummary<SIZE>::end_cpu_timeline_walk(const MachineCPU<SIZE>* cpu) {
248 ASSERT(cpu, "Sanity");
249
250 accumulate_wallclock_run_intervals();
251 accumulate_all_cpus_idle_intervals();
252 }
253
254 template <typename SIZE>
255 void CPUSummary<SIZE>::end_cpu_timeline_walks(void) {
256 summarize_wallclock_run_intervals();
257 summarize_all_cpus_idle_intervals();
258 }
259
260 template <typename SIZE>
261 void CPUSummary<SIZE>::add_wallclock_run_interval(AbsInterval interval) {
262 ASSERT(_per_cpu_wallclock_run_intervals.empty() || (_per_cpu_wallclock_run_intervals.back() < interval && !interval.intersects(_per_cpu_wallclock_run_intervals.back())), "Invariant violated");
263 _per_cpu_wallclock_run_intervals.emplace_back(interval);
264 }
265
266 template <typename SIZE>
267 void CPUSummary<SIZE>::accumulate_wallclock_run_intervals() {
268 _wallclock_run_intervals = trange_vector_union(_wallclock_run_intervals, _per_cpu_wallclock_run_intervals);
269 _per_cpu_wallclock_run_intervals.clear();
270 // We don't shrink_to_fit here as its expected another CPU's run intervals will be processed next.
271
272 for (auto& process_summary : _process_summaries) {
273 // NOTE! Because we are using a Set instead of a Map, STL wants
274 // the objects to be immutable. We know that the operations being
275 // invoked will not change the hash, but we still must throw away
276 // the const'ness. Care must be taken to avoid the construction of
277 // temporary objects, thus the use of pointers...
278 const_cast<ProcessSummary<SIZE>*>(&process_summary)->accumulate_wallclock_run_intervals();
279 }
280 }
281
282 template <typename SIZE>
283 void CPUSummary<SIZE>::summarize_wallclock_run_intervals() {
284 ASSERT(_per_cpu_wallclock_run_intervals.empty(), "Sanity");
285 _per_cpu_wallclock_run_intervals.shrink_to_fit();
286
287 ASSERT(_total_wallclock_run_time == 0, "Called more than once");
288
289 ASSERT(is_trange_vector_sorted_and_non_overlapping(_wallclock_run_intervals), "Sanity");
290
291 for (auto& interval : _wallclock_run_intervals) {
292 _total_wallclock_run_time += interval.length();
293 }
294
295 _wallclock_run_intervals.clear();
296 _wallclock_run_intervals.shrink_to_fit();
297
298 for (auto& process_summary : _process_summaries) {
299 // NOTE! Because we are using a Set instead of a Map, STL wants
300 // the objects to be immutable. We know that the operations being
301 // invoked will not change the hash, but we still must throw away
302 // the const'ness. Care must be taken to avoid the construction of
303 // temporary objects, thus the use of pointers...
304 const_cast<ProcessSummary<SIZE>*>(&process_summary)->summarize_wallclock_run_intervals();
305 }
306 }
307
308 template <typename SIZE>
309 void CPUSummary<SIZE>::add_all_cpus_idle_interval(AbsInterval interval) {
310 ASSERT(_per_cpu_all_cpus_idle_intervals.empty() || (_per_cpu_all_cpus_idle_intervals.back() < interval && !interval.intersects(_per_cpu_all_cpus_idle_intervals.back())), "Invariant violated");
311 _per_cpu_all_cpus_idle_intervals.emplace_back(interval);
312 }
313
314 template <typename SIZE>
315 void CPUSummary<SIZE>::accumulate_all_cpus_idle_intervals() {
316 if (_should_merge_all_cpus_idle_intervals) {
317 _should_merge_all_cpus_idle_intervals = false;
318 _all_cpus_idle_intervals = _per_cpu_all_cpus_idle_intervals;
319 } else {
320 _all_cpus_idle_intervals = trange_vector_intersect(_all_cpus_idle_intervals, _per_cpu_all_cpus_idle_intervals);
321 }
322 _per_cpu_all_cpus_idle_intervals.clear();
323 }
324
325 template <typename SIZE>
326 void CPUSummary<SIZE>::summarize_all_cpus_idle_intervals() {
327 ASSERT(!_should_merge_all_cpus_idle_intervals, "Sanity");
328 ASSERT(_per_cpu_all_cpus_idle_intervals.empty(), "Sanity");
329 ASSERT(_total_all_cpus_idle_time == 0, "Called more than once");
330 ASSERT(is_trange_vector_sorted_and_non_overlapping(_all_cpus_idle_intervals), "Sanity");
331
332 _per_cpu_all_cpus_idle_intervals.shrink_to_fit();
333 for (auto& interval : _all_cpus_idle_intervals) {
334 _total_all_cpus_idle_time += interval.length();
335 }
336
337 _all_cpus_idle_intervals.clear();
338 _all_cpus_idle_intervals.shrink_to_fit();
339 }
340
341 #if !defined(NDEBUG) && !defined(NS_BLOCK_ASSERTIONS)
342 template <typename SIZE>
343 void CPUSummary<SIZE>::validate() const {
344 ASSERT(_total_wallclock_run_time <= _total_run_time, "Sanity");
345 ASSERT(_total_all_cpus_idle_time <= _total_idle_time, "Sanity");
346
347 for (const auto& process_summary : _process_summaries) {
348 process_summary.validate();
349 }
350 }
351 #endif
352
353 #endif