//
// EventPrinting.hpp
// kdprof
//
// Created by James McIlree on 4/20/13.
// Copyright (c) 2013 Apple. All rights reserved.
//

#ifndef kdprof_EventPrinting_hpp
#define kdprof_EventPrinting_hpp

void print_event_header(const Globals& globals, bool is_64_bit);

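//
// Formats a single event as one line of output. On success, returns the
// cursor advanced past the bytes written; if any column would overflow
// [buf, buf_end), returns the original `buf` unchanged so the caller can
// grow its buffer and retry.
//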
template <typename SIZE>
char* print_event(char* buf, char* buf_end, const Globals& globals, const Machine<SIZE>& machine, const KDEvent<SIZE>& event, uintptr_t event_index)
{
	// Header is...
	//
	// [Index]  Time  Type  Code  arg1  arg2  arg3  arg4  thread  cpu#  command/IOP-name  pid
	//       8    16     4    34  8/16  8/16  8/16  8/16      10     4                16    6
	//
	// For now, each column folds its "after" spacing into a single printf, IOW
	//
	// buf += snprintf(buf, buf_end - buf, "%8s  ", "COL"); /* + 2 spaces */
	//
	// Not:
	//
	// buf += snprintf(buf, buf_end - buf, "%8s", "COL");
	// buf += snprintf(buf, buf_end - buf, "  "); /* 2 spaces */

	ASSERT(event.cpu() > -1 && event.cpu() < machine.cpus().size(), "cpu_id out of range");
	const MachineCPU<SIZE>& cpu = machine.cpus()[event.cpu()];

	//
	// Okay, here is how snprintf works.
	//
	// char buf[2];
	//
	// snprintf(buf, 0, "a"); // Returns 1, buf is unchanged.
	// snprintf(buf, 1, "a"); // Returns 1, buf = \0
	// snprintf(buf, 2, "a"); // Returns 1, buf = 'a', \0
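	//
	// The consequence: snprintf returns the length the output would have
	// required, not the length actually written, so `buf += snprintf(...)`
	// can advance buf past buf_end. That is why every column below is
	// followed by a `buf >= buf_end` check before the next write.
	//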

	//
	// If we cannot print successfully, we return the original pointer.
	//
	char* orig_buf = buf;

	//
	// [Index]
	//
	if (globals.should_print_event_index()) {
		buf += snprintf(buf, buf_end - buf, "%8llu ", (uint64_t)event_index);
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Time
	//
	if (globals.should_print_mach_absolute_timestamps()) {
		if (globals.beginning_of_time().value() == 0)
			buf += snprintf(buf, buf_end - buf, "%16llX ", (event.timestamp() - globals.beginning_of_time()).value());
		else
			buf += snprintf(buf, buf_end - buf, "%16llu ", (event.timestamp() - globals.beginning_of_time()).value());
	} else {
		NanoTime ntime = (event.timestamp() - globals.beginning_of_time()).nano_time(globals.timebase());
		buf += snprintf(buf, buf_end - buf, "%16.2f ", (double)ntime.value() / 1000.0);
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Type Code
	//
	const char* type = event.is_func_start() ? "beg" : (event.is_func_end() ? "end" : "---");
	auto trace_code_it = globals.trace_codes().find(event.dbg_cooked());
	if (cpu.is_iop() || !globals.should_print_symbolic_event_codes() || trace_code_it == globals.trace_codes().end()) {
		buf += snprintf(buf, buf_end - buf, "%4s %-34x ", type, event.dbg_cooked());
	} else {
		buf += snprintf(buf, buf_end - buf, "%4s %-34s ", type, trace_code_it->second.c_str());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// arg1
	//
	if (event.dbg_class() == DBG_IOKIT && event.dbg_subclass() == DBG_IOPOWER) {
		std::string kext_name = event.arg1_as_string();
		std::reverse(kext_name.begin(), kext_name.end());

		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16s ", kext_name.c_str());
		else
			buf += snprintf(buf, buf_end - buf, "%-8s ", kext_name.c_str());
	} else {
		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16llX ", (uint64_t)event.arg1());
		else
			buf += snprintf(buf, buf_end - buf, "%-8x ", (uint32_t)event.arg1());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Profiling showed the repeated snprintf calls were hot; rolling them
	// up into one call saves ~2.5% on a HUGE file.
	//
	// arg2 arg3 arg4 thread cpu
	//
	if (SIZE::is_64_bit)
		buf += snprintf(buf, buf_end - buf, "%-16llX %-16llX %-16llX %10llX %4u ", (uint64_t)event.arg2(), (uint64_t)event.arg3(), (uint64_t)event.arg4(), (uint64_t)event.tid(), event.cpu());
	else
		buf += snprintf(buf, buf_end - buf, "%-8x %-8x %-8x %10llX %4u ", (uint32_t)event.arg2(), (uint32_t)event.arg3(), (uint32_t)event.arg4(), (uint64_t)event.tid(), event.cpu());

	if (buf >= buf_end)
		return orig_buf;

	//
	// command & pid (handled together because an IOP does not print a pid)
	//
	if (cpu.is_iop()) {
		// We print the IOP name instead of a command
		buf += snprintf(buf, buf_end - buf, "%-16s\n", cpu.name());
	} else {
		if (const MachineThread<SIZE>* thread = machine.thread(event.tid(), event.timestamp())) {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6d\n", thread->process().name(), thread->process().pid());
		} else {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6s\n", "?????", "???");
		}
	}

	// We still need to check this; it's an error if we overflow on the last print!
	if (buf >= buf_end)
		return orig_buf;

	return buf;
}

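//
// Formats events [range.location(), range.max()) into `buffer`, doubling
// the buffer's capacity whenever a line does not fit, and returns a
// cursor one past the last byte written.
//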
template <typename SIZE>
char* print_event_range_to_buffer(const Globals& globals, const Machine<SIZE>& machine, TRange<uintptr_t> range, MemoryBuffer<char>& buffer) {
	char* cursor = buffer.data();
	char* cursor_end = cursor + buffer.capacity();

	if (const KDEvent<SIZE>* events = machine.events()) {
		ASSERT(TRange<uintptr_t>(0, machine.event_count()).contains(range), "Sanity");
		for (uintptr_t index = range.location(); index < range.max(); ++index) {
			char* temp = print_event(cursor, cursor_end, globals, machine, events[index], index);
			if (temp != cursor) {
				cursor = temp;
			} else {
				// The event did not fit. Grow the buffer and retry the same
				// index; changing the capacity invalidates the cursor, so
				// recompute it from the new data pointer.
				ptrdiff_t offset = cursor - buffer.data();
				buffer.set_capacity(buffer.capacity()*2);
				cursor = buffer.data() + offset;
				cursor_end = buffer.data() + buffer.capacity();
				--index;
			}
		}
	}

	return cursor;
}

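//
// A PrintWorkUnit is one claimed chunk of the event stream plus the
// buffer holding its formatted text. Producer threads fill work units;
// the consumer writes them out in event order.
//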
class PrintWorkUnit {
  protected:
	MemoryBuffer<char> _buffer;
	TRange<uintptr_t> _event_range;
	char* _buffer_end;

	// We do not want work units copied.
	PrintWorkUnit(const PrintWorkUnit& that) = delete;
	PrintWorkUnit& operator=(const PrintWorkUnit& other) = delete;

  public:
	PrintWorkUnit(MemoryBuffer<char>&& buffer, TRange<uintptr_t> event_range, char* buffer_end) :
		_buffer(std::move(buffer)),
		_event_range(event_range),
		_buffer_end(buffer_end)
	{
		ASSERT(_buffer.capacity(), "Sanity");
		ASSERT(_buffer.data(), "Sanity");
		ASSERT(!_buffer_end || _buffer_end > _buffer.data(), "Sanity");
		ASSERT(!_buffer_end || (_buffer_end < _buffer.data() + _buffer.capacity()), "Sanity");
	}

	MemoryBuffer<char>& buffer() { return _buffer; }

	TRange<uintptr_t> event_range() { return _event_range; }
	void set_event_range(TRange<uintptr_t> range) { _event_range = range; }

	char* buffer_end() const { return _buffer_end; }
	void set_buffer_end(char* buffer_end) { _buffer_end = buffer_end; }
};

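//
// PrintProducer hands out non-overlapping chunks of events to worker
// threads. When a summary start/stop window is set, the printable range
// is first trimmed to that window.
//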
template <typename SIZE>
class PrintProducer {
  protected:
	const Globals& _globals;
	const Machine<SIZE>& _machine;
	uintptr_t _start_index;
	uintptr_t _end_index;
	uintptr_t _chunk_size;

  public:
	PrintProducer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t chunk_size) :
		_globals(globals),
		_machine(machine),
		_chunk_size(chunk_size)
	{
		_start_index = 0;
		_end_index = machine.event_count();

		if (globals.is_summary_start_set() || globals.is_summary_stop_set()) {
			AbsInterval machine_timespan = machine.timespan();

			KDEvent<SIZE> start_event(globals.summary_start(machine_timespan));
			auto it = std::lower_bound(machine.events(), machine.events() + _end_index, start_event);
			ASSERT(&*it >= machine.events(), "Returned start index lower than start");
			_start_index = std::distance(machine.events(), it);

			KDEvent<SIZE> end_event(globals.summary_stop(machine_timespan));
			it = std::lower_bound(machine.events(), machine.events() + _end_index, end_event);
			ASSERT(&*it <= machine.events() + _end_index, "Returned end index greater than end");
			_end_index = std::distance(machine.events(), it);

			ASSERT(_start_index <= _end_index, "start index is > end index");
		}
	}

	bool produce(PrintWorkUnit& work_unit) {
		// Claim a chunk of work to do
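		// _start_index is advanced with a lock-free compare-and-swap; if
		// another thread claims first, the CAS fails and we re-read and
		// retry. A successful swap claims [orig_start_index, new_start_index).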
		uintptr_t orig_start_index, new_start_index;
		do {
			orig_start_index = _start_index;
			new_start_index = orig_start_index + std::min(_chunk_size, _end_index - orig_start_index);
		} while (orig_start_index < _end_index && !OSAtomicCompareAndSwapPtrBarrier((void*)orig_start_index, (void*)new_start_index, (void* volatile*)&_start_index));

		// Did we claim work?
		if (orig_start_index < _end_index) {
			TRange<uintptr_t> event_range(orig_start_index, new_start_index - orig_start_index);
			char* end = print_event_range_to_buffer(_globals, _machine, event_range, work_unit.buffer());

			work_unit.set_event_range(event_range);
			work_unit.set_buffer_end(end);
			return true;
		}

		return false;
	}

	uintptr_t start_index() const { return _start_index; }
};

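//
// PrintConsumer restores output ordering; work units may complete out of
// order, so each is held until its range begins at the shared write
// cursor, then written to the output fd.
//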
template <typename SIZE>
class PrintConsumer {
  protected:
	const Globals& _globals;
	uintptr_t _write_index;
	std::mutex _write_mutex;
	std::condition_variable _write_condition;

  public:
	PrintConsumer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t start_index) :
		_globals(globals),
		_write_index(start_index)
	{
	}

	void consume(PrintWorkUnit& work_unit) {
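		// Work units arrive out of order; block until this unit's range
		// begins exactly at the global write cursor so output stays in
		// event order. Earlier writers notify as they finish.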
		std::unique_lock<std::mutex> guard(_write_mutex);
		_write_condition.wait(guard, [&](){ return work_unit.event_range().location() == this->_write_index; });

		ASSERT(work_unit.event_range().location() == _write_index, "Sanity");

		char* data = work_unit.buffer().data();
		size_t bytes = work_unit.buffer_end() - data;
		write(_globals.output_fd(), data, bytes);
		_write_index = work_unit.event_range().max();

		_write_condition.notify_all();
	}
};

template <typename SIZE>
uintptr_t print_machine_events(const Globals& globals, const Machine<SIZE>& machine) {
	print_event_header(globals, SIZE::is_64_bit);

	if (const KDEvent<SIZE>* events = machine.events()) {
		if (uintptr_t event_count = machine.event_count()) {

			//
			// We want to chunk this up into reasonably sized pieces of work.
			// Because each piece of work can potentially accumulate a large
			// amount of memory, we need to limit the amount of work "in-flight".
			//
			uint32_t active_cpus = Kernel::active_cpu_count();

			uintptr_t chunk_size = 2000;

			PrintProducer<SIZE> producer(globals, machine, chunk_size);
			PrintConsumer<SIZE> consumer(globals, machine, producer.start_index());

			std::vector<std::thread> threads;
			for (uint32_t i = 0; i < active_cpus; ++i) {
				threads.push_back(std::thread([&]() {
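					// 160 bytes is a rough upper bound per formatted line
					// (see the column widths above print_event);
					// print_event_range_to_buffer doubles the buffer if a
					// chunk needs more.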
					PrintWorkUnit work_unit(MemoryBuffer<char>(160 * chunk_size), TRange<uintptr_t>(0, 0), nullptr);
					while (producer.produce(work_unit)) {
						consumer.consume(work_unit);
					}
				}));
			}

			for (auto& thread : threads) {
				thread.join();
			}

			uint32_t total_processes = 0;
			uint32_t total_threads = 0;

			for (auto process : machine.processes()) {
				if (!process->is_created_by_previous_machine_state()) {
					total_processes++;
				}
			}

			for (auto thread : machine.threads()) {
				if (!thread->is_created_by_previous_machine_state()) {
					total_threads++;
				}
			}

			dprintf(globals.output_fd(), "Total Events: %llu\n", (uint64_t)event_count);
			dprintf(globals.output_fd(), "Total Processes: %u\n", total_processes);
			dprintf(globals.output_fd(), "Total Threads: %u\n", total_threads);

			return event_count;
		}
	}

	return 0;
}
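
//
// A sketch of a typical call site (hypothetical; how Globals and the
// Machine are built, and the concrete SIZE tag type, depend on the rest
// of kdprof):
//
//     uintptr_t events_printed = print_machine_events(globals, machine);
//     if (events_printed == 0) {
//         // The trace had no event buffer, or it was empty.
//     }
//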

#endif // kdprof_EventPrinting_hpp