]>
Commit | Line | Data |
---|---|---|
bd6521f0 A |
1 | // |
2 | // EventPrinting.hpp | |
3 | // kdprof | |
4 | // | |
5 | // Created by James McIlree on 4/20/13. | |
6 | // Copyright (c) 2013 Apple. All rights reserved. | |
7 | // | |
8 | ||
9 | #ifndef kdprof_ParallelPrinting_hpp | |
10 | #define kdprof_ParallelPrinting_hpp | |
11 | ||
// Prints the column-header row that precedes the per-event rows produced by
// print_event(). `is_64_bit` presumably selects the wider (16-char) argument
// columns described in print_event's layout comment — confirm in the definition,
// which lives in another translation unit.
void print_event_header(const Globals& globals, bool is_64_bit);
13 | ||
// Formats one trace event as a single text row into [buf, buf_end).
//
// Returns a pointer one past the last byte written on success. If at any point
// the remaining space is exhausted, returns the ORIGINAL `buf` unchanged so the
// caller can grow its buffer and retry (see print_event_range_to_buffer).
//
// Column layout (widths below; 8/16 means 32-bit/64-bit trace widths):
template <typename SIZE>
char* print_event(char* buf, char* buf_end, const Globals& globals, const Machine<SIZE>& machine, const KDEvent<SIZE>& event, uintptr_t event_index)
{
	// Header is...
	//
	// [Index] Time Type Code arg1 arg2 arg3 arg4 thread cpu# command/IOP-name pid
	// 8 16 4 34 8/16 8/16 8/16 8/16 10 4 16 6
	//
	// For now, each column is folding up the "after" spacing in a single printf, IOW
	//
	// buf += snprintf(buf, buf_end - buf, "%8s ", "COL"); /* + 2 spaces */
	//
	// Not:
	//
	// buf += snprintf(buf, buf_end - buf, "%8s", "COL");
	// buf += snprintf(buf, buf_end - buf, " "); /* 2 spaces */

	ASSERT(event.cpu() > -1 && event.cpu() < machine.cpus().size(), "cpu_id out of range");
	const MachineCPU<SIZE>& cpu = machine.cpus()[event.cpu()];

	//
	// Okay, here is how snprintf works.
	//
	// char buf[2];
	//
	// snprintf(buf, 0, "a"); // Returns 1, buf is unchanged.
	// snprintf(buf, 1, "a"); // Returns 1, buf = \0
	// snprintf(buf, 2, "a"); // Returns 1, buf = 'a', \0

	//
	// If we cannot print successfully, we return the orignal pointer.
	//
	char* orig_buf = buf;

	//
	// [Index]
	//
	if (globals.should_print_event_index()) {
		buf += snprintf(buf, buf_end - buf, "%8llu ", (uint64_t)event_index);
	}

	// snprintf returns the would-be length on truncation, so buf can step past
	// buf_end; this check (repeated after every column) detects that overflow.
	if (buf >= buf_end)
		return orig_buf;

	//
	// Time
	//
	if (globals.should_print_mach_absolute_timestamps()) {
		// NOTE(review): when no beginning-of-time is set (value 0) the raw
		// mach-absolute timestamp is printed in hex, otherwise the offset is
		// printed in decimal — presumably intentional; confirm with kdprof docs.
		if (globals.beginning_of_time().value() == 0)
			buf += snprintf(buf, buf_end - buf, "%16llX ", (event.timestamp() - globals.beginning_of_time()).value());
		else
			buf += snprintf(buf, buf_end - buf, "%16llu ", (event.timestamp() - globals.beginning_of_time()).value());
	} else {
		// Convert to microseconds (nanoseconds / 1000) relative to start.
		NanoTime ntime = (event.timestamp() - globals.beginning_of_time()).nano_time(globals.timebase());
		buf += snprintf(buf, buf_end - buf, "%16.2f ", (double)ntime.value() / 1000.0);
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Type Code
	//
	// "beg"/"end" for function-entry/exit events, "---" otherwise. The code is
	// printed symbolically when a trace-code name is known (never for IOPs).
	const char* type = event.is_func_start() ? "beg" : (event.is_func_end() ? "end" : "---");
	auto trace_code_it = globals.trace_codes().find(event.dbg_cooked());
	if (cpu.is_iop() || !globals.should_print_symbolic_event_codes() || trace_code_it == globals.trace_codes().end()) {
		buf += snprintf(buf, buf_end - buf, "%4s %-34x ", type, event.dbg_cooked());
	} else {
		buf += snprintf(buf, buf_end - buf, "%4s %-34s ", type, trace_code_it->second.c_str());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// arg1
	//
	// IOKit power events carry text in arg1; the code reverses the bytes before
	// printing — presumably the name is stored reversed in the event; confirm.
	if (event.dbg_class() == DBG_IOKIT && event.dbg_subclass() == DBG_IOPOWER) {
		std::string kext_name = event.arg1_as_string();
		std::reverse(kext_name.begin(), kext_name.end());

		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16s ", kext_name.c_str());
		else
			buf += snprintf(buf, buf_end - buf, "%-8s ", kext_name.c_str());
	} else {
		if (SIZE::is_64_bit)
			buf += snprintf(buf, buf_end - buf, "%-16llX ", (uint64_t)event.arg1());
		else
			buf += snprintf(buf, buf_end - buf, "%-8x ", (uint32_t)event.arg1());
	}

	if (buf >= buf_end)
		return orig_buf;

	//
	// Profiling showed that the repeated snprintf calls were hot, rolling them up is ~2.5% per on a HUGE file.
	//
	// arg2 arg3 arg4 thread cpu
	//
	if (SIZE::is_64_bit)
		buf += snprintf(buf, buf_end - buf, "%-16llX %-16llX %-16llX %10llX %4u ", (uint64_t)event.arg2(), (uint64_t)event.arg3(), (uint64_t)event.arg4(), (uint64_t)event.tid(), event.cpu());
	else
		buf += snprintf(buf, buf_end - buf, "%-8x %-8x %-8x %10llX %4u ", (uint32_t)event.arg2(), (uint32_t)event.arg3(), (uint32_t)event.arg4(), (uint64_t)event.tid(), event.cpu());

	if (buf >= buf_end)
		return orig_buf;

	//
	// command & pid (handled together due to IOP not printing a pid
	//
	if (cpu.is_iop()) {
		// We print the IOP name instead of a command
		buf += snprintf(buf, buf_end - buf, "%-16s\n", cpu.name());
	} else {
		// Look up the thread live at this event's timestamp; unknown threads
		// get placeholder command/pid fields.
		if (const MachineThread<SIZE>* thread = machine.thread(event.tid(), event.timestamp())) {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6d\n", thread->process().name(), thread->process().pid());
		} else {
			buf += snprintf(buf, buf_end - buf, "%-16s %-6s\n", "?????", "???");
		}
	}

	// Still need to check this, its an error if we overflow on the last print!
	if (buf >= buf_end)
		return orig_buf;

	return buf;
}
142 | ||
143 | template <typename SIZE> | |
144 | char* print_event_range_to_buffer(const Globals& globals, const Machine<SIZE>& machine, TRange<uintptr_t> range, MemoryBuffer<char>& buffer ) { | |
145 | char* cursor = buffer.data(); | |
146 | char* cursor_end = cursor + buffer.capacity(); | |
147 | ||
148 | if (const KDEvent<SIZE>* events = machine.events()) { | |
149 | ASSERT(TRange<uintptr_t>(0, machine.event_count()).contains(range), "Sanity"); | |
150 | for (uintptr_t index = range.location(); index < range.max(); ++index) { | |
151 | char* temp = print_event(cursor, cursor_end, globals, machine, events[index], index); | |
152 | if (temp != cursor) | |
153 | cursor = temp; | |
154 | else { | |
155 | // Changing the capacity will invalidate the cursor | |
156 | ptrdiff_t offset = cursor - buffer.data(); | |
157 | buffer.set_capacity(buffer.capacity()*2); | |
158 | cursor = buffer.data() + offset; | |
159 | cursor_end = buffer.data() + buffer.capacity(); | |
160 | } | |
161 | } | |
162 | } | |
163 | ||
164 | return cursor; | |
165 | } | |
166 | ||
167 | class PrintWorkUnit { | |
168 | protected: | |
169 | MemoryBuffer<char> _buffer; | |
170 | TRange<uintptr_t> _event_range; | |
171 | char* _buffer_end; | |
172 | ||
173 | // We do not want work units copied. | |
174 | PrintWorkUnit(const PrintWorkUnit& that) = delete; | |
175 | PrintWorkUnit& operator=(const PrintWorkUnit& other) = delete; | |
176 | ||
177 | public: | |
178 | PrintWorkUnit(MemoryBuffer<char>&& buffer, TRange<uintptr_t> event_range, char* buffer_end) : | |
179 | _buffer(std::move(buffer)), | |
180 | _event_range(event_range), | |
181 | _buffer_end(buffer_end) | |
182 | { | |
183 | ASSERT(_buffer.capacity(), "Sanity"); | |
184 | ASSERT(_buffer.data(), "Sanity"); | |
185 | ASSERT(!_buffer_end || _buffer_end > _buffer.data(), "Sanity"); | |
186 | ASSERT(!_buffer_end || (_buffer_end < _buffer.data() + _buffer.capacity()), "Sanity"); | |
187 | } | |
188 | ||
189 | MemoryBuffer<char>& buffer() { return _buffer; } | |
190 | ||
191 | TRange<uintptr_t> event_range() { return _event_range; } | |
192 | void set_event_range(TRange<uintptr_t> range) { _event_range = range; } | |
193 | ||
194 | char* buffer_end() const { return _buffer_end; } | |
195 | void set_buffer_end(char* buffer_end) { _buffer_end = buffer_end; } | |
196 | }; | |
197 | ||
// Hands out contiguous chunks of the event-index space to worker threads.
//
// produce() is called concurrently from multiple threads; the shared cursor
// _start_index is advanced with a compare-and-swap loop so each chunk is
// claimed exactly once.
//
// NOTE(review): the CAS uses the deprecated OSAtomicCompareAndSwapPtrBarrier
// with pointer casts, and _start_index is read without atomics inside the
// loop; a std::atomic<uintptr_t> would express this directly — confirm before
// changing, since start_index() is also read single-threaded at setup time.
template <typename SIZE>
class PrintProducer {
    protected:
	const Globals& _globals;
	const Machine<SIZE>& _machine;
	uintptr_t _start_index;   // next unclaimed event index (shared, CAS-advanced)
	uintptr_t _end_index;     // one past the last event index to print
	uintptr_t _chunk_size;    // events handed out per produce() call

    public:
	// Defaults to the machine's full event range; if a summary start/stop is
	// set, narrows [start,end) via lower_bound on the (sorted) event array.
	PrintProducer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t chunk_size) :
		_globals(globals),
		_machine(machine),
		_chunk_size(chunk_size)
	{
		_start_index = 0;
		_end_index = machine.event_count();

		if (globals.is_summary_start_set() || globals.is_summary_stop_set()) {
			AbsInterval machine_timespan = machine.timespan();

			KDEvent<SIZE> start_event(globals.summary_start(machine_timespan));
			auto it = std::lower_bound(machine.events(), machine.events() + _end_index, start_event);
			ASSERT(&*it >= machine.events(), "Returned start index lower than start");
			_start_index = std::distance(machine.events(), it);

			KDEvent<SIZE> end_event(globals.summary_stop(machine_timespan));
			it = std::lower_bound(machine.events(), machine.events() + _end_index, end_event);
			ASSERT(&*it <= machine.events() + _end_index, "Returned end index greater than end");
			_end_index = std::distance(machine.events(), it);

			ASSERT(_start_index <= _end_index, "start index is > end index");
		}
	}

	// Claims up to _chunk_size events, renders them into work_unit's buffer,
	// and records the claimed range. Returns false once all work is claimed.
	bool produce(PrintWorkUnit& work_unit) {
		// Claim a chunk of work to do
		uintptr_t orig_start_index, new_start_index;
		do {
			orig_start_index = _start_index;
			// min() clamps the final (possibly short) chunk at _end_index.
			new_start_index = orig_start_index + std::min(_chunk_size, _end_index - orig_start_index);
		} while (orig_start_index < _end_index && !OSAtomicCompareAndSwapPtrBarrier((void*)orig_start_index, (void *)new_start_index, (void * volatile *)&_start_index));

		// Did we claim work?
		if (orig_start_index < _end_index) {
			TRange<uintptr_t> event_range(orig_start_index, new_start_index - orig_start_index);
			char* end = print_event_range_to_buffer(_globals, _machine, event_range, work_unit.buffer());

			work_unit.set_event_range(event_range);
			work_unit.set_buffer_end(end);
			return true;
		}

		return false;
	}

	// First index that will be printed (used to seed the consumer's cursor).
	uintptr_t start_index() const { return _start_index; }
};
256 | ||
257 | template <typename SIZE> | |
258 | class PrintConsumer { | |
259 | protected: | |
260 | const Globals& _globals; | |
261 | uintptr_t _write_index; | |
262 | std::mutex _write_mutex; | |
263 | std::condition_variable _write_condition; | |
264 | ||
265 | public: | |
266 | PrintConsumer(const Globals& globals, const Machine<SIZE>& machine, uintptr_t start_index) : | |
267 | _globals(globals), | |
268 | _write_index(start_index) | |
269 | { | |
270 | } | |
271 | ||
272 | void consume(PrintWorkUnit& work_unit) { | |
273 | std::unique_lock<std::mutex> guard(_write_mutex); | |
274 | _write_condition.wait(guard, [&](){ return work_unit.event_range().location() == this->_write_index; }); | |
275 | ||
276 | ASSERT(work_unit.event_range().location() == _write_index, "Sanity"); | |
277 | ||
278 | char* data = work_unit.buffer().data(); | |
279 | size_t bytes = work_unit.buffer_end() - data; | |
280 | write(_globals.output_fd(), work_unit.buffer().data(), bytes); | |
281 | _write_index = work_unit.event_range().max(); | |
282 | ||
283 | _write_condition.notify_all(); | |
284 | } | |
285 | }; | |
286 | ||
287 | template <typename SIZE> | |
288 | uintptr_t print_machine_events(const Globals& globals, const Machine<SIZE>& machine) { | |
289 | print_event_header(globals, SIZE::is_64_bit); | |
290 | ||
291 | if (const KDEvent<SIZE>* events = machine.events()) { | |
292 | if (uintptr_t event_count = machine.event_count()) { | |
293 | ||
294 | // | |
295 | // We want to chunk this up into reasonably sized pieces of work. | |
296 | // Because each piece of work can potentially accumulate a large | |
297 | // amount of memory, we need to limit the amount of work "in-flight". | |
298 | // | |
299 | uint32_t active_cpus = Kernel::active_cpu_count(); | |
300 | ||
301 | uintptr_t chunk_size = 2000; | |
302 | ||
303 | PrintProducer<SIZE> producer(globals, machine, chunk_size); | |
304 | PrintConsumer<SIZE> consumer(globals, machine, producer.start_index()); | |
305 | ||
306 | std::vector<std::thread> threads; | |
307 | for (uint32_t i=0; i<active_cpus; ++i) { | |
308 | threads.push_back(std::thread([&]() { | |
309 | PrintWorkUnit work_unit(MemoryBuffer<char>(160 * chunk_size), TRange<uintptr_t>(0, 0), (char*)NULL); | |
310 | while (producer.produce(work_unit)) { | |
311 | consumer.consume(work_unit); | |
312 | } | |
313 | })); | |
314 | } | |
315 | ||
316 | for(auto& thread : threads){ | |
317 | thread.join(); | |
318 | } | |
319 | ||
320 | uint32_t totalProcesses = 0; | |
321 | uint32_t totalThreads = 0; | |
322 | ||
323 | for (auto process : machine.processes()) { | |
324 | if (!process->is_created_by_previous_machine_state()) { | |
325 | totalProcesses++; | |
326 | } | |
327 | } | |
328 | ||
329 | for (auto thread : machine.threads()) { | |
330 | if (!thread->is_created_by_previous_machine_state()) { | |
331 | totalThreads++; | |
332 | } | |
333 | } | |
334 | ||
335 | dprintf(globals.output_fd(), "Total Events: %llu\n", (uint64_t)event_count); | |
336 | dprintf(globals.output_fd(), "Total Processes: %u\n", totalProcesses); | |
337 | dprintf(globals.output_fd(), "Total Threads: %u\n", totalThreads); | |
338 | ||
339 | return event_count; | |
340 | } | |
341 | } | |
342 | ||
343 | return 0; | |
344 | } | |
345 | ||
346 | #endif |