/*
 * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * HISTORY
 *
 * Revision 1.1.1.1  1998/09/22 21:05:49  wsanchez
 * Import of Mac OS X kernel (~semeria)
 *
 * Revision 1.1.1.1  1998/03/07 02:26:08  wsanchez
 * Import of OSF Mach kernel (~mburg)
 *
 * Revision 1.1.7.1  1997/09/22 17:41:24  barbou
 * MP+RT: protect cpu_number() usage against preemption.
 * [97/09/16  barbou]
 *
 * Revision 1.1.5.1  1995/01/06 19:53:37  devrcs
 * mk6 CR668 - 1.3b26 merge
 * new file for mk6
 * [1994/10/12 22:25:20  dwm]
 *
 * Revision 1.1.2.2  1994/05/16 19:19:17  meissner
 * Add support for converting 64-bit integers to a decimal string.
 * Use the correct address (selfpc) when creating the prof header for gprof.
 * [1994/04/28 21:44:59  meissner]
 *
 * Revision 1.1.2.1  1994/04/08 17:51:42  meissner
 * Make most stats 64 bits, except for things like memory allocation.
 * [1994/04/02 14:58:21  meissner]
 *
 * Do not provide old mcount support under MK or server.
 * Fixup stats size so it is the same as in profile-md.h.
 * [1994/03/29 21:00:03  meissner]
 *
 * Use faster sequence for overflow addition.
 * Keep {dummy,prof,gprof,old}_mcount counts in double precision.
 * Add kernel NCPUS > 1 support.
 * [1994/03/17 20:13:23  meissner]
 *
 * Add gprof/prof overflow support
 * [1994/03/17 14:56:44  meissner]
 *
 * Add size of histogram counters & unused fields to profile_profil struct
 * [1994/02/17 21:41:44  meissner]
 *
 * Add too_low/too_high to profile_stats.
 * [1994/02/16 22:38:11  meissner]
 *
 * Bump # allocation contexts to 32 from 16.
 * Store unique ptr address in gprof function header structure for _profile_reset.
 * Add new fields from profile-{internal,md}.h.
 * Align loop looking for an unlocked acontext.
 * Count # times a locked context block was found.
 * Expand copyright.
 * [1994/02/07 12:40:56  meissner]
 *
 * Keep track of the number of times the kernel overflows the HISTCOUNTER counter.
 * [1994/02/03 20:13:23  meissner]
 *
 * Add stats for {user,kernel,idle} mode in the kernel.
 * [1994/02/03 15:17:22  meissner]
 *
 * No change.
 * [1994/02/03 00:58:49  meissner]
 *
 * Combine _profile_{vars,stats,md}; Allow more than one _profile_vars.
 * [1994/02/01 12:03:56  meissner]
 *
 * Move _mcount_ptr to be closer to other data declarations.
 * Add text_len to profile_profil structure for mk.
 * Split records_cnt into prof_cnt/gprof_cnt.
 * Always update prof_cnt/gprof_cnt even if not DO_STATS.
 * Add current/max cpu indicator to stats for kernel.
 * [1994/01/28 23:33:20  meissner]
 *
 * Don't do 4+Lgotoff(lab), use separate labels.
 * Change GPROF_HASH_SHIFT to 9 (from 8).
 * [1994/01/26 22:00:59  meissner]
 *
 * Fixup NO_RECURSIVE_ALLOC to do byte loads, not word loads.
 * [1994/01/26 20:30:57  meissner]
 *
 * Move callback pointers into separate allocation context.
 * Add size fields for other structures to profile-vars.
 * Allocate string table as one large allocation.
 * Rewrite old mcount code once again.
 * Use multiply to make hash value, not divide.
 * Hash table is now a power of two.
 * [1994/01/26 20:23:32  meissner]
 *
 * Cut hash table size back to 16189.
 * Add size fields to all structures.
 * Add major/minor version number to _profile_md.
 * Move allocation context block pointers to _profile_vars.
 * Move _gprof_dummy after _profile_md.
 * New function header code now falls into hash an element
 * to avoid having the hash code duplicated or use a macro.
 * Fix bug in _gprof_mcount with ELF shared libraries.
 * [1994/01/25 01:45:59  meissner]
 *
 * Move init functions to C code; rearrange profil variables.
 * [1994/01/22 01:11:14  meissner]
 *
 * No change.
 * [1994/01/20 20:56:43  meissner]
 *
 * Fixup copyright.
 * [1994/01/18 23:07:39  meissner]
 *
 * Make flags byte-sized.
 * Add have_bb flag.
 * Add init_format flag.
 * Always put word size multiplier first in .space.
 * [1994/01/18 21:57:14  meissner]
 *
 * Fix elfpic problems in last change.
 * [1994/01/16 14:04:26  meissner]
 *
 * Rewrite gprof caching to be faster & not need a lock.
 * Record prof information for gprof too.
 * Bump reserved stats to 64.
 * Bump up hash table size to 30799.
 * Conditionally use lock prefix.
 * Change most #ifdef's to #if.
 * DEBUG_PROFILE turns on stack frames now.
 * Conditionally add externs to gprof to determine where time is spent.
 * Prof_mcount uses xchgl to update function pointer.
 * [1994/01/15 18:40:33  meissner]
 *
 * Fix a comment.
 * Separate statistics from debugging (though debugging turns it on).
 * Remove debug code that traces each gprof request.
 * [1994/01/15 00:59:02  meissner]
 *
 * Move max hash bucket calculation into _gprof_write & put info in stats structure.
 * [1994/01/04 16:15:14  meissner]
 *
 * Use _profile_printf to write diagnostics; add diag_stream to hold stream to write to.
 * [1994/01/04 15:37:44  meissner]
 *
 * Add more allocation memory pools (gprof function hdrs in particular).
 * For prof, gprof arc, and gprof function hdrs, allocate 16 pages at a time.
 * Add major/minor version numbers to _profile_{vars,stats}.
 * Add # profil buckets field to _profil_stats.
 * [19
 *
 * $EndLog$
 */

/*
 * Common 386 profiling module that is shared between the kernel, mach
 * servers, and the user space library.  Each environment includes
 * this file.
 */

        .file "profile-asm.s"

#include <cpus.h>

#include <machine/asm.h>

/*
 * By default, debugging turns on statistics and stack frames.
 */

#if DEBUG_PROFILE
#ifndef DO_STATS
#define DO_STATS 1
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 1
#endif
#endif

#ifndef OLD_MCOUNT
#define OLD_MCOUNT 0			/* do not compile old code for mcount */
#endif

#ifndef DO_STATS
#define DO_STATS 1			/* compile in statistics code */
#endif

#ifndef DO_LOCK
#define DO_LOCK 0			/* use lock; in front of increments */
#endif

#ifndef LOCK_STATS
#define LOCK_STATS DO_LOCK		/* update stats with lock set */
#endif

#ifndef STACK_FRAMES
#define STACK_FRAMES 0			/* create stack frames for debugger */
#endif

#ifndef NO_RECURSIVE_ALLOC
#define NO_RECURSIVE_ALLOC 0		/* check for recursive allocs */
					/* (not thread safe!) */
#endif

#ifndef MARK_GPROF
#define MARK_GPROF 0			/* add externs for gprof profiling */
#endif

#ifndef OVERFLOW
#define OVERFLOW 1			/* add overflow checking support */
#endif

/*
 * Turn on the use of the lock prefix if desired.
 */

#ifndef LOCK
#if DO_LOCK
#define LOCK lock;
#else
#define LOCK
#endif
#endif

#ifndef SLOCK
#if LOCK_STATS
#define SLOCK LOCK
#else
#define SLOCK
#endif
#endif

/*
 * Double or single precision incrementing
 */

#if OVERFLOW
#define DINC(mem)		LOCK addl $1,mem; LOCK adcl $0,4+mem
#define DINC2(mem,mem2)		LOCK addl $1,mem; LOCK adcl $0,mem2
#define SDINC(mem)		SLOCK addl $1,mem; SLOCK adcl $0,4+mem
#define SDADD(val,mem)		SLOCK addl val,mem; SLOCK adcl $0,4+mem
#define SDADDNEG(val,mem)	SLOCK subl val,mem; SLOCK adcl $0,4+mem
#define SDSUB(val,mem)		SLOCK subl val,mem; SLOCK sbbl $0,4+mem

#else
#define DINC(mem)		LOCK incl mem
#define DINC2(mem,mem2)		LOCK incl mem
#define SDINC(mem)		SLOCK incl mem
#define SDADD(val,mem)		SLOCK addl val,mem
#define SDADDNEG(val,mem)	SLOCK subl val,mem
#define SDSUB(val,mem)		SLOCK subl val,mem
#endif
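
/*
 * Illustrative expansion (not part of the build): with OVERFLOW set,
 * DINC(mem) performs a 64-bit increment of the little-endian pair at
 * mem/mem+4, letting the carry flag ripple into the high word:
 *
 *	lock addl $1,mem	- increment low 32 bits
 *	lock adcl $0,4+mem	- add carry into high 32 bits
 *
 * (the lock prefix appears only when DO_LOCK is set.)
 */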

/*
 * Stack frame support so that debugger traceback works.
 */

#if STACK_FRAMES
#define ENTER	pushl %ebp; movl %esp,%ebp
#define LEAVE0	popl %ebp
#define Estack	4
#else
#define ENTER
#define LEAVE0
#define Estack	0
#endif
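
/*
 * Illustrative note (not part of the original text): Estack is the number
 * of extra bytes that ENTER leaves on the stack, so argument offsets below
 * are written as, e.g.,
 *	movl Estack+4(%esp),%ecx
 * which finds the same stack slot whether or not frames are compiled in.
 */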

/*
 * Gprof profiling.
 */

#if MARK_GPROF
#define MARK(name) .globl EXT(name); ELF_FUNC(EXT(name)); ELF_SIZE(EXT(name),0); LEXT(name)
#else
#define MARK(name)
#endif

/*
 * Profiling allocation context block.  Each time memory is needed, the
 * allocator loops until it finds an unlocked context block, and allocates
 * from that block.  If no context blocks are available, a new memory
 * pool is allocated, and added to the end of the chain.
 */

LCL(A_next)	= 0			/* next context block link (must be 0) */
LCL(A_plist)	= LCL(A_next)+4		/* head of page list for context block */
LCL(A_lock)	= LCL(A_plist)+4	/* lock word */
LCL(A_size)	= LCL(A_lock)+4		/* size of context block */

#define A_next	LCL(A_next)
#define A_plist	LCL(A_plist)
#define A_lock	LCL(A_lock)
#define A_size	LCL(A_size)
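
/*
 * For reference only, the offsets above describe roughly this C layout
 * (field names are illustrative; nothing in the build uses this sketch):
 *
 *	struct alloc_context {
 *		struct alloc_context *next;	- A_next, must be first
 *		struct mem_block *plist;	- A_plist, head of page list
 *		long lock;			- A_lock
 *	};
 */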

/*
 * Allocation contexts used.
 */

LCL(C_prof)	= 0			/* prof records */
LCL(C_gprof)	= 1			/* gprof arc records */
LCL(C_gfunc)	= 2			/* gprof function headers */
LCL(C_misc)	= 3			/* misc. allocations */
LCL(C_profil)	= 4			/* memory for profil */
LCL(C_dci)	= 5			/* memory for dci */
LCL(C_bb)	= 6			/* memory for basic blocks */
LCL(C_callback)	= 7			/* memory for callbacks */
LCL(C_max)	= 32			/* # allocation contexts */

#define C_prof	LCL(C_prof)
#define C_gprof	LCL(C_gprof)
#define C_gfunc	LCL(C_gfunc)
#define C_max	LCL(C_max)

/*
 * Linked list of memory allocations.
 */

LCL(M_first)	= 0			/* pointer to first byte available */
LCL(M_ptr)	= LCL(M_first)+4	/* pointer to next available byte */
LCL(M_next)	= LCL(M_ptr)+4		/* next page allocated */
LCL(M_nfree)	= LCL(M_next)+4		/* # bytes available */
LCL(M_nalloc)	= LCL(M_nfree)+4	/* # bytes allocated */
LCL(M_num)	= LCL(M_nalloc)+4	/* # allocations done on this page */
LCL(M_size)	= LCL(M_num)+4		/* size of page header */

#define M_first	LCL(M_first)
#define M_ptr	LCL(M_ptr)
#define M_next	LCL(M_next)
#define M_nfree	LCL(M_nfree)
#define M_nalloc LCL(M_nalloc)
#define M_num	LCL(M_num)
#define M_size	LCL(M_size)
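
/*
 * Again as an illustrative C layout (names are not authoritative):
 *
 *	struct mem_block {
 *		char *first;			- M_first, start of data area
 *		char *ptr;			- M_ptr, next free byte
 *		struct mem_block *next;		- M_next
 *		long nfree, nalloc, num;	- M_nfree, M_nalloc, M_num
 *	};
 */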

/*
 * Prof data type.
 */

LCL(P_addr)	= 0			/* function address */
LCL(P_count)	= LCL(P_addr)+4		/* # times function called */
LCL(P_overflow)	= LCL(P_count)+4	/* # times count overflowed */
LCL(P_size)	= LCL(P_overflow)+4	/* size of prof data type */

#define P_addr	LCL(P_addr)
#define P_count	LCL(P_count)
#define P_overflow LCL(P_overflow)
#define P_size	LCL(P_size)
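
/*
 * Roughly, in C (illustrative only):
 *
 *	struct prof_ext { void *addr; unsigned long count, overflow; };
 *
 * count and overflow together form the 64-bit call count maintained by
 * the DINC2 macro above.
 */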

/*
 * Gprof data type.
 */

LCL(G_next)	= 0			/* next hash link (must be 0) */
LCL(G_frompc)	= LCL(G_next)+4		/* caller's caller */
LCL(G_selfpc)	= LCL(G_frompc)+4	/* caller's address */
LCL(G_count)	= LCL(G_selfpc)+4	/* # times arc traversed */
LCL(G_overflow)	= LCL(G_count)+4	/* # times count overflowed */
LCL(G_size)	= LCL(G_overflow)+4	/* size of gprof data type */

#define G_next	LCL(G_next)
#define G_frompc LCL(G_frompc)
#define G_selfpc LCL(G_selfpc)
#define G_count	LCL(G_count)
#define G_overflow LCL(G_overflow)
#define G_size	LCL(G_size)
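
/*
 * Illustrative C equivalent of an arc record (field names are not
 * authoritative; profile-md.h has the real declaration):
 *
 *	struct gprof_arc {
 *		struct gprof_arc *next;		- hash chain, must be first
 *		void *frompc;			- caller's caller
 *		void *selfpc;			- caller
 *		unsigned long count;		- low word of traversal count
 *		unsigned long overflow;		- high word, carry target
 *	};
 */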

/*
 * Gprof header.
 *
 * At least one header is allocated for each unique function that is profiled.
 * In order to save time calculating the hash value, the last H_maxcache
 * distinct arcs are cached within this structure.  Also, to avoid loading
 * the GOT when searching the hash table, we copy the hash pointer to this
 * structure, so that we only load the GOT when we need to allocate an arc.
 */

LCL(H_maxcache)	= 3			/* # of cache table entries */
LCL(H_csize)	= 4*LCL(H_maxcache)	/* size of each cache array */

LCL(H_hash_ptr)	= 0			/* hash table to use */
LCL(H_unique_ptr) = LCL(H_hash_ptr)+4	/* function unique pointer */
LCL(H_prof)	= LCL(H_unique_ptr)+4	/* prof statistics */
LCL(H_cache_ptr) = LCL(H_prof)+P_size	/* cache table of element pointers */
LCL(H_size)	= LCL(H_cache_ptr)+LCL(H_csize)	/* size of gprof header type */

#define H_maxcache LCL(H_maxcache)
#define H_csize	LCL(H_csize)
#define H_hash_ptr LCL(H_hash_ptr)
#define H_unique_ptr LCL(H_unique_ptr)
#define H_prof	LCL(H_prof)
#define H_cache_ptr LCL(H_cache_ptr)
#define H_size	LCL(H_size)
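
/*
 * Illustrative C equivalent (profile-md.h is authoritative):
 *
 *	struct gprof_hdr {
 *		struct gprof_arc **hash_ptr;	- copy of the hash table ptr
 *		void *unique_ptr;		- compiler's unique word
 *		struct prof_ext prof;		- embedded prof record
 *		struct gprof_arc *cache[3];	- H_maxcache recent arcs
 *	};
 */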

/*
 * Number of digits needed to write a 64 bit number including trailing null.
 * (rounded up to be divisible by 4).
 */

#define N_digit	24

\f
	.data

/*
 * Default gprof hash table size, which must be a power of two.
 * The shift specifies how many low order bits to eliminate when
 * calculating the hash value.
 */

#ifndef GPROF_HASH_SIZE
#define GPROF_HASH_SIZE 16384
#endif

#ifndef GPROF_HASH_SHIFT
#define GPROF_HASH_SHIFT 9
#endif

#define GPROF_HASH_MASK (GPROF_HASH_SIZE-1)

DATA(_profile_hash_size)
	.long GPROF_HASH_SIZE
ENDDATA(_profile_hash_size)
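
/*
 * For reference, the bucket index computed in _gprof_mcount below is,
 * in C terms (sketch only):
 *
 *	bucket = ((frompc * selfpc) >> GPROF_HASH_SHIFT) & GPROF_HASH_MASK;
 *
 * a multiplicative hash of the two arc endpoints; the power-of-two table
 * size makes the final step a single andl.
 */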

\f

/*
 * Pointer that the compiler uses to call to the appropriate mcount function.
 */

DATA(_mcount_ptr)
	.long EXT(_dummy_mcount)
ENDDATA(_mcount_ptr)

/*
 * Global profile variables.  The structure that accesses this in C is declared
 * in profile-internal.h.  All items in .data that follow this will be used as
 * one giant record, and each unique machine, thread, kgmon output or what have
 * you will create a separate instance.  Typically there is only one instance
 * which will be the memory laid out below.
 */

LCL(var_major_version)	= 0				/* major version number */
LCL(var_minor_version)	= LCL(var_major_version)+4	/* minor version number */
LCL(vars_size)		= LCL(var_minor_version)+4	/* size of _profile_vars structure */
LCL(plist_size)		= LCL(vars_size)+4		/* size of page_list structure */
LCL(acontext_size)	= LCL(plist_size)+4		/* size of allocation contexts */
LCL(callback_size)	= LCL(acontext_size)+4		/* size of callback structure */
LCL(type)		= LCL(callback_size)+4		/* profile type (gprof, prof) */
LCL(error_msg)		= LCL(type)+4			/* error message for perror */
LCL(filename)		= LCL(error_msg)+4		/* filename to write to */
LCL(str_ptr)		= LCL(filename)+4		/* string table pointer */
LCL(stream)		= LCL(str_ptr)+4		/* stdio stream to write to */
LCL(diag_stream)	= LCL(stream)+4			/* stdio stream to write diagnostics to */
LCL(fwrite_func)	= LCL(diag_stream)+4		/* function like fwrite to output bytes */
LCL(page_size)		= LCL(fwrite_func)+4		/* page size in bytes */
LCL(str_bytes)		= LCL(page_size)+4		/* # bytes in string table */
LCL(str_total)		= LCL(str_bytes)+4		/* # total bytes allocated for string table */
LCL(clock_ticks)	= LCL(str_total)+4		/* # clock ticks per second */

				/* profil variables */
LCL(profil_start)	= LCL(clock_ticks)+4		/* start of profil variables */
LCL(lowpc)		= LCL(clock_ticks)+4		/* lowest address */
LCL(highpc)		= LCL(lowpc)+4			/* highest address */
LCL(text_len)		= LCL(highpc)+4			/* highpc-lowpc */
LCL(profil_len)		= LCL(text_len)+4		/* size of profil buffer */
LCL(counter_size)	= LCL(profil_len)+4		/* size of individual counter */
LCL(scale)		= LCL(counter_size)+4		/* scale factor */
LCL(profil_unused)	= LCL(scale)+4			/* unused fields */
LCL(profil_end)		= LCL(profil_unused)+4*8	/* end of profil_info structure */
LCL(profil_buf)		= LCL(profil_end)		/* buffer for profil */

				/* Output selection func ptrs */
LCL(output_init)	= LCL(profil_buf)+4		/* Initialization */
LCL(output)		= LCL(output_init)+4		/* Write out profiling info */
LCL(output_ptr)		= LCL(output)+4			/* Output specific data ptr */

				/* Memory allocation support */
LCL(acontext)		= LCL(output_ptr)+4		/* pointers to allocation context blocks */

LCL(bogus_func)		= LCL(acontext)+4*C_max		/* function to use if gprof arc is bad */
LCL(vars_unused)	= LCL(bogus_func)+4		/* future growth */

				/* flags */
LCL(init)		= LCL(vars_unused)+4*63		/* whether initializations were done */
LCL(active)		= LCL(init)+1			/* whether profiling is active */
LCL(do_profile)		= LCL(active)+1			/* whether to do profiling */
LCL(use_dci)		= LCL(do_profile)+1		/* whether to use DCI */
LCL(use_profil)		= LCL(use_dci)+1		/* whether to use profil */
LCL(recursive_alloc)	= LCL(use_profil)+1		/* alloc called recursively */
LCL(output_uarea)	= LCL(recursive_alloc)+1	/* output uarea */
LCL(output_stats)	= LCL(output_uarea)+1		/* output stats info */
LCL(output_clock)	= LCL(output_stats)+1		/* output the clock ticks */
LCL(multiple_sections)	= LCL(output_clock)+1		/* multiple sections are ok */
LCL(have_bb)		= LCL(multiple_sections)+1	/* whether we have basic block data */
LCL(init_format)	= LCL(have_bb)+1		/* The output format has been chosen */
LCL(debug)		= LCL(init_format)+1		/* Whether or not we are debugging */
LCL(check_funcs)	= LCL(debug)+1			/* Whether to check functions for validity */
LCL(flag_unused)	= LCL(check_funcs)+1		/* unused flags */
LCL(end_of_vars)	= LCL(flag_unused)+62		/* size of machine independent vars */

/*
 * Data that contains profile statistics that can be dumped out
 * into the {,g}mon.out file.  This is defined in profile-md.h.
 */

LCL(stats_start)	= LCL(end_of_vars)		/* start of stats substructure */
LCL(stats_major_version) = LCL(stats_start)		/* major version number */
LCL(stats_minor_version) = LCL(stats_major_version)+4	/* minor version number */
LCL(stats_size)		= LCL(stats_minor_version)+4	/* size of _profile_stats structure */
LCL(profil_buckets)	= LCL(stats_size)+4		/* # profil buckets */
LCL(my_cpu)		= LCL(profil_buckets)+4		/* identify which cpu/thread this is */
LCL(max_cpu)		= LCL(my_cpu)+4			/* maximum cpu/thread number */
LCL(prof_records)	= LCL(max_cpu)+4		/* # of profiled functions */
LCL(gprof_records)	= LCL(prof_records)+4		/* # of gprof arcs created */
LCL(hash_buckets)	= LCL(gprof_records)+4		/* max gprof hash buckets on a chain */
LCL(bogus_count)	= LCL(hash_buckets)+4		/* # bogus functions found in gprof */

LCL(cnt)		= LCL(bogus_count)+4		/* # of _{prof,gprof}_mcount calls */
LCL(dummy)		= LCL(cnt)+8			/* # of _dummy_mcount calls */
LCL(old_mcount)		= LCL(dummy)+8			/* # of old mcount calls */
LCL(hash_search)	= LCL(old_mcount)+8		/* # gprof hash buckets searched */
LCL(hash_num)		= LCL(hash_search)+8		/* # times hash table searched */
LCL(user_ticks)		= LCL(hash_num)+8		/* # ticks within user space */
LCL(kernel_ticks)	= LCL(user_ticks)+8		/* # ticks within kernel space */
LCL(idle_ticks)		= LCL(kernel_ticks)+8		/* # ticks cpu was idle */
LCL(overflow_ticks)	= LCL(idle_ticks)+8		/* # ticks where histcounter overflowed */
LCL(acontext_locked)	= LCL(overflow_ticks)+8		/* # times an acontext was locked */
LCL(too_low)		= LCL(acontext_locked)+8	/* # times histogram tick too low */
LCL(too_high)		= LCL(too_low)+8		/* # times histogram tick too high */
LCL(prof_overflow)	= LCL(too_high)+8		/* # times the prof count field overflowed */
LCL(gprof_overflow)	= LCL(prof_overflow)+8		/* # times the gprof count field overflowed */
LCL(num_alloc)		= LCL(gprof_overflow)+8		/* # allocations in each context */
LCL(bytes_alloc)	= LCL(num_alloc)+4*C_max	/* bytes allocated in each context */
LCL(num_context)	= LCL(bytes_alloc)+4*C_max	/* # allocation context blocks */
LCL(wasted)		= LCL(num_context)+4*C_max	/* # bytes wasted */
LCL(overhead)		= LCL(wasted)+4*C_max		/* # bytes of overhead */
LCL(buckets)		= LCL(overhead)+4*C_max		/* # hash indexes that have n buckets */
LCL(cache_hits1)	= LCL(buckets)+4*10		/* # gprof cache hits in bucket #1 */
LCL(cache_hits2)	= LCL(cache_hits1)+8		/* # gprof cache hits in bucket #2 */
LCL(cache_hits3)	= LCL(cache_hits2)+8		/* # gprof cache hits in bucket #3 */
LCL(stats_unused)	= LCL(cache_hits3)+8		/* reserved for future use */
LCL(stats_end)		= LCL(stats_unused)+8*64	/* end of stats structure */

/*
 * Machine dependent variables that no C file should access (except for
 * profile-md.c).
 */

LCL(md_start)		= LCL(stats_end)		/* start of md structure */
LCL(md_major_version)	= LCL(md_start)			/* major version number */
LCL(md_minor_version)	= LCL(md_major_version)+4	/* minor version number */
LCL(md_size)		= LCL(md_minor_version)+4	/* size of _profile_md structure */
LCL(hash_ptr)		= LCL(md_size)+4		/* gprof hash pointer */
LCL(hash_size)		= LCL(hash_ptr)+4		/* gprof hash size */
LCL(num_cache)		= LCL(hash_size)+4		/* # of cache entries */
LCL(save_mcount_ptr)	= LCL(num_cache)+4		/* save for mcount_ptr when suspending profiling */
LCL(mcount_ptr_ptr)	= LCL(save_mcount_ptr)+4	/* pointer to _mcount_ptr */
LCL(dummy_ptr)		= LCL(mcount_ptr_ptr)+4		/* pointer to gprof_dummy */
LCL(alloc_pages)	= LCL(dummy_ptr)+4		/* allocate more memory */
LCL(num_buffer)		= LCL(alloc_pages)+4		/* buffer to convert 64 bit ints in */
LCL(md_unused)		= LCL(num_buffer)+N_digit	/* unused fields */
LCL(md_end)		= LCL(md_unused)+4*58		/* end of md structure */
LCL(total_size)		= LCL(md_end)			/* size of entire structure */

/*
 * Size of the entire _profile_vars structure.
 */

DATA(_profile_size)
	.long LCL(total_size)
ENDDATA(_profile_size)

/*
 * Size of the statistics substructure.
 */

DATA(_profile_stats_size)
	.long LCL(stats_end)-LCL(stats_start)
ENDDATA(_profile_stats_size)

/*
 * Size of the profil info substructure.
 */

DATA(_profile_profil_size)
	.long LCL(profil_end)-LCL(profil_start)
ENDDATA(_profile_profil_size)

/*
 * Size of the machine dependent substructure.
 */

DATA(_profile_md_size)
	.long LCL(md_end)-LCL(md_start)
ENDDATA(_profile_md_size)

/*
 * Whether statistics are supported.
 */

DATA(_profile_do_stats)
	.long DO_STATS
ENDDATA(_profile_do_stats)

	.text

/*
 * Map LCL(xxx) into simpler names
 */

#define V_acontext		LCL(acontext)
#define V_acontext_locked	LCL(acontext_locked)
#define V_alloc_pages		LCL(alloc_pages)
#define V_bogus_func		LCL(bogus_func)
#define V_bytes_alloc		LCL(bytes_alloc)
#define V_cache_hits1		LCL(cache_hits1)
#define V_cache_hits2		LCL(cache_hits2)
#define V_cache_hits3		LCL(cache_hits3)
#define V_cnt			LCL(cnt)
#define V_cnt_overflow		LCL(cnt_overflow)
#define V_check_funcs		LCL(check_funcs)
#define V_dummy			LCL(dummy)
#define V_dummy_overflow	LCL(dummy_overflow)
#define V_dummy_ptr		LCL(dummy_ptr)
#define V_gprof_records		LCL(gprof_records)
#define V_hash_num		LCL(hash_num)
#define V_hash_ptr		LCL(hash_ptr)
#define V_hash_search		LCL(hash_search)
#define V_mcount_ptr_ptr	LCL(mcount_ptr_ptr)
#define V_num_alloc		LCL(num_alloc)
#define V_num_buffer		LCL(num_buffer)
#define V_num_context		LCL(num_context)
#define V_old_mcount		LCL(old_mcount)
#define V_old_mcount_overflow	LCL(old_mcount_overflow)
#define V_overhead		LCL(overhead)
#define V_page_size		LCL(page_size)
#define V_prof_records		LCL(prof_records)
#define V_recursive_alloc	LCL(recursive_alloc)
#define V_wasted		LCL(wasted)

/*
 * Load %ebx with the address of _profile_vars.  On a multiprocessor, this
 * loads the appropriate machine's _profile_vars structure.
 * For ELF shared libraries, rely on the fact that we won't need a GOT,
 * except to load this pointer.
 */

#if defined (MACH_KERNEL) && NCPUS > 1
#define ASSEMBLER
#include <i386/mp.h>

#if SQT
#include <i386/SQT/asm_macros.h>
#endif

#ifndef CPU_NUMBER
#error "Cannot determine how to get CPU number"
#endif

#define Vload	CPU_NUMBER(%ebx); movl EXT(_profile_vars_cpus)(,%ebx,4),%ebx

#else	/* not kernel or not multiprocessor */
#define Vload	Gload; Egaddr(%ebx,_profile_vars)
#endif

\f
/*
 * Allocate some memory for profiling.  This memory is guaranteed to
 * be zero.
 * %eax contains the memory size requested and will contain ptr on exit.
 * %ebx contains the address of the appropriate profile_vars structure.
 * %ecx is the number of the memory pool to allocate from (trashed on exit).
 * %edx is trashed.
 * %esi is preserved.
 * %edi is preserved.
 * %ebp is preserved.
 */

Entry(_profile_alloc_asm)
	ENTER
	pushl %esi
	pushl %edi

	movl %ecx,%edi			/* move context number to saved reg */

#if NO_RECURSIVE_ALLOC
	movb $-1,%cl
	xchgb %cl,V_recursive_alloc(%ebx)
	cmpb $0,%cl
	je LCL(no_recurse)

	int $3

	.align	ALIGN
LCL(no_recurse):
#endif

	leal V_acontext(%ebx,%edi,4),%ecx

	/* Loop looking for a free allocation context. */
	/* %eax = size, %ebx = vars addr, %ecx = ptr to allocation context to try */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_loop):
	movl %ecx,%esi			/* save ptr in case no more contexts */
	movl A_next(%ecx),%ecx		/* next context block */
	cmpl $0,%ecx
	je LCL(alloc_context)		/* need to allocate a new context block */

	movl $-1,%edx
	xchgl %edx,A_lock(%ecx)		/* %edx == 0 if context available */

#if DO_STATS
	SDADDNEG(%edx,V_acontext_locked(%ebx))	/* increment counter if lock was held */
#endif

	cmpl $0,%edx
	jne LCL(alloc_loop)		/* go back if this context block is not available */

	/* Allocation context found (%ecx), now allocate. */
	movl A_plist(%ecx),%edx		/* pointer to current block */
	cmpl $0,%edx			/* first allocation? */
	je LCL(alloc_new)

	cmpl %eax,M_nfree(%edx)		/* see if we have enough space */
	jl LCL(alloc_new)		/* jump if not enough space */

	/* Allocate from local block (and common exit) */
	/* %eax = bytes to allocate, %ebx = GOT, %ecx = context, %edx = memory block */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_ret):

#if DO_STATS
	SLOCK incl V_num_alloc(%ebx,%edi,4)	/* update global counters */
	SLOCK addl %eax,V_bytes_alloc(%ebx,%edi,4)
	SLOCK subl %eax,V_wasted(%ebx,%edi,4)
#endif

	movl M_ptr(%edx),%esi		/* pointer return value */
	subl %eax,M_nfree(%edx)		/* decrement bytes remaining */
	addl %eax,M_nalloc(%edx)	/* increment bytes allocated */
	incl M_num(%edx)		/* increment # allocations */
	addl %eax,M_ptr(%edx)		/* advance pointer */
	movl $0,A_lock(%ecx)		/* unlock context block */
	movl %esi,%eax			/* return pointer */

#if NO_RECURSIVE_ALLOC
	movb $0,V_recursive_alloc(%ebx)
#endif

	popl %edi
	popl %esi
	LEAVE0
	ret				/* return to the caller */

	/* Allocate space in whole number of pages */
	/* %eax = bytes to allocate, %ebx = vars address, %ecx = context */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_new):
	pushl %eax			/* save regs */
	pushl %ecx
	movl V_page_size(%ebx),%edx
	addl $(M_size-1),%eax		/* add in overhead size & subtract 1 */
	decl %edx			/* page_size - 1 */
	addl %edx,%eax			/* round up to whole number of pages */
	notl %edx
	andl %edx,%eax
	leal -M_size(%eax),%esi		/* save allocation size */
	pushl %eax			/* argument to _profile_alloc_pages */
	call *V_alloc_pages(%ebx)	/* allocate some memory */
	addl $4,%esp			/* pop off argument */

#if DO_STATS
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(M_size),V_overhead(%ebx,%edi,4)
#endif

	popl %ecx			/* context block */
	movl %eax,%edx			/* memory block pointer */
	movl %esi,M_nfree(%edx)		/* # free bytes */
	addl $(M_size),%eax		/* bump past overhead */
	movl A_plist(%ecx),%esi		/* previous memory block or 0 */
	movl %eax,M_first(%edx)		/* first space available */
	movl %eax,M_ptr(%edx)		/* current address available */
	movl %esi,M_next(%edx)		/* next memory block allocated */
	movl %edx,A_plist(%ecx)		/* update current page list */
	popl %eax			/* user size request */
	jmp LCL(alloc_ret)		/* goto common return code */

	/* Allocate a context header in addition to memory block header + data */
	/* %eax = bytes to allocate, %ebx = GOT, %esi = ptr to store context ptr */
	/* %edi = context number */

	.align	ALIGN
LCL(alloc_context):
	pushl %eax			/* save regs */
	pushl %esi
	movl V_page_size(%ebx),%edx
	addl $(A_size+M_size-1),%eax	/* add in overhead size & subtract 1 */
	decl %edx			/* page_size - 1 */
	addl %edx,%eax			/* round up to whole number of pages */
	notl %edx
	andl %edx,%eax
	leal -A_size-M_size(%eax),%esi	/* save allocation size */
	pushl %eax			/* argument to _profile_alloc_pages */
	call *V_alloc_pages(%ebx)	/* allocate some memory */
	addl $4,%esp			/* pop off argument */

#if DO_STATS
	SLOCK incl V_num_context(%ebx,%edi,4)	/* bump # context blocks */
	SLOCK addl %esi,V_wasted(%ebx,%edi,4)	/* update global counters */
	SLOCK addl $(A_size+M_size),V_overhead(%ebx,%edi,4)
#endif

	movl %eax,%ecx			/* context pointer */
	leal A_size(%eax),%edx		/* memory block pointer */
	movl %esi,M_nfree(%edx)		/* # free bytes */
	addl $(A_size+M_size),%eax	/* bump past overhead */
	movl %eax,M_first(%edx)		/* first space available */
	movl %eax,M_ptr(%edx)		/* current address available */
	movl $0,M_next(%edx)		/* next memory block allocated */
	movl %edx,A_plist(%ecx)		/* head of memory block list */
	movl $1,A_lock(%ecx)		/* set lock */
	popl %esi			/* ptr to store context block link */
	movl %ecx,%eax			/* context pointer temp */
	xchgl %eax,A_next(%esi)		/* link into chain */
	movl %eax,A_next(%ecx)		/* add links in case of threading */
	popl %eax			/* user size request */
	jmp LCL(alloc_ret)		/* goto common return code */

END(_profile_alloc_asm)

/*
 * C callable version of the profile memory allocator.
 * extern void *_profile_alloc(struct profile_vars *, size_t, acontext_type_t);
 */

Entry(_profile_alloc)
	ENTER
	pushl %ebx
	movl 12+Estack(%esp),%eax	/* memory size */
	movl 8+Estack(%esp),%ebx	/* profile_vars address */
	addl $3,%eax			/* round up to word boundary */
	movl 16+Estack(%esp),%ecx	/* which memory pool to allocate from */
	andl $0xfffffffc,%eax
	call EXT(_profile_alloc_asm)
	popl %ebx
	LEAVE0
	ret
END(_profile_alloc)
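
/*
 * Hypothetical call from C (sketch only; pv stands for some initialized
 * struct profile_vars instance, C_misc is one of the pools defined above):
 *
 *	void *p = _profile_alloc(pv, 40, C_misc);
 *
 * The wrapper rounds the size up to a multiple of 4 before passing it on
 * to _profile_alloc_asm.
 */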

\f
/*
 * Dummy mcount routine that just returns.
 *
 *			+-------------------------------+
 *			|				|
 *			|				|
 *			| caller's caller stack,	|
 *			| saved registers, params.	|
 *			|				|
 *			|				|
 *			+-------------------------------+
 *			| caller's caller return addr.	|
 *			+-------------------------------+
 *	esp -->		| caller's return address	|
 *			+-------------------------------+
 *
 *	edx --> function unique label
 */

Entry(_dummy_mcount)
	ENTER

#if DO_STATS
	pushl %ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_dummy(%ebx))
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret
END(_dummy_mcount)

\f
/*
 * Entry point for System V based profiling, count how many times each function
 * is called.  The function label is passed in %edx, and the top two words on
 * the stack are the caller's address, and the caller's return address.
 *
 *			+-------------------------------+
 *			|				|
 *			|				|
 *			| caller's caller stack,	|
 *			| saved registers, params.	|
 *			|				|
 *			|				|
 *			+-------------------------------+
 *			| caller's caller return addr.	|
 *			+-------------------------------+
 *	esp -->		| caller's return address	|
 *			+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling
 * the same function for the first time simultaneously.  If that
 * happens, two records will be created, and one of the records'
 * addresses will be stored in the function unique label (which
 * is aligned by the compiler, so we don't have to watch out for
 * crossing page/cache boundaries).
 */

Entry(_prof_mcount)
	ENTER

#if DO_STATS
	pushl %ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))
#endif

	movl (%edx),%eax		/* initialized? */
	cmpl $0,%eax
	je LCL(pnew)

	DINC2(P_count(%eax),P_overflow(%eax))	/* bump function count (double precision) */

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret

	.align	ALIGN
LCL(pnew):

#if !DO_STATS
	pushl %ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
#endif

	SLOCK incl V_prof_records(%ebx)
	pushl %edx
	movl $(P_size),%eax		/* allocation size */
	movl $(C_prof),%ecx		/* allocation pool */
	call EXT(_profile_alloc_asm)	/* allocate a new record */
	popl %edx

	movl Estack+4(%esp),%ecx	/* caller's address */
	movl %ecx,P_addr(%eax)
	movl $1,P_count(%eax)		/* call count */
	xchgl %eax,(%edx)		/* update function header */
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
	LEAVE0
	ret

END(_prof_mcount)

\f
/*
 * Entry point for BSD based graph profiling, count how many times each unique
 * call graph (caller + callee) is called.  The function label is passed in
 * %edx, and the top two words on the stack are the caller's address, and the
 * caller's return address.
 *
 *			+-------------------------------+
 *			|				|
 *			|				|
 *			| caller's caller stack,	|
 *			| saved registers, params.	|
 *			|				|
 *			|				|
 *			+-------------------------------+
 *			| caller's caller return addr.	|
 *			+-------------------------------+
 *	esp -->		| caller's return address	|
 *			+-------------------------------+
 *
 *	edx --> function unique label
 *
 * We don't worry about the possibility of two threads calling the same
 * function simultaneously.  If that happens, two records will be created, and
 * one of the records' addresses will be stored in the function unique label
 * (which is aligned by the compiler).
 *
 * By design, the gprof header is not locked.  Each of the cache pointers is
 * always a valid pointer (possibly to a null record), and if another thread
 * comes in and modifies the pointer, it does so atomically with a simple store.
 * Since all arcs are in the hash table, the caches are just to avoid doing
 * a multiplication in the common case, and if they don't match, the arcs will
 * still be found.
 */

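/*
 * The cache probe sequence below, as a C sketch (illustrative only; the
 * cache compares frompc alone because every arc cached in a given header
 * already shares the same selfpc):
 *
 *	if (hdr->cache[0]->frompc == frompc)       - hit, bump arc count
 *	else if (hdr->cache[1]->frompc == frompc)  - hit, swap cache[0]/[1]
 *	else if (hdr->cache[2]->frompc == frompc)  - hit, rotate to front
 *	else                                       - search the hash table
 *
 * The move-to-front policy keeps the most recent arcs in the cheapest slots.
 */
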
Entry(_gprof_mcount)

	ENTER
	movl Estack+4(%esp),%ecx	/* caller's caller address */

#if DO_STATS
	pushl %ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload
	SDINC(V_cnt(%ebx))		/* bump profile call counter (double int) */
#endif

	movl (%edx),%eax		/* Gprof header allocated? */
	cmpl $0,%eax
	je LCL(gnew)			/* skip if first call */

	DINC2(H_prof+P_count(%eax),H_prof+P_overflow(%eax))	/* bump function count */

	/* See if this call arc is the same as the last time */
MARK(_gprof_mcount_cache1)
	movl H_cache_ptr(%eax),%edx	/* last arc searched */
	cmpl %ecx,G_frompc(%edx)	/* skip if not equal */
	jne LCL(gcache2)

	/* Same as last time, increment and return */

	DINC2(G_count(%edx),G_overflow(%edx))	/* bump arc count */

#if DO_STATS
	SDINC(V_cache_hits1(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret

	/* Search second cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache2)
LCL(gcache2):
	pushl %esi			/* get a saved register */
	movl H_cache_ptr+4(%eax),%esi	/* 2nd arc to be searched */
	cmpl %ecx,G_frompc(%esi)	/* skip if not equal */
	jne LCL(gcache3)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%esi),G_overflow(%esi))	/* bump arc count */

	movl %esi,H_cache_ptr+0(%eax)	/* swap 1st and 2nd cached arcs */
	popl %esi
	movl %edx,H_cache_ptr+4(%eax)

#if DO_STATS
	SDINC(V_cache_hits2(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret

	/* Search third cache entry */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = first arc searched, %esi = second arc searched */
	/* %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_cache3)
LCL(gcache3):
	pushl %edi
	movl H_cache_ptr+8(%eax),%edi	/* 3rd arc to be searched */
	cmpl %ecx,G_frompc(%edi)	/* skip if not equal */
	jne LCL(gnocache)

	/* Element found, increment, reset last arc searched and return */

	DINC2(G_count(%edi),G_overflow(%edi))	/* bump arc count */

	movl %edi,H_cache_ptr+0(%eax)	/* make this 1st cached arc */
	movl %esi,H_cache_ptr+8(%eax)
	movl %edx,H_cache_ptr+4(%eax)
	popl %edi
	popl %esi

#if DO_STATS
	SDINC(V_cache_hits3(%ebx))	/* update counter */
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret

	/* No function context, allocate a new context */
	/* %ebx is the variables address if DO_STATS */
	/* %ecx is the caller's caller's address */
	/* %edx is the unique function pointer */
	/* %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_new)
LCL(gnew):
	pushl %esi
	pushl %edi

#if !DO_STATS
	pushl %ebx			/* Address of vars needed for alloc */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* stats already loaded address */
#endif

	SLOCK incl V_prof_records(%ebx)
	movl %edx,%esi			/* save unique function ptr */
	movl %ecx,%edi			/* and caller's caller address */
	movl $(H_size),%eax		/* memory block size */
	movl $(C_gfunc),%ecx		/* gprof function header memory pool */
	call EXT(_profile_alloc_asm)

	movl V_hash_ptr(%ebx),%ecx	/* copy hash_ptr to func header */
	movl V_dummy_ptr(%ebx),%edx	/* dummy cache entry */
	movl %ecx,H_hash_ptr(%eax)
	movl %edx,H_cache_ptr+0(%eax)	/* store dummy cache ptrs */
	movl %edx,H_cache_ptr+4(%eax)
	movl %edx,H_cache_ptr+8(%eax)
	movl %esi,H_unique_ptr(%eax)	/* remember function unique ptr */
	movl Estack+12(%esp),%ecx	/* caller's address */
	movl $1,H_prof+P_count(%eax)	/* function called once so far */
	movl %ecx,H_prof+P_addr(%eax)	/* set up prof information */
	movl %eax,(%esi)		/* update context block address */
	movl %edi,%ecx			/* caller's caller address */
	movl %edx,%esi			/* 2nd cached arc */

#if !DO_STATS
	popl %ebx
#endif

	/* Fall through to add element to the hash table.  This may involve */
	/* searching a few hash table elements that don't need to be searched */
	/* since we have a new element, but it allows the hash table function */
	/* to be specified in only one place */

	/* Didn't find entry in cache, search the global hash table */
	/* %eax = gprof func header, %ebx = vars address if DO_STATS */
	/* %ecx = caller's caller */
	/* %edx, %esi = cached arcs that were searched */
	/* %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hash)
LCL(gnocache):

	pushl %esi			/* save 2nd arc searched */
	pushl %edx			/* save 1st arc searched */
	movl %eax,%esi			/* save gprof func header */

#if DO_STATS
	SDINC(V_hash_num(%ebx))
	movl Estack+20(%esp),%edi	/* caller's address */
#else
	movl Estack+16(%esp),%edi	/* caller's address */
#endif
	movl %ecx,%eax			/* caller's caller address */
	imull %edi,%eax			/* multiply to get hash */
	movl H_hash_ptr(%esi),%edx	/* hash pointer */
	shrl $(GPROF_HASH_SHIFT),%eax	/* eliminate low order bits */
	andl $(GPROF_HASH_MASK),%eax	/* mask to get hash value */
	leal 0(%edx,%eax,4),%eax	/* pointer to hash bucket */
	movl %eax,%edx			/* save hash bucket address */

	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
LCL(ghash):
	movl G_next(%eax),%eax		/* get next hash element */
	cmpl $0,%eax			/* end of line? */
	je LCL(ghashnew)		/* skip if allocate new hash */

#if DO_STATS
	SDINC(V_hash_search(%ebx))
#endif

	cmpl G_selfpc(%eax),%edi	/* loop back if not one we want */
	jne LCL(ghash)

	cmpl G_frompc(%eax),%ecx	/* loop back if not one we want */
	jne LCL(ghash)

	/* Found an entry, increment count, set up for caching, and return */
	/* %eax = arc, %ebx = vars address if DO_STATS, %esi = func header */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	DINC2(G_count(%eax),G_overflow(%eax))	/* bump arc count */

	popl %ecx			/* previous 1st arc searched */
	movl %eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	popl %edi			/* previous 2nd arc searched */
	movl %ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl %edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */
	popl %edi
	popl %esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret				/* return to user */

	/* Allocate new arc */
	/* %eax = old arc, %ebx = vars address if DO_STATS, %ecx = caller's caller */
	/* %edx = hash bucket address, %esi = gfunc ptr, %edi = caller's addr */
	/* 2 old arcs, %edi, %esi, %ebx if DO_STATS pushed on stack */

	.align	ALIGN
MARK(_gprof_mcount_hashnew)
LCL(ghashnew):

#if !DO_STATS
	pushl %ebx			/* load address of vars if we haven't */
	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* already done so */
#endif

	SLOCK incl V_gprof_records(%ebx)
	pushl %edx
	movl %ecx,%edi			/* save caller's caller */
	movl $(G_size),%eax		/* arc size */
	movl $(C_gprof),%ecx		/* gprof memory pool */
	call EXT(_profile_alloc_asm)
	popl %edx

	movl $1,G_count(%eax)		/* set call count */
	movl Estack+20(%esp),%ecx	/* caller's address */
	movl %edi,G_frompc(%eax)	/* caller's caller */
	movl %ecx,G_selfpc(%eax)

#if !DO_STATS
	popl %ebx			/* release %ebx if no stats */
#endif

	movl (%edx),%ecx		/* first hash bucket */
	movl %ecx,G_next(%eax)		/* update link */
	movl %eax,%ecx			/* copy for xchgl */
	xchgl %ecx,(%edx)		/* add to hash linked list */
	movl %ecx,G_next(%eax)		/* update in case list changed */

	popl %ecx			/* previous 1st arc searched */
	popl %edi			/* previous 2nd arc searched */
	movl %eax,H_cache_ptr+0(%esi)	/* this element is now 1st arc */
	movl %ecx,H_cache_ptr+4(%esi)	/* new 2nd arc to be searched */
	movl %edi,H_cache_ptr+8(%esi)	/* new 3rd arc to be searched */

	popl %edi
	popl %esi

#if DO_STATS
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
#endif

	LEAVE0
	ret				/* return to user */

END(_gprof_mcount)

\f
/*
 * This function assumes that neither the caller nor its caller
 * has omitted the frame pointer in order to get the caller's
 * caller.  The stack looks like the following at the time of the call:
 *
 *			+-------------------------------+
 *			|				|
 *			|				|
 *			| caller's caller stack,	|
 *			| saved registers, params.	|
 *			|				|
 *			|				|
 *			+-------------------------------+
 *			| caller's caller return addr.	|
 *			+-------------------------------+
 *	fp -->		| previous frame pointer	|
 *			+-------------------------------+
 *			|				|
 *			| caller's stack, saved regs,	|
 *			| params.			|
 *			|				|
 *			+-------------------------------+
 *	sp -->		| caller's return address	|
 *			+-------------------------------+
 *
 * Recent versions of the compiler put the address of the pointer
 * sized word in %edx.  Previous versions did not, but this code
 * does not support them.
 */

/*
 * Note that OSF/rose blew defining _mcount, since it prepends leading
 * underscores, and _mcount didn't have a second leading underscore.  However,
 * some of the kernel/server functions 'know' that mcount has a leading
 * underscore, so we satisfy both camps.
 */

#if OLD_MCOUNT
	.globl mcount
	.globl _mcount
	ELF_FUNC(mcount)
	ELF_FUNC(_mcount)
	.align FALIGN
_mcount:
mcount:

	pushl %ebx
	MP_DISABLE_PREEMPTION(%ebx)
	Vload

#if DO_STATS
	SDINC(V_old_mcount(%ebx))
#endif

	/* In calling the functions, we will actually leave 1 extra word on the */
	/* top of the stack, but generated code will not notice, since the function */
	/* uses a frame pointer */

	movl V_mcount_ptr_ptr(%ebx),%ecx	/* address of mcount_ptr */
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
	movl 4(%ebp),%eax		/* caller's caller return address */
	xchgl %eax,(%esp)		/* push & get return address */
	pushl %eax			/* push return address */
	jmp *(%ecx)			/* go to profile the function */

End(mcount)
End(_mcount)
#endif

\f
#if !defined(KERNEL) && !defined(MACH_KERNEL)

/*
 * Convert a 64-bit integer to a string.
 * Arg #1 is a pointer to a string (at least 24 bytes) or NULL
 * Arg #2 is the low part of the 64-bit integer.
 * Arg #3 is the high part of the 64-bit integer.
 */
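
/*
 * C sketch of the conversion below (illustrative only; the real code keeps
 * everything in registers and stores digits from the end of the buffer):
 *
 *	char *p = buf + N_digit - 1;
 *	*p = '\0';
 *	while (high != 0) {			- long division in base 2^32
 *		r = high % 10; high /= 10;
 *		low = (r * 2^32 + low) / 10;	- divl leaves the digit in %edx
 *		*--p = '0' + digit;
 *	}
 *	do { *--p = '0' + low % 10; low /= 10; } while (low != 0);
 *	return p;
 */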

Entry(_profile_cnt_to_decimal)
	ENTER
	pushl %ebx
	pushl %esi
	pushl %edi
	movl Estack+16(%esp),%ebx	/* pointer or null */
	movl Estack+20(%esp),%edi	/* low part of number */
	movl $10,%ecx			/* divisor */
	cmpl $0,%ebx			/* skip if pointer ok */
	jne LCL(cvt_nonnull)

	MP_DISABLE_PREEMPTION(%ebx)
	Vload				/* get _profile_vars address */
	leal V_num_buffer(%ebx),%ebx	/* temp buffer to use */

	.align	ALIGN
LCL(cvt_nonnull):
	addl $(N_digit-1),%ebx		/* point string at end */
	movb $0,0(%ebx)			/* null terminate string */

#if OVERFLOW
	movl Estack+24(%esp),%esi	/* high part of number */
	cmpl $0,%esi			/* anything left in high part? */
	je LCL(cvt_low)

	.align	ALIGN
LCL(cvt_high):
	movl %esi,%eax			/* calculate high/10 & high%10 */
	xorl %edx,%edx
	divl %ecx
	movl %eax,%esi

	movl %edi,%eax			/* calculate (low + (high%10)*2^32) / 10 */
	divl %ecx
	movl %eax,%edi

	decl %ebx			/* decrement string pointer */
	addl $48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb %dl,0(%ebx)		/* store digit in string */
	cmpl $0,%esi			/* anything left in high part? */
	jne LCL(cvt_high)

#endif	/* OVERFLOW */

	.align	ALIGN
LCL(cvt_low):
	movl %edi,%eax			/* get low part into %eax */

	.align	ALIGN
LCL(cvt_low2):
	xorl %edx,%edx			/* 0 */
	divl %ecx			/* calculate next digit */
	decl %ebx			/* decrement string pointer */
	addl $48,%edx			/* convert from 0..9 -> '0'..'9' */
	movb %dl,0(%ebx)		/* store digit in string */
	cmpl $0,%eax			/* any more digits to convert? */
	jne LCL(cvt_low2)

	movl %ebx,%eax			/* return value */
	popl %edi
	popl %esi
	MP_ENABLE_PREEMPTION(%ebx)
	popl %ebx
	LEAVE0
	ret

END(_profile_cnt_to_decimal)

#endif