1 | /* |
---|
2 | (C) Copyright IBM Corp. 2008 |
---|
3 | |
---|
4 | All rights reserved. |
---|
5 | |
---|
6 | Redistribution and use in source and binary forms, with or without |
---|
7 | modification, are permitted provided that the following conditions are met: |
---|
8 | |
---|
9 | * Redistributions of source code must retain the above copyright notice, |
---|
10 | this list of conditions and the following disclaimer. |
---|
11 | * Redistributions in binary form must reproduce the above copyright |
---|
12 | notice, this list of conditions and the following disclaimer in the |
---|
13 | documentation and/or other materials provided with the distribution. |
---|
14 | * Neither the name of IBM nor the names of its contributors may be |
---|
15 | used to endorse or promote products derived from this software without |
---|
16 | specific prior written permission. |
---|
17 | |
---|
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
---|
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
---|
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
---|
21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
---|
22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
---|
23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
---|
24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
---|
25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
---|
26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
---|
27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
---|
28 | POSSIBILITY OF SUCH DAMAGE. |
---|
29 | |
---|
30 | Author: Ken Werner <ken.werner@de.ibm.com> |
---|
31 | */ |
---|
32 | |
---|
33 | #include <stdio.h> |
---|
34 | #include <stdlib.h> |
---|
35 | #include <string.h> |
---|
36 | #include <unistd.h> |
---|
37 | #include <sys/types.h> |
---|
38 | #include <sys/stat.h> |
---|
39 | #include <sys/uio.h> |
---|
40 | #include <fcntl.h> |
---|
41 | #include <ea.h> |
---|
42 | #include <spu_intrinsics.h> |
---|
43 | #include <spu_mfcio.h> |
---|
44 | #include <spu_timer.h> |
---|
45 | #include <limits.h> |
---|
46 | #include <sys/linux_syscalls.h> |
---|
47 | |
---|
/* Magic cookie stored in the cookie field of the gmon.out header.  */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in the gmon.out header.  */
#define GMON_VERSION 1

/* Fraction of text space to allocate for histogram counters: one counter
   covers HISTFRACTION bytes of text.  */
#define HISTFRACTION 4

/* Type of a single histogram counter.  */
#define HISTCOUNTER unsigned short

/* Fraction of text space to allocate for "from" hash buckets.  HASHFRACTION
   is based on the minimum number of bytes of separation between two
   subroutine call points in the object code.  */
#define HASHFRACTION 4

/* Percentage of the text size used as the number of tostruct entries.  */
#define ARCDENSITY 3

/* Lower bound for the number of tostruct entries.  */
#define MINARCS 50

/* Round x down, respectively up, to a multiple of y.  Both macros evaluate
   their arguments more than once.  */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ROUNDDOWN ((x) + (y) - 1, (y))

/* Sampling rate in Hertz.  */
#define SAMPLE_INTERVAL 100

/* Tag bytes that precede the histogram and call-graph arc records in
   gmon.out.  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
---|
81 | |
---|
/* One call-graph arc destination.  Entries are stored in the tos array and
   chained through their link member; the link member of entry 0 is used as
   the index of the most recently handed-out entry.  */
struct tostruct
{
  /* Address of the called function.  */
  uintptr_t selfpc;
  /* Number of times the arc has been traversed.  */
  long count;
  /* Index of the next tos entry in the same chain; 0 ends the chain.  */
  unsigned short link;
};
---|
88 | |
---|
/* Header written at offset 0 of the gmon.out file.  */
struct gmon_hdr
{
  /* Filled with GMON_MAGIC_COOKIE (without the terminating NUL).  */
  char cookie[4];
  /* Filled with GMON_VERSION.  */
  int32_t version;
  /* Unused; written as zeros.  */
  char spare[3 * 4];
};
---|
95 | |
---|
/* Header of the histogram record; it follows the one-byte
   GMON_TAG_TIME_HIST tag in gmon.out.  Packed so that it is written without
   any padding.  */
struct gmon_hist_hdr
{
  /* Lowest and highest address covered by the histogram.  */
  uintptr_t low_pc;
  uintptr_t high_pc;
  /* Number of histogram bins.  */
  int32_t hist_size;
  /* Sampling rate in Hertz.  */
  int32_t prof_rate;
  /* Name of the sampled dimension ("seconds") and its one-character
     abbreviation ('s').  */
  char dimen[15];
  char dimen_abbrev;
} __attribute__ ((packed));
---|
105 | |
---|
/* One call-graph arc record; it follows the one-byte GMON_TAG_CG_ARC tag in
   gmon.out.  Packed so that it is written without any padding.  */
struct rawarc
{
  /* Address the call was made from.  */
  uintptr_t raw_frompc;
  /* Address of the called function.  */
  uintptr_t raw_selfpc;
  /* Number of times the arc was traversed.  */
  long raw_count;
} __attribute__ ((packed));
---|
112 | |
---|
113 | /* start and end of the text section */ |
---|
114 | extern char _start; |
---|
115 | extern char _etext; |
---|
116 | |
---|
117 | /* EAR entry for the starting address of SPE executable image. */ |
---|
118 | extern const unsigned long long _EAR_; |
---|
119 | asm (".section .toe,\"a\",@nobits\n\r" |
---|
120 | ".align 4\n\r" |
---|
121 | ".type _EAR_, @object\n\r" |
---|
122 | ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous"); |
---|
123 | |
---|
124 | /* froms are indexing tos */ |
---|
125 | static __ea unsigned short *froms; |
---|
126 | static __ea struct tostruct *tos = 0; |
---|
127 | static long tolimit = 0; |
---|
128 | static uintptr_t s_lowpc = 0; |
---|
129 | static uintptr_t s_highpc = 0; |
---|
130 | static unsigned long s_textsize = 0; |
---|
131 | |
---|
132 | static int fd; |
---|
133 | static int hist_size; |
---|
134 | static int timer_id; |
---|
135 | |
---|
136 | void |
---|
137 | __sample (int id) |
---|
138 | { |
---|
139 | unsigned int pc; |
---|
140 | unsigned int pc_backup; |
---|
141 | off_t offset; |
---|
142 | unsigned short val; |
---|
143 | |
---|
144 | if (id != timer_id) |
---|
145 | return; |
---|
146 | |
---|
147 | /* Fetch program counter. */ |
---|
148 | pc = spu_read_srr0 () & ~3; |
---|
149 | pc_backup = pc; |
---|
150 | if (pc < s_lowpc || pc > s_highpc) |
---|
151 | return; |
---|
152 | pc -= (uintptr_t) & _start; |
---|
153 | offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr) |
---|
154 | + 1 + sizeof (struct gmon_hist_hdr); |
---|
155 | |
---|
156 | /* Read, increment and write the counter. */ |
---|
157 | if (pread (fd, &val, 2, offset) != 2) |
---|
158 | { |
---|
159 | perror ("can't read the histogram"); |
---|
160 | return; |
---|
161 | } |
---|
162 | if (val < USHRT_MAX) |
---|
163 | ++val; |
---|
164 | if (pwrite (fd, &val, 2, offset) != 2) |
---|
165 | { |
---|
166 | perror ("can't write the histogram"); |
---|
167 | } |
---|
168 | } |
---|
169 | |
---|
170 | static void |
---|
171 | write_histogram (int fd) |
---|
172 | { |
---|
173 | struct gmon_hist_hdr hist_hdr; |
---|
174 | u_char tag = GMON_TAG_TIME_HIST; |
---|
175 | hist_hdr.low_pc = s_lowpc; |
---|
176 | hist_hdr.high_pc = s_highpc; |
---|
177 | hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */ |
---|
178 | hist_hdr.prof_rate = 100; /* Hertz. */ |
---|
179 | strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen)); |
---|
180 | hist_hdr.dimen_abbrev = 's'; |
---|
181 | struct iovec iov[2] = { |
---|
182 | {&tag, sizeof (tag)}, |
---|
183 | {&hist_hdr, sizeof (struct gmon_hist_hdr)} |
---|
184 | }; |
---|
185 | if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag)) |
---|
186 | perror ("can't write the histogram header"); |
---|
187 | |
---|
188 | /* Skip the already written histogram data. */ |
---|
189 | lseek (fd, hist_size, SEEK_CUR); |
---|
190 | } |
---|
191 | |
---|
192 | static void |
---|
193 | write_callgraph (int fd) |
---|
194 | { |
---|
195 | int fromindex, endfrom; |
---|
196 | uintptr_t frompc; |
---|
197 | int toindex; |
---|
198 | struct rawarc rawarc; |
---|
199 | u_char tag = GMON_TAG_CG_ARC; |
---|
200 | endfrom = s_textsize / (HASHFRACTION * sizeof (*froms)); |
---|
201 | for (fromindex = 0; fromindex < endfrom; ++fromindex) |
---|
202 | { |
---|
203 | if (froms[fromindex]) |
---|
204 | { |
---|
205 | frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms)); |
---|
206 | for (toindex = froms[fromindex]; toindex != 0; |
---|
207 | toindex = tos[toindex].link) |
---|
208 | { |
---|
209 | rawarc.raw_frompc = frompc; |
---|
210 | rawarc.raw_selfpc = tos[toindex].selfpc; |
---|
211 | rawarc.raw_count = tos[toindex].count; |
---|
212 | struct iovec iov[2] = { |
---|
213 | {&tag, sizeof (tag)}, |
---|
214 | {&rawarc, sizeof (struct rawarc)} |
---|
215 | }; |
---|
216 | if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc)) |
---|
217 | perror ("can't write the callgraph"); |
---|
218 | } |
---|
219 | } |
---|
220 | } |
---|
221 | } |
---|
222 | |
---|
223 | void |
---|
224 | __mcleanup (void) |
---|
225 | { |
---|
226 | struct gmon_hdr ghdr; |
---|
227 | |
---|
228 | /* Disable sampling. */ |
---|
229 | spu_timer_stop (timer_id); |
---|
230 | spu_timer_free (timer_id); |
---|
231 | spu_clock_stop (); |
---|
232 | |
---|
233 | /* Jump to the beginning of the gmon.out file. */ |
---|
234 | if (lseek (fd, 0, SEEK_SET) == -1) |
---|
235 | { |
---|
236 | perror ("Cannot seek to the beginning of the gmon.out file."); |
---|
237 | close (fd); |
---|
238 | return; |
---|
239 | } |
---|
240 | |
---|
241 | /* Write the gmon.out header. */ |
---|
242 | memset (&ghdr, '\0', sizeof (struct gmon_hdr)); |
---|
243 | memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie)); |
---|
244 | ghdr.version = GMON_VERSION; |
---|
245 | if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1) |
---|
246 | { |
---|
247 | perror ("Cannot write the gmon header to the gmon.out file."); |
---|
248 | close (fd); |
---|
249 | return; |
---|
250 | } |
---|
251 | |
---|
252 | /* Write the sampling buffer (histogram). */ |
---|
253 | write_histogram (fd); |
---|
254 | |
---|
255 | /* Write the call graph. */ |
---|
256 | write_callgraph (fd); |
---|
257 | |
---|
258 | close (fd); |
---|
259 | } |
---|
260 | |
---|
261 | void |
---|
262 | __monstartup (unsigned long long spu_id) |
---|
263 | { |
---|
264 | char filename[64]; |
---|
265 | s_lowpc = |
---|
266 | ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER)); |
---|
267 | s_highpc = |
---|
268 | ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER)); |
---|
269 | s_textsize = s_highpc - s_lowpc; |
---|
270 | |
---|
271 | hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER); |
---|
272 | |
---|
273 | /* Allocate froms. */ |
---|
274 | froms = malloc_ea (s_textsize / HASHFRACTION); |
---|
275 | if (froms == NULL) |
---|
276 | { |
---|
277 | fprintf (stderr, "Cannot allocate ea memory for the froms array.\n"); |
---|
278 | return; |
---|
279 | } |
---|
280 | memset_ea (froms, 0, s_textsize / HASHFRACTION); |
---|
281 | |
---|
282 | /* Determine tolimit. */ |
---|
283 | tolimit = s_textsize * ARCDENSITY / 100; |
---|
284 | if (tolimit < MINARCS) |
---|
285 | tolimit = MINARCS; |
---|
286 | |
---|
287 | /* Allocate tos. */ |
---|
288 | tos = malloc_ea (tolimit * sizeof (struct tostruct)); |
---|
289 | if (tos == NULL) |
---|
290 | { |
---|
291 | fprintf (stderr, "Cannot allocate ea memory for the tos array.\n"); |
---|
292 | return; |
---|
293 | } |
---|
294 | memset_ea (tos, 0, tolimit * sizeof (struct tostruct)); |
---|
295 | |
---|
296 | /* Determine the gmon.out file name. */ |
---|
297 | if (spu_id) |
---|
298 | snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out", |
---|
299 | linux_getpid (), spu_id, _EAR_); |
---|
300 | else |
---|
301 | strncpy (filename, "gmon.out", sizeof (filename)); |
---|
302 | /* Open the gmon.out file. */ |
---|
303 | fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644); |
---|
304 | if (fd == -1) |
---|
305 | { |
---|
306 | char errstr[128]; |
---|
307 | snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename); |
---|
308 | perror (errstr); |
---|
309 | return; |
---|
310 | } |
---|
311 | /* Truncate the file up to the size where the histogram fits in. */ |
---|
312 | if (ftruncate (fd, |
---|
313 | sizeof (struct gmon_hdr) + 1 + |
---|
314 | sizeof (struct gmon_hist_hdr) + hist_size) == -1) |
---|
315 | { |
---|
316 | char errstr[128]; |
---|
317 | snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename); |
---|
318 | perror (errstr); |
---|
319 | return; |
---|
320 | } |
---|
321 | |
---|
322 | /* Start the histogram sampler. */ |
---|
323 | spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih); |
---|
324 | timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample); |
---|
325 | spu_clock_start (); |
---|
326 | spu_timer_start (timer_id); |
---|
327 | |
---|
328 | atexit (__mcleanup); |
---|
329 | } |
---|
330 | |
---|
331 | void |
---|
332 | __mcount_internal (uintptr_t frompc, uintptr_t selfpc) |
---|
333 | { |
---|
334 | /* sefpc: the address of the function just entered. */ |
---|
335 | /* frompc: the caller of the function just entered. */ |
---|
336 | unsigned int mach_stat; |
---|
337 | __ea unsigned short *frompcindex; |
---|
338 | unsigned short toindex; |
---|
339 | __ea struct tostruct *top; |
---|
340 | __ea struct tostruct *prevtop; |
---|
341 | |
---|
342 | /* Save current state and disable interrupts. */ |
---|
343 | mach_stat = spu_readch(SPU_RdMachStat); |
---|
344 | spu_idisable (); |
---|
345 | |
---|
346 | /* Sanity checks. */ |
---|
347 | if (frompc < s_lowpc || frompc > s_highpc) |
---|
348 | goto done; |
---|
349 | frompc -= s_lowpc; |
---|
350 | if (frompc > s_textsize) |
---|
351 | goto done; |
---|
352 | |
---|
353 | /* frompc indexes into the froms array the value at that position indexes |
---|
354 | into the tos array. */ |
---|
355 | frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))]; |
---|
356 | toindex = *frompcindex; |
---|
357 | if (toindex == 0) |
---|
358 | { |
---|
359 | /* First time traversing this arc link of tos[0] incremented. */ |
---|
360 | toindex = ++tos[0].link; |
---|
361 | /* Sanity check. */ |
---|
362 | if (toindex >= tolimit) |
---|
363 | { |
---|
364 | --tos[0].link; |
---|
365 | goto done; |
---|
366 | } |
---|
367 | /* Save the index into the froms array for the next time we traverse this arc. */ |
---|
368 | *frompcindex = toindex; |
---|
369 | top = &tos[toindex]; |
---|
370 | /* Sets the address of the function just entered. */ |
---|
371 | top->selfpc = selfpc; |
---|
372 | top->count = 1; |
---|
373 | top->link = 0; |
---|
374 | goto done; |
---|
375 | } |
---|
376 | |
---|
377 | /* toindex points to a tostruct */ |
---|
378 | top = &tos[toindex]; |
---|
379 | if (top->selfpc == selfpc) |
---|
380 | { |
---|
381 | /* The arc is at front of the chain. This is the most common case. */ |
---|
382 | top->count++; |
---|
383 | goto done; |
---|
384 | } |
---|
385 | |
---|
386 | /* top->selfpc != selfpc |
---|
387 | The pc we have got is not the pc we already stored (i.e. multiple function |
---|
388 | calls to the same fuction within a function. The arc is not at front of |
---|
389 | the chain. */ |
---|
390 | for (;;) |
---|
391 | { |
---|
392 | if (top->link == 0) |
---|
393 | { |
---|
394 | /* We are at the end of the chain and selfpc was not found. Thus we create |
---|
395 | a new tostruct and link it to the head of the chain. */ |
---|
396 | toindex = ++tos[0].link; |
---|
397 | /* Sanity check. */ |
---|
398 | if (toindex >= tolimit) |
---|
399 | { |
---|
400 | --tos[0].link; |
---|
401 | goto done; |
---|
402 | } |
---|
403 | top = &tos[toindex]; |
---|
404 | top->selfpc = selfpc; |
---|
405 | top->count = 1; |
---|
406 | /* Link back to the old tos entry. */ |
---|
407 | top->link = *frompcindex; |
---|
408 | /* Store a link to the new top in the froms array which makes the |
---|
409 | current tos head of the chain. */ |
---|
410 | *frompcindex = toindex; |
---|
411 | goto done; |
---|
412 | } |
---|
413 | else |
---|
414 | { |
---|
415 | /* Otherwise check the next arc on the chain. */ |
---|
416 | prevtop = top; |
---|
417 | top = &tos[top->link]; |
---|
418 | if (top->selfpc == selfpc) |
---|
419 | { |
---|
420 | /* selfpc matches; increment its count. */ |
---|
421 | top->count++; |
---|
422 | /* Move it to the head of the chain. */ |
---|
423 | /* Save previous tos index. */ |
---|
424 | toindex = prevtop->link; |
---|
425 | /* Link the former to to the current tos. */ |
---|
426 | prevtop->link = top->link; |
---|
427 | /* Link back to the old tos entry. */ |
---|
428 | top->link = *frompcindex; |
---|
429 | /* Store a link to the new top in the froms array which makes the |
---|
430 | current tos head of the chain. */ |
---|
431 | *frompcindex = toindex; |
---|
432 | goto done; |
---|
433 | } |
---|
434 | } |
---|
435 | } |
---|
436 | done: |
---|
437 | /* Enable interrupts if necessary. */ |
---|
438 | if (__builtin_expect (mach_stat & 1, 0)) |
---|
439 | spu_ienable (); |
---|
440 | } |
---|