blob: 3d46fb832d01febaeb71cb9e78f079292b42edf2 [file] [log] [blame]
David Reissc3b36222010-10-06 17:10:10 +00001#!/usr/bin/env python
2#
3# Licensed to the Apache Software Foundation (ASF) under one
4# or more contributor license agreements. See the NOTICE file
5# distributed with this work for additional information
6# regarding copyright ownership. The ASF licenses this file
7# to you under the Apache License, Version 2.0 (the
8# "License"); you may not use this file except in compliance
9# with the License. You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing,
14# software distributed under the License is distributed on an
15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16# KIND, either express or implied. See the License for the
17# specific language governing permissions and limitations
18# under the License.
19#
20"""
21This script can be used to make the output from
22apache::thrift::profile_print_info() more human-readable.
23
24It translates each executable file name and address into the corresponding
25source file name, line number, and function name. By default, it also
26demangles C++ symbol names.
27"""
28
29import optparse
30import os
31import re
32import subprocess
33import sys
34
35
class AddressInfo(object):
    """
    Record of what is known about a single address inside an object file.

    objectFile and address are supplied by the caller at construction time.
    sourceFile, sourceLine and function all start out as None and are meant
    to be filled in later, once the address has been looked up.
    """
    def __init__(self, obj_file, address):
        # Identity of the address: which object file, and where in it.
        self.objectFile, self.address = obj_file, address
        # Nothing has been resolved yet.
        self.sourceFile = self.sourceLine = self.function = None
David Reissc3b36222010-10-06 17:10:10 +000046
47
# Maps object file name -> {address string -> AddressInfo}
g_addrs_by_filename = {}


def get_address(filename, address):
    """
    Return the AddressInfo object for the given object file and address.

    AddressInfo objects are interned in the global g_addrs_by_filename
    table: the first request for a (filename, address) pair creates the
    object, and every later request for the same pair returns that very
    same object.
    """
    by_address = g_addrs_by_filename.setdefault(filename, {})
    if address not in by_address:
        by_address[address] = AddressInfo(filename, address)
    return by_address[address]
70
71
def _read_addr2line_line(proc):
    """
    Return the next output line from addr2line, stripped of whitespace.

    Raises Exception if the line is empty after stripping, which is how
    end-of-file on the pipe shows up.
    """
    line = proc.stdout.readline().strip()
    if not line:
        raise Exception('unexpected EOF from addr2line')
    return line


def translate_file_addresses(filename, addresses, options):
    """
    Use addr2line to look up information for the specified addresses.
    All of the addresses must belong to the same object file.

    filename  -- path of the object file, passed to "addr2line -e".
    addresses -- iterable of AddressInfo-like objects. Each one's sourceFile
                 and sourceLine attributes (and function, when
                 options.printFunctions is set) are filled in place.
    options   -- parsed options; only options.printFunctions is consulted.

    Returns None. Does nothing at all if filename is not an existing file.

    Raises Exception if addr2line's output ends early or does not look like
    "<file>:<line>", and subprocess.CalledProcessError if addr2line exits
    with a non-zero status.
    """
    # Do nothing if we can't find the file
    if not os.path.isfile(filename):
        return

    args = ['addr2line']
    if options.printFunctions:
        args.append('-f')
    args.extend(['-e', filename])

    # universal_newlines=True makes both pipes text-mode, so plain strings
    # can be written and read regardless of the Python major version.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for address in addresses:
            assert address.objectFile == filename
            proc.stdin.write(address.address + '\n')
            # The request must actually reach addr2line before we block
            # waiting for its answer, otherwise both sides wait forever.
            proc.stdin.flush()

            if options.printFunctions:
                address.function = _read_addr2line_line(proc)

            file_and_line = _read_addr2line_line(proc)
            idx = file_and_line.rfind(':')
            if idx < 0:
                msg = 'expected file and line number from addr2line; got %r' % \
                    (file_and_line,)
                msg += '\nfile=%r, address=%r' % (filename, address.address)
                raise Exception(msg)

            address.sourceFile = file_and_line[:idx]
            address.sourceLine = file_and_line[idx+1:]
    finally:
        # Runs on the error path too: closes stdin (so addr2line sees EOF
        # and exits), drains any leftover output, and reaps the child.
        proc.communicate()

    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, args)
117
118
def lookup_addresses(options):
    """
    Resolve every address recorded so far.

    Walks the global g_addrs_by_filename table and hands each object file,
    together with all of the AddressInfo objects stored for it, to
    translate_file_addresses() in a single call per file.
    """
    for (obj_file, by_address) in g_addrs_by_filename.items():
        translate_file_addresses(obj_file, by_address.values(), options)
127
128
class Entry(object):
    """
    One record from the thrift profile output: a header line plus the
    backtrace frames that were listed underneath it.
    """
    def __init__(self, header):
        self.bt = []
        self.header = header

    def addFrame(self, filename, address):
        # When libc managed to resolve symbol names, the filename argument
        # looks like <filename>(<function>+<offset>). Keep only the part
        # in front of the last '('.
        (prefix, paren, _rest) = filename.rpartition('(')
        if paren:
            filename = prefix

        self.bt.append(get_address(filename, address))

    def write(self, f, options):
        f.write(self.header)
        f.write('\n')
        for (n, address) in enumerate(self.bt):
            f.write(' #%-2d %s:%s\n' % (n, address.sourceFile,
                                        address.sourceLine))
            if not options.printFunctions:
                continue
            # Frames whose function could not be determined print as ??
            if address.function:
                f.write(' %s\n' % (address.function,))
            else:
                f.write(' ??\n')
162
163
def process_file(in_file, out_file, options):
    """
    Read thrift profile output from in_file and write a prettier version of
    it to out_file.

    Raises Exception if a backtrace frame appears before any entry header,
    or if a line matches none of the recognized formats.
    """
    #
    # Resolving each address as soon as it is read would be the obvious
    # approach, but addr2line can be quite slow, especially with large
    # executables.
    #
    # Instead, all of the input is parsed first, collecting the addresses
    # that need to be resolved for each file. addr2line is then run just
    # once per file, which is much faster than running it once per address.
    #

    virt_call_regex = re.compile(r'^\s*T_VIRTUAL_CALL: (\d+) calls on (.*):$')
    gen_prot_regex = re.compile(
        r'^\s*T_GENERIC_PROTOCOL: (\d+) calls to (.*) with a (.*):$')
    bt_regex = re.compile(r'^\s*#(\d+)\s*(.*) \[(0x[0-9A-Za-z]+)\]$')

    # Parse all of the input into Entry objects
    entries = []
    current_entry = None
    for line in in_file:
        if line == '\n' or line.startswith('Thrift virtual call info:'):
            continue

        # Both kinds of header line start a new entry; work out the header
        # text first, then handle the bookkeeping in one place.
        header = None
        match = virt_call_regex.match(line)
        if match is not None:
            num_calls = int(match.group(1))
            type_name = match.group(2)
            if options.cxxfilt:
                # Type names reported by typeid() are internal names.
                # By default, c++filt doesn't demangle internal type names.
                # (Some versions of c++filt have a "-t" option to enable
                # this. Other versions don't have this argument, but
                # demangle type names passed as an argument, but not on
                # stdin.)
                #
                # If the output is being filtered through c++filt, prepend
                # "_Z" to the type name to make it look like an external
                # name.
                type_name = '_Z' + type_name
            header = 'T_VIRTUAL_CALL: %d calls on "%s"' % \
                (num_calls, type_name)
        else:
            match = gen_prot_regex.match(line)
            if match is not None:
                num_calls = int(match.group(1))
                type_name1 = match.group(2)
                type_name2 = match.group(3)
                if options.cxxfilt:
                    # Same "_Z" trick as for T_VIRTUAL_CALL type names
                    type_name1 = '_Z' + type_name1
                    type_name2 = '_Z' + type_name2
                header = 'T_GENERIC_PROTOCOL: %d calls to "%s" with a "%s"' % \
                    (num_calls, type_name1, type_name2)

        if header is not None:
            if current_entry is not None:
                entries.append(current_entry)
            current_entry = Entry(header)
            continue

        # Anything that is not a header must be a backtrace frame
        match = bt_regex.match(line)
        if match is None:
            raise Exception('unexpected line in input: %r' % (line,))
        if current_entry is None:
            raise Exception('found backtrace frame before entry header')
        current_entry.addFrame(match.group(2), match.group(3))

    # The entry that was still being built when the input ended
    if current_entry is not None:
        entries.append(current_entry)

    # Look up all of the addresses
    lookup_addresses(options)

    # Print out the entries, now that the information has been translated
    for entry in entries:
        entry.write(out_file, options)
        out_file.write('\n')
257
258
def start_cppfilt():
    """
    Arrange for everything this script writes to stdout to pass through
    c++filt.

    A pipe is created and the process forks. The parent process attaches
    the read end of the pipe to its stdin and replaces itself with c++filt.
    The child process attaches the write end of the pipe to its stdout and
    returns, so normal processing continues in the child.
    """
    (pipe_r, pipe_w) = os.pipe()

    if os.fork() != 0:
        # parent: turn into c++filt, reading from the pipe
        os.dup2(pipe_r, sys.stdin.fileno())
        os.close(pipe_r)
        os.close(pipe_w)

        argv = ['c++filt']
        os.execvp(argv[0], argv)

    # child: send stdout into the pipe, then carry on with normal processing
    os.dup2(pipe_w, sys.stdout.fileno())
    os.close(pipe_r)
    os.close(pipe_w)
279
280
def main(argv):
    """
    Command-line entry point.

    argv is the full argument vector, with the program name at argv[0].
    At most one positional argument is accepted: the file to read the
    profile output from. With no positional argument, stdin is read.

    Returns 1 if too many positional arguments were given, and None after
    a successful run. Errors from opening the input file propagate to the
    caller.
    """
    parser = optparse.OptionParser(usage='%prog [options] [<file>]')
    parser.add_option('--no-functions', help='Don\'t print function names',
                      dest='printFunctions', action='store_false',
                      default=True)
    parser.add_option('--no-demangle',
                      help='Don\'t demangle C++ symbol names',
                      dest='cxxfilt', action='store_false',
                      default=True)

    (options, args) = parser.parse_args(argv[1:])
    if len(args) > 1:
        parser.print_usage(sys.stderr)
        sys.stderr.write('trailing arguments: %s\n' % (' '.join(args[1:]),))
        return 1

    if args:
        # Use the parsed positional argument, not argv[1]: argv[1] is an
        # option string whenever options precede the file name.
        in_file = open(args[0], 'r')
    else:
        in_file = sys.stdin

    try:
        if options.cxxfilt:
            start_cppfilt()

        process_file(in_file, sys.stdout, options)
    finally:
        # Only close what was opened here; stdin is left alone.
        if in_file is not sys.stdin:
            in_file.close()
306
307
if __name__ == '__main__':
    # Use main()'s return value as the process exit status
    sys.exit(main(sys.argv))
310 sys.exit(rc)