blob: 0be5f29ed7e9148f8bd71acaf32adbfe20e44152 [file] [log] [blame]
David Reissc3b36222010-10-06 17:10:10 +00001#!/usr/bin/env python
2#
3# Licensed to the Apache Software Foundation (ASF) under one
4# or more contributor license agreements. See the NOTICE file
5# distributed with this work for additional information
6# regarding copyright ownership. The ASF licenses this file
7# to you under the Apache License, Version 2.0 (the
8# "License"); you may not use this file except in compliance
9# with the License. You may obtain a copy of the License at
10#
11# http://www.apache.org/licenses/LICENSE-2.0
12#
13# Unless required by applicable law or agreed to in writing,
14# software distributed under the License is distributed on an
15# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
16# KIND, either express or implied. See the License for the
17# specific language governing permissions and limitations
18# under the License.
19#
20"""
21This script can be used to make the output from
22apache::thrift::profile_print_info() more human-readable.
23
24It translates each executable file name and address into the corresponding
25source file name, line number, and function name. By default, it also
26demangles C++ symbol names.
27"""
28
29import optparse
30import os
31import re
32import subprocess
33import sys
34
35
class AddressInfo(object):
    """
    Record describing one address inside an object file.

    objectFile and address identify the location and are set by the
    constructor.  sourceFile, sourceLine and function start out as None
    and are meant to be filled in once the address has been translated.
    """
    def __init__(self, obj_file, address):
        # What we were asked about: the object file and the address in it
        self.objectFile, self.address = obj_file, address
        # What is not known yet at construction time
        self.sourceFile = self.sourceLine = self.function = None
David Reissc3b36222010-10-06 17:10:10 +000046
47
# Registry of every AddressInfo created so far:
#   object file name -> {address string -> AddressInfo}
g_addrs_by_filename = {}


def get_address(filename, address):
    """
    Return the AddressInfo for the given object file and address.

    AddressInfo objects are cached in the module-level g_addrs_by_filename
    registry, so asking twice for the same (filename, address) pair hands
    back the very same object.  A new AddressInfo is created and registered
    only the first time a pair is seen.
    """
    # Per-file table, created on first use of this file name
    per_file = g_addrs_by_filename.setdefault(filename, {})
    if address not in per_file:
        per_file[address] = AddressInfo(filename, address)
    return per_file[address]
72
73
def translate_file_addresses(filename, addresses, options):
    """
    Use addr2line to look up information for the specified addresses.
    All of the addresses must belong to the same object file.

    filename  -- path of the object file handed to addr2line with -e
    addresses -- iterable of AddressInfo-like objects; for each one the
                 sourceFile and sourceLine attributes are filled in, and
                 also function when options.printFunctions is true
    options   -- object with a boolean printFunctions attribute

    Returns None.  Does nothing at all if filename is not an existing file.

    Raises Exception if addr2line's output ends early or does not look like
    "<file>:<line>", and subprocess.CalledProcessError if addr2line exits
    with a non-zero status.
    """
    # Do nothing if we can't find the file
    if not os.path.isfile(filename):
        return

    args = ['addr2line']
    if options.printFunctions:
        args.append('-f')
    args.extend(['-e', filename])

    # universal_newlines=True puts both pipes in text mode, so plain str can
    # be written and read back on Python 3 as well as Python 2.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for address in addresses:
            assert address.objectFile == filename
            proc.stdin.write(address.address + '\n')
            # The pipe may be buffered; push the request out before we block
            # waiting for the answer, otherwise both sides wait forever.
            proc.stdin.flush()

            if options.printFunctions:
                # With -f, the function name comes first on its own line
                function = proc.stdout.readline().strip()
                if not function:
                    raise Exception('unexpected EOF from addr2line')
                address.function = function

            file_and_line = proc.stdout.readline().strip()
            if not file_and_line:
                raise Exception('unexpected EOF from addr2line')
            # Split on the last ':' so that colons in the path are preserved
            idx = file_and_line.rfind(':')
            if idx < 0:
                msg = 'expected file and line number from addr2line; got %r' % \
                    (file_and_line,)
                msg += '\nfile=%r, address=%r' % (filename, address.address)
                raise Exception(msg)

            address.sourceFile = file_and_line[:idx]
            address.sourceLine = file_and_line[idx + 1:]
    except Exception:
        # Don't leave addr2line running with its pipes open when bailing out
        proc.kill()
        proc.communicate()
        raise

    # Closes stdin (EOF for addr2line), drains stdout and reaps the process
    proc.communicate()
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, args)
119
120
def lookup_addresses(options):
    """
    Resolve source information for every address recorded so far.

    Walks the module-level g_addrs_by_filename registry and hands each
    object file, together with all AddressInfo objects stored for it, to
    translate_file_addresses().
    """
    for obj_file, by_address in g_addrs_by_filename.items():
        translate_file_addresses(obj_file, by_address.values(), options)
129
130
class Entry(object):
    """
    One record of the thrift profile output: a header line plus the
    backtrace frames collected for it.
    """
    def __init__(self, header):
        self.bt = []
        self.header = header

    def addFrame(self, filename, address):
        # When libc could resolve the symbol name, the filename argument
        # looks like <filename>(<function>+<offset>).  Only the part before
        # the last '(' is the object file name.
        paren = filename.rfind('(')
        obj_file = filename if paren < 0 else filename[:paren]
        self.bt.append(get_address(obj_file, address))

    def write(self, f, options):
        # Header first, then one numbered line per frame
        f.write(self.header)
        f.write('\n')
        for n, frame in enumerate(self.bt):
            f.write(' #%-2d %s:%s\n' % (n, frame.sourceFile,
                                        frame.sourceLine))
            if not options.printFunctions:
                continue
            # Function name on its own line; '??' when it is not known
            if frame.function:
                f.write(' %s\n' % (frame.function,))
            else:
                f.write(' ??\n')
164
165
def process_file(in_file, out_file, options):
    """
    Read thrift profile output from in_file and write a more readable
    version of it to out_file.

    Raises Exception when a backtrace frame appears before any entry
    header, or when a line matches none of the expected formats.
    """
    #
    # Calling addr2line for every frame as it is read would be simple, but
    # addr2line can be quite slow, especially with large executables.
    #
    # Instead, all of the input is parsed first, which records every address
    # that needs resolving, grouped by file.  addr2line is then run just once
    # per file, and only afterwards are the entries printed.
    #

    virt_call_regex = re.compile(r'^\s*T_VIRTUAL_CALL: (\d+) calls on (.*):$')
    gen_prot_regex = re.compile(
        r'^\s*T_GENERIC_PROTOCOL: (\d+) calls to (.*) with a (.*):$')
    bt_regex = re.compile(r'^\s*#(\d+)\s*(.*) \[(0x[0-9A-Za-z]+)\]$')

    # Parse all of the input, and store it as Entry objects
    entries = []
    pending = None
    # readline() returns '' at end of file, which stops the iteration
    for line in iter(in_file.readline, ''):
        if line == '\n' or line.startswith('Thrift virtual call info:'):
            continue

        header = None
        match = virt_call_regex.match(line)
        if match:
            count, type_name = match.groups()
            if options.cxxfilt:
                # typeid() reports internal type names, which c++filt does
                # not demangle by default.  (Some versions have a "-t"
                # option for this; others only demangle type names given
                # as an argument, not on stdin.)  Prepending "_Z" makes the
                # name look like an external one, so c++filt handles it.
                type_name = '_Z' + type_name
            header = 'T_VIRTUAL_CALL: %d calls on "%s"' % \
                (int(count), type_name)
        else:
            match = gen_prot_regex.match(line)
            if match:
                count, type_name1, type_name2 = match.groups()
                if options.cxxfilt:
                    type_name1 = '_Z' + type_name1
                    type_name2 = '_Z' + type_name2
                header = 'T_GENERIC_PROTOCOL: %d calls to "%s" with a "%s"' % \
                    (int(count), type_name1, type_name2)

        if header is not None:
            # A new header finishes the entry collected so far
            if pending is not None:
                entries.append(pending)
            pending = Entry(header)
            continue

        match = bt_regex.match(line)
        if not match:
            raise Exception('unexpected line in input: %r' % (line,))
        if pending is None:
            raise Exception('found backtrace frame before entry header')
        pending.addFrame(match.group(2), match.group(3))

    # Add the last entry we were processing to the list
    if pending is not None:
        entries.append(pending)
        pending = None

    # Look up all of the addresses
    lookup_addresses(options)

    # Print out the entries, now that the information has been translated
    for entry in entries:
        entry.write(out_file, options)
        out_file.write('\n')
259
260
def start_cppfilt():
    """
    Arrange for everything written to stdout to pass through c++filt.

    Creates a pipe and forks.  The forked child gets its stdout redirected
    to the write end of the pipe and returns to the caller.  The original
    process gets its stdin redirected to the read end of the pipe and then
    replaces itself with c++filt, so it never returns from this function.
    """
    pipe_rd, pipe_wr = os.pipe()

    # Fork. c++filt runs in the parent process, and normal processing
    # continues in the child.
    if os.fork() == 0:
        # child
        os.dup2(pipe_wr, sys.stdout.fileno())
        os.close(pipe_rd)
        os.close(pipe_wr)
        return

    # parent
    os.dup2(pipe_rd, sys.stdin.fileno())
    os.close(pipe_rd)
    os.close(pipe_wr)

    os.execvp('c++filt', ['c++filt'])
281
282
def main(argv):
    """
    Command-line entry point.

    argv is the full argument vector, with the program name at argv[0].
    The profile is read from the single positional <file> argument, or from
    stdin when there is none, and the translated output goes to stdout.
    Unless --no-demangle is given, the output is piped through c++filt.

    Returns 1 (after printing usage to stderr) when more than one positional
    argument is given; otherwise returns None once processing is done.
    Errors from opening the input file propagate to the caller.
    """
    parser = optparse.OptionParser(usage='%prog [options] [<file>]')
    parser.add_option('--no-functions', help='Don\'t print function names',
                      dest='printFunctions', action='store_false',
                      default=True)
    parser.add_option('--no-demangle',
                      help='Don\'t demangle C++ symbol names',
                      dest='cxxfilt', action='store_false',
                      default=True)

    (options, args) = parser.parse_args(argv[1:])
    if len(args) > 1:
        parser.print_usage(sys.stderr)
        # sys.stderr.write works on both Python 2 and 3, unlike "print >>"
        sys.stderr.write('trailing arguments: %s\n' % (' '.join(args[1:]),))
        return 1

    if args:
        # Use the parsed positional argument.  argv[1] is an option flag
        # rather than the file name whenever options come first.
        in_file = open(args[0], 'r')
    else:
        in_file = sys.stdin

    try:
        if options.cxxfilt:
            start_cppfilt()

        process_file(in_file, sys.stdout, options)
    finally:
        # Close the file we opened ourselves; leave stdin alone
        if in_file is not sys.stdin:
            in_file.close()
308
309
# Run main() only when executed as a script, and use its return value as
# the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv))