| David Reiss | c3b3622 | 2010-10-06 17:10:10 +0000 | [diff] [blame] | 1 | #!/usr/bin/env python | 
|  | 2 | # | 
|  | 3 | # Licensed to the Apache Software Foundation (ASF) under one | 
|  | 4 | # or more contributor license agreements. See the NOTICE file | 
|  | 5 | # distributed with this work for additional information | 
|  | 6 | # regarding copyright ownership. The ASF licenses this file | 
|  | 7 | # to you under the Apache License, Version 2.0 (the | 
|  | 8 | # "License"); you may not use this file except in compliance | 
|  | 9 | # with the License. You may obtain a copy of the License at | 
|  | 10 | # | 
|  | 11 | #   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 12 | # | 
|  | 13 | # Unless required by applicable law or agreed to in writing, | 
|  | 14 | # software distributed under the License is distributed on an | 
|  | 15 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | 16 | # KIND, either express or implied. See the License for the | 
|  | 17 | # specific language governing permissions and limitations | 
|  | 18 | # under the License. | 
|  | 19 | # | 
|  | 20 | """ | 
|  | 21 | This script can be used to make the output from | 
|  | 22 | apache::thrift::profile_print_info() more human-readable. | 
|  | 23 |  | 
|  | 24 | It translates each executable file name and address into the corresponding | 
|  | 25 | source file name, line number, and function name.  By default, it also | 
|  | 26 | demangles C++ symbol names. | 
|  | 27 | """ | 
|  | 28 |  | 
|  | 29 | import optparse | 
|  | 30 | import os | 
|  | 31 | import re | 
|  | 32 | import subprocess | 
|  | 33 | import sys | 
|  | 34 |  | 
|  | 35 |  | 
class AddressInfo(object):
    """
    A class to store information about a particular address in an object file.

    objectFile and address identify the location and are set at construction
    time.  sourceFile, sourceLine and function start out as None and are
    filled in by translate_file_addresses() once the address is resolved.
    """
    def __init__(self, obj_file, address):
        self.objectFile = obj_file
        self.address = address
        self.sourceFile = None
        self.sourceLine = None
        # Must be spelled "function": Entry.write() reads address.function
        # even for addresses that were never resolved.
        self.function = None
|  | 46 |  | 
|  | 47 |  | 
# Maps object file name -> {address string -> AddressInfo}
g_addrs_by_filename = {}


def get_address(filename, address):
    """
    Return the AddressInfo for the given object file and address.

    AddressInfo objects are cached in g_addrs_by_filename, so asking for the
    same (filename, address) pair twice yields the very same object.  A new
    AddressInfo is created and cached the first time a pair is seen.
    """
    # The global dict is only mutated, never rebound, so no "global"
    # declaration is required here.
    per_file = g_addrs_by_filename.setdefault(filename, {})

    info = per_file.get(address)
    if info is None:
        info = AddressInfo(filename, address)
        per_file[address] = info
    return info
|  | 70 |  | 
|  | 71 |  | 
def translate_file_addresses(filename, addresses, options):
    """
    Use addr2line to look up information for the specified addresses.
    All of the addresses must belong to the same object file.

    filename is the object file passed to "addr2line -e".  addresses is an
    iterable of AddressInfo objects; their sourceFile and sourceLine
    attributes (and function, when options.printFunctions is set) are
    updated in place.  Nothing is returned.

    If filename does not exist, the addresses are left untouched.  Raises
    Exception if addr2line produces unexpected output, and
    subprocess.CalledProcessError if it exits with a non-zero status.
    """
    # Do nothing if we can't find the file
    if not os.path.isfile(filename):
        return

    args = ['addr2line']
    if options.printFunctions:
        args.append('-f')
    args.extend(['-e', filename])

    # universal_newlines=True makes the pipes text-mode on Python 3, so the
    # str reads and writes below behave the same on Python 2 and 3.
    proc = subprocess.Popen(args, stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        for address in addresses:
            assert address.objectFile == filename
            proc.stdin.write(address.address + '\n')
            # We read the answer for this address before sending the next
            # one, so the request must not sit in a write buffer.
            proc.stdin.flush()

            if options.printFunctions:
                function = proc.stdout.readline().strip()
                if not function:
                    raise Exception('unexpected EOF from addr2line')
                address.function = function

            file_and_line = proc.stdout.readline().strip()
            if not file_and_line:
                raise Exception('unexpected EOF from addr2line')
            # Split on the last ':' since the file name may contain colons
            idx = file_and_line.rfind(':')
            if idx < 0:
                msg = 'expected file and line number from addr2line; got %r' % \
                        (file_and_line,)
                msg += '\nfile=%r, address=%r' % (filename, address.address)
                raise Exception(msg)

            address.sourceFile = file_and_line[:idx]
            address.sourceLine = file_and_line[idx+1:]
    finally:
        # communicate() closes stdin (so addr2line sees EOF and exits),
        # drains stdout and reaps the child -- also when the loop above
        # raised, so no addr2line process is left behind.
        proc.communicate()

    retcode = proc.returncode
    if retcode != 0:
        raise subprocess.CalledProcessError(retcode, args)
|  | 117 |  | 
|  | 118 |  | 
def lookup_addresses(options):
    """
    Resolve every address recorded in the global g_addrs_by_filename table.

    translate_file_addresses() is called once per object file with all of
    the AddressInfo objects registered for that file.
    """
    for (obj_file, by_address) in g_addrs_by_filename.items():
        translate_file_addresses(obj_file, by_address.values(), options)
|  | 127 |  | 
|  | 128 |  | 
class Entry(object):
    """
    One record of the thrift profile output: a header line followed by
    the backtrace (a list of AddressInfo objects) that belongs to it.
    """
    def __init__(self, header):
        self.header = header
        self.bt = []

    def addFrame(self, filename, address):
        """
        Append the frame identified by filename and address to the backtrace.
        """
        # When libc could resolve the symbol, filename looks like
        # <filename>(<function>+<offset>); keep only the part before
        # the last '('.
        (head, paren, _) = filename.rpartition('(')
        if paren:
            filename = head

        self.bt.append(get_address(filename, address))

    def write(self, f, options):
        """
        Write the header and one numbered line per frame to the file f.

        When options.printFunctions is set, each frame is followed by its
        function name, or '??' if no name is known.
        """
        f.write(self.header)
        f.write('\n')
        for (index, frame) in enumerate(self.bt):
            f.write('  #%-2d %s:%s\n' % (index, frame.sourceFile,
                                         frame.sourceLine))
            if not options.printFunctions:
                continue
            if frame.function:
                f.write('      %s\n' % (frame.function,))
            else:
                f.write('      ??\n')
|  | 162 |  | 
|  | 163 |  | 
def process_file(in_file, out_file, options):
    """
    Read thrift profile output from in_file and write a more readable
    version of it to out_file.

    Raises Exception if the input contains a line that is not recognized,
    or a backtrace frame that is not preceded by an entry header.
    """
    #
    # Calling addr2line once per frame as the input is read would be the
    # simplest thing to do, but addr2line can be quite slow, especially
    # with large executables.
    #
    # Instead, the whole input is parsed first, recording which addresses
    # need to be resolved in each object file.  addr2line is then run just
    # once per file, and only after that is any output written.
    #

    virt_call_regex = re.compile(r'^\s*T_VIRTUAL_CALL: (\d+) calls on (.*):$')
    gen_prot_regex = re.compile(
            r'^\s*T_GENERIC_PROTOCOL: (\d+) calls to (.*) with a (.*):$')
    bt_regex = re.compile(r'^\s*#(\d+)\s*(.*) \[(0x[0-9A-Za-z]+)\]$')

    # Type names reported by typeid() are internal names.
    # By default, c++filt doesn't demangle internal type names.
    # (Some versions of c++filt have a "-t" option to enable this.
    # Other versions don't have this argument, but demangle type
    # names passed as an argument, but not on stdin.)
    #
    # If the output is being filtered through c++filt, prepend
    # "_Z" to each type name to make it look like an external name.
    if options.cxxfilt:
        type_prefix = '_Z'
    else:
        type_prefix = ''

    # Parse all of the input into Entry objects.  The most recently started
    # entry is always entries[-1]; backtrace frames are attached to it.
    entries = []
    for line in iter(in_file.readline, ''):
        if line == '\n' or line.startswith('Thrift virtual call info:'):
            continue

        header = None
        match = virt_call_regex.match(line)
        if match:
            header = 'T_VIRTUAL_CALL: %d calls on "%s"' % \
                    (int(match.group(1)), type_prefix + match.group(2))
        else:
            match = gen_prot_regex.match(line)
            if match:
                header = 'T_GENERIC_PROTOCOL: %d calls to "%s" with a "%s"' % \
                        (int(match.group(1)),
                         type_prefix + match.group(2),
                         type_prefix + match.group(3))
        if header is not None:
            entries.append(Entry(header))
            continue

        match = bt_regex.match(line)
        if not match:
            raise Exception('unexpected line in input: %r' % (line,))
        if not entries:
            raise Exception('found backtrace frame before entry header')
        # group(2) is the object file name, group(3) the hex address
        entries[-1].addFrame(match.group(2), match.group(3))

    # Look up all of the addresses
    lookup_addresses(options)

    # Print out the entries, now that the information has been translated
    for entry in entries:
        entry.write(out_file, options)
        out_file.write('\n')
|  | 257 |  | 
|  | 258 |  | 
def start_cppfilt():
    """
    Arrange for everything written to stdout to be piped through c++filt.

    The process forks.  The parent replaces itself with c++filt, reading
    from a pipe; the child gets the write end of that pipe as its stdout
    and returns to the caller to continue normal processing.
    """
    (pipe_rd, pipe_wr) = os.pipe()

    if os.fork() != 0:
        # parent: read from the pipe on stdin, then become c++filt
        os.dup2(pipe_rd, sys.stdin.fileno())
        os.close(pipe_rd)
        os.close(pipe_wr)

        filt_argv = ['c++filt']
        os.execvp(filt_argv[0], filt_argv)
    else:
        # child: send stdout into the pipe and carry on
        os.dup2(pipe_wr, sys.stdout.fileno())
        os.close(pipe_rd)
        os.close(pipe_wr)
|  | 279 |  | 
|  | 280 |  | 
def main(argv):
    """
    Command line entry point.

    argv is the full argument vector, including the program name.  At most
    one positional argument (the input file) is accepted; with none, input
    is read from stdin.  Output goes to stdout, through c++filt unless
    --no-demangle is given.

    Returns 1 if too many arguments were supplied, None on success.
    """
    parser = optparse.OptionParser(usage='%prog [options] [<file>]')
    parser.add_option('--no-functions', help='Don\'t print function names',
                      dest='printFunctions', action='store_false',
                      default=True)
    parser.add_option('--no-demangle',
                      help='Don\'t demangle C++ symbol names',
                      dest='cxxfilt', action='store_false',
                      default=True)

    (options, args) = parser.parse_args(argv[1:])
    num_args = len(args)
    if num_args > 1:
        parser.print_usage(sys.stderr)
        # sys.stderr.write() works on both Python 2 and Python 3
        sys.stderr.write('trailing arguments: %s\n' % (' '.join(args[1:]),))
        return 1

    if num_args == 1:
        # Use the parsed positional argument, not argv[1]: argv[1] is an
        # option rather than the file name whenever options come first.
        in_file = open(args[0], 'r')
    else:
        in_file = sys.stdin

    try:
        if options.cxxfilt:
            start_cppfilt()

        process_file(in_file, sys.stdout, options)
    finally:
        if in_file is not sys.stdin:
            in_file.close()
|  | 306 |  | 
|  | 307 |  | 
if __name__ == '__main__':
    # Use main()'s return value as the process exit status
    sys.exit(main(sys.argv))