Greg Stein | db84a10 | 2009-01-31 07:40:26 +0000 | [diff] [blame^] | 1 | # |
| 2 | # simple parser for Thrift. |
| 3 | # |
| 4 | |
| 5 | # Note: the scanner module is designed to allow this wildcard import |
| 6 | from scanner import * |
| 7 | |
| 8 | |
def parse(contents):
    """Parse Thrift IDL source text and return the resulting Program.

    contents -- the IDL text; it is handed directly to Scanner.

    Top-level tokens are read until the scanner's get() returns None, at
    which point the accumulated Program is returned.  Header and
    definition keywords are accepted in any order.

    Raises IncorrectSyntax when a top-level token is not one of the
    keywords handled below.  ExpectedType / ExpectedError are raised for
    malformed enum, senum and service bodies.
    """
    scanner = Scanner(contents)
    program = Program()

    while True:
        t = scanner.get()
        if t is None:
            return program

        ### delta: we don't enforce HeaderList followed by DefinitionList
        ### delta: deprecated namespaces are not parsed

        if t == ID_INCLUDE:
            inc = scanner.value_of(TYPE_LIT)
            program.add_include(inc)
        elif t == ID_NAMESPACE:
            lang = scanner.value_of(TYPE_ID)
            ns = scanner.value_of(TYPE_ID)
            program.add_namespace(lang, ns)
        elif t == ID_CPP_INCLUDE:
            inc = scanner.value_of(TYPE_LIT)
            program.add_cpp_include(inc)
        elif t == ID_PHP_NAMESPACE:
            ns = scanner.value_of(TYPE_ID)
            program.set_php_namespace(ns)
        elif t == ID_XSD_NAMESPACE:
            ns = scanner.value_of(TYPE_LIT)
            program.set_xsd_namespace(ns)
        elif t == ID_CONST:
            doc = scanner.doc
            ft = parse_field_type(scanner, True)
            ident = scanner.value_of(TYPE_ID)
            scanner.eat_expected(SYM_EQ)
            value = parse_const_value(scanner)
            scanner.eat_commasemi()
            program.add_const(ident, ft, value, doc)
        elif t == ID_TYPEDEF:
            doc = scanner.doc
            # arbitrary identifiers are not accepted as a typedef's type
            ft = parse_field_type(scanner, False)
            ident = scanner.value_of(TYPE_ID)
            program.add_typedef(ident, ft, doc)
        elif t == ID_ENUM:
            _parse_enum(scanner, program)
        elif t == ID_SENUM:
            _parse_senum(scanner, program)
        elif t == ID_STRUCT:
            _parse_struct(scanner, program)
        elif t == ID_EXCEPTION:
            doc = scanner.doc
            ident = scanner.value_of(TYPE_ID)
            fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
            program.add_exception(ident, fields, doc)
        elif t == ID_SERVICE:
            _parse_service(scanner, program)
        else:
            raise IncorrectSyntax(scanner.lineno)


def _parse_enum(scanner, program):
    """Parse an enum body (keyword already consumed) and add it to program."""
    enum_doc = scanner.doc
    enum_ident = scanner.value_of(TYPE_ID)
    scanner.eat_expected(SYM_LBRACE)
    values = []
    while True:
        t = scanner.get(eof_allowed=False)
        if t == SYM_RBRACE:
            break
        if t.ttype != TYPE_ID:
            raise ExpectedType(TYPE_ID, t.ttype, scanner.lineno)
        doc = scanner.doc
        ident = t.tvalue
        t = scanner.get(eof_allowed=False)
        if t == SYM_EQ:
            value = scanner.value_of(TYPE_INT)
        else:
            # no explicit value; Enum fills it in from the preceding entry
            scanner.pushback(t)
            value = None
        scanner.eat_commasemi()
        values.append(EnumValue(ident, value, doc))
    program.add_enum(enum_ident, values, enum_doc)


def _parse_senum(scanner, program):
    """Parse an senum body (keyword already consumed) and add it to program."""
    doc = scanner.doc
    ident = scanner.value_of(TYPE_ID)
    scanner.eat_expected(SYM_LBRACE)
    values = []
    while True:
        t = scanner.get(eof_allowed=False)
        if t == SYM_RBRACE:
            break
        if t.ttype != TYPE_LIT:
            raise ExpectedType(TYPE_LIT, t.ttype, scanner.lineno)
        scanner.eat_commasemi()
        values.append(t.tvalue)
    program.add_senum(ident, values, doc)


def _parse_struct(scanner, program):
    """Parse a struct (keyword already consumed) and add it to program."""
    doc = scanner.doc
    ident = scanner.value_of(TYPE_ID)
    t = scanner.get(eof_allowed=False)
    if t != ID_XSD_ALL:
        scanner.pushback(t)
    ### NOTE: the xsd_all marker is consumed but not recorded, because
    ### Program.add_struct has no parameter to carry it.
    fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
    annotations = parse_annotations(scanner)
    program.add_struct(ident, fields, annotations, doc)


def _parse_service(scanner, program):
    """Parse a service (keyword already consumed) and add it to program."""
    svc_doc = scanner.doc
    svc_ident = scanner.value_of(TYPE_ID)
    t = scanner.get(eof_allowed=False)
    if t == ID_EXTENDS:
        # The parent service is the identifier that follows the keyword,
        # not the keyword token's own value.
        extends = scanner.value_of(TYPE_ID)
        t = scanner.get(eof_allowed=False)
    else:
        extends = None
    if t != SYM_LBRACE:
        raise ExpectedError(SYM_LBRACE, t, scanner.lineno)

    functions = []
    while True:
        t = scanner.get(eof_allowed=False)
        doc = scanner.doc
        if t == SYM_RBRACE:
            break
        # "async" is a reserved word on Python 3.7+, hence the local name
        if t == ID_ASYNC:
            is_async = True
            t = scanner.get(eof_allowed=False)
        else:
            is_async = False
        if t == ID_VOID:
            ft = FieldType(ident=ID_VOID)
        else:
            scanner.pushback(t)
            ft = parse_field_type(scanner, True)
        ident = scanner.value_of(TYPE_ID)
        params = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
        t = scanner.get(eof_allowed=False)
        if t == ID_THROWS:
            throws = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
        else:
            throws = None
            scanner.pushback(t)
        scanner.eat_commasemi()
        functions.append(Function(ident, is_async, ft, params, throws, doc))
    program.add_service(svc_ident, extends, functions, svc_doc)
| 145 | |
| 146 | |
def parse_field_type(scanner, ident_allowed):
    """Parse a type reference from scanner and return it as a FieldType.

    scanner       -- token source positioned at the start of the type
    ident_allowed -- when true, an identifier that is neither in
                     BASE_TYPES nor a container keyword is accepted as
                     the type

    Base types are returned immediately.  For ID_MAP and ID_SET an
    optional cpp_type literal may follow the keyword; for ID_LIST it may
    follow the closing SYM_GT.  Container types also pick up any trailing
    annotations via parse_annotations().

    Raises ExpectedError when SYM_LT does not follow a map/set keyword,
    and IncorrectSyntax for any other identifier when ident_allowed is
    false.
    """
    ident = scanner.get_type(TYPE_ID)
    if ident in BASE_TYPES:
        return FieldType(ident=ident)

    if ident == ID_MAP:
        cpp_type = _parse_container_open(scanner)
        map_from = parse_field_type(scanner, True)
        scanner.eat_expected(SYM_COMMA)
        map_to = parse_field_type(scanner, True)
        scanner.eat_expected(SYM_GT)
        return FieldType(cpp_type=cpp_type, map_from=map_from, map_to=map_to,
                         annotations=parse_annotations(scanner))

    if ident == ID_SET:
        cpp_type = _parse_container_open(scanner)
        set_of = parse_field_type(scanner, True)
        scanner.eat_expected(SYM_GT)
        return FieldType(cpp_type=cpp_type, set_of=set_of,
                         annotations=parse_annotations(scanner))

    if ident == ID_LIST:
        cpp_type = None
        scanner.eat_expected(SYM_LT)
        list_of = parse_field_type(scanner, True)
        scanner.eat_expected(SYM_GT)
        # unlike map/set, the cpp_type clause trails the element type
        t = scanner.get()
        if t == ID_CPP_TYPE:
            cpp_type = scanner.value_of(TYPE_LIT)
        elif t is not None:
            scanner.pushback(t)
        return FieldType(cpp_type=cpp_type, list_of=list_of,
                         annotations=parse_annotations(scanner))

    # random identifiers are allowed for FieldType, but not DefinitionType
    if ident_allowed:
        return FieldType(ident=ident)

    raise IncorrectSyntax(scanner.lineno)


def _parse_container_open(scanner):
    """Consume an optional cpp_type clause plus the required SYM_LT.

    Returns the cpp_type literal, or None when no clause is present.
    """
    cpp_type = None
    t = scanner.get(eof_allowed=False)
    if t == ID_CPP_TYPE:
        # TYPE_LIT is the literal token type used everywhere else in this
        # file; the previous TYPE_LITERAL spelling appears nowhere else.
        cpp_type = scanner.value_of(TYPE_LIT)
        t = scanner.get()
    if t != SYM_LT:
        raise ExpectedError(SYM_LT, t, scanner.lineno)
    return cpp_type
| 197 | |
| 198 | |
def parse_const_value(scanner):
    """Parse one constant value from scanner and return a ConstValue.

    A token of type TYPE_INT, TYPE_HEX, TYPE_DUB, TYPE_LIT or TYPE_ID
    becomes a CTYPE_BASE value wrapping the token itself.  SYM_LBRKT
    opens a list (CTYPE_LIST of ConstValue), and SYM_LBRACE opens a map
    (CTYPE_MAP of KeyValuePair).  Both containers nest recursively.

    Raises IncorrectSyntax when the first token starts none of these.
    """
    value = scanner.get(eof_allowed=False)
    if value.ttype in [TYPE_INT, TYPE_HEX, TYPE_DUB, TYPE_LIT, TYPE_ID]:
        return ConstValue(ConstValue.CTYPE_BASE, value)

    if value == SYM_LBRKT:
        items = []
        while True:
            t = scanner.get(eof_allowed=False)
            if t == SYM_RBRKT:
                return ConstValue(ConstValue.CTYPE_LIST, items)
            scanner.pushback(t)
            # The separator follows each element, as in the map branch
            # below, so a trailing separator before the closing bracket
            # is consumed here instead of being parsed as a value.
            items.append(parse_const_value(scanner))
            scanner.eat_commasemi()

    if value == SYM_LBRACE:
        pairs = []
        while True:
            t = scanner.get(eof_allowed=False)
            if t == SYM_RBRACE:
                return ConstValue(ConstValue.CTYPE_MAP, pairs)
            scanner.pushback(t)
            key = parse_const_value(scanner)
            scanner.eat_expected(SYM_COLON)
            mapped = parse_const_value(scanner)
            scanner.eat_commasemi()
            pairs.append(KeyValuePair(key, mapped))

    raise IncorrectSyntax(scanner.lineno)
| 228 | |
| 229 | |
def parse_field_list(scanner, start, end):
    """Parse a delimited list of fields and return them as Field objects.

    scanner -- token source positioned just before the opening delimiter
    start   -- token that must open the list (consumed via eat_expected)
    end     -- token that closes the list

    Each field is read in this order: an optional integer id followed by
    SYM_COLON, an optional required/optional marker, the field type, the
    name, an optional "= const" default, then the optional xsd_optional,
    xsd_nillable and xsd_attrs clauses, and finally an optional
    separator.
    """
    scanner.eat_expected(start)

    fields = []
    while True:
        t = scanner.get(eof_allowed=False)
        if t == end:
            return fields
        doc = scanner.doc

        if t.ttype == TYPE_INT:
            field_id = t.tvalue
            scanner.eat_expected(SYM_COLON)
            t = scanner.get(eof_allowed=False)
        else:
            field_id = None

        if t == ID_REQUIRED or t == ID_OPTIONAL:
            ### delta: we don't warn when this occurs in an arglist
            requiredness = t
        else:
            requiredness = None
            scanner.pushback(t)

        ft = parse_field_type(scanner, True)
        ident = scanner.value_of(TYPE_ID)

        # From here on t may be None: end of input is tolerated by get()
        t = scanner.get()
        if t == SYM_EQ:
            value = parse_const_value(scanner)
            t = scanner.get()
        else:
            value = None

        if t == ID_XSD_OPTIONAL:
            xsd_optional = True
            t = scanner.get()
        else:
            xsd_optional = False

        if t == ID_XSD_NILLABLE:
            xsd_nillable = True
            t = scanner.get()
        else:
            xsd_nillable = False

        if t == ID_XSD_ATTRS:
            # The keyword token is fully consumed together with its field
            # list; pushing it back would make the next iteration read it
            # again as the start of a new field.
            xsd_attrs = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
        else:
            xsd_attrs = None
            if t is not None:
                scanner.pushback(t)

        scanner.eat_commasemi()
        fields.append(Field(ident, ft, doc, field_id, requiredness, value,
                            xsd_optional, xsd_nillable, xsd_attrs))
| 278 | |
| 279 | |
def parse_annotations(scanner):
    """Parse an optional parenthesised annotation list.

    Returns None when the next token is not SYM_LPAREN (the token is
    pushed back) or when input is exhausted.  Otherwise returns a list of
    KeyValuePair(ident, literal), one per "ident = literal" entry; the
    list is empty for "()".
    """
    t = scanner.get()
    if t is None:
        return None
    if t != SYM_LPAREN:
        scanner.pushback(t)
        return None

    annotations = []
    while True:
        # Look for the closing paren before each entry so that an empty
        # list is accepted; end of input inside the list is reported by
        # the scanner rather than pushing None back onto it.
        t = scanner.get(eof_allowed=False)
        if t == SYM_RPAREN:
            return annotations
        scanner.pushback(t)

        ident = scanner.value_of(TYPE_ID)
        scanner.eat_expected(SYM_EQ)
        value = scanner.value_of(TYPE_LIT)
        annotations.append(KeyValuePair(ident, value))
        scanner.eat_commasemi()
| 299 | |
| 300 | |
class Program(object):
    """Accumulates the headers and definitions found in one Thrift file."""

    def __init__(self):
        # header entries
        self.includes = []
        self.namespaces = []
        self.cpp_includes = []
        self.php_namespace = None
        self.xsd_namespace = None
        # definitions
        self.consts = []
        self.typedefs = []
        self.enums = []
        self.structs = []
        self.exceptions = []
        self.services = []

    def add_include(self, include):
        """Record an include."""
        self.includes.append(include)

    def add_namespace(self, lang, namespace):
        """Record a Namespace built from lang and namespace."""
        entry = Namespace(lang, namespace)
        self.namespaces.append(entry)

    def add_cpp_include(self, include):
        """Record a cpp_include."""
        self.cpp_includes.append(include)

    def set_php_namespace(self, namespace):
        """Replace the php namespace."""
        self.php_namespace = namespace

    def set_xsd_namespace(self, namespace):
        """Replace the xsd namespace."""
        self.xsd_namespace = namespace

    def add_const(self, ident, field_type, value, doc):
        """Record a ConstDef."""
        const_def = ConstDef(ident, field_type, value, doc)
        self.consts.append(const_def)

    def add_typedef(self, ident, field_type, doc):
        """Record a Typedef."""
        typedef = Typedef(ident, field_type, doc)
        self.typedefs.append(typedef)

    def add_enum(self, ident, value, doc):
        """Record an Enum; value is the list of its entries."""
        enum = Enum(ident, value, doc)
        self.enums.append(enum)

    def add_senum(self, ident, values, doc):
        """Record an senum as a Typedef whose FieldType carries values."""
        senum_type = FieldType(values=values)
        typedef = Typedef(ident, senum_type, doc)
        self.typedefs.append(typedef)

    def add_struct(self, ident, fields, annotations, doc):
        """Record a Struct."""
        struct = Struct(ident, fields, annotations, doc)
        self.structs.append(struct)

    def add_exception(self, ident, fields, doc):
        """Record an Exception definition (this module's class)."""
        exc_def = Exception(ident, fields, doc)
        self.exceptions.append(exc_def)

    def add_service(self, ident, extends, functions, doc):
        """Record a Service."""
        service = Service(ident, extends, functions, doc)
        self.services.append(service)
| 350 | |
| 351 | |
class Service(object):
    """A service definition: name, optional parent, functions and doc."""

    def __init__(self, ident, extends, functions, doc):
        self.ident, self.extends = ident, extends
        self.functions, self.doc = functions, doc
| 358 | |
| 359 | |
class Function(object):
    """One function declared inside a service.

    ident      -- function name
    is_async   -- true when the declaration carried the async keyword
    field_type -- return type (a FieldType)
    params     -- list of parameter fields
    throws     -- list of fields from the throws clause, or None
    doc        -- doc text captured by the scanner

    The second positional parameter used to be spelled "async", which is
    a reserved word from Python 3.7 on and made this class a syntax
    error.  Its position is unchanged, and the value is still published
    under the legacy attribute name for code that reads it with
    getattr(func, 'async').
    """

    def __init__(self, ident, is_async, field_type, params, throws, doc):
        self.ident = ident
        self.is_async = is_async
        # legacy attribute; cannot be written as "self.async" any more
        setattr(self, 'async', is_async)
        self.field_type = field_type
        self.params = params
        self.throws = throws
        self.doc = doc
| 368 | |
| 369 | |
class Enum(object):
    """An enum definition holding its EnumValue entries.

    Entries whose value is None are numbered in place: each receives the
    preceding entry's value plus one, stored as a string.  A leading
    entry without an explicit value is numbered '0'; previously that case
    raised TypeError as soon as the second entry was implicit too.
    """

    def __init__(self, ident, values, doc):
        self.ident = ident
        self.values = values
        self.doc = doc

        previous = None
        for entry in values:
            if entry.value is None:
                ### keep as integer?
                if previous is None:
                    entry.value = '0'
                else:
                    entry.value = str(int(previous.value) + 1)
            previous = entry
| 380 | |
| 381 | |
class EnumValue(object):
    """A single enum entry: its name, value (None when implicit) and doc."""

    def __init__(self, ident, value, doc):
        self.ident, self.value, self.doc = ident, value, doc
| 387 | |
| 388 | |
class Field(object):
    """One entry of a field list (struct member, parameter or throws item).

    value, when given, must be a ConstValue holding the field's default.
    """

    def __init__(self, ident, field_type, doc, field_id, requiredness, value,
                 xsd_optional, xsd_nillable, xsd_attrs):
        if value is not None:
            assert isinstance(value, ConstValue)

        self.ident, self.field_type, self.doc = ident, field_type, doc
        self.field_id, self.requiredness = field_id, requiredness
        self.value = value
        self.xsd_optional, self.xsd_nillable = xsd_optional, xsd_nillable
        self.xsd_attrs = xsd_attrs
| 403 | |
| 404 | |
class FieldType(object):
    """Describes a type reference: base/named type, container or senum.

    The stored ident is derived from whichever argument is supplied, in
    this order of precedence: map_from, set_of, list_of, values, and
    finally the explicit ident (which must then not be None).
    """

    def __init__(self, ident=None, cpp_type=None, map_from=None, map_to=None,
                 set_of=None, list_of=None, annotations=None, values=None):
        self.ident = self._pick_ident(ident, map_from, set_of, list_of,
                                      values)
        self.cpp_type = cpp_type
        self.map_from, self.map_to = map_from, map_to
        self.set_of = set_of
        self.list_of = list_of
        self.annotations = annotations
        self.values = values

    @staticmethod
    def _pick_ident(ident, map_from, set_of, list_of, values):
        """Return the ident implied by the constructor arguments."""
        if map_from is not None:
            return ID_MAP
        if set_of is not None:
            return ID_SET
        if list_of is not None:
            return ID_LIST
        if values is not None:
            # a FieldType built from values is given the string ident
            return ID_STRING
        assert ident is not None
        return ident
| 426 | |
| 427 | |
class KeyValuePair(object):
    """A key/value pair, used for annotations and constant map entries."""

    def __init__(self, key, value):
        self.key, self.value = key, value
| 432 | |
| 433 | |
class ConstDef(object):
    """A const definition; value must be a ConstValue."""

    def __init__(self, ident, field_type, value, doc):
        assert isinstance(value, ConstValue)

        self.ident, self.field_type = ident, field_type
        self.value, self.doc = value, doc
| 442 | |
| 443 | |
class ConstValue(object):
    """A parsed constant, tagged with one of the CTYPE_* kinds below."""

    # kind tags stored in self.ctype
    CTYPE_BASE = 'base'
    CTYPE_LIST = 'list'
    CTYPE_MAP = 'map'

    def __init__(self, ctype, value):
        self.ctype, self.value = ctype, value
| 452 | |
| 453 | |
class Typedef(object):
    """A typedef: the new name, the type it stands for, and its doc."""

    def __init__(self, ident, field_type, doc):
        self.ident, self.field_type, self.doc = ident, field_type, doc
| 459 | |
| 460 | |
class Struct(object):
    """A struct definition: name, fields, annotations and doc."""

    def __init__(self, ident, fields, annotations, doc):
        self.ident, self.fields = ident, fields
        self.annotations, self.doc = annotations, doc
| 467 | |
| 468 | |
class Exception(object):
    """An exception definition from the IDL: name, fields and doc.

    NOTE: within this module the name hides the builtin Exception.
    """

    def __init__(self, ident, fields, doc):
        self.ident, self.fields, self.doc = ident, fields, doc
| 474 | |
| 475 | |
class Namespace(object):
    """A namespace declaration: the language and the namespace name."""

    def __init__(self, lang, namespace):
        self.lang, self.namespace = lang, namespace
| 480 | |
| 481 | |
# Type keywords that parse_field_type() turns straight into a FieldType
# without any further parsing.
BASE_TYPES = [
    ID_STRING, ID_BINARY, ID_SLIST,
    ID_BOOL, ID_BYTE,
    ID_I16, ID_I32, ID_I64,
    ID_DOUBLE,
]
| 493 | |
| 494 | |
if __name__ == '__main__':
    import sys

    # Parse the file named by the first command-line argument.  The
    # handle is closed as soon as the text has been read instead of being
    # left for the garbage collector.
    with open(sys.argv[1]) as idl_file:
        parse(idl_file.read())