Snapshot (back up) my work-in-progress before I hop on a plane.
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/branches/py-compiler@739520 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/py/src/parser.py b/compiler/py/src/parser.py
new file mode 100644
index 0000000..b348da2
--- /dev/null
+++ b/compiler/py/src/parser.py
@@ -0,0 +1,497 @@
+#
+# simple parser for Thrift.
+#
+
+# Note: the scanner module is designed to allow this wildcard import
+from scanner import *
+
+
+def parse(contents):
+
+ scanner = Scanner(contents)
+ program = Program()
+
+ while True:
+
+ t = scanner.get()
+ if t is None:
+ return program
+
+ ### delta: we don't enforce HeaderList followed by DefinitionList
+ ### delta: deprecated namespaces are not parsed
+
+ if t == ID_INCLUDE:
+ inc = scanner.value_of(TYPE_LIT)
+ program.add_include(inc)
+ elif t == ID_NAMESPACE:
+ lang = scanner.value_of(TYPE_ID)
+ ns = scanner.value_of(TYPE_ID)
+ program.add_namespace(lang, ns)
+ elif t == ID_CPP_INCLUDE:
+ inc = scanner.value_of(TYPE_LIT)
+ program.add_cpp_include(inc)
+ elif t == ID_PHP_NAMESPACE:
+ ns = scanner.value_of(TYPE_ID)
+ program.set_php_namespace(ns)
+ elif t == ID_XSD_NAMESPACE:
+ ns = scanner.value_of(TYPE_LIT)
+ program.set_xsd_namespace(ns)
+ elif t == ID_CONST:
+ doc = scanner.doc
+ ft = parse_field_type(scanner, True)
+ ident = scanner.value_of(TYPE_ID)
+ scanner.eat_expected(SYM_EQ)
+ value = parse_const_value(scanner)
+ scanner.eat_commasemi()
+ program.add_const(ident, ft, value, doc)
+ elif t == ID_TYPEDEF:
+ doc = scanner.doc
+ ft = parse_field_type(scanner, False)
+ ident = scanner.value_of(TYPE_ID)
+ program.add_typedef(ident, ft, doc)
+ elif t == ID_ENUM:
+ enum_doc = scanner.doc
+ enum_ident = scanner.value_of(TYPE_ID)
+ scanner.eat_expected(SYM_LBRACE)
+ values = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ if t == SYM_RBRACE:
+ break
+ if t.ttype != TYPE_ID:
+ raise ExpectedType(TYPE_ID, t.ttype, scanner.lineno)
+ doc = scanner.doc
+ ident = t.tvalue
+ t = scanner.get(eof_allowed=False)
+ if t == SYM_EQ:
+ value = scanner.value_of(TYPE_INT)
+ else:
+ scanner.pushback(t)
+ value = None
+ scanner.eat_commasemi()
+ values.append(EnumValue(ident, value, doc))
+ program.add_enum(enum_ident, values, enum_doc)
+ elif t == ID_SENUM:
+ doc = scanner.doc
+ ident = scanner.value_of(TYPE_ID)
+ scanner.eat_expected(SYM_LBRACE)
+ values = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ if t == SYM_RBRACE:
+ break
+ if t.ttype != TYPE_LIT:
+ raise ExpectedType(TYPE_LIT, t.ttype, scanner.lineno)
+ scanner.eat_commasemi()
+ values.append(t.tvalue)
+ program.add_senum(ident, values, doc)
+ elif t == ID_STRUCT:
+ doc = scanner.doc
+ ident = scanner.value_of(TYPE_ID)
+ t = scanner.get(eof_allowed=False)
+ if t == ID_XSD_ALL:
+ xsd_all = True
+ else:
+ xsd_all = False
+ scanner.pushback(t)
+ fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+ annotations = parse_annotations(scanner)
+ program.add_struct(ident, fields, annotations, doc)
+ elif t == ID_EXCEPTION:
+ doc = scanner.doc
+ ident = scanner.value_of(TYPE_ID)
+ fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+ program.add_exception(ident, fields, doc)
+ elif t == ID_SERVICE:
+ svc_doc = scanner.doc
+ svc_ident = scanner.value_of(TYPE_ID)
+ t = scanner.get(eof_allowed=False)
+ if t == ID_EXTENDS:
+ extends = t.tvalue
+ t = scanner.get(eof_allowed=False)
+ else:
+ extends = None
+ if t != SYM_LBRACE:
+ raise ExpectedError(SYM_LBRACE, t, scanner.lineno)
+ functions = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ doc = scanner.doc
+ if t == SYM_RBRACE:
+ break
+ if t == ID_ASYNC:
+ async = True
+ t = scanner.get(eof_allowed=False)
+ else:
+ async = False
+ if t == ID_VOID:
+ ft = FieldType(ident=ID_VOID)
+ else:
+ scanner.pushback(t)
+ ft = parse_field_type(scanner, True)
+ ident = scanner.value_of(TYPE_ID)
+ params = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
+ t = scanner.get(eof_allowed=False)
+ if t == ID_THROWS:
+ throws = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
+ else:
+ throws = None
+ scanner.pushback(t)
+ scanner.eat_commasemi()
+ functions.append(Function(ident, async, ft, params, throws, doc))
+ program.add_service(svc_ident, extends, functions, svc_doc)
+ else:
+ raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_field_type(scanner, ident_allowed):
+ ident = scanner.get_type(TYPE_ID)
+ if ident in BASE_TYPES:
+ return FieldType(ident=ident)
+
+ cpp_type = None
+
+ if ident == ID_MAP:
+ t = scanner.get(eof_allowed=False)
+ if t == ID_CPP_TYPE:
+ cpp_type = scanner.value_of(TYPE_LITERAL)
+ t = scanner.get()
+ if t != SYM_LT:
+ raise ExpectedError(SYM_LT, t, scanner.lineno)
+ map_from = parse_field_type(scanner, True)
+ scanner.eat_expected(SYM_COMMA)
+ map_to = parse_field_type(scanner, True)
+ scanner.eat_expected(SYM_GT)
+ return FieldType(cpp_type=cpp_type, map_from=map_from, map_to=map_to,
+ annotations=parse_annotations(scanner))
+
+ if ident == ID_SET:
+ t = scanner.get(eof_allowed=False)
+ if t == ID_CPP_TYPE:
+ cpp_type = scanner.value_of(TYPE_LITERAL)
+ t = scanner.get()
+ if t != SYM_LT:
+ raise ExpectedError(SYM_LT, t, scanner.lineno)
+ set_of = parse_field_type(scanner, True)
+ scanner.eat_expected(SYM_GT)
+ return FieldType(cpp_type=cpp_type, set_of=set_of,
+ annotations=parse_annotations(scanner))
+
+ if ident == ID_LIST:
+ scanner.eat_expected(SYM_LT)
+ list_of = parse_field_type(scanner, True)
+ scanner.eat_expected(SYM_GT)
+ t = scanner.get()
+ if t == ID_CPP_TYPE:
+ cpp_type = scanner.value_of(TYPE_LITERAL)
+ elif t is not None:
+ scanner.pushback(t)
+ return FieldType(cpp_type=cpp_type, list_of=list_of,
+ annotations=parse_annotations(scanner))
+
+ # random identifiers are allowed for FieldType, but not DefinitionType
+ if ident_allowed:
+ return FieldType(ident=ident)
+
+ raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_const_value(scanner):
+ value = scanner.get(eof_allowed=False)
+ if value.ttype in [TYPE_INT, TYPE_HEX, TYPE_DUB, TYPE_LIT, TYPE_ID]:
+ return ConstValue(ConstValue.CTYPE_BASE, value)
+
+ if value == SYM_LBRKT:
+ values = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ if t == SYM_RBRKT:
+ return ConstValue(ConstValue.CTYPE_LIST, values)
+ scanner.pushback(t)
+ scanner.eat_commasemi()
+ values.append(parse_const_value(scanner))
+
+ if value == SYM_LBRACE:
+ values = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ if t == SYM_RBRACE:
+ return ConstValue(ConstValue.CTYPE_MAP, values)
+ scanner.pushback(t)
+ key = parse_const_value(scanner)
+ scanner.eat_expected(SYM_COLON)
+ value = parse_const_value(scanner)
+ scanner.eat_commasemi()
+ values.append(KeyValuePair(key, value))
+
+ raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_field_list(scanner, start, end):
+ scanner.eat_expected(start)
+
+ fields = [ ]
+ while True:
+ t = scanner.get(eof_allowed=False)
+ if t == end:
+ return fields
+ doc = scanner.doc
+ if t.ttype == TYPE_INT:
+ field_id = t.tvalue
+ scanner.eat_expected(SYM_COLON)
+ t = scanner.get(eof_allowed=False)
+ else:
+ field_id = None
+ if t == ID_REQUIRED or t == ID_OPTIONAL:
+ ### delta: we don't warn when this occurs in an arglist
+ requiredness = t
+ else:
+ requiredness = None
+ scanner.pushback(t)
+ ft = parse_field_type(scanner, True)
+ ident = scanner.value_of(TYPE_ID)
+ t = scanner.get()
+ if t == SYM_EQ:
+ value = parse_const_value(scanner)
+ t = scanner.get()
+ else:
+ value = None
+ if t == ID_XSD_OPTIONAL:
+ xsd_optional = True
+ t = scanner.get()
+ else:
+ xsd_optional = False
+ if t == ID_XSD_NILLABLE:
+ xsd_nillable = True
+ t = scanner.get()
+ else:
+ xsd_nillable = False
+ if t == ID_XSD_ATTRS:
+ xsd_attrs = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+ else:
+ xsd_attrs = None
+ if t is not None:
+ scanner.pushback(t)
+ scanner.eat_commasemi()
+ fields.append(Field(ident, ft, doc, field_id, requiredness, value,
+ xsd_optional, xsd_nillable, xsd_attrs))
+
+
+def parse_annotations(scanner):
+ t = scanner.get()
+ if t is None:
+ return None
+ if t != SYM_LPAREN:
+ scanner.pushback(t)
+ return None
+ annotations = [ ]
+ while True:
+ ident = scanner.value_of(TYPE_ID)
+ scanner.eat_expected(SYM_EQ)
+ value = scanner.value_of(TYPE_LIT)
+ annotations.append(KeyValuePair(ident, value))
+
+ scanner.eat_commasemi()
+ t = scanner.get()
+ if t == SYM_RPAREN:
+ return annotations
+ scanner.pushback(t)
+
+
+class Program(object):
+ def __init__(self):
+ self.includes = [ ]
+ self.namespaces = [ ]
+ self.cpp_includes = [ ]
+ self.php_namespace = None
+ self.xsd_namespace = None
+ self.consts = [ ]
+ self.typedefs = [ ]
+ self.enums = [ ]
+ self.structs = [ ]
+ self.exceptions = [ ]
+ self.services = [ ]
+
+ def add_include(self, include):
+ self.includes.append(include)
+
+ def add_namespace(self, lang, namespace):
+ self.namespaces.append(Namespace(lang, namespace))
+
+ def add_cpp_include(self, include):
+ self.cpp_includes.append(include)
+
+ def set_php_namespace(self, namespace):
+ self.php_namespace = namespace
+
+ def set_xsd_namespace(self, namespace):
+ self.xsd_namespace = namespace
+
+ def add_const(self, ident, field_type, value, doc):
+ self.consts.append(ConstDef(ident, field_type, value, doc))
+
+ def add_typedef(self, ident, field_type, doc):
+ self.typedefs.append(Typedef(ident, field_type, doc))
+
+ def add_enum(self, ident, value, doc):
+ self.enums.append(Enum(ident, value, doc))
+
+ def add_senum(self, ident, values, doc):
+ self.typedefs.append(Typedef(ident, FieldType(values=values), doc))
+
+ def add_struct(self, ident, fields, annotations, doc):
+ self.structs.append(Struct(ident, fields, annotations, doc))
+
+ def add_exception(self, ident, fields, doc):
+ self.exceptions.append(Exception(ident, fields, doc))
+
+ def add_service(self, ident, extends, functions, doc):
+ self.services.append(Service(ident, extends, functions, doc))
+
+
+class Service(object):
+ def __init__(self, ident, extends, functions, doc):
+ self.ident = ident
+ self.extends = extends
+ self.functions = functions
+ self.doc = doc
+
+
+class Function(object):
+ def __init__(self, ident, async, field_type, params, throws, doc):
+ self.ident = ident
+ self.async = async
+ self.field_type = field_type
+ self.params = params
+ self.throws = throws
+ self.doc = doc
+
+
+class Enum(object):
+ def __init__(self, ident, values, doc):
+ self.ident = ident
+ self.values = values
+ self.doc = doc
+
+ for i in range(1, len(values)):
+ if values[i].value is None:
+ ### keep as integer?
+ values[i].value = str(int(values[i - 1].value) + 1)
+
+
+class EnumValue(object):
+ def __init__(self, ident, value, doc):
+ self.ident = ident
+ self.value = value
+ self.doc = doc
+
+
+class Field(object):
+ def __init__(self, ident, field_type, doc, field_id, requiredness, value,
+ xsd_optional, xsd_nillable, xsd_attrs):
+ assert value is None or isinstance(value, ConstValue)
+
+ self.ident = ident
+ self.field_type = field_type
+ self.doc = doc
+ self.field_id = field_id
+ self.requiredness = requiredness
+ self.value = value
+ self.xsd_optional = xsd_optional
+ self.xsd_nillable = xsd_nillable
+ self.xsd_attrs = xsd_attrs
+
+
+class FieldType(object):
+ def __init__(self, ident=None, cpp_type=None, map_from=None, map_to=None,
+ set_of=None, list_of=None, annotations=None, values=None):
+ if map_from is not None:
+ self.ident = ID_MAP
+ elif set_of is not None:
+ self.ident = ID_SET
+ elif list_of is not None:
+ self.ident = ID_LIST
+ elif values is not None:
+ self.ident = ID_STRING
+ else:
+ assert ident is not None
+ self.ident = ident
+ self.cpp_type = cpp_type
+ self.map_from = map_from
+ self.map_to = map_to
+ self.set_of = set_of
+ self.list_of = list_of
+ self.annotations = annotations
+ self.values = values
+
+
+class KeyValuePair(object):
+ def __init__(self, key, value):
+ self.key = key
+ self.value = value
+
+
+class ConstDef(object):
+ def __init__(self, ident, field_type, value, doc):
+ assert isinstance(value, ConstValue)
+
+ self.ident = ident
+ self.field_type = field_type
+ self.value = value
+ self.doc = doc
+
+
+class ConstValue(object):
+ CTYPE_BASE = 'base'
+ CTYPE_LIST = 'list'
+ CTYPE_MAP = 'map'
+
+ def __init__(self, ctype, value):
+ self.ctype = ctype
+ self.value = value
+
+
+class Typedef(object):
+ def __init__(self, ident, field_type, doc):
+ self.ident = ident
+ self.field_type = field_type
+ self.doc = doc
+
+
+class Struct(object):
+ def __init__(self, ident, fields, annotations, doc):
+ self.ident = ident
+ self.fields = fields
+ self.annotations = annotations
+ self.doc = doc
+
+
+class Exception(object):
+ def __init__(self, ident, fields, doc):
+ self.ident = ident
+ self.fields = fields
+ self.doc = doc
+
+
+class Namespace(object):
+ def __init__(self, lang, namespace):
+ self.lang = lang
+ self.namespace = namespace
+
+
+BASE_TYPES = [
+ ID_STRING,
+ ID_BINARY,
+ ID_SLIST,
+ ID_BOOL,
+ ID_BYTE,
+ ID_I16,
+ ID_I32,
+ ID_I64,
+ ID_DOUBLE,
+ ]
+
+
+if __name__ == '__main__':
+ import sys
+ parse(open(sys.argv[1]).read())