Snapshot (back up) my work-in-progress before I hop on a plane.


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/branches/py-compiler@739520 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/py/src/gen.py b/compiler/py/src/gen.py
new file mode 100644
index 0000000..d3c1126
--- /dev/null
+++ b/compiler/py/src/gen.py
@@ -0,0 +1,169 @@
+#
+# simple generator for Thrift
+#
+
+import sys
+import os
+import cStringIO
+import operator
+
+import parser
+import ezt
+
+
+### temporary: only the Python templates are wired up.
+# Locate the template directory next to this module so the generator does
+# not depend on the location of one developer's checkout.
+PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                    'templates-py')
+t_py = ezt.Template(os.path.join(PATH, 'py.ezt'),
+                    compress_whitespace=False)
+t_py_ser = ezt.Template(os.path.join(PATH, 'py_ser.ezt'),
+                        compress_whitespace=False)
+t_py_deser = ezt.Template(os.path.join(PATH, 'py_deser.ezt'),
+                          compress_whitespace=False)
+t_py_cvalue = ezt.Template(os.path.join(PATH, 'py_cvalue.ezt'),
+                           compress_whitespace=False)
+
+
+def generate(program):
+  "Render PROGRAM through the top-level py.ezt template onto stdout."
+  root = Proxy(program)
+  t_py.generate(sys.stdout, root)
+
+
+class AutoVars(object):
+  """Hands out unique variable names to the templates.
+
+  Reading any attribute NAME returns NAME followed by a running counter.
+  Within one context the same attribute keeps returning the same string.
+  """
+
+  def __init__(self):
+    self._counter = 0      # suffix for the next generated name
+    self._mapping = { }    # attribute name -> generated name (this context)
+    self._saved = [ ]      # mappings of the enclosing contexts
+
+  def open_context(self):
+    "Start a fresh name mapping, remembering the current one."
+    self._saved.append(self._mapping)
+    self._mapping = { }
+
+  def close_context(self):
+    "Restore the mapping saved by the matching open_context()."
+    self._mapping = self._saved.pop()
+
+  def __getattr__(self, name):
+    # never invent a value for special-method lookups
+    if name.startswith('__'):
+      raise AttributeError(name)
+
+    if name in self._mapping:
+      return self._mapping[name]
+    # the counter is never reset, so names stay unique across contexts
+    var = '%s%d' % (name, self._counter)
+    self._counter += 1
+    self._mapping[name] = var
+    return var
+
+
+# The shared instance that Proxy (attribute 'auto') and Subtemplate refer to.
+g_auto = AutoVars()
+
+
+class Proxy(object):
+  """Template-facing copy of a parsed object.
+
+  Every instance attribute of OB is copied onto the proxy. Values (or the
+  items of a non-empty list) that have a specialised proxy class are wrapped
+  in it; list items that merely carry a __dict__ are wrapped in Proxy.
+  """
+
+  def __init__(self, ob):
+    self._ob = ob
+
+    for attr, raw in vars(ob).items():
+      wrapped = raw
+      wrapper = custom_proxy(raw)
+      if wrapper:
+        wrapped = wrapper(raw)
+      elif isinstance(raw, list) and raw:
+        # lists are homogenous, so the first item decides for all of them
+        first = raw[0]
+        wrapper = custom_proxy(first)
+        if not wrapper and hasattr(first, '__dict__'):
+          wrapper = Proxy
+        if wrapper:
+          wrapped = [wrapper(item) for item in raw]
+      setattr(self, attr, wrapped)
+
+  def __getattr__(self, name):
+    # only reached for names that were not copied from the wrapped object
+    if name != 'auto':
+      raise AttributeError(name)
+    return g_auto
+
+
+class ProxyFieldType(Proxy):
+  "Proxy for parser.FieldType adding 'serializer' and 'deserializer'."
+
+  def __getattr__(self, name):
+    if name == 'serializer':
+      template = t_py_ser
+    elif name == 'deserializer':
+      template = t_py_deser
+    else:
+      return Proxy.__getattr__(self, name)
+    # rendering is deferred until the template picks a result variable
+    return Subtemplate(template, self)
+
+
+class Subtemplate(object):
+  """Deferred rendering of TEMPLATE against DATA.
+
+  Reading attribute NAME renders the template and returns the text. NAME
+  selects the auto-variable handed to the template as DATA.result_var.
+  """
+
+  def __init__(self, template, data):
+    self._template = template
+    self._data = data
+
+  def __getattr__(self, name):
+    # jam the name of the result variable into the data params
+    self._data.result_var = getattr(g_auto, name)
+
+    # use a new variable context for this template generation, and make
+    # sure the caller's context comes back even if rendering raises
+    g_auto.open_context()
+    try:
+      return gen_value(self._template, self._data)
+    finally:
+      g_auto.close_context()
+
+
+class ProxyField(Proxy):
+  "Proxy for parser.Field adding the computed 'type_enum' attribute."
+
+  def __getattr__(self, name):
+    if name != 'type_enum':
+      return Proxy.__getattr__(self, name)
+    # types without a TYPE_ENUM entry fall back to the identifier's text
+    ident = self._ob.field_type.ident
+    return TYPE_ENUM.get(ident, ident.tvalue)
+
+
+class ProxyStruct(Proxy):
+  "Proxy for parser.Struct adding the computed 'sorted_fields' list."
+
+  def __getattr__(self, name):
+    if name == 'sorted_fields':
+      # The result is indexed by field id, with None marking ids that have
+      # no field. Fields without an id, or with a non-positive one, are
+      # left out.
+      ids = [int(f.field_id or -1) for f in self._ob.fields]
+      if not ids:
+        # max() rejects an empty sequence
+        return [ ]
+      fields = [None] * (max(ids) + 1)
+      for field in self._ob.fields:
+        if field.field_id:
+          fid = int(field.field_id)
+          if fid > 0:
+            fields[fid] = ProxyField(field)
+      return fields
+    return Proxy.__getattr__(self, name)
+
+
+class ProxyConstValue(Proxy):
+  "Proxy for parser.ConstValue adding the rendered 'cvalue' attribute."
+
+  def __getattr__(self, name):
+    if name != 'cvalue':
+      return Proxy.__getattr__(self, name)
+    # rendered through the constant-value template on every access
+    return gen_value(t_py_cvalue, self)
+
+
+def custom_proxy(value):
+  "Return the specialised proxy class for VALUE, or None if it has none."
+  for parsed_type, proxy_type in ((parser.FieldType, ProxyFieldType),
+                                  (parser.Field, ProxyField),
+                                  (parser.Struct, ProxyStruct),
+                                  (parser.ConstValue, ProxyConstValue)):
+    if isinstance(value, parsed_type):
+      return proxy_type
+  return None
+
+
+# Maps the token of a type keyword to the expression emitted for it in the
+# generated code. ProxyField.type_enum falls back to the identifier's own
+# text for anything that is not listed here.
+TYPE_ENUM = {
+  parser.ID_STRING: 'TType.STRING',
+  parser.ID_BOOL: 'TType.BOOL',
+  parser.ID_BYTE: 'TType.BYTE',
+  parser.ID_I16: 'TType.I16',
+  parser.ID_I32: 'TType.I32',
+  parser.ID_I64: 'TType.I64',
+  parser.ID_DOUBLE: 'TType.DOUBLE',
+  parser.ID_MAP: 'TType.MAP',
+  parser.ID_SET: 'TType.SET',
+  parser.ID_LIST: 'TType.LIST',
+  # TType.STRUCT and TType.I32 for enums
+  }
+
+
+def gen_value(template, ob):
+  "Render TEMPLATE against OB and return the output as a string."
+  out = cStringIO.StringIO()
+  template.generate(out, ob)
+  text = out.getvalue()
+  return text
+
+
+if __name__ == '__main__':
+  # parse the Thrift file named on the command line and print the result
+  import sys
+  source = open(sys.argv[1]).read()
+  generate(parser.parse(source))
diff --git a/compiler/py/src/parser.py b/compiler/py/src/parser.py
new file mode 100644
index 0000000..b348da2
--- /dev/null
+++ b/compiler/py/src/parser.py
@@ -0,0 +1,497 @@
+#
+# simple parser for Thrift.
+#
+
+# Note: the scanner module is designed to allow this wildcard import
+from scanner import *
+
+
+def parse(contents):
+  """Parse the text of a Thrift file and return the resulting Program.
+
+  Headers and definitions are read one after another until the scanner runs
+  out of tokens. Raises IncorrectSyntax for a token that cannot start a
+  header or definition; the scanner's own exceptions propagate unchanged.
+  """
+
+  scanner = Scanner(contents)
+  program = Program()
+
+  while True:
+
+    t = scanner.get()
+    if t is None:
+      return program
+
+    ### delta: we don't enforce HeaderList followed by DefinitionList
+    ### delta: deprecated namespaces are not parsed
+
+    if t == ID_INCLUDE:
+      inc = scanner.value_of(TYPE_LIT)
+      program.add_include(inc)
+    elif t == ID_NAMESPACE:
+      lang = scanner.value_of(TYPE_ID)
+      ns = scanner.value_of(TYPE_ID)
+      program.add_namespace(lang, ns)
+    elif t == ID_CPP_INCLUDE:
+      inc = scanner.value_of(TYPE_LIT)
+      program.add_cpp_include(inc)
+    elif t == ID_PHP_NAMESPACE:
+      ns = scanner.value_of(TYPE_ID)
+      program.set_php_namespace(ns)
+    elif t == ID_XSD_NAMESPACE:
+      ns = scanner.value_of(TYPE_LIT)
+      program.set_xsd_namespace(ns)
+    elif t == ID_CONST:
+      doc = scanner.doc
+      ft = parse_field_type(scanner, True)
+      ident = scanner.value_of(TYPE_ID)
+      scanner.eat_expected(SYM_EQ)
+      value = parse_const_value(scanner)
+      scanner.eat_commasemi()
+      program.add_const(ident, ft, value, doc)
+    elif t == ID_TYPEDEF:
+      doc = scanner.doc
+      ft = parse_field_type(scanner, False)
+      ident = scanner.value_of(TYPE_ID)
+      program.add_typedef(ident, ft, doc)
+    elif t == ID_ENUM:
+      enum_doc = scanner.doc
+      enum_ident = scanner.value_of(TYPE_ID)
+      scanner.eat_expected(SYM_LBRACE)
+      values = [ ]
+      while True:
+        t = scanner.get(eof_allowed=False)
+        if t == SYM_RBRACE:
+          break
+        if t.ttype != TYPE_ID:
+          raise ExpectedType(TYPE_ID, t.ttype, scanner.lineno)
+        doc = scanner.doc
+        ident = t.tvalue
+        t = scanner.get(eof_allowed=False)
+        if t == SYM_EQ:
+          value = scanner.value_of(TYPE_INT)
+        else:
+          scanner.pushback(t)
+          value = None
+        scanner.eat_commasemi()
+        values.append(EnumValue(ident, value, doc))
+      program.add_enum(enum_ident, values, enum_doc)
+    elif t == ID_SENUM:
+      doc = scanner.doc
+      ident = scanner.value_of(TYPE_ID)
+      scanner.eat_expected(SYM_LBRACE)
+      values = [ ]
+      while True:
+        t = scanner.get(eof_allowed=False)
+        if t == SYM_RBRACE:
+          break
+        if t.ttype != TYPE_LIT:
+          raise ExpectedType(TYPE_LIT, t.ttype, scanner.lineno)
+        scanner.eat_commasemi()
+        values.append(t.tvalue)
+      program.add_senum(ident, values, doc)
+    elif t == ID_STRUCT:
+      doc = scanner.doc
+      ident = scanner.value_of(TYPE_ID)
+      t = scanner.get(eof_allowed=False)
+      ### xsd_all is recognized but not yet recorded on the struct
+      if t == ID_XSD_ALL:
+        xsd_all = True
+      else:
+        xsd_all = False
+        scanner.pushback(t)
+      fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+      annotations = parse_annotations(scanner)
+      program.add_struct(ident, fields, annotations, doc)
+    elif t == ID_EXCEPTION:
+      doc = scanner.doc
+      ident = scanner.value_of(TYPE_ID)
+      fields = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+      program.add_exception(ident, fields, doc)
+    elif t == ID_SERVICE:
+      svc_doc = scanner.doc
+      svc_ident = scanner.value_of(TYPE_ID)
+      t = scanner.get(eof_allowed=False)
+      if t == ID_EXTENDS:
+        # the parent service's name is the identifier after the keyword
+        extends = scanner.value_of(TYPE_ID)
+        t = scanner.get(eof_allowed=False)
+      else:
+        extends = None
+      if t != SYM_LBRACE:
+        raise ExpectedError(SYM_LBRACE, t, scanner.lineno)
+      functions = [ ]
+      while True:
+        t = scanner.get(eof_allowed=False)
+        doc = scanner.doc
+        if t == SYM_RBRACE:
+          break
+        if t == ID_ASYNC:
+          async = True
+          t = scanner.get(eof_allowed=False)
+        else:
+          async = False
+        if t == ID_VOID:
+          ft = FieldType(ident=ID_VOID)
+        else:
+          scanner.pushback(t)
+          ft = parse_field_type(scanner, True)
+        ident = scanner.value_of(TYPE_ID)
+        params = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
+        t = scanner.get(eof_allowed=False)
+        if t == ID_THROWS:
+          throws = parse_field_list(scanner, SYM_LPAREN, SYM_RPAREN)
+        else:
+          throws = None
+          scanner.pushback(t)
+        scanner.eat_commasemi()
+        functions.append(Function(ident, async, ft, params, throws, doc))
+      program.add_service(svc_ident, extends, functions, svc_doc)
+    else:
+      raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_field_type(scanner, ident_allowed):
+  """Parse one type reference and return it as a FieldType.
+
+  Base types and the map/set/list containers are always accepted. Any other
+  identifier is accepted only when IDENT_ALLOWED is true; otherwise
+  IncorrectSyntax is raised.
+  """
+  ident = scanner.get_type(TYPE_ID)
+  if ident in BASE_TYPES:
+    return FieldType(ident=ident)
+
+  cpp_type = None
+
+  if ident == ID_MAP:
+    # an optional cpp_type precedes the '<'. a '<' must follow, so running
+    # out of input here is an error
+    t = scanner.get(eof_allowed=False)
+    if t == ID_CPP_TYPE:
+      cpp_type = scanner.value_of(TYPE_LIT)
+      t = scanner.get(eof_allowed=False)
+    if t != SYM_LT:
+      raise ExpectedError(SYM_LT, t, scanner.lineno)
+    map_from = parse_field_type(scanner, True)
+    scanner.eat_expected(SYM_COMMA)
+    map_to = parse_field_type(scanner, True)
+    scanner.eat_expected(SYM_GT)
+    return FieldType(cpp_type=cpp_type, map_from=map_from, map_to=map_to,
+                     annotations=parse_annotations(scanner))
+
+  if ident == ID_SET:
+    t = scanner.get(eof_allowed=False)
+    if t == ID_CPP_TYPE:
+      cpp_type = scanner.value_of(TYPE_LIT)
+      t = scanner.get(eof_allowed=False)
+    if t != SYM_LT:
+      raise ExpectedError(SYM_LT, t, scanner.lineno)
+    set_of = parse_field_type(scanner, True)
+    scanner.eat_expected(SYM_GT)
+    return FieldType(cpp_type=cpp_type, set_of=set_of,
+                     annotations=parse_annotations(scanner))
+
+  if ident == ID_LIST:
+    scanner.eat_expected(SYM_LT)
+    list_of = parse_field_type(scanner, True)
+    scanner.eat_expected(SYM_GT)
+    # for lists the optional cpp_type trails the '>'. the input may simply
+    # end here, so check for EOF before comparing tokens
+    t = scanner.get()
+    if t is not None:
+      if t == ID_CPP_TYPE:
+        cpp_type = scanner.value_of(TYPE_LIT)
+      else:
+        scanner.pushback(t)
+    return FieldType(cpp_type=cpp_type, list_of=list_of,
+                     annotations=parse_annotations(scanner))
+
+  # random identifiers are allowed for FieldType, but not DefinitionType
+  if ident_allowed:
+    return FieldType(ident=ident)
+
+  raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_const_value(scanner):
+  """Parse a constant and return it as a ConstValue.
+
+  A single INT/HEX/DUB/LIT/ID token becomes a CTYPE_BASE value holding that
+  token. '[' ... ']' becomes CTYPE_LIST holding a list of ConstValues, and
+  '{' key ':' value ... '}' becomes CTYPE_MAP holding a list of
+  KeyValuePairs. Raises IncorrectSyntax for anything else.
+  """
+  value = scanner.get(eof_allowed=False)
+  if value.ttype in [TYPE_INT, TYPE_HEX, TYPE_DUB, TYPE_LIT, TYPE_ID]:
+    return ConstValue(ConstValue.CTYPE_BASE, value)
+
+  if value == SYM_LBRKT:
+    values = [ ]
+    while True:
+      t = scanner.get(eof_allowed=False)
+      if t == SYM_RBRKT:
+        return ConstValue(ConstValue.CTYPE_LIST, values)
+      scanner.pushback(t)
+      # the optional separator follows each item (as in the map branch), so
+      # a trailing comma before ']' is accepted
+      values.append(parse_const_value(scanner))
+      scanner.eat_commasemi()
+
+  if value == SYM_LBRACE:
+    values = [ ]
+    while True:
+      t = scanner.get(eof_allowed=False)
+      if t == SYM_RBRACE:
+        return ConstValue(ConstValue.CTYPE_MAP, values)
+      scanner.pushback(t)
+      key = parse_const_value(scanner)
+      scanner.eat_expected(SYM_COLON)
+      item = parse_const_value(scanner)
+      scanner.eat_commasemi()
+      values.append(KeyValuePair(key, item))
+
+  raise IncorrectSyntax(scanner.lineno)
+
+
+def parse_field_list(scanner, start, end):
+  """Parse the fields between the START and END tokens; return Field objects.
+
+  Used for struct and exception bodies, function parameters, throws clauses
+  and xsd_attrs blocks. Raises UnexpectedEOF if the input ends before END.
+  """
+  scanner.eat_expected(start)
+
+  fields = [ ]
+  while True:
+    t = scanner.get(eof_allowed=False)
+    if t == end:
+      return fields
+    doc = scanner.doc
+    if t.ttype == TYPE_INT:
+      field_id = t.tvalue
+      scanner.eat_expected(SYM_COLON)
+      t = scanner.get(eof_allowed=False)
+    else:
+      field_id = None
+    if t == ID_REQUIRED or t == ID_OPTIONAL:
+      ### delta: we don't warn when this occurs in an arglist
+      requiredness = t
+    else:
+      requiredness = None
+      scanner.pushback(t)
+    ft = parse_field_type(scanner, True)
+    ident = scanner.value_of(TYPE_ID)
+    # from here on at least the END token must still follow, so EOF is
+    # always an error
+    t = scanner.get(eof_allowed=False)
+    if t == SYM_EQ:
+      value = parse_const_value(scanner)
+      t = scanner.get(eof_allowed=False)
+    else:
+      value = None
+    if t == ID_XSD_OPTIONAL:
+      xsd_optional = True
+      t = scanner.get(eof_allowed=False)
+    else:
+      xsd_optional = False
+    if t == ID_XSD_NILLABLE:
+      xsd_nillable = True
+      t = scanner.get(eof_allowed=False)
+    else:
+      xsd_nillable = False
+    if t == ID_XSD_ATTRS:
+      xsd_attrs = parse_field_list(scanner, SYM_LBRACE, SYM_RBRACE)
+    else:
+      xsd_attrs = None
+      scanner.pushback(t)
+    scanner.eat_commasemi()
+    fields.append(Field(ident, ft, doc, field_id, requiredness, value,
+                        xsd_optional, xsd_nillable, xsd_attrs))
+
+
+def parse_annotations(scanner):
+  """Parse an optional '(' key = "value" ... ')' annotation list.
+
+  Returns a list of KeyValuePairs, or None when the next token is not '('
+  (that token is pushed back) or the input has ended. Raises UnexpectedEOF
+  if the input ends inside the list.
+  """
+  t = scanner.get()
+  if t is None:
+    return None
+  if t != SYM_LPAREN:
+    scanner.pushback(t)
+    return None
+  annotations = [ ]
+  while True:
+    ident = scanner.value_of(TYPE_ID)
+    scanner.eat_expected(SYM_EQ)
+    value = scanner.value_of(TYPE_LIT)
+    annotations.append(KeyValuePair(ident, value))
+
+    scanner.eat_commasemi()
+    # the closing ')' is still outstanding, so EOF is an error here
+    t = scanner.get(eof_allowed=False)
+    if t == SYM_RPAREN:
+      return annotations
+    scanner.pushback(t)
+
+
+class Program(object):
+  "Everything collected from one parsed Thrift file."
+
+  def __init__(self):
+    # headers
+    self.includes = [ ]
+    self.cpp_includes = [ ]
+    self.namespaces = [ ]
+    self.php_namespace = None
+    self.xsd_namespace = None
+    # definitions
+    self.consts = [ ]
+    self.typedefs = [ ]
+    self.enums = [ ]
+    self.structs = [ ]
+    self.exceptions = [ ]
+    self.services = [ ]
+
+  def add_include(self, include):
+    "Record an 'include' header."
+    self.includes.append(include)
+
+  def add_namespace(self, lang, namespace):
+    "Record a 'namespace' header as a Namespace object."
+    entry = Namespace(lang, namespace)
+    self.namespaces.append(entry)
+
+  def add_cpp_include(self, include):
+    "Record a 'cpp_include' header."
+    self.cpp_includes.append(include)
+
+  def set_php_namespace(self, namespace):
+    "Remember the PHP namespace; a later call replaces an earlier one."
+    self.php_namespace = namespace
+
+  def set_xsd_namespace(self, namespace):
+    "Remember the XSD namespace; a later call replaces an earlier one."
+    self.xsd_namespace = namespace
+
+  def add_const(self, ident, field_type, value, doc):
+    "Record a constant definition."
+    const = ConstDef(ident, field_type, value, doc)
+    self.consts.append(const)
+
+  def add_typedef(self, ident, field_type, doc):
+    "Record a typedef."
+    typedef = Typedef(ident, field_type, doc)
+    self.typedefs.append(typedef)
+
+  def add_enum(self, ident, value, doc):
+    "Record an enum; VALUE is its list of EnumValue objects."
+    enum = Enum(ident, value, doc)
+    self.enums.append(enum)
+
+  def add_senum(self, ident, values, doc):
+    "Record an senum, stored as a typedef of a FieldType carrying VALUES."
+    senum_type = FieldType(values=values)
+    self.typedefs.append(Typedef(ident, senum_type, doc))
+
+  def add_struct(self, ident, fields, annotations, doc):
+    "Record a struct definition."
+    struct = Struct(ident, fields, annotations, doc)
+    self.structs.append(struct)
+
+  def add_exception(self, ident, fields, doc):
+    "Record an exception definition (this module's Exception class)."
+    exc = Exception(ident, fields, doc)
+    self.exceptions.append(exc)
+
+  def add_service(self, ident, extends, functions, doc):
+    "Record a service definition."
+    service = Service(ident, extends, functions, doc)
+    self.services.append(service)
+
+
+class Service(object):
+  "A parsed service: its name, parent (or None), functions and doc."
+
+  def __init__(self, ident, extends, functions, doc):
+    self.ident, self.extends = ident, extends
+    self.functions = functions
+    self.doc = doc
+
+
+class Function(object):
+  "One function of a service, as parsed."
+
+  def __init__(self, ident, async, field_type, params, throws, doc):
+    self.ident, self.doc = ident, doc
+    self.async = async
+    # FIELD_TYPE is the return type; THROWS is None without a throws clause
+    self.field_type = field_type
+    self.params, self.throws = params, throws
+
+
+class Enum(object):
+  """A parsed enum definition.
+
+  VALUES is a list of EnumValue objects. Entries written without an explicit
+  '= N' arrive with value None and are filled in here, in place: each takes
+  the previous entry's value plus one, and a leading entry starts at 0.
+  """
+
+  def __init__(self, ident, values, doc):
+    self.ident = ident
+    self.values = values
+    self.doc = doc
+
+    previous = -1
+    for entry in values:
+      if entry.value is None:
+        ### keep as integer?
+        entry.value = str(previous + 1)
+      previous = int(entry.value)
+
+
+class EnumValue(object):
+  "One entry of an enum; VALUE is None when no '= N' was written."
+
+  def __init__(self, ident, value, doc):
+    self.ident, self.value, self.doc = ident, value, doc
+
+
+class Field(object):
+  "A struct/exception member or a function parameter, as parsed."
+
+  def __init__(self, ident, field_type, doc, field_id, requiredness, value,
+               xsd_optional, xsd_nillable, xsd_attrs):
+    # a default, when present, must already be a parsed constant
+    assert value is None or isinstance(value, ConstValue)
+
+    self.ident, self.field_type, self.doc = ident, field_type, doc
+    self.field_id, self.requiredness = field_id, requiredness
+    self.value = value
+    self.xsd_optional, self.xsd_nillable = xsd_optional, xsd_nillable
+    self.xsd_attrs = xsd_attrs
+
+
+class FieldType(object):
+  """A type reference.
+
+  The ident is derived from whichever of MAP_FROM, SET_OF, LIST_OF or VALUES
+  is given (checked in that order); otherwise IDENT itself must be supplied.
+  """
+
+  def __init__(self, ident=None, cpp_type=None, map_from=None, map_to=None,
+               set_of=None, list_of=None, annotations=None, values=None):
+    for marker, derived in ((map_from, ID_MAP),
+                            (set_of, ID_SET),
+                            (list_of, ID_LIST),
+                            (values, ID_STRING)):
+      if marker is not None:
+        self.ident = derived
+        break
+    else:
+      # neither a container nor an senum: the caller names the type
+      assert ident is not None
+      self.ident = ident
+    self.cpp_type, self.annotations = cpp_type, annotations
+    self.map_from, self.map_to = map_from, map_to
+    self.set_of, self.list_of = set_of, list_of
+    self.values = values
+
+
+class KeyValuePair(object):
+  "A key/value pair, used for annotations and constant map entries."
+
+  def __init__(self, key, value):
+    self.key, self.value = key, value
+
+
+class ConstDef(object):
+  "A 'const' definition; VALUE must be a ConstValue."
+
+  def __init__(self, ident, field_type, value, doc):
+    assert isinstance(value, ConstValue)
+
+    self.ident, self.field_type = ident, field_type
+    self.value, self.doc = value, doc
+
+
+class ConstValue(object):
+  "A parsed constant: CTYPE is one of the CTYPE_* tags, VALUE its payload."
+
+  # tags telling what VALUE holds
+  CTYPE_BASE = 'base'
+  CTYPE_LIST = 'list'
+  CTYPE_MAP = 'map'
+
+  def __init__(self, ctype, value):
+    self.ctype, self.value = ctype, value
+
+
+class Typedef(object):
+  "A typedef: IDENT names FIELD_TYPE."
+
+  def __init__(self, ident, field_type, doc):
+    self.ident, self.field_type, self.doc = ident, field_type, doc
+
+
+class Struct(object):
+  "A parsed struct: its name, fields, annotations (or None) and doc."
+
+  def __init__(self, ident, fields, annotations, doc):
+    self.ident, self.fields = ident, fields
+    self.annotations, self.doc = annotations, doc
+
+
+class Exception(object):
+  "A parsed Thrift exception definition (not a Python exception type)."
+
+  def __init__(self, ident, fields, doc):
+    self.ident, self.fields, self.doc = ident, fields, doc
+
+
+class Namespace(object):
+  "A 'namespace' header: the target language and the namespace name."
+
+  def __init__(self, lang, namespace):
+    self.lang, self.namespace = lang, namespace
+
+
+# Type keywords that parse_field_type() turns directly into a FieldType.
+# The containers (map, set, list) are handled there separately.
+BASE_TYPES = [
+  ID_STRING,
+  ID_BINARY,
+  ID_SLIST,
+  ID_BOOL,
+  ID_BYTE,
+  ID_I16,
+  ID_I32,
+  ID_I64,
+  ID_DOUBLE,
+  ]
+
+
+if __name__ == '__main__':
+  # syntax check: parse the file named on the command line, discard result
+  import sys
+  contents = open(sys.argv[1]).read()
+  parse(contents)
diff --git a/compiler/py/src/scanner.py b/compiler/py/src/scanner.py
new file mode 100644
index 0000000..6524768
--- /dev/null
+++ b/compiler/py/src/scanner.py
@@ -0,0 +1,271 @@
+#
+# simple scanner for Thrift. emits tokens.
+#
+
+__all__ = ['Scanner', 'SimpleScanner', 'Token', 'TYPE_INT',
+           'ExpectedError', 'ExpectedType', 'UnexpectedEOF',
+           'UnknownToken', 'IncorrectSyntax',
+           ]
+
+import re
+
+# Numeric literals. re_int is never matched against the input directly:
+# SimpleScanner.get() applies it to the text matched by re_dub to decide
+# whether the token is really an integer.
+re_int = re.compile('[+-]?[0-9]+$')  # special handling
+re_hex = re.compile('0x[0-9A-Fa-f]+')
+# every part of re_dub is optional, so it can match the empty string;
+# SimpleScanner.get() skips zero-length matches
+re_dub = re.compile(r'[+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?')
+
+# Text that is skipped between tokens (see skip_re below).
+re_white = re.compile('[ \t\r\n]+')
+re_silly = re.compile(r'/\*+\*/')
+re_multi = re.compile(r'/\*[^*]/*([^*/]|[^*]/|\*[^/])*\*+/')
+re_comment = re.compile('//[^\n]*')
+re_unix = re.compile('#[^\n]*')
+
+# '/** ... */' comments are returned as DOC tokens instead of being skipped
+re_doc = re.compile(r'/\*\*([^*/]|[^*]/|\*[^/])*\*+/')
+
+re_ident = re.compile('[a-zA-Z_][\.a-zA-Z_0-9]*')
+re_symbol = re.compile(r'[:;,{}()=<>\[\]]')
+re_dliteral = re.compile('"[^"]*"')
+re_sliteral = re.compile("'[^']*'")
+re_st_ident = re.compile('[a-zA-Z-][.a-zA-Z_0-9-]*')
+
+# patterns tried, in this order, by SimpleScanner._skip()
+skip_re = [re_white, re_silly, re_multi, re_comment, re_unix]
+
+# (token type, pattern) pairs tried in order by SimpleScanner.get(); the
+# first pattern with a non-empty match wins
+types = [
+  ('HEX', re_hex),  # keep before re_dub
+  ('DUB', re_dub),
+  ('DOC', re_doc),
+  ('ID', re_ident),
+  ('SYM', re_symbol),
+  ('LIT', re_dliteral),
+  ('LIT', re_sliteral),
+  ('STID', re_st_ident),
+  ]
+
+# define and export a TYPE_<key> constant for every token type above
+for key, pattern in types:
+  globals()['TYPE_' + key] = key
+  __all__.append('TYPE_' + key)
+# INT has no pattern of its own (see re_int); it is already listed in __all__
+TYPE_INT = 'INT'
+
+
+class SimpleScanner(object):
+  "Splits CONTENTS into tokens, consuming it from the front."
+
+  def __init__(self, contents):
+    self.contents = contents
+    self.lineno = 1
+
+  def get(self):
+    """Get the next token.
+
+    Leading whitespace and comments are skipped, then the next token is
+    consumed and returned.
+
+    Returns None if there are no more tokens. Raises UnknownToken when no
+    token pattern matches the remaining input.
+    """
+    self._skip()
+
+    if not self.contents:
+      return None
+
+    for kind, regex in types:
+      found = regex.match(self.contents)
+      # an empty match is no token at all; try the next pattern
+      if not found or found.end() == 0:
+        continue
+      text = found.group()
+      if regex is re_dub and re_int.match(text):
+        kind = TYPE_INT
+      elif kind == TYPE_LIT:
+        # strip quotes
+        text = text[1:-1]
+      ### fold TYPE_HEX into TYPE_INT? convert INT/DUB away from string?
+      result = Token(kind, text)
+      self._chomp(found.end())
+      return result
+
+    raise UnknownToken(self.lineno)
+
+  def _skip(self):
+    "Skip over leading whitespace and comments."
+
+    skipped = True
+    while skipped:
+      # go around again after every successful skip; stop once a full pass
+      # over the patterns matches nothing
+      skipped = False
+      for regex in skip_re:
+        found = regex.match(self.contents)
+        if found:
+          self._chomp(found.end())
+          skipped = True
+          break
+
+  def _chomp(self, amt):
+    "Chomp AMT bytes off the front of the contents. Count newlines."
+    consumed = self.contents[:amt]
+    self.contents = self.contents[amt:]
+    self.lineno += consumed.count('\n')
+
+
+class Scanner(SimpleScanner):
+  """Scanner with one token of pushback and doc-comment tracking.
+
+  DOC tokens are never returned. The most recent one seen while fetching a
+  token is kept in self.doc, which is reset to None at the start of every
+  fetch that actually reads from the input.
+  """
+
+  def __init__(self, contents):
+    SimpleScanner.__init__(self, contents)
+
+    self.doc = None
+    self.pending = None
+
+  def get(self, eof_allowed=True):
+    """Get the next non-DOC token.
+
+    At the end of the input, returns None when EOF_ALLOWED is true and
+    raises UnexpectedEOF otherwise.
+    """
+    if self.pending is not None:
+      token = self.pending
+      self.pending = None
+      return token
+
+    self.doc = None
+    while True:
+      t = SimpleScanner.get(self)
+      if t is None:
+        if eof_allowed:
+          return None
+        raise UnexpectedEOF(self.lineno)
+      if t.ttype != TYPE_DOC:
+        #print 'TOKEN:', t
+        return t
+      self.doc = t
+
+  def get_type(self, ttype):
+    "Get the next token, ensuring it is of the given type."
+    t = self.get(eof_allowed=False)
+    if t.ttype != ttype:
+      raise ExpectedType(ttype, t.ttype, self.lineno)
+    return t
+
+  def value_of(self, ttype):
+    "Get the next token's value, ensuring it is of the given type."
+    return self.get_type(ttype).tvalue
+
+  def pushback(self, token):
+    "Push a token back into the scanner; it was unused."
+    assert token is not None
+    assert self.pending is None
+    self.pending = token
+
+  def eat_commasemi(self):
+    "Eat a comma or a semicolon, if present."
+    t = self.get()
+    if t is None:
+      # end of input: nothing to eat and nothing to push back
+      return
+    if t != SYM_COMMA and t != SYM_SEMI:
+      self.pushback(t)
+
+  def eat_expected(self, token):
+    "Eat the expected token, or raise ExpectedError (UnexpectedEOF at EOF)."
+    t = self.get(eof_allowed=False)
+    if t != token:
+      raise ExpectedError(token, t, self.lineno)
+
+
+class Token(object):
+  """A token: a type tag (TTYPE) plus the matched text (TVALUE).
+
+  Tokens compare equal when both type and value match, and hash
+  accordingly, so they can be used as dictionary keys. Comparing with
+  anything that is not a Token (None included) is simply unequal.
+  """
+
+  def __init__(self, ttype, tvalue=None):
+    self.ttype = ttype
+    self.tvalue = tvalue
+
+  def __str__(self):
+    if self.tvalue is None:
+      return 'T(%s)' % self.ttype
+    return 'T(%s, "%s")' % (self.ttype, self.tvalue)
+
+  def __eq__(self, other):
+    # the scanner returns None at EOF and callers compare that result
+    # against token constants, so a non-Token must not raise
+    if not isinstance(other, Token):
+      return False
+    return self.ttype == other.ttype and self.tvalue == other.tvalue
+
+  def __ne__(self, other):
+    return not self.__eq__(other)
+
+  def __hash__(self):
+    return hash((self.ttype, self.tvalue))
+
+
+# Define and export an ID_<KEYWORD> token constant for every keyword, so the
+# parser can compare scanned tokens against them (e.g. t == ID_STRUCT).
+for ident in ['namespace',
+              'cpp_namespace',
+              'cpp_include',
+              'cpp_type',
+              'java_package',
+              'cocoa_prefix',
+              'csharp_namespace',
+              'php_namespace',
+              'py_module',
+              'perl_package',
+              'ruby_namespace',
+              'smalltalk_category',
+              'smalltalk_prefix',
+              'xsd_all',
+              'xsd_optional',
+              'xsd_nillable',
+              'xsd_namespace',
+              'xsd_attrs',
+              'include',
+              'void',
+              'bool',
+              'byte',
+              'i16',
+              'i32',
+              'i64',
+              'double',
+              'string',
+              'binary',
+              'slist',
+              'senum',
+              'map',
+              'list',
+              'set',
+              'async',
+              'typedef',
+              'struct',
+              'exception',
+              'extends',
+              'throws',
+              'service',
+              'enum',
+              'const',
+              'required',
+              'optional',
+              ]:
+  name = 'ID_' + ident.upper()
+  globals()[name] = Token(TYPE_ID, ident)
+  __all__.append(name)
+
+
+# Likewise, define and export a SYM_<NAME> token constant for every symbol.
+for name, sym in [('COLON', ':'),
+                  ('SEMI', ';'),
+                  ('COMMA', ','),
+                  ('LBRACE', '{'),
+                  ('RBRACE', '}'),
+                  ('LPAREN', '('),
+                  ('RPAREN', ')'),
+                  ('LBRKT', '['),
+                  ('RBRKT', ']'),
+                  ('EQ', '='),
+                  ('LT', '<'),
+                  ('GT', '>'),
+                  ]:
+  globals()['SYM_' + name] = Token(TYPE_SYM, sym)
+  __all__.append('SYM_' + name)
+
+
+# Errors raised by the scanner and parser. Throughout this patch they are
+# raised with the current line number as the last argument.
+
+class ExpectedError(Exception):
+  "Expected token was not present."
+
+class ExpectedType(Exception):
+  "Expected token type was not present."
+
+class UnexpectedEOF(Exception):
+  "EOF reached unexpectedly."
+
+class UnknownToken(Exception):
+  "Unknown token encountered."
+
+class IncorrectSyntax(Exception):
+  "Incorrect syntax encountered."
+
+
+if __name__ == '__main__':
+  # dump the tokens of the file named on the command line, one per line
+  import sys
+
+  s = Scanner(open(sys.argv[1]).read())
+  token = s.get()
+  while token is not None:
+    print token
+    token = s.get()
diff --git a/compiler/py/src/templates-plain/py.ezt b/compiler/py/src/templates-plain/py.ezt
new file mode 100644
index 0000000..21f44f9
--- /dev/null
+++ b/compiler/py/src/templates-plain/py.ezt
@@ -0,0 +1,33 @@
+[if-any includes]Includes:[for includes]
+  [includes][end]
+
+[end][if-any namespaces]Namespaces:[for namespaces]
+  [namespaces.lang] [namespaces.namespace][end]
+
+[end][if-any cpp_includes]C++ Includes:[for cpp_includes]
+  [cpp_includes][end]
+
+[end][if-any php_namespace]PHP Namespace: [php_namespace]
+[end][if-any xsd_namespace]XSD Namespace: [xsd_namespace]
+[end][if-any consts]Constants:[for consts]
+  [consts.ident] [consts.field_type.serializer] = [consts.value.cvalue][end]
+
+[end][if-any typedefs]Typedefs:[for typedefs]
+  [typedefs.ident] => [typedefs.field_type.serializer][end]
+
+[end][if-any enums]Enums:[for enums]
+  [enums.ident] {[for enums.values]
+    [enums.values.ident] = [enums.values.value],[end]
+  }[end]
+
+[end][if-any structs]Structs:[for structs]
+  [structs.ident] {[for structs.fields]
+    [structs.fields.field_id]: [structs.fields.field_type.serializer] [structs.fields.ident],[end]
+  }[end]
+
+[end][if-any exceptions]Exceptions:[for exceptions]
+  [exceptions.ident][end]
+
+[end][if-any services]Services:[for services]
+  [services.ident][end]
+[end]
diff --git a/compiler/py/src/templates-plain/py_cvalue.ezt b/compiler/py/src/templates-plain/py_cvalue.ezt
new file mode 100644
index 0000000..1275a2d
--- /dev/null
+++ b/compiler/py/src/templates-plain/py_cvalue.ezt
@@ -0,0 +1,3 @@
+[is ctype "map"]{[for value][value.key.cvalue]: [value.value.cvalue],[end]}[#
+#][else][is ctype "list"][[][for value][value.cvalue],[end]][#
+#][else][value.tvalue][end][end]
\ No newline at end of file
diff --git a/compiler/py/src/templates-plain/py_deser.ezt b/compiler/py/src/templates-plain/py_deser.ezt
new file mode 100644
index 0000000..8c7d31b
--- /dev/null
+++ b/compiler/py/src/templates-plain/py_deser.ezt
@@ -0,0 +1 @@
+deserializer
diff --git a/compiler/py/src/templates-plain/py_ser.ezt b/compiler/py/src/templates-plain/py_ser.ezt
new file mode 100644
index 0000000..fb6bb38
--- /dev/null
+++ b/compiler/py/src/templates-plain/py_ser.ezt
@@ -0,0 +1,6 @@
+[is ident.tvalue "map"]map<[map_from.serializer],[map_to.serializer]>[#
+][else][is ident.tvalue "set"]set<[set_of.serializer]>[#
+][else][is ident.tvalue "list"]list<[list_of.serializer]>[#
+][else][if-any values]string {[for values][values],[end]}[#
+][else][ident.tvalue][#
+][end][end][end][end]
\ No newline at end of file
diff --git a/compiler/py/src/templates-py/py.ezt b/compiler/py/src/templates-py/py.ezt
new file mode 100644
index 0000000..bf3f95a
--- /dev/null
+++ b/compiler/py/src/templates-py/py.ezt
@@ -0,0 +1,100 @@
+#
+# Autogenerated by Thrift
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+
+from thrift.Thrift import *
+[# ### probably need some massaging to really locate this module
+][for includes]import [includes].ttypes
+[end]
+from thrift.transport import TTransport
+from thrift.protocol import TBinaryProtocol
+try:
+  from thrift.protocol import fastbinary
+except:
+  fastbinary = None
+
+
+[# ### need gen_newstyle flag
+][for enums]class [enums.ident](object):[for enums.values]
+  [enums.values.ident] = [enums.values.value][end]
+[end]
+
+[for consts][consts.ident] = [consts.value.cvalue]
+[end]
+
+[# ### need gen_newstyle flag
+][for structs]class [structs.ident](object):
+
+  thrift_spec = ([# ### should sort fields. need None markers for gaps.
+][if-any structs.fields][for structs.sorted_fields]
+    [if-any structs.sorted_fields]([structs.sorted_fields.field_id], [structs.sorted_fields.type_enum], [#
+]'[structs.sorted_fields.ident]', [#
+]None, [# ### should have spec_args here
+][if-any structs.sorted_fields.value][structs.sorted_fields.value.cvalue][else]None[end], [#
+]),[else]None,[end] # ### list-index[# structs.sorted_fields.list-index][end]
+  )
+[else]  thrift_spec = None
+[end]
+[if-any structs.fields]  def __init__(self,[#
+][for structs.fields] [structs.fields.ident]=[#
+][if-any structs.fields.value]thrift_spec[[][structs.fields.field_id]][[]4][#
+][else]None[end],[end]):
+[for structs.fields][if-any ""][# ### complex test here
+]    if [structs.fields.ident] is self.thrift_spec[[]structs.fields.field_id][[]4]:
+      [structs.fields.ident] = [structs.fields.value.cvalue]
+[end]    self.[structs.fields.ident] = [structs.fields.ident]
+[end]
+[end][# close: if-any structs.fields]
+
+  def read(self, iprot):
+    if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated [#
+]and isinstance(iprot.trans, TTransport.CReadableTransport) [#
+]and self.thrift_spec is not None and fastbinary is not None:
+      fastbinary.decode_binary(self, iprot.trans, [#
+](self.__class__, self.thrift_spec))
+      return
+    iprot.readStructBegin()
+    while True:
+      (fname, ftype, fid) = iprot.readFieldBegin()
+      if ftype == TType.STOP:
+        break[for structs.fields]
+      [if-index structs.fields first]if[else]elif[end] fid == [#
+][structs.fields.field_id]:
+        if ftype == [structs.fields.type_enum]:
+          pass # deserialize
+        else:
+          iprot.skip(ftype)[end]
+      else:
+        iprot.skip(ftype)
+      iprot.readFieldEnd()
+    iprot.readStructEnd()
+
+  def write(self, oprot):
+    if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated [#
+]and self.thrift_spec is not None and fastbinary is not None:
+      oprot.trans.write(fastbinary.encode_binary(self, [#
+](self.__class__, self.thrift_spec)))
+      return
+    oprot.writeStructBegin('[structs.ident]')[for structs.fields]
+    if self.[structs.fields.ident] != None:
+      oprot.writeFieldBegin('[structs.fields.ident]', [#
+][structs.fields.type_enum], [structs.fields.field_id])
+      # serialize
+      oprot.writeFieldEnd()[end]
+    oprot.writeFieldStop()
+    oprot.writeStructEnd()
+
+  def __repr__(self):
+    L = [[]'%s=%r' % (key, value)
+      for key, value in self.__dict__.iteritems()]
+    return '%s(%s)' % (self.__class__.__name__, ', '.join(L))
+
+  def __eq__(self, other):
+    return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
+
+  def __ne__(self, other):
+    return not (self == other)
+
+[end][# for structs]
diff --git a/compiler/py/src/templates-py/py_cvalue.ezt b/compiler/py/src/templates-py/py_cvalue.ezt
new file mode 100644
index 0000000..be7fab5
--- /dev/null
+++ b/compiler/py/src/templates-py/py_cvalue.ezt
@@ -0,0 +1,10 @@
+[is ctype "map"]{
+  [for value][value.key.cvalue]: [value.value.cvalue],
+[end]}[#
+#][else][is ctype "set"]set([[]
+  [for value][value.cvalue],
+[end]])[#
+#][else][is ctype "list"][[]
+  [for value][value.cvalue],
+[end]][#
+#][else][value.tvalue][end][end][end]
\ No newline at end of file
diff --git a/compiler/py/src/templates-py/py_deser.ezt b/compiler/py/src/templates-py/py_deser.ezt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/compiler/py/src/templates-py/py_deser.ezt
diff --git a/compiler/py/src/templates-py/py_ser.ezt b/compiler/py/src/templates-py/py_ser.ezt
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/compiler/py/src/templates-py/py_ser.ezt