| Marc Slemko | c0e07a2 | 2006-08-09 23:34:57 +0000 | [diff] [blame] | 1 | #!python | 
""" Thrift IDL parser/compiler

This parser uses the Python PLY LALR parser generator to build a parser for the Thrift IDL grammar.

If a compiled \"thyc\" file exists for a given source \"thrift\" file it computes a hash of the file and determines
if it is the source of the \"thyc\" file.  If so, it simply returns the parse tree previously computed, otherwise it
parses the source and generates a new \"thyc\" file (assuming of course the source file contains no errors.)

When the parser encounters import statements it searches for corresponding \"thrift\" or \"thyc\" files in paths corresponding to
the specified namespace.

Author(s): Mark Slee(mclee@facebook.com), Marc Kwiatkowski (marc@facebook.com)

$Id:
"""
|  | 17 |  | 
|  | 18 | import lex | 
|  | 19 | import os | 
|  | 20 | import pickle | 
|  | 21 | import string | 
| Marc Slemko | b2039e7 | 2006-08-09 01:00:17 +0000 | [diff] [blame] | 22 | import yacc | 
|  | 23 |  | 
class Error(object):
    """Base class for a diagnostic produced while compiling an IDL file.

    Attributes:
        start: first source line the diagnostic refers to.
        end: last source line the diagnostic refers to.
        message: human-readable description; must be non-empty.
    """

    def __init__(self, start=0, end=0, message=""):
        # A diagnostic without any text is treated as a caller error.
        if not len(message):
            raise Exception("NO MESSAGE")
        self.start = start
        self.end = end
        self.message = message

    def __str__(self):
        """Render as "<start line>: error: <message>"."""
        return "".join([str(self.start), ": error: ", self.message])
|  | 35 |  | 
class SyntaxError(Error):
    """Diagnostic for a token the grammar could not accept.

    NOTE: the name shadows the built-in SyntaxError inside this module.
    """

    def __init__(self, lexToken):
        # The offending token supplies both the line number and the text.
        line = lexToken.lineno
        text = "syntax error " + str(lexToken.value)
        Error.__init__(self, line, line, text)
|  | 39 |  | 
class SymanticsError(Error):
    """Diagnostic attached to a specific IDL definition.

    The line range is taken from the definition's ``start``/``end``
    attributes, and the definition itself is kept in ``self.definition``.
    """

    def __init__(self, definition, message):
        first = definition.start
        last = definition.end
        Error.__init__(self, first, last, message)
        self.definition = definition

    def __str__(self):
        """Render as "<start line>: error: <message>"."""
        return "".join([str(self.start), ": error: ", self.message])
|  | 48 |  | 
class ErrorException(Exception):
    """Exception that carries a batch of diagnostics.

    Args:
        errors: list of error objects; when omitted an empty list is used so
            that handlers can always do ``collected += e.errors``.
    """

    def __init__(self, errors=None):
        # Initialise the base class so the exception behaves normally when
        # printed, logged or pickled.
        Exception.__init__(self)
        if errors is None:
            errors = []
        self.errors = errors

    def __str__(self):
        """Return the carried diagnostics, one per line."""
        return "\n".join([str(error) for error in self.errors])
|  | 53 |  | 
class Definition(object):
    """ Abstract thrift IDL definition unit

    Attributes:
        name: name of the definition.
        id: optional numeric id (None when unspecified).
        start: source line of the first grammar symbol (0 when unknown).
        end: source line of the last grammar symbol (0 when unknown).
    """

    def __init__(self, symbols=None, name="", id=None):
        # Always define the line range: error reporting reads .start/.end,
        # and definitions built without parser symbols would otherwise raise
        # AttributeError there.
        self.start = 0
        self.end = 0
        if symbols:
            self.lines(symbols)
        self.name = name
        self.id = id

    def validate(self):
        """Hook for subclasses; the base definition has nothing to check."""
        pass

    def lines(self, symbols):
        """Record the line range from a parser production.

        `symbols` must support ``len()`` and ``lineno(index)``; index 0 is
        the production's result slot, so the range runs from symbol 1 to the
        last symbol.
        """
        self.start = symbols.lineno(1)
        self.end = symbols.lineno(len(symbols) - 1)
|  | 69 |  | 
class Identifier(Definition):
    """ An Identifier - name and optional integer id """

    def __init__(self, symbols, name, id=None):
        Definition.__init__(self, symbols, name, id)

    def __str__(self):
        """Return "name" or "name=<id>" when a non-zero id is set."""
        result = self.name
        # The id defaults to None; comparing only against 0 rendered
        # identifiers without an id as "name=None".
        if self.id:
            result += "=" + str(self.id)
        return result
|  | 81 |  | 
class Type(Definition):
    """ Abstract Type definition """

    def __init__(self, symbols, name):
        super(Type, self).__init__(symbols, name)
        self.name = name

    def __str__(self):
        """A type prints as its bare name."""
        return self.name
|  | 91 |  | 
class TypeDef(Type):
    """A named alias (`typedef`) for another type.

    Attributes:
        definitionType: the type this alias stands for.
    """

    def __init__(self, symbols, name, definitionType):
        super(TypeDef, self).__init__(symbols, name)
        self.definitionType = definitionType

    def __str__(self):
        # NOTE(review): the name appears twice in the output
        # ("name<name, type>"); kept as-is.
        pieces = [self.name, "<", str(self.name), ", ", str(self.definitionType), ">"]
        return "".join(pieces)
|  | 100 |  | 
|  | 101 | """ Primitive Types """ | 
|  | 102 |  | 
class PrimitiveType(Type):
    """A built-in type; it has no source position, so no parser symbols."""

    def __init__(self, name):
        super(PrimitiveType, self).__init__(None, name)
|  | 107 |  | 
|  | 108 |  | 
# One shared instance per built-in type.
# NOTE(review): STRING_TYPE and BYTE_TYPE are separate objects that merely
# share a name with UTF7_TYPE / U08_TYPE; PRIMITIVE_MAP hands out the latter.
STOP_TYPE = PrimitiveType("stop")
VOID_TYPE = PrimitiveType("void")
BOOL_TYPE = PrimitiveType("bool")
STRING_TYPE = PrimitiveType("utf7")
UTF7_TYPE = PrimitiveType("utf7")
UTF8_TYPE = PrimitiveType("utf8")
UTF16_TYPE = PrimitiveType("utf16")
BYTE_TYPE = PrimitiveType("u08")
I08_TYPE = PrimitiveType("i08")
I16_TYPE = PrimitiveType("i16")
I32_TYPE = PrimitiveType("i32")
I64_TYPE = PrimitiveType("i64")
U08_TYPE = PrimitiveType("u08")
U16_TYPE = PrimitiveType("u16")
U32_TYPE = PrimitiveType("u32")
U64_TYPE = PrimitiveType("u64")
FLOAT_TYPE = PrimitiveType("float")
# "double" is a reserved word of the grammar, and the parser resolves every
# base type keyword through PRIMITIVE_MAP, so it needs an entry here.
DOUBLE_TYPE = PrimitiveType("double")

# Maps an IDL type keyword to its primitive type object.
PRIMITIVE_MAP = {
    "stop": STOP_TYPE,
    "void": VOID_TYPE,
    "bool": BOOL_TYPE,
    "string": UTF7_TYPE,
    "utf7": UTF7_TYPE,
    "utf8": UTF8_TYPE,
    "utf16": UTF16_TYPE,
    "byte": U08_TYPE,
    "i08": I08_TYPE,
    "i16": I16_TYPE,
    "i32": I32_TYPE,
    "i64": I64_TYPE,
    "u08": U08_TYPE,
    "u16": U16_TYPE,
    "u32": U32_TYPE,
    "u64": U64_TYPE,
    "float": FLOAT_TYPE,
    "double": DOUBLE_TYPE
}
|  | 146 |  | 
|  | 147 | """ Collection Types """ | 
|  | 148 |  | 
class CollectionType(Type):
    """Common base class of the container types (map, set, list)."""

    def __init__(self, symbols, name):
        super(CollectionType, self).__init__(symbols, name)
|  | 153 |  | 
class Map(CollectionType):
    """A map<key,value> container; its name is derived from both type names."""

    def __init__(self, symbols, keyType, valueType):
        label = "".join(["map<", keyType.name, ",", valueType.name, ">"])
        CollectionType.__init__(self, symbols, label)
        self.keyType = keyType
        self.valueType = valueType
|  | 160 |  | 
class Set(CollectionType):
    """A set<value> container; its name is derived from the element type name."""

    def __init__(self, symbols, valueType):
        label = "".join(["set<", valueType.name, ">"])
        CollectionType.__init__(self, symbols, label)
        self.valueType = valueType
|  | 166 |  | 
class List(CollectionType):
    """A list<value> container; its name is derived from the element type name."""

    def __init__(self, symbols, valueType):
        label = "".join(["list<", valueType.name, ">"])
        CollectionType.__init__(self, symbols, label)
        self.valueType = valueType
|  | 172 |  | 
class Enum(Definition):
    """An `enum` definition: a name plus a sequence of EnumDef members."""

    def __init__(self, symbols, name, enumDefs):
        Definition.__init__(self, symbols, name)
        self.enumDefs = enumDefs

    def validate(self):
        """Check member uniqueness and number the members without an id.

        Raises:
            ErrorException: if a member name or an explicit id is used twice.
                No ids are assigned in that case.
        """
        ids = {}
        names = {}
        errors = []

        for enumDef in self.enumDefs:

            if enumDef.name in names:
                errors.append(SymanticsError(enumDef, self.name+"."+str(enumDef.name)+" already defined at line "+str(names[enumDef.name].start)))
            else:
                names[enumDef.name] = enumDef

            if enumDef.id is not None:
                oldEnumDef = ids.get(enumDef.id)
                if oldEnumDef:
                    errors.append(SymanticsError(enumDef, "enum "+self.name+" \""+str(enumDef.name)+"\" uses constant already assigned to \""+oldEnumDef.name+"\""))
                else:
                    ids[enumDef.id] = enumDef

        if errors:
            raise ErrorException(errors)

        # assign ids for all enum defs with unspecified ids

        currentId = 0

        for enumDef in self.enumDefs:
            # Only None means "unspecified": an explicit id of 0 is falsy and
            # must not be overwritten.
            if enumDef.id is None:
                # Next free id above the previous member's id.
                candidate = currentId + 1
                while candidate in ids:
                    candidate += 1
                enumDef.id = candidate
                ids[candidate] = enumDef
            # Numbering continues from the most recent member, explicit or not.
            currentId = enumDef.id

    def __repr__(self):
        return str(self)

    def __str__(self):
        """Return "name<member, member, ...>"."""
        return self.name+"<"+", ".join([str(enumDef) for enumDef in self.enumDefs])+">"
|  | 227 |  | 
class EnumDef(Definition):
    """A single member of an enum: a name and an optional integer id."""

    def __init__(self, symbols, name, id=None):
        super(EnumDef, self).__init__(symbols, name, id)

    def __repr__(self):
        return self.__str__()

    def __str__(self):
        """Return "name", or "name:<id>" when the id is truthy."""
        if not self.id:
            return self.name
        return self.name + ":" + str(self.id)
|  | 241 |  | 
|  | 242 |  | 
class Field(Definition):
    """A typed, named slot of a struct or of a function argument list.

    The field takes its name and id from `identifier`; `type` holds the
    field's type (or a reference to it).
    """

    def __init__(self, symbols, type, identifier):
        super(Field, self).__init__(symbols, identifier.name, identifier.id)
        self.type = type
        self.identifier = identifier

    def __str__(self):
        """Return "<type, identifier>"."""
        return "".join(["<", str(self.type), ", ", str(self.identifier), ">"])
|  | 252 |  | 
def validateFieldList(fieldList):
    """Check a field list for clashes and number the fields without an id.

    Every field must have a unique name, and every explicitly given id must
    be unique.  Fields whose id is None then receive the next free negative
    id (-1, -2, ...), skipping values that are already taken.

    Args:
        fieldList: iterable of objects with `name`, `id` and `start`.

    Raises:
        ErrorException: if a name or an explicit id is used twice.  No ids
            are assigned in that case.
    """

    errors = []
    names = {}
    ids = {}

    for field in fieldList:

        if field.name in names:
            oldField = names[field.name]
            errors.append(SymanticsError(field, "field \""+field.name+"\" already defined at "+str(oldField.start)))
        else:
            names[field.name] = field

        if field.id is not None:
            oldField = ids.get(field.id)
            if oldField:
                errors.append(SymanticsError(field, "field \""+field.name+"\" uses constant already assigned to \""+oldField.name+"\""))
            else:
                ids[field.id] = field

    if errors:
        raise ErrorException(errors)

    # assign ids for all fields with unspecified ids

    currentId = 0

    # The loop must bind `field` itself; binding a different name left every
    # check looking at the last field of the previous loop.
    for field in fieldList:
        # "Unspecified" means None, matching the explicit-id check above.
        if field.id is None:
            currentId -= 1
            while currentId in ids:
                currentId -= 1
            field.id = currentId
            ids[currentId] = field
|  | 297 |  | 
class Struct(Type):
    """A `struct` definition: a named type made of a list of fields."""

    def __init__(self, symbols, name, fieldList):
        super(Struct, self).__init__(symbols, name)
        self.fieldList = fieldList

    def validate(self):
        """Validate (and number) the struct's fields."""
        validateFieldList(self.fieldList)

    def __str__(self):
        """Return "name<field, field, ...>"."""
        rendered = [str(field) for field in self.fieldList]
        return self.name + "<" + ", ".join(rendered) + ">"
|  | 309 |  | 
class Function(Definition):
    """A service function: name, result type and argument field list."""

    def __init__(self, symbols, name, resultType, argFieldList):
        super(Function, self).__init__(symbols, name)
        self.resultType = resultType
        self.argFieldList = argFieldList

    def validate(self):
        """Validate (and number) the argument fields."""
        validateFieldList(self.argFieldList)

    def __str__(self):
        """Return "name(arg, arg, ...) => resultType"."""
        rendered = [str(arg) for arg in self.argFieldList]
        return self.name + "(" + ", ".join(rendered) + ") => " + str(self.resultType)
|  | 322 |  | 
class Service(Definition):
    """A `service` definition: a name plus a list of functions."""

    def __init__(self, symbols, name, functionList):
        Definition.__init__(self, symbols, name)
        self.functionList = functionList

    def validate(self):
        """Check that no two functions of the service share a name.

        Raises:
            ErrorException: one error per function whose name was already
                used by an earlier function.
        """

        errors = []
        functionNames = {}
        for function in self.functionList:
            oldFunction = functionNames.get(function.name)
            if oldFunction is not None:
                errors.append(SymanticsError(function, "function "+function.name+" already defined at "+str(oldFunction.start)))
            else:
                # Remember the first definition so that later duplicates can
                # be detected and can point back at it.
                functionNames[function.name] = function

        if errors:
            raise ErrorException(errors)

    def __str__(self):
        """Return "name(function, function, ...)"."""
        return self.name+"("+", ".join([str(function) for function in self.functionList])+")"
|  | 343 |  | 
class Program(object):
    """Symbol tables and ordered definition list for one IDL file.

    Attributes:
        name: program name.
        definitions: definitions in the order they were added.
        serviceMap, typedefMap, enumMap, structMap: name -> definition.
        collectionMap: collection type name -> collection type, used to share
            one object per distinct collection type.
        primitiveMap: type keyword -> primitive type.
    """

    def __init__(self, symbols=None, name="", definitions=None, serviceMap=None, typedefMap=None, enumMap=None, structMap=None, collectionMap=None,
                 primitiveMap=None):

        self.name = name

        # A missing (or empty) argument is replaced by a fresh container, so
        # instances never share mutable state through default values.
        self.definitions = definitions or []
        self.serviceMap = serviceMap or {}
        self.typedefMap = typedefMap or {}
        self.enumMap = enumMap or {}
        self.structMap = structMap or {}
        self.collectionMap = collectionMap or {}
        self.primitiveMap = primitiveMap or PRIMITIVE_MAP

    def addDefinition(self, definition, definitionMap, definitionTypeName):
        """Register `definition` in `definitionMap` under its name.

        Raises:
            ErrorException: if the map already holds a definition with that
                name; the duplicate is not recorded.
        """

        oldDefinition = definitionMap.get(definition.name)
        if oldDefinition:
            raise ErrorException([SymanticsError(definition, definitionTypeName+" "+definition.name+" is already defined at "+str(oldDefinition.start))])
        else:
            definitionMap[definition.name] = definition

        # keep an ordered list of definitions so that stub/skel generators can determine the original order

        self.definitions.append(definition)

    def addStruct(self, struct):
        self.addDefinition(struct, self.structMap, "struct")

    def addTypedef(self, typedef):
        self.addDefinition(typedef, self.typedefMap, "typedef")

    def addEnum(self, enum):
        self.addDefinition(enum, self.enumMap, "enum")

    def addService(self, service):
        self.addDefinition(service, self.serviceMap, "service")

    def addCollection(self, collection):
        """Intern a collection type by name.

        Returns the previously registered collection with the same name if
        there is one, otherwise registers and returns `collection`.
        """
        if collection.name in self.collectionMap:
            return self.collectionMap[collection.name]
        else:
            self.collectionMap[collection.name] = collection
            return collection

    def getType(self, parent, symbol):
        """ Get the type definition for a symbol

        `symbol` may be a Type (returned unchanged), a Field (looked up by
        the name of its type) or an Identifier (looked up by its name).
        `parent` is the definition any error is reported against.

        Raises:
            ErrorException: if the symbol kind is unsupported or the name is
                not found in any type table.
        """

        typeName = None

        if isinstance(symbol, Type):
            return symbol
        elif isinstance(symbol, Field):
            typeName = symbol.type.name
        elif isinstance(symbol, Identifier):
            typeName = symbol.name
        else:
            raise ErrorException([SymanticsError(parent, "unknown symbol \""+str(symbol)+"\"")])

        # Lookup order: primitives first, then user-defined tables.
        for typeMap in (self.primitiveMap, self.collectionMap, self.typedefMap, self.enumMap, self.structMap):
            if typeName in typeMap:
                return typeMap[typeName]

        raise ErrorException([SymanticsError(parent, "\""+typeName+"\"  is not defined.")])

    def hasType(self, parent, symbol):
        """ Determine if a type definition exists for the symbol"""

        # getType() returns a type object or raises; comparing that object
        # with True could never succeed, so translate the outcome instead.
        try:
            self.getType(parent, symbol)
        except ErrorException:
            return False
        return True

    def validate(self):
        """Resolve every type reference to the type object it names.

        Raises:
            ErrorException: carrying all unresolved references found.
        """

        errors = []

        # Verify that struct fields types, collection key and element types, and typedef defined types exists and replaces
        # type names with references to the type objects

        for struct in self.structMap.values():
            for field in struct.fieldList:
                try:
                    field.type = self.getType(struct, field)
                except ErrorException as e:
                    errors += e.errors

        for collection in self.collectionMap.values():
            try:
                if isinstance(collection, Map):
                    collection.keyType = self.getType(collection, collection.keyType)

                collection.valueType = self.getType(collection, collection.valueType)

            except ErrorException as e:
                errors += e.errors

        for typedef in self.typedefMap.values():
            try:
                # Report against the typedef: the Program itself has no
                # source position for an error to point at.
                typedef.definitionType = self.getType(typedef, typedef.definitionType)

            except ErrorException as e:
                errors += e.errors

        # Verify that service function result and arg list types exist and replace type name with reference to definition

        for service in self.serviceMap.values():

            for function in service.functionList:
                try:
                    function.resultType = self.getType(service, function.resultType)
                except ErrorException as e:
                    errors += e.errors

                for field in function.argFieldList:
                    try:
                        field.type = self.getType(function, field)
                    except ErrorException as e:
                        errors += e.errors

        if errors:
            raise ErrorException(errors)
|  | 484 |  | 
|  | 485 |  | 
class Parser(object):
    """PLY-based lexer and LALR parser for the IDL grammar.

    The t_* members define the tokens and the p_* methods define the grammar;
    PLY reads each rule from the function's docstring, so those docstrings
    are part of the program and must not be edited as documentation.

    Attributes:
        debug: truthy to trace every grammar reduction.
        program: the Program being populated by the grammar actions.
        errors: diagnostics collected while parsing and validating.
    """

    reserved = ("BYTE",
                # "CONST",
                "DOUBLE",
                "ENUM",
                # "EXCEPTION",
                # "EXTENDS",
                "I08",
                "I16",
                "I32",
                "I64",
                "LIST",
                "MAP",
                "SERVICE",
                "SET",
                # "STATIC",
                "STRING",
                "STRUCT",
                # "SYNCHRONIZED",
                "TYPEDEF",
                "U08",
                "U16",
                "U32",
                "U64",
                "UTF16",
                "UTF8",
                "VOID"
                )

    tokens = reserved + (
        # Literals (identifier, integer constant, float constant, string constant, char const)
        'ID', 'ICONST', 'SCONST', 'FCONST',
        # Operators default=, optional*, variable...
        'ASSIGN',  #'OPTIONAL', 'ELLIPSIS',
        # Delimeters ( ) { } < > , . ; :
        'LPAREN', 'RPAREN',
        'LBRACE', 'RBRACE',
        'LANGLE', 'RANGLE',
        'COMMA' #, 'PERIOD', 'SEMI' , 'COLON'
        )

    precendence = ()

    # PLY looks for the attribute spelled "precedence"; expose the same
    # (empty) table under that name so future entries are not ignored.
    precedence = precendence

    # Maps the lower-case keyword as written in the source to its token type.
    reserved_map = {}

    for r in reserved:
        reserved_map[r.lower()] = r

    # Identifiers; reserved words are re-typed through reserved_map.
    def t_ID(self, t):
        r'[A-Za-z_][\w_]*'
        t.type = self.reserved_map.get(t.value,"ID")
        return t

    # Completely ignored characters
    t_ignore           = ' \t\x0c'

    #    t_OPTIONAL         = r'\*'
    t_ASSIGN           = r'='

    # Delimeters
    t_LPAREN           = r'\('
    t_RPAREN           = r'\)'
    t_LANGLE           = r'\<'
    t_RANGLE           = r'\>'
    t_LBRACE           = r'\{'
    t_RBRACE           = r'\}'
    t_COMMA            = r','
    #    t_PERIOD           = r'\.'
    #    t_SEMI             = r';'
    #    t_COLON            = r':'
    #    t_ELLIPSIS         = r'\.\.\.'

    # Integer literal
    t_ICONST = r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'

    # Floating literal
    t_FCONST = r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'

    # String literal
    t_SCONST = r'\"([^\\\n]|(\\.))*?\"'

    # Comments (discarded; only the line counter is advanced)
    def t_comment(self, t):
        r'(?:/\*(.|\n)*?\*/)|(?://[^\n]*\n)'
        t.lineno += t.value.count('\n')

    # Report and skip a character no token rule matches.
    def t_error(self, t):
        print("Illegal character %s" % repr(t.value[0]))
        t.skip(1)

    # Newlines
    def t_newline(self, t):
        r'\n+'
        t.lineno += t.value.count("\n")

    # Convert an ICONST lexeme to int.  The token pattern admits C-style
    # u/U/l/L suffixes that int() rejects, so they are stripped first.
    def _toInt(self, text):
        return int(text.rstrip("uUlL"))

    def p_program(self, p):
        'program : definitionlist'
        pass

    def p_definitionlist_1(self, p):
        'definitionlist : definitionlist definition'
        pass

    def p_definitionlist_2(self, p):
        'definitionlist :'
        pass

    # Each top-level definition is registered with the program; a duplicate
    # name is recorded as an error instead of aborting the parse.
    def p_definition_1(self, p):
        'definition : typedef'
        self.pdebug("p_definition_1", p)
        p[0] = p[1]
        try:
            self.program.addTypedef(p[0])
        except ErrorException as e:
            self.errors += e.errors

    def p_definition_2(self, p):
        'definition : enum'
        self.pdebug("p_definition_2", p)
        p[0] = p[1]
        try:
            self.program.addEnum(p[0])
        except ErrorException as e:
            self.errors += e.errors

    def p_definition_3(self, p):
        'definition : struct'
        self.pdebug("p_definition_3", p)
        p[0] = p[1]
        try:
            self.program.addStruct(p[0])
        except ErrorException as e:
            self.errors += e.errors

    def p_definition_4(self, p):
        'definition : service'
        self.pdebug("p_definition_4", p)
        p[0] = p[1]
        try:
            self.program.addService(p[0])
        except ErrorException as e:
            self.errors += e.errors

    def p_typedef(self, p):
        'typedef : TYPEDEF definitiontype ID'
        self.pdebug("p_typedef", p)
        p[0] = TypeDef(p, p[3], p[2])
        try:
            p[0].validate()

        except ErrorException as e:
            self.errors += e.errors

    #    def p_definition_or_referencye_type_1(self, p):
    #       XXX need to all typedef struct foo foo_t by allowing references
    #	pass

    def p_enum(self, p):
        'enum : ENUM ID LBRACE enumdeflist RBRACE'
        self.pdebug("p_enum", p)
        p[0] = Enum(p, p[2], p[4])

        try:
            p[0].validate()
        except ErrorException as e:
            self.errors += e.errors

    def p_enumdeflist_1(self, p):
        'enumdeflist : enumdeflist COMMA enumdef'
        self.pdebug("p_enumdeflist_1", p)
        p[0] = p[1] + (p[3],)

    def p_enumdeflist_2(self, p):
        'enumdeflist : enumdef'
        self.pdebug("p_enumdeflist_2", p)
        p[0] = (p[1],)

    def p_enumdef_0(self, p):
        'enumdef : ID ASSIGN ICONST'
        self.pdebug("p_enumdef_0", p)
        p[0] = EnumDef(p, p[1], self._toInt(p[3]))

    def p_enumdef_1(self, p):
        'enumdef : ID'
        self.pdebug("p_enumdef_1", p)
        p[0] = EnumDef(p, p[1])

    def p_struct(self, p):
        'struct :  STRUCT ID LBRACE fieldlist RBRACE'
        self.pdebug("p_struct", p)
        p[0] = Struct(p, p[2], p[4])

        try:
            p[0].validate()
        except ErrorException as e:
            self.errors += e.errors

    def p_service(self, p):
        'service : SERVICE ID LBRACE functionlist RBRACE'
        self.pdebug("p_service", p)
        p[0] =  Service(p, p[2], p[4])
        try:
            p[0].validate()
        except ErrorException as e:
            self.errors += e.errors

    def p_functionlist_1(self, p):
        'functionlist : functionlist function'
        self.pdebug("p_functionlist_1", p)
        p[0] = p[1] + (p[2],)

    def p_functionlist_2(self, p):
        'functionlist :'
        self.pdebug("p_functionlist_2", p)
        p[0] = ()

    def p_function(self, p):
        'function : functiontype functionmodifiers ID LPAREN fieldlist RPAREN'
        self.pdebug("p_function", p)
        p[0] = Function(p, p[3], p[1], p[5])
        try:
            p[0].validate()
        except ErrorException as e:
            self.errors += e.errors

    def p_functionmodifiers(self, p):
        'functionmodifiers :'
        self.pdebug("p_functionmodifiers", p)
        p[0] = ()

    def p_fieldlist_1(self, p):
        'fieldlist : fieldlist COMMA field'
        self.pdebug("p_fieldlist_1", p)
        p[0] = p[1] + (p[3],)

    def p_fieldlist_2(self, p):
        'fieldlist : field'
        self.pdebug("p_fieldlist_2", p)
        p[0] = (p[1],)

    def p_fieldlist_3(self, p):
        'fieldlist :'
        self.pdebug("p_fieldlist_3", p)
        p[0] = ()

    def p_field_1(self, p):
        'field : fieldtype ID ASSIGN ICONST'
        self.pdebug("p_field_1", p)
        p[0] = Field(p, p[1], Identifier(None, p[2], self._toInt(p[4])))

    def p_field_2(self, p):
        'field : fieldtype ID'
        self.pdebug("p_field_2", p)
        p[0] = Field(p, p[1], Identifier(None, p[2]))

    def p_definitiontype_1(self, p):
        'definitiontype : basetype'
        self.pdebug("p_definitiontype_1", p)
        p[0] = p[1]

    def p_definitiontype_2(self, p):
        'definitiontype : collectiontype'
        self.pdebug("p_definitiontype_2", p)
        p[0] = p[1]

    def p_functiontype_1(self, p):
        'functiontype : fieldtype'
        self.pdebug("p_functiontype_1", p)
        p[0] = p[1]

    def p_functiontype_2(self, p):
        'functiontype : VOID'
        self.pdebug("p_functiontype_2", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    # A bare identifier is kept as an unresolved reference.
    def p_fieldtype_1(self, p):
        'fieldtype : ID'
        self.pdebug("p_fieldtype_1", p)
        p[0] = Identifier(p, p[1])

    def p_fieldtype_2(self, p):
        'fieldtype : basetype'
        self.pdebug("p_fieldtype_2", p)
        p[0] = p[1]

    def p_fieldtype_3(self, p):
        'fieldtype : collectiontype'
        self.pdebug("p_fieldtype_3", p)
        p[0] = p[1]

    # Base type keywords map straight to the shared primitive type objects.
    def p_basetype_1(self, p):
        'basetype : STRING'
        self.pdebug("p_basetype_1", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_2(self, p):
        'basetype : BYTE'
        self.pdebug("p_basetype_2", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_3(self, p):
        'basetype : I08'
        self.pdebug("p_basetype_3", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_4(self, p):
        'basetype : U08'
        self.pdebug("p_basetype_4", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_5(self, p):
        'basetype : I16'
        self.pdebug("p_basetype_5", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_6(self, p):
        'basetype : U16'
        self.pdebug("p_basetype_6", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_7(self, p):
        'basetype : I32'
        self.pdebug("p_basetype_7", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_8(self, p):
        'basetype : U32'
        self.pdebug("p_basetype_8", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_9(self, p):
        'basetype : I64'
        self.pdebug("p_basetype_9", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_10(self, p):
        'basetype : U64'
        self.pdebug("p_basetype_10", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_11(self, p):
        'basetype : UTF8'
        self.pdebug("p_basetype_11", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_12(self, p):
        'basetype : UTF16'
        self.pdebug("p_basetype_12", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    def p_basetype_13(self, p):
        'basetype : DOUBLE'
        self.pdebug("p_basetype_13", p)
        p[0] = self.program.primitiveMap[p[1].lower()]

    # Collection types are interned so equal types share one object.
    def p_collectiontype_1(self, p):
        'collectiontype : maptype'
        self.pdebug("p_collectiontype_1", p)
        p[0] = self.program.addCollection(p[1])

    def p_collectiontype_2(self, p):
        'collectiontype : settype'
        self.pdebug("p_collectiontype_2", p)
        p[0] = self.program.addCollection(p[1])

    def p_collectiontype_3(self, p):
        'collectiontype : listtype'
        self.pdebug("p_collectiontype_3", p)
        p[0] = self.program.addCollection(p[1])

    def p_maptype(self, p):
        'maptype : MAP LANGLE fieldtype COMMA fieldtype RANGLE'
        self.pdebug("p_maptype", p)
        p[0] = Map(p, p[3], p[5])

    def p_settype(self, p):
        'settype : SET LANGLE fieldtype RANGLE'
        self.pdebug("p_settype", p)
        p[0] = Set(p, p[3])

    def p_listtype(self, p):
        'listtype : LIST LANGLE fieldtype RANGLE'
        self.pdebug("p_listtype", p)
        p[0] = List(p, p[3])

    def p_error(self, p):
        if p is None:
            # PLY passes None instead of a token when the input ends in the
            # middle of a rule; there is no token to take a line from.
            self.errors.append(Error(0, 0, "syntax error: unexpected end of input"))
        else:
            self.errors.append(SyntaxError(p))

    def pdebug(self, name, p):
        """Print the reduction `name` and its symbols when debugging is on."""
        if self.debug:
            print(name+"("+", ".join(["<<"+str(t)+">>" for t in p])+")")

    def __init__(self, **kw):
        """Build the lexer and parser tables.

        Keyword Args:
            debug: truthy to enable PLY debugging and reduction tracing
                (default 0).
        """
        self.debug = kw.get('debug', 0)
        self.names = { }
        self.program = Program()
        self.errors = []

        # Name the generated PLY files after this module and class; fall
        # back to a fixed prefix when the module file name is unavailable.
        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except Exception:
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"
        #print self.debugfile, self.tabmodule

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def parsestring(self, s, filename=""):
        """Parse IDL text `s`, validate the program and print any errors.

        Errors accumulate in self.errors and are printed prefixed with
        `filename`.
        """
        # NOTE(review): yacc.parse is PLY's module-level entry point, which
        # presumably drives the most recently built parser - confirm before
        # using several Parser instances side by side.
        yacc.parse(s)

        # Semantic validation only makes sense on a syntactically clean parse.
        if len(self.errors) == 0:
            try:
                self.program.validate()
            except ErrorException as e:
                self.errors += e.errors

        if len(self.errors):
            for error in self.errors:
                print(filename+":"+str(error))

    def parse(self, filename, doPickle=True):
        """Parse the IDL file `filename`.

        When parsing produced no errors and `doPickle` is true, the resulting
        program is pickled next to the source with a ".thyc" extension.
        """

        f = open(filename, "r")
        try:
            source = f.read()
        finally:
            # Close explicitly instead of relying on garbage collection.
            f.close()

        self.parsestring(source, filename)

        if len(self.errors) == 0 and doPickle:

            # Pickle data is binary; closing the file guarantees the compiled
            # program is flushed to disk before this method returns.
            outf = open(os.path.splitext(filename)[0]+".thyc", "wb")
            try:
                pickle.dump(self.program, outf)
            finally:
                outf.close()