Thrift compiler frontend support for constants

Summary: The parser now accepts constants and adds them into the parse tree


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664880 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index 78278a2..ff176a5 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -117,8 +117,8 @@
     indent() << "enum " << tenum->get_name() << " {" << endl;
   indent_up();
 
-  vector<t_constant*> constants = tenum->get_constants();
-  vector<t_constant*>::iterator c_iter;
+  vector<t_enum_value*> constants = tenum->get_constants();
+  vector<t_enum_value*>::iterator c_iter;
   bool first = true;
   for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
     if (first) {
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index 21ade43..e2e9b71 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -89,8 +89,8 @@
     "public class " << tenum->get_name() << " ";
   scope_up(f_enum);
 
-  vector<t_constant*> constants = tenum->get_constants();
-  vector<t_constant*>::iterator c_iter;
+  vector<t_enum_value*> constants = tenum->get_constants();
+  vector<t_enum_value*>::iterator c_iter;
   int value = -1;
   for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
     if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/generate/t_php_generator.cc b/compiler/cpp/src/generate/t_php_generator.cc
index 18030af..f76b66f 100644
--- a/compiler/cpp/src/generate/t_php_generator.cc
+++ b/compiler/cpp/src/generate/t_php_generator.cc
@@ -59,8 +59,8 @@
   f_types_ <<
     "$GLOBALS['E_" << tenum->get_name() << "'] = array(" << endl;
   
-  vector<t_constant*> constants = tenum->get_constants();
-  vector<t_constant*>::iterator c_iter;
+  vector<t_enum_value*> constants = tenum->get_constants();
+  vector<t_enum_value*>::iterator c_iter;
   int value = -1;
   for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
     if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 1cbcfd9..e29de80 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -88,8 +88,8 @@
     "class " << tenum->get_name() << ":" << endl;
   indent_up();
   
-  vector<t_constant*> constants = tenum->get_constants();
-  vector<t_constant*>::iterator c_iter;
+  vector<t_enum_value*> constants = tenum->get_constants();
+  vector<t_enum_value*>::iterator c_iter;
   int value = -1;
   for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
     if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index 005ce59..7280a37 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -145,7 +145,7 @@
     return;
   }
   va_list args;
-  printf("[PARSE] ");
+  printf("[PARSE:%d] ", yylineno);
   va_start(args, fmt);
   vprintf(fmt, args);
   va_end(args);
@@ -189,7 +189,7 @@
  *
  * @param fmt C format string followed by additional arguments
  */
-void failure(char* fmt, ...) {
+void failure(const char* fmt, ...) {
   va_list args; 
   fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
   va_start(args, fmt);
@@ -295,6 +295,121 @@
 }
 
 /**
+ * Recursively checks a parsed const value against its declared type; throws a
+ * std::string describing the first mismatch. Const support is what forces the
+ * code generator to do this kind of type validation itself.
+ * @param name   label used in error messages; ".field", "<key>" etc. get appended
+ * @param type   declared type of the constant (or of the nested element)
+ * @param value  parsed value to check against type
+ */
+void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
+  if (type->is_void()) {
+    throw "type error: cannot declare a void const: " + name;
+  }
+  if (type->is_base_type()) {
+    t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
+    switch (tbase) {
+    case t_base_type::TYPE_STRING:
+      if (value->get_type() != t_const_value::CV_STRING) {
+        throw "type error: const \"" + name + "\" was declared as string";
+      }
+      break;
+    case t_base_type::TYPE_BOOL:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as bool";
+      }
+      break;
+    case t_base_type::TYPE_BYTE:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as byte";
+      }
+      break;
+    case t_base_type::TYPE_I16:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i16";
+      }
+      break;
+    case t_base_type::TYPE_I32:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i32";
+      }
+      break;
+    case t_base_type::TYPE_I64:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i64";
+      }
+      break;
+    case t_base_type::TYPE_DOUBLE:
+      // An integer literal is also accepted where a double is declared.
+      if (value->get_type() != t_const_value::CV_INTEGER &&
+          value->get_type() != t_const_value::CV_DOUBLE) {
+        throw "type error: const \"" + name + "\" was declared as double";
+      }
+      break;
+    default:
+      // Use the type's name; literal + enum would only offset the char pointer.
+      throw "compiler error: no const of base type " + type->get_name() + ": " + name;
+    }
+  } else if (type->is_enum()) {
+    if (value->get_type() != t_const_value::CV_INTEGER) {
+      throw "type error: const \"" + name + "\" was declared as enum";
+    }
+  } else if (type->is_struct() || type->is_xception()) {
+    if (value->get_type() != t_const_value::CV_MAP) {
+      throw "type error: const \"" + name + "\" was declared as struct/xception";
+    }
+    const vector<t_field*>& fields = ((t_struct*)type)->get_members();
+    vector<t_field*>::const_iterator f_iter;
+    const map<t_const_value*, t_const_value*>& val = value->get_map();
+    map<t_const_value*, t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      if (v_iter->first->get_type() != t_const_value::CV_STRING) {
+        throw "type error: " + name + " struct key must be string";
+      }
+      t_type* field_type = NULL;
+      for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
+        if ((*f_iter)->get_name() == v_iter->first->get_string()) {
+          field_type = (*f_iter)->get_type();
+        }
+      }
+      if (field_type == NULL) {
+        throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
+      }
+      validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
+    }
+  } else if (type->is_map()) {
+    if (value->get_type() != t_const_value::CV_MAP) {
+      throw "type error: const \"" + name + "\" was declared as map";
+    }
+    t_type* k_type = ((t_map*)type)->get_key_type();
+    t_type* v_type = ((t_map*)type)->get_val_type();
+    const map<t_const_value*, t_const_value*>& val = value->get_map();
+    map<t_const_value*, t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      validate_const_rec(name + "<key>", k_type, v_iter->first);
+      validate_const_rec(name + "<val>", v_type, v_iter->second);
+    }
+  } else if (type->is_list() || type->is_set()) {
+    if (value->get_type() != t_const_value::CV_LIST) {
+      throw "type error: const \"" + name + "\" was declared as list/set";
+    }
+    t_type* e_type = type->is_list() ? ((t_list*)type)->get_elem_type() : ((t_set*)type)->get_elem_type();
+    const vector<t_const_value*>& val = value->get_list();
+    vector<t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      validate_const_rec(name + "<elem>", e_type, *v_iter);
+    }
+  }
+}
+
+/** Checks a parsed const's value against its declared type (see validate_const_rec). */
+void validate_const_type(t_const* c) {
+  t_type* declared_type = c->get_type();
+  t_const_value* parsed_value = c->get_value();
+  validate_const_rec(c->get_name(), declared_type, parsed_value);
+}
+
+/**
  * Parses a program
  */
 void parse(t_program* program, t_program* parent_program) {  
@@ -316,8 +431,12 @@
   g_parse_mode = INCLUDES; 
   g_program = program;
   g_scope = program->scope();
-  if (yyparse() != 0) {
-    failure("Parser error during include pass.");
+  try {
+    if (yyparse() != 0) {
+      failure("Parser error during include pass.");
+    }
+  } catch (const string& x) {  // const validation reports errors by throwing std::string
+    failure("%s", x.c_str());  // message may contain user text; never use it as the format
   }
   fclose(yyin);
 
diff --git a/compiler/cpp/src/main.h b/compiler/cpp/src/main.h
index 9a3a81e..a7c1d3d 100644
--- a/compiler/cpp/src/main.h
+++ b/compiler/cpp/src/main.h
@@ -2,6 +2,7 @@
 #define T_MAIN_H
 
 #include <string>
+#include "parse/t_const.h"
 
 /**
  * Defined in the flex library
@@ -29,7 +30,12 @@
 /**
  * Failure!
  */
-void failure(char* fmt, ...);
+void failure(const char* fmt, ...);
+
+/**
+ * Check constant types
+ */
+void validate_const_type(t_const* c);
 
 /**
  * Converts a string filename into a thrift program name
diff --git a/compiler/cpp/src/parse/t_const.h b/compiler/cpp/src/parse/t_const.h
new file mode 100644
index 0000000..77d7e18
--- /dev/null
+++ b/compiler/cpp/src/parse/t_const.h
@@ -0,0 +1,41 @@
+#ifndef T_CONST_H
+#define T_CONST_H
+
+#include "t_type.h"
+#include "t_const_value.h"
+
+/**
+ * A const is a constant value defined across languages that has a type and
+ * a value. The trick here is that the declared type might not match the type
+ * of the value object, since that is not determined until after parsing the
+ * whole thing out.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class t_const {
+ public:
+  // Stores the given pointers as-is; no destructor is defined, so the
+  // pointed-to type and value are never deleted by this class.
+  t_const(t_type* type, const std::string& name, t_const_value* value) :
+    type_(type),
+    name_(name),
+    value_(value) {}
+
+  // Declared type of the constant.
+  t_type* get_type() const { return type_; }
+
+  // Name of the constant. Returned by const reference, matching
+  // t_enum_value::get_name(), so callers do not pay for a copy per call.
+  const std::string& get_name() const { return name_; }
+
+  // Parsed value; its kind may not match the declared type until validated.
+  t_const_value* get_value() const { return value_; }
+
+ private:
+  t_type* type_;
+  std::string name_;
+  t_const_value* value_;
+};
+
+#endif
+
diff --git a/compiler/cpp/src/parse/t_enum.h b/compiler/cpp/src/parse/t_enum.h
index 002ca82..5fe6f89 100644
--- a/compiler/cpp/src/parse/t_enum.h
+++ b/compiler/cpp/src/parse/t_enum.h
@@ -1,11 +1,11 @@
 #ifndef T_ENUM_H
 #define T_ENUM_H
 
-#include "t_constant.h"
+#include "t_enum_value.h"
 #include <vector>
 
 /**
- * An enumerated type. A list of t_constant objects with a name for the type.
+ * An enumerated type. A list of t_enum_value objects with a name for the type.
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
@@ -18,11 +18,11 @@
     name_ = name;
   }
   
-  void append(t_constant* constant) {
+  void append(t_enum_value* constant) {
     constants_.push_back(constant);
   }
 
-  const std::vector<t_constant*>& get_constants() {
+  const std::vector<t_enum_value*>& get_constants() {
     return constants_;
   }
 
@@ -31,7 +31,7 @@
   }
 
  private:
-  std::vector<t_constant*> constants_;
+  std::vector<t_enum_value*> constants_;
 };
 
 #endif
diff --git a/compiler/cpp/src/parse/t_constant.h b/compiler/cpp/src/parse/t_enum_value.h
similarity index 77%
rename from compiler/cpp/src/parse/t_constant.h
rename to compiler/cpp/src/parse/t_enum_value.h
index f502f75..65e61cb 100644
--- a/compiler/cpp/src/parse/t_constant.h
+++ b/compiler/cpp/src/parse/t_enum_value.h
@@ -1,5 +1,5 @@
-#ifndef T_CONSTANT_H
-#define T_CONSTANT_H
+#ifndef T_ENUM_VALUE_H
+#define T_ENUM_VALUE_H
 
 #include <string>
 
@@ -10,19 +10,19 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_constant {
+class t_enum_value {
  public:
-  t_constant(std::string name) :
+  t_enum_value(std::string name) :
     name_(name),
     has_value_(false),
     value_(0) {}
 
-  t_constant(std::string name, int value) :
+  t_enum_value(std::string name, int value) :
     name_(name),
     has_value_(true),
     value_(value) {}
 
-  ~t_constant() {}
+  ~t_enum_value() {}
 
   const std::string& get_name() {
     return name_;
diff --git a/compiler/cpp/src/parse/t_program.h b/compiler/cpp/src/parse/t_program.h
index a3270ad..b0eb60e 100644
--- a/compiler/cpp/src/parse/t_program.h
+++ b/compiler/cpp/src/parse/t_program.h
@@ -12,6 +12,7 @@
 #include "t_base_type.h"
 #include "t_typedef.h"
 #include "t_enum.h"
+#include "t_const.h"
 #include "t_struct.h"
 #include "t_service.h"
 #include "t_list.h"
@@ -24,6 +25,7 @@
  *
  *   Typedefs
  *   Enumerations
+ *   Constants
  *   Structs
  *   Exceptions
  *   Services
@@ -58,6 +60,7 @@
   // Accessors for program elements
   const std::vector<t_typedef*>& get_typedefs()  const { return typedefs_;  }
   const std::vector<t_enum*>&    get_enums()     const { return enums_;     }
+  const std::vector<t_const*>&   get_consts()    const { return consts_;    }
   const std::vector<t_struct*>&  get_structs()   const { return structs_;   }
   const std::vector<t_struct*>&  get_xceptions() const { return xceptions_; }
   const std::vector<t_service*>& get_services()  const { return services_;  }
@@ -65,6 +68,7 @@
   // Program elements
   void add_typedef  (t_typedef* td) { typedefs_.push_back(td);  }
   void add_enum     (t_enum*    te) { enums_.push_back(te);     }
+  void add_const    (t_const*   tc) { consts_.push_back(tc);    }
   void add_struct   (t_struct*  ts) { structs_.push_back(ts);   }
   void add_xception (t_struct*  tx) { xceptions_.push_back(tx); }
   void add_service  (t_service* ts) { services_.push_back(ts);  }
@@ -139,6 +143,7 @@
   // Components to generate code for
   std::vector<t_typedef*> typedefs_;
   std::vector<t_enum*>    enums_;
+  std::vector<t_const*>   consts_;
   std::vector<t_struct*>  structs_;
   std::vector<t_struct*>  xceptions_;
   std::vector<t_service*> services_;
diff --git a/compiler/cpp/src/thrift.l b/compiler/cpp/src/thrift.l
index e93bd26..0c113cb 100644
--- a/compiler/cpp/src/thrift.l
+++ b/compiler/cpp/src/thrift.l
@@ -27,14 +27,16 @@
  * Helper definitions, comments, constants, and whatnot
  */
 
-intconstant  ([0-9]+)
+intconstant  ([+-]?[0-9]+)
+dubconstant  ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
 identifier   ([a-zA-Z_][\.a-zA-Z_0-9]*)
 whitespace   ([ \t\r\n]*)
 multicomm    ("/*""/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
 comment      ("//"[^\n]*)
 unixcomment  ("#"[^\n]*)
 symbol       ([:;\,\{\}\(\)\=<>\[\]])
-literal      ("\""[^"]*"\"")
+dliteral      ("\""[^"]*"\"")
+sliteral      ("'"[^']*"'")
 
 %%
 
@@ -71,18 +73,30 @@
 "throws"        { return tok_throws;        }
 "service"       { return tok_service;       }
 "enum"          { return tok_enum;          }
+"const"         { return tok_const;         }
 
 {intconstant} {
   yylval.iconst = atoi(yytext);
   return tok_int_constant;
 }
 
+{dubconstant} {
+  yylval.dconst = atof(yytext);  /* convert the matched text to a double for the parser */
+  return tok_dub_constant;  /* plain digit runs tie with {intconstant}, which is listed first */
+}
+
 {identifier} {
   yylval.id = strdup(yytext);
   return tok_identifier;
 }
 
-{literal} {
+{dliteral} {
+  yylval.id = strdup(yytext+1);
+  yylval.id[strlen(yylval.id)-1] = '\0';
+  return tok_literal;
+}
+
+{sliteral} {
   yylval.id = strdup(yytext+1);
   yylval.id[strlen(yylval.id)-1] = '\0';
   return tok_literal;
diff --git a/compiler/cpp/src/thrift.y b/compiler/cpp/src/thrift.y
index e1dd84f..06b71e1 100644
--- a/compiler/cpp/src/thrift.y
+++ b/compiler/cpp/src/thrift.y
@@ -28,17 +28,20 @@
  * various parse nodes.
  */
 %union {
-  char*       id;
-  int         iconst;
-  bool        tbool;
-  t_type*     ttype;
-  t_typedef*  ttypedef;
-  t_enum*     tenum;
-  t_struct*   tstruct;
-  t_service*  tservice;
-  t_function* tfunction;
-  t_field*    tfield;
-  t_constant* tconstant;
+  char*          id;
+  int            iconst;
+  double         dconst;
+  bool           tbool;
+  t_type*        ttype;
+  t_typedef*     ttypedef;
+  t_enum*        tenum;
+  t_enum_value*  tenumv;
+  t_const*       tconst;
+  t_const_value* tconstv;
+  t_struct*      tstruct;
+  t_service*     tservice;
+  t_function*    tfunction;
+  t_field*       tfield;
 }
 
 /**
@@ -48,9 +51,10 @@
 %token<id>     tok_literal
 
 /**
- * Integer constant value
+ * Constant values
  */
 %token<iconst> tok_int_constant
+%token<dconst> tok_dub_constant
 
 /**
  * Header keywoards
@@ -96,6 +100,7 @@
 %token tok_extends
 %token tok_service
 %token tok_enum
+%token tok_const
 
 /**
  * Grammar nodes
@@ -118,7 +123,14 @@
 
 %type<tenum>     Enum
 %type<tenum>     EnumDefList
-%type<tconstant> EnumDef
+%type<tenumv>    EnumDef
+
+%type<tconst>    Const
+%type<tconstv>   ConstValue
+%type<tconstv>   ConstList
+%type<tconstv>   ConstListContents
+%type<tconstv>   ConstMap
+%type<tconstv>   ConstMapContents
 
 %type<tstruct>   Struct
 %type<tstruct>   Xception
@@ -217,7 +229,14 @@
     }
 
 Definition:
-  TypeDefinition
+  Const
+    {
+      pdebug("Definition -> Const");
+      if (g_parse_mode == PROGRAM) {
+        g_program->add_const($1);
+      }    
+    }
+| TypeDefinition
     {
       pdebug("Definition -> TypeDefinition");
       if (g_parse_mode == PROGRAM) {
@@ -309,17 +328,97 @@
 EnumDef:
   tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
     {
-      pdebug("EnumDef => tok_identifier = tok_int_constant");
+      pdebug("EnumDef -> tok_identifier = tok_int_constant");
       if ($3 < 0) {
         pwarning(1, "Negative value supplied for enum %s.\n", $1);
       }
-      $$ = new t_constant($1, $3);
+      $$ = new t_enum_value($1, $3);
     }
 |
   tok_identifier CommaOrSemicolonOptional
     {
-      pdebug("EnumDef => tok_identifier");
-      $$ = new t_constant($1);
+      pdebug("EnumDef -> tok_identifier");
+      $$ = new t_enum_value($1);
+    }
+
+Const:
+  tok_const FieldType tok_identifier '=' ConstValue CommaOrSemicolonOptional
+    {
+      pdebug("Const -> tok_const FieldType tok_identifier = ConstValue");
+      $$ = new t_const($2, $3, $5);  // declared type, name, parsed value
+      validate_const_type($$);  // check the parsed value against the declared type right away
+    }
+
+ConstValue:
+  tok_int_constant
+    {
+      pdebug("ConstValue => tok_int_constant");
+      $$ = new t_const_value();  // every scalar alternative allocates a fresh value node
+      $$->set_integer($1);  // hand the integer token's value to the node
+    }
+| tok_dub_constant
+    {
+      pdebug("ConstValue => tok_dub_constant");
+      $$ = new t_const_value();
+      $$->set_double($1);  // hand the double token's value to the node
+    }
+| tok_literal
+    {
+      pdebug("ConstValue => tok_literal");
+      $$ = new t_const_value();
+      $$->set_string($1);  // the scanner has already stripped the surrounding quotes
+    }
+| ConstList
+    {
+      pdebug("ConstValue => ConstList");
+      $$ = $1;  // the list node built by ConstList is passed through unchanged
+    }
+| ConstMap
+    {
+      pdebug("ConstValue => ConstMap");
+      $$ = $1;  // the map node built by ConstMap is passed through unchanged
+    }
+
+ConstList:
+  '[' ConstListContents ']'
+    {
+      pdebug("ConstList => [ ConstListContents ]");
+      $$ = $2;  // the brackets only delimit; the contents node is the result
+    }
+
+ConstListContents:
+  ConstListContents ConstValue CommaOrSemicolonOptional
+    {
+      pdebug("ConstListContents => ConstListContents ConstValue CommaOrSemicolonOptional");
+      $$ = $1;  // left recursion: keep using the node accumulated so far
+      $$->add_list($2);  // append this element to it
+    }
+|
+    {
+      pdebug("ConstListContents =>");
+      $$ = new t_const_value();  // empty alternative: start a fresh node
+      $$->set_list();  // called before any element is added
+    }
+
+ConstMap:
+  '{' ConstMapContents '}'
+    {
+      pdebug("ConstMap => { ConstMapContents }");
+      $$ = $2;  // the braces only delimit; the contents node is the result
+    }
+
+ConstMapContents:
+  ConstMapContents ConstValue ':' ConstValue CommaOrSemicolonOptional
+    {
+      pdebug("ConstMapContents => ConstMapContents ConstValue ':' ConstValue CommaOrSemicolonOptional");
+      $$ = $1;  // left recursion: keep using the node accumulated so far
+      $$->add_map($2, $4);  // key is the value before ':', mapped value the one after
+    }
+|
+    {
+      pdebug("ConstMapContents =>");
+      $$ = new t_const_value();  // empty alternative: start a fresh node
+      $$->set_map();  // called before any entry is added
+    }
 
 Struct: