Thrift compiler frontend support for constants
Summary: The parser now accepts constants and adds them into the parse tree
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664880 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index 78278a2..ff176a5 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -117,8 +117,8 @@
indent() << "enum " << tenum->get_name() << " {" << endl;
indent_up();
- vector<t_constant*> constants = tenum->get_constants();
- vector<t_constant*>::iterator c_iter;
+ vector<t_enum_value*> constants = tenum->get_constants();
+ vector<t_enum_value*>::iterator c_iter;
bool first = true;
for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
if (first) {
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index 21ade43..e2e9b71 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -89,8 +89,8 @@
"public class " << tenum->get_name() << " ";
scope_up(f_enum);
- vector<t_constant*> constants = tenum->get_constants();
- vector<t_constant*>::iterator c_iter;
+ vector<t_enum_value*> constants = tenum->get_constants();
+ vector<t_enum_value*>::iterator c_iter;
int value = -1;
for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/generate/t_php_generator.cc b/compiler/cpp/src/generate/t_php_generator.cc
index 18030af..f76b66f 100644
--- a/compiler/cpp/src/generate/t_php_generator.cc
+++ b/compiler/cpp/src/generate/t_php_generator.cc
@@ -59,8 +59,8 @@
f_types_ <<
"$GLOBALS['E_" << tenum->get_name() << "'] = array(" << endl;
- vector<t_constant*> constants = tenum->get_constants();
- vector<t_constant*>::iterator c_iter;
+ vector<t_enum_value*> constants = tenum->get_constants();
+ vector<t_enum_value*>::iterator c_iter;
int value = -1;
for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index 1cbcfd9..e29de80 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -88,8 +88,8 @@
"class " << tenum->get_name() << ":" << endl;
indent_up();
- vector<t_constant*> constants = tenum->get_constants();
- vector<t_constant*>::iterator c_iter;
+ vector<t_enum_value*> constants = tenum->get_constants();
+ vector<t_enum_value*>::iterator c_iter;
int value = -1;
for (c_iter = constants.begin(); c_iter != constants.end(); ++c_iter) {
if ((*c_iter)->has_value()) {
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index 005ce59..7280a37 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -145,7 +145,7 @@
return;
}
va_list args;
- printf("[PARSE] ");
+ printf("[PARSE:%d] ", yylineno);
va_start(args, fmt);
vprintf(fmt, args);
va_end(args);
@@ -189,7 +189,7 @@
*
* @param fmt C format string followed by additional arguments
*/
-void failure(char* fmt, ...) {
+void failure(const char* fmt, ...) {
va_list args;
fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
va_start(args, fmt);
@@ -295,6 +295,121 @@
}
/**
+ * You know, when I started working on Thrift I really thought it wasn't going
+ * to become a programming language because it was just a generator and it
+ * wouldn't need runtime type information and all that jazz. But then we
+ * decided to add constants, and all of a sudden that means runtime type
+ * validation and inference, except the "runtime" is the code generator
+ * runtime. Shit. I've been had.
+ */
+void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
+  // Recursively checks `value` against the declared `type`. `name` labels the
+  // const in the message; every failure is reported by throwing a std::string.
+  if (type->is_void()) {
+    throw "type error: cannot declare a void const: " + name;
+  }
+
+  if (type->is_base_type()) {
+    t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
+    switch (tbase) {
+    case t_base_type::TYPE_STRING:
+      if (value->get_type() != t_const_value::CV_STRING) {
+        throw "type error: const \"" + name + "\" was declared as string";
+      }
+      break;
+    case t_base_type::TYPE_BOOL:  // bool and every integer width require an integer value
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as bool";
+      }
+      break;
+    case t_base_type::TYPE_BYTE:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as byte";
+      }
+      break;
+    case t_base_type::TYPE_I16:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i16";
+      }
+      break;
+    case t_base_type::TYPE_I32:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i32";
+      }
+      break;
+    case t_base_type::TYPE_I64:
+      if (value->get_type() != t_const_value::CV_INTEGER) {
+        throw "type error: const \"" + name + "\" was declared as i64";
+      }
+      break;
+    case t_base_type::TYPE_DOUBLE:  // an integer value is accepted for a double as well
+      if (value->get_type() != t_const_value::CV_INTEGER &&
+          value->get_type() != t_const_value::CV_DOUBLE) {
+        throw "type error: const \"" + name + "\" was declared as double";
+      }
+      break;
+    default: {
+      // `"literal" + tbase` would be pointer arithmetic on the literal, so format the enum value explicitly.
+      char tbase_str[32];
+      snprintf(tbase_str, sizeof(tbase_str), "%d", (int)tbase);
+      throw std::string("compiler error: no const of base type ") + tbase_str + ": " + name;
+    }
+    }
+  } else if (type->is_enum()) {
+    if (value->get_type() != t_const_value::CV_INTEGER) {
+      throw "type error: const \"" + name + "\" was declared as enum";
+    }
+  } else if (type->is_struct() || type->is_xception()) {
+    if (value->get_type() != t_const_value::CV_MAP) {  // struct consts are written as a map of field name -> value
+      throw "type error: const \"" + name + "\" was declared as struct/xception";
+    }
+    const vector<t_field*>& fields = ((t_struct*)type)->get_members();
+    const map<t_const_value*, t_const_value*>& val = value->get_map();
+    map<t_const_value*, t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      if (v_iter->first->get_type() != t_const_value::CV_STRING) {
+        throw "type error: " + name + " struct key must be string";
+      }
+      t_type* field_type = NULL;
+      for (vector<t_field*>::const_iterator f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
+        if ((*f_iter)->get_name() == v_iter->first->get_string()) {
+          field_type = (*f_iter)->get_type();  // no early exit: if a name repeats, the last match wins
+        }
+      }
+      if (field_type == NULL) {
+        throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
+      }
+      validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
+    }
+  } else if (type->is_map()) {
+    t_type* k_type = ((t_map*)type)->get_key_type();
+    t_type* v_type = ((t_map*)type)->get_val_type();
+    const map<t_const_value*, t_const_value*>& val = value->get_map();  // NOTE(review): value kind is not checked here - confirm
+    map<t_const_value*, t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      validate_const_rec(name + "<key>", k_type, v_iter->first);
+      validate_const_rec(name + "<val>", v_type, v_iter->second);
+    }
+  } else if (type->is_list() || type->is_set()) {
+    t_type* e_type = type->is_list()
+      ? ((t_list*)type)->get_elem_type()
+      : ((t_set*)type)->get_elem_type();
+    const vector<t_const_value*>& val = value->get_list();  // NOTE(review): value kind is not checked here - confirm
+    vector<t_const_value*>::const_iterator v_iter;
+    for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
+      validate_const_rec(name + "<elem>", e_type, *v_iter);
+    }
+  }
+}
+
+/**
+ * Checks a parsed const's value against its declared type via validate_const_rec, which throws a std::string on mismatch.
+ */
+void validate_const_type(t_const* c) {
+ validate_const_rec(c->get_name(), c->get_type(), c->get_value());
+}
+
+/**
* Parses a program
*/
void parse(t_program* program, t_program* parent_program) {
@@ -316,8 +431,12 @@
g_parse_mode = INCLUDES;
g_program = program;
g_scope = program->scope();
- if (yyparse() != 0) {
- failure("Parser error during include pass.");
+ try {
+ if (yyparse() != 0) {
+ failure("Parser error during include pass.");
+ }
+ } catch (string x) {
+ failure(x.c_str());
}
fclose(yyin);
diff --git a/compiler/cpp/src/main.h b/compiler/cpp/src/main.h
index 9a3a81e..a7c1d3d 100644
--- a/compiler/cpp/src/main.h
+++ b/compiler/cpp/src/main.h
@@ -2,6 +2,7 @@
#define T_MAIN_H
#include <string>
+#include "parse/t_const.h"
/**
* Defined in the flex library
@@ -29,7 +30,12 @@
/**
* Failure!
*/
-void failure(char* fmt, ...);
+void failure(const char* fmt, ...);
+
+/**
+ * Check constant types
+ */
+void validate_const_type(t_const* c);
/**
* Converts a string filename into a thrift program name
diff --git a/compiler/cpp/src/parse/t_const.h b/compiler/cpp/src/parse/t_const.h
new file mode 100644
index 0000000..77d7e18
--- /dev/null
+++ b/compiler/cpp/src/parse/t_const.h
@@ -0,0 +1,41 @@
+#ifndef T_CONST_H
+#define T_CONST_H
+
+#include "t_type.h"
+#include "t_const_value.h"
+
+/**
+ * A const is a constant value defined across languages that has a type and
+ * a value. The trick here is that the declared type might not match the type
+ * of the value object, since that is not determined until after parsing the
+ * whole thing out.
+ *
+ * @author Mark Slee <mcslee@facebook.com>
+ */
+class t_const {
+ public:
+  /**
+   * Stores the declared type, the identifier, and the parsed value as given.
+   * The two pointers are kept as-is; this class never deletes them.
+   */
+  t_const(t_type* type, std::string name, t_const_value* value)
+    : type_(type), name_(name), value_(value) {}
+
+  /** Declared type of the constant. */
+  t_type* get_type() const { return type_; }
+
+  /** Identifier of the constant, returned as a copy. */
+  std::string get_name() const { return name_; }
+
+  /** Parsed value object of the constant. */
+  t_const_value* get_value() const { return value_; }
+
+ private:
+  // Declaration order matches the constructor's initializer list.
+  t_type* type_;
+  std::string name_;
+  t_const_value* value_;
+};
+
+#endif
+
diff --git a/compiler/cpp/src/parse/t_enum.h b/compiler/cpp/src/parse/t_enum.h
index 002ca82..5fe6f89 100644
--- a/compiler/cpp/src/parse/t_enum.h
+++ b/compiler/cpp/src/parse/t_enum.h
@@ -1,11 +1,11 @@
#ifndef T_ENUM_H
#define T_ENUM_H
-#include "t_constant.h"
+#include "t_enum_value.h"
#include <vector>
/**
- * An enumerated type. A list of t_constant objects with a name for the type.
+ * An enumerated type. A list of constant objects with a name for the type.
*
* @author Mark Slee <mcslee@facebook.com>
*/
@@ -18,11 +18,11 @@
name_ = name;
}
- void append(t_constant* constant) {
+ void append(t_enum_value* constant) {
constants_.push_back(constant);
}
- const std::vector<t_constant*>& get_constants() {
+ const std::vector<t_enum_value*>& get_constants() {
return constants_;
}
@@ -31,7 +31,7 @@
}
private:
- std::vector<t_constant*> constants_;
+ std::vector<t_enum_value*> constants_;
};
#endif
diff --git a/compiler/cpp/src/parse/t_constant.h b/compiler/cpp/src/parse/t_enum_value.h
similarity index 77%
rename from compiler/cpp/src/parse/t_constant.h
rename to compiler/cpp/src/parse/t_enum_value.h
index f502f75..65e61cb 100644
--- a/compiler/cpp/src/parse/t_constant.h
+++ b/compiler/cpp/src/parse/t_enum_value.h
@@ -1,5 +1,5 @@
-#ifndef T_CONSTANT_H
-#define T_CONSTANT_H
+#ifndef T_ENUM_VALUE_H
+#define T_ENUM_VALUE_H
#include <string>
@@ -10,19 +10,19 @@
*
* @author Mark Slee <mcslee@facebook.com>
*/
-class t_constant {
+class t_enum_value {
public:
- t_constant(std::string name) :
+ t_enum_value(std::string name) :
name_(name),
has_value_(false),
value_(0) {}
- t_constant(std::string name, int value) :
+ t_enum_value(std::string name, int value) :
name_(name),
has_value_(true),
value_(value) {}
- ~t_constant() {}
+ ~t_enum_value() {}
const std::string& get_name() {
return name_;
diff --git a/compiler/cpp/src/parse/t_program.h b/compiler/cpp/src/parse/t_program.h
index a3270ad..b0eb60e 100644
--- a/compiler/cpp/src/parse/t_program.h
+++ b/compiler/cpp/src/parse/t_program.h
@@ -12,6 +12,7 @@
#include "t_base_type.h"
#include "t_typedef.h"
#include "t_enum.h"
+#include "t_const.h"
#include "t_struct.h"
#include "t_service.h"
#include "t_list.h"
@@ -24,6 +25,7 @@
*
* Typedefs
* Enumerations
+ * Constants
* Structs
* Exceptions
* Services
@@ -58,6 +60,7 @@
// Accessors for program elements
const std::vector<t_typedef*>& get_typedefs() const { return typedefs_; }
const std::vector<t_enum*>& get_enums() const { return enums_; }
+ const std::vector<t_const*>& get_consts() const { return consts_; }
const std::vector<t_struct*>& get_structs() const { return structs_; }
const std::vector<t_struct*>& get_xceptions() const { return xceptions_; }
const std::vector<t_service*>& get_services() const { return services_; }
@@ -65,6 +68,7 @@
// Program elements
void add_typedef (t_typedef* td) { typedefs_.push_back(td); }
void add_enum (t_enum* te) { enums_.push_back(te); }
+ void add_const (t_const* tc) { consts_.push_back(tc); }
void add_struct (t_struct* ts) { structs_.push_back(ts); }
void add_xception (t_struct* tx) { xceptions_.push_back(tx); }
void add_service (t_service* ts) { services_.push_back(ts); }
@@ -139,6 +143,7 @@
// Components to generate code for
std::vector<t_typedef*> typedefs_;
std::vector<t_enum*> enums_;
+ std::vector<t_const*> consts_;
std::vector<t_struct*> structs_;
std::vector<t_struct*> xceptions_;
std::vector<t_service*> services_;
diff --git a/compiler/cpp/src/thrift.l b/compiler/cpp/src/thrift.l
index e93bd26..0c113cb 100644
--- a/compiler/cpp/src/thrift.l
+++ b/compiler/cpp/src/thrift.l
@@ -27,14 +27,16 @@
* Helper definitions, comments, constants, and whatnot
*/
-intconstant ([0-9]+)
+intconstant ([+-]?[0-9]+)
+dubconstant ([+-]?([0-9]+(\.[0-9]+)?|\.[0-9]+)([eE][+-]?[0-9]+)?)
identifier ([a-zA-Z_][\.a-zA-Z_0-9]*)
whitespace ([ \t\r\n]*)
multicomm ("/*""/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
comment ("//"[^\n]*)
unixcomment ("#"[^\n]*)
symbol ([:;\,\{\}\(\)\=<>\[\]])
-literal ("\""[^"]*"\"")
+dliteral ("\""[^"]*"\"")
+sliteral ("'"[^']*"'")
%%
@@ -71,18 +73,30 @@
"throws" { return tok_throws; }
"service" { return tok_service; }
"enum" { return tok_enum; }
+"const" { return tok_const; }
{intconstant} {
yylval.iconst = atoi(yytext);
return tok_int_constant;
}
+{dubconstant} {
+ yylval.dconst = atof(yytext); /* matched text converted with atof; no range or error checking is done here */
+ return tok_dub_constant;
+}
+
{identifier} {
yylval.id = strdup(yytext);
return tok_identifier;
}
-{literal} {
+{dliteral} {
+ yylval.id = strdup(yytext+1); /* copy the match, skipping its opening double quote */
+ yylval.id[strlen(yylval.id)-1] = '\0'; /* overwrite the closing double quote with the terminator */
+ return tok_literal;
+}
+
+{sliteral} {
yylval.id = strdup(yytext+1);
yylval.id[strlen(yylval.id)-1] = '\0';
return tok_literal;
diff --git a/compiler/cpp/src/thrift.y b/compiler/cpp/src/thrift.y
index e1dd84f..06b71e1 100644
--- a/compiler/cpp/src/thrift.y
+++ b/compiler/cpp/src/thrift.y
@@ -28,17 +28,20 @@
* various parse nodes.
*/
%union {
- char* id;
- int iconst;
- bool tbool;
- t_type* ttype;
- t_typedef* ttypedef;
- t_enum* tenum;
- t_struct* tstruct;
- t_service* tservice;
- t_function* tfunction;
- t_field* tfield;
- t_constant* tconstant;
+ char* id;
+ int iconst;
+ double dconst;
+ bool tbool;
+ t_type* ttype;
+ t_typedef* ttypedef;
+ t_enum* tenum;
+ t_enum_value* tenumv;
+ t_const* tconst;
+ t_const_value* tconstv;
+ t_struct* tstruct;
+ t_service* tservice;
+ t_function* tfunction;
+ t_field* tfield;
}
/**
@@ -48,9 +51,10 @@
%token<id> tok_literal
/**
- * Integer constant value
+ * Constant values
*/
%token<iconst> tok_int_constant
+%token<dconst> tok_dub_constant
/**
* Header keywoards
@@ -96,6 +100,7 @@
%token tok_extends
%token tok_service
%token tok_enum
+%token tok_const
/**
* Grammar nodes
@@ -118,7 +123,14 @@
%type<tenum> Enum
%type<tenum> EnumDefList
-%type<tconstant> EnumDef
+%type<tenumv> EnumDef
+
+%type<tconst> Const
+%type<tconstv> ConstValue
+%type<tconstv> ConstList
+%type<tconstv> ConstListContents
+%type<tconstv> ConstMap
+%type<tconstv> ConstMapContents
%type<tstruct> Struct
%type<tstruct> Xception
@@ -217,7 +229,14 @@
}
Definition:
- TypeDefinition
+ Const
+ {
+ pdebug("Definition -> Const");
+ if (g_parse_mode == PROGRAM) {
+ g_program->add_const($1);
+ }
+ }
+| TypeDefinition
{
pdebug("Definition -> TypeDefinition");
if (g_parse_mode == PROGRAM) {
@@ -309,17 +328,97 @@
EnumDef:
tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
{
- pdebug("EnumDef => tok_identifier = tok_int_constant");
+ pdebug("EnumDef -> tok_identifier = tok_int_constant");
if ($3 < 0) {
pwarning(1, "Negative value supplied for enum %s.\n", $1);
}
- $$ = new t_constant($1, $3);
+ $$ = new t_enum_value($1, $3);
}
|
tok_identifier CommaOrSemicolonOptional
{
- pdebug("EnumDef => tok_identifier");
- $$ = new t_constant($1);
+ pdebug("EnumDef -> tok_identifier");
+ $$ = new t_enum_value($1);
+ }
+
+Const:
+ tok_const FieldType tok_identifier '=' ConstValue CommaOrSemicolonOptional
+ {
+ pdebug("Const -> tok_const FieldType tok_identifier = ConstValue");
+ $$ = new t_const($2, $3, $5); /* declared type, identifier, parsed value */
+ validate_const_type($$); /* throws a std::string when the value does not fit the declared type */
+ }
+
+ConstValue:
+ tok_int_constant
+ {
+ pdebug("ConstValue => tok_int_constant");
+ $$ = new t_const_value(); /* every scalar alternative allocates a fresh value node */
+ $$->set_integer($1); /* $1 is the int the lexer produced with atoi */
+ }
+| tok_dub_constant
+ {
+ pdebug("ConstValue => tok_dub_constant");
+ $$ = new t_const_value();
+ $$->set_double($1); /* $1 is the double the lexer produced with atof */
+ }
+| tok_literal
+ {
+ pdebug("ConstValue => tok_literal");
+ $$ = new t_const_value();
+ $$->set_string($1); /* $1 is the literal text with its surrounding quotes already removed by the lexer */
+ }
+| ConstList
+ {
+ pdebug("ConstValue => ConstList");
+ $$ = $1; /* reuse the node built by ConstList */
+ }
+| ConstMap
+ {
+ pdebug("ConstValue => ConstMap");
+ $$ = $1; /* reuse the node built by ConstMap */
+ }
+
+ConstList:
+ '[' ConstListContents ']'
+ {
+ pdebug("ConstList => [ ConstListContents ]");
+ $$ = $2; /* the bracketed contents node is the list value itself */
+ }
+
+ConstListContents:
+ ConstListContents ConstValue CommaOrSemicolonOptional
+ {
+ pdebug("ConstListContents => ConstListContents ConstValue CommaOrSemicolonOptional");
+ $$ = $1; /* left recursion: keep extending the node built so far */
+ $$->add_list($2); /* append this element */
+ }
+|
+ {
+ pdebug("ConstListContents =>");
+ $$ = new t_const_value(); /* empty production: start a new node */
+ $$->set_list(); /* presumably marks the node as a list - confirm in t_const_value.h */
+ }
+
+ConstMap:
+ '{' ConstMapContents '}'
+ {
+ pdebug("ConstMap => { ConstMapContents }");
+ $$ = $2; /* the braced contents node is the map value itself */
+ }
+
+ConstMapContents:
+  ConstMapContents ConstValue ':' ConstValue CommaOrSemicolonOptional
+    {
+      pdebug("ConstMapContents => ConstMapContents ConstValue : ConstValue CommaOrSemicolonOptional");
+      $$ = $1; /* left recursion: keep extending the node built so far */
+      $$->add_map($2, $4); /* $2 is the key, $4 the value; the ':' token is $3 */
+    }
+|
+    {
+      pdebug("ConstMapContents =>");
+      $$ = new t_const_value(); /* empty production: start a new node */
+      $$->set_map(); /* presumably marks the node as a map - confirm in t_const_value.h */
}
Struct: