blob: 2fbf913b07d21e8105cefe271f6a2e440d25956a [file] [log] [blame]
/**
* thrift - a lightweight cross-language rpc/serialization tool
*
* This file contains the main compiler engine for Thrift, which invokes the
* scanner/parser to build the thrift object tree. The interface generation
* code for each language lives in a file by the language name under the
* generate/ folder, and all parse structures live in parse/
*
* @author Mark Slee <mcslee@facebook.com>
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <sys/types.h>
#include <sys/stat.h>
// Careful: must include globals first for extern definitions
#include "globals.h"
#include "main.h"
#include "parse/t_program.h"
#include "parse/t_scope.h"
#include "generate/t_cpp_generator.h"
#include "generate/t_java_generator.h"
#include "generate/t_php_generator.h"
#include "generate/t_py_generator.h"
#include "generate/t_xsd_generator.h"
using namespace std;
/**
 * Global program tree. parse() points this at the program whose file is
 * currently being fed to the parser.
 */
t_program* g_program;
/**
 * Global base types. Each one is allocated once in main() before parsing
 * starts. g_type_slist is a second "string" base type on which main() sets
 * the string-list flag.
 */
t_type* g_type_void;
t_type* g_type_string;
t_type* g_type_slist;
t_type* g_type_bool;
t_type* g_type_byte;
t_type* g_type_i16;
t_type* g_type_i32;
t_type* g_type_i64;
t_type* g_type_double;
/**
 * Global scope. parse() sets this to the scope of the program being parsed.
 */
t_scope* g_scope;
/**
 * Parent scope to also parse types into. parse() sets this to the scope of
 * the including program, or NULL when parsing the top-level program.
 */
t_scope* g_parent_scope;
/**
 * Prefix for putting types in parent scope. parse() sets this to the
 * program name followed by ".".
 */
string g_parent_prefix;
/**
 * Parsing pass. parse() runs the parser twice per file: first with
 * INCLUDES, then with PROGRAM.
 */
PARSE_MODE g_parse_mode;
/**
 * Current directory of file being parsed. include_file() searches this
 * directory before any directory in g_incl_searchpath.
 */
string g_curdir;
/**
 * Current file being parsed. Printed in error, warning and failure messages.
 */
string g_curpath;
/**
 * Search path for inclusions. main() appends one entry per -I option.
 */
vector<string> g_incl_searchpath;
/**
 * Global debug state. Non-zero enables pdebug() output; set by -debug.
 */
int g_debug = 0;
/**
 * Warning level. pwarning() prints a message only when its level is less
 * than or equal to this value. -nowarn sets it to 0, -strict sets it to 2.
 */
int g_warn = 1;
/**
 * Verbose output. Non-zero enables pverbose() output; set by -v / -verbose.
 */
int g_verbose = 0;
/**
 * Global time string. main() sets this to the result of ctime() for the
 * time at which the compiler was started.
 */
char* g_time_str;
/**
 * Flags to control code generation. Each gen_<lang> flag is switched on by
 * the matching command line option and selects one generator in generate().
 * gen_recurse makes generate() also process every included program.
 */
bool gen_cpp = false;
bool gen_java = false;
bool gen_py = false;
bool gen_xsd = false;
bool gen_php = false;
bool gen_phpi = false;
bool gen_recurse = false;
/**
 * Error reporting hook for the generated scanner/parser. lex and yacc expect
 * the routine to carry this name, which is why it is not called something
 * friendlier. It may also be called directly to report an error.
 *
 * The output on stderr is a header line with the current file, line number
 * and last token, followed by the formatted message and a newline.
 *
 * @param fmt printf-style format string followed by its arguments
 */
void yyerror(char* fmt, ...) {
  // Location header first so the message below has context
  fprintf(stderr,
          "[ERROR:%s:%d] (last token was '%s')\n",
          g_curpath.c_str(),
          yylineno,
          yytext);

  // Caller supplied message
  va_list ap;
  va_start(ap, fmt);
  vfprintf(stderr, fmt, ap);
  va_end(ap);

  fputc('\n', stderr);
}
/**
 * Emits a parser trace line on stdout, prefixed with the current line
 * number. Does nothing unless g_debug is non-zero.
 *
 * @param fmt printf-style format string followed by its arguments
 */
void pdebug(char* fmt, ...) {
  if (g_debug != 0) {
    printf("[PARSE:%d] ", yylineno);

    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    putchar('\n');
  }
}
/**
 * Emits a progress message on stdout when verbose mode is on (g_verbose is
 * non-zero). No prefix or trailing newline is added; the caller's format
 * string is printed as is.
 *
 * @param fmt printf-style format string followed by its arguments
 */
void pverbose(char* fmt, ...) {
  if (g_verbose != 0) {
    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);
  }
}
/**
 * Emits a warning on stdout, prefixed with the current file and line number
 * and followed by a newline. The warning is dropped when its level is above
 * the configured warning level g_warn.
 *
 * @param level severity threshold; printed only when level <= g_warn
 * @param fmt   printf-style format string followed by its arguments
 */
void pwarning(int level, char* fmt, ...) {
  const bool enabled = (level <= g_warn);
  if (enabled) {
    printf("[WARNING:%s:%d] ", g_curpath.c_str(), yylineno);

    va_list ap;
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    putchar('\n');
  }
}
/**
 * Prints a failure message to stderr and terminates the process with exit
 * status 1. The message is prefixed with the current file and line number.
 *
 * @param fmt printf-style format string followed by its arguments
 */
void failure(const char* fmt, ...) {
  va_list args;
  fprintf(stderr, "[FAILURE:%s:%d] ", g_curpath.c_str(), yylineno);
  va_start(args, fmt);
  vfprintf(stderr, fmt, args);
  va_end(args);
  // The newline belongs to the message, so it must go to the same stream;
  // writing it to stdout would split the output when only one is redirected.
  fprintf(stderr, "\n");
  exit(1);
}
/**
 * Derives a thrift program name from a file path: everything up to and
 * including the last '/' is dropped, then everything from the last '.' on
 * is dropped.
 *
 * @param filename path of the thrift file
 * @return the file's base name without its final extension
 */
string program_name(string filename) {
  // Strip the directory part, if any
  const string::size_type last_slash = filename.find_last_of('/');
  if (last_slash != string::npos) {
    filename.erase(0, last_slash + 1);
  }

  // Strip the final extension, if any
  const string::size_type last_dot = filename.find_last_of('.');
  if (last_dot != string::npos) {
    filename.erase(last_dot);
  }

  return filename;
}
/**
 * Returns the directory part of a path: everything before the last '/'.
 * A path with no '/' at all yields ".", the current directory.
 *
 * @param filename path to split
 * @return the directory portion, without a trailing slash
 */
string directory_name(string filename) {
  const string::size_type cut = filename.find_last_of('/');
  return (cut == string::npos) ? string(".") : string(filename, 0, cut);
}
/**
 * Resolves the name given in an include directive to a canonical path.
 *
 * A name starting with '/' is resolved on its own. Any other name is tried
 * relative to the directory of the file being parsed (g_curdir) first and
 * then relative to each entry of g_incl_searchpath, in order; the first
 * candidate that resolves and can be stat'ed wins.
 *
 * @param filename name as written in the include directive
 * @return the resolved path, or an empty string (after a warning) when the
 *         file cannot be located
 */
string include_file(string filename) {
  char resolved[PATH_MAX];
  struct stat info;

  if (filename[0] == '/') {
    // Absolute path: there is exactly one candidate
    if (realpath(filename.c_str(), resolved) == NULL) {
      pwarning(0, "Cannot open include file %s\n", filename.c_str());
      return std::string();
    }
    if (stat(resolved, &info) == 0) {
      return resolved;
    }
  } else {
    // Relative path: current directory first, then the -I directories
    vector<string> candidates;
    candidates.push_back(g_curdir);
    candidates.insert(candidates.end(),
                      g_incl_searchpath.begin(),
                      g_incl_searchpath.end());

    vector<string>::const_iterator dir;
    for (dir = candidates.begin(); dir != candidates.end(); ++dir) {
      const string candidate = *dir + "/" + filename;
      if (realpath(candidate.c_str(), resolved) != NULL &&
          stat(resolved, &info) == 0) {
        return resolved;
      }
    }
  }

  // Nothing matched
  pwarning(0, "Could not find include file %s\n", filename.c_str());
  return std::string();
}
/**
 * Displays the usage message on stderr and then exits with status 1.
 *
 * Every option handled by the argument loop in main() is listed here,
 * including -xsd.
 */
void usage() {
  fprintf(stderr, "Usage: thrift [options] file\n");
  fprintf(stderr, "Options:\n");
  fprintf(stderr, " -cpp Generate C++ output files\n");
  fprintf(stderr, " -java Generate Java output files\n");
  fprintf(stderr, " -php Generate PHP output files\n");
  fprintf(stderr, " -phpi Generate PHP inlined files\n");
  fprintf(stderr, " -py Generate Python output files\n");
  fprintf(stderr, " -xsd Generate XSD output files\n");
  fprintf(stderr, " -I dir Add a directory to the list of directories \n");
  fprintf(stderr, " searched for include directives\n");
  fprintf(stderr, " -nowarn Suppress all compiler warnings (BAD!)\n");
  fprintf(stderr, " -strict Strict compiler warnings on\n");
  fprintf(stderr, " -v[erbose] Verbose mode\n");
  fprintf(stderr, " -r[ecurse] Also generate included files\n");
  fprintf(stderr, " -debug Parse debug trace to stdout\n");
  exit(1);
}
/**
* You know, when I started working on Thrift I really thought it wasn't going
* to become a programming language because it was just a generator and it
* wouldn't need runtime type information and all that jazz. But then we
* decided to add constants, and all of a sudden that means runtime type
* validation and inference, except the "runtime" is the code generator
* runtime. Shit. I've been had.
*/
void validate_const_rec(std::string name, t_type* type, t_const_value* value) {
if (type->is_void()) {
throw "type error: cannot declare a void const: " + name;
}
if (type->is_base_type()) {
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
if (value->get_type() != t_const_value::CV_STRING) {
throw "type error: const \"" + name + "\" was declared as string";
}
break;
case t_base_type::TYPE_BOOL:
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as bool";
}
break;
case t_base_type::TYPE_BYTE:
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as byte";
}
break;
case t_base_type::TYPE_I16:
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as i16";
}
break;
case t_base_type::TYPE_I32:
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as i32";
}
break;
case t_base_type::TYPE_I64:
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as i64";
}
break;
case t_base_type::TYPE_DOUBLE:
if (value->get_type() != t_const_value::CV_INTEGER &&
value->get_type() != t_const_value::CV_DOUBLE) {
throw "type error: const \"" + name + "\" was declared as double";
}
break;
default:
throw "compiler error: no const of base type " + tbase + name;
}
} else if (type->is_enum()) {
if (value->get_type() != t_const_value::CV_INTEGER) {
throw "type error: const \"" + name + "\" was declared as enum";
}
} else if (type->is_struct() || type->is_xception()) {
if (value->get_type() != t_const_value::CV_MAP) {
throw "type error: const \"" + name + "\" was declared as struct/xception";
}
const vector<t_field*>& fields = ((t_struct*)type)->get_members();
vector<t_field*>::const_iterator f_iter;
const map<t_const_value*, t_const_value*>& val = value->get_map();
map<t_const_value*, t_const_value*>::const_iterator v_iter;
for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
if (v_iter->first->get_type() != t_const_value::CV_STRING) {
throw "type error: " + name + " struct key must be string";
}
t_type* field_type = NULL;
for (f_iter = fields.begin(); f_iter != fields.end(); ++f_iter) {
if ((*f_iter)->get_name() == v_iter->first->get_string()) {
field_type = (*f_iter)->get_type();
}
}
if (field_type == NULL) {
throw "type error: " + type->get_name() + " has no field " + v_iter->first->get_string();
}
validate_const_rec(name + "." + v_iter->first->get_string(), field_type, v_iter->second);
}
} else if (type->is_map()) {
t_type* k_type = ((t_map*)type)->get_key_type();
t_type* v_type = ((t_map*)type)->get_val_type();
const map<t_const_value*, t_const_value*>& val = value->get_map();
map<t_const_value*, t_const_value*>::const_iterator v_iter;
for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
validate_const_rec(name + "<key>", k_type, v_iter->first);
validate_const_rec(name + "<val>", v_type, v_iter->second);
}
} else if (type->is_list() || type->is_set()) {
t_type* e_type;
if (type->is_list()) {
e_type = ((t_list*)type)->get_elem_type();
} else {
e_type = ((t_set*)type)->get_elem_type();
}
const vector<t_const_value*>& val = value->get_list();
vector<t_const_value*>::const_iterator v_iter;
for (v_iter = val.begin(); v_iter != val.end(); ++v_iter) {
validate_const_rec(name + "<elem>", e_type, *v_iter);
}
}
}
/**
 * Validates a parsed constant: its value is checked against its declared
 * type by validate_const_rec(), using the constant's own name in messages.
 *
 * @param c the constant to validate
 */
void validate_const_type(t_const* c) {
  t_type* declared = c->get_type();
  t_const_value* literal = c->get_value();
  validate_const_rec(c->get_name(), declared, literal);
}
/**
 * Parses a program and, recursively, every program it includes.
 *
 * The file is run through the parser twice:
 *  1. with g_parse_mode == INCLUDES, to discover the included programs,
 *     which are then parsed recursively with this program as their parent;
 *  2. with g_parse_mode == PROGRAM, to parse the program itself.
 *
 * Any problem (file cannot be opened, parser returns non-zero, parser throws
 * an error message) is reported through failure(), which exits the process.
 *
 * @param program        program to parse; its path names the input file
 * @param parent_program program that included this one, or NULL for the
 *                       top-level program
 */
void parse(t_program* program, t_program* parent_program) {
  // Get scope file path
  string path = program->get_path();

  // Set current dir global, which is used in the include_file function
  g_curdir = directory_name(path);
  g_curpath = path;

  // Open the file
  yyin = fopen(path.c_str(), "r");
  if (yyin == 0) {
    failure("Could not open input file: \"%s\"", path.c_str());
  }

  // Create new scope and scan for includes
  pverbose("Scanning %s for includes\n", path.c_str());
  g_parse_mode = INCLUDES;
  g_program = program;
  g_scope = program->scope();
  // Number lines from the top of this file for diagnostics in this pass
  yylineno = 1;
  try {
    if (yyparse() != 0) {
      failure("Parser error during include pass.");
    }
  } catch (const string& x) {
    // The message is data, not a format string: it may contain '%'
    failure("%s", x.c_str());
  } catch (const char* x) {
    failure("%s", x);
  }
  fclose(yyin);

  // Recursively parse all the include programs
  vector<t_program*>& includes = program->get_includes();
  vector<t_program*>::iterator iter;
  for (iter = includes.begin(); iter != includes.end(); ++iter) {
    parse(*iter, program);
  }

  // Parse the program file. The recursive calls above overwrote the
  // globals, so they are set up again for this program.
  g_parse_mode = PROGRAM;
  g_program = program;
  g_scope = program->scope();
  g_parent_scope = (parent_program != NULL) ? parent_program->scope() : NULL;
  g_parent_prefix = program->get_name() + ".";
  g_curpath = path;
  yyin = fopen(path.c_str(), "r");
  if (yyin == 0) {
    failure("Could not open input file: \"%s\"", path.c_str());
  }
  pverbose("Parsing %s for types\n", path.c_str());
  // Number lines from the top of this file for diagnostics in this pass
  yylineno = 1;
  // Errors thrown while parsing must be reported the same way as in the
  // include pass instead of escaping as an uncaught exception.
  try {
    if (yyparse() != 0) {
      failure("Parser error during types pass.");
    }
  } catch (const string& x) {
    failure("%s", x.c_str());
  } catch (const char* x) {
    failure("%s", x);
  }
  fclose(yyin);
}
/**
 * Generates code for a program in every output language whose gen_* flag is
 * set. When gen_recurse is set, code is generated for all included programs
 * first.
 *
 * Errors thrown by a generator (as std::string or const char*) are printed
 * to stdout as "Error: ..." and end generation for this program.
 *
 * @param program the parsed program to generate code for
 */
void generate(t_program* program) {
  // Oooohh, recursive code generation, hot!!
  if (gen_recurse) {
    const vector<t_program*>& includes = program->get_includes();
    for (size_t i = 0; i < includes.size(); ++i) {
      generate(includes[i]);
    }
  }

  // Generate code! Each generator lives on the stack so that its destructor
  // also runs when generate_program() throws.
  try {
    pverbose("Program: %s\n", program->get_path().c_str());

    if (gen_cpp) {
      pverbose("Generating C++\n");
      t_cpp_generator cpp(program);
      cpp.generate_program();
    }

    if (gen_java) {
      pverbose("Generating Java\n");
      t_java_generator java(program);
      java.generate_program();
    }

    if (gen_php) {
      pverbose("Generating PHP\n");
      t_php_generator php(program, false);
      php.generate_program();
    }

    if (gen_phpi) {
      pverbose("Generating PHP-inline\n");
      t_php_generator phpi(program, true);
      phpi.generate_program();
    }

    if (gen_py) {
      pverbose("Generating Python\n");
      t_py_generator py(program);
      py.generate_program();
    }

    if (gen_xsd) {
      pverbose("Generating XSD\n");
      t_xsd_generator xsd(program);
      xsd.generate_program();
    }
  } catch (const string& s) {
    printf("Error: %s\n", s.c_str());
  } catch (const char* exc) {
    printf("Error: %s\n", exc);
  }
}
/**
* Parse it up.. then spit it back out, in pretty much every language. Alright
* not that many languages, but the cool ones that we care about.
*/
int main(int argc, char** argv) {
int i;
// Setup time string
time_t now = time(NULL);
g_time_str = ctime(&now);
// Check for necessary arguments, you gotta have at least a filename and
// an output language flag
if (argc < 2) {
usage();
}
// Hacky parameter handling... I didn't feel like using a library sorry!
for (i = 1; i < argc-1; i++) {
char* arg;
arg = strtok(argv[i], " ");
while (arg != NULL) {
// Treat double dashes as single dashes
if (arg[0] == '-' && arg[1] == '-') {
++arg;
}
if (strcmp(arg, "-debug") == 0) {
g_debug = 1;
} else if (strcmp(arg, "-nowarn") == 0) {
g_warn = 0;
} else if (strcmp(arg, "-strict") == 0) {
g_warn = 2;
} else if (strcmp(arg, "-v") == 0 || strcmp(arg, "-verbose") == 0 ) {
g_verbose = 1;
} else if (strcmp(arg, "-r") == 0 || strcmp(arg, "-recurse") == 0 ) {
gen_recurse = true;
} else if (strcmp(arg, "-cpp") == 0) {
gen_cpp = true;
} else if (strcmp(arg, "-java") == 0) {
gen_java = true;
} else if (strcmp(arg, "-php") == 0) {
gen_php = true;
} else if (strcmp(arg, "-phpi") == 0) {
gen_phpi = true;
} else if (strcmp(arg, "-py") == 0) {
gen_py = true;
} else if (strcmp(arg, "-xsd") == 0) {
gen_xsd = true;
} else if (strcmp(arg, "-I") == 0) {
// An argument of "-I\ asdf" is invalid and has unknown results
arg = argv[++i];
if (arg == NULL) {
fprintf(stderr, "!!! Missing Include directory");
usage();
}
g_incl_searchpath.push_back(arg);
} else {
fprintf(stderr, "!!! Unrecognized option: %s\n", arg);
usage();
}
// Tokenize more
arg = strtok(NULL, " ");
}
}
// You gotta generate something!
if (!gen_cpp && !gen_java && !gen_php && !gen_phpi && !gen_py && !gen_xsd) {
fprintf(stderr, "!!! No output language(s) specified\n\n");
usage();
}
// Real-pathify it
char rp[PATH_MAX];
if (realpath(argv[i], rp) == NULL) {
failure("Could not open input file: %s", argv[i]);
}
string input_file(rp);
// Instance of the global parse tree
t_program* program = new t_program(input_file);
// Initialize global types
g_type_void = new t_base_type("void", t_base_type::TYPE_VOID);
g_type_string = new t_base_type("string", t_base_type::TYPE_STRING);
g_type_slist = new t_base_type("string", t_base_type::TYPE_STRING);
((t_base_type*)g_type_slist)->set_string_list(true);
g_type_bool = new t_base_type("bool", t_base_type::TYPE_BOOL);
g_type_byte = new t_base_type("byte", t_base_type::TYPE_BYTE);
g_type_i16 = new t_base_type("i16", t_base_type::TYPE_I16);
g_type_i32 = new t_base_type("i32", t_base_type::TYPE_I32);
g_type_i64 = new t_base_type("i64", t_base_type::TYPE_I64);
g_type_double = new t_base_type("double", t_base_type::TYPE_DOUBLE);
// Parse it!
parse(program, NULL);
// Generate it!
generate(program);
// Clean up. Who am I kidding... this program probably orphans heap memory
// all over the place, but who cares because it is about to exit and it is
// all referenced and used by this wacky parse tree up until now anyways.
delete program;
delete g_type_void;
delete g_type_string;
delete g_type_bool;
delete g_type_byte;
delete g_type_i16;
delete g_type_i32;
delete g_type_i64;
delete g_type_double;
// Finished
return 0;
}