THRIFT-153. Proper handling of strings with escapes (in IDL)
- Recognize and parse escape characters in .thrift files.
- Escape strings used as constants in generated source files.
git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@758922 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/generate/t_cocoa_generator.cc b/compiler/cpp/src/generate/t_cocoa_generator.cc
index f2121ca..04b0ad7 100644
--- a/compiler/cpp/src/generate/t_cocoa_generator.cc
+++ b/compiler/cpp/src/generate/t_cocoa_generator.cc
@@ -1794,7 +1794,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- render << "@\"" + value->get_string() + "\"";
+ render << "@\"" << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "YES" : "NO");
diff --git a/compiler/cpp/src/generate/t_cpp_generator.cc b/compiler/cpp/src/generate/t_cpp_generator.cc
index 5ad390a..c9b2055 100644
--- a/compiler/cpp/src/generate/t_cpp_generator.cc
+++ b/compiler/cpp/src/generate/t_cpp_generator.cc
@@ -538,7 +538,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- render << "\"" + value->get_string() + "\"";
+ render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_csharp_generator.cc b/compiler/cpp/src/generate/t_csharp_generator.cc
index 6387906..f4c6f89 100644
--- a/compiler/cpp/src/generate/t_csharp_generator.cc
+++ b/compiler/cpp/src/generate/t_csharp_generator.cc
@@ -330,7 +330,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- render << "\"" + value->get_string() + "\"";
+ render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_erl_generator.cc b/compiler/cpp/src/generate/t_erl_generator.cc
index e7648ba..1027cd2 100644
--- a/compiler/cpp/src/generate/t_erl_generator.cc
+++ b/compiler/cpp/src/generate/t_erl_generator.cc
@@ -315,7 +315,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "\"" << value->get_string() << "\"";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_generator.cc b/compiler/cpp/src/generate/t_generator.cc
index 6766cd1..a04446b 100644
--- a/compiler/cpp/src/generate/t_generator.cc
+++ b/compiler/cpp/src/generate/t_generator.cc
@@ -59,6 +59,19 @@
close_generator();
}
+string t_generator::escape_string(const string &in) const {
+ string result = "";
+ for (string::const_iterator it = in.begin(); it < in.end(); it++) {
+ std::map<char, std::string>::const_iterator res = escape_.find(*it);
+ if (res != escape_.end()) {
+ result.append(res->second);
+ } else {
+ result.push_back(*it);
+ }
+ }
+ return result;
+}
+
void t_generator::generate_consts(vector<t_const*> consts) {
vector<t_const*>::iterator c_iter;
for (c_iter = consts.begin(); c_iter != consts.end(); ++c_iter) {
diff --git a/compiler/cpp/src/generate/t_generator.h b/compiler/cpp/src/generate/t_generator.h
index 060e656..8fdf5ff 100644
--- a/compiler/cpp/src/generate/t_generator.h
+++ b/compiler/cpp/src/generate/t_generator.h
@@ -27,6 +27,11 @@
indent_ = 0;
program_ = program;
program_name_ = get_program_name(program);
+ escape_['\n'] = "\\n";
+ escape_['\r'] = "\\r";
+ escape_['\t'] = "\\t";
+ escape_['"'] = "\\\"";
+ escape_['\\'] = "\\\\";
}
virtual ~t_generator() {}
@@ -45,6 +50,16 @@
const std::string& line_prefix,
const std::string& contents,
const std::string& comment_end);
+
+ /**
+ * Escape string to use one in generated sources.
+ */
+ virtual std::string escape_string(const std::string &in) const;
+
+ std::string get_escaped_string(t_const_value* constval) {
+ return escape_string(constval->get_string());
+ }
+
protected:
/**
@@ -184,6 +199,11 @@
*/
std::string out_dir_base_;
+ /**
+ * Map of characters to escape in string literals.
+ */
+ std::map<char, std::string> escape_;
+
private:
/**
* Current code indentation level
diff --git a/compiler/cpp/src/generate/t_hs_generator.cc b/compiler/cpp/src/generate/t_hs_generator.cc
index 18a07f0..94fb959 100644
--- a/compiler/cpp/src/generate/t_hs_generator.cc
+++ b/compiler/cpp/src/generate/t_hs_generator.cc
@@ -303,7 +303,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "\"" << value->get_string() << "\"";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "True" : "False");
diff --git a/compiler/cpp/src/generate/t_html_generator.cc b/compiler/cpp/src/generate/t_html_generator.cc
index fc05709..671019e 100644
--- a/compiler/cpp/src/generate/t_html_generator.cc
+++ b/compiler/cpp/src/generate/t_html_generator.cc
@@ -32,6 +32,12 @@
: t_generator(program)
{
out_dir_base_ = "gen-html";
+ escape_.clear();
+ escape_['&'] = "&";
+ escape_['<'] = "<";
+ escape_['>'] = ">";
+ escape_['"'] = """;
+ escape_['\''] = "'";
}
void generate_program();
@@ -396,7 +402,7 @@
f_out_ << tvalue->get_double();
break;
case t_const_value::CV_STRING:
- f_out_ << "\"" << tvalue->get_string() << "\"";
+ f_out_ << '"' << get_escaped_string(tvalue) << '"';
break;
case t_const_value::CV_MAP:
{
diff --git a/compiler/cpp/src/generate/t_java_generator.cc b/compiler/cpp/src/generate/t_java_generator.cc
index e258a47..6e2d0b3 100644
--- a/compiler/cpp/src/generate/t_java_generator.cc
+++ b/compiler/cpp/src/generate/t_java_generator.cc
@@ -519,7 +519,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- render << "\"" + value->get_string() + "\"";
+ render << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
render << ((value->get_integer() > 0) ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_ocaml_generator.cc b/compiler/cpp/src/generate/t_ocaml_generator.cc
index a60e00c..8c8f0c0 100644
--- a/compiler/cpp/src/generate/t_ocaml_generator.cc
+++ b/compiler/cpp/src/generate/t_ocaml_generator.cc
@@ -361,7 +361,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "\"" << value->get_string() << "\"";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_perl_generator.cc b/compiler/cpp/src/generate/t_perl_generator.cc
index 02ac779..be72891 100644
--- a/compiler/cpp/src/generate/t_perl_generator.cc
+++ b/compiler/cpp/src/generate/t_perl_generator.cc
@@ -31,6 +31,8 @@
: t_oop_generator(program)
{
out_dir_base_ = "gen-perl";
+ escape_['$'] = "\\$";
+ escape_['@'] = "\\@";
}
/**
@@ -328,7 +330,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "'" << value->get_string() << "'";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "1" : "0");
@@ -1796,5 +1798,4 @@
throw "INVALID TYPE IN type_to_enum: " + type->get_name();
}
-
THRIFT_REGISTER_GENERATOR(perl, "Perl", "");
diff --git a/compiler/cpp/src/generate/t_php_generator.cc b/compiler/cpp/src/generate/t_php_generator.cc
index f74076c..7b3e617 100644
--- a/compiler/cpp/src/generate/t_php_generator.cc
+++ b/compiler/cpp/src/generate/t_php_generator.cc
@@ -51,6 +51,7 @@
}
out_dir_base_ = (binary_inline_ ? "gen-phpi" : "gen-php");
+ escape_['$'] = "\\$";
}
/**
@@ -371,7 +372,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "'" << value->get_string() << "'";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_py_generator.cc b/compiler/cpp/src/generate/t_py_generator.cc
index cde70a9..59a4705 100644
--- a/compiler/cpp/src/generate/t_py_generator.cc
+++ b/compiler/cpp/src/generate/t_py_generator.cc
@@ -387,7 +387,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "'" << value->get_string() << "'";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "True" : "False");
diff --git a/compiler/cpp/src/generate/t_rb_generator.cc b/compiler/cpp/src/generate/t_rb_generator.cc
index d3ac711..2a7f7d9 100644
--- a/compiler/cpp/src/generate/t_rb_generator.cc
+++ b/compiler/cpp/src/generate/t_rb_generator.cc
@@ -338,7 +338,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "%q\"" << value->get_string() << '"';
+ out << "%q\"" << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");
diff --git a/compiler/cpp/src/generate/t_st_generator.cc b/compiler/cpp/src/generate/t_st_generator.cc
index 960db63..76a8c9d 100644
--- a/compiler/cpp/src/generate/t_st_generator.cc
+++ b/compiler/cpp/src/generate/t_st_generator.cc
@@ -348,7 +348,7 @@
t_base_type::t_base tbase = ((t_base_type*)type)->get_base();
switch (tbase) {
case t_base_type::TYPE_STRING:
- out << "'" << value->get_string() << "'";
+ out << '"' << get_escaped_string(value) << '"';
break;
case t_base_type::TYPE_BOOL:
out << (value->get_integer() > 0 ? "true" : "false");
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index 0b4e960..a700ca3 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -14,6 +14,7 @@
%{
+#include <string>
#include <errno.h>
#include "main.h"
@@ -58,10 +59,8 @@
comment ("//"[^\n]*)
unixcomment ("#"[^\n]*)
symbol ([:;\,\{\}\(\)\=<>\[\]])
-dliteral ("\""[^"]*"\"")
-sliteral ("'"[^']*"'")
st_identifier ([a-zA-Z-][\.a-zA-Z_0-9-]*)
-
+literal_begin (['\"])
%%
@@ -222,17 +221,56 @@
return tok_st_identifier;
}
-{dliteral} {
- yylval.id = strdup(yytext+1);
- yylval.id[strlen(yylval.id)-1] = '\0';
- return tok_literal;
+{literal_begin} {
+ char mark = yytext[0];
+ std::string result;
+ for(;;)
+ {
+ int ch = yyinput();
+ switch (ch) {
+ case EOF:
+ yyerror("End of file while read string at %d\n", yylineno);
+ exit(1);
+ case '\n':
+ yyerror("End of line while read string at %d\n", yylineno - 1);
+ exit(1);
+ case '\\':
+ ch = yyinput();
+ switch (ch) {
+ case 'r':
+ result.push_back('\r');
+ continue;
+ case 'n':
+ result.push_back('\n');
+ continue;
+ case 't':
+ result.push_back('\t');
+ continue;
+ case '"':
+ result.push_back('"');
+ continue;
+ case '\'':
+ result.push_back('\'');
+ continue;
+ case '\\':
+ result.push_back('\\');
+ continue;
+ default:
+ yyerror("Bad escape character\n");
+ return -1;
+ }
+ break;
+ default:
+ if (ch == mark) {
+ yylval.id = strdup(result.c_str());
+ return tok_literal;
+ } else {
+ result.push_back(ch);
+ }
+ }
+ }
}
-{sliteral} {
- yylval.id = strdup(yytext+1);
- yylval.id[strlen(yylval.id)-1] = '\0';
- return tok_literal;
-}
{doctext} {
/* This does not show up in the parse tree. */