Thrift: docstring revamp step 2.

Summary:
It was a bad idea to let doxygen comments become a part of the parse tree.
We now get them a totally different way.  The lexer stashes the docsting
contents in a global, and the parser actions (not the rules) pull it out.
This should prevent doxygen comments from ever causing parse errors.

Blame Rev: 52678, 52732

Reviewed By: mcslee

Test Plan:
Recompiled thrift.
Thrifted a bunch of files and saw no parse errors (or C++ compile errors).
Thrifted DocTest.thrift with dump_docs on.

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665201 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/Makefile.am b/compiler/cpp/Makefile.am
index 8ddb161..c2084a4 100644
--- a/compiler/cpp/Makefile.am
+++ b/compiler/cpp/Makefile.am
@@ -15,9 +15,9 @@
                  src/generate/t_rb_generator.cc \
                  src/generate/t_xsd_generator.cc \
                  src/generate/t_perl_generator.cc \
-		 src/generate/t_ocaml_generator.cc \
+                 src/generate/t_ocaml_generator.cc \
                  src/generate/t_erl_generator.cc \
-		 src/generate/t_hs_generator.cc 
+                 src/generate/t_hs_generator.cc 
 
 thrift_CXXFLAGS = -Wall -Isrc $(BOOST_CPPFLAGS)
 thrift_LDFLAGS = -Wall $(BOOST_LDFLAGS)
diff --git a/compiler/cpp/src/globals.h b/compiler/cpp/src/globals.h
index 8b3ae19..8560efd 100644
--- a/compiler/cpp/src/globals.h
+++ b/compiler/cpp/src/globals.h
@@ -87,4 +87,14 @@
  */
 extern char* g_time_str;
 
+/**
+ * The last parsed doctext comment.
+ */
+extern char* g_doctext;
+
+/**
+ * The location of the last parsed doctext comment.
+ */
+extern int g_doctext_lineno;
+
 #endif
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index 76676f3..ba88ae3 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -117,6 +117,16 @@
 char* g_time_str;
 
 /**
+ * The last parsed doctext comment.
+ */
+char* g_doctext;
+
+/**
+ * The location of the last parsed doctext comment.
+ */
+int g_doctext_lineno;
+
+/**
  * Flags to control code generation
  */
 bool gen_cpp = false;
@@ -295,6 +305,18 @@
 }
 
 /**
+ * Clears any previously stored doctext string.
+ * Also prints a warning if we are discarding information.
+ */
+void clear_doctext() {
+  if (g_doctext != NULL) {
+    pwarning(2, "Uncaptured doctext at on line %d.", g_doctext_lineno);
+  }
+  free(g_doctext);
+  g_doctext = NULL;
+}
+
+/**
  * Cleans up text commonly found in doxygen-like comments
  *
  * Warning: if you mix tabs and spaces in a non-uniform way,
diff --git a/compiler/cpp/src/main.h b/compiler/cpp/src/main.h
index a60399b..f8828af 100644
--- a/compiler/cpp/src/main.h
+++ b/compiler/cpp/src/main.h
@@ -65,6 +65,11 @@
 std::string include_file(std::string filename);
 
 /**
+ * Clears any previously stored doctext string.
+ */
+void clear_doctext();
+
+/**
  * Cleans up text commonly found in doxygen-like comments
  */
 char* clean_up_doctext(char* doctext);
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index 5e9fca9..4612858 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -16,6 +16,7 @@
 %{
 
 #include "main.h"
+#include "globals.h"
 #include "parse/t_program.h"
 
 /**
@@ -202,9 +203,15 @@
 }
 
 {doctext} {
- yylval.id = strdup(yytext + 3);
- yylval.id[strlen(yylval.id) - 2] = '\0';
- return tok_doctext;
+ /* This does not show up in the parse tree. */
+ /* Rather, the parser will grab it out of the global. */
+  if (g_parse_mode == PROGRAM) {
+    clear_doctext();
+    g_doctext = strdup(yytext + 3);
+    g_doctext[strlen(g_doctext) - 2] = '\0';
+    g_doctext = clean_up_doctext(g_doctext);
+    g_doctext_lineno = yylineno;
+  }
 }
 
 
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index 5f7e3b9..c8be3f0 100644
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -175,7 +175,7 @@
 %type<tstruct>   XsdAttributes
 %type<id>        CppType
 
-%type<dtext>     DocTextOptional
+%type<dtext>     CaptureDocText
 
 %%
 
@@ -188,16 +188,41 @@
  */
 
 Program:
-  DocTextOptional HeaderList DefinitionList
+  HeaderList DefinitionList
     {
       pdebug("Program -> Headers DefinitionList");
+      /*
+      TODO(dreiss): Decide whether full-program doctext is worth the trouble.
       if ($1 != NULL) {
         g_program->set_doc($1);
       }
+      */
+      clear_doctext();
     }
 
+CaptureDocText:
+    {
+      if (g_parse_mode == PROGRAM) {
+        $$ = g_doctext; 
+        g_doctext = NULL;
+      }
+      else {
+        $$ = NULL;
+      }
+    }
+
+/* TODO(dreiss): Try to DestroyDocText in all sorts or random places. */
+DestroyDocText:
+    {
+      if (g_parse_mode == PROGRAM) {
+        clear_doctext();
+      }
+    }
+
+/* We have to DestroyDocText here, otherwise it catches the doctext
+   on the first real element. */
 HeaderList:
-  HeaderList Header
+  HeaderList DestroyDocText Header
     {
       pdebug("HeaderList -> HeaderList Header");
     }
@@ -275,7 +300,7 @@
     }
 
 DefinitionList:
-  DefinitionList DocTextOptional Definition
+  DefinitionList CaptureDocText Definition
     {
       pdebug("DefinitionList -> DefinitionList Definition");
       if ($2 != NULL && $3 != NULL) {
@@ -365,17 +390,6 @@
       $$ = td;
     }
 
-DocTextOptional:
-  tok_doctext
-    {
-      pdebug("DocTextOptional -> tok_doctext");
-      $$ = clean_up_doctext($1);
-    }
-|
-    {
-      $$ = NULL; 
-    }
-    
 CommaOrSemicolonOptional:
   ','
     {}
@@ -406,7 +420,7 @@
     }
 
 EnumDef:
-  DocTextOptional tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
+  CaptureDocText tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier = tok_int_constant");
       if ($4 < 0) {
@@ -424,7 +438,7 @@
       }
     }
 |
-  DocTextOptional tok_identifier CommaOrSemicolonOptional
+  CaptureDocText tok_identifier CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier");
       $$ = new t_enum_value($2);
@@ -664,7 +678,7 @@
     }
 
 Function:
-  DocTextOptional Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
+  CaptureDocText Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
     {
       $6->set_name(std::string($4) + "_args");
       $$ = new t_function($3, $4, $6, $8, $2);
@@ -709,7 +723,7 @@
     }
 
 Field:
-  DocTextOptional FieldIdentifier FieldType tok_identifier FieldValue XsdOptional XsdNillable XsdAttributes CommaOrSemicolonOptional
+  CaptureDocText FieldIdentifier FieldType tok_identifier FieldValue XsdOptional XsdNillable XsdAttributes CommaOrSemicolonOptional
     {
       pdebug("tok_int_constant : Field -> FieldType tok_identifier");
       if ($2 < 0) {