THRIFT-2320 Program level doctext does not get attached by parser

Patch: Craig Peterson & Jens Geyer
diff --git a/compiler/cpp/src/globals.h b/compiler/cpp/src/globals.h
index 2b1865b..957e5d1 100644
--- a/compiler/cpp/src/globals.h
+++ b/compiler/cpp/src/globals.h
@@ -115,6 +115,24 @@
 extern int g_doctext_lineno;
 
 /**
+ * Status of program level doctext candidate
+ */
+enum PROGDOCTEXT_STATUS {
+  INVALID = 0,              // no candidate captured (initial value, and the value after a reset)
+  STILL_CANDIDATE = 1,      // the text may or may not be the program doctext
+  ALREADY_PROCESSED = 2,    // doctext has been used and is no longer available
+  ABSOLUTELY_SURE = 3       // this is the program doctext (set by declare_valid_program_doctext)
+};
+
+
+/**
+ * The program level doctext. Stored separately to make parsing easier.
+ */
+extern char* g_program_doctext_candidate;  // copy of the candidate text, NULL when there is none
+extern int   g_program_doctext_lineno;  // g_doctext_lineno at the time the candidate was captured
+extern PROGDOCTEXT_STATUS  g_program_doctext_status;  // what is currently known about the candidate
+
+/**
  * Whether or not negative field keys are accepted.
  *
  * When a field does not have a user-specified key, thrift automatically
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index 4ce22b4..bba4cdc 100755
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -146,6 +146,13 @@
  */
 int g_doctext_lineno;
 
+/**
+ * The first doctext comment seen: the candidate for the program level doctext
+ */
+char* g_program_doctext_candidate;  // no initializer: as a file-scope global it starts out as NULL
+int  g_program_doctext_lineno = 0;
+PROGDOCTEXT_STATUS  g_program_doctext_status = INVALID;
+
 /**
  * Whether or not negative field keys are accepted.
  */
@@ -390,6 +397,27 @@
 }
 
 /**
+ * Reset program doctext information after processing a file
+ */
+void reset_program_doctext_info() {
+  if(g_program_doctext_candidate != NULL) {
+    free(g_program_doctext_candidate);  // the candidate is a strdup'ed copy owned by this global
+    g_program_doctext_candidate = NULL;  // NULL is what allows the lexer to capture a new candidate
+  }
+  g_program_doctext_lineno = 0;
+  g_program_doctext_status = INVALID;
+}
+
+/**
+ * Confirms that the program doctext candidate really is the program doctext; does nothing otherwise.
+ */
+void declare_valid_program_doctext() {
+  if((g_program_doctext_candidate != NULL) && (g_program_doctext_status == STILL_CANDIDATE)) {  // only an undecided candidate is promoted
+    g_program_doctext_status = ABSOLUTELY_SURE;  
+  }
+}
+
+/**
  * Cleans up text commonly found in doxygen-like comments
  *
  * Warning: if you mix tabs and spaces in a non-uniform way,
@@ -911,6 +939,9 @@
     parse(*iter, program);
   }
 
+  // reset program doctext status before parsing a new file
+  reset_program_doctext_info();
+
   // Parse the program file
   g_parse_mode = PROGRAM;
   g_program = program;
diff --git a/compiler/cpp/src/main.h b/compiler/cpp/src/main.h
index 46ad835..d554970 100644
--- a/compiler/cpp/src/main.h
+++ b/compiler/cpp/src/main.h
@@ -103,6 +103,12 @@
 char* clean_up_doctext(char* doctext);
 
 /**
+ * We are sure the program doctext candidate is really the program doctext.
+ */
+void declare_valid_program_doctext();
+
+
+/**
  * Flex utilities
  */
 
diff --git a/compiler/cpp/src/parse/t_doc.h b/compiler/cpp/src/parse/t_doc.h
index e52068c..a7c8cc9 100644
--- a/compiler/cpp/src/parse/t_doc.h
+++ b/compiler/cpp/src/parse/t_doc.h
@@ -20,6 +20,8 @@
 #ifndef T_DOC_H
 #define T_DOC_H
 
+#include "globals.h"
+
 /**
  * Documentation stubs
  *
@@ -32,6 +34,9 @@
   void set_doc(const std::string& doc) {
     doc_ = doc;
     has_doc_ = true;
+    if( (g_program_doctext_lineno == g_doctext_lineno) &&  (g_program_doctext_status == STILL_CANDIDATE)) {
+      g_program_doctext_status = ALREADY_PROCESSED;  // the current doctext is the pending candidate and has just been attached here
+    }
   }
 
   const std::string& get_doc() const {
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index a1faa6b..4df4ccb 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -384,6 +384,11 @@
     g_doctext[strlen(g_doctext) - 1] = '\0';
     g_doctext = clean_up_doctext(g_doctext);
     g_doctext_lineno = yylineno;
+    if(g_program_doctext_candidate == NULL){
+      g_program_doctext_candidate = strdup(g_doctext);
+      g_program_doctext_lineno = g_doctext_lineno;
+      g_program_doctext_status = STILL_CANDIDATE;
+    }
   }
 }
 
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index 8814332..fd72b70 100755
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -243,12 +243,11 @@
   HeaderList DefinitionList
     {
       pdebug("Program -> Headers DefinitionList");
-      /*
-      TODO(dreiss): Decide whether full-program doctext is worth the trouble.
-      if ($1 != NULL) {
-        g_program->set_doc($1);
+      if((g_program_doctext_candidate != NULL) && (g_program_doctext_status != ALREADY_PROCESSED))
+      {
+        g_program->set_doc(g_program_doctext_candidate);
+        g_program_doctext_status = ALREADY_PROCESSED;
       }
-      */
       clear_doctext();
     }
 
@@ -290,6 +289,7 @@
 | tok_namespace tok_identifier tok_identifier
     {
       pdebug("Header -> tok_namespace tok_identifier tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace($2, $3);
       }
@@ -297,6 +297,7 @@
 | tok_namespace '*' tok_identifier
     {
       pdebug("Header -> tok_namespace * tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("*", $3);
       }
@@ -306,6 +307,7 @@
     {
       pwarning(1, "'cpp_namespace' is deprecated. Use 'namespace cpp' instead");
       pdebug("Header -> tok_cpp_namespace tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("cpp", $2);
       }
@@ -313,6 +315,7 @@
 | tok_cpp_include tok_literal
     {
       pdebug("Header -> tok_cpp_include tok_literal");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->add_cpp_include($2);
       }
@@ -321,6 +324,7 @@
     {
       pwarning(1, "'php_namespace' is deprecated. Use 'namespace php' instead");
       pdebug("Header -> tok_php_namespace tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("php", $2);
       }
@@ -330,6 +334,7 @@
     {
       pwarning(1, "'py_module' is deprecated. Use 'namespace py' instead");
       pdebug("Header -> tok_py_module tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("py", $2);
       }
@@ -339,6 +344,7 @@
     {
       pwarning(1, "'perl_package' is deprecated. Use 'namespace perl' instead");
       pdebug("Header -> tok_perl_namespace tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("perl", $2);
       }
@@ -348,6 +354,7 @@
     {
       pwarning(1, "'ruby_namespace' is deprecated. Use 'namespace rb' instead");
       pdebug("Header -> tok_ruby_namespace tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("rb", $2);
       }
@@ -357,6 +364,7 @@
     {
       pwarning(1, "'smalltalk_category' is deprecated. Use 'namespace smalltalk.category' instead");
       pdebug("Header -> tok_smalltalk_category tok_st_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("smalltalk.category", $2);
       }
@@ -366,6 +374,7 @@
     {
       pwarning(1, "'smalltalk_prefix' is deprecated. Use 'namespace smalltalk.prefix' instead");
       pdebug("Header -> tok_smalltalk_prefix tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("smalltalk.prefix", $2);
       }
@@ -375,6 +384,7 @@
     {
       pwarning(1, "'java_package' is deprecated. Use 'namespace java' instead");
       pdebug("Header -> tok_java_package tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("java", $2);
       }
@@ -384,6 +394,7 @@
     {
       pwarning(1, "'cocoa_prefix' is deprecated. Use 'namespace cocoa' instead");
       pdebug("Header -> tok_cocoa_prefix tok_identifier");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("cocoa", $2);
       }
@@ -393,6 +404,7 @@
     {
       pwarning(1, "'xsd_namespace' is deprecated. Use 'namespace xsd' instead");
       pdebug("Header -> tok_xsd_namespace tok_literal");
+      declare_valid_program_doctext();  
       if (g_parse_mode == PROGRAM) {
         g_program->set_namespace("cocoa", $2);
       }
@@ -402,6 +414,7 @@
    {
      pwarning(1, "'csharp_namespace' is deprecated. Use 'namespace csharp' instead");
      pdebug("Header -> tok_csharp_namespace tok_identifier");
+     declare_valid_program_doctext();  
      if (g_parse_mode == PROGRAM) {
        g_program->set_namespace("csharp", $2);
      }
@@ -411,6 +424,7 @@
    {
      pwarning(1, "'delphi_namespace' is deprecated. Use 'namespace delphi' instead");
      pdebug("Header -> tok_delphi_namespace tok_identifier");
+     declare_valid_program_doctext();  
      if (g_parse_mode == PROGRAM) {
        g_program->set_namespace("delphi", $2);
      }
@@ -420,6 +434,7 @@
   tok_include tok_literal
     {
       pdebug("Include -> tok_include tok_literal");
+      declare_valid_program_doctext();  
       if (g_parse_mode == INCLUDES) {
         std::string path = include_file(std::string($2));
         if (!path.empty()) {