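Add documentation syntax to Thrift

Doc text is written in square brackets (one level of nested brackets is
allowed) directly before a typedef, enum, enum value, struct, service,
function, or field, and the parser stores it on the corresponding
parse-tree object. See DocTest.thrift for examples.

Also list the -rb and -xsd options in the compiler usage message.

TODO: integrate parsed documentation into code generation.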

review: marc k, mcslee
test plan: DocTest.thrift


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664970 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index a582436..621116d 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -289,6 +289,8 @@
   fprintf(stderr, "  -php        Generate PHP output files\n");
   fprintf(stderr, "  -phpi       Generate PHP inlined files\n");
   fprintf(stderr, "  -py         Generate Python output files\n");
+  fprintf(stderr, "  -rb         Generate Ruby output files\n");
+  fprintf(stderr, "  -xsd        Generate XSD output files\n");
   fprintf(stderr, "  -I dir       Add a directory to the list of directories \n");
   fprintf(stderr, "               searched for include directives\n");
   fprintf(stderr, "  -nowarn     Suppress all compiler warnings (BAD!)\n");
diff --git a/compiler/cpp/src/parse/t_enum_value.h b/compiler/cpp/src/parse/t_enum_value.h
index 65e61cb..66be3b3 100644
--- a/compiler/cpp/src/parse/t_enum_value.h
+++ b/compiler/cpp/src/parse/t_enum_value.h
@@ -2,6 +2,7 @@
 #define T_ENUM_VALUE_H
 
 #include <string>
+#include "t_doc.h"
 
 /**
  * A constant. These are used inside of enum definitions. Constants are just
@@ -10,7 +11,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_enum_value {
+class t_enum_value : public t_doc {
  public:
   t_enum_value(std::string name) :
     name_(name),
diff --git a/compiler/cpp/src/parse/t_field.h b/compiler/cpp/src/parse/t_field.h
index 058b9fb..03d4ba7 100644
--- a/compiler/cpp/src/parse/t_field.h
+++ b/compiler/cpp/src/parse/t_field.h
@@ -55,14 +55,30 @@
     return value_;
   }
 
+  /** Returns the documentation text last stored by set_doc(). */
+  const std::string& get_doc() const {
+    return doc_;
+  }
+
+  /** True after set_doc(); NOTE(review): confirm the constructors (outside this hunk) initialize has_doc_ to false. */
+  bool has_doc() const {
+    return has_doc_;
+  }
+
+  /** Stores a copy of doc and marks this field as documented. */
+  void set_doc(const std::string& doc) { doc_ = doc; has_doc_ = true; }
+
  private:
   t_type* type_;
   std::string name_;
   int32_t key_;
   t_const_value* value_;
-  
+
   bool xsd_optional_;
 
+  std::string doc_;                                           
+  bool has_doc_;                                              
+
 };
 
 #endif
diff --git a/compiler/cpp/src/parse/t_function.h b/compiler/cpp/src/parse/t_function.h
index 6f06abe..e75c659 100644
--- a/compiler/cpp/src/parse/t_function.h
+++ b/compiler/cpp/src/parse/t_function.h
@@ -4,6 +4,7 @@
 #include <string>
 #include "t_type.h"
 #include "t_struct.h"
+#include "t_doc.h"
 
 /**
  * Representation of a function. Key parts are return type, function name,
@@ -12,7 +13,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_function {
+class t_function : public t_doc {
  public:
   t_function(t_type* returntype,
              std::string name,
diff --git a/compiler/cpp/src/parse/t_program.h b/compiler/cpp/src/parse/t_program.h
index 7f7af44..610867a 100644
--- a/compiler/cpp/src/parse/t_program.h
+++ b/compiler/cpp/src/parse/t_program.h
@@ -18,6 +18,7 @@
 #include "t_list.h"
 #include "t_map.h"
 #include "t_set.h"
+//#include "t_doc.h"
 
 /**
  * Top level class representing an entire thrift program. A program consists
diff --git a/compiler/cpp/src/parse/t_type.h b/compiler/cpp/src/parse/t_type.h
index da062b5..f6aa7ab 100644
--- a/compiler/cpp/src/parse/t_type.h
+++ b/compiler/cpp/src/parse/t_type.h
@@ -2,6 +2,7 @@
 #define T_TYPE_H
 
 #include <string>
+#include "t_doc.h"
 
 class t_program;
 
@@ -14,7 +15,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_type {
+class t_type : public t_doc {
  public:
   virtual ~t_type() {}
 
@@ -55,9 +56,10 @@
 
   t_type(std::string name) :
     name_(name) {}
-    
+
   t_program* program_;
   std::string name_;
+
 };
 
 #endif
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index d54a2ad..5450ad2 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -40,9 +40,11 @@
 multicomm    ("/*""/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
 comment      ("//"[^\n]*)
 unixcomment  ("#"[^\n]*)
+doctext      ("["(("["[^\]\[]*"]")|[^\]\[])*"]") /* allows one level of nesting */
 symbol       ([:;\,\{\}\(\)\=<>\[\]])
-dliteral      ("\""[^"]*"\"")
-sliteral      ("'"[^']*"'")
+dliteral     ("\""[^"]*"\"")
+sliteral     ("'"[^']*"'")
+
 
 %%
 
@@ -184,4 +186,11 @@
   return tok_literal;
 }
 
+{doctext} {
+ yylval.tdoc = strdup(yytext + 1);            /* tok_doctext is %token<tdoc>; copy the match without its leading '[' */
+ yylval.tdoc[strlen(yylval.tdoc) - 1] = '\0'; /* chop the trailing ']' */
+ return tok_doctext;
+}
+
+
 %%
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index a6492fe..9c38996 100644
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -42,6 +42,7 @@
   t_service*     tservice;
   t_function*    tfunction;
   t_field*       tfield;
+  char*          tdoc;
 }
 
 /**
@@ -49,6 +50,7 @@
  */
 %token<id>     tok_identifier
 %token<id>     tok_literal
+%token<tdoc>   tok_doctext
 
 /**
  * Constant values
@@ -154,6 +156,8 @@
 %type<tbool>     XsdOptional
 %type<id>        CppType
 
+%type<tdoc>      DocTextOptional
+
 %%
 
 /**
@@ -312,19 +316,37 @@
     }
 
 Typedef:
-  tok_typedef DefinitionType tok_identifier
+  DocTextOptional tok_typedef DefinitionType tok_identifier 
     {
       pdebug("TypeDef -> tok_typedef DefinitionType tok_identifier");
-      t_typedef *td = new t_typedef(g_program, $2, $3);
+      t_typedef *td = new t_typedef(g_program, $3, $4); // $3 = DefinitionType, $4 = tok_identifier
       $$ = td;
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        td->set_doc($1);
+      }
     }
 
+DocTextOptional:
+  tok_doctext
+    {
+      pdebug("DocTextOptional -> tok_doctext");
+      $$ = $1; // pass the token's text through unchanged
+    }
+|
+    {
+      $$ = NULL; // empty alternative: no doc text present
+    }
+    
+
 Enum:
-  tok_enum tok_identifier '{' EnumDefList '}'
+  DocTextOptional tok_enum tok_identifier '{' EnumDefList '}'
     {
       pdebug("Enum -> tok_enum tok_identifier { EnumDefList }");
-      $$ = $4;
-      $$->set_name($2);
+      $$ = $5; // the value produced by EnumDefList becomes the result
+      $$->set_name($3);
+      if ($1 != NULL) { // attach doc text only when one was supplied
+        $$->set_doc($1);
+      }
     }
 
 CommaOrSemicolonOptional:
@@ -349,19 +371,25 @@
     }
 
 EnumDef:
-  tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
+  DocTextOptional tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier = tok_int_constant");
-      if ($3 < 0) {
-        pwarning(1, "Negative value supplied for enum %s.\n", $1);
+      if ($4 < 0) { // $4 = tok_int_constant; a negative value only warns, the enum value is still created
+        pwarning(1, "Negative value supplied for enum %s.\n", $2);
       }
-      $$ = new t_enum_value($1, $3);
+      $$ = new t_enum_value($2, $4);
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        $$->set_doc($1);
+      }
     }
 |
-  tok_identifier CommaOrSemicolonOptional
+  DocTextOptional tok_identifier CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier");
-      $$ = new t_enum_value($1);
+      $$ = new t_enum_value($2); // alternative without an explicit "= value"
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        $$->set_doc($1);
+      }
     }
 
 Const:
@@ -455,12 +483,15 @@
     }
 
 Struct:
-  tok_struct tok_identifier XsdAll '{' FieldList '}'
+  DocTextOptional tok_struct tok_identifier XsdAll '{' FieldList '}'
     {
       pdebug("Struct -> tok_struct tok_identifier { FieldList }");
-      $5->set_name($2);
-      $5->set_xsd_all($3);
-      $$ = $5;
+      $6->set_name($3); // $6 = FieldList value, $3 = tok_identifier
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        $6->set_doc($1);
+      }
+      $6->set_xsd_all($4);
+      $$ = $6;
       y_field_val = -1;
     }
 
@@ -490,17 +521,25 @@
       pdebug("Xception -> tok_xception tok_identifier { FieldList }");
       $4->set_name($2);
       $4->set_xception(true);
+/*
+      if ($4 != NULL) {
+        $5->set_doc($4);
+      }
+*/
       $$ = $4;
       y_field_val = -1;
     }
 
 Service:
-  tok_service tok_identifier Extends '{' FunctionList '}'
+  DocTextOptional tok_service tok_identifier Extends '{' FunctionList '}'
     {
       pdebug("Service -> tok_service tok_identifier { FunctionList }");
-      $$ = $5;
-      $$->set_name($2);
-      $$->set_extends($3);
+      $$ = $6; // the value produced by FunctionList becomes the result
+      $$->set_name($3);
+      $$->set_extends($4);
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        $$->set_doc($1);
+      }
     }
 
 Extends:
@@ -535,10 +574,13 @@
     }
 
 Function:
-  Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
+  DocTextOptional Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
     {
-      $5->set_name(std::string($3) + "_args");
-      $$ = new t_function($2, $3, $5, $7, $1);
+      $6->set_name(std::string($4) + "_args"); // name the FieldList value "<function name>_args"
+      $$ = new t_function($3, $4, $6, $8, $2); // FunctionType, tok_identifier, FieldList, Throws, Async
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text was given
+        $$->set_doc($1);
+      }
       y_field_val = -1;
     }
 
@@ -577,18 +619,21 @@
     }
 
 Field:
-  FieldIdentifier FieldType tok_identifier FieldValue XsdOptional CommaOrSemicolonOptional
+  DocTextOptional FieldIdentifier FieldType tok_identifier FieldValue XsdOptional CommaOrSemicolonOptional
     {
       pdebug("tok_int_constant : Field -> FieldType tok_identifier");
-      if ($1 < 0) {
-        pwarning(2, "No field key specified for %s, resulting protocol may have conflicts or not be backwards compatible!\n", $3);
+      if ($2 < 0) { // $2 = FieldIdentifier; every index moved up by one because DocTextOptional is now $1
+        pwarning(2, "No field key specified for %s, resulting protocol may have conflicts or not be backwards compatible!\n", $4);
       }
-      $$ = new t_field($2, $3, $1);
-      if ($4 != NULL) {
-        validate_field_value($$, $4);
-        $$->set_value($4);
+      $$ = new t_field($3, $4, $2);
+      if ($5 != NULL) {
+        validate_field_value($$, $5);
+        $$->set_value($5);
       }
-      $$->set_xsd_optional($5);
+      $$->set_xsd_optional($6);
+      if ($1 != NULL) { // $1 (DocTextOptional) is NULL when no doc text preceded the field
+        $$->set_doc($1);
+      }
     }
 
 FieldIdentifier: