adding documentation syntax to thrift

see DocTest.thrift for examples.

todo: integrate parsed documentation into code generation

review: marc k, mcslee
test plan: DocTest.thrift


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@664970 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index a582436..621116d 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -289,6 +289,8 @@
   fprintf(stderr, "  -php        Generate PHP output files\n");
   fprintf(stderr, "  -phpi       Generate PHP inlined files\n");
   fprintf(stderr, "  -py         Generate Python output files\n");
+  fprintf(stderr, "  -rb         Generate Ruby output files\n");
+  fprintf(stderr, "  -xsd        Generate XSD output files\n");
   fprintf(stderr, "  -I dir       Add a directory to the list of directories \n");
   fprintf(stderr, "               searched for include directives\n");
   fprintf(stderr, "  -nowarn     Suppress all compiler warnings (BAD!)\n");
diff --git a/compiler/cpp/src/parse/t_enum_value.h b/compiler/cpp/src/parse/t_enum_value.h
index 65e61cb..66be3b3 100644
--- a/compiler/cpp/src/parse/t_enum_value.h
+++ b/compiler/cpp/src/parse/t_enum_value.h
@@ -2,6 +2,7 @@
 #define T_ENUM_VALUE_H
 
 #include <string>
+#include "t_doc.h"
 
 /**
  * A constant. These are used inside of enum definitions. Constants are just
@@ -10,7 +11,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_enum_value {
+class t_enum_value : public t_doc {
  public:
   t_enum_value(std::string name) :
     name_(name),
diff --git a/compiler/cpp/src/parse/t_field.h b/compiler/cpp/src/parse/t_field.h
index 058b9fb..03d4ba7 100644
--- a/compiler/cpp/src/parse/t_field.h
+++ b/compiler/cpp/src/parse/t_field.h
@@ -55,14 +55,41 @@
     return value_;
   }
 
+  /**
+   * Returns the doctext attached to this field; empty if none was set.
+   */
+  const std::string& get_doc() const {
+    return doc_;
+  }
+
+  /**
+   * Reports whether non-empty doctext has been attached to this field.
+   * Derived from doc_ rather than from a separate flag so that the result
+   * is well defined without requiring every constructor to initialize
+   * extra state.
+   */
+  bool has_doc() const {
+    return !doc_.empty();
+  }
+
+  /**
+   * Attaches doctext to this field, replacing any previous text.
+   */
+  void set_doc(const std::string& doc) {
+    doc_ = doc;
+  }
+
  private:
   t_type* type_;
   std::string name_;
   int32_t key_;
   t_const_value* value_;
-  
+
   bool xsd_optional_;
 
+  // Doctext attached to this field; empty when the field is undocumented.
+  std::string doc_;
+
 };
 
 #endif
diff --git a/compiler/cpp/src/parse/t_function.h b/compiler/cpp/src/parse/t_function.h
index 6f06abe..e75c659 100644
--- a/compiler/cpp/src/parse/t_function.h
+++ b/compiler/cpp/src/parse/t_function.h
@@ -4,6 +4,7 @@
 #include <string>
 #include "t_type.h"
 #include "t_struct.h"
+#include "t_doc.h"
 
 /**
  * Representation of a function. Key parts are return type, function name,
@@ -12,7 +13,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_function {
+class t_function : public t_doc {
  public:
   t_function(t_type* returntype,
              std::string name,
diff --git a/compiler/cpp/src/parse/t_program.h b/compiler/cpp/src/parse/t_program.h
index 7f7af44..610867a 100644
--- a/compiler/cpp/src/parse/t_program.h
+++ b/compiler/cpp/src/parse/t_program.h
@@ -18,6 +18,7 @@
 #include "t_list.h"
 #include "t_map.h"
 #include "t_set.h"
+//#include "t_doc.h"
 
 /**
  * Top level class representing an entire thrift program. A program consists
diff --git a/compiler/cpp/src/parse/t_type.h b/compiler/cpp/src/parse/t_type.h
index da062b5..f6aa7ab 100644
--- a/compiler/cpp/src/parse/t_type.h
+++ b/compiler/cpp/src/parse/t_type.h
@@ -2,6 +2,7 @@
 #define T_TYPE_H
 
 #include <string>
+#include "t_doc.h"
 
 class t_program;
 
@@ -14,7 +15,7 @@
  *
  * @author Mark Slee <mcslee@facebook.com>
  */
-class t_type {
+class t_type : public t_doc {
  public:
   virtual ~t_type() {}
 
@@ -55,9 +56,10 @@
 
   t_type(std::string name) :
     name_(name) {}
-    
+
   t_program* program_;
   std::string name_;
+
 };
 
 #endif
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index d54a2ad..5450ad2 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -40,9 +40,11 @@
 multicomm    ("/*""/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
 comment      ("//"[^\n]*)
 unixcomment  ("#"[^\n]*)
+doctext      ("["(("["[^\]\[]*"]")|[^\]\[])*"]") /* allows one level of nesting */
 symbol       ([:;\,\{\}\(\)\=<>\[\]])
-dliteral      ("\""[^"]*"\"")
-sliteral      ("'"[^']*"'")
+dliteral     ("\""[^"]*"\"")
+sliteral     ("'"[^']*"'")
+
 
 %%
 
@@ -184,4 +186,12 @@
   return tok_literal;
 }
 
+{doctext} {
+  /* Strip the enclosing brackets and pass the inner text to the parser via
+     tdoc, the union member that tok_doctext is declared with. */
+  yylval.tdoc = strdup(yytext + 1);
+  yylval.tdoc[strlen(yylval.tdoc) - 1] = '\0';
+  return tok_doctext;
+}
+
 %%
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index a6492fe..9c38996 100644
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -42,6 +42,7 @@
   t_service*     tservice;
   t_function*    tfunction;
   t_field*       tfield;
+  char*          tdoc;
 }
 
 /**
@@ -49,6 +50,7 @@
  */
 %token<id>     tok_identifier
 %token<id>     tok_literal
+%token<tdoc>   tok_doctext
 
 /**
  * Constant values
@@ -154,6 +156,8 @@
 %type<tbool>     XsdOptional
 %type<id>        CppType
 
+%type<tdoc>      DocTextOptional
+
 %%
 
 /**
@@ -312,19 +316,37 @@
     }
 
 Typedef:
-  tok_typedef DefinitionType tok_identifier
+  DocTextOptional tok_typedef DefinitionType tok_identifier 
     {
       pdebug("TypeDef -> tok_typedef DefinitionType tok_identifier");
-      t_typedef *td = new t_typedef(g_program, $2, $3);
+      t_typedef *td = new t_typedef(g_program, $3, $4);
       $$ = td;
+      if ($1 != NULL) {
+        td->set_doc($1);
+      }
     }
 
+DocTextOptional:
+  tok_doctext
+    {
+      pdebug("DocTextOptional -> tok_doctext");
+      $$ = $1;
+    }
+|
+    {
+      $$ = NULL; 
+    }
+    
+
 Enum:
-  tok_enum tok_identifier '{' EnumDefList '}'
+  DocTextOptional tok_enum tok_identifier '{' EnumDefList '}'
     {
       pdebug("Enum -> tok_enum tok_identifier { EnumDefList }");
-      $$ = $4;
-      $$->set_name($2);
+      $$ = $5;
+      $$->set_name($3);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
     }
 
 CommaOrSemicolonOptional:
@@ -349,19 +371,25 @@
     }
 
 EnumDef:
-  tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
+  DocTextOptional tok_identifier '=' tok_int_constant CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier = tok_int_constant");
-      if ($3 < 0) {
-        pwarning(1, "Negative value supplied for enum %s.\n", $1);
+      if ($4 < 0) {
+        pwarning(1, "Negative value supplied for enum %s.\n", $2);
       }
-      $$ = new t_enum_value($1, $3);
+      $$ = new t_enum_value($2, $4);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
     }
 |
-  tok_identifier CommaOrSemicolonOptional
+  DocTextOptional tok_identifier CommaOrSemicolonOptional
     {
       pdebug("EnumDef -> tok_identifier");
-      $$ = new t_enum_value($1);
+      $$ = new t_enum_value($2);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
     }
 
 Const:
@@ -455,12 +483,15 @@
     }
 
 Struct:
-  tok_struct tok_identifier XsdAll '{' FieldList '}'
+  DocTextOptional tok_struct tok_identifier XsdAll '{' FieldList '}'
     {
       pdebug("Struct -> tok_struct tok_identifier { FieldList }");
-      $5->set_name($2);
-      $5->set_xsd_all($3);
-      $$ = $5;
+      $6->set_name($3);
+      if ($1 != NULL) {
+        $6->set_doc($1);
+      }
+      $6->set_xsd_all($4);
+      $$ = $6;
       y_field_val = -1;
     }
 
@@ -490,17 +521,25 @@
       pdebug("Xception -> tok_xception tok_identifier { FieldList }");
       $4->set_name($2);
       $4->set_xception(true);
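+      /*
+       * TODO: exceptions do not accept doctext yet. To support it, prefix
+       * this rule with DocTextOptional, renumber the positional values in
+       * this action, and call set_doc() as the Struct rule does.
+       */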
       $$ = $4;
       y_field_val = -1;
     }
 
 Service:
-  tok_service tok_identifier Extends '{' FunctionList '}'
+  DocTextOptional tok_service tok_identifier Extends '{' FunctionList '}'
     {
       pdebug("Service -> tok_service tok_identifier { FunctionList }");
-      $$ = $5;
-      $$->set_name($2);
-      $$->set_extends($3);
+      $$ = $6;
+      $$->set_name($3);
+      $$->set_extends($4);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
     }
 
 Extends:
@@ -535,10 +574,13 @@
     }
 
 Function:
-  Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
+  DocTextOptional Async FunctionType tok_identifier '(' FieldList ')' Throws CommaOrSemicolonOptional
     {
-      $5->set_name(std::string($3) + "_args");
-      $$ = new t_function($2, $3, $5, $7, $1);
+      $6->set_name(std::string($4) + "_args");
+      $$ = new t_function($3, $4, $6, $8, $2);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
       y_field_val = -1;
     }
 
@@ -577,18 +619,26 @@
     }
 
+/**
+ * Field: optional doctext, field key, type, name, optional default value
+ * and the xsd_optional flag, in that positional order.
+ */
 Field:
-  FieldIdentifier FieldType tok_identifier FieldValue XsdOptional CommaOrSemicolonOptional
+  DocTextOptional FieldIdentifier FieldType tok_identifier FieldValue XsdOptional CommaOrSemicolonOptional
     {
       pdebug("tok_int_constant : Field -> FieldType tok_identifier");
-      if ($1 < 0) {
-        pwarning(2, "No field key specified for %s, resulting protocol may have conflicts or not be backwards compatible!\n", $3);
+      if ($2 < 0) {
+        // The name is the fourth symbol now that doctext comes first; the third is the type.
+        pwarning(2, "No field key specified for %s, resulting protocol may have conflicts or not be backwards compatible!\n", $4);
       }
-      $$ = new t_field($2, $3, $1);
-      if ($4 != NULL) {
-        validate_field_value($$, $4);
-        $$->set_value($4);
+      $$ = new t_field($3, $4, $2);
+      if ($5 != NULL) {
+        validate_field_value($$, $5);
+        $$->set_value($5);
       }
-      $$->set_xsd_optional($5);
+      $$->set_xsd_optional($6);
+      if ($1 != NULL) {
+        $$->set_doc($1);
+      }
     }
 
 FieldIdentifier:
diff --git a/test/DocTest.thrift b/test/DocTest.thrift
new file mode 100755
index 0000000..682edff
--- /dev/null
+++ b/test/DocTest.thrift
@@ -0,0 +1,126 @@
+java_package thrift.test
+cpp_namespace thrift.test
+
+// C++ comment
+/* c style comment */
+
+# the new unix comment
+
+[Some doc text goes here.  Wow I am [nesting these].]
+enum Numberz
+{
+
+  [This is how to document a parameter]
+  ONE = 1,
+
+  [And this is a doc for a parameter that has no specific value assigned]
+  TWO,
+
+  THREE,
+  FIVE = 5,
+  SIX,
+  EIGHT = 8
+}
+
+[This is how you would do a typedef doc]
+typedef i64 UserId 
+
+[And this is where you would document a struct]
+struct Xtruct
+{
+
+  [And the members of a struct]
+  1:  string string_thing
+
+  [doc text goes before a comma]
+  4:  byte   byte_thing,
+
+  9:  i32    i32_thing,
+  11: i64    i64_thing
+}
+
+struct Xtruct2
+{
+  1: byte   byte_thing,
+  2: Xtruct struct_thing,
+  3: i32    i32_thing
+}
+
+[Struct insanity]
+struct Insanity
+{
+
+  [This is doc for field 1]
+  1: map<Numberz, UserId> userMap,
+
+  [And this is doc for field 2]
+  2: list<Xtruct> xtructs 
+}
+
+exception Xception {
+  1: i32 errorCode,
+  2: string message
+}
+
+exception Xception2 {
+  1: i32 errorCode,
+  2: Xtruct struct_thing
+}
+ 
+struct EmptyStruct {}
+
+struct OneField {
+  1: EmptyStruct field
+}
+
+[This is where you would document a Service]
+service ThriftTest
+{
+
+  [And this is how you would document functions in a service]
+  void         testVoid(),
+  string       testString(1: string thing),
+  byte         testByte(1: byte thing),
+  i32          testI32(1: i32 thing),
+
+  [Like this one]
+  i64          testI64(1: i64 thing),
+  double       testDouble(1: double thing),
+  Xtruct       testStruct(1: Xtruct thing),
+  Xtruct2      testNest(1: Xtruct2 thing),
+  map<i32,i32> testMap(1: map<i32,i32> thing),
+  set<i32>     testSet(1: set<i32> thing),
+  list<i32>    testList(1: list<i32> thing),
+
+  [This is an example of a function with params documented]
+  Numberz      testEnum(
+
+    [This param is a thing]
+    1: Numberz thing
+
+  ),
+
+  UserId       testTypedef(1: UserId thing),
+
+  map<i32,map<i32,i32>> testMapMap(1: i32 hello),
+
+  /* So you think you've got this all worked, out eh? */
+  map<UserId, map<Numberz,Insanity>> testInsanity(1: Insanity argument),
+
+  /* Multiple parameters */
+  
+  Xtruct	testMulti(byte arg0, i32 arg1, i64 arg2, map<i16, string> arg3, Numberz arg4, UserId arg5),
+
+  /* Exception specifier */
+
+  void testException(string arg) throws(Xception err1),
+
+  /* Multiple exceptions specifier */
+
+  Xtruct testMultiException(string arg0, string arg1) throws(Xception err1, Xception2 err2)
+}
+
+service SecondService
+{
+  void blahBlah()
+}