Thrift: Change docstring syntax.

Summary:
The old docstring syntax collided with the syntax for list constants.
The new syntax looks a lot like doxygen comments.

Trac Bug: #4664

Blame Rev: 32392

Reviewed By: mcslee

Test Plan:
../compiler/cpp/thrift -cpp DocTest.thrift
(with dump_docs on)
and looked at the output.
Generated C++ is identical to installed thrift.

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665182 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index a6f55f9..4d00306 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -295,6 +295,158 @@
 }
 
 /**
+ * Cleans up text commonly found in doxygen-like comments.
+ *
+ * Carriage returns are removed, the text is split into lines, a uniform
+ * "<indent>*" prefix and the common indentation of the lines after the first
+ * are stripped, trailing whitespace is trimmed, and the lines are rejoined
+ * with a '\n' after each one.
+ *
+ * Warning: if you mix tabs and spaces in a non-uniform way,
+ * you will get what you deserve.
+ *
+ * @param doctext NUL-terminated comment body.  It must be heap-allocated
+ *                (the lexer strdup()s it): it is overwritten in place, or
+ *                freed and replaced when the cleaned text does not fit.
+ * @return The cleaned text, or NULL when the input is a single line that is
+ *         empty or only whitespace.  Always use the returned pointer.
+ */
+char* clean_up_doctext(char* doctext) {
+  // Convert to C++ string, and remove Windows's carriage returns.
+  string docstring = doctext;
+  docstring.erase(
+      remove(docstring.begin(), docstring.end(), '\r'),
+      docstring.end());
+
+  // Separate into lines.
+  vector<string> lines;
+  string::size_type pos = string::npos;
+  string::size_type last;
+  while (true) {
+    last = (pos == string::npos) ? 0 : pos+1;
+    pos = docstring.find('\n', last);
+    if (pos == string::npos) {
+      // First bit of cleaning.  If the last line is only whitespace, drop it.
+      string::size_type nonwhite = docstring.find_first_not_of(" \t", last);
+      if (nonwhite != string::npos) {
+        lines.push_back(docstring.substr(last));
+      }
+      break;
+    }
+    lines.push_back(docstring.substr(last, pos-last));
+  }
+
+  // A very profound docstring.
+  if (lines.empty()) {
+    return NULL;
+  }
+
+  // Clear leading whitespace from the first line.
+  pos = lines.front().find_first_not_of(" \t");
+  lines.front().erase(0, pos);
+
+  // If every nonblank line after the first has the same number of spaces/tabs,
+  // then a star, remove them.
+  bool have_prefix = true;
+  bool found_prefix = false;
+  string::size_type prefix_len = 0;
+  vector<string>::iterator l_iter;
+  for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+    pos = l_iter->find_first_not_of(" \t");
+    if (pos == string::npos) {
+      // Empty or whitespace-only line.  Truncate it.
+      l_iter->clear();
+    } else if (l_iter->at(pos) == '*'
+        && (!found_prefix || pos == prefix_len)) {
+      // The first starred line fixes the prefix; later ones must match it.
+      found_prefix = true;
+      prefix_len = pos;
+    } else {
+      // The pattern has been broken.
+      have_prefix = false;
+      break;
+    }
+  }
+
+  // If our prefix survived, delete it from every line.
+  if (have_prefix && found_prefix) {
+    // Get the star too.
+    prefix_len++;
+    for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+      l_iter->erase(0, prefix_len);
+    }
+  }
+
+  // Now delete the minimum amount of leading whitespace from each line.
+  prefix_len = string::npos;
+  for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+    if (l_iter->empty()) {
+      continue;
+    }
+    pos = l_iter->find_first_not_of(" \t");
+    if (pos != string::npos
+        && (prefix_len == string::npos || pos < prefix_len)) {
+      prefix_len = pos;
+    }
+  }
+
+  // If we found a common indentation, delete it from every line.
+  if (prefix_len != string::npos) {
+    for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+      l_iter->erase(0, prefix_len);
+    }
+  }
+
+  // Remove trailing whitespace from every line.  A whitespace-only line has
+  // no last non-blank character (npos), so it is emptied entirely.
+  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
+    pos = l_iter->find_last_not_of(" \t");
+    l_iter->erase(pos == string::npos ? 0 : pos+1);
+  }
+
+  // If the first line is empty, remove it.
+  // Don't do this earlier because a lot of steps skip the first line.
+  if (lines.front().empty()) {
+    lines.erase(lines.begin());
+  }
+
+  // Now rejoin the lines and copy them back into doctext.
+  docstring.clear();
+  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
+    docstring += *l_iter;
+    docstring += '\n';
+  }
+
+  // The '\n' added after the last line can make the result longer than the
+  // input ("abc" becomes "abc\n"); get a bigger buffer instead of overflowing.
+  if (docstring.length() > strlen(doctext)) {
+    free(doctext);
+    doctext = strdup(docstring.c_str());
+  } else {
+    strcpy(doctext, docstring.c_str());
+  }
+  return doctext;
+}
+
+/** Set to true to dump typedef docstrings to stdout (debugging aid). */
+static bool dump_docs = false;
+
+/**
+ * Prints each documented typedef's name and docstring to stdout.
+ * Only typedefs are covered; other definitions are not dumped.
+ */
+void dump_docstrings(t_program* program) {
+  const vector<t_typedef*>& tdefs = program->get_typedefs();
+  for (vector<t_typedef*>::size_type i = 0; i < tdefs.size(); ++i) {
+    t_typedef* def = tdefs[i];
+    if (!def->has_doc()) {
+      continue;
+    }
+    printf("%s:\n%s\n", def->get_name().c_str(), def->get_doc().c_str());
+  }
+}
+
+/**
  * Diplays the usage message and then exits with an error code.
  */
 void usage() {
@@ -596,6 +748,9 @@
       hs->generate_program();
       delete hs;
     }
+    if (dump_docs) {
+      dump_docstrings(program);
+    }
 
   } catch (string s) {
     printf("Error: %s\n", s.c_str());
diff --git a/compiler/cpp/src/main.h b/compiler/cpp/src/main.h
index b8d7971..a60399b 100644
--- a/compiler/cpp/src/main.h
+++ b/compiler/cpp/src/main.h
@@ -65,6 +65,11 @@
 std::string include_file(std::string filename);
 
 /**
+ * Cleans up doxygen-like comment text.  May return NULL; use the return value.
+ */
+char* clean_up_doctext(char* doctext);
+
+/**
  * Flex utilities
  */
 
diff --git a/compiler/cpp/src/thriftl.ll b/compiler/cpp/src/thriftl.ll
index 803f786..5e9fca9 100644
--- a/compiler/cpp/src/thriftl.ll
+++ b/compiler/cpp/src/thriftl.ll
@@ -45,10 +45,11 @@
 dubconstant  ([+-]?[0-9]*(\.[0-9]+)?([eE][+-]?[0-9]+)?)
 identifier   ([a-zA-Z_][\.a-zA-Z_0-9]*)
 whitespace   ([ \t\r\n]*)
-multicomm    ("/*""/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
+sillycomm    ("/*""*"*"*/")
+multicomm    ("/*"[^*]"/"*([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
+doctext      ("/**"([^*/]|[^*]"/"|"*"[^/])*"*"*"*/")
 comment      ("//"[^\n]*)
 unixcomment  ("#"[^\n]*)
-doctext      ("["(("["[^\]\[]*"]")|[^\]\[])*"]") /* allows one level of nesting */
 symbol       ([:;\,\{\}\(\)\=<>\[\]])
 dliteral     ("\""[^"]*"\"")
 sliteral     ("'"[^']*"'")
@@ -57,6 +58,7 @@
 %%
 
 {whitespace}  { /* do nothing */ }
+{sillycomm}   { /* do nothing */ }
 {multicomm}   { /* do nothing */ }
 {comment}     { /* do nothing */ }
 {unixcomment} { /* do nothing */ }
@@ -200,8 +202,8 @@
 }
 
 {doctext} {
- yylval.id = strdup(yytext + 1);
- yylval.id[strlen(yylval.id) - 1] = '\0';
+ yylval.id = strdup(yytext + 3);
+ yylval.id[strlen(yylval.id) - 2] = '\0';
  return tok_doctext;
 }
 
diff --git a/compiler/cpp/src/thrifty.yy b/compiler/cpp/src/thrifty.yy
index ce11bf8..68384be 100644
--- a/compiler/cpp/src/thrifty.yy
+++ b/compiler/cpp/src/thrifty.yy
@@ -361,7 +361,7 @@
   tok_doctext
     {
       pdebug("DocTextOptional -> tok_doctext");
-      $$ = $1;
+      $$ = clean_up_doctext($1);
     }
 |
     {
diff --git a/test/DocTest.thrift b/test/DocTest.thrift
index 682edff..c06bd21 100755
--- a/test/DocTest.thrift
+++ b/test/DocTest.thrift
@@ -6,14 +6,14 @@
 
 # the new unix comment
 
-[Some doc text goes here.  Wow I am [nesting these].]
+/** Some doc text goes here.  Wow I am [nesting these] (no more nesting.) */
 enum Numberz
 {
 
-  [This is how to document a parameter]
+  /** This is how to document a parameter */
   ONE = 1,
 
-  [And this is a doc for a parameter that has no specific value assigned]
+  /** And this is a doc for a parameter that has no specific value assigned */
   TWO,
 
   THREE,
@@ -22,17 +22,17 @@
   EIGHT = 8
 }
 
-[This is how you would do a typedef doc]
+/** This is how you would do a typedef doc */
 typedef i64 UserId 
 
-[And this is where you would document a struct]
+/** And this is where you would document a struct */
 struct Xtruct
 {
 
-  [And the members of a struct]
+  /** And the members of a struct */
   1:  string string_thing
 
-  [doct text goes before a comma]
+  /** doct text goes before a comma */
   4:  byte   byte_thing,
 
   9:  i32    i32_thing,
@@ -46,14 +46,14 @@
   3: i32    i32_thing
 }
 
-[Struct insanity]
+/** Struct insanity */
 struct Insanity
 {
 
-  [This is doc for field 1]
+  /** This is doc for field 1 */
   1: map<Numberz, UserId> userMap,
 
-  [And this is doc for field 2]
+  /** And this is doc for field 2 */
   2: list<Xtruct> xtructs 
 }
 
@@ -73,17 +73,17 @@
   1: EmptyStruct field
 }
 
-[This is where you would document a Service]
+/** This is where you would document a Service */
 service ThriftTest
 {
 
-  [And this is how you would document functions in a service]
+  /** And this is how you would document functions in a service */
   void         testVoid(),
   string       testString(1: string thing),
   byte         testByte(1: byte thing),
   i32          testI32(1: i32 thing),
 
-  [Like this one]
+  /** Like this one */
   i64          testI64(1: i64 thing),
   double       testDouble(1: double thing),
   Xtruct       testStruct(1: Xtruct thing),
@@ -92,10 +92,10 @@
   set<i32>     testSet(1: set<i32> thing),
   list<i32>    testList(1: list<i32> thing),
 
-  [This is an example of a function with params documented]
+  /** This is an example of a function with params documented */
   Numberz      testEnum(
 
-    [This param is a thing]
+    /** This param is a thing */
     1: Numberz thing
 
   ),
@@ -120,7 +120,94 @@
   Xtruct testMultiException(string arg0, string arg1) throws(Xception err1, Xception2 err2)
 }
 
-service SecondService
-{
-  void blahBlah()
-}
+/// This style of Doxy-comment doesn't work.
+typedef i32 SorryNoGo
+
+/**
+ * This is a trivial example of a multiline docstring.
+ */
+typedef i32 TrivialMultiLine
+
+/**
+ * This is the cannonical example
+ * of a multiline docstring.
+ */
+typedef i32 StandardMultiLine
+
+/**
+ * The last line is non-blank.
+ * I said non-blank! */
+typedef i32 LastLine
+
+/** Both the first line
+ * are non blank. ;-)
+ * and the last line */
+typedef i32 FirstAndLastLine
+
+/**
+ *    INDENTED TITLE
+ * The text is less indented.
+ */
+typedef i32 IndentedTitle
+
+/**       First line indented.
+ * Unfortunately, this does not get indented.
+ */
+typedef i32 FirstLineIndent
+
+
+/**
+ * void code_in_comment() {
+ *   printf("hooray code!");
+ * }
+ */
+typedef i32 CodeInComment
+
+    /**
+     * Indented Docstring.
+     * This whole docstring is indented.
+     *   This line is indented further.
+     */
+typedef i32 IndentedDocstring
+
+/** Irregular docstring.
+ * We will have to punt
+  * on this thing */
+typedef i32 Irregular1
+
+/**
+ * note the space
+ * before these lines
+* but not this
+ * one
+ */
+typedef i32 Irregular2
+
+/**
+* Flush against
+* the left.
+*/
+typedef i32 Flush
+
+/**
+  No stars in this one.
+  It should still work fine, though.
+    Including indenting.
+    */
+typedef i32 NoStars
+
+/** Trailing whitespace   
+Sloppy trailing whitespace   
+is truncated.   */
+typedef i32 TrailingWhitespace
+
+/**
+ * This is a big one.
+ *
+ * We'll have some blank lines in it.
+ * 
+ * void as_well_as(some code) {
+ *   puts("YEEHAW!");
+ * }
+ */
+typedef i32 BigDog