Thrift: Change docstring syntax.

Summary:
The old docstring syntax collided with the syntax for list constants.
The new syntax looks a lot like doxygent comments.

Trac Bug: #4664

Blame Rev: 32392

Reviewed By: mcslee

Test Plan:
../compiler/cpp/thrift -cpp DocTest.thrift
(with dump_docs on)
and looked at the output.
Generated C++ is identical to installed thrift.

Revert Plan: ok


git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@665182 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/compiler/cpp/src/main.cc b/compiler/cpp/src/main.cc
index a6f55f9..4d00306 100644
--- a/compiler/cpp/src/main.cc
+++ b/compiler/cpp/src/main.cc
@@ -295,6 +295,158 @@
 }
 
 /**
+ * Cleans up text commonly found in doxygen-like comments
+ *
+ * Warning: if you mix tabs and spaces in a non-uniform way,
+ * you will get what you deserve.
+ */
+char* clean_up_doctext(char* doctext) {
+  // Convert to C++ string, and remove Windows's carriage returns.
+  string docstring = doctext;
+  docstring.erase(
+      remove(docstring.begin(), docstring.end(), '\r'),
+      docstring.end());
+
+  // Separate into lines.
+  vector<string> lines;
+  string::size_type pos = string::npos;
+  string::size_type last;
+  while (true) {
+    last = (pos == string::npos) ? 0 : pos+1;
+    pos = docstring.find('\n', last);
+    if (pos == string::npos) {
+      // First bit of cleaning.  If the last line is only whitespace, drop it.
+      string::size_type nonwhite = docstring.find_first_not_of(" \t", last);
+      if (nonwhite != string::npos) {
+        lines.push_back(docstring.substr(last));
+      }
+      break;
+    }
+    lines.push_back(docstring.substr(last, pos-last));
+  }
+
+  // A very profound docstring.
+  if (lines.empty()) {
+    return NULL;
+  }
+
+  // Clear leading whitespace from the first line.
+  pos = lines.front().find_first_not_of(" \t");
+  lines.front().erase(0, pos);
+
+  // If every nonblank line after the first has the same number of spaces/tabs,
+  // then a star, remove them.
+  bool have_prefix = true;
+  bool found_prefix = false;
+  string::size_type prefix_len = 0;
+  vector<string>::iterator l_iter;
+  for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+    if (l_iter->empty()) {
+      continue;
+    }
+
+    pos = l_iter->find_first_not_of(" \t");
+    if (!found_prefix) {
+      if (pos != string::npos) {
+        if (l_iter->at(pos) == '*') {
+          found_prefix = true;
+          prefix_len = pos;
+        } else {
+          have_prefix = false;
+          break;
+        }
+      } else {
+        // Whitespace-only line.  Truncate it.
+        l_iter->clear();
+      }
+    } else if (l_iter->size() > pos
+        && l_iter->at(pos) == '*'
+        && pos == prefix_len) {
+      // Business as usual.
+    } else if (pos == string::npos) {
+      // Whitespace-only line.  Let's truncate it for them.
+      l_iter->clear();
+    } else {
+      // The pattern has been broken.
+      have_prefix = false;
+      break;
+    }
+  }
+
+  // If our prefix survived, delete it from every line.
+  if (have_prefix) {
+    // Get the star too.
+    prefix_len++;
+    for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+      l_iter->erase(0, prefix_len);
+    }
+  }
+
+  // Now delete the minimum amount of leading whitespace from each line.
+  prefix_len = string::npos;
+  for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+    if (l_iter->empty()) {
+      continue;
+    }
+    pos = l_iter->find_first_not_of(" \t");
+    if (pos != string::npos
+        && (prefix_len == string::npos || pos < prefix_len)) {
+      prefix_len = pos;
+    }
+  }
+
+  // If our prefix survived, delete it from every line.
+  if (prefix_len != string::npos) {
+    for (l_iter = lines.begin()+1; l_iter != lines.end(); ++l_iter) {
+      l_iter->erase(0, prefix_len);
+    }
+  }
+
+  // Remove trailing whitespace from every line.
+  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
+    pos = l_iter->find_last_not_of(" \t");
+    if (pos != string::npos && pos != l_iter->length()-1) {
+      l_iter->erase(pos+1);
+    }
+  }
+
+  // If the first line is empty, remove it.
+  // Don't do this earlier because a lot of steps skip the first line.
+  if (lines.front().empty()) {
+    lines.erase(lines.begin());
+  }
+
+  // Now rejoin the lines and copy them back into doctext.
+  docstring.clear();
+  for (l_iter = lines.begin(); l_iter != lines.end(); ++l_iter) {
+    docstring += *l_iter;
+    docstring += '\n';
+  }
+
+  assert(docstring.length() <= strlen(doctext));
+  strcpy(doctext, docstring.c_str());
+  return doctext;
+}
+
+/** Set to true to debug docstring parsing */
+static bool dump_docs = false;
+
+/**
+ * Dumps docstrings to stdout
+ * Only works for typedefs
+ */
+void dump_docstrings(t_program* program) {
+  const vector<t_typedef*>& typedefs = program->get_typedefs();
+  vector<t_typedef*>::const_iterator t_iter;
+  for (t_iter = typedefs.begin(); t_iter != typedefs.end(); ++t_iter) {
+    t_typedef* td = *t_iter;
+    if (td->has_doc()) {
+      printf("%s:\n%s\n", td->get_name().c_str(), td->get_doc().c_str());
+    }
+  }
+}
+
+/**
  * Diplays the usage message and then exits with an error code.
  */
 void usage() {
@@ -596,6 +748,9 @@
       hs->generate_program();
       delete hs;
     }
+    if (dump_docs) {
+      dump_docstrings(program);
+    }
 
   } catch (string s) {
     printf("Error: %s\n", s.c_str());