revert unintentional partial commit of THRIFT-247

git-svn-id: https://svn.apache.org/repos/asf/incubator/thrift/trunk@980204 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/cpp/src/transport/THttpClient.cpp b/lib/cpp/src/transport/THttpClient.cpp
index 400597e..59f2339 100644
--- a/lib/cpp/src/transport/THttpClient.cpp
+++ b/lib/cpp/src/transport/THttpClient.cpp
@@ -20,38 +20,250 @@
 #include <cstdlib>
 #include <sstream>
 
-#include <transport/THttpClient.h>
-#include <transport/TSocket.h>
+#include "THttpClient.h"
+#include "TSocket.h"
 
 namespace apache { namespace thrift { namespace transport {
 
 using namespace std;
 
-THttpClient::THttpClient(boost::shared_ptr<TTransport> transport, std::string host, std::string path) :
-  THttpTransport(transport), host_(host), path_(path) {
+/**
+ * Http client implementation.
+ *
+ */
+
+// Yeah, yeah, hacky to put these here, I know.
+static const char* CRLF = "\r\n";
+static const int CRLF_LEN = 2;
+
+THttpClient::THttpClient(boost::shared_ptr<TTransport> transport, string host, string path) :
+  transport_(transport),
+  host_(host),
+  path_(path),
+  readHeaders_(true),
+  chunked_(false),
+  chunkedDone_(false),
+  chunkSize_(0),
+  contentLength_(0),
+  httpBuf_(NULL),
+  httpPos_(0),
+  httpBufLen_(0),
+  httpBufSize_(1024) {
+  init();
 }
 
 THttpClient::THttpClient(string host, int port, string path) :
-  THttpTransport(boost::shared_ptr<TTransport>(new TSocket(host, port))), host_(host), path_(path) {
+  host_(host),
+  path_(path),
+  readHeaders_(true),
+  chunked_(false),
+  chunkedDone_(false),
+  chunkSize_(0),
+  contentLength_(0),
+  httpBuf_(NULL),
+  httpPos_(0),
+  httpBufLen_(0),
+  httpBufSize_(1024) {
+  transport_ = boost::shared_ptr<TTransport>(new TSocket(host, port));
+  init();
 }
 
-THttpClient::~THttpClient() {}
-
-void THttpClient::parseHeader(char* header) {
-  char* colon = strchr(header, ':');
-  if (colon == NULL) {
-    return;
+void THttpClient::init() {
+  httpBuf_ = (char*)std::malloc(httpBufSize_+1);
+  if (httpBuf_ == NULL) {
+    throw TTransportException("Out of memory.");
   }
-  uint32_t sz = colon - header;
-  char* value = colon+1;
+  httpBuf_[httpBufLen_] = '\0';
+}
 
-  if (strncmp(header, "Transfer-Encoding", sz) == 0) {
-    if (strstr(value, "chunked") != NULL) {
-      chunked_ = true;
+THttpClient::~THttpClient() {
+  if (httpBuf_ != NULL) {
+    std::free(httpBuf_);
+  }
+}
+
+uint32_t THttpClient::read(uint8_t* buf, uint32_t len) {
+  if (readBuffer_.available_read() == 0) {
+    readBuffer_.resetBuffer();
+    uint32_t got = readMoreData();
+    if (got == 0) {
+      return 0;
     }
-  } else if (strncmp(header, "Content-Length", sz) == 0) {
-    chunked_ = false;
-    contentLength_ = atoi(value);
+  }
+  return readBuffer_.read(buf, len);
+}
+
+void THttpClient::readEnd() {
+  // Read any pending chunked data (footers etc.)
+  if (chunked_) {
+    while (!chunkedDone_) {
+      readChunked();
+    }
+  }
+}
+
+uint32_t THttpClient::readMoreData() {
+  // Get more data!
+  refill();
+
+  if (readHeaders_) {
+    readHeaders();
+  }
+
+  if (chunked_) {
+    return readChunked();
+  } else {
+    return readContent(contentLength_);
+  }
+}
+
+uint32_t THttpClient::readChunked() {
+  uint32_t length = 0;
+
+  char* line = readLine();
+  uint32_t chunkSize = parseChunkSize(line);
+  if (chunkSize == 0) {
+    readChunkedFooters();
+  } else {
+    // Read data content
+    length += readContent(chunkSize);
+    // Read trailing CRLF after content
+    readLine();
+  }
+  return length;
+}
+
+void THttpClient::readChunkedFooters() {
+  // End of data, read footer lines until a blank one appears
+  while (true) {
+    char* line = readLine();
+    if (strlen(line) == 0) {
+      chunkedDone_ = true;
+      break;
+    }
+  }
+}
+
+uint32_t THttpClient::parseChunkSize(char* line) {
+  char* semi = strchr(line, ';');
+  if (semi != NULL) {
+    *semi = '\0';
+  }
+  int size = 0;
+  sscanf(line, "%x", &size);
+  return (uint32_t)size;
+}
+
+uint32_t THttpClient::readContent(uint32_t size) {
+  uint32_t need = size;
+  while (need > 0) {
+    uint32_t avail = httpBufLen_ - httpPos_;
+    if (avail == 0) {
+      // We have given all the data, reset position to head of the buffer
+      httpPos_ = 0;
+      httpBufLen_ = 0;
+      refill();
+
+      // Now have available however much we read
+      avail = httpBufLen_;
+    }
+    uint32_t give = avail;
+    if (need < give) {
+      give = need;
+    }
+    readBuffer_.write((uint8_t*)(httpBuf_+httpPos_), give);
+    httpPos_ += give;
+    need -= give;
+  }
+  return size;
+}
+
+char* THttpClient::readLine() {
+  while (true) {
+    char* eol = NULL;
+
+    eol = strstr(httpBuf_+httpPos_, CRLF);
+
+    // No CRLF yet?
+    if (eol == NULL) {
+      // Shift whatever we have now to front and refill
+      shift();
+      refill();
+    } else {
+      // Return pointer to next line
+      *eol = '\0';
+      char* line = httpBuf_+httpPos_;
+      httpPos_ = (eol-httpBuf_) + CRLF_LEN;
+      return line;
+    }
+  }
+
+}
+
+void THttpClient::shift() {
+  if (httpBufLen_ > httpPos_) {
+    // Shift down remaining data and read more
+    uint32_t length = httpBufLen_ - httpPos_;
+    memmove(httpBuf_, httpBuf_+httpPos_, length);
+    httpBufLen_ = length;
+  } else {
+    httpBufLen_ = 0;
+  }
+  httpPos_ = 0;
+  httpBuf_[httpBufLen_] = '\0';
+}
+
+void THttpClient::refill() {
+  uint32_t avail = httpBufSize_ - httpBufLen_;
+  if (avail <= (httpBufSize_ / 4)) {
+    httpBufSize_ *= 2;
+    httpBuf_ = (char*)std::realloc(httpBuf_, httpBufSize_+1);
+    if (httpBuf_ == NULL) {
+      throw TTransportException("Out of memory.");
+    }
+  }
+
+  // Read more data
+  uint32_t got = transport_->read((uint8_t*)(httpBuf_+httpBufLen_), httpBufSize_-httpBufLen_);
+  httpBufLen_ += got;
+  httpBuf_[httpBufLen_] = '\0';
+
+  if (got == 0) {
+    throw TTransportException("Could not refill buffer");
+  }
+}
+
+void THttpClient::readHeaders() {
+  // Initialize headers state variables
+  contentLength_ = 0;
+  chunked_ = false;
+  chunkedDone_ = false;
+  chunkSize_ = 0;
+
+  // Control state flow
+  bool statusLine = true;
+  bool finished = false;
+
+  // Loop until headers are finished
+  while (true) {
+    char* line = readLine();
+
+    if (strlen(line) == 0) {
+      if (finished) {
+        readHeaders_ = false;
+        return;
+      } else {
+        // Must have been an HTTP 100, keep going for another status line
+        statusLine = true;
+      }
+    } else {
+      if (statusLine) {
+        statusLine = false;
+        finished = parseStatusLine(line);
+      } else {
+        parseHeader(line);
+      }
+    }
   }
 }
 
@@ -83,6 +295,28 @@
   }
 }
 
+void THttpClient::parseHeader(char* header) {
+  char* colon = strchr(header, ':');
+  if (colon == NULL) {
+    return;
+  }
+  uint32_t sz = colon - header;
+  char* value = colon+1;
+
+  if (strncmp(header, "Transfer-Encoding", sz) == 0) {
+    if (strstr(value, "chunked") != NULL) {
+      chunked_ = true;
+    }
+  } else if (strncmp(header, "Content-Length", sz) == 0) {
+    chunked_ = false;
+    contentLength_ = atoi(value);
+  }
+}
+
+void THttpClient::write(const uint8_t* buf, uint32_t len) {
+  writeBuffer_.write(buf, len);
+}
+
 void THttpClient::flush() {
   // Fetch the contents of the write buffer
   uint8_t* buf;
@@ -97,7 +331,7 @@
     "Content-Type: application/x-thrift" << CRLF <<
     "Content-Length: " << len << CRLF <<
     "Accept: application/x-thrift" << CRLF <<
-    "User-Agent: Thrift/" << VERSION << " (C++/THttpClient)" << CRLF <<
+    "User-Agent: C++/THttpClient" << CRLF <<
     CRLF;
   string header = h.str();
 
diff --git a/lib/cpp/src/transport/THttpClient.h b/lib/cpp/src/transport/THttpClient.h
index 142063d..f4be4c1 100644
--- a/lib/cpp/src/transport/THttpClient.h
+++ b/lib/cpp/src/transport/THttpClient.h
@@ -20,11 +20,19 @@
 #ifndef _THRIFT_TRANSPORT_THTTPCLIENT_H_
 #define _THRIFT_TRANSPORT_THTTPCLIENT_H_ 1
 
-#include <transport/THttpTransport.h>
+#include <transport/TBufferTransports.h>
 
 namespace apache { namespace thrift { namespace transport {
 
-class THttpClient : public THttpTransport {
+/**
+ * HTTP client implementation of the thrift transport. This was irritating
+ * to write, but the alternatives in C++ land are daunting. Linking CURL
+ * requires 23 dynamic libraries last time I checked (WTF?!?). All we have
+ * here is a VERY basic HTTP/1.1 client which supports HTTP 100 Continue,
+ * chunked transfer encoding, keepalive, etc. Tested against Apache.
+ *
+ */
+class THttpClient : public TTransport {
  public:
   THttpClient(boost::shared_ptr<TTransport> transport, std::string host, std::string path="");
 
@@ -32,15 +40,69 @@
 
   virtual ~THttpClient();
 
-  virtual void flush();
+  void open() {
+    transport_->open();
+  }
+
+  bool isOpen() {
+    return transport_->isOpen();
+  }
+
+  bool peek() {
+    return transport_->peek();
+  }
+
+  void close() {
+    transport_->close();
+  }
+
+  uint32_t read(uint8_t* buf, uint32_t len);
+
+  void readEnd();
+
+  void write(const uint8_t* buf, uint32_t len);
+
+  void flush();
+
+ private:
+  void init();
 
  protected:
 
+  boost::shared_ptr<TTransport> transport_;
+
+  TMemoryBuffer writeBuffer_;
+  TMemoryBuffer readBuffer_;
+
   std::string host_;
   std::string path_;
 
-  virtual void parseHeader(char* header);
-  virtual bool parseStatusLine(char* status);
+  bool readHeaders_;
+  bool chunked_;
+  bool chunkedDone_;
+  uint32_t chunkSize_;
+  uint32_t contentLength_;
+
+  char* httpBuf_;
+  uint32_t httpPos_;
+  uint32_t httpBufLen_;
+  uint32_t httpBufSize_;
+
+  uint32_t readMoreData();
+  char* readLine();
+
+  void readHeaders();
+  void parseHeader(char* header);
+  bool parseStatusLine(char* status);
+
+  uint32_t readChunked();
+  void readChunkedFooters();
+  uint32_t parseChunkSize(char* line);
+
+  uint32_t readContent(uint32_t size);
+
+  void refill();
+  void shift();
 
 };