THRIFT-3768 fix TThreadedServer refactoring issues with client lifetime guarantees
Client: C++
Patch: Jim King <jim.king@simplivity.com>

This closes #980
diff --git a/lib/cpp/test/TServerIntegrationTest.cpp b/lib/cpp/test/TServerIntegrationTest.cpp
index ce1cbd3..2180448 100644
--- a/lib/cpp/test/TServerIntegrationTest.cpp
+++ b/lib/cpp/test/TServerIntegrationTest.cpp
@@ -19,7 +19,9 @@
 
 #define BOOST_TEST_MODULE TServerIntegrationTest
 #include <boost/test/auto_unit_test.hpp>
+#include <boost/atomic.hpp>
 #include <boost/bind.hpp>
+#include <boost/date_time/posix_time/ptime.hpp>
 #include <boost/foreach.hpp>
 #include <boost/format.hpp>
 #include <boost/make_shared.hpp>
@@ -33,6 +35,7 @@
 #include <thrift/transport/TSocket.h>
 #include <thrift/transport/TTransport.h>
 #include "gen-cpp/ParentService.h"
+#include <string>
 #include <vector>
 
 using apache::thrift::concurrency::Guard;
@@ -152,7 +155,10 @@
                                   new TServerSocket("localhost", 0)),
                               boost::shared_ptr<TTransportFactory>(new TTransportFactory),
                               boost::shared_ptr<TProtocolFactory>(new TBinaryProtocolFactory))),
-      pEventHandler(boost::shared_ptr<TServerReadyEventHandler>(new TServerReadyEventHandler)) {
+      pEventHandler(boost::shared_ptr<TServerReadyEventHandler>(new TServerReadyEventHandler)),
+	  bStressDone(false),
+	  bStressConnectionCount(0),
+	  bStressRequestCount(0) {
     pServer->setServerEventHandler(pEventHandler);
   }
 
@@ -162,7 +168,10 @@
                           boost::shared_ptr<TServerTransport>(new TServerSocket("localhost", 0)),
                           boost::shared_ptr<TTransportFactory>(new TTransportFactory),
                           boost::shared_ptr<TProtocolFactory>(new TBinaryProtocolFactory))),
-      pEventHandler(boost::shared_ptr<TServerReadyEventHandler>(new TServerReadyEventHandler)) {
+      pEventHandler(boost::shared_ptr<TServerReadyEventHandler>(new TServerReadyEventHandler)),
+      bStressDone(false),
+	  bStressConnectionCount(0),
+	  bStressRequestCount(0) {
     pServer->setServerEventHandler(pEventHandler);
   }
 
@@ -175,7 +184,7 @@
       pEventHandler->wait();
     }
 
-    BOOST_TEST_MESSAGE("server is listening");
+    BOOST_TEST_MESSAGE("  server is listening");
   }
 
   void blockUntilAccepted(uint64_t numAccepted) {
@@ -184,34 +193,35 @@
       pEventHandler->wait();
     }
 
-    BOOST_TEST_MESSAGE(boost::format("server has accepted %1%") % numAccepted);
+    BOOST_TEST_MESSAGE(boost::format("  server has accepted %1%") % numAccepted);
   }
 
   void stopServer() {
     if (pServerThread) {
       pServer->stop();
-      BOOST_TEST_MESSAGE("server stop completed");
+      BOOST_TEST_MESSAGE("  server stop completed");
 
       pServerThread->join();
-      BOOST_TEST_MESSAGE("server thread joined");
+      BOOST_TEST_MESSAGE("  server thread joined");
       pServerThread.reset();
     }
   }
 
   ~TServerIntegrationTestFixture() { stopServer(); }
 
-  int getServerPort() {
-    TServerSocket* pSock = dynamic_cast<TServerSocket*>(pServer->getServerTransport().get());
-    return pSock->getPort();
-  }
+  /**
+   * Performs a baseline test where some clients are opened and issue a single operation
+   * and then disconnect at different intervals.
+   * \param[in]  numToMake  the number of concurrent clients
+   * \param[in]  expectedHWM  the high water mark we expect of concurrency
+   * \param[in]  purpose  a description of the test for logging purposes
+   */
+  void baseline(int64_t numToMake, int64_t expectedHWM, const std::string& purpose) {
+	BOOST_TEST_MESSAGE(boost::format("Testing %1%: %2% with %3% clients, expect %4% HWM")
+						% typeid(TServerType).name() % purpose % numToMake % expectedHWM);
 
-  void delayClose(boost::shared_ptr<TTransport> toClose, boost::posix_time::time_duration after) {
-    boost::this_thread::sleep(after);
-    toClose->close();
-  }
+	startServer();
 
-  void baseline(int64_t numToMake, int64_t expectedHWM) {
-    startServer();
     std::vector<boost::shared_ptr<TSocket> > holdSockets;
     std::vector<boost::shared_ptr<boost::thread> > holdThreads;
 
@@ -227,19 +237,91 @@
           new boost::thread(boost::bind(&TServerIntegrationTestFixture::delayClose,
                                         this,
                                         pClientSock,
-                                        milliseconds(100 * numToMake)))));
+                                        milliseconds(10 * numToMake)))));
     }
 
     BOOST_CHECK_EQUAL(expectedHWM, pServer->getConcurrentClientCountHWM());
-    stopServer();
+
     BOOST_FOREACH (boost::shared_ptr<boost::thread> pThread, holdThreads) { pThread->join(); }
     holdThreads.clear();
     holdSockets.clear();
+
+    stopServer();
+  }
+
+  /**
+   * Helper method used to close a connection after a delay.
+   * \param[in]  toClose  the connection to close
+   * \param[in]  after  the delay to impose
+   */
+  void delayClose(boost::shared_ptr<TTransport> toClose, boost::posix_time::time_duration after) {
+    boost::this_thread::sleep(after);
+    toClose->close();
+  }
+
+  /**
+   * \returns  the server port number
+   */
+  int getServerPort() {
+    TServerSocket* pSock = dynamic_cast<TServerSocket*>(pServer->getServerTransport().get());
+    return pSock->getPort();
+  }
+
+  /**
+   * Performs a stress test by spawning threads that connect, do a number of operations
+   * and disconnect, then a random delay, then do it over again.  This is done for a fixed
+   * period of time to test for concurrency correctness.
+   * \param[in]  numToMake  the number of concurrent clients
+   */
+  void stress(int64_t numToMake, const boost::posix_time::time_duration& duration) {
+    BOOST_TEST_MESSAGE(boost::format("Stress testing %1% with %2% clients for %3% seconds")
+        % typeid(TServerType).name() % numToMake % duration.total_seconds());
+
+    startServer();
+
+    std::vector<boost::shared_ptr<boost::thread> > holdThreads;
+    for (int64_t i = 0; i < numToMake; ++i) {
+      holdThreads.push_back(boost::shared_ptr<boost::thread>(
+        new boost::thread(boost::bind(&TServerIntegrationTestFixture::stressor, this))));
+    }
+
+    boost::this_thread::sleep(duration);
+    bStressDone = true;
+
+    BOOST_TEST_MESSAGE(boost::format("  serviced %1% connections (HWM %2%) totaling %3% requests")
+        % bStressConnectionCount % pServer->getConcurrentClientCountHWM() % bStressRequestCount);
+
+    BOOST_FOREACH (boost::shared_ptr<boost::thread> pThread, holdThreads) { pThread->join(); }
+    holdThreads.clear();
+
+    BOOST_CHECK(bStressRequestCount > 0);
+
+    stopServer();
+  }
+
+  /**
+   * Helper method to stress the system
+   */
+  void stressor() {
+	while (!bStressDone) {
+      boost::shared_ptr<TSocket> pSocket(new TSocket("localhost", getServerPort()), autoSocketCloser);
+      boost::shared_ptr<TProtocol> pProtocol(new TBinaryProtocol(pSocket));
+      ParentServiceClient client(pProtocol);
+      pSocket->open();
+      bStressConnectionCount.fetch_add(1, boost::memory_order_relaxed);
+      for (int i = 0; i < rand() % 1000; ++i) {
+    	client.incrementGeneration();
+        bStressRequestCount.fetch_add(1, boost::memory_order_relaxed);
+      }
+    }
   }
 
   boost::shared_ptr<TServerType> pServer;
   boost::shared_ptr<TServerReadyEventHandler> pEventHandler;
   boost::shared_ptr<boost::thread> pServerThread;
+  bool bStressDone;
+  boost::atomic_int64_t bStressConnectionCount;
+  boost::atomic_int64_t bStressRequestCount;
 };
 
 template <class TServerType>
@@ -264,26 +346,31 @@
 
 BOOST_FIXTURE_TEST_CASE(test_simple_factory,
                         TServerIntegrationProcessorFactoryTestFixture<TSimpleServer>) {
-  baseline(3, 1);
+  baseline(3, 1, "factory");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_simple, TServerIntegrationProcessorTestFixture<TSimpleServer>) {
-  baseline(3, 1);
+  baseline(3, 1, "processor");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threaded_factory,
                         TServerIntegrationProcessorFactoryTestFixture<TThreadedServer>) {
-  baseline(10, 10);
+  baseline(10, 10, "factory");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threaded, TServerIntegrationProcessorTestFixture<TThreadedServer>) {
-  baseline(10, 10);
+  baseline(10, 10, "processor");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threaded_bound,
                         TServerIntegrationProcessorTestFixture<TThreadedServer>) {
   pServer->setConcurrentClientLimit(4);
-  baseline(10, 4);
+  baseline(10, 4, "limit by server framework");
+}
+
+BOOST_FIXTURE_TEST_CASE(test_threaded_stress,
+                        TServerIntegrationProcessorFactoryTestFixture<TThreadedServer>) {
+  stress(10, boost::posix_time::seconds(3));
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threadpool_factory,
@@ -298,7 +385,7 @@
   // as accept() will be called to grab a 5th client socket, in this case
   // and then the thread factory will block adding the thread to manage
   // that client.
-  baseline(10, 5);
+  baseline(10, 5, "limit by thread manager");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threadpool,
@@ -313,7 +400,7 @@
   // as accept() will be called to grab a 5th client socket, in this case
   // and then the thread factory will block adding the thread to manage
   // that client.
-  baseline(10, 5);
+  baseline(10, 5, "limit by thread manager");
 }
 
 BOOST_FIXTURE_TEST_CASE(test_threadpool_bound,
@@ -324,7 +411,17 @@
   pServer->getThreadManager()->start();
   pServer->setConcurrentClientLimit(4);
 
-  baseline(10, 4);
+  baseline(10, 4, "server framework connection limit");
+}
+
+BOOST_FIXTURE_TEST_CASE(test_threadpool_stress,
+                        TServerIntegrationProcessorTestFixture<TThreadPoolServer>) {
+  pServer->getThreadManager()->threadFactory(
+      boost::shared_ptr<apache::thrift::concurrency::ThreadFactory>(
+          new apache::thrift::concurrency::PlatformThreadFactory));
+  pServer->getThreadManager()->start();
+
+  stress(10, boost::posix_time::seconds(3));
 }
 
 BOOST_AUTO_TEST_SUITE_END()
@@ -334,6 +431,7 @@
 
 BOOST_AUTO_TEST_CASE(test_stop_with_interruptable_clients_connected) {
   // This tests THRIFT-2441 new behavior: stopping the server disconnects clients
+  BOOST_TEST_MESSAGE("Testing stop with interruptable clients");
 
   startServer();
 
@@ -361,6 +459,7 @@
 BOOST_AUTO_TEST_CASE(test_stop_with_uninterruptable_clients_connected) {
   // This tests pre-THRIFT-2441 behavior: stopping the server blocks until clients
   // disconnect.
+	  BOOST_TEST_MESSAGE("Testing stop with uninterruptable clients");
 
   boost::dynamic_pointer_cast<TServerSocket>(pServer->getServerTransport())
       ->setInterruptableChildren(false); // returns to pre-THRIFT-2441 behavior
@@ -389,12 +488,14 @@
 
   // Once the clients disconnect the server will stop
   stopServer();
+  BOOST_CHECK(pServer->getConcurrentClientCountHWM() > 0);
   t1.join();
   t2.join();
 }
 
 BOOST_AUTO_TEST_CASE(test_concurrent_client_limit) {
   startServer();
+  BOOST_TEST_MESSAGE("Testing the concurrent client limit");
 
   BOOST_CHECK_EQUAL(INT64_MAX, pServer->getConcurrentClientLimit());
   pServer->setConcurrentClientLimit(2);
@@ -426,6 +527,7 @@
   BOOST_CHECK_EQUAL(2, pServer->getConcurrentClientCountHWM());
 
   stopServer();
+  BOOST_CHECK(pServer->getConcurrentClientCountHWM() > 0);
   t2.join();
 }