THRIFT-4106: fix errors identified by concurrency_test
Client: C++
This closes #1343
diff --git a/lib/cpp/src/thrift/concurrency/BoostThreadFactory.cpp b/lib/cpp/src/thrift/concurrency/BoostThreadFactory.cpp
index 3661275..d7d8d54 100644
--- a/lib/cpp/src/thrift/concurrency/BoostThreadFactory.cpp
+++ b/lib/cpp/src/thrift/concurrency/BoostThreadFactory.cpp
@@ -51,6 +51,7 @@
private:
scoped_ptr<boost::thread> thread_;
+ Monitor monitor_;
STATE state_;
weak_ptr<BoostThread> self_;
bool detached_;
@@ -71,25 +72,46 @@
}
}
- void start() {
- if (state_ != uninitialized) {
- return;
- }
+ STATE getState() const
+ {
+ Synchronized sync(monitor_);
+ return state_;
+ }
+ void setState(STATE newState)
+ {
+ Synchronized sync(monitor_);
+ state_ = newState;
+
+ // unblock start() with the knowledge that the thread has actually
+ // started running, which avoids a race in detached threads.
+ if (newState == started) {
+ monitor_.notify();
+ }
+ }
+
+ void start() {
// Create reference
shared_ptr<BoostThread>* selfRef = new shared_ptr<BoostThread>();
*selfRef = self_.lock();
- state_ = starting;
+ setState(starting);
+ Synchronized sync(monitor_);
+
thread_.reset(new boost::thread(bind(threadMain, (void*)selfRef)));
if (detached_)
thread_->detach();
+
+ // Wait for the thread to start and get far enough to grab everything
+ // that it needs from the calling context, thus absolving the caller
+ // from being required to hold on to runnable indefinitely.
+ monitor_.wait();
}
void join() {
- if (!detached_ && state_ != uninitialized) {
+ if (!detached_ && getState() != uninitialized) {
thread_->join();
}
}
@@ -110,19 +132,11 @@
shared_ptr<BoostThread> thread = *(shared_ptr<BoostThread>*)arg;
delete reinterpret_cast<shared_ptr<BoostThread>*>(arg);
- if (!thread) {
- return (void*)0;
- }
-
- if (thread->state_ != starting) {
- return (void*)0;
- }
-
- thread->state_ = started;
+ thread->setState(started);
thread->runnable()->run();
- if (thread->state_ != stopping && thread->state_ != stopped) {
- thread->state_ = stopping;
+ if (thread->getState() != stopping && thread->getState() != stopped) {
+ thread->setState(stopping);
}
return (void*)0;
}
diff --git a/lib/cpp/src/thrift/concurrency/BoostThreadFactory.h b/lib/cpp/src/thrift/concurrency/BoostThreadFactory.h
index 4c7a45a..bf11a70 100644
--- a/lib/cpp/src/thrift/concurrency/BoostThreadFactory.h
+++ b/lib/cpp/src/thrift/concurrency/BoostThreadFactory.h
@@ -20,8 +20,8 @@
#ifndef _THRIFT_CONCURRENCY_BOOSTTHREADFACTORY_H_
#define _THRIFT_CONCURRENCY_BOOSTTHREADFACTORY_H_ 1
+#include <thrift/concurrency/Monitor.h>
#include <thrift/concurrency/Thread.h>
-
#include <thrift/stdcxx.h>
namespace apache {
diff --git a/lib/cpp/src/thrift/concurrency/PosixThreadFactory.cpp b/lib/cpp/src/thrift/concurrency/PosixThreadFactory.cpp
index d829d69..2e35446 100644
--- a/lib/cpp/src/thrift/concurrency/PosixThreadFactory.cpp
+++ b/lib/cpp/src/thrift/concurrency/PosixThreadFactory.cpp
@@ -20,7 +20,7 @@
#include <thrift/thrift-config.h>
#include <thrift/concurrency/Exception.h>
-#include <thrift/concurrency/Mutex.h>
+#include <thrift/concurrency/Monitor.h>
#include <thrift/concurrency/PosixThreadFactory.h>
#if GOOGLE_PERFTOOLS_REGISTER_THREAD
@@ -53,8 +53,8 @@
private:
pthread_t pthread_;
- Mutex state_mutex_;
- STATE state_;
+ Monitor monitor_; // guards state_ and signals start() once the thread is running
+ STATE state_; // thread lifecycle state; read and written under monitor_
int policy_;
int priority_;
int stackSize_;
@@ -96,14 +96,20 @@
STATE getState() const
{
- Guard g(state_mutex_);
+ Synchronized sync(monitor_);
return state_;
}
void setState(STATE newState)
{
- Guard g(state_mutex_);
+ Synchronized sync(monitor_);
state_ = newState;
+
+ // unblock start() with the knowledge that the thread has actually
+ // started running, which avoids a race in detached threads.
+ if (newState == started) {
+ monitor_.notify();
+ }
}
void start() {
@@ -154,9 +160,18 @@
setState(starting);
+ Synchronized sync(monitor_);
+
if (pthread_create(&pthread_, &thread_attr, threadMain, (void*)selfRef) != 0) {
throw SystemResourceException("pthread_create failed");
}
+
+ // The caller may choose not to guarantee the lifetime of the Runnable
+ // being used in the thread, so we must actually wait until the thread
+ // starts before we return. If we did not wait, it would be possible
+ // for the caller to start destructing the Runnable and the Thread,
+ // and we would end up in a race. This was identified with valgrind.
+ monitor_.wait();
}
void join() {
@@ -174,8 +189,6 @@
if (res != 0) {
GlobalOutput.printf("PthreadThread::join(): fail with code %d", res);
}
- } else {
- GlobalOutput.printf("PthreadThread::join(): detached thread");
}
}
@@ -202,14 +215,6 @@
stdcxx::shared_ptr<PthreadThread> thread = *(stdcxx::shared_ptr<PthreadThread>*)arg;
delete reinterpret_cast<stdcxx::shared_ptr<PthreadThread>*>(arg);
- if (thread == NULL) {
- return (void*)0;
- }
-
- if (thread->getState() != starting) {
- return (void*)0;
- }
-
#if GOOGLE_PERFTOOLS_REGISTER_THREAD
ProfilerRegisterThread();
#endif
diff --git a/lib/cpp/src/thrift/concurrency/StdThreadFactory.cpp b/lib/cpp/src/thrift/concurrency/StdThreadFactory.cpp
index 4067f24..da0c5e3 100644
--- a/lib/cpp/src/thrift/concurrency/StdThreadFactory.cpp
+++ b/lib/cpp/src/thrift/concurrency/StdThreadFactory.cpp
@@ -21,8 +21,9 @@
#if USE_STD_THREAD
-#include <thrift/concurrency/StdThreadFactory.h>
#include <thrift/concurrency/Exception.h>
+#include <thrift/concurrency/Monitor.h>
+#include <thrift/concurrency/StdThreadFactory.h>
#include <thrift/stdcxx.h>
#include <cassert>
@@ -49,6 +50,7 @@
private:
std::unique_ptr<std::thread> thread_;
+ Monitor monitor_;
STATE state_;
bool detached_;
@@ -68,18 +70,42 @@
}
}
+ STATE getState() const
+ {
+ Synchronized sync(monitor_);
+ return state_;
+ }
+
+ void setState(STATE newState)
+ {
+ Synchronized sync(monitor_);
+ state_ = newState;
+
+ // unblock start() with the knowledge that the thread has actually
+ // started running, which avoids a race in detached threads.
+ if (newState == started) {
+ monitor_.notify();
+ }
+ }
+
void start() {
- if (state_ != uninitialized) {
+ if (getState() != uninitialized) {
return;
}
stdcxx::shared_ptr<StdThread> selfRef = shared_from_this();
- state_ = starting;
+ setState(starting);
+ Synchronized sync(monitor_);
thread_ = std::unique_ptr<std::thread>(new std::thread(threadMain, selfRef));
if (detached_)
thread_->detach();
+
+ // Wait for the thread to start and get far enough to grab everything
+ // that it needs from the calling context, thus absolving the caller
+ // from being required to hold on to runnable indefinitely.
+ monitor_.wait();
}
void join() {
@@ -96,22 +122,16 @@
};
void StdThread::threadMain(stdcxx::shared_ptr<StdThread> thread) {
- if (thread == NULL) {
- return;
- }
+#if GOOGLE_PERFTOOLS_REGISTER_THREAD
+ ProfilerRegisterThread();
+#endif
- if (thread->state_ != starting) {
- return;
- }
-
- thread->state_ = started;
+ thread->setState(started);
thread->runnable()->run();
- if (thread->state_ != stopping && thread->state_ != stopped) {
- thread->state_ = stopping;
+ if (thread->getState() != stopping && thread->getState() != stopped) {
+ thread->setState(stopping);
}
-
- return;
}
StdThreadFactory::StdThreadFactory(bool detached) : ThreadFactory(detached) {
diff --git a/lib/cpp/src/thrift/transport/TServerSocket.cpp b/lib/cpp/src/thrift/transport/TServerSocket.cpp
index a704b06..3179b1a 100644
--- a/lib/cpp/src/thrift/transport/TServerSocket.cpp
+++ b/lib/cpp/src/thrift/transport/TServerSocket.cpp
@@ -658,14 +658,21 @@
}
void TServerSocket::interrupt() {
- notify(interruptSockWriter_);
+ concurrency::Guard g(rwMutex_);
+ if (interruptSockWriter_ != THRIFT_INVALID_SOCKET) {
+ notify(interruptSockWriter_);
+ }
}
void TServerSocket::interruptChildren() {
- notify(childInterruptSockWriter_);
+ concurrency::Guard g(rwMutex_);
+ if (childInterruptSockWriter_ != THRIFT_INVALID_SOCKET) {
+ notify(childInterruptSockWriter_);
+ }
}
void TServerSocket::close() {
+ concurrency::Guard g(rwMutex_);
if (serverSocket_ != THRIFT_INVALID_SOCKET) {
shutdown(serverSocket_, THRIFT_SHUT_RDWR);
::THRIFT_CLOSESOCKET(serverSocket_);
diff --git a/lib/cpp/src/thrift/transport/TServerSocket.h b/lib/cpp/src/thrift/transport/TServerSocket.h
index cb11dc4..1daaa82 100644
--- a/lib/cpp/src/thrift/transport/TServerSocket.h
+++ b/lib/cpp/src/thrift/transport/TServerSocket.h
@@ -20,6 +20,7 @@
#ifndef _THRIFT_TRANSPORT_TSERVERSOCKET_H_
#define _THRIFT_TRANSPORT_TSERVERSOCKET_H_ 1
+#include <thrift/concurrency/Mutex.h>
#include <thrift/stdcxx.h>
#include <thrift/transport/PlatformSocket.h>
#include <thrift/transport/TServerTransport.h>
@@ -169,6 +170,7 @@
bool keepAlive_;
bool listening_;
+ concurrency::Mutex rwMutex_; // serializes interrupt(), interruptChildren() and close()
THRIFT_SOCKET interruptSockWriter_; // is notified on interrupt()
THRIFT_SOCKET interruptSockReader_; // is used in select/poll with serverSocket_ for interruptability
THRIFT_SOCKET childInterruptSockWriter_; // is notified on interruptChildren()
diff --git a/lib/cpp/test/concurrency/Tests.cpp b/lib/cpp/test/concurrency/Tests.cpp
index 33af392..f49bb9f 100644
--- a/lib/cpp/test/concurrency/Tests.cpp
+++ b/lib/cpp/test/concurrency/Tests.cpp
@@ -25,6 +25,10 @@
#include "TimerManagerTests.h"
#include "ThreadManagerTests.h"
+// Test weight: a scale factor applied to the thread and task counts below.
+// 1 is the baseline (sized for running under valgrind); 10 is ten times that.
+static size_t WEIGHT = 10;
+
int main(int argc, char** argv) {
std::string arg;
@@ -37,6 +41,11 @@
args[ix - 1] = std::string(argv[ix]);
}
+ if (getenv("VALGRIND") != 0) {
+ // lower the scale of every test
+ WEIGHT = 1;
+ }
+
bool runAll = args[0].compare("all") == 0;
if (runAll || args[0].compare("thread-factory") == 0) {
@@ -45,10 +54,10 @@
std::cout << "ThreadFactory tests..." << std::endl;
- int reapLoops = 20;
- int reapCount = 1000;
+ int reapLoops = 2 * WEIGHT;
+ int reapCount = 100 * WEIGHT;
size_t floodLoops = 3;
- size_t floodCount = 20000;
+ size_t floodCount = 500 * WEIGHT;
std::cout << "\t\tThreadFactory reap N threads test: N = " << reapLoops << "x" << reapCount << std::endl;
@@ -121,8 +130,8 @@
std::cout << "ThreadManager tests..." << std::endl;
{
- size_t workerCount = 100;
- size_t taskCount = 50000;
+ size_t workerCount = 10 * WEIGHT;
+ size_t taskCount = 500 * WEIGHT;
int64_t delay = 10LL;
ThreadManagerTests threadManagerTests;
@@ -160,13 +169,13 @@
size_t minWorkerCount = 2;
- size_t maxWorkerCount = 64;
+ size_t maxWorkerCount = 8;
- size_t tasksPerWorker = 1000;
+ size_t tasksPerWorker = 100 * WEIGHT;
int64_t delay = 5LL;
- for (size_t workerCount = minWorkerCount; workerCount < maxWorkerCount; workerCount *= 4) {
+ for (size_t workerCount = minWorkerCount; workerCount <= maxWorkerCount; workerCount *= 4) {
size_t taskCount = workerCount * tasksPerWorker;
diff --git a/lib/cpp/test/concurrency/ThreadFactoryTests.h b/lib/cpp/test/concurrency/ThreadFactoryTests.h
index bd6ed32..6ac9aa5 100644
--- a/lib/cpp/test/concurrency/ThreadFactoryTests.h
+++ b/lib/cpp/test/concurrency/ThreadFactoryTests.h
@@ -21,11 +21,12 @@
#include <thrift/concurrency/Thread.h>
#include <thrift/concurrency/PlatformThreadFactory.h>
#include <thrift/concurrency/Monitor.h>
+#include <thrift/concurrency/Mutex.h>
#include <thrift/concurrency/Util.h>
#include <assert.h>
#include <iostream>
-#include <set>
+#include <vector>
namespace apache {
namespace thrift {
@@ -78,13 +79,13 @@
int* activeCount = new int(count);
- std::set<shared_ptr<Thread> > threads;
+ std::vector<shared_ptr<Thread> > threads;
int tix;
for (tix = 0; tix < count; tix++) {
try {
- threads.insert(
+ threads.push_back(
threadFactory.newThread(shared_ptr<Runnable>(new ReapNTask(*monitor, *activeCount))));
} catch (SystemResourceException& e) {
std::cout << "\t\t\tfailed to create " << lix* count + tix << " thread " << e.what()
@@ -94,7 +95,7 @@
}
tix = 0;
- for (std::set<shared_ptr<Thread> >::const_iterator thread = threads.begin();
+ for (std::vector<shared_ptr<Thread> >::const_iterator thread = threads.begin();
thread != threads.end();
tix++, ++thread) {
@@ -113,6 +114,7 @@
monitor->wait(1000);
}
}
+
delete activeCount;
std::cout << "\t\t\treaped " << lix* count << " threads" << std::endl;
}
@@ -253,19 +255,22 @@
class FloodTask : public Runnable {
public:
- FloodTask(const size_t id) : _id(id) {}
+ FloodTask(const size_t id, Monitor& mon) : _id(id), _mon(mon) {}
~FloodTask() {
if (_id % 10000 == 0) {
+ Synchronized sync(_mon);
std::cout << "\t\tthread " << _id << " done" << std::endl;
}
}
void run() {
if (_id % 10000 == 0) {
+ Synchronized sync(_mon);
std::cout << "\t\tthread " << _id << " started" << std::endl;
}
}
const size_t _id;
+ Monitor& _mon;
};
void foo(PlatformThreadFactory* tf) { (void)tf; }
@@ -273,7 +278,8 @@
bool floodNTest(size_t loop = 1, size_t count = 100000) {
bool success = false;
-
+ Monitor mon;
+
for (size_t lix = 0; lix < loop; lix++) {
PlatformThreadFactory threadFactory = PlatformThreadFactory();
@@ -283,10 +289,8 @@
try {
- shared_ptr<FloodTask> task(new FloodTask(lix * count + tix));
-
+ shared_ptr<FloodTask> task(new FloodTask(lix * count + tix, mon));
shared_ptr<Thread> thread = threadFactory.newThread(task);
-
thread->start();
} catch (TException& e) {
@@ -298,8 +302,8 @@
}
}
+ Synchronized sync(mon);
std::cout << "\t\t\tflooded " << (lix + 1) * count << " threads" << std::endl;
-
success = true;
}
diff --git a/lib/cpp/test/concurrency/ThreadManagerTests.h b/lib/cpp/test/concurrency/ThreadManagerTests.h
index c07a21b..9ecd6ba 100644
--- a/lib/cpp/test/concurrency/ThreadManagerTests.h
+++ b/lib/cpp/test/concurrency/ThreadManagerTests.h
@@ -109,7 +109,7 @@
shared_ptr<ThreadManager> threadManager = ThreadManager::newSimpleThreadManager(workerCount);
shared_ptr<PlatformThreadFactory> threadFactory
- = shared_ptr<PlatformThreadFactory>(new PlatformThreadFactory());
+ = shared_ptr<PlatformThreadFactory>(new PlatformThreadFactory(false));
#if !USE_BOOST_THREAD && !USE_STD_THREAD
threadFactory->setPriority(PosixThreadFactory::HIGHEST);