/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
#define __STDC_FORMAT_MACROS
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include "TNonblockingServer.h"
#include <thrift/concurrency/Exception.h>
#include <thrift/transport/TSocket.h>
#include <thrift/concurrency/PlatformThreadFactory.h>
#include <iostream>
#ifdef HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#include <netinet/tcp.h>
#endif
#ifdef HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#ifdef HAVE_NETDB_H
#include <netdb.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#include <errno.h>
#include <assert.h>
#ifdef HAVE_SCHED_H
#include <sched.h>
#endif
#ifndef AF_LOCAL
#define AF_LOCAL AF_UNIX
#endif
#ifdef _MSC_VER
#define PRIu32 "I32u"
#endif
namespace apache { namespace thrift { namespace server {
using namespace apache::thrift::protocol;
using namespace apache::thrift::transport;
using namespace apache::thrift::concurrency;
using namespace std;
using apache::thrift::transport::TSocket;
using apache::thrift::transport::TTransportException;
using boost::shared_ptr;
/// Three states for sockets: recv frame size, recv data, and send mode
enum TSocketState {
SOCKET_RECV_FRAMING,
SOCKET_RECV,
SOCKET_SEND
};
/**
* Six states for the nonblocking server:
* 1) initialize
* 2) read 4 byte frame size
* 3) read frame of data
* 4) wait for a dispatched task to complete (thread-pool mode only)
* 5) send back data (if any)
* 6) force immediate connection close
*/
enum TAppState {
APP_INIT,
APP_READ_FRAME_SIZE,
APP_READ_REQUEST,
APP_WAIT_TASK,
APP_SEND_RESULT,
APP_CLOSE_CONNECTION
};
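/*
* A rough sketch of how these states typically progress for one request,
* based on the transition() logic below (oneway calls produce no reply frame
* and so skip APP_SEND_RESULT):
*
*   APP_INIT
*     -> APP_READ_FRAME_SIZE   (SOCKET_RECV_FRAMING: read the 4-byte length)
*     -> APP_READ_REQUEST      (SOCKET_RECV: read the frame payload)
*     -> APP_WAIT_TASK         (only when a ThreadManager handles the call)
*     -> APP_SEND_RESULT       (SOCKET_SEND: write the length-prefixed reply)
*     -> APP_INIT              (reset buffers and wait for the next frame)
*
*   APP_CLOSE_CONNECTION is entered separately (on overload or task expiry)
*   to force an immediate close.
*/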
/**
* Represents a connection that is handled via libevent. This connection
* essentially encapsulates a socket that has some associated libevent state.
*/
class TNonblockingServer::TConnection {
private:
/// Server IO Thread handling this connection
TNonblockingIOThread* ioThread_;
/// Server handle
TNonblockingServer* server_;
/// TProcessor
boost::shared_ptr<TProcessor> processor_;
/// Object wrapping network socket
boost::shared_ptr<TSocket> tSocket_;
/// Libevent object
struct event event_;
/// Libevent flags
short eventFlags_;
/// Socket mode
TSocketState socketState_;
/// Application state
TAppState appState_;
/// How much data we still need to read
uint32_t readWant_;
/// Where in the read buffer are we
uint32_t readBufferPos_;
/// Read buffer
uint8_t* readBuffer_;
/// Read buffer size
uint32_t readBufferSize_;
/// Write buffer
uint8_t* writeBuffer_;
/// Write buffer size
uint32_t writeBufferSize_;
/// How far through writing are we?
uint32_t writeBufferPos_;
/// Largest size of write buffer seen since buffer was constructed
size_t largestWriteBufferSize_;
/// Count of the number of calls for use with getResizeBufferEveryN().
int32_t callsForResize_;
/// Task handle
int taskHandle_;
/// Task event
struct event taskEvent_;
/// Transport to read from
boost::shared_ptr<TMemoryBuffer> inputTransport_;
/// Transport that processor writes to
boost::shared_ptr<TMemoryBuffer> outputTransport_;
/// extra transport generated by transport factory (e.g. BufferedRouterTransport)
boost::shared_ptr<TTransport> factoryInputTransport_;
boost::shared_ptr<TTransport> factoryOutputTransport_;
/// Protocol decoder
boost::shared_ptr<TProtocol> inputProtocol_;
/// Protocol encoder
boost::shared_ptr<TProtocol> outputProtocol_;
/// Server event handler, if any
boost::shared_ptr<TServerEventHandler> serverEventHandler_;
/// Thrift call context, if any
void *connectionContext_;
/// Go into read mode
void setRead() {
setFlags(EV_READ | EV_PERSIST);
}
/// Go into write mode
void setWrite() {
setFlags(EV_WRITE | EV_PERSIST);
}
/// Set socket idle
void setIdle() {
setFlags(0);
}
/**
* Set event flags for this connection.
*
* @param eventFlags flags we pass to libevent for the connection.
*/
void setFlags(short eventFlags);
/**
* Libevent handler called (via our static wrapper) when the connection
* socket had something happen. Rather than use the flags libevent passed,
* we use the connection state to determine whether we need to read or
* write the socket.
*/
void workSocket();
public:
class Task;
/// Constructor
TConnection(int socket, TNonblockingIOThread* ioThread,
const sockaddr* addr, socklen_t addrLen) {
readBuffer_ = NULL;
readBufferSize_ = 0;
ioThread_ = ioThread;
server_ = ioThread->getServer();
// Allocate input and output transports; these only need to be allocated
// once per TConnection (they don't need to be reallocated on init() call)
inputTransport_.reset(new TMemoryBuffer(readBuffer_, readBufferSize_));
outputTransport_.reset(new TMemoryBuffer(
server_->getWriteBufferDefaultSize()));
tSocket_.reset(new TSocket());
init(socket, ioThread, addr, addrLen);
}
~TConnection() {
std::free(readBuffer_);
}
/// Close this connection and free or reset its resources.
void close();
/**
* Check buffers against any size limits and shrink them if exceeded.
*
* @param readLimit we reduce read buffer size to this (if nonzero).
* @param writeLimit if nonzero and write buffer is larger, replace it.
*/
void checkIdleBufferMemLimit(size_t readLimit, size_t writeLimit);
/// Initialize
void init(int socket, TNonblockingIOThread* ioThread,
const sockaddr* addr, socklen_t addrLen);
/**
* This is called when the application transitions from one state into
* another. This means that it has finished writing the data that it needed
* to, or finished receiving the data that it needed to.
*/
void transition();
/**
* C-callable event handler for connection events. Provides a callback
* that libevent can understand which invokes connection_->workSocket().
*
* @param fd the descriptor the event occurred on.
* @param which the flags associated with the event.
* @param v void* callback arg where we placed TConnection's "this".
*/
static void eventHandler(evutil_socket_t fd, short /* which */, void* v) {
assert(fd == ((TConnection*)v)->getTSocket()->getSocketFD());
((TConnection*)v)->workSocket();
}
/**
* Notification to server that processing has ended on this request.
* Can be called either when processing is completed or when a waiting
* task has been preemptively terminated (on overload).
*
* Don't call this from the IO thread itself.
*
* @return true if successful, false if unable to notify (check errno).
*/
bool notifyIOThread() {
return ioThread_->notify(this);
}
/*
* Returns the number of this connection's currently assigned IO
* thread.
*/
int getIOThreadNumber() const {
return ioThread_->getThreadNumber();
}
/// Force connection shutdown for this connection.
void forceClose() {
appState_ = APP_CLOSE_CONNECTION;
if (!notifyIOThread()) {
throw TException("TConnection::forceClose: failed write on notify pipe");
}
}
/// return the server this connection was initialized for.
TNonblockingServer* getServer() const {
return server_;
}
/// get state of connection.
TAppState getState() const {
return appState_;
}
/// return the TSocket transport wrapping this network connection
boost::shared_ptr<TSocket> getTSocket() const {
return tSocket_;
}
/// return the server event handler if any
boost::shared_ptr<TServerEventHandler> getServerEventHandler() {
return serverEventHandler_;
}
/// return the Thrift connection context if any
void* getConnectionContext() {
return connectionContext_;
}
};
class TNonblockingServer::TConnection::Task: public Runnable {
public:
Task(boost::shared_ptr<TProcessor> processor,
boost::shared_ptr<TProtocol> input,
boost::shared_ptr<TProtocol> output,
TConnection* connection) :
processor_(processor),
input_(input),
output_(output),
connection_(connection),
serverEventHandler_(connection_->getServerEventHandler()),
connectionContext_(connection_->getConnectionContext()) {}
void run() {
try {
for (;;) {
if (serverEventHandler_ != NULL) {
serverEventHandler_->processContext(connectionContext_, connection_->getTSocket());
}
if (!processor_->process(input_, output_, connectionContext_) ||
!input_->getTransport()->peek()) {
break;
}
}
} catch (const TTransportException& ttx) {
GlobalOutput.printf("TNonblockingServer: client died: %s", ttx.what());
} catch (const bad_alloc&) {
GlobalOutput("TNonblockingServer: caught bad_alloc exception.");
exit(1);
} catch (const std::exception& x) {
GlobalOutput.printf("TNonblockingServer: process() exception: %s: %s",
typeid(x).name(), x.what());
} catch (...) {
GlobalOutput.printf(
"TNonblockingServer: unknown exception while processing.");
}
// Signal completion back to the libevent thread via a pipe
if (!connection_->notifyIOThread()) {
throw TException("TNonblockingServer::Task::run: failed write on notify pipe");
}
}
TConnection* getTConnection() {
return connection_;
}
private:
boost::shared_ptr<TProcessor> processor_;
boost::shared_ptr<TProtocol> input_;
boost::shared_ptr<TProtocol> output_;
TConnection* connection_;
boost::shared_ptr<TServerEventHandler> serverEventHandler_;
void* connectionContext_;
};
void TNonblockingServer::TConnection::init(int socket,
TNonblockingIOThread* ioThread,
const sockaddr* addr,
socklen_t addrLen) {
tSocket_->setSocketFD(socket);
tSocket_->setCachedAddress(addr, addrLen);
ioThread_ = ioThread;
server_ = ioThread->getServer();
appState_ = APP_INIT;
eventFlags_ = 0;
readBufferPos_ = 0;
readWant_ = 0;
writeBuffer_ = NULL;
writeBufferSize_ = 0;
writeBufferPos_ = 0;
largestWriteBufferSize_ = 0;
socketState_ = SOCKET_RECV_FRAMING;
callsForResize_ = 0;
// get input/output transports from the factories
factoryInputTransport_ = server_->getInputTransportFactory()->getTransport(
inputTransport_);
factoryOutputTransport_ = server_->getOutputTransportFactory()->getTransport(
outputTransport_);
// Create protocol
inputProtocol_ = server_->getInputProtocolFactory()->getProtocol(
factoryInputTransport_);
outputProtocol_ = server_->getOutputProtocolFactory()->getProtocol(
factoryOutputTransport_);
// Set up for any server event handler
serverEventHandler_ = server_->getEventHandler();
if (serverEventHandler_ != NULL) {
connectionContext_ = serverEventHandler_->createContext(inputProtocol_,
outputProtocol_);
} else {
connectionContext_ = NULL;
}
// Get the processor
processor_ = server_->getProcessor(inputProtocol_, outputProtocol_, tSocket_);
}
void TNonblockingServer::TConnection::workSocket() {
int got=0, left=0, sent=0;
uint32_t fetch = 0;
switch (socketState_) {
case SOCKET_RECV_FRAMING:
union {
uint8_t buf[sizeof(uint32_t)];
uint32_t size;
} framing;
// if we've already received some bytes we kept them here
framing.size = readWant_;
// determine size of this frame
try {
// Read from the socket
fetch = tSocket_->read(&framing.buf[readBufferPos_],
uint32_t(sizeof(framing.size) - readBufferPos_));
if (fetch == 0) {
// Whenever we get here it means a remote disconnect
close();
return;
}
readBufferPos_ += fetch;
} catch (TTransportException& te) {
GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
close();
return;
}
if (readBufferPos_ < sizeof(framing.size)) {
// more needed before frame size is known -- save what we have so far
readWant_ = framing.size;
return;
}
readWant_ = ntohl(framing.size);
if (readWant_ > server_->getMaxFrameSize()) {
// Don't allow giant frame sizes. This prevents bad clients from
// causing us to try and allocate a giant buffer.
GlobalOutput.printf("TNonblockingServer: frame size too large "
"(%"PRIu32" > %zu) from client %s. remote side not "
"using TFramedTransport?",
readWant_, server_->getMaxFrameSize(),
tSocket_->getSocketInfo().c_str());
close();
return;
}
// size known; now get the rest of the frame
transition();
return;
case SOCKET_RECV:
// It is an error to be in this state if we already have all the data
assert(readBufferPos_ < readWant_);
try {
// Read from the socket
fetch = readWant_ - readBufferPos_;
got = tSocket_->read(readBuffer_ + readBufferPos_, fetch);
}
catch (TTransportException& te) {
GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
close();
return;
}
if (got > 0) {
// Move along in the buffer
readBufferPos_ += got;
// Check that we did not overdo it
assert(readBufferPos_ <= readWant_);
// We are done reading, move onto the next state
if (readBufferPos_ == readWant_) {
transition();
}
return;
}
// Whenever we get down here it means a remote disconnect
close();
return;
case SOCKET_SEND:
// Should never have position past size
assert(writeBufferPos_ <= writeBufferSize_);
// If there is no data to send, then let us move on
if (writeBufferPos_ == writeBufferSize_) {
GlobalOutput("WARNING: Send state with no data to send\n");
transition();
return;
}
try {
left = writeBufferSize_ - writeBufferPos_;
sent = tSocket_->write_partial(writeBuffer_ + writeBufferPos_, left);
}
catch (TTransportException& te) {
GlobalOutput.printf("TConnection::workSocket(): %s ", te.what());
close();
return;
}
writeBufferPos_ += sent;
// Did we overdo it?
assert(writeBufferPos_ <= writeBufferSize_);
// We are done!
if (writeBufferPos_ == writeBufferSize_) {
transition();
}
return;
default:
GlobalOutput.printf("Unexpected Socket State %d", socketState_);
assert(0);
}
}
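/*
* For reference, the framing consumed above is the TFramedTransport wire
* format: a 4-byte frame length in network byte order followed by that many
* payload bytes. A minimal sketch of how a client side would build one frame
* (the frame() helper below is illustrative only, not part of this file):
*
*   std::string frame(const std::string& payload) {
*     uint32_t len = htonl(static_cast<uint32_t>(payload.size()));
*     std::string out(reinterpret_cast<const char*>(&len), sizeof(len));
*     out += payload;  // length prefix, then the serialized Thrift message
*     return out;      // payload size must not exceed getMaxFrameSize()
*   }
*/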
/**
* This is called when the application transitions from one state into
* another. This means that it has finished writing the data that it needed
* to, or finished receiving the data that it needed to.
*/
void TNonblockingServer::TConnection::transition() {
// ensure this connection is active right now
assert(ioThread_);
assert(server_);
// Switch upon the state that we are currently in and move to a new state
switch (appState_) {
case APP_READ_REQUEST:
// We are done reading the request, package the read buffer into transport
// and get back some data from the dispatch function
inputTransport_->resetBuffer(readBuffer_, readBufferPos_);
outputTransport_->resetBuffer();
// Prepend four bytes of blank space to the buffer so we can
// write the frame size there later.
outputTransport_->getWritePtr(4);
outputTransport_->wroteBytes(4);
server_->incrementActiveProcessors();
if (server_->isThreadPoolProcessing()) {
// We are setting up a Task to do this work and we will wait on it
// Create task and dispatch to the thread manager
boost::shared_ptr<Runnable> task =
boost::shared_ptr<Runnable>(new Task(processor_,
inputProtocol_,
outputProtocol_,
this));
// The application is now waiting on the task to finish
appState_ = APP_WAIT_TASK;
try {
server_->addTask(task);
} catch (IllegalStateException & ise) {
// The ThreadManager is not ready to handle any more tasks (it's probably shutting down).
GlobalOutput.printf("IllegalStateException: Server::process() %s", ise.what());
close();
}
// Set this connection idle so that libevent doesn't process more
// data on it while we're still waiting for the ThreadManager to
// finish this task
setIdle();
return;
} else {
try {
if (serverEventHandler_ != NULL) {
serverEventHandler_->processContext(connectionContext_,
getTSocket());
}
// Invoke the processor
processor_->process(inputProtocol_, outputProtocol_,
connectionContext_);
} catch (const TTransportException &ttx) {
GlobalOutput.printf("TNonblockingServer transport error in "
"process(): %s", ttx.what());
server_->decrementActiveProcessors();
close();
return;
} catch (const std::exception &x) {
GlobalOutput.printf("Server::process() uncaught exception: %s: %s",
typeid(x).name(), x.what());
server_->decrementActiveProcessors();
close();
return;
} catch (...) {
GlobalOutput.printf("Server::process() unknown exception");
server_->decrementActiveProcessors();
close();
return;
}
}
// Intentionally fall through here; the call to process() has written its
// result into outputTransport_
case APP_WAIT_TASK:
// We have now finished processing a task and the result has been written
// into the outputTransport_, so we grab its contents and place them into
// the writeBuffer_ for actual writing by the libevent thread
server_->decrementActiveProcessors();
// Get the result of the operation
outputTransport_->getBuffer(&writeBuffer_, &writeBufferSize_);
// If the function call generated return data, then move into the send
// state and get going
// 4 bytes were reserved for frame size
if (writeBufferSize_ > 4) {
// Move into write state
writeBufferPos_ = 0;
socketState_ = SOCKET_SEND;
// Put the frame size into the write buffer
int32_t frameSize = (int32_t)htonl(writeBufferSize_ - 4);
memcpy(writeBuffer_, &frameSize, 4);
// Socket into write mode
appState_ = APP_SEND_RESULT;
setWrite();
// Try to work the socket immediately
// workSocket();
return;
}
// In this case, the request was oneway and we should fall through
// right back into the read frame header state
goto LABEL_APP_INIT;
case APP_SEND_RESULT:
// it's now safe to perform buffer size housekeeping.
if (writeBufferSize_ > largestWriteBufferSize_) {
largestWriteBufferSize_ = writeBufferSize_;
}
if (server_->getResizeBufferEveryN() > 0
&& ++callsForResize_ >= server_->getResizeBufferEveryN()) {
checkIdleBufferMemLimit(server_->getIdleReadBufferLimit(),
server_->getIdleWriteBufferLimit());
callsForResize_ = 0;
}
// N.B.: We also intentionally fall through here into the INIT state!
LABEL_APP_INIT:
case APP_INIT:
// Clear write buffer variables
writeBuffer_ = NULL;
writeBufferPos_ = 0;
writeBufferSize_ = 0;
// Into read4 state we go
socketState_ = SOCKET_RECV_FRAMING;
appState_ = APP_READ_FRAME_SIZE;
readBufferPos_ = 0;
// Register read event
setRead();
// Try to work the socket right away
// workSocket();
return;
case APP_READ_FRAME_SIZE:
// We just read the request length
// Double the buffer size until it is big enough
if (readWant_ > readBufferSize_) {
if (readBufferSize_ == 0) {
readBufferSize_ = 1;
}
uint32_t newSize = readBufferSize_;
while (readWant_ > newSize) {
newSize *= 2;
}
uint8_t* newBuffer = (uint8_t*)std::realloc(readBuffer_, newSize);
if (newBuffer == NULL) {
// nothing else to be done...
throw std::bad_alloc();
}
readBuffer_ = newBuffer;
readBufferSize_ = newSize;
}
readBufferPos_ = 0;
// Move into read request state
socketState_ = SOCKET_RECV;
appState_ = APP_READ_REQUEST;
// Work the socket right away
// workSocket();
return;
case APP_CLOSE_CONNECTION:
server_->decrementActiveProcessors();
close();
return;
default:
GlobalOutput.printf("Unexpected Application State %d", appState_);
assert(0);
}
}
void TNonblockingServer::TConnection::setFlags(short eventFlags) {
// Catch the do nothing case
if (eventFlags_ == eventFlags) {
return;
}
// Delete a previously existing event
if (eventFlags_ != 0) {
if (event_del(&event_) == -1) {
GlobalOutput("TConnection::setFlags event_del");
return;
}
}
// Update in memory structure
eventFlags_ = eventFlags;
// Do not call event_set if there are no flags
if (!eventFlags_) {
return;
}
/*
* event_set:
*
* Prepares the event structure &event to be used in future calls to
* event_add() and event_del(). The event will be prepared to call the
* eventHandler using the 'sock' file descriptor to monitor events.
*
* The events can be either EV_READ, EV_WRITE, or both, indicating
* that an application can read or write from the file respectively without
* blocking.
*
* The eventHandler will be called with the file descriptor that triggered
* the event and the type of event which will be one of: EV_TIMEOUT,
* EV_SIGNAL, EV_READ, EV_WRITE.
*
* The additional flag EV_PERSIST makes an event_add() persistent until
* event_del() has been called.
*
* Once initialized, the &event struct can be used repeatedly with
* event_add() and event_del() and does not need to be reinitialized unless
* the eventHandler and/or the argument to it are to be changed. However,
* when an ev structure has been added to libevent using event_add() the
* structure must persist until the event occurs (assuming EV_PERSIST
* is not set) or is removed using event_del(). You may not reuse the same
* ev structure for multiple monitored descriptors; each descriptor needs
* its own ev.
*/
event_set(&event_, tSocket_->getSocketFD(), eventFlags_,
TConnection::eventHandler, this);
event_base_set(ioThread_->getEventBase(), &event_);
// Add the event
if (event_add(&event_, 0) == -1) {
GlobalOutput("TConnection::setFlags(): could not event_add");
}
}
/**
* Closes a connection
*/
void TNonblockingServer::TConnection::close() {
// Delete the registered libevent
if (event_del(&event_) == -1) {
GlobalOutput.perror("TConnection::close() event_del", errno);
}
if (serverEventHandler_ != NULL) {
serverEventHandler_->deleteContext(connectionContext_, inputProtocol_, outputProtocol_);
}
ioThread_ = NULL;
// Close the socket
tSocket_->close();
// close any factory produced transports
factoryInputTransport_->close();
factoryOutputTransport_->close();
// Give this object back to the server that owns it
server_->returnConnection(this);
}
void TNonblockingServer::TConnection::checkIdleBufferMemLimit(
size_t readLimit,
size_t writeLimit) {
if (readLimit > 0 && readBufferSize_ > readLimit) {
free(readBuffer_);
readBuffer_ = NULL;
readBufferSize_ = 0;
}
if (writeLimit > 0 && largestWriteBufferSize_ > writeLimit) {
// just start over
outputTransport_->resetBuffer(server_->getWriteBufferDefaultSize());
largestWriteBufferSize_ = 0;
}
}
TNonblockingServer::~TNonblockingServer() {
// Close any active connections (moves them to the idle connection stack)
while (activeConnections_.size()) {
activeConnections_.front()->close();
}
// Clean up unused TConnection objects in connectionStack_
while (!connectionStack_.empty()) {
TConnection* connection = connectionStack_.top();
connectionStack_.pop();
delete connection;
}
// The TNonblockingIOThread objects have shared_ptrs to the Thread
// objects and the Thread objects have shared_ptrs to the TNonblockingIOThread
// objects (as runnable) so these objects will never deallocate without help.
while (!ioThreads_.empty()) {
boost::shared_ptr<TNonblockingIOThread> iot = ioThreads_.back();
ioThreads_.pop_back();
iot->setThread(boost::shared_ptr<Thread>());
}
}
/**
* Creates a new connection either by reusing an object off the stack or
* by allocating a new one entirely
*/
TNonblockingServer::TConnection* TNonblockingServer::createConnection(
int socket, const sockaddr* addr, socklen_t addrLen) {
// Check the stack
Guard g(connMutex_);
// pick an IO thread to handle this connection -- currently round robin
assert(nextIOThread_ < ioThreads_.size());
int selectedThreadIdx = nextIOThread_;
nextIOThread_ = (nextIOThread_ + 1) % ioThreads_.size();
TNonblockingIOThread* ioThread = ioThreads_[selectedThreadIdx].get();
// Check the connection stack to see if we can re-use
TConnection* result = NULL;
if (connectionStack_.empty()) {
result = new TConnection(socket, ioThread, addr, addrLen);
++numTConnections_;
} else {
result = connectionStack_.top();
connectionStack_.pop();
result->init(socket, ioThread, addr, addrLen);
}
activeConnections_.push_back(result);
return result;
}
/**
* Returns a connection to the stack
*/
void TNonblockingServer::returnConnection(TConnection* connection) {
Guard g(connMutex_);
activeConnections_.erase(std::remove(activeConnections_.begin(), activeConnections_.end(), connection), activeConnections_.end());
if (connectionStackLimit_ &&
(connectionStack_.size() >= connectionStackLimit_)) {
delete connection;
--numTConnections_;
} else {
connection->checkIdleBufferMemLimit(idleReadBufferLimit_, idleWriteBufferLimit_);
connectionStack_.push(connection);
}
}
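/*
* Note on pooling: TConnection objects are recycled rather than reallocated.
* createConnection() pops an idle object off connectionStack_ when one is
* available (calling init() to rebind it to the new socket), and
* returnConnection() pushes it back unless connectionStackLimit_ has been
* reached, in which case the object is deleted outright.
*/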
/**
* Server socket had something happen. We accept all waiting client
* connections on fd and assign TConnection objects to handle those requests.
*/
void TNonblockingServer::handleEvent(int fd, short which) {
(void) which;
// Make sure that libevent didn't mess up the socket handles
assert(fd == serverSocket_);
// Server socket accepted a new connection
socklen_t addrLen;
sockaddr_storage addrStorage;
sockaddr* addrp = (sockaddr*)&addrStorage;
addrLen = sizeof(addrStorage);
// Going to accept a new client socket
int clientSocket;
// Accept as many new clients as possible, even though libevent signaled only
// one. This helps us avoid having to go back into the libevent engine so
// many times.
while ((clientSocket = ::accept(fd, addrp, &addrLen)) != -1) {
// If we're overloaded, take action here
if (overloadAction_ != T_OVERLOAD_NO_ACTION && serverOverloaded()) {
Guard g(connMutex_);
nConnectionsDropped_++;
nTotalConnectionsDropped_++;
if (overloadAction_ == T_OVERLOAD_CLOSE_ON_ACCEPT) {
::close(clientSocket);
return;
} else if (overloadAction_ == T_OVERLOAD_DRAIN_TASK_QUEUE) {
if (!drainPendingTask()) {
// Nothing left to discard, so we drop connection instead.
::close(clientSocket);
return;
}
}
}
// Explicitly set this socket to NONBLOCK mode
int flags;
if ((flags = fcntl(clientSocket, F_GETFL, 0)) < 0 ||
fcntl(clientSocket, F_SETFL, flags | O_NONBLOCK) < 0) {
GlobalOutput.perror("thriftServerEventHandler: set O_NONBLOCK (fcntl) ", errno);
::close(clientSocket);
return;
}
// Create a new TConnection for this client socket.
TConnection* clientConnection =
createConnection(clientSocket, addrp, addrLen);
// Fail fast if we could not create a TConnection object
if (clientConnection == NULL) {
GlobalOutput.printf("thriftServerEventHandler: failed TConnection factory");
::close(clientSocket);
return;
}
/*
* Either notify the ioThread that is assigned this connection to
* start processing, or if it is us, we'll just ask this
* connection to do its initial state change here.
*
* (We need to avoid writing to our own notification pipe, to
* avoid possible deadlocks if the pipe is full.)
*
* The IO thread #0 is the only one that handles these listen
* events, so unless the connection has been assigned to thread #0
* we know it's not on our thread.
*/
if (clientConnection->getIOThreadNumber() == 0) {
clientConnection->transition();
} else {
clientConnection->notifyIOThread();
}
// addrLen is written by the accept() call, so needs to be set before the next call.
addrLen = sizeof(addrStorage);
}
// Done looping on accept; now make sure the error is due to blocking
// (EAGAIN/EWOULDBLOCK). Any other error is a problem.
if (errno != EAGAIN && errno != EWOULDBLOCK) {
GlobalOutput.perror("thriftServerEventHandler: accept() ", errno);
}
}
/**
* Creates a socket to listen on and binds it to the local port.
*/
void TNonblockingServer::createAndListenOnSocket() {
int s;
struct addrinfo hints, *res, *res0;
int error;
char port[sizeof("65536") + 1];
memset(&hints, 0, sizeof(hints));
hints.ai_family = PF_UNSPEC;
hints.ai_socktype = SOCK_STREAM;
hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
sprintf(port, "%d", port_);
// Wildcard address
error = getaddrinfo(NULL, port, &hints, &res0);
if (error) {
throw TException("TNonblockingServer::serve() getaddrinfo " +
string(gai_strerror(error)));
}
// Pick the ipv6 address first since ipv4 addresses can be mapped
// into ipv6 space.
for (res = res0; res; res = res->ai_next) {
if (res->ai_family == AF_INET6 || res->ai_next == NULL)
break;
}
// Create the server socket
s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (s == -1) {
freeaddrinfo(res0);
throw TException("TNonblockingServer::serve() socket() -1");
}
#ifdef IPV6_V6ONLY
if (res->ai_family == AF_INET6) {
int zero = 0;
if (-1 == setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, const_cast_sockopt(&zero), sizeof(zero))) {
GlobalOutput("TServerSocket::listen() IPV6_V6ONLY");
}
}
#endif // #ifdef IPV6_V6ONLY
int one = 1;
// Set reuseaddr to avoid 2MSL delay on server restart
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, const_cast_sockopt(&one), sizeof(one));
if (::bind(s, res->ai_addr, res->ai_addrlen) == -1) {
::close(s);
freeaddrinfo(res0);
throw TTransportException(TTransportException::NOT_OPEN,
"TNonblockingServer::serve() bind",
errno);
}
// Done with the addr info
freeaddrinfo(res0);
// Set up this file descriptor for listening
listenSocket(s);
}
/**
* Takes a socket created by createAndListenOnSocket() and sets various
* options on it to prepare it for use in the server.
*/
void TNonblockingServer::listenSocket(int s) {
// Set socket to nonblocking mode
int flags;
if ((flags = fcntl(s, F_GETFL, 0)) < 0 ||
fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) {
::close(s);
throw TException("TNonblockingServer::serve() O_NONBLOCK");
}
int one = 1;
struct linger ling = {0, 0};
// Keepalive to ensure full result flushing
setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, const_cast_sockopt(&one), sizeof(one));
// Turn linger off to avoid hung sockets
setsockopt(s, SOL_SOCKET, SO_LINGER, const_cast_sockopt(&ling), sizeof(ling));
// Set TCP nodelay if available, MAC OS X Hack
// See http://lists.danga.com/pipermail/memcached/2005-March/001240.html
#ifndef TCP_NOPUSH
setsockopt(s, IPPROTO_TCP, TCP_NODELAY, const_cast_sockopt(&one), sizeof(one));
#endif
#ifdef TCP_LOW_MIN_RTO
if (TSocket::getUseLowMinRto()) {
setsockopt(s, IPPROTO_TCP, TCP_LOW_MIN_RTO, const_cast_sockopt(&one), sizeof(one));
}
#endif
if (listen(s, LISTEN_BACKLOG) == -1) {
::close(s);
throw TException("TNonblockingServer::serve() listen");
}
// Cool, this socket is good to go, set it as the serverSocket_
serverSocket_ = s;
}
void TNonblockingServer::setThreadManager(boost::shared_ptr<ThreadManager> threadManager) {
threadManager_ = threadManager;
if (threadManager != NULL) {
threadManager->setExpireCallback(apache::thrift::stdcxx::bind(&TNonblockingServer::expireClose, this, apache::thrift::stdcxx::placeholders::_1));
threadPoolProcessing_ = true;
} else {
threadPoolProcessing_ = false;
}
}
bool TNonblockingServer::serverOverloaded() {
size_t activeConnections = numTConnections_ - connectionStack_.size();
if (numActiveProcessors_ > maxActiveProcessors_ ||
activeConnections > maxConnections_) {
if (!overloaded_) {
GlobalOutput.printf("TNonblockingServer: overload condition begun.");
overloaded_ = true;
}
} else {
if (overloaded_ &&
(numActiveProcessors_ <= overloadHysteresis_ * maxActiveProcessors_) &&
(activeConnections <= overloadHysteresis_ * maxConnections_)) {
GlobalOutput.printf("TNonblockingServer: overload ended; "
"%u dropped (%llu total)",
nConnectionsDropped_, nTotalConnectionsDropped_);
nConnectionsDropped_ = 0;
overloaded_ = false;
}
}
return overloaded_;
}
bool TNonblockingServer::drainPendingTask() {
if (threadManager_) {
boost::shared_ptr<Runnable> task = threadManager_->removeNextPending();
if (task) {
TConnection* connection =
static_cast<TConnection::Task*>(task.get())->getTConnection();
assert(connection && connection->getServer()
&& connection->getState() == APP_WAIT_TASK);
connection->forceClose();
return true;
}
}
return false;
}
void TNonblockingServer::expireClose(boost::shared_ptr<Runnable> task) {
TConnection* connection =
static_cast<TConnection::Task*>(task.get())->getTConnection();
assert(connection && connection->getServer() &&
connection->getState() == APP_WAIT_TASK);
connection->forceClose();
}
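/*
* Overload handling, in brief: serverOverloaded() trips when the number of
* active processors or active connections exceeds its configured maximum, and
* clears again once both fall back below the hysteresis fraction of those
* maxima. handleEvent() consults it on every accept and, depending on
* overloadAction_, either closes the new socket (T_OVERLOAD_CLOSE_ON_ACCEPT)
* or force-closes the connection of a pending task
* (T_OVERLOAD_DRAIN_TASK_QUEUE). A configuration sketch, assuming the usual
* setters declared in TNonblockingServer.h (they are not defined in this
* file):
*
*   server.setMaxConnections(4096);
*   server.setMaxActiveProcessors(512);
*   server.setOverloadAction(T_OVERLOAD_DRAIN_TASK_QUEUE);
*/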
void TNonblockingServer::stop() {
// Breaks the event loop in all threads so that they end ASAP.
for (uint32_t i = 0; i < ioThreads_.size(); ++i) {
ioThreads_[i]->stop();
}
}
/**
* Main workhorse function, starts up the server listening on a port and
* loops over the libevent handler.
*/
void TNonblockingServer::serve() {
// init listen socket
createAndListenOnSocket();
// set up the IO threads
assert(ioThreads_.empty());
if (!numIOThreads_) {
numIOThreads_ = DEFAULT_IO_THREADS;
}
for (uint32_t id = 0; id < numIOThreads_; ++id) {
// the first IO thread also does the listening on server socket
int listenFd = (id == 0 ? serverSocket_ : -1);
shared_ptr<TNonblockingIOThread> thread(
new TNonblockingIOThread(this, id, listenFd, useHighPriorityIOThreads_));
ioThreads_.push_back(thread);
}
// Notify handler of the preServe event
if (eventHandler_ != NULL) {
eventHandler_->preServe();
}
// Start all of our helper IO threads. Note that the threads run forever,
// only terminating if stop() is called.
assert(ioThreads_.size() == numIOThreads_);
assert(ioThreads_.size() > 0);
GlobalOutput.printf("TNonblockingServer: Serving on port %d, %d io threads.",
port_, ioThreads_.size());
// Launch all the secondary IO threads in separate threads
if (ioThreads_.size() > 1) {
ioThreadFactory_.reset(new PlatformThreadFactory(
#ifndef USE_BOOST_THREAD
PlatformThreadFactory::OTHER, // scheduler
PlatformThreadFactory::NORMAL, // priority
1, // stack size (MB)
#endif
false // detached
));
assert(ioThreadFactory_.get());
// intentionally starting at thread 1, not 0
for (uint32_t i = 1; i < ioThreads_.size(); ++i) {
shared_ptr<Thread> thread = ioThreadFactory_->newThread(ioThreads_[i]);
ioThreads_[i]->setThread(thread);
thread->start();
}
}
// Run the primary (listener) IO thread loop in our main thread; this will
// only return when the server is shutting down.
ioThreads_[0]->run();
// Ensure all threads are finished before exiting serve()
for (uint32_t i = 0; i < ioThreads_.size(); ++i) {
ioThreads_[i]->join();
GlobalOutput.printf("TNonblocking: join done for IO thread #%d", i);
}
}
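/*
* A minimal usage sketch, assuming the usual TNonblockingServer constructor
* declared in TNonblockingServer.h (a TProcessor plus a port); the handler
* and MyServiceProcessor names are placeholders:
*
*   boost::shared_ptr<TProcessor> processor(new MyServiceProcessor(handler));
*   TNonblockingServer server(processor, 9090);
*
*   // Optional: dispatch calls to a thread pool instead of the IO threads.
*   boost::shared_ptr<ThreadManager> threadManager =
*       ThreadManager::newSimpleThreadManager(8);
*   threadManager->threadFactory(boost::shared_ptr<PlatformThreadFactory>(
*       new PlatformThreadFactory()));
*   threadManager->start();
*   server.setThreadManager(threadManager);
*
*   server.serve();  // runs IO thread #0 in the calling thread until stop()
*/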
TNonblockingIOThread::TNonblockingIOThread(TNonblockingServer* server,
int number,
int listenSocket,
bool useHighPriority)
: server_(server)
, number_(number)
, listenSocket_(listenSocket)
, useHighPriority_(useHighPriority)
, eventBase_(NULL) {
notificationPipeFDs_[0] = -1;
notificationPipeFDs_[1] = -1;
}
TNonblockingIOThread::~TNonblockingIOThread() {
// make sure our associated thread is fully finished
join();
if (eventBase_) {
event_base_free(eventBase_);
}
if (listenSocket_ >= 0) {
if (0 != ::close(listenSocket_)) {
GlobalOutput.perror("TNonblockingIOThread listenSocket_ close(): ",
errno);
}
listenSocket_ = TNonblockingServer::INVALID_SOCKET_VALUE;
}
for (int i = 0; i < 2; ++i) {
if (notificationPipeFDs_[i] >= 0) {
if (0 != ::close(notificationPipeFDs_[i])) {
GlobalOutput.perror("TNonblockingIOThread notificationPipe close(): ",
errno);
}
notificationPipeFDs_[i] = TNonblockingServer::INVALID_SOCKET_VALUE;
}
}
}
void TNonblockingIOThread::createNotificationPipe() {
if (evutil_socketpair(AF_LOCAL, SOCK_STREAM, 0, notificationPipeFDs_) == -1) {
GlobalOutput.perror("TNonblockingServer::createNotificationPipe ", EVUTIL_SOCKET_ERROR());
throw TException("can't create notification pipe");
}
if (evutil_make_socket_nonblocking(notificationPipeFDs_[0]) < 0 ||
evutil_make_socket_nonblocking(notificationPipeFDs_[1]) < 0) {
::close(notificationPipeFDs_[0]);
::close(notificationPipeFDs_[1]);
throw TException("TNonblockingServer::createNotificationPipe() O_NONBLOCK");
}
for (int i = 0; i < 2; ++i) {
#if LIBEVENT_VERSION_NUMBER < 0x02000000
int flags;
if ((flags = fcntl(notificationPipeFDs_[i], F_GETFD, 0)) < 0 ||
fcntl(notificationPipeFDs_[i], F_SETFD, flags | FD_CLOEXEC) < 0) {
#else
if (evutil_make_socket_closeonexec(notificationPipeFDs_[i]) < 0) {
#endif
::close(notificationPipeFDs_[0]);
::close(notificationPipeFDs_[1]);
throw TException("TNonblockingServer::createNotificationPipe() "
"FD_CLOEXEC");
}
}
}
/**
* Register the core libevent events onto the proper base.
*/
void TNonblockingIOThread::registerEvents() {
if (listenSocket_ >= 0) {
// Register the server event
event_set(&serverEvent_,
listenSocket_,
EV_READ | EV_PERSIST,
TNonblockingIOThread::listenHandler,
server_);
event_base_set(eventBase_, &serverEvent_);
// Add the event and start up the server
if (-1 == event_add(&serverEvent_, 0)) {
throw TException("TNonblockingServer::serve(): "
"event_add() failed on server listen event");
}
GlobalOutput.printf("TNonblocking: IO thread #%d registered for listen.",
number_);
}
createNotificationPipe();
// Create an event to be notified when a task finishes
event_set(&notificationEvent_,
getNotificationRecvFD(),
EV_READ | EV_PERSIST,
TNonblockingIOThread::notifyHandler,
this);
// Attach to the base
event_base_set(eventBase_, &notificationEvent_);
// Add the event and start up the server
if (-1 == event_add(&notificationEvent_, 0)) {
throw TException("TNonblockingServer::serve(): "
"event_add() failed on task-done notification event");
}
GlobalOutput.printf("TNonblocking: IO thread #%d registered for notify.",
number_);
}
bool TNonblockingIOThread::notify(TNonblockingServer::TConnection* conn) {
int fd = getNotificationSendFD();
if (fd < 0) {
return false;
}
const int kSize = sizeof(conn);
if (send(fd, const_cast_sockopt(&conn), kSize, 0) != kSize) {
return false;
}
return true;
}
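/*
* The notification "pipe" protocol used above is deliberately tiny: each
* message is the raw bytes of a TConnection* (sizeof(conn) bytes) written to
* one end of the socketpair. notifyHandler() below reads pointers back out
* and calls transition() on them; a NULL pointer is the sentinel that tells
* the handler to stop processing notifications (see breakLoop()).
*/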
/* static */
void TNonblockingIOThread::notifyHandler(evutil_socket_t fd, short which, void* v) {
TNonblockingIOThread* ioThread = (TNonblockingIOThread*) v;
assert(ioThread);
(void)which;
while (true) {
TNonblockingServer::TConnection* connection = 0;
const int kSize = sizeof(connection);
int nBytes = recv(fd, cast_sockopt(&connection), kSize, 0);
if (nBytes == kSize) {
if (connection == NULL) {
// this is the command to stop our thread, exit the handler!
return;
}
connection->transition();
} else if (nBytes > 0) {
// throw away these bytes and hope that next time we get a solid read
GlobalOutput.printf("notifyHandler: Bad read of %d bytes, wanted %d",
nBytes, kSize);
ioThread->breakLoop(true);
return;
} else if (nBytes == 0) {
GlobalOutput.printf("notifyHandler: Notify socket closed!");
// exit the loop
break;
} else { // nBytes < 0
if (errno != EWOULDBLOCK && errno != EAGAIN) {
GlobalOutput.perror(
"TNonblocking: notifyHandler read() failed: ", errno);
ioThread->breakLoop(true);
return;
}
// exit the loop
break;
}
}
}
void TNonblockingIOThread::breakLoop(bool error) {
if (error) {
GlobalOutput.printf(
"TNonblockingServer: IO thread #%d exiting with error.", number_);
// TODO: figure out something better to do here, but for now kill the
// whole process.
GlobalOutput.printf("TNonblockingServer: aborting process.");
::abort();
}
// sets a flag so that the loop exits on the next event
event_base_loopbreak(eventBase_);
// event_base_loopbreak() only causes the loop to exit the next time
// it wakes up. We need to force it to wake up, in case there are
// no real events it needs to process.
//
// If we're running in the same thread, we can't use the notify(0)
// mechanism to stop the thread, but happily if we're running in the
// same thread, this means the thread can't be blocking in the event
// loop either.
if (!Thread::is_current(threadId_)) {
notify(NULL);
}
}
void TNonblockingIOThread::setCurrentThreadHighPriority(bool value) {
#ifdef HAVE_SCHED_H
// Start out with a standard, low-priority setup for the sched params.
struct sched_param sp;
bzero((void*) &sp, sizeof(sp));
int policy = SCHED_OTHER;
// If desired, set up high-priority sched params structure.
if (value) {
// FIFO scheduler, ranked above default SCHED_OTHER queue
policy = SCHED_FIFO;
// The priority only compares us to other SCHED_FIFO threads, so we
// simply pick a priority halfway between min & max.
const int priority = (sched_get_priority_max(policy) +
sched_get_priority_min(policy)) / 2;
sp.sched_priority = priority;
}
// Actually set the sched params for the current thread.
if (0 == pthread_setschedparam(pthread_self(), policy, &sp)) {
GlobalOutput.printf(
"TNonblocking: IO Thread #%d using high-priority scheduler!", number_);
} else {
GlobalOutput.perror("TNonblocking: pthread_setschedparam(): ", errno);
}
#endif
}
void TNonblockingIOThread::run() {
threadId_ = Thread::get_current();
assert(eventBase_ == 0);
eventBase_ = event_base_new();
// Print some libevent stats
if (number_ == 0) {
GlobalOutput.printf("TNonblockingServer: using libevent %s method %s",
event_get_version(),
event_base_get_method(eventBase_));
}
registerEvents();
GlobalOutput.printf("TNonblockingServer: IO thread #%d entering loop...",
number_);
if (useHighPriority_) {
setCurrentThreadHighPriority(true);
}
// Run libevent engine, never returns, invokes calls to eventHandler
event_base_loop(eventBase_, 0);
if (useHighPriority_) {
setCurrentThreadHighPriority(false);
}
// cleans up our registered events
cleanupEvents();
GlobalOutput.printf("TNonblockingServer: IO thread #%d run() done!",
number_);
}
void TNonblockingIOThread::cleanupEvents() {
// stop the listen socket, if any
if (listenSocket_ >= 0) {
if (event_del(&serverEvent_) == -1) {
GlobalOutput.perror("TNonblockingIOThread::stop() event_del: ", errno);
}
}
event_del(&notificationEvent_);
}
void TNonblockingIOThread::stop() {
// This should cause the thread to fall out of its event loop ASAP.
breakLoop(false);
}
void TNonblockingIOThread::join() {
// If this was a thread created by a factory (not the thread that called
// serve()), we join() it to make sure we shut down fully.
if (thread_) {
try {
// Note that it is safe both to call join() twice and to join the current
// thread, as the pthread implementation checks for deadlock.
thread_->join();
} catch(...) {
// swallow everything
}
}
}
}}} // apache::thrift::server