blob: 69ae235155438be5946085b4aba29b2bc12de305 [file] [log] [blame]
David Reissea2cba82009-03-30 21:35:00 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
Mark Slee9f0c6512007-02-28 23:58:26 +000019
Mark Slee2f6404d2006-10-10 01:37:40 +000020#include "TNonblockingServer.h"
David Reisse11f3072008-10-07 21:39:19 +000021#include <concurrency/Exception.h>
David Reiss1c20c872010-03-09 05:20:14 +000022#include <transport/TSocket.h>
Mark Slee2f6404d2006-10-10 01:37:40 +000023
Mark Sleee02385b2007-06-09 01:21:16 +000024#include <iostream>
Mark Slee2f6404d2006-10-10 01:37:40 +000025#include <sys/socket.h>
26#include <netinet/in.h>
27#include <netinet/tcp.h>
Mark Sleefb4b5142007-11-20 01:27:08 +000028#include <netdb.h>
Mark Slee2f6404d2006-10-10 01:37:40 +000029#include <fcntl.h>
30#include <errno.h>
31#include <assert.h>
32
David Reiss9b903442009-10-21 05:51:28 +000033#ifndef AF_LOCAL
34#define AF_LOCAL AF_UNIX
35#endif
36
T Jake Lucianib5e62212009-01-31 22:36:20 +000037namespace apache { namespace thrift { namespace server {
Mark Slee2f6404d2006-10-10 01:37:40 +000038
T Jake Lucianib5e62212009-01-31 22:36:20 +000039using namespace apache::thrift::protocol;
40using namespace apache::thrift::transport;
41using namespace apache::thrift::concurrency;
Mark Sleee02385b2007-06-09 01:21:16 +000042using namespace std;
David Reiss1c20c872010-03-09 05:20:14 +000043using apache::thrift::transport::TSocket;
44using apache::thrift::transport::TTransportException;
Mark Sleee02385b2007-06-09 01:21:16 +000045
46class TConnection::Task: public Runnable {
47 public:
48 Task(boost::shared_ptr<TProcessor> processor,
49 boost::shared_ptr<TProtocol> input,
50 boost::shared_ptr<TProtocol> output,
David Reiss01fe1532010-03-09 05:19:25 +000051 TConnection* connection) :
Mark Sleee02385b2007-06-09 01:21:16 +000052 processor_(processor),
53 input_(input),
54 output_(output),
David Reiss105961d2010-10-06 17:10:17 +000055 connection_(connection),
56 serverEventHandler_(connection_->getServerEventHandler()),
57 connectionContext_(connection_->getConnectionContext()) {}
Mark Sleee02385b2007-06-09 01:21:16 +000058
59 void run() {
60 try {
David Reiss105961d2010-10-06 17:10:17 +000061 for (;;) {
62 if (serverEventHandler_ != NULL) {
63 serverEventHandler_->processContext(connectionContext_, connection_->getTSocket());
64 }
65 if (!processor_->process(input_, output_, connectionContext_) ||
66 !input_->getTransport()->peek()) {
Mark Sleee02385b2007-06-09 01:21:16 +000067 break;
68 }
69 }
70 } catch (TTransportException& ttx) {
David Reissa79e4882008-03-05 07:51:47 +000071 cerr << "TNonblockingServer client died: " << ttx.what() << endl;
Mark Sleee02385b2007-06-09 01:21:16 +000072 } catch (TException& x) {
David Reissa79e4882008-03-05 07:51:47 +000073 cerr << "TNonblockingServer exception: " << x.what() << endl;
David Reiss28e88ec2010-03-09 05:19:27 +000074 } catch (bad_alloc&) {
75 cerr << "TNonblockingServer caught bad_alloc exception.";
76 exit(-1);
Mark Sleee02385b2007-06-09 01:21:16 +000077 } catch (...) {
David Reissa79e4882008-03-05 07:51:47 +000078 cerr << "TNonblockingServer uncaught exception." << endl;
Mark Sleee02385b2007-06-09 01:21:16 +000079 }
Mark Slee79b16942007-11-26 19:05:29 +000080
David Reiss01fe1532010-03-09 05:19:25 +000081 // Signal completion back to the libevent thread via a pipe
82 if (!connection_->notifyServer()) {
83 throw TException("TNonblockingServer::Task::run: failed write on notify pipe");
Mark Sleee02385b2007-06-09 01:21:16 +000084 }
David Reiss01fe1532010-03-09 05:19:25 +000085 }
86
87 TConnection* getTConnection() {
88 return connection_;
Mark Sleee02385b2007-06-09 01:21:16 +000089 }
90
91 private:
92 boost::shared_ptr<TProcessor> processor_;
93 boost::shared_ptr<TProtocol> input_;
94 boost::shared_ptr<TProtocol> output_;
David Reiss01fe1532010-03-09 05:19:25 +000095 TConnection* connection_;
David Reiss105961d2010-10-06 17:10:17 +000096 boost::shared_ptr<TServerEventHandler> serverEventHandler_;
97 void* connectionContext_;
Mark Sleee02385b2007-06-09 01:21:16 +000098};
Mark Slee5ea15f92007-03-05 22:55:59 +000099
David Reiss105961d2010-10-06 17:10:17 +0000100void TConnection::init(int socket, short eventFlags, TNonblockingServer* s,
101 const sockaddr* addr, socklen_t addrLen) {
102 tSocket_->setSocketFD(socket);
103 tSocket_->setCachedAddress(addr, addrLen);
104
Mark Slee2f6404d2006-10-10 01:37:40 +0000105 server_ = s;
106 appState_ = APP_INIT;
107 eventFlags_ = 0;
108
109 readBufferPos_ = 0;
110 readWant_ = 0;
111
112 writeBuffer_ = NULL;
113 writeBufferSize_ = 0;
114 writeBufferPos_ = 0;
David Reiss54bec5d2010-10-06 17:10:45 +0000115 largestWriteBufferSize_ = 0;
Mark Slee2f6404d2006-10-10 01:37:40 +0000116
David Reiss89a12942010-10-06 17:10:52 +0000117 socketState_ = SOCKET_RECV_FRAMING;
Mark Slee2f6404d2006-10-10 01:37:40 +0000118 appState_ = APP_INIT;
David Reiss54bec5d2010-10-06 17:10:45 +0000119 callsForResize_ = 0;
Mark Slee79b16942007-11-26 19:05:29 +0000120
Mark Slee2f6404d2006-10-10 01:37:40 +0000121 // Set flags, which also registers the event
122 setFlags(eventFlags);
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000123
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000124 // get input/transports
125 factoryInputTransport_ = s->getInputTransportFactory()->getTransport(inputTransport_);
126 factoryOutputTransport_ = s->getOutputTransportFactory()->getTransport(outputTransport_);
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000127
128 // Create protocol
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000129 inputProtocol_ = s->getInputProtocolFactory()->getProtocol(factoryInputTransport_);
130 outputProtocol_ = s->getOutputProtocolFactory()->getProtocol(factoryOutputTransport_);
David Reiss105961d2010-10-06 17:10:17 +0000131
132 // Set up for any server event handler
133 serverEventHandler_ = server_->getEventHandler();
134 if (serverEventHandler_ != NULL) {
135 connectionContext_ = serverEventHandler_->createContext(inputProtocol_, outputProtocol_);
136 } else {
137 connectionContext_ = NULL;
138 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000139}
140
141void TConnection::workSocket() {
David Reiss105961d2010-10-06 17:10:17 +0000142 int got=0, left=0, sent=0;
Mark Sleeaaa23ed2007-01-30 19:52:05 +0000143 uint32_t fetch = 0;
Mark Slee2f6404d2006-10-10 01:37:40 +0000144
145 switch (socketState_) {
David Reiss89a12942010-10-06 17:10:52 +0000146 case SOCKET_RECV_FRAMING:
147 union {
148 uint8_t buf[sizeof(uint32_t)];
149 int32_t size;
150 } framing;
Mark Slee2f6404d2006-10-10 01:37:40 +0000151
David Reiss89a12942010-10-06 17:10:52 +0000152 // if we've already received some bytes we kept them here
153 framing.size = readWant_;
154 // determine size of this frame
155 try {
156 // Read from the socket
157 fetch = tSocket_->read(&framing.buf[readBufferPos_],
158 uint32_t(sizeof(framing.size) - readBufferPos_));
159 if (fetch == 0) {
160 // Whenever we get here it means a remote disconnect
Mark Slee2f6404d2006-10-10 01:37:40 +0000161 close();
162 return;
163 }
David Reiss89a12942010-10-06 17:10:52 +0000164 readBufferPos_ += fetch;
165 } catch (TTransportException& te) {
166 GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
167 close();
168
169 return;
Mark Slee2f6404d2006-10-10 01:37:40 +0000170 }
171
David Reiss89a12942010-10-06 17:10:52 +0000172 if (readBufferPos_ < sizeof(framing.size)) {
173 // more needed before frame size is known -- save what we have so far
174 readWant_ = framing.size;
175 return;
176 }
177
178 readWant_ = ntohl(framing.size);
179 if (static_cast<int>(readWant_) <= 0) {
180 GlobalOutput.printf("TConnection:workSocket() Negative frame size %d, remote side not using TFramedTransport?", static_cast<int>(readWant_));
181 close();
182 return;
183 }
184 // size known; now get the rest of the frame
185 transition();
186 return;
187
188 case SOCKET_RECV:
189 // It is an error to be in this state if we already have all the data
190 assert(readBufferPos_ < readWant_);
191
David Reiss105961d2010-10-06 17:10:17 +0000192 try {
193 // Read from the socket
194 fetch = readWant_ - readBufferPos_;
195 got = tSocket_->read(readBuffer_ + readBufferPos_, fetch);
196 }
197 catch (TTransportException& te) {
198 GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
199 close();
Mark Slee79b16942007-11-26 19:05:29 +0000200
David Reiss105961d2010-10-06 17:10:17 +0000201 return;
202 }
203
Mark Slee2f6404d2006-10-10 01:37:40 +0000204 if (got > 0) {
205 // Move along in the buffer
206 readBufferPos_ += got;
207
208 // Check that we did not overdo it
209 assert(readBufferPos_ <= readWant_);
Mark Slee79b16942007-11-26 19:05:29 +0000210
Mark Slee2f6404d2006-10-10 01:37:40 +0000211 // We are done reading, move onto the next state
212 if (readBufferPos_ == readWant_) {
213 transition();
214 }
215 return;
Mark Slee2f6404d2006-10-10 01:37:40 +0000216 }
217
218 // Whenever we get down here it means a remote disconnect
219 close();
Mark Slee79b16942007-11-26 19:05:29 +0000220
Mark Slee2f6404d2006-10-10 01:37:40 +0000221 return;
222
223 case SOCKET_SEND:
224 // Should never have position past size
225 assert(writeBufferPos_ <= writeBufferSize_);
226
227 // If there is no data to send, then let us move on
228 if (writeBufferPos_ == writeBufferSize_) {
Mark Slee79b16942007-11-26 19:05:29 +0000229 GlobalOutput("WARNING: Send state with no data to send\n");
Mark Slee2f6404d2006-10-10 01:37:40 +0000230 transition();
231 return;
232 }
233
David Reiss105961d2010-10-06 17:10:17 +0000234 try {
235 left = writeBufferSize_ - writeBufferPos_;
236 sent = tSocket_->write_partial(writeBuffer_ + writeBufferPos_, left);
237 }
238 catch (TTransportException& te) {
239 GlobalOutput.printf("TConnection::workSocket(): %s ", te.what());
Mark Slee2f6404d2006-10-10 01:37:40 +0000240 close();
241 return;
242 }
243
244 writeBufferPos_ += sent;
245
246 // Did we overdo it?
247 assert(writeBufferPos_ <= writeBufferSize_);
248
Mark Slee79b16942007-11-26 19:05:29 +0000249 // We are done!
Mark Slee2f6404d2006-10-10 01:37:40 +0000250 if (writeBufferPos_ == writeBufferSize_) {
251 transition();
252 }
253
254 return;
255
256 default:
David Reiss3bb5e052010-01-25 19:31:31 +0000257 GlobalOutput.printf("Unexpected Socket State %d", socketState_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000258 assert(0);
259 }
260}
261
262/**
263 * This is called when the application transitions from one state into
264 * another. This means that it has finished writing the data that it needed
265 * to, or finished receiving the data that it needed to.
266 */
267void TConnection::transition() {
268 // Switch upon the state that we are currently in and move to a new state
269 switch (appState_) {
270
271 case APP_READ_REQUEST:
272 // We are done reading the request, package the read buffer into transport
273 // and get back some data from the dispatch function
274 inputTransport_->resetBuffer(readBuffer_, readBufferPos_);
David Reiss7197efb2010-10-06 17:10:43 +0000275 outputTransport_->resetBuffer();
David Reiss52cb7a72008-06-30 21:40:35 +0000276 // Prepend four bytes of blank space to the buffer so we can
277 // write the frame size there later.
278 outputTransport_->getWritePtr(4);
279 outputTransport_->wroteBytes(4);
Mark Slee79b16942007-11-26 19:05:29 +0000280
David Reiss01fe1532010-03-09 05:19:25 +0000281 server_->incrementActiveProcessors();
282
Mark Sleee02385b2007-06-09 01:21:16 +0000283 if (server_->isThreadPoolProcessing()) {
284 // We are setting up a Task to do this work and we will wait on it
Mark Slee79b16942007-11-26 19:05:29 +0000285
David Reiss01fe1532010-03-09 05:19:25 +0000286 // Create task and dispatch to the thread manager
287 boost::shared_ptr<Runnable> task =
288 boost::shared_ptr<Runnable>(new Task(server_->getProcessor(),
289 inputProtocol_,
290 outputProtocol_,
291 this));
292 // The application is now waiting on the task to finish
293 appState_ = APP_WAIT_TASK;
Mark Slee2f6404d2006-10-10 01:37:40 +0000294
David Reisse11f3072008-10-07 21:39:19 +0000295 try {
296 server_->addTask(task);
297 } catch (IllegalStateException & ise) {
298 // The ThreadManager is not ready to handle any more tasks (it's probably shutting down).
David Reissc53a5942008-10-07 23:55:24 +0000299 GlobalOutput.printf("IllegalStateException: Server::process() %s", ise.what());
David Reisse11f3072008-10-07 21:39:19 +0000300 close();
301 }
Mark Slee402ee282007-08-23 01:43:20 +0000302
David Reiss01fe1532010-03-09 05:19:25 +0000303 // Set this connection idle so that libevent doesn't process more
304 // data on it while we're still waiting for the threadmanager to
305 // finish this task
306 setIdle();
307 return;
Mark Sleee02385b2007-06-09 01:21:16 +0000308 } else {
309 try {
310 // Invoke the processor
David Reiss23248712010-10-06 17:10:08 +0000311 server_->getProcessor()->process(inputProtocol_, outputProtocol_, NULL);
Mark Sleee02385b2007-06-09 01:21:16 +0000312 } catch (TTransportException &ttx) {
David Reiss01e55c12008-07-13 22:18:51 +0000313 GlobalOutput.printf("TTransportException: Server::process() %s", ttx.what());
David Reiss01fe1532010-03-09 05:19:25 +0000314 server_->decrementActiveProcessors();
Mark Sleee02385b2007-06-09 01:21:16 +0000315 close();
316 return;
317 } catch (TException &x) {
David Reiss01e55c12008-07-13 22:18:51 +0000318 GlobalOutput.printf("TException: Server::process() %s", x.what());
David Reiss01fe1532010-03-09 05:19:25 +0000319 server_->decrementActiveProcessors();
Mark Slee79b16942007-11-26 19:05:29 +0000320 close();
Mark Sleee02385b2007-06-09 01:21:16 +0000321 return;
322 } catch (...) {
David Reiss01e55c12008-07-13 22:18:51 +0000323 GlobalOutput.printf("Server::process() unknown exception");
David Reiss01fe1532010-03-09 05:19:25 +0000324 server_->decrementActiveProcessors();
Mark Sleee02385b2007-06-09 01:21:16 +0000325 close();
326 return;
327 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000328 }
329
Mark Slee402ee282007-08-23 01:43:20 +0000330 // Intentionally fall through here, the call to process has written into
331 // the writeBuffer_
332
Mark Sleee02385b2007-06-09 01:21:16 +0000333 case APP_WAIT_TASK:
334 // We have now finished processing a task and the result has been written
335 // into the outputTransport_, so we grab its contents and place them into
336 // the writeBuffer_ for actual writing by the libevent thread
337
David Reiss01fe1532010-03-09 05:19:25 +0000338 server_->decrementActiveProcessors();
Mark Slee2f6404d2006-10-10 01:37:40 +0000339 // Get the result of the operation
340 outputTransport_->getBuffer(&writeBuffer_, &writeBufferSize_);
341
342 // If the function call generated return data, then move into the send
343 // state and get going
David Reissaf787782008-07-03 20:29:34 +0000344 // 4 bytes were reserved for frame size
David Reiss52cb7a72008-06-30 21:40:35 +0000345 if (writeBufferSize_ > 4) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000346
347 // Move into write state
348 writeBufferPos_ = 0;
349 socketState_ = SOCKET_SEND;
Mark Slee92f00fb2006-10-25 01:28:17 +0000350
David Reissaf787782008-07-03 20:29:34 +0000351 // Put the frame size into the write buffer
352 int32_t frameSize = (int32_t)htonl(writeBufferSize_ - 4);
353 memcpy(writeBuffer_, &frameSize, 4);
Mark Slee2f6404d2006-10-10 01:37:40 +0000354
355 // Socket into write mode
David Reiss52cb7a72008-06-30 21:40:35 +0000356 appState_ = APP_SEND_RESULT;
Mark Slee2f6404d2006-10-10 01:37:40 +0000357 setWrite();
358
359 // Try to work the socket immediately
Mark Sleee02385b2007-06-09 01:21:16 +0000360 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000361
362 return;
363 }
364
David Reissc51986f2009-03-24 20:01:25 +0000365 // In this case, the request was oneway and we should fall through
Mark Slee2f6404d2006-10-10 01:37:40 +0000366 // right back into the read frame header state
Mark Slee92f00fb2006-10-25 01:28:17 +0000367 goto LABEL_APP_INIT;
368
Mark Slee2f6404d2006-10-10 01:37:40 +0000369 case APP_SEND_RESULT:
David Reiss54bec5d2010-10-06 17:10:45 +0000370 // it's now safe to perform buffer size housekeeping.
371 if (writeBufferSize_ > largestWriteBufferSize_) {
372 largestWriteBufferSize_ = writeBufferSize_;
373 }
374 if (server_->getResizeBufferEveryN() > 0
375 && ++callsForResize_ >= server_->getResizeBufferEveryN()) {
376 checkIdleBufferMemLimit(server_->getIdleReadBufferLimit(),
377 server_->getIdleWriteBufferLimit());
378 callsForResize_ = 0;
379 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000380
381 // N.B.: We also intentionally fall through here into the INIT state!
382
Mark Slee92f00fb2006-10-25 01:28:17 +0000383 LABEL_APP_INIT:
Mark Slee2f6404d2006-10-10 01:37:40 +0000384 case APP_INIT:
385
386 // Clear write buffer variables
387 writeBuffer_ = NULL;
388 writeBufferPos_ = 0;
389 writeBufferSize_ = 0;
390
Mark Slee2f6404d2006-10-10 01:37:40 +0000391 // Into read4 state we go
David Reiss89a12942010-10-06 17:10:52 +0000392 socketState_ = SOCKET_RECV_FRAMING;
Mark Slee2f6404d2006-10-10 01:37:40 +0000393 appState_ = APP_READ_FRAME_SIZE;
394
David Reiss89a12942010-10-06 17:10:52 +0000395 readBufferPos_ = 0;
396
Mark Slee2f6404d2006-10-10 01:37:40 +0000397 // Register read event
398 setRead();
David Reiss84e63ab2008-03-07 20:12:28 +0000399
Mark Slee2f6404d2006-10-10 01:37:40 +0000400 // Try to work the socket right away
Mark Sleee02385b2007-06-09 01:21:16 +0000401 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000402
403 return;
404
405 case APP_READ_FRAME_SIZE:
David Reiss89a12942010-10-06 17:10:52 +0000406 // We just read the request length
407 // Double the buffer size until it is big enough
408 if (readWant_ > readBufferSize_) {
409 if (readBufferSize_ == 0) {
410 readBufferSize_ = 1;
411 }
412 uint32_t newSize = readBufferSize_;
413 while (readWant_ > newSize) {
414 newSize *= 2;
415 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000416
David Reiss89a12942010-10-06 17:10:52 +0000417 uint8_t* newBuffer = (uint8_t*)std::realloc(readBuffer_, newSize);
418 if (newBuffer == NULL) {
419 // nothing else to be done...
420 throw std::bad_alloc();
421 }
422 readBuffer_ = newBuffer;
423 readBufferSize_ = newSize;
Mark Slee2f6404d2006-10-10 01:37:40 +0000424 }
425
Mark Slee2f6404d2006-10-10 01:37:40 +0000426 readBufferPos_= 0;
427
428 // Move into read request state
David Reiss89a12942010-10-06 17:10:52 +0000429 socketState_ = SOCKET_RECV;
Mark Slee2f6404d2006-10-10 01:37:40 +0000430 appState_ = APP_READ_REQUEST;
431
432 // Work the socket right away
Mark Sleee02385b2007-06-09 01:21:16 +0000433 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000434
435 return;
436
David Reiss01fe1532010-03-09 05:19:25 +0000437 case APP_CLOSE_CONNECTION:
438 server_->decrementActiveProcessors();
439 close();
440 return;
441
Mark Slee2f6404d2006-10-10 01:37:40 +0000442 default:
David Reiss3bb5e052010-01-25 19:31:31 +0000443 GlobalOutput.printf("Unexpected Application State %d", appState_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000444 assert(0);
445 }
446}
447
448void TConnection::setFlags(short eventFlags) {
449 // Catch the do nothing case
450 if (eventFlags_ == eventFlags) {
451 return;
452 }
453
454 // Delete a previously existing event
455 if (eventFlags_ != 0) {
456 if (event_del(&event_) == -1) {
boz6ded7752007-06-05 22:41:18 +0000457 GlobalOutput("TConnection::setFlags event_del");
Mark Slee2f6404d2006-10-10 01:37:40 +0000458 return;
459 }
460 }
461
462 // Update in memory structure
463 eventFlags_ = eventFlags;
464
Mark Slee402ee282007-08-23 01:43:20 +0000465 // Do not call event_set if there are no flags
466 if (!eventFlags_) {
467 return;
468 }
469
David Reiss01fe1532010-03-09 05:19:25 +0000470 /*
Mark Slee2f6404d2006-10-10 01:37:40 +0000471 * event_set:
472 *
473 * Prepares the event structure &event to be used in future calls to
474 * event_add() and event_del(). The event will be prepared to call the
Mark Sleee02385b2007-06-09 01:21:16 +0000475 * eventHandler using the 'sock' file descriptor to monitor events.
Mark Slee2f6404d2006-10-10 01:37:40 +0000476 *
477 * The events can be either EV_READ, EV_WRITE, or both, indicating
478 * that an application can read or write from the file respectively without
479 * blocking.
480 *
Mark Sleee02385b2007-06-09 01:21:16 +0000481 * The eventHandler will be called with the file descriptor that triggered
Mark Slee2f6404d2006-10-10 01:37:40 +0000482 * the event and the type of event which will be one of: EV_TIMEOUT,
483 * EV_SIGNAL, EV_READ, EV_WRITE.
484 *
485 * The additional flag EV_PERSIST makes an event_add() persistent until
486 * event_del() has been called.
487 *
488 * Once initialized, the &event struct can be used repeatedly with
489 * event_add() and event_del() and does not need to be reinitialized unless
Mark Sleee02385b2007-06-09 01:21:16 +0000490 * the eventHandler and/or the argument to it are to be changed. However,
Mark Slee2f6404d2006-10-10 01:37:40 +0000491 * when an ev structure has been added to libevent using event_add() the
492 * structure must persist until the event occurs (assuming EV_PERSIST
493 * is not set) or is removed using event_del(). You may not reuse the same
494 * ev structure for multiple monitored descriptors; each descriptor needs
495 * its own ev.
496 */
David Reiss105961d2010-10-06 17:10:17 +0000497 event_set(&event_, tSocket_->getSocketFD(), eventFlags_,
498 TConnection::eventHandler, this);
Mark Slee79b16942007-11-26 19:05:29 +0000499 event_base_set(server_->getEventBase(), &event_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000500
501 // Add the event
502 if (event_add(&event_, 0) == -1) {
Mark Slee17496a02007-08-02 06:37:40 +0000503 GlobalOutput("TConnection::setFlags(): could not event_add");
Mark Slee2f6404d2006-10-10 01:37:40 +0000504 }
505}
506
507/**
508 * Closes a connection
509 */
510void TConnection::close() {
511 // Delete the registered libevent
512 if (event_del(&event_) == -1) {
David Reiss105961d2010-10-06 17:10:17 +0000513 GlobalOutput.perror("TConnection::close() event_del", errno);
514 }
515
516 if (serverEventHandler_ != NULL) {
517 serverEventHandler_->deleteContext(connectionContext_, inputProtocol_, outputProtocol_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000518 }
519
520 // Close the socket
David Reiss105961d2010-10-06 17:10:17 +0000521 tSocket_->close();
Mark Slee2f6404d2006-10-10 01:37:40 +0000522
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000523 // close any factory produced transports
524 factoryInputTransport_->close();
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000525 factoryOutputTransport_->close();
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000526
Mark Slee2f6404d2006-10-10 01:37:40 +0000527 // Give this object back to the server that owns it
528 server_->returnConnection(this);
529}
530
David Reiss54bec5d2010-10-06 17:10:45 +0000531void TConnection::checkIdleBufferMemLimit(size_t readLimit,
532 size_t writeLimit) {
533 if (readLimit > 0 && readBufferSize_ > readLimit) {
David Reiss89a12942010-10-06 17:10:52 +0000534 free(readBuffer_);
535 readBuffer_ = NULL;
536 readBufferSize_ = 0;
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000537 }
David Reiss54bec5d2010-10-06 17:10:45 +0000538
539 if (writeLimit > 0 && largestWriteBufferSize_ > writeLimit) {
540 // just start over
David Reiss89a12942010-10-06 17:10:52 +0000541 outputTransport_->resetBuffer(server_->getWriteBufferDefaultSize());
David Reiss54bec5d2010-10-06 17:10:45 +0000542 largestWriteBufferSize_ = 0;
543 }
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000544}
545
David Reiss8ede8182010-09-02 15:26:28 +0000546TNonblockingServer::~TNonblockingServer() {
547 // TODO: We currently leak any active TConnection objects.
548 // Since we're shutting down and destroying the event_base, the TConnection
549 // objects will never receive any additional callbacks. (And even if they
550 // did, it would be bad, since they keep a pointer around to the server,
551 // which is being destroyed.)
552
553 // Clean up unused TConnection objects in connectionStack_
554 while (!connectionStack_.empty()) {
555 TConnection* connection = connectionStack_.top();
556 connectionStack_.pop();
557 delete connection;
558 }
559
560 if (eventBase_) {
561 event_base_free(eventBase_);
562 }
563
564 if (serverSocket_ >= 0) {
565 close(serverSocket_);
566 }
567}
568
Mark Slee2f6404d2006-10-10 01:37:40 +0000569/**
570 * Creates a new connection either by reusing an object off the stack or
571 * by allocating a new one entirely
572 */
David Reiss105961d2010-10-06 17:10:17 +0000573TConnection* TNonblockingServer::createConnection(int socket, short flags,
574 const sockaddr* addr,
575 socklen_t addrLen) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000576 // Check the stack
577 if (connectionStack_.empty()) {
David Reiss105961d2010-10-06 17:10:17 +0000578 return new TConnection(socket, flags, this, addr, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000579 } else {
580 TConnection* result = connectionStack_.top();
581 connectionStack_.pop();
David Reiss105961d2010-10-06 17:10:17 +0000582 result->init(socket, flags, this, addr, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000583 return result;
584 }
585}
586
587/**
588 * Returns a connection to the stack
589 */
590void TNonblockingServer::returnConnection(TConnection* connection) {
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000591 if (connectionStackLimit_ &&
592 (connectionStack_.size() >= connectionStackLimit_)) {
593 delete connection;
594 } else {
David Reiss54bec5d2010-10-06 17:10:45 +0000595 connection->checkIdleBufferMemLimit(idleReadBufferLimit_, idleWriteBufferLimit_);
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000596 connectionStack_.push(connection);
597 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000598}
599
600/**
David Reissa79e4882008-03-05 07:51:47 +0000601 * Server socket had something happen. We accept all waiting client
602 * connections on fd and assign TConnection objects to handle those requests.
Mark Slee2f6404d2006-10-10 01:37:40 +0000603 */
604void TNonblockingServer::handleEvent(int fd, short which) {
Roger Meier3b771a12010-11-17 22:11:26 +0000605 (void) which;
David Reiss3bb5e052010-01-25 19:31:31 +0000606 // Make sure that libevent didn't mess up the socket handles
Mark Slee2f6404d2006-10-10 01:37:40 +0000607 assert(fd == serverSocket_);
Mark Slee79b16942007-11-26 19:05:29 +0000608
Mark Slee2f6404d2006-10-10 01:37:40 +0000609 // Server socket accepted a new connection
610 socklen_t addrLen;
David Reiss105961d2010-10-06 17:10:17 +0000611 sockaddr_storage addrStorage;
612 sockaddr* addrp = (sockaddr*)&addrStorage;
613 addrLen = sizeof(addrStorage);
Mark Slee79b16942007-11-26 19:05:29 +0000614
Mark Slee2f6404d2006-10-10 01:37:40 +0000615 // Going to accept a new client socket
616 int clientSocket;
Mark Slee79b16942007-11-26 19:05:29 +0000617
Mark Slee2f6404d2006-10-10 01:37:40 +0000618 // Accept as many new clients as possible, even though libevent signaled only
619 // one, this helps us to avoid having to go back into the libevent engine so
620 // many times
David Reiss105961d2010-10-06 17:10:17 +0000621 while ((clientSocket = ::accept(fd, addrp, &addrLen)) != -1) {
David Reiss01fe1532010-03-09 05:19:25 +0000622 // If we're overloaded, take action here
623 if (overloadAction_ != T_OVERLOAD_NO_ACTION && serverOverloaded()) {
624 nConnectionsDropped_++;
625 nTotalConnectionsDropped_++;
626 if (overloadAction_ == T_OVERLOAD_CLOSE_ON_ACCEPT) {
627 close(clientSocket);
David Reiss83b8fda2010-03-09 05:19:34 +0000628 return;
David Reiss01fe1532010-03-09 05:19:25 +0000629 } else if (overloadAction_ == T_OVERLOAD_DRAIN_TASK_QUEUE) {
630 if (!drainPendingTask()) {
631 // Nothing left to discard, so we drop connection instead.
632 close(clientSocket);
David Reiss83b8fda2010-03-09 05:19:34 +0000633 return;
David Reiss01fe1532010-03-09 05:19:25 +0000634 }
635 }
636 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000637 // Explicitly set this socket to NONBLOCK mode
638 int flags;
639 if ((flags = fcntl(clientSocket, F_GETFL, 0)) < 0 ||
640 fcntl(clientSocket, F_SETFL, flags | O_NONBLOCK) < 0) {
David Reiss01e55c12008-07-13 22:18:51 +0000641 GlobalOutput.perror("thriftServerEventHandler: set O_NONBLOCK (fcntl) ", errno);
Mark Slee2f6404d2006-10-10 01:37:40 +0000642 close(clientSocket);
643 return;
644 }
645
646 // Create a new TConnection for this client socket.
647 TConnection* clientConnection =
David Reiss105961d2010-10-06 17:10:17 +0000648 createConnection(clientSocket, EV_READ | EV_PERSIST, addrp, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000649
650 // Fail fast if we could not create a TConnection object
651 if (clientConnection == NULL) {
David Reiss01e55c12008-07-13 22:18:51 +0000652 GlobalOutput.printf("thriftServerEventHandler: failed TConnection factory");
Mark Slee2f6404d2006-10-10 01:37:40 +0000653 close(clientSocket);
654 return;
655 }
656
657 // Put this client connection into the proper state
658 clientConnection->transition();
David Reiss3e7fca42009-09-19 01:59:13 +0000659
660 // addrLen is written by the accept() call, so needs to be set before the next call.
David Reiss105961d2010-10-06 17:10:17 +0000661 addrLen = sizeof(addrStorage);
Mark Slee2f6404d2006-10-10 01:37:40 +0000662 }
Mark Slee79b16942007-11-26 19:05:29 +0000663
Mark Slee2f6404d2006-10-10 01:37:40 +0000664 // Done looping accept, now we have to make sure the error is due to
665 // blocking. Any other error is a problem
666 if (errno != EAGAIN && errno != EWOULDBLOCK) {
David Reiss01e55c12008-07-13 22:18:51 +0000667 GlobalOutput.perror("thriftServerEventHandler: accept() ", errno);
Mark Slee2f6404d2006-10-10 01:37:40 +0000668 }
669}
670
/**
 * Creates a socket to listen on and binds it to the local port.
 *
 * The wildcard address is resolved with getaddrinfo(); an IPv6 result is
 * preferred when one is offered. The bound socket is then passed to
 * listenSocket(int) for the remaining setup.
 *
 * @throws TException if the address cannot be resolved, or if the socket
 *                    cannot be created or bound
 */
void TNonblockingServer::listenSocket() {
  int s;
  struct addrinfo hints, *res, *res0;
  int error;

  // Big enough for any int printed in decimal (sign, digits, terminator),
  // so an out-of-range port_ cannot overrun the buffer.
  char port[16];
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = PF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
  snprintf(port, sizeof(port), "%d", port_);

  // Wildcard address
  error = getaddrinfo(NULL, port, &hints, &res0);
  if (error) {
    // Without an address there is nothing to listen on; report it and fail
    // the same way the socket() and bind() failures below do, instead of
    // returning as if the listening socket had been set up.
    string errStr = "TNonblockingServer::serve() getaddrinfo " + string(gai_strerror(error));
    GlobalOutput(errStr.c_str());
    throw TException(errStr);
  }

  // Pick the ipv6 address first since ipv4 addresses can be mapped
  // into ipv6 space. If there is none, the last entry in the list is used.
  for (res = res0; res; res = res->ai_next) {
    if (res->ai_family == AF_INET6 || res->ai_next == NULL)
      break;
  }

  // Create the server socket
  s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
  if (s == -1) {
    freeaddrinfo(res0);
    throw TException("TNonblockingServer::serve() socket() -1");
  }

  #ifdef IPV6_V6ONLY
  if (res->ai_family == AF_INET6) {
    // Clear IPV6_V6ONLY; a failure is only logged.
    int zero = 0;
    if (-1 == setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero))) {
      GlobalOutput("TServerSocket::listen() IPV6_V6ONLY");
    }
  }
  #endif // #ifdef IPV6_V6ONLY


  int one = 1;

  // Set reuseaddr to avoid 2MSL delay on server restart
  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

  if (bind(s, res->ai_addr, res->ai_addrlen) == -1) {
    close(s);
    freeaddrinfo(res0);
    throw TException("TNonblockingServer::serve() bind");
  }

  // Done with the addr info
  freeaddrinfo(res0);

  // Set up this file descriptor for listening
  listenSocket(s);
}
735
736/**
737 * Takes a socket created by listenSocket() and sets various options on it
738 * to prepare for use in the server.
739 */
740void TNonblockingServer::listenSocket(int s) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000741 // Set socket to nonblocking mode
742 int flags;
Mark Slee79b16942007-11-26 19:05:29 +0000743 if ((flags = fcntl(s, F_GETFL, 0)) < 0 ||
744 fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) {
745 close(s);
746 throw TException("TNonblockingServer::serve() O_NONBLOCK");
Mark Slee2f6404d2006-10-10 01:37:40 +0000747 }
748
749 int one = 1;
750 struct linger ling = {0, 0};
Mark Slee2f6404d2006-10-10 01:37:40 +0000751
752 // Keepalive to ensure full result flushing
Mark Slee79b16942007-11-26 19:05:29 +0000753 setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
Mark Slee2f6404d2006-10-10 01:37:40 +0000754
755 // Turn linger off to avoid hung sockets
Mark Slee79b16942007-11-26 19:05:29 +0000756 setsockopt(s, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));
Mark Slee2f6404d2006-10-10 01:37:40 +0000757
758 // Set TCP nodelay if available, MAC OS X Hack
759 // See http://lists.danga.com/pipermail/memcached/2005-March/001240.html
760 #ifndef TCP_NOPUSH
Mark Slee79b16942007-11-26 19:05:29 +0000761 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
Mark Slee2f6404d2006-10-10 01:37:40 +0000762 #endif
763
David Reiss1c20c872010-03-09 05:20:14 +0000764 #ifdef TCP_LOW_MIN_RTO
765 if (TSocket::getUseLowMinRto()) {
766 setsockopt(s, IPPROTO_TCP, TCP_LOW_MIN_RTO, &one, sizeof(one));
767 }
768 #endif
769
Mark Slee79b16942007-11-26 19:05:29 +0000770 if (listen(s, LISTEN_BACKLOG) == -1) {
771 close(s);
772 throw TException("TNonblockingServer::serve() listen");
Mark Slee2f6404d2006-10-10 01:37:40 +0000773 }
774
Mark Slee79b16942007-11-26 19:05:29 +0000775 // Cool, this socket is good to go, set it as the serverSocket_
776 serverSocket_ = s;
777}
778
David Reiss01fe1532010-03-09 05:19:25 +0000779void TNonblockingServer::createNotificationPipe() {
780 if (pipe(notificationPipeFDs_) != 0) {
781 GlobalOutput.perror("TNonblockingServer::createNotificationPipe ", errno);
782 throw TException("can't create notification pipe");
783 }
David Reiss83b8fda2010-03-09 05:19:34 +0000784 int flags;
785 if ((flags = fcntl(notificationPipeFDs_[0], F_GETFL, 0)) < 0 ||
786 fcntl(notificationPipeFDs_[0], F_SETFL, flags | O_NONBLOCK) < 0) {
787 close(notificationPipeFDs_[0]);
788 close(notificationPipeFDs_[1]);
789 throw TException("TNonblockingServer::createNotificationPipe() O_NONBLOCK");
790 }
David Reiss01fe1532010-03-09 05:19:25 +0000791}
792
Mark Slee79b16942007-11-26 19:05:29 +0000793/**
794 * Register the core libevent events onto the proper base.
795 */
796void TNonblockingServer::registerEvents(event_base* base) {
797 assert(serverSocket_ != -1);
798 assert(!eventBase_);
799 eventBase_ = base;
800
801 // Print some libevent stats
David Reiss01e55c12008-07-13 22:18:51 +0000802 GlobalOutput.printf("libevent %s method %s",
Mark Slee79b16942007-11-26 19:05:29 +0000803 event_get_version(),
804 event_get_method());
Mark Slee2f6404d2006-10-10 01:37:40 +0000805
806 // Register the server event
Mark Slee79b16942007-11-26 19:05:29 +0000807 event_set(&serverEvent_,
Mark Slee2f6404d2006-10-10 01:37:40 +0000808 serverSocket_,
809 EV_READ | EV_PERSIST,
810 TNonblockingServer::eventHandler,
811 this);
Mark Slee79b16942007-11-26 19:05:29 +0000812 event_base_set(eventBase_, &serverEvent_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000813
814 // Add the event and start up the server
Mark Slee79b16942007-11-26 19:05:29 +0000815 if (-1 == event_add(&serverEvent_, 0)) {
816 throw TException("TNonblockingServer::serve(): coult not event_add");
Mark Slee2f6404d2006-10-10 01:37:40 +0000817 }
David Reiss01fe1532010-03-09 05:19:25 +0000818 if (threadPoolProcessing_) {
819 // Create an event to be notified when a task finishes
820 event_set(&notificationEvent_,
821 getNotificationRecvFD(),
822 EV_READ | EV_PERSIST,
823 TConnection::taskHandler,
824 this);
David Reiss1c20c872010-03-09 05:20:14 +0000825
David Reiss01fe1532010-03-09 05:19:25 +0000826 // Attach to the base
827 event_base_set(eventBase_, &notificationEvent_);
828
829 // Add the event and start up the server
830 if (-1 == event_add(&notificationEvent_, 0)) {
831 throw TException("TNonblockingServer::serve(): notification event_add fail");
832 }
833 }
834}
835
David Reiss068f4162010-03-09 05:19:45 +0000836void TNonblockingServer::setThreadManager(boost::shared_ptr<ThreadManager> threadManager) {
837 threadManager_ = threadManager;
838 if (threadManager != NULL) {
839 threadManager->setExpireCallback(std::tr1::bind(&TNonblockingServer::expireClose, this, std::tr1::placeholders::_1));
840 threadPoolProcessing_ = true;
841 } else {
842 threadPoolProcessing_ = false;
843 }
844}
845
David Reiss01fe1532010-03-09 05:19:25 +0000846bool TNonblockingServer::serverOverloaded() {
847 size_t activeConnections = numTConnections_ - connectionStack_.size();
848 if (numActiveProcessors_ > maxActiveProcessors_ ||
849 activeConnections > maxConnections_) {
850 if (!overloaded_) {
851 GlobalOutput.printf("thrift non-blocking server overload condition");
852 overloaded_ = true;
853 }
854 } else {
855 if (overloaded_ &&
856 (numActiveProcessors_ <= overloadHysteresis_ * maxActiveProcessors_) &&
857 (activeConnections <= overloadHysteresis_ * maxConnections_)) {
858 GlobalOutput.printf("thrift non-blocking server overload ended; %u dropped (%llu total)",
859 nConnectionsDropped_, nTotalConnectionsDropped_);
860 nConnectionsDropped_ = 0;
861 overloaded_ = false;
862 }
863 }
864
865 return overloaded_;
866}
867
868bool TNonblockingServer::drainPendingTask() {
869 if (threadManager_) {
870 boost::shared_ptr<Runnable> task = threadManager_->removeNextPending();
871 if (task) {
872 TConnection* connection =
873 static_cast<TConnection::Task*>(task.get())->getTConnection();
874 assert(connection && connection->getServer()
875 && connection->getState() == APP_WAIT_TASK);
876 connection->forceClose();
877 return true;
878 }
879 }
880 return false;
Mark Slee79b16942007-11-26 19:05:29 +0000881}
882
David Reiss068f4162010-03-09 05:19:45 +0000883void TNonblockingServer::expireClose(boost::shared_ptr<Runnable> task) {
884 TConnection* connection =
885 static_cast<TConnection::Task*>(task.get())->getTConnection();
886 assert(connection && connection->getServer()
887 && connection->getState() == APP_WAIT_TASK);
888 connection->forceClose();
889}
890
Mark Slee79b16942007-11-26 19:05:29 +0000891/**
892 * Main workhorse function, starts up the server listening on a port and
893 * loops over the libevent handler.
894 */
895void TNonblockingServer::serve() {
896 // Init socket
897 listenSocket();
898
David Reiss01fe1532010-03-09 05:19:25 +0000899 if (threadPoolProcessing_) {
900 // Init task completion notification pipe
901 createNotificationPipe();
902 }
903
Mark Slee79b16942007-11-26 19:05:29 +0000904 // Initialize libevent core
905 registerEvents(static_cast<event_base*>(event_init()));
Mark Slee2f6404d2006-10-10 01:37:40 +0000906
Mark Sleeb4d3e7b2007-11-28 01:51:43 +0000907 // Run the preServe event
908 if (eventHandler_ != NULL) {
909 eventHandler_->preServe();
dweatherford58985992007-06-19 23:10:19 +0000910 }
911
Mark Sleee02385b2007-06-09 01:21:16 +0000912 // Run libevent engine, never returns, invokes calls to eventHandler
Mark Slee79b16942007-11-26 19:05:29 +0000913 event_base_loop(eventBase_, 0);
Mark Slee2f6404d2006-10-10 01:37:40 +0000914}
915
T Jake Lucianib5e62212009-01-31 22:36:20 +0000916}}} // apache::thrift::server