blob: 8f5a9f38405f946102e42946463c2abf68b89bf2 [file] [log] [blame]
David Reissea2cba82009-03-30 21:35:00 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
Mark Slee9f0c6512007-02-28 23:58:26 +000019
#include "TNonblockingServer.h"
#include <concurrency/Exception.h>
#include <transport/TSocket.h>

#include <iostream>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <netdb.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include <stdio.h>
32
David Reiss9b903442009-10-21 05:51:28 +000033#ifndef AF_LOCAL
34#define AF_LOCAL AF_UNIX
35#endif
36
T Jake Lucianib5e62212009-01-31 22:36:20 +000037namespace apache { namespace thrift { namespace server {
Mark Slee2f6404d2006-10-10 01:37:40 +000038
T Jake Lucianib5e62212009-01-31 22:36:20 +000039using namespace apache::thrift::protocol;
40using namespace apache::thrift::transport;
41using namespace apache::thrift::concurrency;
Mark Sleee02385b2007-06-09 01:21:16 +000042using namespace std;
David Reiss1c20c872010-03-09 05:20:14 +000043using apache::thrift::transport::TSocket;
44using apache::thrift::transport::TTransportException;
Mark Sleee02385b2007-06-09 01:21:16 +000045
46class TConnection::Task: public Runnable {
47 public:
48 Task(boost::shared_ptr<TProcessor> processor,
49 boost::shared_ptr<TProtocol> input,
50 boost::shared_ptr<TProtocol> output,
David Reiss01fe1532010-03-09 05:19:25 +000051 TConnection* connection) :
Mark Sleee02385b2007-06-09 01:21:16 +000052 processor_(processor),
53 input_(input),
54 output_(output),
David Reiss105961d2010-10-06 17:10:17 +000055 connection_(connection),
56 serverEventHandler_(connection_->getServerEventHandler()),
57 connectionContext_(connection_->getConnectionContext()) {}
Mark Sleee02385b2007-06-09 01:21:16 +000058
59 void run() {
60 try {
David Reiss105961d2010-10-06 17:10:17 +000061 for (;;) {
62 if (serverEventHandler_ != NULL) {
63 serverEventHandler_->processContext(connectionContext_, connection_->getTSocket());
64 }
65 if (!processor_->process(input_, output_, connectionContext_) ||
66 !input_->getTransport()->peek()) {
Mark Sleee02385b2007-06-09 01:21:16 +000067 break;
68 }
69 }
70 } catch (TTransportException& ttx) {
David Reissa79e4882008-03-05 07:51:47 +000071 cerr << "TNonblockingServer client died: " << ttx.what() << endl;
Mark Sleee02385b2007-06-09 01:21:16 +000072 } catch (TException& x) {
David Reissa79e4882008-03-05 07:51:47 +000073 cerr << "TNonblockingServer exception: " << x.what() << endl;
David Reiss28e88ec2010-03-09 05:19:27 +000074 } catch (bad_alloc&) {
75 cerr << "TNonblockingServer caught bad_alloc exception.";
76 exit(-1);
Mark Sleee02385b2007-06-09 01:21:16 +000077 } catch (...) {
David Reissa79e4882008-03-05 07:51:47 +000078 cerr << "TNonblockingServer uncaught exception." << endl;
Mark Sleee02385b2007-06-09 01:21:16 +000079 }
Mark Slee79b16942007-11-26 19:05:29 +000080
David Reiss01fe1532010-03-09 05:19:25 +000081 // Signal completion back to the libevent thread via a pipe
82 if (!connection_->notifyServer()) {
83 throw TException("TNonblockingServer::Task::run: failed write on notify pipe");
Mark Sleee02385b2007-06-09 01:21:16 +000084 }
David Reiss01fe1532010-03-09 05:19:25 +000085 }
86
87 TConnection* getTConnection() {
88 return connection_;
Mark Sleee02385b2007-06-09 01:21:16 +000089 }
90
91 private:
92 boost::shared_ptr<TProcessor> processor_;
93 boost::shared_ptr<TProtocol> input_;
94 boost::shared_ptr<TProtocol> output_;
David Reiss01fe1532010-03-09 05:19:25 +000095 TConnection* connection_;
David Reiss105961d2010-10-06 17:10:17 +000096 boost::shared_ptr<TServerEventHandler> serverEventHandler_;
97 void* connectionContext_;
Mark Sleee02385b2007-06-09 01:21:16 +000098};
Mark Slee5ea15f92007-03-05 22:55:59 +000099
David Reiss105961d2010-10-06 17:10:17 +0000100void TConnection::init(int socket, short eventFlags, TNonblockingServer* s,
101 const sockaddr* addr, socklen_t addrLen) {
102 tSocket_->setSocketFD(socket);
103 tSocket_->setCachedAddress(addr, addrLen);
104
Mark Slee2f6404d2006-10-10 01:37:40 +0000105 server_ = s;
106 appState_ = APP_INIT;
107 eventFlags_ = 0;
108
109 readBufferPos_ = 0;
110 readWant_ = 0;
111
112 writeBuffer_ = NULL;
113 writeBufferSize_ = 0;
114 writeBufferPos_ = 0;
David Reiss54bec5d2010-10-06 17:10:45 +0000115 largestWriteBufferSize_ = 0;
Mark Slee2f6404d2006-10-10 01:37:40 +0000116
David Reiss89a12942010-10-06 17:10:52 +0000117 socketState_ = SOCKET_RECV_FRAMING;
Mark Slee2f6404d2006-10-10 01:37:40 +0000118 appState_ = APP_INIT;
David Reiss54bec5d2010-10-06 17:10:45 +0000119 callsForResize_ = 0;
Mark Slee79b16942007-11-26 19:05:29 +0000120
Mark Slee2f6404d2006-10-10 01:37:40 +0000121 // Set flags, which also registers the event
122 setFlags(eventFlags);
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000123
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000124 // get input/transports
125 factoryInputTransport_ = s->getInputTransportFactory()->getTransport(inputTransport_);
126 factoryOutputTransport_ = s->getOutputTransportFactory()->getTransport(outputTransport_);
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000127
128 // Create protocol
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000129 inputProtocol_ = s->getInputProtocolFactory()->getProtocol(factoryInputTransport_);
130 outputProtocol_ = s->getOutputProtocolFactory()->getProtocol(factoryOutputTransport_);
David Reiss105961d2010-10-06 17:10:17 +0000131
132 // Set up for any server event handler
133 serverEventHandler_ = server_->getEventHandler();
134 if (serverEventHandler_ != NULL) {
135 connectionContext_ = serverEventHandler_->createContext(inputProtocol_, outputProtocol_);
136 } else {
137 connectionContext_ = NULL;
138 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000139}
140
141void TConnection::workSocket() {
David Reiss105961d2010-10-06 17:10:17 +0000142 int got=0, left=0, sent=0;
Mark Sleeaaa23ed2007-01-30 19:52:05 +0000143 uint32_t fetch = 0;
Mark Slee2f6404d2006-10-10 01:37:40 +0000144
145 switch (socketState_) {
David Reiss89a12942010-10-06 17:10:52 +0000146 case SOCKET_RECV_FRAMING:
147 union {
148 uint8_t buf[sizeof(uint32_t)];
149 int32_t size;
150 } framing;
Mark Slee2f6404d2006-10-10 01:37:40 +0000151
David Reiss89a12942010-10-06 17:10:52 +0000152 // if we've already received some bytes we kept them here
153 framing.size = readWant_;
154 // determine size of this frame
155 try {
156 // Read from the socket
157 fetch = tSocket_->read(&framing.buf[readBufferPos_],
158 uint32_t(sizeof(framing.size) - readBufferPos_));
159 if (fetch == 0) {
160 // Whenever we get here it means a remote disconnect
Mark Slee2f6404d2006-10-10 01:37:40 +0000161 close();
162 return;
163 }
David Reiss89a12942010-10-06 17:10:52 +0000164 readBufferPos_ += fetch;
165 } catch (TTransportException& te) {
166 GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
167 close();
168
169 return;
Mark Slee2f6404d2006-10-10 01:37:40 +0000170 }
171
David Reiss89a12942010-10-06 17:10:52 +0000172 if (readBufferPos_ < sizeof(framing.size)) {
173 // more needed before frame size is known -- save what we have so far
174 readWant_ = framing.size;
175 return;
176 }
177
178 readWant_ = ntohl(framing.size);
179 if (static_cast<int>(readWant_) <= 0) {
180 GlobalOutput.printf("TConnection:workSocket() Negative frame size %d, remote side not using TFramedTransport?", static_cast<int>(readWant_));
181 close();
182 return;
183 }
184 // size known; now get the rest of the frame
185 transition();
186 return;
187
188 case SOCKET_RECV:
189 // It is an error to be in this state if we already have all the data
190 assert(readBufferPos_ < readWant_);
191
David Reiss105961d2010-10-06 17:10:17 +0000192 try {
193 // Read from the socket
194 fetch = readWant_ - readBufferPos_;
195 got = tSocket_->read(readBuffer_ + readBufferPos_, fetch);
196 }
197 catch (TTransportException& te) {
198 GlobalOutput.printf("TConnection::workSocket(): %s", te.what());
199 close();
Mark Slee79b16942007-11-26 19:05:29 +0000200
David Reiss105961d2010-10-06 17:10:17 +0000201 return;
202 }
203
Mark Slee2f6404d2006-10-10 01:37:40 +0000204 if (got > 0) {
205 // Move along in the buffer
206 readBufferPos_ += got;
207
208 // Check that we did not overdo it
209 assert(readBufferPos_ <= readWant_);
Mark Slee79b16942007-11-26 19:05:29 +0000210
Mark Slee2f6404d2006-10-10 01:37:40 +0000211 // We are done reading, move onto the next state
212 if (readBufferPos_ == readWant_) {
213 transition();
214 }
215 return;
Mark Slee2f6404d2006-10-10 01:37:40 +0000216 }
217
218 // Whenever we get down here it means a remote disconnect
219 close();
Mark Slee79b16942007-11-26 19:05:29 +0000220
Mark Slee2f6404d2006-10-10 01:37:40 +0000221 return;
222
223 case SOCKET_SEND:
224 // Should never have position past size
225 assert(writeBufferPos_ <= writeBufferSize_);
226
227 // If there is no data to send, then let us move on
228 if (writeBufferPos_ == writeBufferSize_) {
Mark Slee79b16942007-11-26 19:05:29 +0000229 GlobalOutput("WARNING: Send state with no data to send\n");
Mark Slee2f6404d2006-10-10 01:37:40 +0000230 transition();
231 return;
232 }
233
David Reiss105961d2010-10-06 17:10:17 +0000234 try {
235 left = writeBufferSize_ - writeBufferPos_;
236 sent = tSocket_->write_partial(writeBuffer_ + writeBufferPos_, left);
237 }
238 catch (TTransportException& te) {
239 GlobalOutput.printf("TConnection::workSocket(): %s ", te.what());
Mark Slee2f6404d2006-10-10 01:37:40 +0000240 close();
241 return;
242 }
243
244 writeBufferPos_ += sent;
245
246 // Did we overdo it?
247 assert(writeBufferPos_ <= writeBufferSize_);
248
Mark Slee79b16942007-11-26 19:05:29 +0000249 // We are done!
Mark Slee2f6404d2006-10-10 01:37:40 +0000250 if (writeBufferPos_ == writeBufferSize_) {
251 transition();
252 }
253
254 return;
255
256 default:
David Reiss3bb5e052010-01-25 19:31:31 +0000257 GlobalOutput.printf("Unexpected Socket State %d", socketState_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000258 assert(0);
259 }
260}
261
262/**
263 * This is called when the application transitions from one state into
264 * another. This means that it has finished writing the data that it needed
265 * to, or finished receiving the data that it needed to.
266 */
267void TConnection::transition() {
268 // Switch upon the state that we are currently in and move to a new state
269 switch (appState_) {
270
271 case APP_READ_REQUEST:
272 // We are done reading the request, package the read buffer into transport
273 // and get back some data from the dispatch function
274 inputTransport_->resetBuffer(readBuffer_, readBufferPos_);
David Reiss7197efb2010-10-06 17:10:43 +0000275 outputTransport_->resetBuffer();
David Reiss52cb7a72008-06-30 21:40:35 +0000276 // Prepend four bytes of blank space to the buffer so we can
277 // write the frame size there later.
278 outputTransport_->getWritePtr(4);
279 outputTransport_->wroteBytes(4);
Mark Slee79b16942007-11-26 19:05:29 +0000280
David Reiss01fe1532010-03-09 05:19:25 +0000281 server_->incrementActiveProcessors();
282
Mark Sleee02385b2007-06-09 01:21:16 +0000283 if (server_->isThreadPoolProcessing()) {
284 // We are setting up a Task to do this work and we will wait on it
Mark Slee79b16942007-11-26 19:05:29 +0000285
David Reiss01fe1532010-03-09 05:19:25 +0000286 // Create task and dispatch to the thread manager
287 boost::shared_ptr<Runnable> task =
288 boost::shared_ptr<Runnable>(new Task(server_->getProcessor(),
289 inputProtocol_,
290 outputProtocol_,
291 this));
292 // The application is now waiting on the task to finish
293 appState_ = APP_WAIT_TASK;
Mark Slee2f6404d2006-10-10 01:37:40 +0000294
David Reisse11f3072008-10-07 21:39:19 +0000295 try {
296 server_->addTask(task);
297 } catch (IllegalStateException & ise) {
298 // The ThreadManager is not ready to handle any more tasks (it's probably shutting down).
David Reissc53a5942008-10-07 23:55:24 +0000299 GlobalOutput.printf("IllegalStateException: Server::process() %s", ise.what());
David Reisse11f3072008-10-07 21:39:19 +0000300 close();
301 }
Mark Slee402ee282007-08-23 01:43:20 +0000302
David Reiss01fe1532010-03-09 05:19:25 +0000303 // Set this connection idle so that libevent doesn't process more
304 // data on it while we're still waiting for the threadmanager to
305 // finish this task
306 setIdle();
307 return;
Mark Sleee02385b2007-06-09 01:21:16 +0000308 } else {
309 try {
310 // Invoke the processor
David Reiss23248712010-10-06 17:10:08 +0000311 server_->getProcessor()->process(inputProtocol_, outputProtocol_, NULL);
Mark Sleee02385b2007-06-09 01:21:16 +0000312 } catch (TTransportException &ttx) {
David Reiss01e55c12008-07-13 22:18:51 +0000313 GlobalOutput.printf("TTransportException: Server::process() %s", ttx.what());
David Reiss01fe1532010-03-09 05:19:25 +0000314 server_->decrementActiveProcessors();
Mark Sleee02385b2007-06-09 01:21:16 +0000315 close();
316 return;
317 } catch (TException &x) {
David Reiss01e55c12008-07-13 22:18:51 +0000318 GlobalOutput.printf("TException: Server::process() %s", x.what());
David Reiss01fe1532010-03-09 05:19:25 +0000319 server_->decrementActiveProcessors();
Mark Slee79b16942007-11-26 19:05:29 +0000320 close();
Mark Sleee02385b2007-06-09 01:21:16 +0000321 return;
322 } catch (...) {
David Reiss01e55c12008-07-13 22:18:51 +0000323 GlobalOutput.printf("Server::process() unknown exception");
David Reiss01fe1532010-03-09 05:19:25 +0000324 server_->decrementActiveProcessors();
Mark Sleee02385b2007-06-09 01:21:16 +0000325 close();
326 return;
327 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000328 }
329
Mark Slee402ee282007-08-23 01:43:20 +0000330 // Intentionally fall through here, the call to process has written into
331 // the writeBuffer_
332
Mark Sleee02385b2007-06-09 01:21:16 +0000333 case APP_WAIT_TASK:
334 // We have now finished processing a task and the result has been written
335 // into the outputTransport_, so we grab its contents and place them into
336 // the writeBuffer_ for actual writing by the libevent thread
337
David Reiss01fe1532010-03-09 05:19:25 +0000338 server_->decrementActiveProcessors();
Mark Slee2f6404d2006-10-10 01:37:40 +0000339 // Get the result of the operation
340 outputTransport_->getBuffer(&writeBuffer_, &writeBufferSize_);
341
342 // If the function call generated return data, then move into the send
343 // state and get going
David Reissaf787782008-07-03 20:29:34 +0000344 // 4 bytes were reserved for frame size
David Reiss52cb7a72008-06-30 21:40:35 +0000345 if (writeBufferSize_ > 4) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000346
347 // Move into write state
348 writeBufferPos_ = 0;
349 socketState_ = SOCKET_SEND;
Mark Slee92f00fb2006-10-25 01:28:17 +0000350
David Reissaf787782008-07-03 20:29:34 +0000351 // Put the frame size into the write buffer
352 int32_t frameSize = (int32_t)htonl(writeBufferSize_ - 4);
353 memcpy(writeBuffer_, &frameSize, 4);
Mark Slee2f6404d2006-10-10 01:37:40 +0000354
355 // Socket into write mode
David Reiss52cb7a72008-06-30 21:40:35 +0000356 appState_ = APP_SEND_RESULT;
Mark Slee2f6404d2006-10-10 01:37:40 +0000357 setWrite();
358
359 // Try to work the socket immediately
Mark Sleee02385b2007-06-09 01:21:16 +0000360 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000361
362 return;
363 }
364
David Reissc51986f2009-03-24 20:01:25 +0000365 // In this case, the request was oneway and we should fall through
Mark Slee2f6404d2006-10-10 01:37:40 +0000366 // right back into the read frame header state
Mark Slee92f00fb2006-10-25 01:28:17 +0000367 goto LABEL_APP_INIT;
368
Mark Slee2f6404d2006-10-10 01:37:40 +0000369 case APP_SEND_RESULT:
David Reiss54bec5d2010-10-06 17:10:45 +0000370 // it's now safe to perform buffer size housekeeping.
371 if (writeBufferSize_ > largestWriteBufferSize_) {
372 largestWriteBufferSize_ = writeBufferSize_;
373 }
374 if (server_->getResizeBufferEveryN() > 0
375 && ++callsForResize_ >= server_->getResizeBufferEveryN()) {
376 checkIdleBufferMemLimit(server_->getIdleReadBufferLimit(),
377 server_->getIdleWriteBufferLimit());
378 callsForResize_ = 0;
379 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000380
381 // N.B.: We also intentionally fall through here into the INIT state!
382
Mark Slee92f00fb2006-10-25 01:28:17 +0000383 LABEL_APP_INIT:
Mark Slee2f6404d2006-10-10 01:37:40 +0000384 case APP_INIT:
385
386 // Clear write buffer variables
387 writeBuffer_ = NULL;
388 writeBufferPos_ = 0;
389 writeBufferSize_ = 0;
390
Mark Slee2f6404d2006-10-10 01:37:40 +0000391 // Into read4 state we go
David Reiss89a12942010-10-06 17:10:52 +0000392 socketState_ = SOCKET_RECV_FRAMING;
Mark Slee2f6404d2006-10-10 01:37:40 +0000393 appState_ = APP_READ_FRAME_SIZE;
394
David Reiss89a12942010-10-06 17:10:52 +0000395 readBufferPos_ = 0;
396
Mark Slee2f6404d2006-10-10 01:37:40 +0000397 // Register read event
398 setRead();
David Reiss84e63ab2008-03-07 20:12:28 +0000399
Mark Slee2f6404d2006-10-10 01:37:40 +0000400 // Try to work the socket right away
Mark Sleee02385b2007-06-09 01:21:16 +0000401 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000402
403 return;
404
405 case APP_READ_FRAME_SIZE:
David Reiss89a12942010-10-06 17:10:52 +0000406 // We just read the request length
407 // Double the buffer size until it is big enough
408 if (readWant_ > readBufferSize_) {
409 if (readBufferSize_ == 0) {
410 readBufferSize_ = 1;
411 }
412 uint32_t newSize = readBufferSize_;
413 while (readWant_ > newSize) {
414 newSize *= 2;
415 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000416
David Reiss89a12942010-10-06 17:10:52 +0000417 uint8_t* newBuffer = (uint8_t*)std::realloc(readBuffer_, newSize);
418 if (newBuffer == NULL) {
419 // nothing else to be done...
420 throw std::bad_alloc();
421 }
422 readBuffer_ = newBuffer;
423 readBufferSize_ = newSize;
Mark Slee2f6404d2006-10-10 01:37:40 +0000424 }
425
Mark Slee2f6404d2006-10-10 01:37:40 +0000426 readBufferPos_= 0;
427
428 // Move into read request state
David Reiss89a12942010-10-06 17:10:52 +0000429 socketState_ = SOCKET_RECV;
Mark Slee2f6404d2006-10-10 01:37:40 +0000430 appState_ = APP_READ_REQUEST;
431
432 // Work the socket right away
Mark Sleee02385b2007-06-09 01:21:16 +0000433 // workSocket();
Mark Slee2f6404d2006-10-10 01:37:40 +0000434
435 return;
436
David Reiss01fe1532010-03-09 05:19:25 +0000437 case APP_CLOSE_CONNECTION:
438 server_->decrementActiveProcessors();
439 close();
440 return;
441
Mark Slee2f6404d2006-10-10 01:37:40 +0000442 default:
David Reiss3bb5e052010-01-25 19:31:31 +0000443 GlobalOutput.printf("Unexpected Application State %d", appState_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000444 assert(0);
445 }
446}
447
448void TConnection::setFlags(short eventFlags) {
449 // Catch the do nothing case
450 if (eventFlags_ == eventFlags) {
451 return;
452 }
453
454 // Delete a previously existing event
455 if (eventFlags_ != 0) {
456 if (event_del(&event_) == -1) {
boz6ded7752007-06-05 22:41:18 +0000457 GlobalOutput("TConnection::setFlags event_del");
Mark Slee2f6404d2006-10-10 01:37:40 +0000458 return;
459 }
460 }
461
462 // Update in memory structure
463 eventFlags_ = eventFlags;
464
Mark Slee402ee282007-08-23 01:43:20 +0000465 // Do not call event_set if there are no flags
466 if (!eventFlags_) {
467 return;
468 }
469
David Reiss01fe1532010-03-09 05:19:25 +0000470 /*
Mark Slee2f6404d2006-10-10 01:37:40 +0000471 * event_set:
472 *
473 * Prepares the event structure &event to be used in future calls to
474 * event_add() and event_del(). The event will be prepared to call the
Mark Sleee02385b2007-06-09 01:21:16 +0000475 * eventHandler using the 'sock' file descriptor to monitor events.
Mark Slee2f6404d2006-10-10 01:37:40 +0000476 *
477 * The events can be either EV_READ, EV_WRITE, or both, indicating
478 * that an application can read or write from the file respectively without
479 * blocking.
480 *
Mark Sleee02385b2007-06-09 01:21:16 +0000481 * The eventHandler will be called with the file descriptor that triggered
Mark Slee2f6404d2006-10-10 01:37:40 +0000482 * the event and the type of event which will be one of: EV_TIMEOUT,
483 * EV_SIGNAL, EV_READ, EV_WRITE.
484 *
485 * The additional flag EV_PERSIST makes an event_add() persistent until
486 * event_del() has been called.
487 *
488 * Once initialized, the &event struct can be used repeatedly with
489 * event_add() and event_del() and does not need to be reinitialized unless
Mark Sleee02385b2007-06-09 01:21:16 +0000490 * the eventHandler and/or the argument to it are to be changed. However,
Mark Slee2f6404d2006-10-10 01:37:40 +0000491 * when an ev structure has been added to libevent using event_add() the
492 * structure must persist until the event occurs (assuming EV_PERSIST
493 * is not set) or is removed using event_del(). You may not reuse the same
494 * ev structure for multiple monitored descriptors; each descriptor needs
495 * its own ev.
496 */
David Reiss105961d2010-10-06 17:10:17 +0000497 event_set(&event_, tSocket_->getSocketFD(), eventFlags_,
498 TConnection::eventHandler, this);
Mark Slee79b16942007-11-26 19:05:29 +0000499 event_base_set(server_->getEventBase(), &event_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000500
501 // Add the event
502 if (event_add(&event_, 0) == -1) {
Mark Slee17496a02007-08-02 06:37:40 +0000503 GlobalOutput("TConnection::setFlags(): could not event_add");
Mark Slee2f6404d2006-10-10 01:37:40 +0000504 }
505}
506
507/**
508 * Closes a connection
509 */
510void TConnection::close() {
511 // Delete the registered libevent
512 if (event_del(&event_) == -1) {
David Reiss105961d2010-10-06 17:10:17 +0000513 GlobalOutput.perror("TConnection::close() event_del", errno);
514 }
515
516 if (serverEventHandler_ != NULL) {
517 serverEventHandler_->deleteContext(connectionContext_, inputProtocol_, outputProtocol_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000518 }
519
520 // Close the socket
David Reiss105961d2010-10-06 17:10:17 +0000521 tSocket_->close();
Mark Slee2f6404d2006-10-10 01:37:40 +0000522
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000523 // close any factory produced transports
524 factoryInputTransport_->close();
Aditya Agarwal9abb0d62007-01-24 22:53:54 +0000525 factoryOutputTransport_->close();
Aditya Agarwal1ea90522007-01-19 02:02:12 +0000526
Mark Slee2f6404d2006-10-10 01:37:40 +0000527 // Give this object back to the server that owns it
528 server_->returnConnection(this);
529}
530
David Reiss54bec5d2010-10-06 17:10:45 +0000531void TConnection::checkIdleBufferMemLimit(size_t readLimit,
532 size_t writeLimit) {
533 if (readLimit > 0 && readBufferSize_ > readLimit) {
David Reiss89a12942010-10-06 17:10:52 +0000534 free(readBuffer_);
535 readBuffer_ = NULL;
536 readBufferSize_ = 0;
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000537 }
David Reiss54bec5d2010-10-06 17:10:45 +0000538
539 if (writeLimit > 0 && largestWriteBufferSize_ > writeLimit) {
540 // just start over
David Reiss89a12942010-10-06 17:10:52 +0000541 outputTransport_->resetBuffer(server_->getWriteBufferDefaultSize());
David Reiss54bec5d2010-10-06 17:10:45 +0000542 largestWriteBufferSize_ = 0;
543 }
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000544}
545
David Reiss8ede8182010-09-02 15:26:28 +0000546TNonblockingServer::~TNonblockingServer() {
547 // TODO: We currently leak any active TConnection objects.
548 // Since we're shutting down and destroying the event_base, the TConnection
549 // objects will never receive any additional callbacks. (And even if they
550 // did, it would be bad, since they keep a pointer around to the server,
551 // which is being destroyed.)
552
553 // Clean up unused TConnection objects in connectionStack_
554 while (!connectionStack_.empty()) {
555 TConnection* connection = connectionStack_.top();
556 connectionStack_.pop();
557 delete connection;
558 }
559
560 if (eventBase_) {
561 event_base_free(eventBase_);
562 }
563
564 if (serverSocket_ >= 0) {
565 close(serverSocket_);
566 }
567}
568
Mark Slee2f6404d2006-10-10 01:37:40 +0000569/**
570 * Creates a new connection either by reusing an object off the stack or
571 * by allocating a new one entirely
572 */
David Reiss105961d2010-10-06 17:10:17 +0000573TConnection* TNonblockingServer::createConnection(int socket, short flags,
574 const sockaddr* addr,
575 socklen_t addrLen) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000576 // Check the stack
577 if (connectionStack_.empty()) {
David Reiss105961d2010-10-06 17:10:17 +0000578 return new TConnection(socket, flags, this, addr, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000579 } else {
580 TConnection* result = connectionStack_.top();
581 connectionStack_.pop();
David Reiss105961d2010-10-06 17:10:17 +0000582 result->init(socket, flags, this, addr, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000583 return result;
584 }
585}
586
587/**
588 * Returns a connection to the stack
589 */
590void TNonblockingServer::returnConnection(TConnection* connection) {
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000591 if (connectionStackLimit_ &&
592 (connectionStack_.size() >= connectionStackLimit_)) {
593 delete connection;
594 } else {
David Reiss54bec5d2010-10-06 17:10:45 +0000595 connection->checkIdleBufferMemLimit(idleReadBufferLimit_, idleWriteBufferLimit_);
Kevin Clarkcbcd63a2009-03-19 03:50:05 +0000596 connectionStack_.push(connection);
597 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000598}
599
600/**
David Reissa79e4882008-03-05 07:51:47 +0000601 * Server socket had something happen. We accept all waiting client
602 * connections on fd and assign TConnection objects to handle those requests.
Mark Slee2f6404d2006-10-10 01:37:40 +0000603 */
604void TNonblockingServer::handleEvent(int fd, short which) {
David Reiss3bb5e052010-01-25 19:31:31 +0000605 // Make sure that libevent didn't mess up the socket handles
Mark Slee2f6404d2006-10-10 01:37:40 +0000606 assert(fd == serverSocket_);
Mark Slee79b16942007-11-26 19:05:29 +0000607
Mark Slee2f6404d2006-10-10 01:37:40 +0000608 // Server socket accepted a new connection
609 socklen_t addrLen;
David Reiss105961d2010-10-06 17:10:17 +0000610 sockaddr_storage addrStorage;
611 sockaddr* addrp = (sockaddr*)&addrStorage;
612 addrLen = sizeof(addrStorage);
Mark Slee79b16942007-11-26 19:05:29 +0000613
Mark Slee2f6404d2006-10-10 01:37:40 +0000614 // Going to accept a new client socket
615 int clientSocket;
Mark Slee79b16942007-11-26 19:05:29 +0000616
Mark Slee2f6404d2006-10-10 01:37:40 +0000617 // Accept as many new clients as possible, even though libevent signaled only
618 // one, this helps us to avoid having to go back into the libevent engine so
619 // many times
David Reiss105961d2010-10-06 17:10:17 +0000620 while ((clientSocket = ::accept(fd, addrp, &addrLen)) != -1) {
David Reiss01fe1532010-03-09 05:19:25 +0000621 // If we're overloaded, take action here
622 if (overloadAction_ != T_OVERLOAD_NO_ACTION && serverOverloaded()) {
623 nConnectionsDropped_++;
624 nTotalConnectionsDropped_++;
625 if (overloadAction_ == T_OVERLOAD_CLOSE_ON_ACCEPT) {
626 close(clientSocket);
David Reiss83b8fda2010-03-09 05:19:34 +0000627 return;
David Reiss01fe1532010-03-09 05:19:25 +0000628 } else if (overloadAction_ == T_OVERLOAD_DRAIN_TASK_QUEUE) {
629 if (!drainPendingTask()) {
630 // Nothing left to discard, so we drop connection instead.
631 close(clientSocket);
David Reiss83b8fda2010-03-09 05:19:34 +0000632 return;
David Reiss01fe1532010-03-09 05:19:25 +0000633 }
634 }
635 }
Mark Slee2f6404d2006-10-10 01:37:40 +0000636 // Explicitly set this socket to NONBLOCK mode
637 int flags;
638 if ((flags = fcntl(clientSocket, F_GETFL, 0)) < 0 ||
639 fcntl(clientSocket, F_SETFL, flags | O_NONBLOCK) < 0) {
David Reiss01e55c12008-07-13 22:18:51 +0000640 GlobalOutput.perror("thriftServerEventHandler: set O_NONBLOCK (fcntl) ", errno);
Mark Slee2f6404d2006-10-10 01:37:40 +0000641 close(clientSocket);
642 return;
643 }
644
645 // Create a new TConnection for this client socket.
646 TConnection* clientConnection =
David Reiss105961d2010-10-06 17:10:17 +0000647 createConnection(clientSocket, EV_READ | EV_PERSIST, addrp, addrLen);
Mark Slee2f6404d2006-10-10 01:37:40 +0000648
649 // Fail fast if we could not create a TConnection object
650 if (clientConnection == NULL) {
David Reiss01e55c12008-07-13 22:18:51 +0000651 GlobalOutput.printf("thriftServerEventHandler: failed TConnection factory");
Mark Slee2f6404d2006-10-10 01:37:40 +0000652 close(clientSocket);
653 return;
654 }
655
656 // Put this client connection into the proper state
657 clientConnection->transition();
David Reiss3e7fca42009-09-19 01:59:13 +0000658
659 // addrLen is written by the accept() call, so needs to be set before the next call.
David Reiss105961d2010-10-06 17:10:17 +0000660 addrLen = sizeof(addrStorage);
Mark Slee2f6404d2006-10-10 01:37:40 +0000661 }
Mark Slee79b16942007-11-26 19:05:29 +0000662
Mark Slee2f6404d2006-10-10 01:37:40 +0000663 // Done looping accept, now we have to make sure the error is due to
664 // blocking. Any other error is a problem
665 if (errno != EAGAIN && errno != EWOULDBLOCK) {
David Reiss01e55c12008-07-13 22:18:51 +0000666 GlobalOutput.perror("thriftServerEventHandler: accept() ", errno);
Mark Slee2f6404d2006-10-10 01:37:40 +0000667 }
668}
669
/**
 * Creates a socket to listen on and binds it to the local port.
 *
 * The wildcard address for port_ is resolved with getaddrinfo(); an IPv6
 * result is preferred, otherwise the last entry of the list is used. The
 * bound socket is then passed to listenSocket(int) for further setup.
 *
 * @throws TException if address resolution, socket creation or bind fails
 */
void TNonblockingServer::listenSocket() {
  int s;
  struct addrinfo hints, *res, *res0;
  int error;

  // Sized generously and filled with a bounded write so that an
  // out-of-range port_ value cannot overrun the buffer.
  char port[16];
  memset(&hints, 0, sizeof(hints));
  hints.ai_family = PF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;
  hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
  snprintf(port, sizeof(port), "%d", port_);

  // Wildcard address
  error = getaddrinfo(NULL, port, &hints, &res0);
  if (error) {
    string errStr = "TNonblockingServer::serve() getaddrinfo " + string(gai_strerror(error));
    GlobalOutput(errStr.c_str());
    // Without an address there is no socket to listen on; report it like
    // the other failures in this function instead of returning silently.
    throw TException(errStr);
  }

  // Pick the ipv6 address first since ipv4 addresses can be mapped
  // into ipv6 space.
  for (res = res0; res; res = res->ai_next) {
    if (res->ai_family == AF_INET6 || res->ai_next == NULL)
      break;
  }

  // Create the server socket
  s = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
  if (s == -1) {
    freeaddrinfo(res0);
    throw TException("TNonblockingServer::serve() socket() -1");
  }

  #ifdef IPV6_V6ONLY
  if (res->ai_family == AF_INET6) {
    // Clear IPV6_V6ONLY on the IPv6 socket; failure is only logged
    int zero = 0;
    if (-1 == setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &zero, sizeof(zero))) {
      GlobalOutput("TServerSocket::listen() IPV6_V6ONLY");
    }
  }
  #endif // #ifdef IPV6_V6ONLY


  int one = 1;

  // Set reuseaddr to avoid 2MSL delay on server restart
  setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));

  if (bind(s, res->ai_addr, res->ai_addrlen) == -1) {
    close(s);
    freeaddrinfo(res0);
    throw TException("TNonblockingServer::serve() bind");
  }

  // Done with the addr info
  freeaddrinfo(res0);

  // Set up this file descriptor for listening
  listenSocket(s);
}
734
735/**
736 * Takes a socket created by listenSocket() and sets various options on it
737 * to prepare for use in the server.
738 */
739void TNonblockingServer::listenSocket(int s) {
Mark Slee2f6404d2006-10-10 01:37:40 +0000740 // Set socket to nonblocking mode
741 int flags;
Mark Slee79b16942007-11-26 19:05:29 +0000742 if ((flags = fcntl(s, F_GETFL, 0)) < 0 ||
743 fcntl(s, F_SETFL, flags | O_NONBLOCK) < 0) {
744 close(s);
745 throw TException("TNonblockingServer::serve() O_NONBLOCK");
Mark Slee2f6404d2006-10-10 01:37:40 +0000746 }
747
748 int one = 1;
749 struct linger ling = {0, 0};
Mark Slee2f6404d2006-10-10 01:37:40 +0000750
751 // Keepalive to ensure full result flushing
Mark Slee79b16942007-11-26 19:05:29 +0000752 setsockopt(s, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one));
Mark Slee2f6404d2006-10-10 01:37:40 +0000753
754 // Turn linger off to avoid hung sockets
Mark Slee79b16942007-11-26 19:05:29 +0000755 setsockopt(s, SOL_SOCKET, SO_LINGER, &ling, sizeof(ling));
Mark Slee2f6404d2006-10-10 01:37:40 +0000756
757 // Set TCP nodelay if available, MAC OS X Hack
758 // See http://lists.danga.com/pipermail/memcached/2005-March/001240.html
759 #ifndef TCP_NOPUSH
Mark Slee79b16942007-11-26 19:05:29 +0000760 setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one));
Mark Slee2f6404d2006-10-10 01:37:40 +0000761 #endif
762
David Reiss1c20c872010-03-09 05:20:14 +0000763 #ifdef TCP_LOW_MIN_RTO
764 if (TSocket::getUseLowMinRto()) {
765 setsockopt(s, IPPROTO_TCP, TCP_LOW_MIN_RTO, &one, sizeof(one));
766 }
767 #endif
768
Mark Slee79b16942007-11-26 19:05:29 +0000769 if (listen(s, LISTEN_BACKLOG) == -1) {
770 close(s);
771 throw TException("TNonblockingServer::serve() listen");
Mark Slee2f6404d2006-10-10 01:37:40 +0000772 }
773
Mark Slee79b16942007-11-26 19:05:29 +0000774 // Cool, this socket is good to go, set it as the serverSocket_
775 serverSocket_ = s;
776}
777
David Reiss01fe1532010-03-09 05:19:25 +0000778void TNonblockingServer::createNotificationPipe() {
779 if (pipe(notificationPipeFDs_) != 0) {
780 GlobalOutput.perror("TNonblockingServer::createNotificationPipe ", errno);
781 throw TException("can't create notification pipe");
782 }
David Reiss83b8fda2010-03-09 05:19:34 +0000783 int flags;
784 if ((flags = fcntl(notificationPipeFDs_[0], F_GETFL, 0)) < 0 ||
785 fcntl(notificationPipeFDs_[0], F_SETFL, flags | O_NONBLOCK) < 0) {
786 close(notificationPipeFDs_[0]);
787 close(notificationPipeFDs_[1]);
788 throw TException("TNonblockingServer::createNotificationPipe() O_NONBLOCK");
789 }
David Reiss01fe1532010-03-09 05:19:25 +0000790}
791
Mark Slee79b16942007-11-26 19:05:29 +0000792/**
793 * Register the core libevent events onto the proper base.
794 */
795void TNonblockingServer::registerEvents(event_base* base) {
796 assert(serverSocket_ != -1);
797 assert(!eventBase_);
798 eventBase_ = base;
799
800 // Print some libevent stats
David Reiss01e55c12008-07-13 22:18:51 +0000801 GlobalOutput.printf("libevent %s method %s",
Mark Slee79b16942007-11-26 19:05:29 +0000802 event_get_version(),
803 event_get_method());
Mark Slee2f6404d2006-10-10 01:37:40 +0000804
805 // Register the server event
Mark Slee79b16942007-11-26 19:05:29 +0000806 event_set(&serverEvent_,
Mark Slee2f6404d2006-10-10 01:37:40 +0000807 serverSocket_,
808 EV_READ | EV_PERSIST,
809 TNonblockingServer::eventHandler,
810 this);
Mark Slee79b16942007-11-26 19:05:29 +0000811 event_base_set(eventBase_, &serverEvent_);
Mark Slee2f6404d2006-10-10 01:37:40 +0000812
813 // Add the event and start up the server
Mark Slee79b16942007-11-26 19:05:29 +0000814 if (-1 == event_add(&serverEvent_, 0)) {
815 throw TException("TNonblockingServer::serve(): coult not event_add");
Mark Slee2f6404d2006-10-10 01:37:40 +0000816 }
David Reiss01fe1532010-03-09 05:19:25 +0000817 if (threadPoolProcessing_) {
818 // Create an event to be notified when a task finishes
819 event_set(&notificationEvent_,
820 getNotificationRecvFD(),
821 EV_READ | EV_PERSIST,
822 TConnection::taskHandler,
823 this);
David Reiss1c20c872010-03-09 05:20:14 +0000824
David Reiss01fe1532010-03-09 05:19:25 +0000825 // Attach to the base
826 event_base_set(eventBase_, &notificationEvent_);
827
828 // Add the event and start up the server
829 if (-1 == event_add(&notificationEvent_, 0)) {
830 throw TException("TNonblockingServer::serve(): notification event_add fail");
831 }
832 }
833}
834
David Reiss068f4162010-03-09 05:19:45 +0000835void TNonblockingServer::setThreadManager(boost::shared_ptr<ThreadManager> threadManager) {
836 threadManager_ = threadManager;
837 if (threadManager != NULL) {
838 threadManager->setExpireCallback(std::tr1::bind(&TNonblockingServer::expireClose, this, std::tr1::placeholders::_1));
839 threadPoolProcessing_ = true;
840 } else {
841 threadPoolProcessing_ = false;
842 }
843}
844
David Reiss01fe1532010-03-09 05:19:25 +0000845bool TNonblockingServer::serverOverloaded() {
846 size_t activeConnections = numTConnections_ - connectionStack_.size();
847 if (numActiveProcessors_ > maxActiveProcessors_ ||
848 activeConnections > maxConnections_) {
849 if (!overloaded_) {
850 GlobalOutput.printf("thrift non-blocking server overload condition");
851 overloaded_ = true;
852 }
853 } else {
854 if (overloaded_ &&
855 (numActiveProcessors_ <= overloadHysteresis_ * maxActiveProcessors_) &&
856 (activeConnections <= overloadHysteresis_ * maxConnections_)) {
857 GlobalOutput.printf("thrift non-blocking server overload ended; %u dropped (%llu total)",
858 nConnectionsDropped_, nTotalConnectionsDropped_);
859 nConnectionsDropped_ = 0;
860 overloaded_ = false;
861 }
862 }
863
864 return overloaded_;
865}
866
867bool TNonblockingServer::drainPendingTask() {
868 if (threadManager_) {
869 boost::shared_ptr<Runnable> task = threadManager_->removeNextPending();
870 if (task) {
871 TConnection* connection =
872 static_cast<TConnection::Task*>(task.get())->getTConnection();
873 assert(connection && connection->getServer()
874 && connection->getState() == APP_WAIT_TASK);
875 connection->forceClose();
876 return true;
877 }
878 }
879 return false;
Mark Slee79b16942007-11-26 19:05:29 +0000880}
881
David Reiss068f4162010-03-09 05:19:45 +0000882void TNonblockingServer::expireClose(boost::shared_ptr<Runnable> task) {
883 TConnection* connection =
884 static_cast<TConnection::Task*>(task.get())->getTConnection();
885 assert(connection && connection->getServer()
886 && connection->getState() == APP_WAIT_TASK);
887 connection->forceClose();
888}
889
Mark Slee79b16942007-11-26 19:05:29 +0000890/**
891 * Main workhorse function, starts up the server listening on a port and
892 * loops over the libevent handler.
893 */
894void TNonblockingServer::serve() {
895 // Init socket
896 listenSocket();
897
David Reiss01fe1532010-03-09 05:19:25 +0000898 if (threadPoolProcessing_) {
899 // Init task completion notification pipe
900 createNotificationPipe();
901 }
902
Mark Slee79b16942007-11-26 19:05:29 +0000903 // Initialize libevent core
904 registerEvents(static_cast<event_base*>(event_init()));
Mark Slee2f6404d2006-10-10 01:37:40 +0000905
Mark Sleeb4d3e7b2007-11-28 01:51:43 +0000906 // Run the preServe event
907 if (eventHandler_ != NULL) {
908 eventHandler_->preServe();
dweatherford58985992007-06-19 23:10:19 +0000909 }
910
Mark Sleee02385b2007-06-09 01:21:16 +0000911 // Run libevent engine, never returns, invokes calls to eventHandler
Mark Slee79b16942007-11-26 19:05:29 +0000912 event_base_loop(eventBase_, 0);
Mark Slee2f6404d2006-10-10 01:37:40 +0000913}
914
T Jake Lucianib5e62212009-01-31 22:36:20 +0000915}}} // apache::thrift::server