|  | /* | 
|  | * Licensed to the Apache Software Foundation (ASF) under one | 
|  | * or more contributor license agreements. See the NOTICE file | 
|  | * distributed with this work for additional information | 
|  | * regarding copyright ownership. The ASF licenses this file | 
|  | * to you under the Apache License, Version 2.0 (the | 
|  | * "License"); you may not use this file except in compliance | 
|  | * with the License. You may obtain a copy of the License at | 
|  | * | 
|  | *   http://www.apache.org/licenses/LICENSE-2.0 | 
|  | * | 
|  | * Unless required by applicable law or agreed to in writing, | 
|  | * software distributed under the License is distributed on an | 
|  | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | 
|  | * KIND, either express or implied. See the License for the | 
|  | * specific language governing permissions and limitations | 
|  | * under the License. | 
|  | */ | 
|  |  | 
|  | #include <sys/time.h> | 
|  |  | 
|  | #include "FacebookBase.h" | 
|  | #include "ServiceTracker.h" | 
|  | #include <thrift/concurrency/ThreadManager.h> | 
|  |  | 
|  | using namespace std; | 
|  | using namespace facebook::fb303; | 
|  | using namespace apache::thrift::concurrency; | 
|  |  | 
|  |  | 
|  | uint64_t ServiceTracker::CHECKPOINT_MINIMUM_INTERVAL_SECONDS = 60; | 
|  | int ServiceTracker::LOG_LEVEL = 5; | 
|  |  | 
|  |  | 
|  | ServiceTracker::ServiceTracker(facebook::fb303::FacebookBase *handler, | 
|  | void (*logMethod)(int, const string &), | 
|  | bool featureCheckpoint, | 
|  | bool featureStatusCheck, | 
|  | bool featureThreadCheck, | 
|  | Stopwatch::Unit stopwatchUnit) | 
|  | : handler_(handler), logMethod_(logMethod), | 
|  | featureCheckpoint_(featureCheckpoint), | 
|  | featureStatusCheck_(featureStatusCheck), | 
|  | featureThreadCheck_(featureThreadCheck), | 
|  | stopwatchUnit_(stopwatchUnit), | 
|  | checkpointServices_(0) | 
|  | { | 
|  | if (featureCheckpoint_) { | 
|  | time_t now = time(NULL); | 
|  | checkpointTime_ = now; | 
|  | } else { | 
|  | checkpointTime_ = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Registers the beginning of a "service method": basically, any of | 
|  | * the implementations of Thrift remote procedure calls that a | 
|  | * FacebookBase handler is handling.  Controls concurrent | 
|  | * services and reports statistics (via log and via fb303 counters). | 
|  | * Throws an exception if the server is not ready to handle service | 
|  | * methods yet. | 
|  | * | 
|  | * note: The relationship between startService() and finishService() | 
|  | * is currently defined so that a call to finishService() should only | 
|  | * be matched to this call to startService() if this method returns | 
|  | * without exception.  It wouldn't be a problem to implement things | 
|  | * the other way, so that *every* start needed a finish, but this | 
|  | * convention was chosen to match the way an object's constructor and | 
|  | * destructor work together, i.e. to work well with ServiceMethod | 
|  | * objects. | 
|  | * | 
|  | * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod | 
|  | *                                           object instantiated at the start | 
|  | *                                           of the service method. | 
|  | */ | 
|  | void | 
|  | ServiceTracker::startService(const ServiceMethod &serviceMethod) | 
|  | { | 
|  | // note: serviceMethod.timer_ automatically starts at construction. | 
|  |  | 
|  | // log service start | 
|  | logMethod_(5, serviceMethod.signature_); | 
|  |  | 
|  | // check handler ready | 
|  | if (featureStatusCheck_ && !serviceMethod.featureLogOnly_) { | 
|  | // note: Throwing exceptions before counting statistics.  See note | 
|  | // in method header. | 
|  | // note: A STOPPING server is not accepting new connections, but it | 
|  | // is still handling any already-connected threads -- so from the | 
|  | // service method's point of view, a status of STOPPING is a green | 
|  | // light. | 
|  | facebook::fb303::fb_status status = handler_->getStatus(); | 
|  | if (status != facebook::fb303::ALIVE | 
|  | && status != facebook::fb303::STOPPING) { | 
|  | if (status == facebook::fb303::STARTING) { | 
|  | throw ServiceException("Server starting up; please try again later"); | 
|  | } else { | 
|  | throw ServiceException("Server not alive; please try again later"); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // check server threads | 
|  | if (featureThreadCheck_ && !serviceMethod.featureLogOnly_) { | 
|  | // note: Might want to put these messages in reportCheckpoint() if | 
|  | // log is getting spammed. | 
|  | if (threadManager_ != NULL) { | 
|  | size_t idle_count = threadManager_->idleWorkerCount(); | 
|  | if (idle_count == 0) { | 
|  | stringstream message; | 
|  | message << "service " << serviceMethod.signature_ | 
|  | << ": all threads (" << threadManager_->workerCount() | 
|  | << ") in use"; | 
|  | logMethod_(3, message.str()); | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Logs a significant step in the middle of a "service method"; see | 
|  | * startService. | 
|  | * | 
|  | * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod | 
|  | *                                           object instantiated at the start | 
|  | *                                           of the service method. | 
|  | * @return int64_t Elapsed units (see stopwatchUnit_) since ServiceMethod | 
|  | *                 instantiation. | 
|  | */ | 
|  | int64_t | 
|  | ServiceTracker::stepService(const ServiceMethod &serviceMethod, | 
|  | const string &stepName) | 
|  | { | 
|  | stringstream message; | 
|  | string elapsed_label; | 
|  | int64_t elapsed = serviceMethod.timer_.elapsedUnits(stopwatchUnit_, | 
|  | &elapsed_label); | 
|  | message << serviceMethod.signature_ | 
|  | << ' ' << stepName | 
|  | << " [" << elapsed_label << ']'; | 
|  | logMethod_(5, message.str()); | 
|  | return elapsed; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Registers the end of a "service method"; see startService(). | 
|  | * | 
|  | * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod | 
|  | *                                           object instantiated at the start | 
|  | *                                           of the service method. | 
|  | */ | 
|  | void | 
|  | ServiceTracker::finishService(const ServiceMethod &serviceMethod) | 
|  | { | 
|  | // log end of service | 
|  | stringstream message; | 
|  | string duration_label; | 
|  | int64_t duration = serviceMethod.timer_.elapsedUnits(stopwatchUnit_, | 
|  | &duration_label); | 
|  | message << serviceMethod.signature_ | 
|  | << " finish [" << duration_label << ']'; | 
|  | logMethod_(5, message.str()); | 
|  |  | 
|  | // count, record, and maybe report service statistics | 
|  | if (!serviceMethod.featureLogOnly_) { | 
|  |  | 
|  | if (!featureCheckpoint_) { | 
|  |  | 
|  | // lifetime counters | 
|  | // (note: No need to lock statisticsMutex_ if not doing checkpoint; | 
|  | // FacebookService::incrementCounter() is already thread-safe.) | 
|  | handler_->incrementCounter("lifetime_services"); | 
|  |  | 
|  | } else { | 
|  |  | 
|  | statisticsMutex_.lock(); | 
|  | // note: No exceptions expected from this code block.  Wrap in a try | 
|  | // just to be safe. | 
|  | try { | 
|  |  | 
|  | // lifetime counters | 
|  | // note: Good to synchronize this with the increment of | 
|  | // checkpoint services, even though incrementCounter() is | 
|  | // already thread-safe, for the sake of checkpoint reporting | 
|  | // consistency (i.e.  since the last checkpoint, | 
|  | // lifetime_services has incremented by checkpointServices_). | 
|  | handler_->incrementCounter("lifetime_services"); | 
|  |  | 
|  | // checkpoint counters | 
|  | checkpointServices_++; | 
|  | checkpointDuration_ += duration; | 
|  |  | 
|  | // per-service timing | 
|  | // note kjv: According to my tests it is very slightly faster to | 
|  | // call insert() once (and detect not-found) than calling find() | 
|  | // and then maybe insert (if not-found).  However, the difference | 
|  | // is tiny for small maps like this one, and the code for the | 
|  | // faster solution is slightly less readable.  Also, I wonder if | 
|  | // the instantiation of the (often unused) pair to insert makes | 
|  | // the first algorithm slower after all. | 
|  | map<string, pair<uint64_t, uint64_t> >::iterator iter; | 
|  | iter = checkpointServiceDuration_.find(serviceMethod.name_); | 
|  | if (iter != checkpointServiceDuration_.end()) { | 
|  | iter->second.first++; | 
|  | iter->second.second += duration; | 
|  | } else { | 
|  | checkpointServiceDuration_.insert(make_pair(serviceMethod.name_, | 
|  | make_pair(1, duration))); | 
|  | } | 
|  |  | 
|  | // maybe report checkpoint | 
|  | // note: ...if it's been long enough since the last report. | 
|  | time_t now = time(NULL); | 
|  | uint64_t check_interval = now - checkpointTime_; | 
|  | if (check_interval >= CHECKPOINT_MINIMUM_INTERVAL_SECONDS) { | 
|  | reportCheckpoint(); | 
|  | } | 
|  |  | 
|  | } catch (...) { | 
|  | statisticsMutex_.unlock(); | 
|  | throw; | 
|  | } | 
|  | statisticsMutex_.unlock(); | 
|  |  | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Logs some statistics gathered since the last call to this method. | 
|  | * | 
|  | * note: Thread race conditions on this method could cause | 
|  | * misreporting and/or undefined behavior; the caller must protect | 
|  | * uses of the object variables (and calls to this method) with a | 
|  | * mutex. | 
|  | * | 
|  | */ | 
|  | void | 
|  | ServiceTracker::reportCheckpoint() | 
|  | { | 
|  | time_t now = time(NULL); | 
|  |  | 
|  | uint64_t check_count = checkpointServices_; | 
|  | uint64_t check_interval = now - checkpointTime_; | 
|  | uint64_t check_duration = checkpointDuration_; | 
|  |  | 
|  | // export counters for timing of service methods (by service name) | 
|  | handler_->setCounter("checkpoint_time", check_interval); | 
|  | map<string, pair<uint64_t, uint64_t> >::iterator iter; | 
|  | uint64_t count; | 
|  | for (iter = checkpointServiceDuration_.begin(); | 
|  | iter != checkpointServiceDuration_.end(); | 
|  | iter++) { | 
|  | count = iter->second.first; | 
|  | handler_->setCounter(string("checkpoint_count_") + iter->first, count); | 
|  | if (count == 0) { | 
|  | handler_->setCounter(string("checkpoint_speed_") + iter->first, | 
|  | 0); | 
|  | } else { | 
|  | handler_->setCounter(string("checkpoint_speed_") + iter->first, | 
|  | iter->second.second / count); | 
|  | } | 
|  | } | 
|  |  | 
|  | // reset checkpoint variables | 
|  | // note: Clearing the map while other threads are using it might | 
|  | // cause undefined behavior. | 
|  | checkpointServiceDuration_.clear(); | 
|  | checkpointTime_ = now; | 
|  | checkpointServices_ = 0; | 
|  | checkpointDuration_ = 0; | 
|  |  | 
|  | // get lifetime variables | 
|  | uint64_t life_count = handler_->getCounter("lifetime_services"); | 
|  | uint64_t life_interval = now - handler_->aliveSince(); | 
|  |  | 
|  | // log checkpoint | 
|  | stringstream message; | 
|  | message << "checkpoint_time:" << check_interval | 
|  | << " checkpoint_services:" << check_count | 
|  | << " checkpoint_speed_sum:" << check_duration | 
|  | << " lifetime_time:" << life_interval | 
|  | << " lifetime_services:" << life_count; | 
|  | if (featureThreadCheck_ && threadManager_ != NULL) { | 
|  | size_t worker_count = threadManager_->workerCount(); | 
|  | size_t idle_count = threadManager_->idleWorkerCount(); | 
|  | message << " total_workers:" << worker_count | 
|  | << " active_workers:" << (worker_count - idle_count); | 
|  | } | 
|  | logMethod_(4, message.str()); | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Remembers the thread manager used in the server, for monitoring thread | 
|  | * activity. | 
|  | * | 
|  | * @param shared_ptr<ThreadManager> threadManager The server's thread manager. | 
|  | */ | 
|  | void | 
|  | ServiceTracker::setThreadManager(boost::shared_ptr<ThreadManager> | 
|  | threadManager) | 
|  | { | 
|  | threadManager_ = threadManager; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Logs messages to stdout; the passed message will be logged if the | 
|  | * passed level is less than or equal to LOG_LEVEL. | 
|  | * | 
|  | * This is the default logging method used by the ServiceTracker.  An | 
|  | * alternate logging method (that accepts the same parameters) may be | 
|  | * specified to the constructor. | 
|  | * | 
|  | * @param int level A level associated with the message: higher levels | 
|  | *                  are used to indicate higher levels of detail. | 
|  | * @param string message The message to log. | 
|  | */ | 
|  | void | 
|  | ServiceTracker::defaultLogMethod(int level, const string &message) | 
|  | { | 
|  | if (level <= LOG_LEVEL) { | 
|  | string level_string; | 
|  | time_t now = time(NULL); | 
|  | char now_pretty[26]; | 
|  | ctime_r(&now, now_pretty); | 
|  | now_pretty[24] = '\0'; | 
|  | switch (level) { | 
|  | case 1: | 
|  | level_string = "CRITICAL"; | 
|  | break; | 
|  | case 2: | 
|  | level_string = "ERROR"; | 
|  | break; | 
|  | case 3: | 
|  | level_string = "WARNING"; | 
|  | break; | 
|  | case 5: | 
|  | level_string = "DEBUG"; | 
|  | break; | 
|  | case 4: | 
|  | default: | 
|  | level_string = "INFO"; | 
|  | break; | 
|  | } | 
|  | cout << '[' << level_string << "] [" << now_pretty << "] " | 
|  | << message << endl; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /** | 
|  | * Creates a Stopwatch, which can report the time elapsed since its | 
|  | * creation. | 
|  | * | 
|  | */ | 
|  | Stopwatch::Stopwatch() | 
|  | { | 
|  | gettimeofday(&startTime_, NULL); | 
|  | } | 
|  |  | 
|  | void | 
|  | Stopwatch::reset() | 
|  | { | 
|  | gettimeofday(&startTime_, NULL); | 
|  | } | 
|  |  | 
|  | uint64_t | 
|  | Stopwatch::elapsedUnits(Stopwatch::Unit unit, string *label) const | 
|  | { | 
|  | timeval now_time; | 
|  | gettimeofday(&now_time, NULL); | 
|  | time_t duration_secs = now_time.tv_sec - startTime_.tv_sec; | 
|  |  | 
|  | uint64_t duration_units; | 
|  | switch (unit) { | 
|  | case UNIT_SECONDS: | 
|  | duration_units = duration_secs | 
|  | + (now_time.tv_usec - startTime_.tv_usec + 500000) / 1000000; | 
|  | if (NULL != label) { | 
|  | stringstream ss_label; | 
|  | ss_label << duration_units << " secs"; | 
|  | label->assign(ss_label.str()); | 
|  | } | 
|  | break; | 
|  | case UNIT_MICROSECONDS: | 
|  | duration_units = duration_secs * 1000000 | 
|  | + now_time.tv_usec - startTime_.tv_usec; | 
|  | if (NULL != label) { | 
|  | stringstream ss_label; | 
|  | ss_label << duration_units << " us"; | 
|  | label->assign(ss_label.str()); | 
|  | } | 
|  | break; | 
|  | case UNIT_MILLISECONDS: | 
|  | default: | 
|  | duration_units = duration_secs * 1000 | 
|  | + (now_time.tv_usec - startTime_.tv_usec + 500) / 1000; | 
|  | if (NULL != label) { | 
|  | stringstream ss_label; | 
|  | ss_label << duration_units << " ms"; | 
|  | label->assign(ss_label.str()); | 
|  | } | 
|  | break; | 
|  | } | 
|  | return duration_units; | 
|  | } | 
|  |  | 
|  | /** | 
|  | * Creates a ServiceMethod, used for tracking a single service method | 
|  | * invocation (via the ServiceTracker).  The passed name of the | 
|  | * ServiceMethod is used to group statistics (e.g. counts and durations) | 
|  | * for similar invocations; the passed signature is used to uniquely | 
|  | * identify the particular invocation in the log. | 
|  | * | 
|  | * note: A version of this constructor is provided that automatically | 
|  | * forms a signature the name and a passed numeric id.  Silly, sure, | 
|  | * but commonly used, since it often saves the caller a line or two of | 
|  | * code. | 
|  | * | 
|  | * @param ServiceTracker *tracker The service tracker that will track this | 
|  | *                                ServiceMethod. | 
|  | * @param const string &name The service method name (usually independent | 
|  | *                           of service method parameters). | 
|  | * @param const string &signature A signature uniquely identifying the method | 
|  | *                                invocation (usually name plus parameters). | 
|  | */ | 
|  | ServiceMethod::ServiceMethod(ServiceTracker *tracker, | 
|  | const string &name, | 
|  | const string &signature, | 
|  | bool featureLogOnly) | 
|  | : tracker_(tracker), name_(name), signature_(signature), | 
|  | featureLogOnly_(featureLogOnly) | 
|  | { | 
|  | // note: timer_ automatically starts at construction. | 
|  |  | 
|  | // invoke tracker to start service | 
|  | // note: Might throw.  If it throws, then this object's destructor | 
|  | // won't be called, which is according to plan: finishService() is | 
|  | // only supposed to be matched to startService() if startService() | 
|  | // returns without error. | 
|  | tracker_->startService(*this); | 
|  | } | 
|  |  | 
|  | ServiceMethod::ServiceMethod(ServiceTracker *tracker, | 
|  | const string &name, | 
|  | uint64_t id, | 
|  | bool featureLogOnly) | 
|  | : tracker_(tracker), name_(name), featureLogOnly_(featureLogOnly) | 
|  | { | 
|  | // note: timer_ automatically starts at construction. | 
|  | stringstream ss_signature; | 
|  | ss_signature << name << " (" << id << ')'; | 
|  | signature_ = ss_signature.str(); | 
|  |  | 
|  | // invoke tracker to start service | 
|  | // note: Might throw.  If it throws, then this object's destructor | 
|  | // won't be called, which is according to plan: finishService() is | 
|  | // only supposed to be matched to startService() if startService() | 
|  | // returns without error. | 
|  | tracker_->startService(*this); | 
|  | } | 
|  |  | 
|  | ServiceMethod::~ServiceMethod() | 
|  | { | 
|  | // invoke tracker to finish service | 
|  | // note: Not expecting an exception from this code, but | 
|  | // finishService() might conceivably throw an out-of-memory | 
|  | // exception. | 
|  | try { | 
|  | tracker_->finishService(*this); | 
|  | } catch (...) { | 
|  | // don't throw | 
|  | } | 
|  | } | 
|  |  | 
|  | uint64_t | 
|  | ServiceMethod::step(const std::string &stepName) | 
|  | { | 
|  | return tracker_->stepService(*this, stepName); | 
|  | } |