| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| #include <sys/time.h> |
| |
| #include "FacebookBase.h" |
| #include "ServiceTracker.h" |
| #include <thrift/concurrency/ThreadManager.h> |
| |
| using namespace std; |
| using namespace facebook::fb303; |
| using namespace apache::thrift::concurrency; |
| |
| |
| uint64_t ServiceTracker::CHECKPOINT_MINIMUM_INTERVAL_SECONDS = 60; |
| int ServiceTracker::LOG_LEVEL = 5; |
| |
| |
| ServiceTracker::ServiceTracker(facebook::fb303::FacebookBase *handler, |
| void (*logMethod)(int, const string &), |
| bool featureCheckpoint, |
| bool featureStatusCheck, |
| bool featureThreadCheck, |
| Stopwatch::Unit stopwatchUnit) |
| : handler_(handler), logMethod_(logMethod), |
| featureCheckpoint_(featureCheckpoint), |
| featureStatusCheck_(featureStatusCheck), |
| featureThreadCheck_(featureThreadCheck), |
| stopwatchUnit_(stopwatchUnit), |
| checkpointServices_(0) |
| { |
| if (featureCheckpoint_) { |
| time_t now = time(NULL); |
| checkpointTime_ = now; |
| } else { |
| checkpointTime_ = 0; |
| } |
| } |
| |
| /** |
| * Registers the beginning of a "service method": basically, any of |
| * the implementations of Thrift remote procedure calls that a |
| * FacebookBase handler is handling. Controls concurrent |
| * services and reports statistics (via log and via fb303 counters). |
| * Throws an exception if the server is not ready to handle service |
| * methods yet. |
| * |
| * note: The relationship between startService() and finishService() |
| * is currently defined so that a call to finishService() should only |
| * be matched to this call to startService() if this method returns |
| * without exception. It wouldn't be a problem to implement things |
| * the other way, so that *every* start needed a finish, but this |
| * convention was chosen to match the way an object's constructor and |
| * destructor work together, i.e. to work well with ServiceMethod |
| * objects. |
| * |
| * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod |
| * object instantiated at the start |
| * of the service method. |
| */ |
| void |
| ServiceTracker::startService(const ServiceMethod &serviceMethod) |
| { |
| // note: serviceMethod.timer_ automatically starts at construction. |
| |
| // log service start |
| logMethod_(5, serviceMethod.signature_); |
| |
| // check handler ready |
| if (featureStatusCheck_ && !serviceMethod.featureLogOnly_) { |
| // note: Throwing exceptions before counting statistics. See note |
| // in method header. |
| // note: A STOPPING server is not accepting new connections, but it |
| // is still handling any already-connected threads -- so from the |
| // service method's point of view, a status of STOPPING is a green |
| // light. |
| facebook::fb303::fb_status status = handler_->getStatus(); |
| if (status != facebook::fb303::ALIVE |
| && status != facebook::fb303::STOPPING) { |
| if (status == facebook::fb303::STARTING) { |
| throw ServiceException("Server starting up; please try again later"); |
| } else { |
| throw ServiceException("Server not alive; please try again later"); |
| } |
| } |
| } |
| |
| // check server threads |
| if (featureThreadCheck_ && !serviceMethod.featureLogOnly_) { |
| // note: Might want to put these messages in reportCheckpoint() if |
| // log is getting spammed. |
| if (threadManager_ != NULL) { |
| size_t idle_count = threadManager_->idleWorkerCount(); |
| if (idle_count == 0) { |
| stringstream message; |
| message << "service " << serviceMethod.signature_ |
| << ": all threads (" << threadManager_->workerCount() |
| << ") in use"; |
| logMethod_(3, message.str()); |
| } |
| } |
| } |
| } |
| |
| /** |
| * Logs a significant step in the middle of a "service method"; see |
| * startService. |
| * |
| * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod |
| * object instantiated at the start |
| * of the service method. |
| * @return int64_t Elapsed units (see stopwatchUnit_) since ServiceMethod |
| * instantiation. |
| */ |
| int64_t |
| ServiceTracker::stepService(const ServiceMethod &serviceMethod, |
| const string &stepName) |
| { |
| stringstream message; |
| string elapsed_label; |
| int64_t elapsed = serviceMethod.timer_.elapsedUnits(stopwatchUnit_, |
| &elapsed_label); |
| message << serviceMethod.signature_ |
| << ' ' << stepName |
| << " [" << elapsed_label << ']'; |
| logMethod_(5, message.str()); |
| return elapsed; |
| } |
| |
| /** |
| * Registers the end of a "service method"; see startService(). |
| * |
| * @param const ServiceMethod &serviceMethod A reference to the ServiceMethod |
| * object instantiated at the start |
| * of the service method. |
| */ |
| void |
| ServiceTracker::finishService(const ServiceMethod &serviceMethod) |
| { |
| // log end of service |
| stringstream message; |
| string duration_label; |
| int64_t duration = serviceMethod.timer_.elapsedUnits(stopwatchUnit_, |
| &duration_label); |
| message << serviceMethod.signature_ |
| << " finish [" << duration_label << ']'; |
| logMethod_(5, message.str()); |
| |
| // count, record, and maybe report service statistics |
| if (!serviceMethod.featureLogOnly_) { |
| |
| if (!featureCheckpoint_) { |
| |
| // lifetime counters |
| // (note: No need to lock statisticsMutex_ if not doing checkpoint; |
| // FacebookService::incrementCounter() is already thread-safe.) |
| handler_->incrementCounter("lifetime_services"); |
| |
| } else { |
| |
| statisticsMutex_.lock(); |
| // note: No exceptions expected from this code block. Wrap in a try |
| // just to be safe. |
| try { |
| |
| // lifetime counters |
| // note: Good to synchronize this with the increment of |
| // checkpoint services, even though incrementCounter() is |
| // already thread-safe, for the sake of checkpoint reporting |
| // consistency (i.e. since the last checkpoint, |
| // lifetime_services has incremented by checkpointServices_). |
| handler_->incrementCounter("lifetime_services"); |
| |
| // checkpoint counters |
| checkpointServices_++; |
| checkpointDuration_ += duration; |
| |
| // per-service timing |
| // note kjv: According to my tests it is very slightly faster to |
| // call insert() once (and detect not-found) than calling find() |
| // and then maybe insert (if not-found). However, the difference |
| // is tiny for small maps like this one, and the code for the |
| // faster solution is slightly less readable. Also, I wonder if |
| // the instantiation of the (often unused) pair to insert makes |
| // the first algorithm slower after all. |
| map<string, pair<uint64_t, uint64_t> >::iterator iter; |
| iter = checkpointServiceDuration_.find(serviceMethod.name_); |
| if (iter != checkpointServiceDuration_.end()) { |
| iter->second.first++; |
| iter->second.second += duration; |
| } else { |
| checkpointServiceDuration_.insert(make_pair(serviceMethod.name_, |
| make_pair(1, duration))); |
| } |
| |
| // maybe report checkpoint |
| // note: ...if it's been long enough since the last report. |
| time_t now = time(NULL); |
| uint64_t check_interval = now - checkpointTime_; |
| if (check_interval >= CHECKPOINT_MINIMUM_INTERVAL_SECONDS) { |
| reportCheckpoint(); |
| } |
| |
| } catch (...) { |
| statisticsMutex_.unlock(); |
| throw; |
| } |
| statisticsMutex_.unlock(); |
| |
| } |
| } |
| } |
| |
| /** |
| * Logs some statistics gathered since the last call to this method. |
| * |
| * note: Thread race conditions on this method could cause |
| * misreporting and/or undefined behavior; the caller must protect |
| * uses of the object variables (and calls to this method) with a |
| * mutex. |
| * |
| */ |
| void |
| ServiceTracker::reportCheckpoint() |
| { |
| time_t now = time(NULL); |
| |
| uint64_t check_count = checkpointServices_; |
| uint64_t check_interval = now - checkpointTime_; |
| uint64_t check_duration = checkpointDuration_; |
| |
| // export counters for timing of service methods (by service name) |
| handler_->setCounter("checkpoint_time", check_interval); |
| map<string, pair<uint64_t, uint64_t> >::iterator iter; |
| uint64_t count; |
| for (iter = checkpointServiceDuration_.begin(); |
| iter != checkpointServiceDuration_.end(); |
| ++iter) { |
| count = iter->second.first; |
| handler_->setCounter(string("checkpoint_count_") + iter->first, count); |
| if (count == 0) { |
| handler_->setCounter(string("checkpoint_speed_") + iter->first, |
| 0); |
| } else { |
| handler_->setCounter(string("checkpoint_speed_") + iter->first, |
| iter->second.second / count); |
| } |
| } |
| |
| // reset checkpoint variables |
| // note: Clearing the map while other threads are using it might |
| // cause undefined behavior. |
| checkpointServiceDuration_.clear(); |
| checkpointTime_ = now; |
| checkpointServices_ = 0; |
| checkpointDuration_ = 0; |
| |
| // get lifetime variables |
| uint64_t life_count = handler_->getCounter("lifetime_services"); |
| uint64_t life_interval = now - handler_->aliveSince(); |
| |
| // log checkpoint |
| stringstream message; |
| message << "checkpoint_time:" << check_interval |
| << " checkpoint_services:" << check_count |
| << " checkpoint_speed_sum:" << check_duration |
| << " lifetime_time:" << life_interval |
| << " lifetime_services:" << life_count; |
| if (featureThreadCheck_ && threadManager_ != NULL) { |
| size_t worker_count = threadManager_->workerCount(); |
| size_t idle_count = threadManager_->idleWorkerCount(); |
| message << " total_workers:" << worker_count |
| << " active_workers:" << (worker_count - idle_count); |
| } |
| logMethod_(4, message.str()); |
| } |
| |
| /** |
| * Remembers the thread manager used in the server, for monitoring thread |
| * activity. |
| * |
| * @param shared_ptr<ThreadManager> threadManager The server's thread manager. |
| */ |
| void |
| ServiceTracker::setThreadManager(boost::shared_ptr<ThreadManager> |
| threadManager) |
| { |
| threadManager_ = threadManager; |
| } |
| |
| /** |
| * Logs messages to stdout; the passed message will be logged if the |
| * passed level is less than or equal to LOG_LEVEL. |
| * |
| * This is the default logging method used by the ServiceTracker. An |
| * alternate logging method (that accepts the same parameters) may be |
| * specified to the constructor. |
| * |
| * @param int level A level associated with the message: higher levels |
| * are used to indicate higher levels of detail. |
| * @param string message The message to log. |
| */ |
| void |
| ServiceTracker::defaultLogMethod(int level, const string &message) |
| { |
| if (level <= LOG_LEVEL) { |
| string level_string; |
| time_t now = time(NULL); |
| char now_pretty[26]; |
| ctime_r(&now, now_pretty); |
| now_pretty[24] = '\0'; |
| switch (level) { |
| case 1: |
| level_string = "CRITICAL"; |
| break; |
| case 2: |
| level_string = "ERROR"; |
| break; |
| case 3: |
| level_string = "WARNING"; |
| break; |
| case 5: |
| level_string = "DEBUG"; |
| break; |
| case 4: |
| default: |
| level_string = "INFO"; |
| break; |
| } |
| cout << '[' << level_string << "] [" << now_pretty << "] " |
| << message << endl; |
| } |
| } |
| |
| |
| /** |
| * Creates a Stopwatch, which can report the time elapsed since its |
| * creation. |
| * |
| */ |
| Stopwatch::Stopwatch() |
| { |
| gettimeofday(&startTime_, NULL); |
| } |
| |
| void |
| Stopwatch::reset() |
| { |
| gettimeofday(&startTime_, NULL); |
| } |
| |
| uint64_t |
| Stopwatch::elapsedUnits(Stopwatch::Unit unit, string *label) const |
| { |
| timeval now_time; |
| gettimeofday(&now_time, NULL); |
| time_t duration_secs = now_time.tv_sec - startTime_.tv_sec; |
| |
| uint64_t duration_units; |
| switch (unit) { |
| case UNIT_SECONDS: |
| duration_units = duration_secs |
| + (now_time.tv_usec - startTime_.tv_usec + 500000) / 1000000; |
| if (NULL != label) { |
| stringstream ss_label; |
| ss_label << duration_units << " secs"; |
| label->assign(ss_label.str()); |
| } |
| break; |
| case UNIT_MICROSECONDS: |
| duration_units = duration_secs * 1000000 |
| + now_time.tv_usec - startTime_.tv_usec; |
| if (NULL != label) { |
| stringstream ss_label; |
| ss_label << duration_units << " us"; |
| label->assign(ss_label.str()); |
| } |
| break; |
| case UNIT_MILLISECONDS: |
| default: |
| duration_units = duration_secs * 1000 |
| + (now_time.tv_usec - startTime_.tv_usec + 500) / 1000; |
| if (NULL != label) { |
| stringstream ss_label; |
| ss_label << duration_units << " ms"; |
| label->assign(ss_label.str()); |
| } |
| break; |
| } |
| return duration_units; |
| } |
| |
| /** |
| * Creates a ServiceMethod, used for tracking a single service method |
| * invocation (via the ServiceTracker). The passed name of the |
| * ServiceMethod is used to group statistics (e.g. counts and durations) |
| * for similar invocations; the passed signature is used to uniquely |
| * identify the particular invocation in the log. |
| * |
| * note: A version of this constructor is provided that automatically |
| * forms a signature the name and a passed numeric id. Silly, sure, |
| * but commonly used, since it often saves the caller a line or two of |
| * code. |
| * |
| * @param ServiceTracker *tracker The service tracker that will track this |
| * ServiceMethod. |
| * @param const string &name The service method name (usually independent |
| * of service method parameters). |
| * @param const string &signature A signature uniquely identifying the method |
| * invocation (usually name plus parameters). |
| */ |
| ServiceMethod::ServiceMethod(ServiceTracker *tracker, |
| const string &name, |
| const string &signature, |
| bool featureLogOnly) |
| : tracker_(tracker), name_(name), signature_(signature), |
| featureLogOnly_(featureLogOnly) |
| { |
| // note: timer_ automatically starts at construction. |
| |
| // invoke tracker to start service |
| // note: Might throw. If it throws, then this object's destructor |
| // won't be called, which is according to plan: finishService() is |
| // only supposed to be matched to startService() if startService() |
| // returns without error. |
| tracker_->startService(*this); |
| } |
| |
| ServiceMethod::ServiceMethod(ServiceTracker *tracker, |
| const string &name, |
| uint64_t id, |
| bool featureLogOnly) |
| : tracker_(tracker), name_(name), featureLogOnly_(featureLogOnly) |
| { |
| // note: timer_ automatically starts at construction. |
| stringstream ss_signature; |
| ss_signature << name << " (" << id << ')'; |
| signature_ = ss_signature.str(); |
| |
| // invoke tracker to start service |
| // note: Might throw. If it throws, then this object's destructor |
| // won't be called, which is according to plan: finishService() is |
| // only supposed to be matched to startService() if startService() |
| // returns without error. |
| tracker_->startService(*this); |
| } |
| |
| ServiceMethod::~ServiceMethod() |
| { |
| // invoke tracker to finish service |
| // note: Not expecting an exception from this code, but |
| // finishService() might conceivably throw an out-of-memory |
| // exception. |
| try { |
| tracker_->finishService(*this); |
| } catch (...) { |
| // don't throw |
| } |
| } |
| |
| uint64_t |
| ServiceMethod::step(const std::string &stepName) |
| { |
| return tracker_->stepService(*this, stepName); |
| } |