Improve sf-notifier worker management
- tune uwsgi command-line options based on
https://www.bloomberg.com/company/stories/configuring-uwsgi-production-deployment/
- add second HTTP process for redundancy
- increase max worker lifetime from 5 minutes to 1 hour
- serve Prometheus metrics in multiprocess mode from a Flask route
Change-Id: Ie63333d982fd73791db15eedc0604d0e1f01f34f
Related-PROD: PRODX-34058
diff --git a/entrypoint.sh b/entrypoint.sh
index 104de77..f6f475d 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,18 +1,38 @@
#!/bin/ash
export SIMPLE_SETTINGS=${SIMPLE_SETTINGS:-sf_notifier.settings.production}
+export PROMETHEUS_MULTIPROC_DIR=/tmp/prom
+
+mkdir -p ${PROMETHEUS_MULTIPROC_DIR}
WORKERS=${SF_NOTIFIER_WORKERS:-4}
BUFFER=${SF_NOTIFIER_BUFFER_SIZE:-32768}
PORT=${SF_NOTIFIER_APP_PORT:-5000}
-uwsgi -p ${WORKERS} \
+uwsgi \
+ --http-processes 2 \
+ --processes ${WORKERS} \
--uid 1000 \
--gid 1000 \
--http 0.0.0.0:${PORT} \
--wsgi-file sf_notifier/server.py \
- --callable app_dispatch \
+ --callable app \
--buffer-size=${BUFFER} \
- --max-worker-lifetime 300 \
--master \
- --req-logger=file:${LOGPATH}
+ --req-logger=file:${LOGPATH} \
+ --enable-threads \
+ --single-interpreter \
+ --vacuum \
+ --die-on-term \
+ --need-app \
+ --disable-logging \
+ --log-4xx \
+ --log-5xx \
+ --auto-procname \
+ --procname-prefix "sf_notifier " \
+ --max-requests 1000 \
+ --max-worker-lifetime 3600 \
+ --reload-on-rss 512 \
+ --worker-reload-mercy 60 \
+ --harakiri 60 \
+ --py-call-osafterfork
diff --git a/sf_notifier/salesforce/client.py b/sf_notifier/salesforce/client.py
index f4a8096..eb39b77 100644
--- a/sf_notifier/salesforce/client.py
+++ b/sf_notifier/salesforce/client.py
@@ -80,12 +80,8 @@
class SalesforceClient(object):
- def __init__(self, config):
- self.metrics = {
- 'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
- 'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
- 'sf_request_count': Counter('sf_request_count', 'sf-notifier')
- }
+ def __init__(self, config, prometheus_registry=None):
+ self.metrics = self._init_metrics(prometheus_registry)
self._registered_alerts = TTLCache(maxsize=2048, ttl=300)
self.config = self._validate_config(config)
@@ -104,6 +100,20 @@
self.auth(no_retry=True)
@staticmethod
+ def _init_metrics(prometheus_registry):
+ metrics = {
+ 'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier',
+ multiprocess_mode='max',
+ registry=prometheus_registry),
+ 'sf_error_count': Counter('sf_error_count', 'sf-notifier',
+ registry=prometheus_registry),
+ 'sf_request_count': Counter('sf_request_count', 'sf-notifier',
+ registry=prometheus_registry)
+ }
+ metrics['sf_auth_ok'].set(0)
+ return metrics
+
+ @staticmethod
def _hash_func(name):
if name in ALLOWED_HASHING:
return getattr(hashlib, name)
diff --git a/sf_notifier/server.py b/sf_notifier/server.py
index 3702992..2b919e2 100644
--- a/sf_notifier/server.py
+++ b/sf_notifier/server.py
@@ -3,7 +3,8 @@
from flask import Flask, Response, jsonify, request
-from prometheus_client import make_wsgi_app
+from prometheus_client import (CollectorRegistry, CONTENT_TYPE_LATEST,
+ generate_latest, multiprocess)
from requests.exceptions import ConnectionError as RequestsConnectionError
@@ -14,19 +15,22 @@
from simple_settings import settings
-from werkzeug.middleware.dispatcher import DispatcherMiddleware
-
dictConfig(settings.LOGGING)
app = Flask(__name__)
-app_dispatch = DispatcherMiddleware(app, {
- '/metrics': make_wsgi_app()
-})
+registry = CollectorRegistry()
+multiprocess.MultiProcessCollector(registry)
create_file(SESSION_FILE)
-sf_cli = SalesforceClient(settings.SF_CONFIG)
+sf_cli = SalesforceClient(settings.SF_CONFIG, prometheus_registry=registry)
+
+
+@app.route('/metrics', methods=['GET'])
+def metrics():
+ return Response(generate_latest(registry),
+ mimetype=CONTENT_TYPE_LATEST)
@app.route('/info', methods=['GET'])