Improve sf-notifier worker management

- tune uWSGI options following the recommendations in
  https://www.bloomberg.com/company/stories/configuring-uwsgi-production-deployment/
- add second HTTP process for redundancy
- increase worker lifetime to 1 hour
- handle Prometheus metrics in multiprocess mode

Change-Id: Ie63333d982fd73791db15eedc0604d0e1f01f34f
Related-PROD: PRODX-34058
diff --git a/entrypoint.sh b/entrypoint.sh
index 104de77..f6f475d 100755
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -1,18 +1,42 @@
 #!/bin/ash
 
 export SIMPLE_SETTINGS=${SIMPLE_SETTINGS:-sf_notifier.settings.production}
+export PROMETHEUS_MULTIPROC_DIR=/tmp/prom
+
+# prometheus_client's multiprocess mode stores per-process sample files in
+# this directory. Start from an empty directory so that files left behind by
+# a previous run are not merged into this run's /metrics output.
+rm -rf "${PROMETHEUS_MULTIPROC_DIR:?}"
+mkdir -p "${PROMETHEUS_MULTIPROC_DIR}"
 
 WORKERS=${SF_NOTIFIER_WORKERS:-4}
 BUFFER=${SF_NOTIFIER_BUFFER_SIZE:-32768}
 PORT=${SF_NOTIFIER_APP_PORT:-5000}
 
-uwsgi -p ${WORKERS} \
+uwsgi \
+    --http-processes 2 \
+    --processes ${WORKERS} \
     --uid 1000 \
     --gid 1000 \
     --http 0.0.0.0:${PORT} \
     --wsgi-file sf_notifier/server.py \
-    --callable app_dispatch \
+    --callable app \
     --buffer-size=${BUFFER} \
-    --max-worker-lifetime 300 \
     --master \
-    --req-logger=file:${LOGPATH}
+    --req-logger=file:${LOGPATH} \
+    --enable-threads \
+    --single-interpreter \
+    --vacuum \
+    --die-on-term \
+    --need-app \
+    --disable-logging \
+    --log-4xx \
+    --log-5xx \
+    --auto-procname \
+    --procname-prefix "sf_notifier " \
+    --max-requests 1000 \
+    --max-worker-lifetime 3600 \
+    --reload-on-rss 512 \
+    --worker-reload-mercy 60 \
+    --harakiri 60 \
+    --py-call-osafterfork
diff --git a/sf_notifier/salesforce/client.py b/sf_notifier/salesforce/client.py
index f4a8096..eb39b77 100644
--- a/sf_notifier/salesforce/client.py
+++ b/sf_notifier/salesforce/client.py
@@ -80,12 +80,8 @@
 
 class SalesforceClient(object):
 
-    def __init__(self, config):
-        self.metrics = {
-            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
-            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
-            'sf_request_count': Counter('sf_request_count', 'sf-notifier')
-        }
+    def __init__(self, config, prometheus_registry=None):
+        self.metrics = self._init_metrics(prometheus_registry)
         self._registered_alerts = TTLCache(maxsize=2048, ttl=300)
 
         self.config = self._validate_config(config)
@@ -104,6 +100,36 @@
         self.auth(no_retry=True)
 
     @staticmethod
+    def _init_metrics(prometheus_registry):
+        """Create the Prometheus metrics used by the client.
+
+        Every metric is constructed with ``registry=prometheus_registry``;
+        ``None`` is handed to the constructors unchanged. ``sf_auth_ok`` is
+        a gauge created with ``multiprocess_mode='max'`` and set to 0.
+
+        :param prometheus_registry: value for the ``registry`` argument.
+        :returns: dict mapping each metric name to its metric object.
+        """
+        description = 'sf-notifier'
+
+        auth_ok = Gauge('sf_auth_ok', description,
+                        multiprocess_mode='max',
+                        registry=prometheus_registry)
+        error_count = Counter('sf_error_count', description,
+                              registry=prometheus_registry)
+        request_count = Counter('sf_request_count', description,
+                                registry=prometheus_registry)
+
+        # Give the gauge an explicit initial sample.
+        auth_ok.set(0)
+
+        return {
+            'sf_auth_ok': auth_ok,
+            'sf_error_count': error_count,
+            'sf_request_count': request_count,
+        }
+
+    @staticmethod
     def _hash_func(name):
         if name in ALLOWED_HASHING:
             return getattr(hashlib, name)
diff --git a/sf_notifier/server.py b/sf_notifier/server.py
index 3702992..2b919e2 100644
--- a/sf_notifier/server.py
+++ b/sf_notifier/server.py
@@ -3,7 +3,8 @@
 
 from flask import Flask, Response, jsonify, request
 
-from prometheus_client import make_wsgi_app
+from prometheus_client import (CollectorRegistry, CONTENT_TYPE_LATEST,
+                               generate_latest, multiprocess)
 
 from requests.exceptions import ConnectionError as RequestsConnectionError
 
@@ -14,19 +15,27 @@
 
 from simple_settings import settings
 
-from werkzeug.middleware.dispatcher import DispatcherMiddleware
-
 
 dictConfig(settings.LOGGING)
 
 app = Flask(__name__)
-app_dispatch = DispatcherMiddleware(app, {
-    '/metrics': make_wsgi_app()
-})
 
+# Scrape-only registry: it holds nothing but the MultiProcessCollector, which
+# aggregates the sample files under PROMETHEUS_MULTIPROC_DIR. Application
+# metrics must not be registered here, otherwise each of them is exported
+# twice (once aggregated, once from the worker serving the request).
+registry = CollectorRegistry()
+multiprocess.MultiProcessCollector(registry)
 
 create_file(SESSION_FILE)
 sf_cli = SalesforceClient(settings.SF_CONFIG)
+
+
+@app.route('/metrics', methods=['GET'])
+def metrics():
+    """Return the aggregated metrics in the Prometheus text format."""
+    return Response(generate_latest(registry),
+                    mimetype=CONTENT_TYPE_LATEST)
 
 
 @app.route('/info', methods=['GET'])