Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 1 | import fcntl |
| 2 | import hashlib |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 3 | import logging |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 4 | import os |
Michal Kobus | 211ee92 | 2019-04-15 17:44:06 +0200 | [diff] [blame] | 5 | import time |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 6 | import uuid |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 7 | from contextlib import contextmanager |
| 8 | |
| 9 | from cachetools import TTLCache |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 10 | |
Michal Kobus | afbf4d0 | 2018-11-28 14:18:05 +0100 | [diff] [blame] | 11 | from prometheus_client import Counter, Gauge |
| 12 | |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 13 | from requests import Session |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 14 | from requests.exceptions import ConnectionError as RequestsConnectionError |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 15 | |
| 16 | from simple_salesforce import Salesforce |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 17 | from simple_salesforce import exceptions as sf_exceptions |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 18 | |
| 19 | |
# Maps an alert's upper-cased ``severity`` label to the value sent in the
# case's ``Alert_Priority__c`` field. Severities missing from this map are
# reported as '070 Unknown' by the case-creation code.
STATE_MAP = {
    'OK': '060 Informational',
    'UP': '060 Informational',
    'UNKNOWN': '070 Unknown',
    'WARNING': '080 Warning',
    'MINOR': '080 Warning',
    'MAJOR': '090 Critical',
    'CRITICAL': '090 Critical',
    'DOWN': '090 Critical',
    'UNREACHABLE': '090 Critical',
}

# Maps a setting name to the keyword argument it becomes when the Salesforce
# client is constructed. Each setting is looked up upper-cased in the config
# mapping and can be overridden by an ``SFDC_<SETTING>`` environment variable.
# 'sandbox_enabled' is special: a true value becomes ``domain='test'`` and
# anything else drops the ``domain`` argument entirely.
CONFIG_FIELD_MAP = {
    'auth_url': 'instance_url',
    'username': 'username',
    'password': 'password',
    'organization_id': 'organizationId',
    'environment_id': 'environment_id',
    'sandbox_enabled': 'domain',
}

# hashlib function names accepted for deriving alert IDs
# (selected through the SF_NOTIFIER_ALERT_ID_HASH_FUNC environment variable).
ALLOWED_HASHING = ('md5', 'sha256')
# File holding the current Salesforce session id; access to it is serialized
# with an exclusive flock so only one worker refreshes the session at a time.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)
| 45 | |
| 46 | |
@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on ``fd`` for the duration of the block.

    The lock is requested in non-blocking mode. While another holder owns it,
    the attempt is repeated every 5 seconds until it succeeds. The lock is
    always released on exit, and exceptions raised inside the ``with`` body
    propagate to the caller unchanged.

    :param fd: file object or file descriptor accepted by ``fcntl.flock``.
    :raises IOError: if locking fails for a reason other than contention.
    """
    import errno

    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            break
        except IOError as ex:
            # Only contention is retryable; anything else (e.g. a bad
            # descriptor) would otherwise spin here forever.
            if ex.errno not in (errno.EAGAIN, errno.EACCES):
                raise
            logger.info('Session file locked. Waiting 5 seconds...')
            time.sleep(5)

    # The yield lives in its own try/finally so that an IOError raised by the
    # caller's code is not mistaken for a locking failure and swallowed.
    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
| 57 | |
| 58 | |
def sf_auth_retry(method):
    """Decorate a client method to re-authenticate and retry once on failure.

    If the wrapped call raises ``SalesforceExpiredSession`` or a requests
    ``ConnectionError``, the instance's ``auth()`` is invoked and the call is
    repeated exactly once with the same arguments. An exception raised by the
    second attempt propagates to the caller.

    :param method: instance method whose ``self`` provides ``auth()``.
    :returns: the wrapping function, carrying the metadata of ``method``.
    """
    import functools

    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')

        # Reached only after one of the handled failures above.
        self.auth()
        return method(self, *args, **kwargs)
    return wrapper
| 71 | |
| 72 | |
class SfNotifierError(Exception):
    """Error raised by the notifier, e.g. for an incomplete configuration."""
| 75 | |
| 76 | |
class SalesforceClient(object):
    """Create, deduplicate and close Salesforce cases for alerts.

    The Salesforce session id is shared through ``SESSION_FILE``: a worker
    either reuses the id stored there or, while holding an exclusive lock on
    the file, logs in again and stores the new id.
    """

    def __init__(self, config):
        """Validate ``config``, set up metrics and caches, and authenticate.

        Blocks until authentication succeeds (see ``auth``).

        :param config: mapping with upper-cased setting names as keys.
        :raises SfNotifierError: if a required setting is missing.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier')
        }
        self.config = self._validate_config(config)
        self.hash_func = self._hash_func()
        # environment_id is not a login argument; it is sent with each case.
        self.environment = self.config.pop('environment_id')
        # alert_id -> {'Id': case_id}; entries expire after 300 seconds.
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)
        self.sf = None
        self.session = Session()
        self.auth()

    @staticmethod
    def _hash_func():
        """Return the hashlib constructor used to derive alert IDs.

        The name is read from ``SF_NOTIFIER_ALERT_ID_HASH_FUNC``; names not
        listed in ``ALLOWED_HASHING`` fall back to sha256.
        """
        name = os.environ.get('SF_NOTIFIER_ALERT_ID_HASH_FUNC', 'sha256')
        if name in ALLOWED_HASHING:
            return getattr(hashlib, name)
        logger.warning(
            'Unsupported hash function "{}". Using sha256.'.format(name))
        return hashlib.sha256

    @staticmethod
    def _validate_config(config):
        """Build the Salesforce keyword arguments from config and environment.

        For every entry of ``CONFIG_FIELD_MAP`` the ``SFDC_<SETTING>``
        environment variable takes precedence over ``config[<SETTING>]``.

        :param config: mapping with upper-cased setting names as keys.
        :returns: dict keyed by the mapped field names.
        :raises SfNotifierError: if a required setting is missing.
        """
        kwargs = {}

        for param, field in CONFIG_FIELD_MAP.items():
            setting_var = param.upper()
            env_var = 'SFDC_{}'.format(setting_var)
            kwargs[field] = os.environ.get(
                env_var, config.get(setting_var))

            if field == 'domain':
                # Optional flag: truthy means the 'test' domain, anything
                # else leaves the argument out.
                if kwargs[field] in ['true', 'True', True]:
                    kwargs[field] = 'test'
                else:
                    del kwargs[field]
                continue

            if kwargs[field] is None:
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(param, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)
        return kwargs

    def _auth(self, config):
        """Create ``self.sf`` from ``config`` and record the outcome.

        :param config: keyword arguments for ``Salesforce``; not modified.
        :returns: True on success, False if authentication failed.
        """
        # Work on a copy so the HTTP session object does not leak into the
        # caller's dict (in particular ``self.config``).
        params = dict(config, session=self.session)
        try:
            self.sf = Salesforce(**params)
        except sf_exceptions.SalesforceAuthenticationFailed as ex:
            logger.error('Salesforce authentication failure: {}.'.format(ex))
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    def _load_session(self, session_file):
        """Return the session id stored in ``session_file``.

        :param session_file: open, readable text file positioned at its start.
        :returns: the first line without surrounding whitespace, or None when
            the file is empty or the first line is blank.
        """
        # Strip so a trailing newline never becomes part of the session id.
        first_line = session_file.readline().strip()
        return first_line or None

    def _refresh_ready(self, saved_session):
        """Tell whether this worker should log in again.

        :param saved_session: session id read from the session file, or None.
        :returns: True if no session is stored or the stored one is the one
            this client currently uses; False if there is no client yet or
            the stored id differs, in which case the stored id is reused.
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Build the client from an existing session id.

        :param saved_session: session id read from the session file.
        :returns: result of ``_auth``.
        """
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url']
        }
        return self._auth(config)

    def _acquire_session(self):
        """Make one attempt to obtain a session, refreshing it if needed.

        On failure, sleeps 30 seconds after releasing the lock.

        :returns: True if ``self.sf`` was set up, False otherwise.
        """
        # only one worker at a time can check session_file
        auth_success = False

        # O_CREAT: a missing session file means "no session yet", not a crash.
        fd = os.open(SESSION_FILE, os.O_RDWR | os.O_CREAT, 0o600)
        with os.fdopen(fd, 'r+') as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attempting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        # Flush while the lock is still held; otherwise the
                        # id only reaches the file on close, after unlock.
                        session_file.flush()
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        if auth_success is False:
            logger.warning('Waiting 30 seconds before next attempt...')
            time.sleep(30)

        return auth_success

    def auth(self):
        """Block until a Salesforce session has been acquired."""
        while not self._acquire_session():
            # _acquire_session already waits between failed attempts.
            pass

    def _get_alert_id(self, labels):
        """Derive a deterministic alert ID from the label values.

        :param labels: mapping of label names to string values.
        :returns: hex digest of the values concatenated in key order, with
            every '.' escaped as '\\.'.
        """
        alert_id_data = ''.join(
            labels[key].replace(".", "\\.") for key in sorted(labels)
        )
        return self.hash_func(alert_id_data.encode('utf-8')).hexdigest()

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a case for the alert unless one is already known.

        :returns: ``(code, case_id)`` where code is 0 for a newly created
            case and 1 for a duplicate (cached or reported by Salesforce).
        :raises SalesforceMalformedRequest: for errors other than
            DUPLICATE_VALUE.
        """
        # Single lookup: the entry may expire between a check and an access.
        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            logger.warning('Duplicate case for alert: {}.'.format(alert_id))
            return 1, cached['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        if labels.get('cluster_id') is not None:
            payload['ClusterId__c'] = labels['cluster_id']

        logger.info('Try to create case: {}.'.format(payload))
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: {}.'.format(case))
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: {}.'.format(msg))
                # The existing case id is taken from the last word of the
                # error message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: {}.'.format(msg))
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark the case 'Auto-solved' and return the update result.

        The case's ``Alert_ID__c`` is overwritten with a random value --
        presumably so the alert ID can be used by a later case; confirm
        against the Salesforce schema.
        """
        logger.info('Try to close case: {}.'.format(case_id))
        self.metrics['sf_request_count'].inc()
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: {}.'.format(case_id))
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Attach a feed item with the given title and body to the case."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Look up the case for ``alert_id``.

        :returns: the cached ``{'Id': ...}`` entry, else the record returned
            by Salesforce, else None when Salesforce has no such case.
        """
        logger.info('Try to get case by alert ID: {}.'.format(alert_id))

        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            return cached
        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            self._registered_alerts.pop(alert_id, None)

            logger.warning('Alert ID: {} not found.'.format(alert_id))
            return None

    def create_case(self, subject, body, labels):
        """Create (or find) the case for an alert and add a feed item to it.

        :returns: dict with 'case_id', 'alert_id' and 'status', the latter
            being 'created' or 'duplicate'.
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        self._create_feed_item(subject, body, case_id)

        return {
            'case_id': case_id,
            'alert_id': alert_id,
            'status': 'duplicate' if error_code == 1 else 'created',
        }

    def close_case(self, labels):
        """Close the case belonging to an alert, if one exists.

        :returns: dict with 'alert_id' and 'status' ('resolved'); when a case
            was found it also has 'case_id' and 'closed' (the update result).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        # Forget the alert so a later occurrence opens a fresh case.
        self._registered_alerts.pop(alert_id, None)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response