Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 1 | import fcntl |
| 2 | import hashlib |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 3 | import logging |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 4 | import os |
Michal Kobus | 211ee92 | 2019-04-15 17:44:06 +0200 | [diff] [blame] | 5 | import time |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 6 | import uuid |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 7 | from contextlib import contextmanager |
| 8 | |
| 9 | from cachetools import TTLCache |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 10 | |
Michal Kobus | afbf4d0 | 2018-11-28 14:18:05 +0100 | [diff] [blame] | 11 | from prometheus_client import Counter, Gauge |
| 12 | |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 13 | from requests import Session |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 14 | from requests.exceptions import ConnectionError as RequestsConnectionError |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 15 | |
| 16 | from simple_salesforce import Salesforce |
Michal Kobus | aa3accf | 2019-06-05 12:25:09 +0200 | [diff] [blame] | 17 | from simple_salesforce import exceptions as sf_exceptions |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 18 | |
| 19 | |
# Alert severity/state label (upper-cased by the caller) -> value written to
# the Salesforce case field "Alert_Priority__c".
STATE_MAP = dict(
    OK='060 Informational',
    UP='060 Informational',
    UNKNOWN='070 Unknown',
    WARNING='080 Warning',
    MINOR='080 Warning',
    MAJOR='090 Critical',
    CRITICAL='090 Critical',
    DOWN='090 Critical',
    UNREACHABLE='090 Critical',
)

# Lower-cased config parameter name -> key used in the validated config.
# Some targets are consumed by the client itself (environment_id,
# feed_enabled, hash_func); the rest are forwarded to the Salesforce
# constructor.
CONFIG_FIELD_MAP = dict(
    auth_url='instance_url',
    username='username',
    password='password',
    organization_id='organizationId',
    environment_id='environment_id',
    sandbox_enabled='domain',
    feed_enabled='feed_enabled',
    hash_func='hash_func',
)

# Names of hashlib constructors accepted for computing alert IDs.
ALLOWED_HASHING = ('md5', 'sha256')
# File holding the current Salesforce session id; access is serialised with
# flock() so that only one worker refreshes the session at a time.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)
| 47 | |
| 48 | |
@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on *fd* for the duration of the block.

    The lock is requested in non-blocking mode; while another holder has it,
    the call logs a message, sleeps 5 seconds and tries again until the lock
    is obtained. The lock is always released when the block exits.

    Exceptions raised inside the ``with`` body propagate to the caller; they
    are never mistaken for lock contention.

    :param fd: open file object (or file descriptor) to lock.
    """
    # Acquire first, outside the try/finally below, so that a failed attempt
    # neither reaches the body nor triggers an unlock of a lock we don't hold.
    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
            break
        except IOError:
            logger.info('Session file locked. Waiting 5 seconds...')
            time.sleep(5)

    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
| 59 | |
| 60 | |
def sf_auth_retry(method):
    """Decorate a client method so it re-authenticates and retries once.

    The wrapped method is called normally. If it raises
    ``SalesforceExpiredSession`` or a requests ``ConnectionError``, the
    instance's ``auth()`` is called and the method is invoked a second time
    with the same arguments. Any exception from the second call, and any
    other exception from the first, propagates to the caller.

    :param method: instance method whose owner provides ``auth()``.
    :returns: the wrapping function, carrying the metadata of *method*.
    """
    import functools  # local import: not among the module-level imports

    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')

        # Reached only after one of the handled failures above.
        self.auth()
        return method(self, *args, **kwargs)
    return wrapper
| 73 | |
| 74 | |
class SfNotifierError(Exception):
    """Error raised by the notifier, e.g. for an invalid configuration."""
| 77 | |
| 78 | |
class SalesforceClient(object):
    """Create, look up and close Salesforce ``Case`` records for alerts.

    Each alert is identified by a hash of its label values (the alert ID),
    stored on the case in ``Alert_ID__c``. Recently seen alert IDs are kept
    in an in-memory TTL cache to avoid repeated Salesforce requests.
    The Salesforce session id is shared through ``SESSION_FILE`` so that
    only one worker at a time refreshes it.
    """

    def __init__(self, config):
        """Validate *config*, set up metrics and authenticate.

        :param config: mapping whose keys (case-insensitive) are the names
            listed in ``CONFIG_FIELD_MAP``. ``hash_func``, ``feed_enabled``
            and ``environment_id`` must be present: they are popped without
            a default, so a missing one raises ``KeyError``.
        :raises SfNotifierError: if *config* contains an unknown key.

        Note: the constructor calls ``auth()``, which does not return until
        authentication succeeds.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier')
        }
        # alert_id -> {'Id': case_id}; entries expire after 300 seconds.
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)

        self.config = self._validate_config(config)
        # Settings consumed by the client itself are removed so that
        # self.config only holds keyword arguments for Salesforce().
        self.hash_func = self._hash_func(self.config.pop('hash_func'))
        self.feed_enabled = self.config.pop('feed_enabled')

        self.environment = self.config.pop('environment_id')
        self.sf = None
        self.session = Session()
        self.auth()

    @staticmethod
    def _hash_func(name):
        """Return the hashlib constructor called *name*.

        Falls back to ``hashlib.sha256`` (with a warning) when *name* is not
        listed in ``ALLOWED_HASHING``.
        """
        if name in ALLOWED_HASHING:
            return getattr(hashlib, name)
        msg = ('Invalid hashing function "{}". '
               'Switching to default "sha256".').format(name)
        logger.warning(msg)
        return hashlib.sha256

    @staticmethod
    def _validate_config(config):
        """Translate *config* keys through ``CONFIG_FIELD_MAP``.

        A truthy ``sandbox_enabled`` becomes ``domain='test'``; a falsy one
        leaves ``domain`` out of the result.

        :param config: mapping of parameter name to value.
        :returns: new dict keyed by the mapped field names.
        :raises SfNotifierError: if a key has no entry in the field map.
        """
        kwargs = {}

        for param, value in config.items():
            name = param.lower()
            field = CONFIG_FIELD_MAP.get(name)
            if field is None:
                env_var = 'SFDC_{}'.format(param)
                # Report the offending parameter; `field` is None here.
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(name, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)

            if field != 'domain':
                kwargs[field] = value
            elif value:
                kwargs[field] = 'test'
            else:
                kwargs.pop(field, None)

        return kwargs

    def _auth(self, config):
        """Create the Salesforce connection from *config*.

        Updates the ``sf_auth_ok`` gauge. *config* is not modified.

        :param config: keyword arguments for ``Salesforce``.
        :returns: ``True`` on success, ``False`` if authentication failed.
        """
        try:
            # Copy instead of update(): `config` is usually self.config and
            # must not accumulate the session object.
            self.sf = Salesforce(**dict(config, session=self.session))
        except sf_exceptions.SalesforceAuthenticationFailed as ex:
            logger.error('Salesforce authentication failure: {}.'.format(ex))
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    def _load_session(self, session_file):
        """Return the session id stored on the first line of *session_file*.

        :returns: the first line without surrounding whitespace, or ``None``
            when the file is empty or its first line is blank.
        """
        lines = session_file.readlines()
        if not lines:
            return None
        return lines[0].strip() or None

    def _refresh_ready(self, saved_session):
        """Tell whether this worker should log in again.

        A refresh is due when no session is saved, or when the saved session
        is the one this client currently uses. If the saved session differs
        (or this client has no connection yet), the saved one is reused.
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Connect with an existing session id instead of logging in."""
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url']
        }
        return self._auth(config)

    def _acquire_session(self):
        """Make one attempt to obtain a working session.

        Under an exclusive lock on ``SESSION_FILE``, either logs in and
        stores the new session id in the file, or reuses the stored one.
        After a failed attempt the method sleeps 30 seconds before returning.

        :returns: ``True`` if authentication succeeded, else ``False``.
        """
        # only one worker at a time can check session_file
        auth_success = False

        with open(SESSION_FILE, 'r+') as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attempting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        # Back off with the lock released so other workers are not held up.
        if auth_success is False:
            logger.warning('Waiting 30 seconds before next attempt...')
            time.sleep(30)

        return auth_success

    def auth(self):
        """Authenticate, retrying until an attempt succeeds."""
        while self._acquire_session() is False:
            pass

    def _get_alert_id(self, labels):
        """Return the hex digest identifying the alert with these *labels*.

        Label values are concatenated in sorted key order, with every "."
        replaced by "\\.", and hashed with the configured hash function.
        """
        alert_id_data = ''.join(
            labels[key].replace(".", "\\.") for key in sorted(labels)
        )
        return self.hash_func(alert_id_data.encode('utf-8')).hexdigest()

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a case for the alert unless one is already known.

        :returns: ``(0, case_id)`` for a newly created case, or
            ``(1, case_id)`` when the alert is cached or Salesforce reports
            ``DUPLICATE_VALUE``.
        :raises SalesforceMalformedRequest: for any other request error.
        """
        # Single lookup: a TTL entry may expire between `in` and indexing.
        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            logger.warning('Duplicate case for alert: {}.'.format(alert_id))
            return 1, cached['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        if labels.get('cluster_id') is not None:
            payload['ClusterId__c'] = labels['cluster_id']

        logger.info('Try to create case: {}.'.format(payload))
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: {}.'.format(case))
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: {}.'.format(msg))
                # The existing case id is the last word of the message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: {}.'.format(msg))
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark the case as ``Auto-solved``.

        ``Alert_ID__c`` is overwritten with a random value.
        NOTE(review): presumably this frees the alert ID so a later
        occurrence can open a new case -- confirm.

        :returns: the result of the Salesforce update call.
        """
        logger.info('Try to close case: {}.'.format(case_id))
        self.metrics['sf_request_count'].inc()
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: {}.'.format(case_id))
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Post a feed item with *subject* and *body* on the given case."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        logger.debug('Creating feed item: {}.'.format(feed_item))
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Return the case for *alert_id*, or ``None`` if there is none.

        The cached entry (``{'Id': case_id}``) is returned when available;
        otherwise the case is fetched from Salesforce by ``Alert_ID__c``.
        """
        logger.info('Try to get case by alert ID: {}.'.format(alert_id))

        # Single lookup: a TTL entry may expire between `in` and indexing.
        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            return cached
        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            self._registered_alerts.pop(alert_id, None)
            logger.warning('Alert ID: {} not found.'.format(alert_id))
        return None

    def create_case(self, subject, body, labels):
        """Open a case for an alert and optionally post a feed item.

        :param subject: case subject (also the feed item title).
        :param body: case description (also the feed item body).
        :param labels: alert labels; values must be strings.
        :returns: dict with ``case_id``, ``alert_id`` and ``status``
            (``'created'`` or ``'duplicate'``).
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        response = {
            'case_id': case_id,
            'alert_id': alert_id,
            'status': 'duplicate' if error_code == 1 else 'created',
        }

        if self.feed_enabled:
            self._create_feed_item(subject, body, case_id)
        return response

    def close_case(self, labels):
        """Close the case belonging to the alert with these *labels*.

        :returns: dict with ``alert_id`` and ``status='resolved'``; when a
            case was found it also has ``case_id`` and ``closed`` (the
            result of the update call).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        self._registered_alerts.pop(alert_id, None)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response