Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 1 | # Copyright 2018: Mirantis Inc. |
| 2 | # All Rights Reserved. |
| 3 | # |
| 4 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 5 | # not use this file except in compliance with the License. You may obtain |
| 6 | # a copy of the License at |
| 7 | # |
| 8 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | # |
| 10 | # Unless required by applicable law or agreed to in writing, software |
| 11 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 12 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 13 | # License for the specific language governing permissions and limitations |
| 14 | # under the License. |
| 15 | |
Michal Kobus | 211ee92 | 2019-04-15 17:44:06 +0200 | [diff] [blame^] | 16 | import fcntl |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 17 | import hashlib |
| 18 | import logging |
| 19 | import os |
Michal Kobus | 211ee92 | 2019-04-15 17:44:06 +0200 | [diff] [blame^] | 20 | import time |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 21 | import uuid |
Michal Kobus | 211ee92 | 2019-04-15 17:44:06 +0200 | [diff] [blame^] | 22 | from contextlib import contextmanager |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 23 | |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 24 | from cachetools import TTLCache |
| 25 | |
Michal Kobus | afbf4d0 | 2018-11-28 14:18:05 +0100 | [diff] [blame] | 26 | from prometheus_client import Counter, Gauge |
| 27 | |
Michal Kobus | 915f3cf | 2018-12-10 20:09:41 +0100 | [diff] [blame] | 28 | from requests import Session |
Michal Kobus | 4104c10 | 2019-02-22 17:05:11 +0100 | [diff] [blame] | 29 | from requests.exceptions import ConnectionError as RequestsConnectionError |
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 30 | |
| 31 | from simple_salesforce import Salesforce |
| 32 | from simple_salesforce import exceptions as sf_exceptions |
| 33 | |
| 34 | |
# Alert severity label (upper-cased) -> Salesforce "Alert_Priority__c" value.
STATE_MAP = dict(
    # informational
    OK='060 Informational',
    UP='060 Informational',
    # unknown
    UNKNOWN='070 Unknown',
    # warning
    WARNING='080 Warning',
    MINOR='080 Warning',
    # critical
    MAJOR='090 Critical',
    CRITICAL='090 Critical',
    DOWN='090 Critical',
    UNREACHABLE='090 Critical',
)

# Config parameter name -> keyword used when building the Salesforce client.
# Each parameter is looked up upper-cased in the config mapping and as an
# "SFDC_<PARAM>" environment variable.
CONFIG_FIELD_MAP = dict(
    auth_url='instance_url',
    username='username',
    password='password',
    organization_id='organizationId',
    environment_id='environment_id',
    sandbox_enabled='domain',
)

# hashlib constructors that may be selected for generating alert IDs.
ALLOWED_HASHING = ('md5', 'sha256')

# File holding the current Salesforce session id.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)
| 60 | |
| 61 | |
@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on *fd* for the duration of the block.

    The lock is requested in non-blocking mode. While another holder has
    it, the attempt is repeated every 5 seconds until it succeeds, so the
    managed block only ever runs with the lock held. The lock is released
    when the block exits, whether normally or through an exception.

    :param fd: open file object or file descriptor to lock.
    :raises IOError: if locking fails for a reason other than contention
        (for example an invalid descriptor). Exceptions raised inside the
        managed block propagate unchanged.
    """
    import errno  # local import keeps this block self-contained

    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError as exc:
            # Only "lock is held elsewhere" is worth waiting for; anything
            # else would just spin forever.
            if exc.errno not in (errno.EACCES, errno.EAGAIN):
                raise
            logger.info('Waiting for session file 5 seconds...')
            time.sleep(5)
        else:
            break

    # Guard only the managed block: errors raised by the caller's code must
    # not be mistaken for lock contention, and the unlock must only run
    # once the lock has really been taken.
    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
| 72 | |
| 73 | |
def sf_auth_retry(method):
    """Decorate a client method so it is retried once after re-authenticating.

    The wrapped method is called normally. If it raises
    ``SalesforceExpiredSession`` or a requests ``ConnectionError``, the
    instance's ``auth()`` is invoked and the method is called a second
    time with the same arguments. Any exception from that second call, and
    any other exception from the first, propagates to the caller.

    :param method: instance method whose owner provides ``auth()``.
    :returns: the wrapping function, carrying the original method's
        name and docstring.
    """
    import functools  # local import keeps this block self-contained

    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')

        # Reached only after one of the handled failures above: obtain a
        # usable session and give the call exactly one more chance.
        self.auth()
        return method(self, *args, **kwargs)
    return wrapper
| 86 | |
| 87 | |
class SfNotifierError(Exception):
    """Error raised by the notifier, e.g. for an incomplete configuration."""
Mateusz Matuszkowiak | 2820c66 | 2018-11-21 12:07:25 +0100 | [diff] [blame] | 90 | |
| 91 | |
class SalesforceClient(object):
    """Create, annotate and close Salesforce cases on behalf of alerts.

    The Salesforce session id is shared between workers through
    ``SESSION_FILE``; access to that file is serialized with ``flocked``.
    Cases created or looked up recently are remembered in a TTL cache keyed
    by alert ID.
    """

    # Seconds to wait between two consecutive failed authentication rounds.
    AUTH_RETRY_DELAY = 5

    def __init__(self, config):
        """Validate *config*, register metrics and authenticate.

        :param config: mapping with upper-cased setting names (see
            ``CONFIG_FIELD_MAP``); environment variables take precedence.
        :raises SfNotifierError: if a required setting is missing.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier'),
        }
        self.config = self._validate_config(config)
        self.hash_func = self._hash_func()
        # The environment id is not a Salesforce login argument, so it is
        # taken out of the login kwargs and kept separately.
        self.environment = self.config.pop('environment_id')
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)
        self.sf = None
        self.session = Session()
        self.auth()

    @staticmethod
    def _hash_func():
        """Return the hashlib constructor used to derive alert IDs.

        The name is read from ``SF_NOTIFIER_ALERT_ID_HASH_FUNC``; names not
        listed in ``ALLOWED_HASHING`` fall back to SHA-256.
        """
        name = os.environ.get('SF_NOTIFIER_ALERT_ID_HASH_FUNC', 'sha256')
        if name in ALLOWED_HASHING:
            return getattr(hashlib, name)
        return hashlib.sha256

    @staticmethod
    def _validate_config(config):
        """Build Salesforce login kwargs from the environment and *config*.

        For every entry of ``CONFIG_FIELD_MAP`` the ``SFDC_<PARAM>``
        environment variable is preferred over ``config[<PARAM>]``. The
        optional sandbox flag becomes ``domain='test'`` when it is
        ``'true'``, ``'True'`` or ``True`` and is omitted otherwise.

        :param config: mapping of upper-cased setting names to values.
        :returns: dict keyed by the target field names.
        :raises SfNotifierError: if a required setting is missing.
        """
        kwargs = {}

        # items() rather than iteritems(): works on Python 2 and 3 alike.
        for param, field in CONFIG_FIELD_MAP.items():
            setting_var = param.upper()
            env_var = 'SFDC_{}'.format(setting_var)
            value = os.environ.get(env_var, config.get(setting_var))

            if field == 'domain':
                if value in ('true', 'True', True):
                    kwargs[field] = 'test'
                continue

            if value is None:
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(param, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)

            kwargs[field] = value
        return kwargs

    def _auth(self, config):
        """Instantiate the Salesforce client with *config* and record result.

        :param config: keyword arguments for ``Salesforce``; the shared
            requests session is added to a copy of it.
        :returns: ``True`` on success, ``False`` if Salesforce rejected
            the authentication. The ``sf_auth_ok`` gauge is set to match.
        """
        # Work on a copy so the caller's dict (typically self.config) does
        # not silently gain a 'session' entry.
        params = dict(config, session=self.session)
        try:
            self.sf = Salesforce(**params)
        except sf_exceptions.SalesforceAuthenticationFailed:
            logger.error('Salesforce authentication failure.')
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    def _load_session(self, session_file):
        """Return the session id stored in *session_file*, or ``None``.

        Only the first line is considered; surrounding whitespace is
        dropped, and an empty or blank first line counts as no session.
        """
        saved_session = session_file.readline().strip()
        return saved_session or None

    def _refresh_ready(self, saved_session):
        """Tell whether this worker should log in again.

        A new login is due when no session is stored, or when the stored
        session is the one this worker is currently using. A worker without
        a client yet, or with a different session than the stored one,
        adopts the stored session instead.
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Build the client from a stored session id."""
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url'],
        }
        return self._auth(config)

    def _acquire_session(self):
        """Refresh or adopt the shared session under the file lock.

        ``SESSION_FILE`` is opened in ``'r+'`` mode, so it has to exist
        already.

        :returns: ``True`` if a usable client was set up, else ``False``.
        """
        # only one worker at a time can check session_file
        auth_success = False

        with open(SESSION_FILE, 'r+') as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attepmting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        # Push the id to disk while the lock is still
                        # held; otherwise it stays buffered until the file
                        # is closed, i.e. after the lock was released.
                        session_file.flush()
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        return auth_success

    def auth(self):
        """Block until a Salesforce session has been obtained."""
        while not self._acquire_session():
            # Pause between attempts instead of retrying the login in a
            # tight loop.
            time.sleep(self.AUTH_RETRY_DELAY)

    def _get_alert_id(self, labels):
        """Derive a stable alert ID from the label values.

        Values are taken in sorted key order, dots are escaped as ``\\.``,
        everything is concatenated and hashed with ``self.hash_func``.

        :param labels: mapping of label name to string value.
        :returns: hexadecimal digest string.
        """
        alert_id_data = ''.join(
            labels[key].replace('.', '\\.') for key in sorted(labels)
        )
        # hashlib needs bytes: encode text explicitly so non-ASCII labels
        # work and the code runs on both Python 2 and 3.
        if not isinstance(alert_id_data, bytes):
            alert_id_data = alert_id_data.encode('utf-8')
        return self.hash_func(alert_id_data).hexdigest()

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a case for *alert_id* unless one is already known.

        :returns: ``(0, case_id)`` for a new case, ``(1, case_id)`` when
            the case already exists (cached, or reported by Salesforce as
            ``DUPLICATE_VALUE``).
        :raises SalesforceMalformedRequest: for any other rejection.
        """
        # Single lookup: a separate membership test could be invalidated
        # by the entry expiring before it is read.
        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            logger.warning('Duplicate case for alert: {}.'.format(alert_id))
            return 1, cached['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        logger.info('Try to create case: {}.'.format(payload))
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: {}.'.format(case))
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: {}.'.format(msg))
                # The existing case id is the last word of the message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: {}.'.format(msg))
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark *case_id* as ``Auto-solved`` and return the update result.

        The case's ``Alert_ID__c`` is overwritten with a random UUID hex,
        presumably so the alert ID can be used by a later case.
        """
        logger.info('Try to close case: {}.'.format(case_id))
        self.metrics['sf_request_count'].inc()
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: {}.'.format(case_id))
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Attach a feed item with *subject* and *body* to *case_id*."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Return the case for *alert_id*, or ``None`` if there is none.

        The local cache is consulted first, then Salesforce is queried by
        the ``Alert_ID__c`` field.
        """
        logger.info('Try to get case by alert ID: {}.'.format(alert_id))

        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            return cached

        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            # Drop any stale cache entry for an alert Salesforce no longer
            # knows about.
            self._registered_alerts.pop(alert_id, None)
            logger.warning('Alert ID: {} not found.'.format(alert_id))
            return None

    def create_case(self, subject, body, labels):
        """Open (or find) the case for an alert and add a feed item to it.

        :returns: dict with ``case_id``, ``alert_id`` and ``status``
            (``'created'`` or ``'duplicate'``).
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        self._create_feed_item(subject, body, case_id)

        return {
            'case_id': case_id,
            'alert_id': alert_id,
            'status': 'duplicate' if error_code == 1 else 'created',
        }

    def close_case(self, labels):
        """Close the case belonging to the alert described by *labels*.

        :returns: dict with ``alert_id`` and ``status``; when a case was
            found it also carries ``case_id`` and ``closed`` (the update
            result).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        # Forget the alert regardless of whether it is still cached.
        self._registered_alerts.pop(alert_id, None)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response