# blob: d3979e85df993c3e0b8209cae2460f8b20072041 [file] [log] [blame]
import errno
import fcntl
import functools
import hashlib
import logging
import time
import uuid
from contextlib import contextmanager

from cachetools import TTLCache

from prometheus_client import Counter, Gauge

from requests import Session
from requests.exceptions import ConnectionError as RequestsConnectionError

from simple_salesforce import Salesforce
from simple_salesforce import exceptions as sf_exceptions


# Upper-cased alert severity -> value sent as the case's
# ``Alert_Priority__c``. Severities missing from this table are reported
# as '070 Unknown' by the lookup in ``SalesforceClient._create_case``.
STATE_MAP = {
    # informational
    'OK': '060 Informational',
    'UP': '060 Informational',
    'INFORMATIONAL': '060 Informational',
    # unknown
    'UNKNOWN': '070 Unknown',
    # warning
    'WARNING': '080 Warning',
    'MINOR': '080 Warning',
    # critical
    'MAJOR': '090 Critical',
    'CRITICAL': '090 Critical',
    'DOWN': '090 Critical',
    'UNREACHABLE': '090 Critical',
}

# Lower-cased configuration key -> field name used internally by
# ``SalesforceClient``. 'sandbox_enabled' is translated to the 'domain'
# field by ``SalesforceClient._validate_config``.
CONFIG_FIELD_MAP = {
    'auth_url': 'instance_url',
    'username': 'username',
    'password': 'password',
    'organization_id': 'organizationId',
    'environment_id': 'environment_id',
    'sandbox_enabled': 'domain',
    'feed_enabled': 'feed_enabled',
    'hash_func': 'hash_func',
}

# Names of the hashlib constructors accepted for the 'hash_func' setting.
ALLOWED_HASHING = ('md5', 'sha256')
# File through which workers share the current Salesforce session id.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)


@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on ``fd`` for the duration of the block.

    The lock is requested in non-blocking mode. While another holder has
    it, the attempt is logged and repeated every 5 seconds, so the body of
    the ``with`` statement only ever runs with the lock held.

    Args:
        fd: open file object or file descriptor accepted by ``fcntl.flock``.

    Raises:
        IOError: if locking fails for a reason other than contention.
            Exceptions raised inside the ``with`` body propagate unchanged.
    """
    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError as ex:
            # Only contention is worth waiting for; anything else (bad
            # descriptor, unsupported filesystem, ...) is a real error.
            if ex.errno not in (errno.EACCES, errno.EAGAIN):
                raise
            logger.info('Session file locked. Waiting 5 seconds...')
            time.sleep(5)
        else:
            break

    # The yield is deliberately outside the locking try block so that an
    # IOError raised by the caller's code is not mistaken for contention.
    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)


def sf_auth_retry(method):
    """Decorate a client method so it re-authenticates and retries once.

    If the wrapped call raises ``SalesforceExpiredSession`` or a requests
    ``ConnectionError``, ``self.auth()`` is invoked and the method is
    called a second time with the same arguments. An exception raised by
    that second call is not handled here and propagates to the caller.

    Args:
        method: instance method of an object that provides ``auth()``.

    Returns:
        The wrapping function, carrying the name and docstring of
        ``method``.
    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
            self.auth()
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')
            self.auth()
        # Reached only after one of the handlers above re-authenticated.
        return method(self, *args, **kwargs)
    return wrapper


class SfNotifierError(Exception):
    """Error raised by sf-notifier, e.g. for an invalid configuration."""


class SalesforceClient(object):
    """Create, look up and close Salesforce cases that mirror alerts.

    Recently seen alert IDs are remembered in a small TTL cache so that
    repeated notifications for the same alert do not hit Salesforce again.
    The Salesforce session id is shared through ``SESSION_FILE``; access to
    that file is serialised with ``flocked``.
    """

    def __init__(self, config):
        """Build the client and make one authentication attempt.

        Args:
            config: mapping of configuration keys to values. Keys are
                matched case-insensitively against ``CONFIG_FIELD_MAP``.

        Raises:
            SfNotifierError: if ``config`` contains an unknown key.
            KeyError: if the 'hash_func', 'feed_enabled' or
                'environment_id' setting is absent.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier')
        }
        # alert_id -> mapping with at least an 'Id' key (the case id).
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)

        self.config = self._validate_config(config)
        # These settings are consumed here; what remains in self.config is
        # passed to Salesforce() as keyword arguments by _auth().
        self.hash_func = self._hash_func(self.config.pop('hash_func'))
        self.feed_enabled = self.config.pop('feed_enabled')

        self.environment = self.config.pop('environment_id')
        self.sf = None
        self.session = Session()
        # Single attempt only: if it fails, self.sf stays None.
        self.auth(no_retry=True)

    @staticmethod
    def _hash_func(name):
        """Return the hashlib constructor called ``name``.

        Names outside ``ALLOWED_HASHING`` are logged and replaced by
        ``hashlib.sha256``.
        """
        if name in ALLOWED_HASHING:
            return getattr(hashlib, name)
        msg = ('Invalid hashing function "{}". '
               'Switching to default "sha256".').format(name)
        logger.warning(msg)
        return hashlib.sha256

    @staticmethod
    def _validate_config(config):
        """Translate ``config`` keys through ``CONFIG_FIELD_MAP``.

        The 'sandbox_enabled' setting becomes ``domain='test'`` when truthy
        and is left out otherwise.

        Returns:
            A new dict keyed by the mapped field names.

        Raises:
            SfNotifierError: if a key has no entry in ``CONFIG_FIELD_MAP``.
        """
        kwargs = {}

        for param, value in config.items():
            field = CONFIG_FIELD_MAP.get(param.lower())
            if field is None:
                env_var = 'SFDC_{}'.format(param)
                # Report the offending key; ``field`` is always None here.
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(param, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)

            if field == 'domain':
                if value:
                    kwargs[field] = 'test'
                else:
                    kwargs.pop(field, None)
            else:
                kwargs[field] = value

        return kwargs

    def _auth(self, config):
        """Create ``self.sf`` from ``config`` and record the outcome.

        Args:
            config: keyword arguments for ``Salesforce``; it is not
                modified. The shared requests session is added to a copy.

        Returns:
            True on success, False if ``Salesforce(...)`` raised. The
            'sf_auth_ok' gauge is set to 1 or 0 accordingly.
        """
        # Copy so that neither self.config nor a caller's dict gains a
        # 'session' entry as a side effect.
        params = dict(config, session=self.session)
        try:
            self.sf = Salesforce(**params)
        except Exception as ex:
            logger.error('Salesforce authentication failure: {}.'.format(ex))
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    def _load_session(self, session_file):
        """Return the session id stored on the first line of the file.

        Surrounding whitespace (including the line ending) is removed.

        Returns:
            The session id, or None when the file is empty or its first
            line is blank.
        """
        first_line = session_file.readline().strip()
        return first_line or None

    def _refresh_ready(self, saved_session):
        """Decide whether this worker should log in again.

        Returns:
            True when no session is saved, or when the saved session is
            the one this client is currently using. False when this client
            has no Salesforce object yet or the saved session differs from
            its own (presumably another worker already refreshed it).
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Authenticate with an existing session id instead of logging in.

        Returns:
            The result of ``_auth`` (True on success).
        """
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url']
        }
        return self._auth(config)

    def _acquire_session(self):
        """Obtain a session under the session-file lock.

        ``SESSION_FILE`` is opened in 'r+' mode, so it has to exist
        already. With the lock held, either a fresh login is performed and
        its session id written to the file, or the id found in the file is
        reused. After a failed attempt this method sleeps 30 seconds
        before returning.

        Returns:
            True if authentication succeeded, otherwise False.
        """
        # only one worker at a time can check session_file
        auth_success = False

        logger.info('Attempting to lock session file.')
        with open(SESSION_FILE, 'r+') as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attempting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        # Replace the file content with the new id.
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        if not auth_success:
            logger.warning('Waiting 30 seconds before next attempt...')
            time.sleep(30)

        return auth_success

    def auth(self, no_retry=False):
        """Authenticate against Salesforce.

        Args:
            no_retry: when True, make a single attempt and return whatever
                its outcome; when False, keep attempting until one
                succeeds.
        """
        auth_ok = self._acquire_session()

        if no_retry:
            return

        while not auth_ok:
            auth_ok = self._acquire_session()

    def _get_alert_id(self, labels):
        """Return the hex digest identifying an alert by its label values.

        The values are taken in sorted key order, every '.' is replaced by
        a backslash followed by '.', and the concatenation is hashed with
        the configured hash function.
        """
        alert_id_data = ''.join(
            labels[key].replace(".", "\\.") for key in sorted(labels)
        )
        return self.hash_func(alert_id_data.encode('utf-8')).hexdigest()

    @staticmethod
    def _is_watchdog(labels):
        """Return True if the 'alertname' label is 'watchdog' (any case).

        A missing 'alertname' label counts as not being a watchdog alert.
        """
        return labels.get('alertname', '').lower() == 'watchdog'

    def _forget_alert(self, alert_id):
        """Remove ``alert_id`` from the local cache if it is present."""
        try:
            del self._registered_alerts[alert_id]
        except KeyError:
            # Never cached, or the entry has already expired.
            pass

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a Salesforce case for an alert unless one is known.

        Returns:
            A ``(code, case_id)`` tuple where ``code`` is 0 for a newly
            created case and 1 for a duplicate (found in the local cache
            or reported by Salesforce as DUPLICATE_VALUE).

        Raises:
            SalesforceMalformedRequest: re-raised for any error code other
                than DUPLICATE_VALUE, after counting it in
                'sf_error_count'.
        """
        # Single lookup: a separate membership test followed by indexing
        # could fail if the entry expired in between.
        known_case = self._registered_alerts.get(alert_id)
        if known_case is not None:
            logger.warning('Duplicate case for alert: {}.'.format(alert_id))
            return 1, known_case['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        if labels.get('cluster_id') is not None:
            payload['ClusterId__c'] = labels['cluster_id']

        if self._is_watchdog(labels):
            payload['IsWatchDogAlert__c'] = 'true'

        logger.info('Try to create case: {}.'.format(payload))
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: {}.'.format(case))
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: {}.'.format(msg))
                # The id of the existing case is taken from the last word
                # of the error message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: {}.'.format(msg))
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark a case 'Auto-solved' and give it a random ``Alert_ID__c``.

        Returns:
            Whatever ``Case.update`` returns.
        """
        logger.info('Try to close case: {}.'.format(case_id))
        self.metrics['sf_request_count'].inc()
        # NOTE(review): the alert id is overwritten with a random value,
        # presumably so a later alert with the same labels can open a new
        # case without a DUPLICATE_VALUE error - confirm.
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: {}.'.format(case_id))
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Attach a feed item with ``subject`` and ``body`` to a case."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        logger.debug('Creating feed item: {}.'.format(feed_item))
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Find the case recorded for ``alert_id``.

        Returns:
            The cached entry if there is one, otherwise the result of the
            Salesforce lookup on ``Alert_ID__c``; None when Salesforce
            reports that no such case exists.
        """
        logger.info('Try to get case by alert ID: {}.'.format(alert_id))

        cached_case = self._registered_alerts.get(alert_id)
        if cached_case is not None:
            return cached_case
        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            self._forget_alert(alert_id)

            logger.warning('Alert ID: {} not found.'.format(alert_id))
            return None

    def create_case(self, subject, body, labels):
        """Open a case for an alert and optionally post a feed item.

        Args:
            subject: case subject, also used as the feed item title.
            body: case description, also used as the feed item body.
            labels: mapping of alert label names to string values.

        Returns:
            A dict with 'case_id', 'alert_id' and 'status', the latter
            being 'created' or 'duplicate'.
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        response = {'case_id': case_id, 'alert_id': alert_id}

        if error_code == 1:
            response['status'] = 'duplicate'
        else:
            response['status'] = 'created'

        if self.feed_enabled or self._is_watchdog(labels):
            self._create_feed_item(subject, body, case_id)
        return response

    def close_case(self, labels):
        """Close the case that belongs to an alert, if one is found.

        Args:
            labels: mapping of alert label names to string values.

        Returns:
            A dict with 'alert_id' and 'status' ('resolved'); when a case
            was found it also holds 'case_id' and 'closed' (the result of
            the update call).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        self._forget_alert(alert_id)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response