blob: f4a809665b8b7289cc86e74d94ec063757fc13e3 [file] [log] [blame]
Michal Kobusaa3accf2019-06-05 12:25:09 +02001import fcntl
2import hashlib
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +01003import logging
Michal Kobus211ee922019-04-15 17:44:06 +02004import time
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +01005import uuid
Michal Kobusaa3accf2019-06-05 12:25:09 +02006from contextlib import contextmanager
Michal Kobus8023fd42023-06-22 15:47:58 +02007from urllib3.util.retry import Retry
Michal Kobusaa3accf2019-06-05 12:25:09 +02008
9from cachetools import TTLCache
Michal Kobus915f3cf2018-12-10 20:09:41 +010010
Michal Kobusafbf4d02018-11-28 14:18:05 +010011from prometheus_client import Counter, Gauge
12
Michal Kobus915f3cf2018-12-10 20:09:41 +010013from requests import Session
Michal Kobus8023fd42023-06-22 15:47:58 +020014from requests.adapters import HTTPAdapter
Michal Kobusaa3accf2019-06-05 12:25:09 +020015from requests.exceptions import ConnectionError as RequestsConnectionError
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +010016
17from simple_salesforce import Salesforce
Michal Kobusaa3accf2019-06-05 12:25:09 +020018from simple_salesforce import exceptions as sf_exceptions
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +010019
20
# Upper-cased alert severity label -> Salesforce priority string.  Labels that
# are not listed here are handled by the caller's own fallback.
STATE_MAP = dict(
    OK='060 Informational',
    UP='060 Informational',
    INFORMATIONAL='060 Informational',
    UNKNOWN='070 Unknown',
    WARNING='080 Warning',
    MINOR='080 Warning',
    MAJOR='090 Critical',
    CRITICAL='090 Critical',
    DOWN='090 Critical',
    UNREACHABLE='090 Critical',
)

# Lower-cased configuration option name -> key used in the validated config.
CONFIG_FIELD_MAP = dict(
    auth_url='instance_url',
    username='username',
    password='password',
    organization_id='organizationId',
    environment_id='environment_id',
    sandbox_enabled='domain',
    feed_enabled='feed_enabled',
    hash_func='hash_func',
)

# Names of the hashlib constructors that may be selected through the config.
ALLOWED_HASHING = ('md5', 'sha256')
# File holding the current Salesforce session id.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)
49
50
@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on *fd* while the ``with`` body runs.

    The lock is requested in non-blocking mode.  While another holder has
    it, a message is logged and the request is repeated every 5 seconds
    until it succeeds.  The lock is released when the body exits, whether
    normally or through an exception.

    :param fd: open file object or file descriptor to lock.
    :raises IOError: if ``flock`` fails for a reason other than the lock
        being held elsewhere.  Exceptions raised by the ``with`` body are
        propagated unchanged.
    """
    import errno

    contention = (errno.EAGAIN, errno.EWOULDBLOCK, errno.EACCES)

    # Acquire outside the try/finally below so that LOCK_UN is only issued
    # for a lock that was actually obtained, and so that an IOError coming
    # from the body is never mistaken for lock contention.
    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError as ex:
            if ex.errno not in contention:
                raise
            logger.info('Session file locked. Waiting 5 seconds...')
            time.sleep(5)
        else:
            break

    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
61
62
def sf_auth_retry(method):
    """Decorate a client method so it re-authenticates and retries once.

    If the wrapped call raises ``SalesforceExpiredSession`` or a requests
    ``ConnectionError``, the instance's ``auth()`` is called and the method
    is invoked a second time with the same arguments.  An exception from the
    second call, or any other exception from the first, propagates to the
    caller.

    :param method: instance method of an object that provides ``auth()``.
    :returns: the wrapping function, carrying the metadata of *method*.
    """
    from functools import wraps

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')

        # Both failures are recovered the same way: obtain a usable session,
        # then give the call exactly one more chance.
        self.auth()
        return method(self, *args, **kwargs)
    return wrapper
75
76
class SfNotifierError(Exception):
    """Error raised by the notifier, e.g. for an invalid configuration."""
79
80
class SalesforceClient(object):
    """Create, look up and close Salesforce cases that represent alerts.

    The Salesforce session id is stored in ``SESSION_FILE``.  Access to that
    file is serialised with ``flocked`` so that one worker at a time decides
    whether to log in again or to reuse the stored session id.
    """

    def __init__(self, config):
        """Validate *config*, prepare the HTTP session and authenticate once.

        :param config: mapping of option name to value; see
            ``_validate_config`` for the accepted names.
        :raises SfNotifierError: if *config* contains an unknown option.
        :raises KeyError: if ``hash_func``, ``feed_enabled`` or
            ``environment_id`` is absent from *config*.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier')
        }
        # alert id -> {'Id': case id}; entries expire after 300 seconds.
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)

        self.config = self._validate_config(config)
        # These options are consumed here; what is left in self.config is
        # passed to Salesforce() as keyword arguments.
        self.hash_func = self._hash_func(self.config.pop('hash_func'))
        self.feed_enabled = self.config.pop('feed_enabled')

        self.environment = self.config.pop('environment_id')
        self.sf = None
        self.session = Session()

        retry = Retry(connect=3, backoff_factor=0.5)
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('http://', adapter)
        self.session.mount('https://', adapter)

        # Single attempt only: construction must not loop on auth failure.
        self.auth(no_retry=True)

    @staticmethod
    def _hash_func(name):
        """Return the hashlib constructor called *name*.

        Falls back to ``hashlib.sha256`` (with a warning) when *name* is not
        listed in ``ALLOWED_HASHING``.
        """
        if name in ALLOWED_HASHING:
            return getattr(hashlib, name)
        logger.warning('Invalid hashing function "%s". '
                       'Switching to default "sha256".', name)
        return hashlib.sha256

    @staticmethod
    def _validate_config(config):
        """Translate *config* option names through ``CONFIG_FIELD_MAP``.

        Option names are matched case-insensitively.  The option mapped to
        ``domain`` is turned into ``'test'`` when truthy and dropped
        otherwise.

        :returns: new dict keyed by the mapped field names.
        :raises SfNotifierError: if an option name is not in
            ``CONFIG_FIELD_MAP``.
        """
        kwargs = {}

        for param, value in config.items():
            field = CONFIG_FIELD_MAP.get(param.lower())
            if field is None:
                env_var = 'SFDC_{}'.format(param)
                # Report the offending option name; ``field`` is always
                # None on this path.
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(param, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)

            kwargs[field] = value

            if field == 'domain':
                if value:
                    kwargs[field] = 'test'
                else:
                    del kwargs[field]

        return kwargs

    def _auth(self, config):
        """Build ``self.sf`` from *config* and record the outcome.

        *config* is not modified; the shared HTTP session is added to a
        copy.  The ``sf_auth_ok`` gauge is set to 1 on success and 0 on
        failure.

        :returns: True on success, False if ``Salesforce(...)`` raised.
        """
        try:
            self.sf = Salesforce(**dict(config, session=self.session))
        except Exception as ex:
            logger.error('Salesforce authentication failure: %s.', ex)
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    def _load_session(self, session_file):
        """Return the session id on the first line of *session_file*.

        :returns: the stripped first line, or None when the file is empty
            or its first line is blank.
        """
        lines = session_file.readlines()
        if not lines:
            return None
        return lines[0].strip() or None

    def _refresh_ready(self, saved_session):
        """Tell whether this worker should log in again.

        True when no session is stored, or when the stored session id is
        the one this client currently holds.  False when this client has no
        session yet or holds a different one; the caller then reuses the
        stored id.
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Authenticate with the stored session id instead of logging in."""
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url']
        }
        return self._auth(config)

    def _acquire_session(self):
        """Refresh or reuse the stored session while holding the file lock.

        ``SESSION_FILE`` is opened with mode ``'r+'`` and therefore has to
        exist already.  After a failed attempt the method sleeps 30 seconds
        before returning.

        :returns: True if a usable session was obtained, False otherwise.
        """
        # only one worker at a time can check session_file
        auth_success = False

        logger.info('Attempting to lock session file.')
        with open(SESSION_FILE, 'r+') as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attempting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        # Back off only after the lock and the file have been released.
        if auth_success is False:
            logger.warning('Waiting 30 seconds before next attempt...')
            time.sleep(30)

        return auth_success

    def auth(self, no_retry=False):
        """Obtain a Salesforce session.

        :param no_retry: when true, make a single attempt and return
            regardless of the outcome; otherwise keep trying until an
            attempt succeeds.
        """
        auth_ok = self._acquire_session()

        if no_retry:
            return

        while auth_ok is False:
            auth_ok = self._acquire_session()

    def _get_alert_id(self, labels):
        """Return a hex digest identifying the alert described by *labels*.

        The label values are concatenated in sorted key order, with every
        ``.`` replaced by ``\\.``, and hashed with the configured function.
        """
        alert_id_data = ''.join(
            labels[key].replace(".", "\\.") for key in sorted(labels)
        )
        return self.hash_func(alert_id_data.encode('utf-8')).hexdigest()

    @staticmethod
    def _is_watchdog(labels):
        """Return True if the ``alertname`` label is ``watchdog``.

        The comparison ignores case; a missing label counts as False.
        """
        return labels.get('alertname', '').lower() == 'watchdog'

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a case for the alert unless one is already known.

        :returns: ``(0, case_id)`` for a newly created case, or
            ``(1, case_id)`` when the alert is in the local cache or
            Salesforce answers ``DUPLICATE_VALUE``.
        :raises SalesforceMalformedRequest: for any other rejection.
        """
        # A single lookup, so an entry expiring between a membership test
        # and the read cannot raise KeyError.
        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            logger.warning('Duplicate case for alert: %s.', alert_id)
            return 1, cached['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        if labels.get('cluster_id') is not None:
            payload['ClusterId__c'] = labels['cluster_id']

        if self._is_watchdog(labels):
            payload['IsWatchDogAlert__c'] = 'true'

        logger.info('Try to create case: %s.', payload)
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: %s.', case)
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: %s.', msg)
                # The existing case id is taken from the last word of the
                # error message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: %s.', msg)
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark the case as ``Auto-solved`` and return the update result.

        ``Alert_ID__c`` is overwritten with a random UUID in the same
        update.
        NOTE(review): presumably this frees the alert id for a future case;
        confirm against the Salesforce schema.
        """
        logger.info('Try to close case: %s.', case_id)
        self.metrics['sf_request_count'].inc()
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: %s.', case_id)
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Attach a feed item with *subject* and *body* to the case."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        logger.debug('Creating feed item: %s.', feed_item)
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Return the case registered for *alert_id*.

        The local cache is consulted first, then Salesforce is queried by
        ``Alert_ID__c``.

        :returns: a mapping with an ``'Id'`` key, or None if Salesforce
            reports that no such case exists.
        """
        logger.info('Try to get case by alert ID: %s.', alert_id)

        cached = self._registered_alerts.get(alert_id)
        if cached is not None:
            return cached

        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            self._registered_alerts.pop(alert_id, None)
            logger.warning('Alert ID: %s not found.', alert_id)
            return None

    def create_case(self, subject, body, labels):
        """Open a case for the alert and optionally post a feed item.

        A feed item is posted when ``feed_enabled`` is set or the alert is
        a watchdog alert, for new and duplicate cases alike.

        :returns: dict with ``case_id``, ``alert_id`` and ``status``
            (``'created'`` or ``'duplicate'``).
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        response = {'case_id': case_id, 'alert_id': alert_id}

        if error_code == 1:
            response['status'] = 'duplicate'
        else:
            response['status'] = 'created'

        if self.feed_enabled or self._is_watchdog(labels):
            self._create_feed_item(subject, body, case_id)
        return response

    def close_case(self, labels):
        """Close the case that belongs to the alert, if there is one.

        :returns: dict with ``alert_id`` and ``status`` (``'resolved'``);
            when a case was found, also ``case_id`` and ``closed`` (the
            result of the update call).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        # pop() tolerates an entry that has expired in the meantime.
        self._registered_alerts.pop(alert_id, None)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +0100340 return response