blob: 44035285d470d79b7fbc0213245c692f925f289e [file] [log] [blame]
Michal Kobusaa3accf2019-06-05 12:25:09 +02001import fcntl
2import hashlib
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +01003import logging
Michal Kobusaa3accf2019-06-05 12:25:09 +02004import os
Michal Kobus211ee922019-04-15 17:44:06 +02005import time
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +01006import uuid
Michal Kobusaa3accf2019-06-05 12:25:09 +02007from contextlib import contextmanager
8
9from cachetools import TTLCache
Michal Kobus915f3cf2018-12-10 20:09:41 +010010
Michal Kobusafbf4d02018-11-28 14:18:05 +010011from prometheus_client import Counter, Gauge
12
Michal Kobus915f3cf2018-12-10 20:09:41 +010013from requests import Session
Michal Kobusaa3accf2019-06-05 12:25:09 +020014from requests.exceptions import ConnectionError as RequestsConnectionError
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +010015
16from simple_salesforce import Salesforce
Michal Kobusaa3accf2019-06-05 12:25:09 +020017from simple_salesforce import exceptions as sf_exceptions
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +010018
19
# Maps the upper-cased ``severity`` label of an alert to the value stored in
# the case's ``Alert_Priority__c`` field. Severities missing from this map
# fall back to '070 Unknown' at the lookup site.
STATE_MAP = {
    'OK': '060 Informational',
    'UP': '060 Informational',
    'UNKNOWN': '070 Unknown',
    'WARNING': '080 Warning',
    'MINOR': '080 Warning',
    'MAJOR': '090 Critical',
    'CRITICAL': '090 Critical',
    'DOWN': '090 Critical',
    'UNREACHABLE': '090 Critical',
}

# Maps a setting name to the keyword argument it becomes after validation.
# Each setting is looked up upper-cased in the config mapping and can be
# overridden by an ``SFDC_<SETTING>`` environment variable.
# 'sandbox_enabled' is special: a true value is translated to domain='test',
# anything else drops the 'domain' key entirely.
CONFIG_FIELD_MAP = {
    'auth_url': 'instance_url',
    'username': 'username',
    'password': 'password',
    'organization_id': 'organizationId',
    'environment_id': 'environment_id',
    'sandbox_enabled': 'domain',
}

# hashlib function names accepted for computing alert IDs.
ALLOWED_HASHING = ('md5', 'sha256')
# File holding the current Salesforce session id; access is serialized
# between workers with an flock on this file.
SESSION_FILE = '/tmp/session'

logger = logging.getLogger(__name__)
45
46
@contextmanager
def flocked(fd):
    """Hold an exclusive ``flock`` on *fd* for the duration of the block.

    The lock is requested in non-blocking mode and polled every 5 seconds
    until it is granted, so the ``with`` body always runs with the lock
    held. The lock is released when the block exits, whether normally or
    through an exception; exceptions raised by the body propagate unchanged.

    :param fd: an open file object or file descriptor accepted by
        ``fcntl.flock``.
    :raises IOError: if locking fails for a reason other than the lock
        being held by someone else.
    """
    import errno  # local import: keeps this block self-contained

    while True:
        try:
            fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError as ex:
            # Only "lock is busy" is worth retrying; anything else (e.g. a
            # bad descriptor) would otherwise spin here forever.
            if ex.errno not in (errno.EACCES, errno.EAGAIN):
                raise
            logger.info('Session file locked. Waiting 5 seconds...')
            time.sleep(5)
        else:
            break

    # Only the body is guarded by try/finally, so we never unlock a lock we
    # did not acquire and never swallow errors raised by the caller's code.
    try:
        yield
    finally:
        fcntl.flock(fd, fcntl.LOCK_UN)
57
58
def sf_auth_retry(method):
    """Decorate a client method so it is retried once after re-authenticating.

    If the wrapped call raises ``SalesforceExpiredSession`` or a requests
    ``ConnectionError``, ``self.auth()`` is invoked and the method is called
    a second time; the second call's result or exception is passed through
    as is. Any other exception propagates immediately.

    :param method: an instance method of an object that provides ``auth()``.
    :returns: the wrapping function, carrying the wrapped method's metadata.
    """
    from functools import wraps  # local import: keeps the block standalone

    @wraps(method)
    def wrapper(self, *args, **kwargs):
        try:
            return method(self, *args, **kwargs)
        except sf_exceptions.SalesforceExpiredSession:
            logger.warning('Salesforce session expired.')
            self.auth()
        except RequestsConnectionError:
            logger.error('Salesforce connection error.')
            self.auth()
        # Reached only after a successful re-authentication: retry once.
        return method(self, *args, **kwargs)
    return wrapper
71
72
class SfNotifierError(Exception):
    """Error raised by sf-notifier, e.g. for an invalid configuration."""
75
76
class SalesforceClient(object):
    """Create and close Salesforce cases on behalf of monitoring alerts.

    The Salesforce session id is kept in ``SESSION_FILE`` so that several
    workers can share one login; access to that file is serialized with
    ``flocked``. Recently handled alerts are remembered in a TTL cache
    (alert id -> ``{'Id': case_id}``) to avoid redundant requests.
    """

    def __init__(self, config):
        """Validate *config*, set up metrics and authenticate.

        :param config: mapping with upper-cased setting names (see
            ``CONFIG_FIELD_MAP``); environment variables take precedence.
        :raises SfNotifierError: if a required setting is missing.
        """
        self.metrics = {
            'sf_auth_ok': Gauge('sf_auth_ok', 'sf-notifier'),
            'sf_error_count': Counter('sf_error_count', 'sf-notifier'),
            'sf_request_count': Counter('sf_request_count', 'sf-notifier'),
        }
        self.config = self._validate_config(config)
        self.hash_func = self._hash_func()
        # environment_id is not a login parameter; keep it out of the
        # kwargs later handed to Salesforce().
        self.environment = self.config.pop('environment_id')
        self._registered_alerts = TTLCache(maxsize=2048, ttl=300)
        self.sf = None
        self.session = Session()
        self.auth()

    @staticmethod
    def _hash_func():
        """Return the hashlib constructor used to derive alert ids.

        The name is read from ``SF_NOTIFIER_ALERT_ID_HASH_FUNC``; values
        outside ``ALLOWED_HASHING`` fall back to sha256 (with a warning).
        """
        name = os.environ.get('SF_NOTIFIER_ALERT_ID_HASH_FUNC', 'sha256')
        if name not in ALLOWED_HASHING:
            logger.warning(
                'Unsupported hash function "%s", falling back to sha256.',
                name)
            return hashlib.sha256
        return getattr(hashlib, name)

    @staticmethod
    def _validate_config(config):
        """Build the keyword arguments for the Salesforce login.

        For every entry of ``CONFIG_FIELD_MAP`` the ``SFDC_<SETTING>``
        environment variable is consulted first, then ``config``.

        :param config: mapping supporting ``get`` with upper-cased keys.
        :returns: dict keyed by the target field names.
        :raises SfNotifierError: if a setting other than the sandbox flag
            is missing from both sources.
        """
        kwargs = {}

        for param, field in CONFIG_FIELD_MAP.items():
            setting_var = param.upper()
            env_var = 'SFDC_{}'.format(setting_var)
            value = os.environ.get(env_var, config.get(setting_var))

            if field == 'domain':
                # Optional flag: a true value selects the 'test' domain,
                # anything else leaves the key out altogether.
                if value in ('true', 'True', True):
                    kwargs[field] = 'test'
                continue

            if value is None:
                msg = ('Invalid config: missing "{}" field or "{}" environment'
                       ' variable.').format(param, env_var)
                logger.error(msg)
                raise SfNotifierError(msg)
            kwargs[field] = value
        return kwargs

    def _auth(self, config):
        """Create ``self.sf`` from *config* and record the outcome.

        :param config: keyword arguments for ``Salesforce``; not modified.
        :returns: True on success, False if authentication failed.
        """
        # Work on a copy so the caller's dict (notably self.config) does not
        # silently gain a 'session' entry.
        params = dict(config, session=self.session)
        try:
            self.sf = Salesforce(**params)
        except sf_exceptions.SalesforceAuthenticationFailed as ex:
            logger.error('Salesforce authentication failure: %s.', ex)
            self.metrics['sf_auth_ok'].set(0)
            return False

        logger.info('Salesforce authentication successful.')
        self.metrics['sf_auth_ok'].set(1)
        return True

    @staticmethod
    def _open_session_file():
        """Open ``SESSION_FILE`` for read/write, creating it if missing."""
        fd = os.open(SESSION_FILE, os.O_RDWR | os.O_CREAT, 0o600)
        try:
            return os.fdopen(fd, 'r+')
        except Exception:
            os.close(fd)
            raise

    def _load_session(self, session_file):
        """Return the session id stored in *session_file*, or None.

        Only the first line is considered; surrounding whitespace (such as
        a trailing newline) is ignored and a blank line counts as empty.
        """
        lines = session_file.readlines()
        if not lines:
            return None
        return lines[0].strip() or None

    def _refresh_ready(self, saved_session):
        """Tell whether this worker should log in again.

        A new login is needed when no session is stored, or when the stored
        session is the one this worker is currently using. If the stored id
        differs from ours (or we have none yet), it is reused instead.
        """
        if saved_session is None:
            logger.info('Current session is None.')
            return True

        if self.sf is None:
            return False

        return self.sf.session_id == saved_session

    def _reuse_session(self, saved_session):
        """Build the client from a stored session id instead of logging in."""
        logger.info('Reusing session id from file.')
        # limit params to avoid login request
        config = {
            'session_id': saved_session,
            'instance_url': self.config['instance_url'],
        }
        return self._auth(config)

    def _acquire_session(self):
        """Make one attempt to obtain a usable session.

        Under the session-file lock, either refreshes the session (storing
        the new id in the file) or adopts the id already stored there. On
        failure the call sleeps 30 seconds before returning.

        :returns: True if ``self.sf`` was (re)created, False otherwise.
        """
        # only one worker at a time can check session_file
        auth_success = False

        with self._open_session_file() as session_file:
            with flocked(session_file):
                logger.info('Successfully locked session file for refresh.')

                saved_session = self._load_session(session_file)

                if self._refresh_ready(saved_session):
                    logger.info('Attempting to refresh session.')

                    if self._auth(self.config):
                        auth_success = True
                        session_file.truncate(0)
                        session_file.seek(0)
                        session_file.write(self.sf.session_id)
                        # Push the id to disk before the lock is released;
                        # otherwise the next worker may read an empty file.
                        session_file.flush()
                        logger.info('Refreshed session successfully.')
                    else:
                        logger.error('Failed to refresh session.')
                else:
                    logger.info('Not refreshing. Reusing session.')
                    auth_success = self._reuse_session(saved_session)

        # Back off without holding the lock or the file.
        if not auth_success:
            logger.warning('Waiting 30 seconds before next attempt...')
            time.sleep(30)

        return auth_success

    def auth(self):
        """Block until a Salesforce session has been acquired."""
        while not self._acquire_session():
            pass

    def _get_alert_id(self, labels):
        """Derive a stable alert id from the label values.

        Values are taken in sorted key order, dots are escaped, and the
        concatenation is hashed with ``self.hash_func``.

        :param labels: mapping of label name to string value.
        :returns: hex digest string.
        """
        alert_id_data = ''.join(
            labels[key].replace('.', '\\.') for key in sorted(labels)
        )
        return self.hash_func(alert_id_data.encode('utf-8')).hexdigest()

    def _cached_case(self, alert_id):
        """Return the cached entry for *alert_id*, or None if absent."""
        # A single lookup: a membership test followed by indexing could fail
        # if the entry expires in between.
        try:
            return self._registered_alerts[alert_id]
        except KeyError:
            return None

    def _forget_alert(self, alert_id):
        """Drop *alert_id* from the cache; a missing entry is not an error."""
        try:
            del self._registered_alerts[alert_id]
        except KeyError:
            pass

    @sf_auth_retry
    def _create_case(self, subject, body, labels, alert_id):
        """Create a case for the alert unless one is already known.

        :returns: tuple ``(code, case_id)`` where code is 0 for a newly
            created case and 1 for a duplicate.
        :raises SalesforceMalformedRequest: for request errors other than
            ``DUPLICATE_VALUE``.
        """
        cached = self._cached_case(alert_id)
        if cached is not None:
            logger.warning('Duplicate case for alert: %s.', alert_id)
            return 1, cached['Id']

        severity = labels.get('severity', 'unknown').upper()
        payload = {
            'Subject': subject,
            'Description': body,
            'IsMosAlert__c': 'true',
            'Alert_Priority__c': STATE_MAP.get(severity, '070 Unknown'),
            'Alert_Host__c': labels.get('host') or labels.get(
                'instance', 'UNKNOWN'
            ),
            'Alert_Service__c': labels.get('service', 'UNKNOWN'),
            'Environment2__c': self.environment,
            'Alert_ID__c': alert_id,
        }
        if labels.get('cluster_id') is not None:
            payload['ClusterId__c'] = labels['cluster_id']

        logger.info('Try to create case: %s.', payload)
        try:
            self.metrics['sf_request_count'].inc()
            case = self.sf.Case.create(payload)
            logger.info('Created case: %s.', case)
        except sf_exceptions.SalesforceMalformedRequest as ex:
            msg = ex.content[0]['message']
            err_code = ex.content[0]['errorCode']

            if err_code == 'DUPLICATE_VALUE':
                logger.warning('Duplicate case: %s.', msg)
                # The id of the existing case is taken from the last word
                # of the error message.
                case_id = msg.split()[-1]
                self._registered_alerts[alert_id] = {'Id': case_id}
                return 1, case_id

            logger.error('Cannot create case: %s.', msg)
            self.metrics['sf_error_count'].inc()
            raise

        self._registered_alerts[alert_id] = {'Id': case['id']}
        return 0, case['id']

    @sf_auth_retry
    def _close_case(self, case_id):
        """Mark the case as auto-solved and return the update result."""
        logger.info('Try to close case: %s.', case_id)
        self.metrics['sf_request_count'].inc()
        # Alert_ID__c is overwritten with a random value on close;
        # NOTE(review): presumably so the alert id can be used by a new case.
        update = self.sf.Case.update(
            case_id,
            {'Status': 'Auto-solved', 'Alert_ID__c': uuid.uuid4().hex}
        )
        logger.info('Closed case: %s.', case_id)
        return update

    @sf_auth_retry
    def _create_feed_item(self, subject, body, case_id):
        """Attach a feed item with *subject* and *body* to the case."""
        feed_item = {'Title': subject, 'ParentId': case_id, 'Body': body}
        return self.sf.FeedItem.create(feed_item)

    @sf_auth_retry
    def _get_case_by_alert_id(self, alert_id):
        """Look up the case for *alert_id*, in the cache first.

        :returns: mapping containing at least ``'Id'``, or None if
            Salesforce reports no such case.
        """
        logger.info('Try to get case by alert ID: %s.', alert_id)

        cached = self._cached_case(alert_id)
        if cached is not None:
            return cached
        try:
            return self.sf.Case.get_by_custom_id('Alert_ID__c', alert_id)
        except sf_exceptions.SalesforceResourceNotFound:
            self._forget_alert(alert_id)
            logger.warning('Alert ID: %s not found.', alert_id)
            return None

    def create_case(self, subject, body, labels):
        """Open (or find) the case for an alert and add a feed item to it.

        :returns: dict with ``case_id``, ``alert_id`` and ``status``
            (``'created'`` or ``'duplicate'``).
        """
        alert_id = self._get_alert_id(labels)

        error_code, case_id = self._create_case(subject, body,
                                                labels, alert_id)

        self._create_feed_item(subject, body, case_id)

        response = {'case_id': case_id, 'alert_id': alert_id}

        if error_code == 1:
            response['status'] = 'duplicate'
        else:
            response['status'] = 'created'
        return response

    def close_case(self, labels):
        """Close the case that belongs to the alert described by *labels*.

        :returns: dict with ``alert_id`` and ``status``; when a case was
            found it also carries ``case_id`` and ``closed`` (the update
            result).
        """
        alert_id = self._get_alert_id(labels)
        case = self._get_case_by_alert_id(alert_id)

        response = {'alert_id': alert_id, 'status': 'resolved'}

        if case is None:
            return response

        self._forget_alert(alert_id)

        response['case_id'] = case['Id']
        response['closed'] = self._close_case(case['Id'])
        return response
Mateusz Matuszkowiak2820c662018-11-21 12:07:25 +0100312 return response