# Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com)
# Copyright 2019-2022 Mirantis, Inc.
import base64
import io
import json
import os
import tarfile

from datetime import datetime
from time import sleep

from cfg_checker.common import logger_cli
from cfg_checker.common.exception import KubeException

from cfg_checker.helpers.console_utils import Progress
from cfg_checker.helpers.tgz import TGZFile
from cfg_checker.nodes import KubeNodes
from cfg_checker.reports import reporter


class CephInfo(object):
    def __init__(
        self,
        config
    ):
        self.env_config = config
        return

    def get_info_archive_filename(self, client, project):
        # prefill known data
        _tags = ["CephCollectData"]
        _tags.append(client)
        _tags.append(project)

        # generate date for tgz
        _file_datetime_fmt = "%Y-%m-%d"
        _dt = datetime.now().strftime(_file_datetime_fmt)
        _tags.append(_dt)

        # extension
        _tags.append("tar")
        _tags.append("gz")
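        # Illustrative result (hypothetical values): client "acme" and
        # project "cloud-01" on 2022-10-06 yield
        # "CephCollectData.acme.cloud-01.2022-10-06.tar.gz"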
        return ".".join(_tags)

    def get_transposed_latency_table(self):
        _table = {
            "<dev>": []
        }
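        # Each sampling iteration appends one cell per row: a header cell
        # to "<dev>" and a formatted 'commit/apply' cell to every "osd_N"
        # row, so rows stay aligned column-by-column when printed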
        for _pfd in self.ceph_info['osd_latency_data']['data']['data']:
            _table["<dev>"].append({
                "formatted": " cL/aL ",
                "commit_latency_ms": "Commit, ms",
                "apply_latency_ms": "Apply, ms",
                "commit_latency_ns": "Commit, ns",
                "apply_latency_ns": "Apply, ns"
            })
            for _f in _pfd['osdstats']['osd_perf_infos']:
                _n = "osd_{}".format(_f['id'])
                if _n not in _table:
                    _table[_n] = []
                _table[_n].append({
                    "formatted": "{:>3}/{:<3}".format(
                        _f['perf_stats']['commit_latency_ms'],
                        _f['perf_stats']['apply_latency_ms'],
                    ),
                    "commit_latency_ms": _f['perf_stats']['commit_latency_ms'],
                    "apply_latency_ms": _f['perf_stats']['apply_latency_ms'],
                    "commit_latency_ns": _f['perf_stats']['commit_latency_ns'],
                    "apply_latency_ns": _f['perf_stats']['apply_latency_ns']
                })
        self.ceph_info['osd_latency_data']['table'] = _table
        return _table

    def get_latest_health_readout(self):
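        # Pick the most recent dated sample for every device, carrying the
        # adjacent 'osd_name'/'node_name' values over into the result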
        _h = self.ceph_info['ceph_health']['data']
        self.ceph_info['ceph_health']['latest'] = {}
        for _n, _d in _h.items():
            if not _d:
                self.ceph_info['ceph_health']['latest'][_n] = {}
                continue
            else:
                # TODO: Consider filtering out or prepare data for the table
                _osd = _d.pop("osd_name")
                _node_name = _d.pop("node_name")
                # Additional check for empty data
                if not _d:
                    self.ceph_info['ceph_health']['latest'][_n] = {}
                    continue
                _date = sorted(_d.keys(), reverse=True)[0]
                self.ceph_info['ceph_health']['date'] = _date
                self.ceph_info['ceph_health']['latest'][_n] = _d[_date]
                self.ceph_info['ceph_health']['latest'][_n]["osd_name"] = _osd
                self.ceph_info['ceph_health']['latest'][_n]["node_name"] = \
                    _node_name

        return self.ceph_info['ceph_health']['latest']

    def print_summary(self):
        logger_cli.info("\n# Ceph Cluster summary")
        # Health status
        _h = self.ceph_info['health_detail']['data']
        logger_cli.info("Cluster status: {}".format(_h['status']))
        for _chk, _d in _h['checks'].items():
            logger_cli.info(
                "+ {}: {}\n\tSummary: {}".format(
                    _chk,
                    _d['severity'],
                    _d['summary']['message']
                )
            )
            logger_cli.info("\tDetails:")
            for _item in _d['detail']:
                logger_cli.info("\t '{}'".format(_item['message']))

        # OSD health metrics
        logger_cli.info("\n# Device health metrics:")
        _fmt = " {:45} {:^14} {:^9} {:^6} {:^6}"
        logger_cli.info(
            _fmt.format(
                "Device Name",
                "Info",
                "Speed",
                "SMART",
                "Temp."
            )
        )
        _latest = self.get_latest_health_readout()
        for _n, _d in _latest.items():
            if not _d:
                logger_cli.info("{:45} {:<10}".format(_n, "<empty>"))
                continue

            _status = _d['smart_status']['passed']
            if "interface_speed" in _d:
                _speed = _d['interface_speed']['current']['string']
            else:
                _speed = "-"

            _status = 'passed' if _status else 'failed'
            logger_cli.info(
                _fmt.format(
                    _n,
                    _d['device']['info_name'],
                    _speed,
                    _status,
                    _d['temperature']['current']
                )
            )

        # Latency table
        logger_cli.info(
            "\n# OSD Latency data ({} iterations, {} sec delay), "
            "table items 'osd_dev: N:cL/aL'\n"
            " 'Commit Latency' -> 'cL', 'Apply Latency' -> 'aL'\n".format(
                self.ceph_info['osd_latency_data']['data']['total'],
                self.ceph_info['osd_latency_data']['data']['delay']
            )
        )
        _strs = self.get_transposed_latency_table()
        for _osd, _list in _strs.items():
            _row = [c["formatted"] for c in _list]
            logger_cli.info(
                " {:8}: {}".format(
                    _osd,
                    " ".join(_row)
                )
            )
        logger_cli.info("\n")

        # critical config values
        # TODO: print/calculate config values

        return

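    # Offline round-trip helpers: dump_info writes the collected data as
    # plain JSON into the current working directory and load_info reads it
    # back, e.g. to rebuild a report without access to the cluster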
    def dump_info(self):
        with open('cephdump.json', 'wt') as _f:
            _f.write(json.dumps(self.ceph_info, indent=2))

    def load_info(self):
        with open('cephdump.json', 'rt') as _f:
            self.ceph_info = json.load(_f)

    def generate_archive(self, tgzfilename):
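        # helper: keep the explicit filename if one was stored with the
        # item, otherwise derive it from the info key plus extension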
        def _ensure_fname(ext):
            return key + ext if _fname is None else _fname

        if not self.ceph_info:
            logger_cli.warning(
                "WARNING: Ceph Info Data not detected. "
                "Consider checking the log for errors."
            )
        else:
            # Create Archive
            logger_cli.info("-> Generating archive '{}'".format(tgzfilename))
            _tgz = TGZFile(
                tgzfilename,
                label="MCP Checker: Generated Ceph Information"
            )
            # Iterate every key and write data to tar file
            for key, d in self.ceph_info.items():
                _fname = None
                # Cast buf to a proper type
                _buf = None
                if "filename" in d:
                    _fname = d["filename"]
                if isinstance(d["data"], dict) or isinstance(d["data"], list):
                    _buf = json.dumps(d["data"], indent=2)
                    _filename = _ensure_fname(".json")
                elif isinstance(d["data"], str):
                    _buf = d["data"]
                    _filename = _ensure_fname(".txt")
                else:
                    _buf = str(d["data"])
                    _filename = _ensure_fname(".txt")
                logger_cli.debug("... writing '{}'".format(_filename))
                _tgz.add_file(_filename, buf=_buf, replace=True)

        return

    def create_html_report(self, filename):
        """
        Create static html showing ceph info report

        :return: none
        """
        logger_cli.info("### Generating report to '{}'".format(filename))
        _report = reporter.ReportToFile(
            reporter.HTMLCephInfo(self),
            filename
        )
        _report(
            {
                "info": self.ceph_info,
                "cluster": self.cluster_info,
                "nodes": self.nodes,
                "ceph_version": self.ceph_version,
            }
        )
        logger_cli.info("-> Done")

        return


class SaltCephInfo(CephInfo):
    def __init__(
        self,
        config
    ):
        logger_cli.warning(
            "\nWARNING: Not implemented for Salt environment!\n"
        )

        # self.master = SaltNodes(config)
        super(SaltCephInfo, self).__init__(config)
        return


class KubeCephInfo(CephInfo):
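    # Rook defaults: toolbox pod label and namespace, plus the coordinates
    # (group/version/plural) of the CephCluster custom resource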
    ceph_ns = "rook-ceph"
    ceph_app_label = "rook-ceph-tools"
    ceph_group = "ceph.rook.io"
    ceph_apiversion = "v1"
    ceph_plural = "cephclusters"
    ceph_version = "unknown"

    def __init__(self, config):
        self.master = KubeNodes(config)
        super(KubeCephInfo, self).__init__(config)
        # Init ceph tools pod
        self.pod_name = self._get_tools_pod_name()
        self.ceph_info = {}
        self.cluster_info = {}
        self.ceph_version = self.get_ceph_cluster_config()

    def _safe_tools_cmd(self, cmd_str, expect_output=True):
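        # Runs cmd_str inside the Ceph tools pod; logs a debug note when
        # expected output comes back empty and warns when a command that
        # should be silent produces output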
        _r = self.master.exec_cmd_on_target_pod(
            self.pod_name,
            self.ceph_ns,
            cmd_str
        )
        if expect_output and not _r:
            logger_cli.debug("... got empty output for '{}'".format(cmd_str))
        elif not expect_output and _r:
            logger_cli.warning(
                "WARNING: Unexpected output for '{}':\n"
                "===== Start\n{}\n===== End".format(cmd_str, _r)
            )
        return _r

    def _safe_tools_cmd_zipped_output(self, cmd_str):
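        # Workaround for large outputs (e.g. 'ceph pg dump'): redirect the
        # command output to a file in the pod, tar/gzip it, ship it over
        # the exec channel as base64 text, then decode and unpack locally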
        # temp file
        _tmp_path = "/tmp"
        _filename = "checker_cmd_output"
        _tar_path = os.path.join(_tmp_path, "checker_cmd.tgz")
        _path = os.path.join(_tmp_path, _filename)

        # Run original cmd with redirect
        _cmd = [cmd_str, "-o", _path]
        self._safe_tools_cmd(" ".join(_cmd), expect_output=False)
        # zip it and base64 encode
        _cmd = ["tar", "-zcvf", _tar_path, _path]
        self._safe_tools_cmd(" ".join(_cmd))
        _b64 = self._safe_tools_cmd("base64 " + _tar_path)
        # decode and decompress
        _io = io.BytesIO(base64.standard_b64decode(_b64))
        _json = ""
        with tarfile.open(fileobj=_io) as _tar:
            _tar_item = _tar.extractfile(_tar.getmembers()[0])
            _json = _tar_item.read()
        # cleanup
        self._safe_tools_cmd("rm -f " + _path)
        self._safe_tools_cmd("rm -f " + _tar_path)
        return _json

    def _safe_get_cmd_output_as_json(self, cmd, zipped=False):
        if zipped:
            _buf = self._safe_tools_cmd_zipped_output(cmd)
        else:
            _buf = self._safe_tools_cmd(cmd)
        try:
            return json.loads(_buf)
        except ValueError as e:
            _out = ""
            if len(_buf) > 512:
                _out = _buf[:512]
                _out += "..."
            else:
                _out = _buf
            logger_cli.error(
                "\nERROR: failed to parse json: '{}'. Data: '{}'".format(
                    e,
                    _out
                )
            )
            return _buf

    def _get_tools_pod_name(self):
        # get ceph pod
        _names = self.master.kube.get_pod_names_by_partial_name(
            self.ceph_app_label,
            self.ceph_ns
        )
        if not _names:
            raise KubeException(
                "Failed to find pod using '{}'".format(self.ceph_app_label)
            )
        elif len(_names) > 1:
            logger_cli.warning(
                "WARNING: Environment has more than one pod "
                "with '{}' app: {}".format(
                    self.ceph_app_label,
                    ", ".join(_names)
                )
            )
        else:
            logger_cli.debug("... found '{}'".format(_names[0]))
        return _names[0]

    def _add_ceph_info_item(self, key, title, data, filename=None):
        # handle data
        if key in self.ceph_info:
            self.ceph_info[key]["title"] = title
            self.ceph_info[key]["data"] = data
        else:
            self.ceph_info[key] = {
                "title": title,
                "data": data
            }
        if filename:
            self.ceph_info[key]["filename"] = filename

    def _parse_dev_classes(self, deviceClasses):
        _devClasses = []
        for _i in deviceClasses:
            _devClasses += list(_i.values())
        return set(_devClasses)

    def get_ceph_cluster_config(self):
        # get cephclusters resource
        logger_cli.info("# Loading '{}' object of type '{}/{}'".format(
            self.ceph_plural,
            self.ceph_group,
            self.ceph_apiversion
        ))
        _r = self.master.kube.get_custom_resource(
            self.ceph_group,
            self.ceph_apiversion,
            self.ceph_plural,
        )
        # find cluster
        _cluster = None
        if len(_r['items']) < 1:
            logger_cli.warning(
                "WARNING: Failed to find '{}' ({}/{})".format(
                    self.ceph_plural,
                    self.ceph_group,
                    self.ceph_apiversion
                )
            )
            return 'unknown'
        elif len(_r['items']) > 1:
            logger_cli.warning(
                "WARNING: Multiple clusters found '{}' ({}/{})".format(
                    self.ceph_plural,
                    self.ceph_group,
                    self.ceph_apiversion
                )
            )
        _cluster = _r['items'][0]
        _s = _cluster['status']
        self.cluster_info.update({
            'image': _s['version']['image'],
            'version': _s['version']['version'],
            'device_classes': self._parse_dev_classes(
                _s['storage'].get('deviceClasses', [])
            ),
            'phase': _s['phase'],
            'state': _s['state'],
            'health': _s['ceph'].get('health', {}),
            'previousHealth': _s['ceph'].get('previousHealth', {}),
            'lastChanged': _s['ceph'].get('lastChanged', ""),
            'lastChecked': _s['ceph'].get('lastChecked', ""),
            'mon_count': _cluster['spec']['mon']['count']
        })
        self.nodes = _cluster['spec']['storage']['nodes']
        logger_cli.info("-> Found Ceph cluster: {} ({})".format(
            self.cluster_info['version'],
            self.cluster_info['image']
        ))
        return self.cluster_info['version']

    def get_cluster_status(self):
        return self._safe_get_cmd_output_as_json("ceph -s -f json")

    def get_health_detail(self):
        return self._safe_get_cmd_output_as_json("ceph -f json health detail")

    def get_ceph_df(self):
        return self._safe_get_cmd_output_as_json("ceph df -f json")

    def get_ceph_pg_dump(self):
        return self._safe_get_cmd_output_as_json(
            "ceph pg dump -f json",
            zipped=True
        )

    def get_ceph_osd_df(self):
        return self._safe_get_cmd_output_as_json("ceph osd df -f json")

    def gather_info(self):
        logger_cli.info("# Gathering Ceph cluster info")
        # Collect info
        _c = self._safe_tools_cmd
        _cj = self._safe_get_cmd_output_as_json
        # Crush Map
        logger_cli.info("-> Collecting CRUSH map")
        _cmap_tmp_path = "/tmp/crushmap.bin"
        _r = _c(
            "ceph osd getcrushmap -o " + _cmap_tmp_path,
            expect_output=False
        )
        # TODO: Handle errors in _r
        logger_cli.debug("... 'getcrushmap' return value is: '{}'".format(_r))

        # Get Crush map as json and text
        self._add_ceph_info_item(
            "crushmap_json",
            "Crush Map (json)",
            _cj("crushtool -i " + _cmap_tmp_path + " --dump"),
            filename="crushmap.json"
        )
        self._add_ceph_info_item(
            "crushmap_text",
            "Crush Map (text)",
            _c("crushtool -d " + _cmap_tmp_path),
            filename="crushmap.txt"
        )

        logger_cli.info("-> Collecting ceph osd crush dump")
        self._add_ceph_info_item(
            "osd_crushdump",
            "Crush dump (osd)",
            _cj("ceph osd crush dump")
        )

        logger_cli.info("-> Collecting cluster status")
        self._add_ceph_info_item(
            "cluster_status",
            "Cluster status",
            self.get_cluster_status()
        )

        logger_cli.info("-> Collecting health detail")
        self._add_ceph_info_item(
            "health_detail",
            "Health details",
            self.get_health_detail()
        )

        logger_cli.info("-> Collecting monmap")
        self._add_ceph_info_item(
            "monmap",
            "Ceph Mon map",
            _cj("ceph mon dump -f json")
        )

        logger_cli.info("-> Collecting ceph df")
        self._add_ceph_info_item(
            "ceph_df",
            "Ceph DF",
            self.get_ceph_df()
        )

        logger_cli.info("-> Collecting ceph osd df")
        self._add_ceph_info_item(
            "ceph_osd_df",
            "Ceph OSD DF",
            self.get_ceph_osd_df()
        )

        logger_cli.info("-> Collecting ceph osd dump")
        self._add_ceph_info_item(
            "ceph_osd_dump",
            "Ceph OSD dump",
            _cj("ceph osd dump -f json")
        )

        logger_cli.info("-> Collecting rados df")
        self._add_ceph_info_item(
            "rados_df",
            "Rados DF",
            _cj("rados df -f json")
        )

        logger_cli.info("-> Collecting ceph report")
        self._add_ceph_info_item(
            "ceph_report",
            "Ceph Report",
            _cj("ceph report")
        )

        logger_cli.info("-> Collecting anonymized auth data")
        _auth_data = _cj("ceph auth list -f json")
        # Anonymize data
        # _cj("ceph auth list -f json | sed 's/AQ[^=]*==/KEY/g'")
        for item in _auth_data["auth_dump"]:
            if "key" in item:
                item['key'] = "key-data-redacted"
        self._add_ceph_info_item(
            "ceph_auth_ls",
            "Ceph Auth Data (anonymized)",
            _auth_data
        )

        logger_cli.info("-> Collecting ceph pg dump")
        self._add_ceph_info_item(
            "ceph_pg_dump",
            "Ceph PG dump",
            self.get_ceph_pg_dump()
        )

        logger_cli.info("-> Collecting ceph running configuration")
        self._add_ceph_info_item(
            "ceph_config_dump",
            "Ceph Configuration Dump",
            _cj("ceph config dump -f json")
        )

        logger_cli.info("-> Collecting health metrics")
        _health_metrics = {}
        _devices = _c("ceph device ls")
        _devices = _devices.splitlines()
        _progress = Progress(len(_devices)-1)
        _index = 1
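        # 'ceph device ls' prints 'DEVICE  HOST:DEV  DAEMONS ...'; the
        # header row is skipped below and metrics are keyed by
        # '<daemon>_<device>', e.g. 'osd.12_VENDOR_MODEL_SERIAL'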
        for device in _devices:
            _t = device.split()
            _osd = _t[2]
            _node = _t[1]
            _dev = _t[0]
            if _dev == "DEVICE":
                continue
            _metric = _cj("ceph device get-health-metrics {}".format(_dev))
            _dev_name = "{}_{}".format(_osd, _dev)
            _health_metrics[_dev_name] = _metric
            _health_metrics[_dev_name]['node_name'] = _node
            _health_metrics[_dev_name]['osd_name'] = _osd
            _progress.write_progress(_index, note=_dev_name)
            _index += 1
        _progress.end()
        self._add_ceph_info_item(
            "ceph_health",
            "Ceph Health Metrics",
            _health_metrics
        )

        # Latency values
        # constants for the sampling run
        _latency_count = 10
        _latency_delay = 4
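        # poll 'ceph osd perf' _latency_count times, sleeping
        # _latency_delay seconds between samples; print_summary() renders
        # the result as the per-OSD latency table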
        logger_cli.info(
            "-> Collecting ceph osd latency data "
            "({} total, {} sec delay)".format(
                _latency_count,
                _latency_delay
            )
        )
        _osd_lat = {
            "total": _latency_count,
            "delay": _latency_delay,
            "data": []
        }
        _progress = Progress(_latency_count)
        _index = 1
        while _index <= _latency_count:
            _progress.write_progress(_index)
            _osd_lat["data"].append(_cj("ceph osd perf -f json"))
            sleep(_latency_delay)
            _index += 1
        _progress.end()
        self._add_ceph_info_item(
            "osd_latency_data",
            "OSD Latency metrics",
            _osd_lat
        )

        return

    def gather_osd_configs(self):
        _total_osd = len(self.ceph_info["ceph_osd_df"]["data"]["nodes"])
        logger_cli.info(
            "-> Gathering OSD configuration ({})".format(_total_osd)
        )
        # Shortcuts
        # _c = self._safe_tools_cmd
        _cj = self._safe_get_cmd_output_as_json
        _progress = Progress(_total_osd)
        _idx = 1
        _cfgs = {}
        for _osd in self.ceph_info["ceph_osd_df"]["data"]["nodes"]:
            _progress.write_progress(_idx, note=_osd["name"])
            _cfgs[_osd["name"]] = _cj(
                "ceph config show-with-defaults -f json {}".format(
                    _osd["name"]
                )
            )
            _idx += 1
        _progress.end()

        # Process configs
        _base = {}
        _uniq = {}
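        # _base keeps the first value seen for every config option; any
        # OSD whose value differs from that baseline is recorded in _uniq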
        logger_cli.info("-> Filtering config values")
        _progress = Progress(_total_osd)
        _idx = 1
        for _osd, _data in _cfgs.items():
            _progress.write_progress(_idx, note=_osd)
            for _o in _data:
                _name = _o.pop("name")
                if not _o["value"]:
                    _o["value"] = "-"
                if _name not in _base:
                    _base[_name] = _o
                elif _base[_name]["value"] != _o["value"]:
                    _progress.clearline()
                    logger_cli.info(
                        "...specific value for {} (src: '{}'): {}={}".format(
                            _osd,
                            _o["source"],
                            _name,
                            _o["value"]
                        )
                    )
                    if _osd not in _uniq:
                        _uniq[_osd] = {}
                    _uniq[_osd][_name] = _o
            _idx += 1
        _progress.end()
        self._add_ceph_info_item(
            "osd_config_data",
            "OSD Configuration values",
            {
                "common": _base,
                "uniq": _uniq
            }
        )
        return
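

# Minimal usage sketch (illustrative; assumes a cfg-checker config object
# for a reachable Kubernetes environment; the file names are hypothetical):
#
#     ceph = KubeCephInfo(config)
#     ceph.gather_info()
#     ceph.gather_osd_configs()
#     ceph.generate_archive(
#         ceph.get_info_archive_filename("client", "project"))
#     ceph.create_html_report("ceph_report.html")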