| # Author: Alex Savatieiev (osavatieiev@mirantis.com; a.savex@gmail.com) |
| # Copyright 2019-2022 Mirantis, Inc. |
| import csv |
| import os |
| import json |
| |
| from copy import deepcopy |
| from datetime import datetime, timedelta, timezone |
| |
| from cfg_checker.common import logger_cli |
| from cfg_checker.common.decorators import retry |
| from cfg_checker.common.file_utils import write_str_to_file |
| from cfg_checker.helpers.console_utils import Progress |
| from cfg_checker.helpers.console_utils import cl_typewriter |
| from cfg_checker.reports import reporter |
| # from cfg_checker.common.exception import InvalidReturnException |
| # from cfg_checker.common.exception import ConfigException |
| # from cfg_checker.common.exception import KubeException |
| |
| from cfg_checker.nodes import KubeNodes |
| from cfg_checker.agent.fio_runner import _get_seconds, _datetime_fmt |
| |
| |
| _file_datetime_fmt = "%m%d%Y%H%M%S%z" |
| |
| |
| def _reformat_timestr(_str, _chars=["/", ",", " ", ":", "+"], _tchar=""): |
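| """ |
| Replace the given separator characters in a time string with |
| _tchar (empty by default) so it can be used in a file name. |
| """ |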
| _new = "" |
| for _c in _str: |
| _new += _c if _c not in _chars else _tchar |
| return _new |
| |
| |
| def _parse_json_output(buffer): |
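| """ |
| Parse a JSON string buffer. |
| Returns the decoded object or an empty dict if decoding fails |
| (the error is logged). |
| """ |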
| try: |
| return json.loads(buffer) |
| except TypeError as e: |
| logger_cli.error( |
| "ERROR: Status not decoded: {}\n{}".format(e, buffer) |
| ) |
| except json.decoder.JSONDecodeError as e: |
| logger_cli.error( |
| "ERROR: Status not decoded: {}\n{}".format(e, buffer) |
| ) |
| return {} |
| |
| |
| def _split_vol_size(size): |
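| """ |
| Split a volume size string into its numeric and unit parts, |
| e.g. "20G" -> (20, "G"). |
| """ |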
| # I know, but it is faster than regex |
| _numbers = [48, 49, 50, 51, 52, 53, 54, 55, 56, 57] |
| _s_int = "0" |
| _s_type = "" |
| for ch in size: |
| if ord(ch) in _numbers: |
| _s_int += ch |
| else: |
| _s_type += ch |
| return int(_s_int), _s_type |
| |
| |
| class CephBench(object): |
| _agent_template = "cfgagent-template.yaml" |
| |
| def __init__(self, config): |
| self.env_config = config |
| return |
| |
| |
| class SaltCephBench(CephBench): |
| def __init__( |
| self, |
| config |
| ): |
| logger_cli.error("ERROR: Not impelented for Salt environment!") |
| |
| # self.master = SaltNodes(config) |
| super(SaltCephBench, self).__init__(config) |
| return |
| |
| |
| class KubeCephBench(CephBench): |
| def __init__(self, config): |
| self.agent_count = config.bench_agent_count |
| self.master = KubeNodes(config) |
| super(KubeCephBench, self).__init__(config) |
| |
| self.mode = config.bench_mode |
| self.resource_prefix = config.resource_prefix |
| |
| if config.bench_mode == "tasks": |
| self.taskfile = config.bench_task_file |
| self.load_tasks(self.taskfile) |
| |
| if config.bench_mode == "cleanup": |
| self.cleanup_list = [] |
| return |
| |
| self.bench_name = config.bench_name |
| self.results_dump_path = config.bench_results_dump_path |
| self.results = {} |
| self.agent_results = {} |
| self.cleanup_list = [] |
| self.agent_pods = [] |
| |
| if config.bench_mode == "report": |
| self.results = {} |
| return |
| |
| self.storage_class = config.bench_storage_class |
| self.services = [] |
| # By default, |
| # 30 seconds should be enough to send tasks to 3-5 agents |
| self.scheduled_delay = 30 |
| |
| def set_ceph_info_class(self, ceph_info): |
| self.ceph_info = ceph_info |
| |
| def load_tasks(self, taskfile): |
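| """ |
| Load fio tasks from a CSV file. |
| Each row is expected to hold these columns in order: |
| readwrite, rwmixread, bs, iodepth, size, ramp_time, runtime. |
| An illustrative (made up) row: "randrw,75,4k,16,2G,5s,60s". |
| """ |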
| # Load csv file |
| logger_cli.info("-> Loading taskfile '{}'".format(taskfile)) |
| self.tasks = [] |
| with open(taskfile) as f: |
| _reader = csv.reader(f, delimiter=',') |
| # load tasks |
| for row in _reader: |
| self.tasks.append({ |
| "readwrite": row[0], |
| "rwmixread": row[1], |
| "bs": row[2], |
| "iodepth": row[3], |
| "size": row[4], |
| "ramp_time": row[5], |
| "runtime": row[6] |
| }) |
| logger_cli.info("-> Loaded {} tasks".format(len(self.tasks))) |
| |
| def add_for_deletion(self, obj, typ): |
| self.cleanup_list.append( |
| [ |
| typ, |
| obj.metadata.namespace, |
| obj.metadata.name |
| ] |
| ) |
| return |
| |
| def prepare_cleanup(self): |
| # Assume number of resources not given |
| # list all svc, pod, pvc, pv and identify 'cfgagent-xx' ones |
| _types = ["pv", "pvc", "pod", "svc"] |
| _prefix = self.resource_prefix |
| for _typ in _types: |
| _list = self.master.list_resource_names_by_type_and_ns(_typ) |
| for ns, name in _list: |
| if name.startswith(_prefix): |
| if ns: |
| _msg = "{} {}/{}".format(_typ, ns, name) |
| else: |
| _msg = "{} {}".format(_typ, name) |
| logger_cli.info("-> Found {}".format(_msg)) |
| self.cleanup_list.append([_typ, ns, name]) |
| return |
| |
| def prepare_agents(self, options): |
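| """ |
| Create a PVC and a benchmark agent pod per requested agent, |
| expose each pod via a service and prepopulate the per-agent |
| results map. The volume is sized ~30% larger than the test |
| file so the data file fits. |
| """ |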
| logger_cli.info("# Preparing {} agents".format(self.agent_count)) |
| # Increase volume size a bit so the data file fits |
| _quantizer = 1.3 |
| _v_size, _vol_size_units = _split_vol_size(options['size']) |
| _v_size = round(_v_size * _quantizer) |
| _vol_size = str(_v_size) + _vol_size_units + "i" |
| logger_cli.info( |
| "-> Testfile size: {0}, Volume size: {1} ({0}*{2})".format( |
| options['size'], |
| _vol_size, |
| _quantizer |
| ) |
| ) |
| # Start preparing |
| for idx in range(self.agent_count): |
| # create pvc/pv and pod |
| logger_cli.info("-> creating agent '{:02}'".format(idx)) |
| # _agent, _pv, _pvc = self.master.prepare_benchmark_agent( |
| _agent, _pvc = self.master.prepare_benchmark_agent( |
| idx, |
| os.path.split(options["filename"])[0], |
| self.storage_class, |
| _vol_size, |
| self._agent_template |
| ) |
| # save it to lists |
| self.agent_pods.append(_agent) |
| # self.add_for_deletion(_pv, "pv") |
| self.add_for_deletion(_pvc, "pvc") |
| self.add_for_deletion(_agent, "pod") |
| |
| # expose it |
| _svc = self.master.expose_benchmark_agent(_agent) |
| self.add_for_deletion(_svc, "svc") |
| # Save service |
| self.services.append(_svc) |
| # prepopulate results |
| self.agent_results[_agent.metadata.name] = {} |
| self.agent_results[_agent.metadata.name]["url"] = \ |
| "http://{}:{}/api/".format( |
| _svc.spec.cluster_ip, |
| 8765 |
| ) |
| self.agent_results[_agent.metadata.name]["storage_class"] = \ |
| self.storage_class |
| self.agent_results[_agent.metadata.name]["volume_size"] = \ |
| options['size'] |
| |
| logger_cli.info("-> Done creating agents") |
| # TODO: Update after implementing pooled task sending |
| # The idea is to allow up to 5 sec to schedule a task per agent |
| self.scheduled_delay = self.agent_count * 5 |
| logger_cli.info( |
| "-> Schedule delay set to {} sec".format(self.scheduled_delay) |
| ) |
| return |
| |
| def _poke_agent(self, url, body, action="GET"): |
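| """ |
| Call an agent's REST API by running curl inside the first |
| agent pod. If a body is given, it is uploaded to a temporary |
| file in the pod and sent as the request data. |
| Returns the parsed JSON response (empty dict on decode errors). |
| """ |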
| _datafile = "/tmp/data" |
| _data = [ |
| "-d", |
| "@" + _datafile |
| ] |
| _cmd = [ |
| "curl", |
| "-s", |
| "-H", |
| "'Content-Type: application/json'", |
| "-X", |
| action, |
| url |
| ] |
| if body: |
| _cmd += _data |
| _ret = self.master.prepare_json_in_pod( |
| self.agent_pods[0].metadata.name, |
| self.master._namespace, |
| body, |
| _datafile |
| ) |
| _ret = self.master.exec_cmd_on_target_pod( |
| self.agent_pods[0].metadata.name, |
| self.master._namespace, |
| " ".join(_cmd) |
| ) |
| return _parse_json_output(_ret) |
| |
| def _ensure_agents_ready(self): |
| # make sure agents are idle |
| _status_set = [] |
| _ready_set = [] |
| for _agent, _d in self.get_agents_status().items(): |
| # obviously, there should be some answer |
| if _d is None: |
| logger_cli.error("ERROR: Agent status not available") |
| return False |
| # status should be idle or finished |
| if _d['status'] not in ["idle", "finished"]: |
| logger_cli.error( |
| "Agent status invalid {}:{}".format(_agent, _d['status']) |
| ) |
| _status_set += [False] |
| else: |
| # Good agent |
| _status_set += [True] |
| # agent's fio shell should be in 'ready' |
| if not _d["healthcheck"]["ready"]: |
| logger_cli.error("Agent is not ready {}".format(_agent)) |
| _ready_set += [False] |
| else: |
| # 'fio' shell for agent is ready |
| _ready_set += [True] |
| # all agents' statuses should be True |
| # and all 'fio' shell modules should be 'ready' |
| if not all(_status_set) or not all(_ready_set): |
| # At least one is not ready and it was already logged above |
| return False |
| else: |
| # All is good |
| return True |
| |
| def get_agents_status(self, silent=True): |
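| """ |
| Query the fio status endpoint on every pod labeled app=cfgagent |
| and return a mapping of agent name to its parsed status. |
| """ |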
| _status = {} |
| _results = self.master.exec_on_labeled_pods_and_ns( |
| "app=cfgagent", |
| "curl -s http://localhost:8765/api/fio", |
| silent=silent |
| ) |
| for _agent, _result in _results.items(): |
| _j = _parse_json_output(_result) |
| _status[_agent] = _j |
| return _status |
| |
| @retry(Exception, initial_wait=5) |
| def get_agents_resultlist(self): |
| _t = {"module": "fio", "action": "get_resultlist"} |
| _status = {} |
| for _agent, _d in self.agent_results.items(): |
| _status[_agent] = self._poke_agent(_d["url"], _t, action="POST") |
| return _status |
| |
| @retry(Exception, initial_wait=5) |
| def get_result_from_agent(self, agent, time): |
| _t = { |
| "module": "fio", |
| "action": "get_result", |
| "options": { |
| "time": time |
| } |
| } |
| return self._poke_agent( |
| self.agent_results[agent]["url"], |
| _t, |
| action="POST" |
| ) |
| |
| def _get_next_scheduled_time(self): |
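| """ |
| Calculate the next benchmark start time (now plus the |
| scheduled delay) and return it as a formatted string. |
| """ |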
| _now = datetime.now(timezone.utc) |
| logger_cli.info("-> time is '{}'".format(_now.strftime(_datetime_fmt))) |
| self.next_scheduled_time = _now + timedelta( |
| seconds=self.scheduled_delay |
| ) |
| _str_time = self.next_scheduled_time.strftime(_datetime_fmt) |
| logger_cli.info( |
| "-> next benchmark scheduled to '{}'".format(_str_time) |
| ) |
| return _str_time |
| |
| def _send_scheduled_task(self, options): |
| _task = { |
| "module": "fio", |
| "action": "do_scheduledrun", |
| "options": options |
| } |
| for _agent, _d in self.agent_results.items(): |
| logger_cli.info( |
| "-> sending task to '{}:{}'".format(_agent, _d["url"]) |
| ) |
| _ret = self._poke_agent(_d["url"], _task, action="POST") |
| if 'error' in _ret: |
| logger_cli.error( |
| "ERROR: Agent returned: '{}'".format(_ret['error']) |
| ) |
| return False |
| # No errors detected |
| return True |
| |
| def track_benchmark(self, options): |
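| """ |
| Poll agent statuses while a scheduled benchmark run executes. |
| Prints inline progress, periodically snapshots Ceph cluster |
| status into the results and returns True once all agents |
| report 'finished', or False when the overall timeout expires. |
| """ |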
| _runtime = _get_seconds(options["runtime"]) |
| _ramptime = _get_seconds(options["ramp_time"]) |
| # Sum up all timings that we must wait and double it |
| _timeout = (self.scheduled_delay + _runtime + _ramptime) * 2 |
| # We should have no more than 65 measurements |
| _stats_delay = round((_runtime + _ramptime) / 65) |
| _start = self.next_scheduled_time |
| _end = datetime.now(timezone.utc) + timedelta(seconds=_timeout) |
| logger_cli.info(" ") |
| tw = cl_typewriter() |
| while True: |
| # Print status |
| tw.cl_start(" ") |
| _sts = self.get_agents_status(silent=True) |
| # Use same line |
| diff = (_end - datetime.now(timezone.utc)).total_seconds() |
| _startin = (_start - datetime.now(timezone.utc)).total_seconds() |
| if _startin > 0: |
| tw.cl_inline("-> starting in {:.2f}s ".format(_startin)) |
| else: |
| tw.cl_inline("-> {:.2f}s; ".format(diff)) |
| _progress = [_st["progress"] for _st in _sts.values()] |
| tw.cl_inline( |
| "{}% <-> {}%; ".format( |
| min(_progress), |
| max(_progress) |
| ) |
| ) |
| |
| _a_sts = [_t["status"] for _t in _sts.values()] |
| tw.cl_inline( |
| ", ".join( |
| ["{} {}".format(_a_sts.count(_s), _s) |
| for _s in set(_a_sts)] |
| ) |
| ) |
| |
| # Get Ceph status if _start time passed |
| _elapsed = (datetime.now(timezone.utc) - _start).total_seconds() |
| if _elapsed > _stats_delay: |
| # Use same line output |
| tw.cl_inline(" {:.2f}s elapsed".format(_elapsed)) |
| _sec = "{:0.1f}".format(_elapsed) |
| self.results[options["scheduled_to"]]["ceph"][_sec] = \ |
| self.ceph_info.get_cluster_status() |
| # Check if agents finished |
| finished = [True for _s in _sts.values() |
| if _s["status"] == 'finished'] |
| _fcnt = len(finished) |
| _tcnt = len(_sts) |
| if _fcnt < _tcnt: |
| tw.cl_inline("; {}/{}".format(_fcnt, _tcnt)) |
| else: |
| tw.cl_flush(newline=True) |
| logger_cli.info("-> All agents finished run") |
| return True |
| # recalc how much is left |
| diff = (_end - datetime.now(timezone.utc)).total_seconds() |
| # In case end_datetime was in the past to begin with |
| if diff < 0: |
| tw.cl_flush(newline=True) |
| logger_cli.info("-> Timed out waiting for agents to finish") |
| return False |
| tw.cl_flush() |
| |
| def _do_testrun(self, options): |
| self.results[options["scheduled_to"]]["osd_df_before"] = \ |
| self.ceph_info.get_ceph_osd_df() |
| # send single to agent |
| if not self._send_scheduled_task(options): |
| return False |
| # Track this benchmark progress |
| if not self.track_benchmark(options): |
| return False |
| else: |
| logger_cli.info("-> Finished testrun. Collecting results...") |
| # get ceph osd stats |
| self.results[options["scheduled_to"]]["osd_df_after"] = \ |
| self.ceph_info.get_ceph_osd_df() |
| # Get results for each agent |
| self.collect_results() |
| logger_cli.info("-> Calculating totals and averages") |
| self.calculate_totals() |
| self.calculate_ceph_stats() |
| self.osd_df_compare(options["scheduled_to"]) |
| logger_cli.info("-> Dumping results") |
| for _time, _d in self.results.items(): |
| self.dump_result( |
| self._get_dump_filename(_time), |
| _d |
| ) |
| return True |
| |
| def wait_ceph_cooldown(self): |
| # TODO: Query Ceph once every 20 sec to make sure its load has dropped |
| |
| # get ceph idle status |
| self.ceph_idle_status = self.ceph_info.get_cluster_status() |
| self.health_detail = self.ceph_info.get_health_detail() |
| self.ceph_df = self.ceph_info.get_ceph_df() |
| self.ceph_pg_dump = self.ceph_info.get_ceph_pg_dump() |
| return |
| |
| def run_benchmark(self, options): |
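| """ |
| Run the benchmark in the configured mode. |
| In 'tasks' mode each task from the loaded taskfile is |
| scheduled and executed in turn, skipping tasks that already |
| have results; in 'single' mode one run with the given options |
| is executed. Returns True on success, False on first failure. |
| """ |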
| logger_cli.info("# Starting '{}' benchmark".format(self.mode)) |
| # Check agent readiness |
| logger_cli.info("# Checking agents") |
| if not self._ensure_agents_ready(): |
| return False |
| |
| # Make sure that Ceph is at low load |
| # TODO: Ceph status check |
| # self.wait_ceph_cooldown() |
| |
| # Do benchmark according to mode |
| if self.mode == "tasks": |
| logger_cli.info( |
| "# Running benchmark with tasks from '{}'".format( |
| self.taskfile |
| ) |
| ) |
| # take next task |
| _total_tasks = len(self.tasks) |
| for idx in range(_total_tasks): |
| # init time to schedule |
| _task = self.tasks[idx] |
| _r = self.results |
| logger_cli.info( |
| "-> Starting next task ({}/{})".format(idx+1, _total_tasks) |
| ) |
| logger_cli.info("-> Updating options with: {}".format( |
| ", ".join( |
| ["{} = {}".format(k, v) for k, v in _task.items()] |
| ) |
| ) |
| ) |
| # update options |
| options.update(_task) |
| # Check if such result already exists |
| o = "input_options" |
| _existing = filter( |
| lambda t: |
| _r[t]["id"] == idx and |
| _r[t]["mode"] == "tasks" and |
| _r[t][o]["readwrite"] == options["readwrite"] and |
| _r[t][o]["rwmixread"] == options["rwmixread"] and |
| _r[t][o]["bs"] == options["bs"] and |
| _r[t][o]["iodepth"] == options["iodepth"] and |
| _r[t][o]["size"] == options["size"], |
| _r |
| ) |
| if len(list(_existing)) > 0: |
| logger_cli.info( |
| "-> Skipped already performed task from {}: " |
| "line {}, {}({}), {}, {}, {}".format( |
| self.taskfile, |
| idx, |
| options["readwrite"], |
| options["rwmixread"], |
| options["bs"], |
| options["iodepth"], |
| options["size"] |
| ) |
| ) |
| continue |
| _sch_time = self._get_next_scheduled_time() |
| options["scheduled_to"] = _sch_time |
| # init results table |
| _r[_sch_time] = { |
| "id": idx, |
| "mode": self.mode, |
| "input_options": deepcopy(options), |
| "agents": {}, |
| "ceph": {} |
| } |
| # exit on error |
| if not self._do_testrun(options): |
| return False |
| # Save ceph osd stats and wait cooldown |
| self.wait_ceph_cooldown() |
| elif self.mode == "single": |
| logger_cli.info("# Running single benchmark") |
| # init time to schedule |
| _sch_time = self._get_next_scheduled_time() |
| options["scheduled_to"] = _sch_time |
| # init results table |
| self.results[_sch_time] = { |
| "id": "{:2}".format(0), |
| "input_options": options, |
| "agents": {}, |
| "ceph": {} |
| } |
| if not self._do_testrun(options): |
| return False |
| # Save ceph osd stats |
| else: |
| logger_cli.error("ERROR: Unknown mode '{}'".format(self.mode)) |
| return False |
| |
| # Normal exit |
| logger_cli.info("# All benchmark tasks done") |
| return True |
| |
| def cleanup(self): |
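| """ |
| Delete all recorded resources in reverse creation order and |
| wait until they are reported as gone. |
| """ |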
| logger_cli.info("# Cleaning up") |
| self.cleanup_list.reverse() |
| |
| for _res in self.cleanup_list: |
| self.master.cleanup_resource_by_name(_res[0], _res[2], ns=_res[1]) |
| |
| # Wait for resource to be cleaned |
| _timeout = 120 |
| _total = len(self.cleanup_list) |
| logger_cli.info("-> Wait until {} resources cleaned".format(_total)) |
| _p = Progress(_total) |
| while True: |
| _g = self.master.get_resource_phase_by_name |
| _l = [_g(r[0], r[2], ns=r[1]) for r in self.cleanup_list] |
| _l = [item for item in _l if item] |
| _idx = _total - len(_l) |
| if len(_l) > 0: |
| _p.write_progress(_idx) |
| else: |
| _p.write_progress(_idx) |
| _p.end() |
| logger_cli.info("# Done cleaning up") |
| break |
| if _timeout > 0: |
| _timeout -= 1 |
| else: |
| _p.end() |
| logger_cli.info("# Timed out waiting after 120s.") |
| break |
| |
| return |
| |
| def collect_results(self): |
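| """ |
| Fetch result lists from all agents and download any results |
| for the current runs that are not stored locally yet. |
| """ |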
| logger_cli.info("# Collecting results") |
| # query agents for results |
| _agents = self.get_agents_resultlist() |
| for _agent, _l in _agents.items(): |
| # Check if we already have this locally |
| for _time in _l["resultlist"]: |
| # There is a file already for this task/time |
| # Check if we need to load it |
| if _time not in self.results: |
| # Some older results found |
| # do not process them |
| logger_cli.debug( |
| "...skipped old results for '{}'".format(_time) |
| ) |
| continue |
| elif _agent not in self.results[_time]["agents"]: |
| # Load result add it locally |
| logger_cli.info( |
| "-> Getting results for '{}' from '{}'".format( |
| _time, |
| _agent |
| ) |
| ) |
| _r = self.get_result_from_agent(_agent, _time) |
| self.results[_time]["agents"][_agent] = _r[_time] |
| else: |
| # Should never happen, actually |
| logger_cli.info( |
| "-> Skipped loaded result for '{}' from '{}'".format( |
| _time, |
| _agent |
| ) |
| ) |
| |
| def _get_dump_filename(self, _time): |
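| """ |
| Build the dump file path for a result: a folder named after |
| the benchmark options' name, and a file name encoding the run |
| time, agent count, readwrite mode, block size and iodepth. |
| """ |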
| _r = self.results[_time] |
| _dirname = _r["input_options"]["name"] |
| _filename = "-".join([ |
| _reformat_timestr(_time), |
| "{:02}".format(len(_r["agents"])), |
| _r["input_options"]["readwrite"], |
| _r["input_options"]["bs"], |
| str(_r["input_options"]["iodepth"]), |
| ]) + ".json" |
| return os.path.join( |
| self.results_dump_path, |
| _dirname, |
| _filename |
| ) |
| |
| def preload_results(self): |
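| """ |
| Load previously dumped results for this benchmark name from |
| the dump folder, recovering the schedule time from each |
| file name. |
| """ |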
| logger_cli.info( |
| "# Preloading results for '{}'".format(self.bench_name) |
| ) |
| # get all dirs in folder |
| _p = self.results_dump_path |
| if not os.path.isdir(_p): |
| logger_cli.warn( |
| "WARNING: Dump path is not a folder '{}'".format(_p) |
| ) |
| return |
| for path, dirs, files in os.walk(_p): |
| if path == os.path.join(_p, self.bench_name): |
| logger_cli.info("-> Folder found '{}'".format(path)) |
| for _fname in files: |
| logger_cli.debug("... processing '{}'".format(_fname)) |
| _ext = _fname.split('.')[-1] |
| if _ext != "json": |
| logger_cli.info( |
| "-> Extension invalid '{}', " |
| "'json' is expected".format(_ext) |
| ) |
| continue |
| # get time from filename |
| # Ugly, but works |
| _t = _fname.split('-')[0] |
| _str_time = _t[:14] + "+" + _t[14:] |
| _t = datetime.strptime(_str_time, _file_datetime_fmt) |
| _time = _t.strftime(_datetime_fmt) |
| self.results[_time] = self.load_dumped_result( |
| os.path.join(path, _fname) |
| ) |
| logger_cli.info( |
| "-> Loaded '{}' as '{}'".format( |
| _fname, |
| _time |
| ) |
| ) |
| |
| def dump_result(self, filename, data): |
| # Function dumps all available results as jsons to the given path |
| # overwriting if needed |
| _folder, _file = os.path.split(filename) |
| # Do dump |
| if not os.path.exists(_folder): |
| os.mkdir(_folder) |
| logger_cli.info("-> Created folder '{}'".format(_folder)) |
| # Dump agent data for this test run |
| write_str_to_file(filename, json.dumps(data, indent=2)) |
| logger_cli.info("-> Dumped '{}'".format(filename)) |
| return |
| |
| def load_dumped_result(self, filename): |
| try: |
| with open(filename, "rt+") as f: |
| return json.loads(f.read()) |
| except FileNotFoundError as e: |
| logger_cli.error( |
| "ERROR: {}".format(e) |
| ) |
| except TypeError as e: |
| logger_cli.error( |
| "ERROR: Invalid file ({}): {}".format(filename, e) |
| ) |
| except json.decoder.JSONDecodeError as e: |
| logger_cli.error( |
| "ERROR: Failed to decode json ({}): {}".format(filename, e) |
| ) |
| return None |
| |
| def _lookup_storage_class_id_by_name(self, storage_class_name): |
| # Assume that self has proper data |
| for _pool in self.ceph_df["pools"]: |
| if storage_class_name == _pool["name"]: |
| return _pool["id"] |
| return None |
| |
| def calculate_totals(self): |
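| """ |
| Aggregate per-agent fio results into totals for each run: |
| bandwidth and IOPS are summed; average and 95th percentile |
| latencies are averaged and converted from ns to us. |
| """ |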
| def _savg(vlist): |
| if len(vlist) > 0: |
| return (sum(vlist) / len(vlist)) / 1000 |
| else: |
| return 0 |
| # Calculate totals for Read and Write |
| for _time, data in self.results.items(): |
| if "totals" not in data: |
| data["totals"] = {} |
| else: |
| continue |
| _totals = data["totals"] |
| _r_bw = 0 |
| _r_avglat = [] |
| _r_95clat = [] |
| _r_iops = 0 |
| _w_bw = 0 |
| _w_avglat = [] |
| _w_95clat = [] |
| _w_iops = 0 |
| for _a, _d in data["agents"].items(): |
| # Hardcoded number of jobs param :( |
| _j = _d["jobs"][0] |
| _r_bw += _j["read"]["bw_bytes"] |
| _r_avglat += [_j["read"]["lat_ns"]["mean"]] |
| _r_iops += _j["read"]["iops"] |
| _w_bw += _j["write"]["bw_bytes"] |
| _w_avglat += [_j["write"]["lat_ns"]["mean"]] |
| _w_iops += _j["write"]["iops"] |
| # check for percentiles |
| if "percentile" in _j["read"]["clat_ns"]: |
| _r_95clat += \ |
| [_j["read"]["clat_ns"]["percentile"]["95.000000"]] |
| else: |
| _r_95clat += [] |
| if "percentile" in _j["write"]["clat_ns"]: |
| _w_95clat += \ |
| [_j["write"]["clat_ns"]["percentile"]["95.000000"]] |
| else: |
| _w_95clat += [] |
| |
| # Save storage class name |
| if "storage_class" not in _totals: |
| _totals["storage_class"] = \ |
| self.agent_results[_a]["storage_class"] |
| # Lookup storage class id and num_pg |
| _totals["storage_class_stats"] = \ |
| reporter.get_pool_stats_by_id( |
| self._lookup_storage_class_id_by_name( |
| self.agent_results[_a]["storage_class"] |
| ), |
| self.ceph_pg_dump |
| ) |
| |
| _totals["read_bw_bytes"] = _r_bw |
| _totals["read_avg_lat_us"] = _savg(_r_avglat) |
| _totals["read_95p_clat_us"] = _savg(_r_95clat) |
| _totals["read_iops"] = _r_iops |
| _totals["write_bw_bytes"] = _w_bw |
| _totals["write_avg_lat_us"] = _savg(_w_avglat) |
| _totals["write_95p_clat_us"] = _savg(_w_95clat) |
| _totals["write_iops"] = _w_iops |
| |
| def calculate_ceph_stats(self): |
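| """ |
| Reduce raw Ceph status snapshots collected during a run to |
| their pgmap stats, find the peak read/write bandwidth and |
| IOPS values and add percent values for bar chart rendering. |
| """ |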
| # func to get values as lists |
| def _get_max_value(key, stats): |
| _max_time = 0 |
| _value = 0 |
| for _k, _v in stats.items(): |
| if key in _v and _value < _v[key]: |
| _max_time = _k |
| _value = _v[key] |
| return _max_time, _value |
| |
| def _perc(n, m): |
| if not n: |
| return 0 |
| elif not m: |
| return 0 |
| else: |
| return "{:.0f}%".format((n / m) * 100) |
| |
| def _axis_vals(val): |
| return [ |
| val, int(val*1.1), int(val*0.75), int(val*0.50), int(val*0.15) |
| ] |
| |
| _stats = {} |
| for _time, data in self.results.items(): |
| if "ceph" not in data: |
| logger_cli.warning( |
| "WARNING: Ceph stats raw data not found in results" |
| ) |
| continue |
| if "ceph_stats" not in data: |
| data["ceph_stats"] = {} |
| else: |
| continue |
| # Copy pool stats data |
| for _e, _d in data["ceph"].items(): |
| _stats[_e] = _d["pgmap"] |
| # Maximums |
| mrb_t, mrb = _get_max_value("read_bytes_sec", _stats) |
| mwb_t, mwb = _get_max_value("write_bytes_sec", _stats) |
| mri_t, mri = _get_max_value("read_op_per_sec", _stats) |
| mwi_t, mwi = _get_max_value("write_op_per_sec", _stats) |
| # Replace ceph with shorter data |
| data["ceph"] = { |
| "max_rbl": _axis_vals(mrb), |
| "max_rbl_time": mrb_t, |
| "max_wbl": _axis_vals(mwb), |
| "max_wbl_time": mwb_t, |
| "max_ril": _axis_vals(mri), |
| "max_ril_time": mri_t, |
| "max_wil": _axis_vals(mwi), |
| "max_wil_time": mwi_t, |
| "stats": _stats |
| } |
| # Calculate %% values for barchart |
| for _e, _d in data["ceph"]["stats"].items(): |
| _d["read_bytes_sec_perc"] = \ |
| _perc(_d.get("read_bytes_sec", 0), mrb) |
| _d["write_bytes_sec_perc"] = \ |
| _perc(_d.get("write_bytes_sec", 0), mwb) |
| _d["read_op_per_sec_perc"] = \ |
| _perc(_d.get("read_op_per_sec", 0), mri) |
| _d["write_op_per_sec_perc"] = \ |
| _perc(_d.get("write_op_per_sec", 0), mwi) |
| return |
| |
| def osd_df_compare(self, _time): |
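| """ |
| Compare 'ceph osd df' data gathered before and after a run, |
| store per-OSD changes (in percent) and an overall summary, |
| then drop the raw before/after data from the results. |
| """ |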
| def _get_osd(osd_id, nodes): |
| for osd in nodes: |
| if osd["id"] == osd_id: |
| return osd |
| return None |
| |
| logger_cli.info("# Comparing OSD stats") |
| _osd = {} |
| if _time not in self.results: |
| logger_cli.warning("WARNING: {} not found in results. Check data") |
| return |
| data = self.results[_time] |
| # Save summary |
| data["osd_summary"] = {} |
| data["osd_summary"]["before"] = data["osd_df_before"]["summary"] |
| data["osd_summary"]["after"] = data["osd_df_after"]["summary"] |
| data["osd_summary"]["active"] = { |
| "status": "", |
| "device_class": "", |
| "pgs": 0, |
| "kb_used": 0, |
| "kb_used_data": 0, |
| "kb_used_omap": 0, |
| "kb_used_meta": 0, |
| "utilization": 0, |
| "var_down": 0, |
| "var_up": 0 |
| } |
| # Compare OSD counts |
| osds_before = len(data["osd_df_before"]["nodes"]) |
| osds_after = len(data["osd_df_after"]["nodes"]) |
| if osds_before != osds_after: |
| logger_cli.warning( |
| "WARNING: Before/After bench OSD " |
| "count mismatch for '{}'".format(_time) |
| ) |
| # iterate osds from before |
| _pgs = 0 |
| _classes = set() |
| _nodes_up = 0 |
| for idx in range(osds_before): |
| _osd_b = data["osd_df_before"]["nodes"][idx] |
| # search for the same osd in after |
| _osd_a = _get_osd(_osd_b["id"], data["osd_df_after"]["nodes"]) |
| # Save data to the new place |
| _osd[_osd_b["name"]] = {} |
| _osd[_osd_b["name"]]["before"] = _osd_b |
| if not _osd_a: |
| # If this happens, the Ceph cluster is actually broken |
| logger_cli.warning( |
| "WARNING: Wow! {} disappeared".format(_osd_b["name"]) |
| ) |
| _osd[_osd_b["name"]]["after"] = {} |
| else: |
| _osd[_osd_b["name"]]["after"] = _osd_a |
| _osd[_osd_b["name"]]["percent"] = {} |
| # Calculate summary using "after" data |
| _pgs += _osd_a["pgs"] |
| _classes.update([_osd_a["device_class"]]) |
| if _osd_a["status"] == "up": |
| _nodes_up += 1 |
| # compare |
| _keys_b = list(_osd_b.keys()) |
| _keys_a = list(_osd_a.keys()) |
| # To be safe, detect if some keys are different |
| # ...and log it. |
| _diff = set(_keys_b).symmetric_difference(_keys_a) |
| if len(_diff) > 0: |
| # This should never happen, actually |
| logger_cli.warning( |
| "WARNING: Before/after keys mismatch " |
| "for OSD node {}: {}".format(idx, ", ".join(_diff)) |
| ) |
| continue |
| # Compare each key and calculate how it changed |
| for k in _keys_b: |
| if _osd_b[k] != _osd_a[k]: |
| # Announce change |
| logger_cli.debug( |
| "-> {:4}: {}, {} -> {}".format( |
| idx, |
| k, |
| _osd_b[k], |
| _osd_a[k] |
| ) |
| ) |
| # calculate percent |
| _change_perc = (_osd_a[k] / _osd_b[k]) * 100 - 100 |
| _osd[_osd_b["name"]]["percent"][k] = _change_perc |
| |
| # Increase counters |
| _p = data["osd_summary"]["active"] |
| |
| if k not in _p: |
| _p[k] = 1 |
| else: |
| _p[k] += 1 |
| if k == "var": |
| if _change_perc > 0: |
| _p["var_up"] += 1 |
| elif _change_perc < 0: |
| _p["var_down"] += 1 |
| # Save sorted data |
| data["osds"] = _osd |
| logger_cli.info("-> Removing redundand osd before/after data") |
| data.pop("osd_df_before") |
| data.pop("osd_df_after") |
| # Save summary |
| data["osd_summary"]["active"]["status"] = "{}".format(_nodes_up) |
| data["osd_summary"]["active"]["device_class"] = \ |
| "{}".format(len(list(_classes))) |
| data["osd_summary"]["active"]["pgs"] = _pgs |
| return |
| |
| # Create report |
| def create_report(self, filename): |
| """ |
| Create static html showing ceph info report |
| |
| :return: none |
| """ |
| logger_cli.info("### Generating report to '{}'".format(filename)) |
| _report = reporter.ReportToFile( |
| reporter.HTMLCephBench(self), |
| filename |
| ) |
| _report( |
| { |
| "results": self.results, |
| "idle_status": self.ceph_idle_status, |
| "health_detail": self.health_detail, |
| "ceph_df": self.ceph_df, |
| "ceph_pg_dump": self.ceph_pg_dump, |
| "info": self.ceph_info.ceph_info, |
| "cluster": self.ceph_info.cluster_info, |
| "ceph_version": self.ceph_info.ceph_version, |
| "nodes": self.agent_pods |
| } |
| ) |
| logger_cli.info("-> Done") |
| |
| return |