blob: db012ac4a998d168541d7635292db621d458c2fe [file] [log] [blame]
import json
import os
import queue
import signal
import subprocess
from copy import deepcopy
from datetime import datetime, timezone
from platform import system, release, node
from threading import Thread
import threading
from time import sleep
from cfg_checker.common.exception import CheckerException
from cfg_checker.common.other import piped_shell
from cfg_checker.common.log import logger
_datetime_fmt = "%m/%d/%Y, %H:%M:%S%z"
fio_options_common = {
"name": "agent_run",
"filename": "/cephvol/testfile",
"status-interval": "500ms",
"randrepeat": 0,
"verify": 0,
"direct": 1,
"gtod_reduce": 0,
"bs": "32k",
"iodepth": 16,
"size": "10G",
"readwrite": "randrw",
"ramp_time": "5s",
"runtime": "30s",
"ioengine": "libaio"
}
fio_options_seq = {
"numjobs": 1,
"offset_increment": "500M"
}
fio_options_mix = {
"rwmixread": 50
}
seq_modes = ['read', 'write']
mix_modes = ['randrw']
rand_modes = ['randread', 'randwrite']
def get_fio_options():
# Duplicate function for external option access
_opts = deepcopy(fio_options_common)
_opts.update(deepcopy(fio_options_seq))
_opts.update(deepcopy(fio_options_mix))
return _opts
def output_reader(_stdout, outq):
for line in iter(_stdout.readline, ''):
outq.put(line)
def _o(option, param, suffix=""):
return "--{}={}{}".format(option, param, suffix)
def get_time(timestamp=None):
if not timestamp:
_t = datetime.now(timezone.utc)
else:
_t = datetime.fromtimestamp(timestamp)
return _t.strftime(_datetime_fmt)
def _get_seconds(value):
# assume that we have symbol at the end
_suffix = value[-1]
if _suffix == 's':
return int(value[:-1])
elif _suffix == 'm':
return int(value[:-1])*60
elif _suffix == 'h':
return int(value[:-1])*60*60
else:
return -1
def wait_until(end_datetime):
while True:
diff = (end_datetime - datetime.now(timezone.utc)).total_seconds()
# In case end_datetime was in past to begin with
if diff < 0:
return
sleep(diff/2)
if diff <= 0.1:
return
class ShellThread(object):
def __init__(self, cmd, queue):
self.cmd = cmd
self.queue = queue
self._p = None
self.timeout = 15
self.output = []
def run_shell(self):
# Start
_cmd = " ".join(self.cmd)
logger.debug("... {}".format(_cmd))
self._p = subprocess.Popen(
_cmd,
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.STDOUT,
env={"PYTHONUNBUFFERED": "1"},
universal_newlines=True,
bufsize=1
)
self._t = threading.Thread(
target=output_reader,
args=(self._p.stdout, self.queue)
)
self._t.start()
if not self.wait_started():
self.kill_shell()
def is_alive(self):
if not self._p.poll():
return True
else:
return False
def wait_started(self):
while True:
if not self.queue.empty():
break
else:
logger.debug("... {} sec".format(self.timeout))
sleep(1)
self.timeout -= 1
if not self.timeout:
logger.debug(
"...timed out after {} sec".format(str(self.timeout))
)
return False
logger.debug("... got first fio output")
return True
def kill_shell(self):
# Run the poll
if not self._p.poll():
self._p.send_signal(signal.SIGINT)
self.get_output()
def get_output(self):
while True:
try:
line = self.queue.get(block=False)
line = str(line) if isinstance(line, bytes) else line
self.output.append(line)
except queue.Empty:
return self.output
return None
class FioProcess(Thread):
# init vars for status
_fio_options_list = [
"--time_based",
"--output-format=json+",
"--eta=always"
]
_fio_options_seq_list = [
"--thread"
]
_fio_options_common = fio_options_common
_fio_options_seq = fio_options_seq
_fio_options_mix = fio_options_mix
eta_sec = 0
total_time_sec = 0
elapsed_sec = 0
testrun = {}
mount_point = "/cephvol"
filename = "testfile"
# test modes: 'randread', 'randwrite', 'read', 'write', 'randrw'
mode = "randrw"
_seq_modes = seq_modes
_mix_modes = mix_modes
_rand_modes = rand_modes
# results
results = {}
def _shell(self, cmd):
self._code, self._shell_output = piped_shell(cmd, code=True)
if self._code:
logger.error(
"# Shell error for '{}': [{}] {}".format(
cmd,
self._code,
self._shell_output
)
)
return False
else:
return True
def recalc_times(self):
_rt = _get_seconds(self._fio_options_common["runtime"])
_rup = _get_seconds(self._fio_options_common["ramp_time"])
if not _rt:
raise CheckerException("invalid 'runtime': '{}'".format(_rt))
elif not _rup:
raise CheckerException("invalid 'ramp_time': '{}'".format(_rt))
self.total_time_sec = _rt + _rup
self.eta_sec = self.total_time_sec
def __init__(self):
Thread.__init__(self)
logger.info("fio thread initialized")
# save system
self.system = system()
self.release = release()
self.hostname = node()
# create a clear var for last shell output
self._shell_output = ""
# prepare params
self.recalc_times()
# prepare the fio
self.fio_version = "unknown"
if not self._shell("fio --version"):
raise CheckerException(
"Error running fio: '{}'".format(self._shell_output)
)
else:
self.fio_version = self._shell_output
# all outputs timeline
self.timeline = {}
# setup target file
if not os.path.exists(self.mount_point):
logger.warning(
"WARNING: '{}' not exists, using tmp folder".format(
self.mount_point
)
)
self.mount_point = "/tmp"
self._fio_options_common["filename"] = os.path.join(
self.mount_point,
self.filename
)
if self.system == "Darwin":
self._fio_options_common["ioengine"] = "posixaio"
# Thread finish marker
self.finished = False
self.testrun_starttime = None
self.scheduled_datetime = None
def update_options(self, _dict):
# validate keys, do not validate numbers themselves
for k, v in _dict.items():
if k in self._fio_options_mix:
self._fio_options_mix[k] = v
elif k in self._fio_options_seq:
self._fio_options_seq[k] = v
elif k in self._fio_options_common:
self._fio_options_common[k] = v
else:
raise CheckerException(
"Unknown option: '{}': '{}'".format(k, v)
)
# recalc
self.recalc_times()
def run(self):
def _cut(_list, _s, _e):
_new = _list[_s:_e]
_pre = _list[:_s]
_list = _pre + _list[_e:]
return (_new, _list)
# create a cmd
_cmd = ["fio"]
_cmd += self._fio_options_list
_cmd += [_o(k, v) for k, v in self._fio_options_common.items()]
if self._fio_options_common["readwrite"] in self._seq_modes:
_sq = self._fio_options_seq_list
_cmd += _sq + [_o(k, v) for k, v in self._fio_options_seq.items()]
elif self._fio_options_common["readwrite"] in self._mix_modes:
_cmd += [_o(k, v) for k, v in self._fio_options_mix.items()]
_q = queue.Queue()
self.fiorun = ShellThread(_cmd, _q)
# Check if schedule is set
_now = datetime.now(timezone.utc)
if self.scheduled_datetime:
logger.debug(
"waiting for '{}', now is '{}', total of {} sec left".format(
self.scheduled_datetime.strftime(_datetime_fmt),
_now.strftime(_datetime_fmt),
(self.scheduled_datetime - _now).total_seconds()
)
)
wait_until(self.scheduled_datetime)
else:
self.testrun_starttime = _now.strftime(_datetime_fmt)
self.fiorun.run_shell()
_raw = []
_start = -1
_end = -1
while self.fiorun.is_alive() or not _q.empty():
while not _q.empty():
# processing
_bb = _q.get(block=False)
if isinstance(_bb, bytes):
_line = _bb.decode('utf-8')
else:
_line = _bb
if _start < 0 and _end < 0 and not _line.startswith("{"):
self.results[self.testrun_starttime] = {
"error": _line
}
self.eta = -1
self.fiorun.kill_shell()
self.finished = True
return
_current = _line.splitlines()
_raw += _current
for ll in range(len(_raw)):
if _start < 0 and _raw[ll] == "{":
_start = ll
elif _end < 0 and _raw[ll] == "}":
_end = ll
# loop until we have full json
if _end < 0 or _start < 0:
continue
# if start and and found, cut json
(_json, _raw) = _cut(_raw, _start, _end+1)
_start = -1
_end = -1
# Try to parse json
_json = "\n".join(_json)
try:
_json = json.loads(_json)
_timestamp = _json["timestamp"]
self.timeline[_timestamp] = _json["jobs"][0]
# save last values
self.eta_sec = self.timeline[_timestamp]["eta"]
self.elapsed_sec = self.timeline[_timestamp]["elapsed"]
self.testrun = _json
except TypeError as e:
logger.error("ERROR: {}".format(e))
except json.decoder.JSONDecodeError as e:
logger.error("ERROR: {}".format(e))
if not self.eta_sec:
break
sleep(0.1)
# Save status to results dictionary
self.results[self.testrun_starttime] = {
"result": self.testrun,
"timeline": self.timeline
}
self.finished = True
self.scheduled_datetime = None
self.testrun_starttime = None
return
def healthcheck(self):
_version = self.fio_version
_binary_path = self._shell_output if self._shell("which fio") else ""
if self._shell("fio --enghelp"):
_ioengines = self._shell_output
_ioengines = _ioengines.replace("\t", "")
_ioengines = _ioengines.splitlines()[1:]
self._shell_output = ""
else:
_ioengines = []
return {
"ready": all((_version, _binary_path, _ioengines)),
"version": _version,
"path": _binary_path,
"ioengines": _ioengines,
"system": self.system,
"release": self.release,
"hostname": self.hostname
}
def status(self):
_running = self.is_alive() and self.eta_sec >= 0
_scheduled = False
_diff = -1
if self.scheduled_datetime:
_now = datetime.now(timezone.utc)
_diff = (self.scheduled_datetime - _now).total_seconds()
if _diff > 0:
_scheduled = True
_s = "running" if _running else "idle"
_s = "scheduled" if _scheduled else _s
_s = "finished" if self.finished else _s
return {
"status": _s,
"progress": self.get_progress()
}
def end_fio(self):
if self.fiorun:
self.fiorun.kill_shell()
# Current run
def percent_done(self):
_total = self.elapsed_sec + self.eta_sec
return float(self.elapsed_sec) / float(_total) * 100.0
def get_progress(self):
return "{:.2f}".format(self.percent_done())
# latest parsed measurements
def get_last_measurements(self):
if self.timeline:
return self.timeline[max(list(self.timeline.keys()))]
else:
return {}
class FioProcessShellRun(object):
stats = {}
results = {}
def __init__(self, init_class=FioProcess):
self.init_class = init_class
self.actions = {
"do_singlerun": self.do_singlerun,
"do_scheduledrun": self.do_scheduledrun,
"get_options": self.get_options,
"get_result": self.get_result,
"get_resultlist": self.get_resultlist
}
self.fio_reset()
@staticmethod
def healthcheck(fio):
hchk = fio.healthcheck()
hchk_str = \
"# fio status: {}\n# {} at {}\n# Engines: {}".format(
"ready" if hchk["ready"] else "fail",
hchk["version"],
hchk["path"],
", ".join(hchk["ioengines"])
)
return hchk, hchk_str
def status(self):
return self.fio.status()
def fio_reset(self):
# Fancy way of handling fio class not even initialized yet
try:
_f = self.fio.finished
_r = self.fio.results
_o = self.fio.get_options()
except AttributeError:
_f = True
_r = None
_o = None
# Check if reset is needed
if not _f:
# No need to reset, fio is either idle or running
return
else:
# extract results if they present
if _r:
self.results.update(_r)
# re-init
_fio = self.init_class()
# Do healthcheck
self.hchk, self.hchk_str = self.healthcheck(_fio)
# restore options if they existed
if _o:
_fio.update_options(_o)
self.fio = _fio
def get_options(self):
_opts = deepcopy(self.fio._fio_options_common)
_opts.update(deepcopy(self.fio._fio_options_seq))
_opts.update(deepcopy(self.fio._fio_options_mix))
return _opts
def do_singlerun(self, options):
# Reset thread if it closed
self.fio_reset()
# Fill options
if "scheduled_to" in options:
# just ignore it
_k = "scheduled_to"
_v = options.pop(_k)
logger.warning("Ignoring option: '{}': '{}'".format(_k, _v))
self.fio.update_options(options)
# Start it
self.fio.start()
return True
def do_scheduledrun(self, options):
# Reset thread if it closed
self.fio_reset()
# Handle scheduled time
if "scheduled_to" not in options:
# required parameter not set
raise CheckerException("Parameter missing: 'scheduled_to'")
else:
# set time and get rid of it from options
self.fio.testrun_starttime = options.pop("scheduled_to")
self.fio.scheduled_datetime = datetime.strptime(
self.fio.testrun_starttime,
_datetime_fmt
)
# Fill options
self.fio.update_options(options)
# Start it
self.fio.start()
return True
def _get_result_object(self, obj_name, time):
if time in self.results:
if obj_name in self.results[time]:
return self.results[time][obj_name]
elif "error" in self.results[time]:
return self.results[time]["error"]
else:
return {
"error": "Empty {} for '{}'".format(obj_name, time)
}
else:
return {
"error": "Result not found for '{}'".format(time)
}
def _update_results(self):
# Update only in case of completed thread
if self.fio.finished:
_r_local = list(self.results.keys())
_r_fio = list(self.fio.results.keys())
for _r in _r_fio:
if _r not in _r_local:
self.results[_r] = self.fio.results.pop(_r)
def get_result(self, time):
self._update_results()
return self._get_result_object('result', time)
def get_result_timeline(self, time):
self._update_results()
return self._get_result_object('timeline', time)
# reporting
def get_resultlist(self):
self._update_results()
return list(self.results.keys())
def __call__(self):
if not self.fio.is_alive() and not self.fio.finished:
self.fio.start()
while self.fio.is_alive() and self.fio.eta_sec >= 0:
sleep(0.2)
self.stats = self.fio.get_last_measurements()
_r = self.stats.get('read', {})
_w = self.stats.get('write', {})
_r_bw = _r.get('bw_bytes', -1)
_r_iops = _r.get('iops', -1)
_w_bw = _w.get('bw_bytes', -1)
_w_iops = _w.get('iops', -1)
_s = self.fio.status()
if _s["status"] == "scheduled":
_t = self.fio.scheduled_datetime
_n = datetime.now(timezone.utc)
_delta = (_t - _n).total_seconds()
print(
"{}: waiting for '{}'; now '{}'; {} sec left".format(
_s["status"],
_t.strftime(_datetime_fmt),
_n.strftime(_datetime_fmt),
_delta
)
)
else:
stats = "{}: {:>7}% ({}/{}) " \
"(BW/IOPS: " \
"Read {:>9.2f} MB/{:>9.2f} " \
"Write {:>9.2f} MB/{:>9.2f})".format(
_s["status"],
_s["progress"],
self.fio.elapsed_sec,
self.fio.eta_sec,
_r_bw / 1024 / 1024,
_r_iops,
_w_bw / 1024 / 1024,
_w_iops
)
print(stats)
self.fio.end_fio()
if __name__ == '__main__':
# Debug shell to test FioProcessShellRun
_shell = FioProcessShellRun()
_opts = _shell.get_options()
_opts["readwrite"] = "read"
_opts["ramp_time"] = "1s"
_opts["runtime"] = "5s"
_opts["scheduled_to"] = "11/23/2021, 21:48:20+0000"
_shell.do_scheduledrun(_opts)
_shell()
_times = _shell.get_resultlist()
print("# results:\n{}".format("\n".join(_times)))
# print(
# "##### Dumping results\n{}".format(
# json.dumps(_shell.get_result(_times[0]), indent=2)
# )
# )
_shell.fio_reset()
_opts = _shell.get_options()
_opts["readwrite"] = "read"
_opts["ramp_time"] = "1s"
_opts["runtime"] = "10s"
# _opts["scheduled_to"] = "11/23/2021, 21:40:30+0000"
_shell.do_singlerun(_opts)
_shell()
_times = _shell.get_resultlist()
print("# results:\n{}".format("\n".join(_times)))