koder aka kdanilov | 962ee5f | 2016-12-19 02:40:08 +0200 | [diff] [blame] | 1 | import array |
| 2 | import logging |
| 3 | from typing import List, Dict, Tuple |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 4 | |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 5 | import numpy |
| 6 | |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 7 | from cephlib import sensors_rpc_plugin |
| 8 | |
koder aka kdanilov | 962ee5f | 2016-12-19 02:40:08 +0200 | [diff] [blame] | 9 | from . import utils |
koder aka kdanilov | 7022706 | 2016-11-26 23:23:21 +0200 | [diff] [blame] | 10 | from .test_run_class import TestRun |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 11 | from .result_classes import DataSource |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 12 | from .stage import Stage, StepOrder |
koder aka kdanilov | a732a60 | 2017-02-01 20:29:56 +0200 | [diff] [blame] | 13 | from .hlstorage import ResultStorage |
koder aka kdanilov | 7022706 | 2016-11-26 23:23:21 +0200 | [diff] [blame] | 14 | |
kdanylov aka koder | 150b219 | 2017-04-01 16:53:01 +0300 | [diff] [blame] | 15 | |
# Source of the sensors RPC plugin, read once at import time; it is later passed
# to node.upload_plugin(). Whatever extension the imported module's __file__
# carries is swapped for ".py" so that the plain source file is the one read.
plugin_fname = sensors_rpc_plugin.__file__.rsplit(".", 1)[0] + ".py"
with open(plugin_fname, "rb") as plugin_fd:  # close the handle deterministically
    SENSORS_PLUGIN_CODE = plugin_fd.read()  # type: bytes


logger = logging.getLogger("wally")
koder aka kdanilov | 7022706 | 2016-11-26 23:23:21 +0200 | [diff] [blame] | 21 | |
koder aka kdanilov | 962ee5f | 2016-12-19 02:40:08 +0200 | [diff] [blame] | 22 | |
# Measurement units for every parsed sensor metric, keyed by "<sensor>.<metric>".
# An empty string means the metric is stored without a unit label.
sensor_units = {
    "{}.{}".format(sensor_name, metric_name): unit
    for sensor_name, metrics in (
        ("system-cpu", (
            ("idle", ""),
            ("nice", ""),
            ("user", ""),
            ("sys", ""),
            ("iowait", ""),
            ("irq", ""),
            ("sirq", ""),
            ("steal", ""),
            ("guest", ""),
            ("procs_blocked", ""),
            ("procs_queue_x10", ""),
        )),
        ("net-io", (
            ("recv_bytes", "B"),
            ("recv_packets", ""),
            ("send_bytes", "B"),
            ("send_packets", ""),
        )),
        ("block-io", (
            ("io_queue", ""),
            ("io_time", "ms"),
            ("reads_completed", ""),
            ("rtime", "ms"),
            ("sectors_read", "B"),
            ("sectors_written", "B"),
            ("writes_completed", ""),
            ("wtime", "ms"),
            ("weighted_io_time", "ms"),
        )),
    )
    for metric_name, unit in metrics
}  # type: Dict[str, str]
| 52 | |
| 53 | |
# TODO(koder): if a node has more than one role, its sensor settings might be incorrect
class StartSensorsStage(Stage):
    """Stage that uploads the sensors RPC plugin to nodes and starts monitoring.

    Sensor settings are taken from ``ctx.config.sensors.roles_mapping`` and are
    resolved per node from the roles listed in ``node.info.roles``.
    """

    priority = StepOrder.START_SENSORS
    config_block = 'sensors'

    @staticmethod
    def _sensors_from_names(names: List[str]) -> Dict[str, object]:
        """Give every sensor name its default settings.

        'ceph' gets an empty dict, which run() fills in per node; every other
        sensor gets ".*" (presumably a match-everything device pattern -
        confirm against the sensors plugin).
        """
        return {name: ({} if name == 'ceph' else ".*") for name in names}

    def run(self, ctx: TestRun) -> None:
        """Start sensors on every node that has at least one sensor configured.

        Ids of nodes where monitoring was started are added to
        ``ctx.sensors_run_on``.

        Raises:
            utils.StopTestError: if array.array('L') items are not 8 bytes wide
                on this platform.
        """
        item_size = array.array('L').itemsize
        if item_size != 8:
            message = ("Python array.array('L') items should be 8 bytes in size, not {}."
                       " Can't provide sensors on this platform. Disable sensors in config and retry")
            logger.critical(message.format(item_size))
            raise utils.StopTestError()

        # TODO: need carefully fix this
        # sensors config is:
        #   role:
        #     sensor: [str]
        # or
        #  role:
        #     sensor:
        #        allowed: [str]
        #        dissallowed: [str]
        #        params: Any
        per_role_config = {}  # type: Dict[str, Dict[str, str]]

        for name, val in ctx.config.sensors.roles_mapping.raw().items():
            if isinstance(val, str):
                # comma-separated list of sensor names
                val = self._sensors_from_names([vl.strip() for vl in val.split(",")])
            elif isinstance(val, list):
                val = self._sensors_from_names(val)
            # any other value (e.g. an explicit mapping) is used as-is
            per_role_config[name] = val

        if 'all' in per_role_config:
            all_vl = per_role_config.pop('all')
            all_roles = set(per_role_config)

            for node in ctx.nodes:
                all_roles.update(node.info.roles)  # type: ignore

            # Apply the 'all' settings to every known role, including roles that
            # appear only on nodes and have no entry of their own in the config;
            # role-specific settings override the 'all' ones.
            for name in all_roles:
                new_vals = all_vl.copy()
                new_vals.update(per_role_config.get(name, {}))
                per_role_config[name] = new_vals

        for node in ctx.nodes:
            node_cfg = {}  # type: Dict[str, Dict[str, str]]
            for role in node.info.roles:
                node_cfg.update(per_role_config.get(role, {}))  # type: ignore

            nid = node.node_id
            if not node_cfg:
                logger.debug("Skip monitoring node %s, as no sensors selected", nid)
                continue

            # ceph requires additional settings
            if 'ceph' in node_cfg:
                # Work on a per-node copy: the dict stored in per_role_config is
                # shared between nodes (and roles), so updating it in place would
                # leak one node's ceph settings into the next node's config.
                ceph_cfg = dict(node_cfg['ceph'])
                ceph_cfg.update(node.info.params['ceph'])
                ceph_cfg['osds'] = [osd['id'] for osd in node.info.params['ceph-osds']]  # type: ignore
                node_cfg['ceph'] = ceph_cfg  # type: ignore

            logger.debug("Setting up sensors RPC plugin for node %s", nid)
            node.upload_plugin("sensors", SENSORS_PLUGIN_CODE)
            ctx.sensors_run_on.add(nid)
            logger.debug("Start monitoring node %s", nid)
            node.conn.sensors.start(node_cfg)
| 116 | |
| 117 | |
def collect_sensors_data(ctx: TestRun, stop: bool = False) -> None:
    """Fetch sensor data from every monitored node and append it to result storage.

    Only nodes whose id is in ``ctx.sensors_run_on`` are polled.

    Args:
        ctx: current test run; provides the nodes, the storage and the set of
            node ids where sensors were started.
        stop: when True, data is fetched via the plugin's ``stop`` call instead
            of ``get_updates``.

    Raises:
        ValueError: if a parsed path is neither 'collected_at' nor of the form
            '<sensor>.<dev>.<metric>'.
        KeyError: if '<sensor>.<metric>' has no entry in ``sensor_units``.
    """
    rstorage = ResultStorage(ctx.storage)
    raw_skipped = False  # makes sure the "raw data skipped" warning is logged only once

    for node in ctx.nodes:
        node_id = node.node_id
        if node_id not in ctx.sensors_run_on:
            continue

        func = node.conn.sensors.stop if stop else node.conn.sensors.get_updates

        # TODO: units should came along with data
        # TODO: process raw sensors data

        for path, value, is_parsed in sensors_rpc_plugin.unpack_rpc_updates(func()):
            if not is_parsed:
                if not raw_skipped:
                    logger.warning("Raw sensors data at path %r and, maybe, others are skipped", path)
                    raw_skipped = True
                continue

            if path == 'collected_at':
                ds = DataSource(node_id=node_id, metric='collected_at')
                units = 'us'
            else:
                # The device name may itself contain dots (e.g. a VLAN interface
                # such as "eth0.100"), so take the sensor from the left and the
                # metric from the right instead of requiring exactly three parts.
                sensor, dev_and_metric = path.split(".", 1)
                dev, metric = dev_and_metric.rsplit(".", 1)
                ds = DataSource(node_id=node_id, metric=metric, dev=dev, sensor=sensor)
                units = sensor_units["{}.{}".format(sensor, metric)]

            rstorage.append_sensor(numpy.array(value), ds, units)
koder aka kdanilov | 7022706 | 2016-11-26 23:23:21 +0200 | [diff] [blame] | 149 | |
| 150 | |
class CollectSensorsStage(Stage):
    """Stage that stops the sensors and stores whatever data they return."""

    priority = StepOrder.COLLECT_SENSORS
    config_block = 'sensors'

    def run(self, ctx: TestRun) -> None:
        """Fetch the remaining sensor data, using the plugin's stop call."""
        collect_sensors_data(ctx, stop=True)
koder aka kdanilov | 7022706 | 2016-11-26 23:23:21 +0200 | [diff] [blame] | 157 | |
| 158 | |
| 159 | # def delta(func, only_upd=True): |
| 160 | # prev = {} |
| 161 | # while True: |
| 162 | # for dev_name, vals in func(): |
| 163 | # if dev_name not in prev: |
| 164 | # prev[dev_name] = {} |
| 165 | # for name, (val, _) in vals.items(): |
| 166 | # prev[dev_name][name] = val |
| 167 | # else: |
| 168 | # dev_prev = prev[dev_name] |
| 169 | # res = {} |
| 170 | # for stat_name, (val, accum_val) in vals.items(): |
| 171 | # if accum_val: |
| 172 | # if stat_name in dev_prev: |
| 173 | # delta = int(val) - int(dev_prev[stat_name]) |
| 174 | # if not only_upd or 0 != delta: |
| 175 | # res[stat_name] = str(delta) |
| 176 | # dev_prev[stat_name] = val |
| 177 | # elif not only_upd or '0' != val: |
| 178 | # res[stat_name] = val |
| 179 | # |
| 180 | # if only_upd and len(res) == 0: |
| 181 | # continue |
| 182 | # yield dev_name, res |
| 183 | # yield None, None |
| 184 | # |
| 185 | # |
| 186 | |
| 187 | |