import time
import json
import logging
from concurrent.futures import Future
from typing import List, Dict, Tuple, Optional, Union, cast

from . import utils, ssh_utils, hw_info
from .config import ConfigBlock
from .node import setup_rpc, connect
from .node_interfaces import NodeInfo, IRPCNode
from .stage import Stage, StepOrder
from .sensors import collect_sensors_data
from .suits.io.fio import IOPerfTest
from .suits.itest import TestInputConfig
from .suits.mysql import MysqlTest
from .suits.omgbench import OmgTest
from .suits.postgres import PgBenchTest
from .test_run_class import TestRun
from .utils import StopTestError


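# maps test suite names used in the 'tests' config section to the test classes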
TOOL_TYPE_MAPPER = {
    "io": IOPerfTest,
    "pgbench": PgBenchTest,
    "mysql": MysqlTest,
    "omg": OmgTest,
}


logger = logging.getLogger("wally")


class ConnectStage(Stage):
    """Connect to nodes stage"""

    priority = StepOrder.CONNECT

    def run(self, ctx: TestRun) -> None:
        with ctx.get_pool() as pool:
            logger.info("Connecting to %s nodes", len(ctx.nodes_info))

            def connect_ext(node_info: NodeInfo) -> Tuple[bool, Union[IRPCNode, NodeInfo]]:
                # returns (True, rpc_node) on success or (False, original node_info) on failure
                try:
                    ssh_node = connect(node_info, conn_timeout=ctx.config.connect_timeout)
                    return True, setup_rpc(ssh_node,
                                           ctx.rpc_code,
                                           ctx.default_rpc_plugins,
                                           log_level=ctx.config.rpc_log_level)
                except Exception as exc:
                    logger.exception("Failed to connect to %s: %s", node_info, exc)
                    return False, node_info

            failed_testnodes = []  # type: List[NodeInfo]
            failed_nodes = []  # type: List[NodeInfo]
            ctx.nodes = []

            for ok, node in pool.map(connect_ext, ctx.nodes_info.values()):
                if not ok:
                    node = cast(NodeInfo, node)
                    if 'testnode' in node.roles:
                        failed_testnodes.append(node)
                    else:
                        failed_nodes.append(node)
                else:
                    ctx.nodes.append(cast(IRPCNode, node))

            if failed_nodes:
                msg = "Node(s) {} will be excluded - can't connect"
                logger.warning(msg.format(", ".join(map(str, failed_nodes))))

            if failed_testnodes:
                msg = "Can't start RPC on testnode(s) " + ",".join(map(str, failed_testnodes))
                logger.error(msg)
                raise utils.StopTestError(msg)

            if not failed_nodes:
                logger.info("All nodes connected successfully")

    def cleanup(self, ctx: TestRun) -> None:
        if ctx.config.get("download_rpc_logs", False):
            for node in ctx.nodes:
                if node.rpc_log_file is not None:
                    nid = node.info.node_id()
                    path = "rpc_logs/" + nid
                    node.conn.server.flush_logs()
                    log = node.get_file_content(node.rpc_log_file)
                    if path in ctx.storage:
                        ctx.storage.append_raw(log, path)
                    else:
                        ctx.storage.put_raw(log, path)
                    logger.debug("RPC log from node {} stored into storage::{}".format(nid, path))

        with ctx.get_pool() as pool:
            list(pool.map(lambda node: node.disconnect(stop=True), ctx.nodes))


class CollectInfoStage(Stage):
    """Collect node info"""

    priority = StepOrder.START_SENSORS - 2
    config_block = 'collect_info'

    def run(self, ctx: TestRun) -> None:
        if not ctx.config.collect_info:
            return

        futures = {}  # type: Dict[Tuple[str, str], Future]

        with ctx.get_pool() as pool:
            # can't issue the next RPC request to a node until the previous one finishes,
            # so hardware and software info are collected in two separate passes
            for node in ctx.nodes:
                nid = node.info.node_id()
                hw_info_path = "hw_info/{}".format(nid)
                if hw_info_path not in ctx.storage:
                    futures[(hw_info_path, nid)] = pool.submit(hw_info.get_hw_info, node)

            for (path, nid), future in futures.items():
                try:
                    ctx.storage.put(future.result(), path)
                except Exception:
                    logger.exception("Failed to collect hardware info from %s", nid)
                    raise utils.StopTestError()

            futures.clear()
            for node in ctx.nodes:
                nid = node.info.node_id()
                sw_info_path = "sw_info/{}".format(nid)
                if sw_info_path not in ctx.storage:
                    futures[(sw_info_path, nid)] = pool.submit(hw_info.get_sw_info, node)

            for (path, nid), future in futures.items():
                try:
                    ctx.storage.put(future.result(), path)
                except Exception:
                    logger.exception("Failed to collect software info from %s", nid)
                    raise utils.StopTestError()


class ExplicitNodesStage(Stage):
    """Add explicitly defined nodes"""

    priority = StepOrder.DISCOVER
    config_block = 'nodes'

    def run(self, ctx: TestRun) -> None:
        if 'all_nodes' in ctx.storage:
            logger.info("Skip explicit nodes filling, as all_nodes is already in storage")
            return

        for url, roles in ctx.config.get('nodes', {}).raw().items():
            ctx.merge_node(ssh_utils.parse_ssh_uri(url), set(roles.split(",")))
            logger.debug("Add node %s with roles %s", url, roles)


class SaveNodesStage(Stage):
    """Save nodes list to storage"""

    priority = StepOrder.CONNECT

    def run(self, ctx: TestRun) -> None:
        ctx.storage.put_list(ctx.nodes_info.values(), 'all_nodes')


class SleepStage(Stage):
    """Sleep for a configured number of seconds"""

    priority = StepOrder.TEST
    config_block = 'sleep'

    def run(self, ctx: TestRun) -> None:
        logger.debug("Will sleep for %r seconds", ctx.config.sleep)
        time.sleep(ctx.config.sleep)


class PrepareNodes(Stage):
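    """Apply requested ceph scrub settings before the run and revert them afterwards"""
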
    priority = StepOrder.START_SENSORS - 1

    def __init__(self):
        Stage.__init__(self)
        self.nodeepscrub_updated = False
        self.noscrub_updated = False

    def run(self, ctx: TestRun) -> None:
        ceph_sett = ctx.config.get('ceph_settings', "").split()
        if ceph_sett:
            for node in ctx.nodes:
                if "ceph-mon" in node.info.roles or "ceph-osd" in node.info.roles:
                    state = json.loads(node.run("ceph health --format json"))["summary"]["summary"]
                    if 'noscrub' in ceph_sett:
                        if 'noscrub' in state:
                            logger.debug("noscrub already set on cluster")
                        else:
                            logger.info("Applying noscrub setting to ceph cluster")
                            node.run("ceph osd set noscrub")
                            self.noscrub_updated = True

                    if 'nodeepscrub' in ceph_sett:
                        if 'nodeepscrub' in state:
                            logger.debug("nodeepscrub already set on cluster")
                        else:
                            logger.info("Applying nodeepscrub setting to ceph cluster")
                            node.run("ceph osd set nodeepscrub")
                            self.nodeepscrub_updated = True

                    # flags are cluster-wide, applying them via a single ceph node is enough
                    break

    def cleanup(self, ctx: TestRun) -> None:
        if self.nodeepscrub_updated or self.noscrub_updated:
            for node in ctx.nodes:
                if "ceph-mon" in node.info.roles or "ceph-osd" in node.info.roles:
                    if self.noscrub_updated:
                        logger.info("Reverting noscrub setting for ceph cluster")
                        node.run("ceph osd unset noscrub")
                        self.noscrub_updated = False

                    if self.nodeepscrub_updated:
                        logger.info("Reverting nodeepscrub setting for ceph cluster")
                        node.run("ceph osd unset nodeepscrub")
                        self.nodeepscrub_updated = False


class RunTestsStage(Stage):
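    """Run test suites listed in the config on the test nodes"""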

    priority = StepOrder.TEST
    config_block = 'tests'

    def run(self, ctx: TestRun) -> None:
        for test_group in ctx.config.get('tests', []):
            if not ctx.config.no_tests:
                test_nodes = [node for node in ctx.nodes if 'testnode' in node.info.roles]

                if not test_nodes:
                    logger.error("No test nodes found")
                    raise StopTestError()

                for name, params in test_group.items():
                    vm_count = params.get('node_limit', None)  # type: Optional[int]

                    # select test nodes, honoring node_limit if it is set
                    if vm_count is None:
                        curr_test_nodes = test_nodes
                    else:
                        curr_test_nodes = test_nodes[:vm_count]

                    if not curr_test_nodes:
                        logger.error("No nodes found for test, skipping it.")
                        continue

                    test_cls = TOOL_TYPE_MAPPER[name]
                    remote_dir = ctx.config.default_test_local_folder.format(name=name, uuid=ctx.config.run_uuid)
                    test_cfg = TestInputConfig(test_cls.__name__,
                                               params=params,
                                               run_uuid=ctx.config.run_uuid,
                                               nodes=curr_test_nodes,
                                               storage=ctx.storage,
                                               remote_dir=remote_dir)

                    test_cls(test_cfg,
                             on_idle=lambda: collect_sensors_data(ctx, False)).run()

    @classmethod
    def validate_config(cls, cfg: ConfigBlock) -> None:
        pass


class LoadStoredNodesStage(Stage):
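    """Load nodes list from storage, if one was previously saved"""
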
    priority = StepOrder.DISCOVER

    def run(self, ctx: TestRun) -> None:
        if 'all_nodes' in ctx.storage:
            if ctx.nodes_info:
                logger.error("Internal error: some nodes are already stored in " +
                             "nodes_info before LoadStoredNodesStage")
                raise StopTestError()
            ctx.nodes_info = {node.node_id(): node
                              for node in ctx.storage.load_list(NodeInfo, "all_nodes")}
            logger.info("%s nodes loaded from storage", len(ctx.nodes_info))