koder aka kdanilov | 962ee5f | 2016-12-19 02:40:08 +0200 | [diff] [blame] | 1 | import time |
koder aka kdanilov | 23e6bdf | 2016-12-24 02:18:54 +0200 | [diff] [blame^] | 2 | import json |
koder aka kdanilov | e21d747 | 2015-02-14 19:02:04 -0800 | [diff] [blame] | 3 | import logging |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 4 | from concurrent.futures import Future |
| 5 | from typing import List, Dict, Tuple, Optional, Union, cast |
koder aka kdanilov | 88407ff | 2015-05-26 15:35:57 +0300 | [diff] [blame] | 6 | |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 7 | from . import utils, ssh_utils, hw_info |
| 8 | from .config import ConfigBlock |
koder aka kdanilov | 7308462 | 2016-11-16 21:51:08 +0200 | [diff] [blame] | 9 | from .node import setup_rpc, connect |
koder aka kdanilov | 23e6bdf | 2016-12-24 02:18:54 +0200 | [diff] [blame^] | 10 | from .node_interfaces import NodeInfo, IRPCNode, ISSHHost |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 11 | from .stage import Stage, StepOrder |
koder aka kdanilov | 3b4da8b | 2016-10-17 00:17:53 +0300 | [diff] [blame] | 12 | from .suits.io.fio import IOPerfTest |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 13 | from .suits.itest import TestInputConfig |
| 14 | from .suits.mysql import MysqlTest |
koder aka kdanilov | 3b4da8b | 2016-10-17 00:17:53 +0300 | [diff] [blame] | 15 | from .suits.omgbench import OmgTest |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 16 | from .suits.postgres import PgBenchTest |
| 17 | from .test_run_class import TestRun |
koder aka kdanilov | bc2c898 | 2015-06-13 02:50:43 +0300 | [diff] [blame] | 18 | |
| 19 | |
# Maps a test name, as used for keys inside a 'tests' config group, to the
# suite class that gets instantiated to run it.
TOOL_TYPE_MAPPER = dict(
    io=IOPerfTest,
    pgbench=PgBenchTest,
    mysql=MysqlTest,
    omg=OmgTest,
)


# Logger shared by every stage defined in this module.
logger = logging.getLogger("wally")
koder aka kdanilov | cee4334 | 2015-04-14 22:52:53 +0300 | [diff] [blame] | 29 | |
koder aka kdanilov | 7acd6bd | 2015-02-12 14:28:30 -0800 | [diff] [blame] | 30 | |
class ConnectStage(Stage):
    """Connect to every node from ``ctx.nodes_info`` and set up RPC on it.

    Successfully connected nodes are collected in ``ctx.nodes``. Nodes which
    can't be reached are logged and left out, unless one of them has the
    'testnode' role - in that case the whole run is aborted.
    """

    priority = StepOrder.CONNECT

    def run(self, ctx: TestRun) -> None:
        """Connect to all nodes in parallel and fill ``ctx.nodes``.

        Raises:
            utils.StopTestError: if connecting to at least one node with the
                'testnode' role failed.
        """
        with ctx.get_pool() as pool:
            logger.info("Connecting to %s nodes", len(ctx.nodes_info))

            def connect_ext(node_info: NodeInfo) -> Tuple[bool, Union[IRPCNode, NodeInfo]]:
                # Runs in a pool worker. Any failure is reported through the
                # returned flag instead of an exception, so one bad node
                # doesn't interrupt pool.map() for the remaining ones.
                try:
                    ssh_node = connect(node_info, conn_timeout=ctx.config.connect_timeout)

                    return True, setup_rpc(ssh_node,
                                           ctx.rpc_code,
                                           ctx.default_rpc_plugins,
                                           log_level=ctx.config.rpc_log_level)
                except Exception as exc:
                    logger.exception("During connect to %s: %s", node_info, exc)
                    return False, node_info

            failed_testnodes = []  # type: List[NodeInfo]
            failed_nodes = []  # type: List[NodeInfo]
            ctx.nodes = []

            for ok, node in pool.map(connect_ext, ctx.nodes_info.values()):
                if ok:
                    ctx.nodes.append(cast(IRPCNode, node))
                    continue

                # on failure connect_ext hands back the original NodeInfo
                node = cast(NodeInfo, node)
                if 'testnode' in node.roles:
                    failed_testnodes.append(node)
                else:
                    failed_nodes.append(node)

            if failed_nodes:
                logger.warning("Node(s) %s would be excluded - can't connect",
                               ", ".join(map(str, failed_nodes)))

            if failed_testnodes:
                msg = "Can't start RPC on testnode(s) " + ",".join(map(str, failed_testnodes))
                logger.error(msg)
                raise utils.StopTestError(msg)

            if not failed_nodes:
                logger.info("All nodes connected successfully")

    def cleanup(self, ctx: TestRun) -> None:
        """Optionally save RPC logs into storage, then disconnect all nodes.

        Logs are downloaded only when the 'download_rpc_logs' config option
        is true, and only from nodes whose ``rpc_log_file`` is not None.
        """
        try:
            if ctx.config.get("download_rpc_logs", False):
                for node in ctx.nodes:
                    if node.rpc_log_file is not None:
                        nid = node.info.node_id()
                        path = "rpc_logs/" + nid
                        node.conn.server.flush_logs()
                        log = node.get_file_content(node.rpc_log_file)
                        ctx.storage.store_raw(log, path)
                        logger.debug("RPC log from node %s stored into storage::%s", nid, path)
        finally:
            # Disconnect even if fetching a log failed, otherwise every
            # connection would be left open. The original error, if any,
            # still propagates to the caller.
            with ctx.get_pool() as pool:
                list(pool.map(lambda node: node.disconnect(stop=True), ctx.nodes))
koder aka kdanilov | cee4334 | 2015-04-14 22:52:53 +0300 | [diff] [blame] | 91 | |
koder aka kdanilov | 0fdaaee | 2015-06-30 11:10:48 +0300 | [diff] [blame] | 92 | |
class CollectInfoStage(Stage):
    """Collect hardware and software info from all connected nodes"""

    priority = StepOrder.START_SENSORS - 2
    config_block = 'collect_info'

    def run(self, ctx: TestRun) -> None:
        """Store hw/sw info of each node under 'hw_info/<id>' and 'sw_info/<id>'.

        Does nothing when the 'collect_info' config option is false. Nodes
        whose info is already present in the storage are skipped.

        Raises:
            utils.StopTestError: if collecting info from any node failed.
        """
        if not ctx.config.collect_info:
            return

        # (storage path template, collector function, message logged on failure)
        phases = [
            ("hw_info/{}", hw_info.get_hw_info, "During collecting hardware info from %s"),
            ("sw_info/{}", hw_info.get_sw_info, "During collecting software info from %s"),
        ]

        with ctx.get_pool() as pool:
            # can't make next RPC request until finish with previous, so each
            # phase is fully drained before the next one is submitted
            for path_templ, collector, err_msg in phases:
                pending = {}  # type: Dict[Tuple[str, str], Future]

                for rpc_node in ctx.nodes:
                    node_id = rpc_node.info.node_id()
                    storage_path = path_templ.format(node_id)
                    if storage_path not in ctx.storage:
                        pending[(storage_path, node_id)] = pool.submit(collector, rpc_node)

                for (storage_path, node_id), fut in pending.items():
                    try:
                        ctx.storage[storage_path] = fut.result()
                    except Exception:
                        logger.exception(err_msg, node_id)
                        raise utils.StopTestError()
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 133 | |
| 134 | |
class ExplicitNodesStage(Stage):
    """Register nodes which are listed explicitly in the 'nodes' config block"""

    priority = StepOrder.DISCOVER
    config_block = 'nodes'

    def run(self, ctx: TestRun) -> None:
        """Merge every 'url: comma,separated,roles' config entry into the run.

        Skipped entirely when 'all_nodes' is already present in the storage.
        """
        if 'all_nodes' in ctx.storage:
            logger.info("Skip explicid nodes filling, as all_nodes all ready in storage")
            return

        # NOTE(review): the {} default has no .raw() method - presumably this
        # stage only runs when the 'nodes' block exists; confirm.
        explicit_nodes = ctx.config.get('nodes', {}).raw()

        for ssh_url, role_list in explicit_nodes.items():
            creds = ssh_utils.parse_ssh_uri(ssh_url)
            node_roles = set(role_list.split(","))
            ctx.merge_node(creds, node_roles)
            logger.debug("Add node %s with roles %s", ssh_url, role_list)
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 149 | |
| 150 | |
class SaveNodesStage(Stage):
    """Put the list of all known nodes into the storage"""

    priority = StepOrder.CONNECT

    def run(self, ctx: TestRun) -> None:
        """Store the values of ``ctx.nodes_info`` under the 'all_nodes' key."""
        known_nodes = list(ctx.nodes_info.values())
        ctx.storage['all_nodes'] = known_nodes  # type: ignore
| 158 | |
| 159 | |
class SleepStage(Stage):
    """Pause for the number of seconds given by the 'sleep' config option"""

    priority = StepOrder.TEST
    config_block = 'sleep'

    def run(self, ctx: TestRun) -> None:
        """Log the configured delay, then block for that long."""
        delay = ctx.config.sleep
        logger.debug("Will sleep for %r seconds", delay)
        time.sleep(delay)
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 169 | |
| 170 | |
class PrepareNodes(Stage):
    """Apply ceph flags requested in the config and revert them on cleanup.

    The 'ceph_settings' config option is a whitespace separated list of
    flags; 'noscrub' and 'nodeepscrub' are recognised. Only flags which this
    stage has set itself are reverted by cleanup().
    """

    priority = StepOrder.START_SENSORS - 1

    def __init__(self):
        Stage.__init__(self)
        # True only if run() had to set the flag, i.e. it has to be unset
        # again in cleanup().
        self.nodeepscrub_updated = False
        self.noscrub_updated = False

    def run(self, ctx: TestRun) -> None:
        """Set the requested flags through the first ceph-mon/ceph-osd node."""
        ceph_sett = ctx.config.get('ceph_settings', "").split()
        if not ceph_sett:
            return

        for node in ctx.nodes:
            if "ceph-mon" in node.info.roles or "ceph-osd" in node.info.roles:
                # NOTE(review): the 'in state' checks below depend on the shape
                # of this health summary value - confirm against real ceph output.
                state = json.loads(node.run("ceph health --format json"))["summary"]["summary"]

                if 'noscrub' in ceph_sett:
                    if 'noscrub' in state:
                        logger.debug("noscrub already set on cluster")
                    else:
                        logger.info("Applying noscrub settings to ceph cluster")
                        node.run("ceph osd set noscrub")
                        self.noscrub_updated = True

                if 'nodeepscrub' in ceph_sett:
                    if 'nodeepscrub' in state:
                        logger.debug("nodeepscrub already set on cluster")
                    else:
                        logger.info("Applying nodeepscrub settings to ceph cluster")
                        # must set nodeepscrub here: cleanup() unsets exactly
                        # this flag when nodeepscrub_updated is True
                        node.run("ceph osd set nodeepscrub")
                        self.nodeepscrub_updated = True

                # commands are issued through a single ceph node only
                break

    def cleanup(self, ctx: TestRun) -> None:
        """Unset every flag which run() has set."""
        if not (self.nodeepscrub_updated or self.noscrub_updated):
            return

        for node in ctx.nodes:
            if "ceph-mon" in node.info.roles or "ceph-osd" in node.info.roles:
                if self.noscrub_updated:
                    logger.info("Reverting noscrub setting for ceph cluster")
                    node.run("ceph osd unset noscrub")
                    self.noscrub_updated = False

                if self.nodeepscrub_updated:
                    logger.info("Reverting nodeepscrub setting for ceph cluster")
                    node.run("ceph osd unset nodeepscrub")
                    self.nodeepscrub_updated = False

                # both markers are cleared now, nothing left for other nodes
                break
| 215 | |
| 216 | |
class RunTestsStage(Stage):
    """Run every test described in the 'tests' config block.

    'tests' is a list of groups; each group maps a test name (a key of
    TOOL_TYPE_MAPPER) to its parameters. Tests run on the connected nodes
    which have the 'testnode' role.
    """

    priority = StepOrder.TEST
    config_block = 'tests'

    def run(self, ctx: TestRun) -> None:
        """Execute all configured tests one after another.

        Nothing is executed when the 'no_tests' config option is true. If no
        node has the 'testnode' role an error is logged and the stage returns.
        The optional 'node_limit' test parameter restricts a test to the first
        N test nodes.

        Raises:
            KeyError: if a test name is not present in TOOL_TYPE_MAPPER.
        """
        for test_group in ctx.config.get('tests', []):
            if ctx.config.no_tests:
                continue

            test_nodes = [node for node in ctx.nodes if 'testnode' in node.info.roles]

            if not test_nodes:
                logger.error("No test nodes found")
                return

            for name, params in test_group.items():
                vm_count = params.get('node_limit', None)  # type: Optional[int]

                # select test nodes
                if vm_count is None:
                    curr_test_nodes = test_nodes
                else:
                    curr_test_nodes = test_nodes[:vm_count]

                if not curr_test_nodes:
                    logger.error("No nodes found for test, skipping it.")
                    continue

                test_cls = TOOL_TYPE_MAPPER[name]
                remote_dir = ctx.config.default_test_local_folder.format(name=name, uuid=ctx.config.run_uuid)

                # pass the node_limit-trimmed list, not the full one,
                # otherwise node_limit has no effect on the test
                test_cfg = TestInputConfig(test_cls.__name__,
                                           params=params,
                                           run_uuid=ctx.config.run_uuid,
                                           nodes=curr_test_nodes,
                                           storage=ctx.storage,
                                           remote_dir=remote_dir)

                test_cls(test_cfg).run()

    @classmethod
    def validate_config(cls, cfg: ConfigBlock) -> None:
        """No validation is performed for the 'tests' block."""
        pass