blob: 36d6b1534c87490398b06ed9a7821e94ec5c75b2 [file] [log] [blame]
koder aka kdanilov39e449e2016-12-17 15:15:26 +02001""" Collect data about ceph nodes"""
2import json
3import logging
koder aka kdanilov962ee5f2016-12-19 02:40:08 +02004from typing import Dict, cast, List, Set, Optional
koder aka kdanilov39e449e2016-12-17 15:15:26 +02005
6
7from .node_interfaces import NodeInfo, IRPCNode
8from .ssh_utils import ConnCreds
9from .common_types import IP
10from .stage import Stage, StepOrder
11from .test_run_class import TestRun
12from .ssh_utils import parse_ssh_uri
13from .node import connect, setup_rpc
koder aka kdanilovbbbe1dc2016-12-20 01:19:56 +020014from .utils import StopTestError, to_ip
koder aka kdanilov39e449e2016-12-17 15:15:26 +020015
16
kdanylov aka koder0e0cfcb2017-03-27 22:19:09 +030017from cephlib import discover
18from cephlib.discover import OSDInfo
19
20
koder aka kdanilov962ee5f2016-12-19 02:40:08 +020021logger = logging.getLogger("wally")
koder aka kdanilov39e449e2016-12-17 15:15:26 +020022
23
def get_osds_info(node: IRPCNode, ceph_extra_args: str = "") -> Dict[IP, List[OSDInfo]]:
    """Return a mapping from OSD node IP to the OSD info entries found for it.

    Parameters:
        node: RPC node whose ``run`` method is handed to
            ``discover.get_osds_nodes`` to execute the discovery commands.
        ceph_extra_args: extra command-line arguments forwarded unchanged to
            ``discover.get_osds_nodes``.

    Returns:
        Dict keyed by ``IP`` with the list of ``OSDInfo`` reported for that ip.
    """
    found = discover.get_osds_nodes(node.run, ceph_extra_args)

    # Iterating a dict directly yields only its keys, which cannot be unpacked
    # into (ip, osd_info_list) pairs, so go through .items() for a dict result.
    # Any other iterable is assumed to already yield such pairs.
    pairs = found.items() if isinstance(found, dict) else found

    return {IP(ip): osd_info_list for ip, osd_info_list in pairs}
koder aka kdanilov39e449e2016-12-17 15:15:26 +020029
30
def get_mons_ips(node: IRPCNode, ceph_extra_args: str = "") -> Set[IP]:
    """Return the set of monitor ips reported by ``discover.get_mons_nodes``.

    Parameters:
        node: RPC node whose ``run`` method is handed to
            ``discover.get_mons_nodes`` to execute the discovery commands.
        ceph_extra_args: extra command-line arguments forwarded unchanged to
            ``discover.get_mons_nodes``.

    Returns:
        Set of ``IP`` built from the values of the discovered monitors mapping.
    """
    mon_nodes = discover.get_mons_nodes(node.run, ceph_extra_args)
    return set(map(IP, mon_nodes.values()))
koder aka kdanilov39e449e2016-12-17 15:15:26 +020034
35
class DiscoverCephStage(Stage):
    """Stage which discovers ceph OSD and MON nodes starting from a root node.

    Every discovered node is registered in the test run context through
    ``ctx.merge_node`` with the 'ceph-osd' or 'ceph-mon' role.
    """

    config_block = 'ceph'
    priority = StepOrder.DISCOVER

    def run(self, ctx: TestRun) -> None:
        """Discover ceph nodes and merge them into ``ctx``.

        Discovery is skipped when 'ceph' is not listed in
        ``ctx.config.discovery`` or when ``ctx.storage`` already holds
        'all_nodes'. Otherwise the node given by ``ceph.root_node`` is
        connected, OSD and MON ips are queried through it, and each ip is
        merged into ``ctx`` with user "root" and the content of the root
        node's ``~/.ssh/id_rsa`` as the key.

        Parameters:
            ctx: current test run; its config, storage and node list are used.

        Raises:
            StopTestError: when 'metadata' discovery is requested (not
                implemented), or when OSD/MON discovery fails and
                'ignore_errors' is not listed in ``ctx.config.discovery``.
        """
        discovery = ctx.config.discovery

        if 'ceph' not in discovery:
            logger.debug("Skip ceph discovery due to config setting")
            return

        if 'all_nodes' in ctx.storage:
            logger.debug("Skip ceph discovery, use previously discovered nodes")
            return

        if 'metadata' in discovery:
            # No exception is being handled here, so logger.exception() would
            # append a meaningless "NoneType: None" traceback - use error().
            logger.error("Ceph metadata discovery is not implemented")
            raise StopTestError()

        ignore_errors = 'ignore_errors' in discovery
        ceph = ctx.config.ceph
        root_node_uri = cast(str, ceph.root_node)
        cluster = ceph.get("cluster", "ceph")

        conf = ceph.get("conf")
        key = ceph.get("key")

        # Fall back to paths derived from the cluster name when not configured.
        if conf is None:
            conf = "/etc/ceph/{}.conf".format(cluster)

        if key is None:
            key = "/etc/ceph/{}.client.admin.keyring".format(cluster)

        ceph_extra_args = ""

        # An explicitly empty conf/key in the config disables the option.
        if conf:
            ceph_extra_args += " -c '{}'".format(conf)

        if key:
            ceph_extra_args += " -k '{}'".format(key)

        logger.debug("Start discovering ceph nodes from root %s", root_node_uri)
        # Argument order must follow the placeholders: cluster, key, conf.
        logger.debug("cluster=%s key=%s conf=%s", cluster, key, conf)

        root_info = NodeInfo(parse_ssh_uri(root_node_uri), set())

        ceph_params = {"cluster": cluster, "conf": conf, "key": key}

        with setup_rpc(connect(root_info), ctx.rpc_code, ctx.default_rpc_plugins,
                       log_level=ctx.config.rpc_log_level) as node:

            # Key read from the root node; reused as credentials for every
            # discovered node.
            ssh_key = node.get_file_content("~/.ssh/id_rsa")

            try:
                ips = set()
                for ip, osds_info in get_osds_info(node, ceph_extra_args).items():
                    ips.add(ip)
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root", key=ssh_key)
                    node_info = ctx.merge_node(creds, {'ceph-osd'})
                    node_info.params.setdefault('ceph-osds', []).extend(osds_info)
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params

                logger.debug("Found %s nodes with ceph-osd role", len(ips))
            except Exception as exc:
                if not ignore_errors:
                    logger.exception("OSD discovery failed")
                    raise StopTestError()
                else:
                    logger.warning("OSD discovery failed %s", exc)

            try:
                # Counting from 1 keeps mon_count at 0 when no monitor is
                # found, so the log line below reports the real number.
                mon_count = 0
                for mon_count, ip in enumerate(get_mons_ips(node, ceph_extra_args), 1):
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root", key=ssh_key)
                    node_info = ctx.merge_node(creds, {'ceph-mon'})
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params
                logger.debug("Found %s nodes with ceph-mon role", mon_count)
            except Exception as exc:
                if not ignore_errors:
                    logger.exception("MON discovery failed")
                    raise StopTestError()
                else:
                    logger.warning("MON discovery failed %s", exc)