koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 1 | """ Collect data about ceph nodes""" |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 2 | import logging |
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 3 | from typing import Dict, cast, List, Set |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 4 | |
kdanylov aka koder | 0e0cfcb | 2017-03-27 22:19:09 +0300 | [diff] [blame] | 5 | from cephlib import discover |
| 6 | from cephlib.discover import OSDInfo |
kdanylov aka koder | 026e5f2 | 2017-05-15 01:04:39 +0300 | [diff] [blame] | 7 | from cephlib.common import to_ip |
| 8 | from cephlib.node import NodeInfo, IRPCNode |
| 9 | from cephlib.ssh import ConnCreds, IP, parse_ssh_uri |
| 10 | from cephlib.node_impl import connect, setup_rpc |
| 11 | |
| 12 | from .stage import Stage, StepOrder |
| 13 | from .test_run_class import TestRun |
| 14 | from .utils import StopTestError |
kdanylov aka koder | 0e0cfcb | 2017-03-27 22:19:09 +0300 | [diff] [blame] | 15 | |
| 16 | |
koder aka kdanilov | 962ee5f | 2016-12-19 02:40:08 +0200 | [diff] [blame] | 17 | logger = logging.getLogger("wally") |
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 18 | |
| 19 | |
def get_osds_info(node: IRPCNode, ceph_extra_args: str = "", thcount: int = 8) -> Dict[IP, List[OSDInfo]]:
    """Return a mapping of OSD host IP to the OSDs discovered on that host.

    Parameters:
        node: RPC node whose ``run`` callable is handed to
            ``discover.get_osds_nodes`` to execute the discovery commands.
        ceph_extra_args: extra command-line arguments forwarded unchanged to
            ``discover.get_osds_nodes``.
        thcount: forwarded as the ``thcount`` keyword of
            ``discover.get_osds_nodes`` (presumably the worker thread count -
            confirm against cephlib).

    Returns:
        A dict keyed by ``IP`` whose values are the per-host ``OSDInfo`` lists
        exactly as reported by ``discover.get_osds_nodes``.
    """
    osds_by_host = discover.get_osds_nodes(node.run, ceph_extra_args, thcount=thcount)
    return {IP(ip): osd_info_list for ip, osd_info_list in osds_by_host.items()}
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 25 | |
| 26 | |
def get_mons_ips(node: IRPCNode, ceph_extra_args: str = "") -> Set[IP]:
    """Collect the IPs of all monitors reported by ``discover.get_mons_nodes``.

    Every value of the mapping returned by ``discover.get_mons_nodes`` is
    unpacked as a pair; the first element is wrapped in ``IP`` and the second
    one is ignored.
    """
    mons = discover.get_mons_nodes(node.run, ceph_extra_args)
    mon_ips: Set[IP] = set()
    for mon_ip, _ in mons.values():
        mon_ips.add(IP(mon_ip))
    return mon_ips
koder aka kdanilov | 39e449e | 2016-12-17 15:15:26 +0200 | [diff] [blame] | 30 | |
| 31 | |
class DiscoverCephStage(Stage):
    """Stage that discovers ceph OSD and MON nodes through a root ceph node."""

    config_block = 'ceph'
    priority = StepOrder.DISCOVER

    def run(self, ctx: TestRun) -> None:
        """Discover ceph nodes and merge them into the test run context.

        Connects to ``ctx.config.ceph.root_node``, queries it for OSD and MON
        hosts and registers each of them via ``ctx.merge_node`` with the
        ``ceph-osd`` / ``ceph-mon`` role. Every registered node gets the
        cluster/conf/key settings stored under ``params['ceph']``; OSD nodes
        additionally get the per-OSD info dicts under ``params['ceph-osds']``.

        Nothing is done when ``'all_nodes'`` is already present in
        ``ctx.storage``.

        Raises:
            StopTestError: when metadata discovery is requested (it is not
                implemented), or when OSD/MON discovery fails and
                ``ignore_errors`` is not set in ``ctx.config.discover``.
        """
        if 'all_nodes' in ctx.storage:
            logger.debug("Skip ceph discovery, use previously discovered nodes")
            return

        if 'metadata' in ctx.config.discover:
            # No exception is being handled at this point, so logger.exception
            # would only append a meaningless "NoneType: None" traceback.
            logger.error("Ceph metadata discovery is not implemented")
            raise StopTestError()

        ignore_errors = 'ignore_errors' in ctx.config.discover
        ceph = ctx.config.ceph
        root_node_uri = cast(str, ceph.root_node)
        cluster = ceph.get("cluster", "ceph")
        ip_remap = ceph.get('ip_remap', {})

        conf = ceph.get("conf")
        key = ceph.get("key")

        # Fall back to the conventional per-cluster locations.
        if conf is None:
            conf = f"/etc/ceph/{cluster}.conf"

        if key is None:
            key = f"/etc/ceph/{cluster}.client.admin.keyring"

        ceph_extra_args = ""

        # An explicitly empty conf/key in the config suppresses the option.
        if conf:
            ceph_extra_args += f" -c '{conf}'"

        if key:
            ceph_extra_args += f" -k '{key}'"

        logger.debug(f"Start discovering ceph nodes from root {root_node_uri}")
        logger.debug(f"cluster={cluster} conf={conf} key={key}")

        root_info = NodeInfo(parse_ssh_uri(root_node_uri), set())

        ceph_params = {"cluster": cluster, "conf": conf, "key": key}

        with setup_rpc(connect(root_info), ctx.rpc_code, ctx.default_rpc_plugins,
                       log_level=ctx.config.rpc_log_level) as node:

            try:
                ips = set()
                for ip, osds_info in get_osds_info(node, ceph_extra_args, thcount=16).items():
                    ip = ip_remap.get(ip, ip)
                    ips.add(ip)
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root")
                    node_info = ctx.merge_node(creds, {'ceph-osd'})
                    node_info.params.setdefault('ceph-osds', []).extend(
                        osd.__dict__.copy() for osd in osds_info)
                    # A node merged earlier must not carry different ceph settings.
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params
                logger.debug(f"Found {len(ips)} nodes with ceph-osd role")
            except Exception as exc:
                if not ignore_errors:
                    logger.exception("OSD discovery failed")
                    raise StopTestError() from exc
                else:
                    logger.warning(f"OSD discovery failed {exc}")

            try:
                # Counting from 1 keeps the total at 0 when no monitor is returned.
                mon_count = 0
                for mon_count, ip in enumerate(get_mons_ips(node, ceph_extra_args), 1):
                    ip = ip_remap.get(ip, ip)
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root")
                    node_info = ctx.merge_node(creds, {'ceph-mon'})
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params
                logger.debug(f"Found {mon_count} nodes with ceph-mon role")
            except Exception as exc:
                if not ignore_errors:
                    logger.exception("MON discovery failed")
                    raise StopTestError() from exc
                else:
                    logger.warning(f"MON discovery failed {exc}")
kdanylov aka koder | cdfcdaf | 2017-04-29 10:03:39 +0300 | [diff] [blame] | 112 | |
| 113 | |
def raw_dev_name(path: str) -> str:
    """Return the whole-disk device name for a device or partition path.

    A leading ``/dev/`` is dropped. For device families whose whole-disk name
    itself ends in a digit (``nvme0n1``, ``mmcblk0``, ``loop0``, ``nbd0``,
    ``md0``) only an optional ``p<N>`` partition suffix is removed, so
    ``/dev/nvme0n1p2`` and ``/dev/nvme0n1`` both give ``nvme0n1``. For every
    other name all trailing digits are stripped (``/dev/sda1`` -> ``sda``).
    """
    import re  # local import: only this helper needs it

    if path.startswith("/dev/"):
        path = path[5:]

    # Blindly stripping trailing digits would turn "nvme0n1" into "nvme0n".
    numbered_disk = re.fullmatch(
        r"(nvme\d+n\d+|mmcblk\d+|loop\d+|nbd\d+|md\d+)(?:p\d+)?", path)
    if numbered_disk:
        return numbered_disk.group(1)

    while path and path[-1].isdigit():
        path = path[:-1]
    return path
| 120 | |
| 121 | |
class CollectCephInfoStage(Stage):
    config_block = 'ceph'
    priority = StepOrder.UPDATE_NODES_INFO

    def run(self, ctx: TestRun) -> None:
        """Record the journal and storage block devices of every ceph-osd node.

        Nodes that already have ``'ceph_storage_devs'`` in their params, or
        that lack the ``ceph-osd`` role, are skipped. For the remaining nodes
        the ``journal`` and ``storage`` paths of every entry in
        ``params['ceph-osds']`` are resolved to a device via
        ``node.conn.fs.get_dev_for_file`` and reduced with ``raw_dev_name``;
        the de-duplicated names are stored as lists under
        ``'ceph_storage_devs'`` and ``'ceph_journal_devs'``.
        """
        for node in ctx.nodes:
            params = node.info.params
            if 'ceph_storage_devs' in params or 'ceph-osd' not in node.info.roles:
                continue

            # Insertion order matters: journal is resolved before storage.
            devs_by_kind: Dict[str, Set[str]] = {'journal': set(), 'storage': set()}

            for osd in params['ceph-osds']:
                for kind, devs in devs_by_kind.items():
                    file_path = osd.get(kind)
                    if not file_path:
                        continue
                    dev = node.conn.fs.get_dev_for_file(file_path)
                    # The remote call may hand back bytes instead of str.
                    if isinstance(dev, bytes):
                        dev = dev.decode('utf8')
                    devs.add(raw_dev_name(dev))

            params['ceph_storage_devs'] = list(devs_by_kind['storage'])
            params['ceph_journal_devs'] = list(devs_by_kind['journal'])