blob: a374ed6783af3e79609fdd1bb9583daf6af876a7 [file] [log] [blame]
""" Collect data about ceph nodes"""
import json
import logging
from typing import Dict, cast, List, Set, Optional
from .node_interfaces import NodeInfo, IRPCNode
from .ssh_utils import ConnCreds
from .common_types import IP
from .stage import Stage, StepOrder
from .test_run_class import TestRun
from .ssh_utils import parse_ssh_uri
from .node import connect, setup_rpc
from .utils import StopTestError, to_ip
logger = logging.getLogger("wally")
class OSDInfo:
    """Plain record describing one ceph OSD.

    Attributes are set directly from the constructor arguments:
      id      - numeric OSD id
      journal - value of the OSD's journal setting, or None when not provided
      storage - value of the OSD's data setting, or None when not provided
    """

    def __init__(self, id: int, journal: Optional[str] = None, storage: Optional[str] = None) -> None:
        # `id` shadows the builtin, but the name is part of the keyword
        # interface of this constructor, so it is kept as is.
        self.id = id
        self.journal = journal
        self.storage = storage

    def __repr__(self) -> str:
        # Readable form for log messages and debugging sessions.
        return "{}(id={!r}, journal={!r}, storage={!r})".format(
            type(self).__name__, self.id, self.journal, self.storage)
def get_osds_info(node: IRPCNode, conf: str, key: str) -> Dict[IP, List[OSDInfo]]:
    """Map every OSD host IP to the list of OSDs reported for it.

    Runs ``ceph ... osd dump`` on ``node`` to enumerate the OSDs, then runs
    ``ceph -n osd.<id> --show-config`` once per OSD and picks the
    ``osd_journal`` and ``osd_data`` settings out of its output.

    OSD entries without a 'public_addr' field are skipped; a warning is
    logged for the first such entry only.

    Parameters:
        node - node used to execute the ceph commands
        conf - path passed to ceph via ``-c``
        key  - path passed to ceph via ``-k``

    Raises:
        ValueError    - 'osd dump' output is not valid JSON; the raw output
                        is saved to /tmp/ceph-out.json before re-raising
        StopTestError - journal or data path is missing from an OSD config;
                        that config is saved to /tmp/ceph-out.json first
    """
    data = node.run("ceph -c {} -k {} --format json osd dump".format(conf, key))
    try:
        jdata = json.loads(data)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass. Keep the unparsable
        # output around for post-mortem analysis, then let the error propagate.
        with open("/tmp/ceph-out.json", "w") as fd:
            fd.write(data)
        raise

    ips = {}  # type: Dict[IP, List[OSDInfo]]
    first_error = True

    for osd_data in jdata["osds"]:
        osd_id = int(osd_data["osd"])

        if "public_addr" not in osd_data:
            # Report only the first OSD without an address to avoid log spam.
            if first_error:
                logger.warning("No 'public_addr' field in 'ceph osd dump' output for osd %s" +
                               "(all subsequent errors omitted)", osd_id)
                first_error = False
            continue

        # Keep the part before the first '/', then the part before the first ':'
        # (presumably the address looks like "<ip>:<port>/<nonce>" - confirm).
        ip_port = osd_data["public_addr"].split("/", 1)[0]
        ip = IP(ip_port.split(":")[0])

        osd_journal_path = None  # type: Optional[str]
        osd_data_path = None  # type: Optional[str]

        # TODO: parallelize this!
        osd_cfg = node.run("ceph -n osd.{} --show-config".format(osd_id))
        for line in osd_cfg.split("\n"):
            # maxsplit=1 keeps values that themselves contain '=' intact
            if line.startswith("osd_journal ="):
                osd_journal_path = line.split("=", 1)[1].strip()
            elif line.startswith("osd_data ="):
                osd_data_path = line.split("=", 1)[1].strip()

        if osd_data_path is None or osd_journal_path is None:
            # Save the config that could not be interpreted before aborting.
            with open("/tmp/ceph-out.json", "w") as fd:
                fd.write(osd_cfg)
            logger.error("Can't detect osd %s journal or storage path", osd_id)
            raise StopTestError()

        ips.setdefault(ip, []).append(OSDInfo(osd_id,
                                              journal=osd_journal_path,
                                              storage=osd_data_path))
    return ips
def get_mons_ips(node: IRPCNode, conf: str, key: str) -> Set[IP]:
    """Collect the IPs of the monitors listed in 'ceph mon_status' output.

    The command is executed on ``node`` with ``conf`` and ``key`` passed as
    the ``-c`` and ``-k`` options; its JSON output is read at
    ``monmap -> mons``. Monitors without an 'addr' field are skipped, and
    only the first such monitor is reported with a warning.
    """
    command = "ceph -c {} -k {} --format json mon_status".format(conf, key)
    mon_status = json.loads(node.run(command))

    mon_ips = set()  # type: Set[IP]
    already_warned = False

    for mon in mon_status["monmap"]["mons"]:
        if "addr" in mon:
            # drop everything after the first '/', then after the first ':'
            host_port = mon["addr"].split("/", 1)[0]
            host = host_port.split(":")[0]
            mon_ips.add(IP(host))
        elif not already_warned:
            name = mon.get("name", "<MON_NAME_MISSED>")
            logger.warning("No 'addr' field in 'ceph mon_status' output for mon %s" +
                           "(all subsequent errors omitted)", name)
            already_warned = True

    return mon_ips
class DiscoverCephStage(Stage):
    """Stage that discovers ceph OSD and MON hosts via a root node and merges them into the test run."""

    # presumably the name of the config section this stage is bound to - confirm against Stage
    config_block = 'ceph'
    priority = StepOrder.DISCOVER

    def run(self, ctx: TestRun) -> None:
        """Discover ceph nodes and register them in ``ctx``.

        Nothing is returned; every discovered host is passed to
        ``ctx.merge_node`` with the 'ceph-osd' or 'ceph-mon' role and gets
        the cluster/conf/key settings stored in its ``params['ceph']``.

        Discovery is skipped when the 'discovery' config value is 'disable'
        or 'metadata', or when 'all_nodes' is already present in
        ``ctx.storage``.

        Raises:
            StopTestError - OSD or MON discovery failed and the 'discovery'
                            config value is not 'ignore_errors' (with
                            'ignore_errors' the failure is only logged)
        """
        discovery = ctx.config.get("discovery")
        if discovery in ('disable', 'metadata'):
            logger.info("Skip ceph discovery due to config setting")
            return

        if 'all_nodes' in ctx.storage:
            logger.debug("Skip ceph discovery, use previously discovered nodes")
            return

        ceph = ctx.config.ceph
        root_node_uri = cast(str, ceph.root_node)
        cluster = ceph.get("cluster", "ceph")
        conf = ceph.get("conf")
        key = ceph.get("key")

        # Resolve the defaults before logging, so the log shows the paths
        # that are actually going to be used.
        if conf is None:
            conf = "/etc/ceph/{}.conf".format(cluster)

        if key is None:
            key = "/etc/ceph/{}.client.admin.keyring".format(cluster)

        logger.debug("Start discovering ceph nodes from root %s", root_node_uri)
        logger.debug("cluster=%s key=%s conf=%s", cluster, key, conf)

        root_info = NodeInfo(parse_ssh_uri(root_node_uri), set())
        ceph_params = {"cluster": cluster, "conf": conf, "key": key}

        with setup_rpc(connect(root_info),
                       ctx.rpc_code,
                       ctx.default_rpc_plugins,
                       log_level=ctx.config.rpc_log_level) as node:

            # The root node's private key is used as the credentials for
            # every discovered host.
            ssh_key = node.get_file_content("~/.ssh/id_rsa")

            try:
                osds_by_ip = get_osds_info(node, conf, key)
                for ip, osds_info in osds_by_ip.items():
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root", key=ssh_key)
                    node_info = ctx.merge_node(creds, {'ceph-osd'})
                    node_info.params.setdefault('ceph-osds', []).extend(osds_info)
                    # A node must not already carry different ceph settings.
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params

                logger.debug("Found %s nodes with ceph-osd role", len(osds_by_ip))
            except Exception as exc:
                if discovery != 'ignore_errors':
                    logger.exception("OSD discovery failed")
                    raise StopTestError() from exc
                else:
                    logger.warning("OSD discovery failed %s", exc)

            try:
                mon_ips = get_mons_ips(node, conf, key)
                for ip in mon_ips:
                    creds = ConnCreds(to_ip(cast(str, ip)), user="root", key=ssh_key)
                    node_info = ctx.merge_node(creds, {'ceph-mon'})
                    # A node must not already carry different ceph settings.
                    assert 'ceph' not in node_info.params or node_info.params['ceph'] == ceph_params
                    node_info.params['ceph'] = ceph_params

                # len() reports 0 when no monitor was found.
                logger.debug("Found %s nodes with ceph-mon role", len(mon_ips))
            except Exception as exc:
                if discovery != 'ignore_errors':
                    logger.exception("MON discovery failed")
                    raise StopTestError() from exc
                else:
                    logger.warning("MON discovery failed %s", exc)