#!/bin/bash
#
# ceph_collect.sh - gather data from a Ceph cluster for troubleshooting and
# assessment.
# (C) Christian Huebner chuebner@mirantis.com 2015
#
# Usage: ./ceph_collect.sh <customername> <clustername>

# Announce the run before anything else happens.
echo "Collecting Ceph cluster data."

#######################################
# Print the usage summary for this script.
# Arguments: none
# Outputs:   usage text on stdout
# Note:      intentionally shadows the bash builtin `help` within this script.
#######################################
help () {
  echo "Data collector for Ceph analytics"
  echo "Usage: ceph_collect.sh [-h] [-vol] [-nb] [-m] <customername> <clustername>"
  echo "  -h,   --help           show this help and exit"
  echo "  -vol, --volumes        count volumes, snapshots and clones per rbd pool"
  echo "  -nb,  --nobench        skip the per-OSD benchmark (it runs by default)"
  echo "  -m,   --healthmetrics  collect health metrics per device"
  echo "-m only works with Nautilus and up"
}

# ---------------------------------------------------------------------------
# Defaults, command line parsing and preflight checks.
#
# Switches:
#   -h,   --help           print usage and exit
#   -vol, --volumes        enable the per-pool volume/snapshot/clone counts
#   -nb,  --nobench        disable the OSD benchmark (enabled by default)
#   -m,   --healthmetrics  enable the per-device health metric collection
# The first two non-option arguments become CUSTOMER and CLUSTERNAME (both
# exported). On success OSDS holds the ids printed by `ceph osd ls`.
# ---------------------------------------------------------------------------
POSITIONAL=()
BENCH="true"
VOLUMES="false"
# Initialised explicitly so that a HEALTHMETRICS value inherited from the
# caller's environment cannot switch the feature on by accident.
HEALTHMETRICS="false"

if [[ $# -eq 0 ]]; then
  help
  exit 1
fi

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      help
      exit 0
      ;;
    -vol|--volumes)
      VOLUMES="true"
      ;;
    -nb|--nobench)
      BENCH="false"
      ;;
    -b)
      # Older usage text advertised -b; presumably it once enabled the
      # benchmark, which is now the default. Accepted so that it is no longer
      # mistaken for the customer name.
      BENCH="true"
      ;;
    -m|--healthmetrics)
      HEALTHMETRICS="true"
      ;;
    -*)
      # Reject unknown switches instead of silently using them as CUSTOMER.
      echo "ERROR: unknown option: $1" >&2
      help >&2
      exit 1
      ;;
    *)
      POSITIONAL+=("$1")
      ;;
  esac
  shift
done

if (( ${#POSITIONAL[@]} < 2 )); then
  echo "Usage: ./ceph_collect.sh <CUSTOMER> <CLUSTERNAME>" >&2
  exit 1
fi
export CUSTOMER=${POSITIONAL[0]}
export CLUSTERNAME=${POSITIONAL[1]}

if ! command -v ceph >/dev/null; then
  echo "ERROR: This script must be run on a ceph monitor or admin node" >&2
  exit 1
fi

# Ask the cluster for its OSD ids only after the arguments are valid and the
# ceph CLI is known to exist, so --help and usage errors never touch the
# cluster.
mapfile -t OSDS < <(ceph osd ls)

# Create the per-run working directory and switch into it. All collected
# files are written relative to the current directory and archived with a
# glob at the end of the script, so continuing after a failed mkdir/cd would
# scatter files into (and later archive) the caller's directory - abort
# instead.
# DATE has day resolution: a second run on the same day reuses the directory,
# and files written in append mode accumulate.
DATE=$(date "+%Y-%m-%d")
DIRNAME="CephCollectData.$CUSTOMER.$CLUSTERNAME.$DATE"
ARCHNAME="$DIRNAME.tar.gz"
if ! mkdir -p -- "$DIRNAME"; then
  echo "ERROR: cannot create directory $DIRNAME" >&2
  exit 1
fi
if ! cd -- "$DIRNAME"; then
  echo "ERROR: cannot change into directory $DIRNAME" >&2
  exit 1
fi

# CRUSH map: fetch the binary map once, write a decompiled text version and
# a dump (stored as crushmap.json), then discard the binary.
echo "Collecting CRUSH map"
crush_bin="crush.bin"
ceph osd getcrushmap -o "$crush_bin"
crushtool -d "$crush_bin" -o crushmap.txt
crushtool -i "$crush_bin" --dump > crushmap.json
rm "$crush_bin"

# The CRUSH dump as reported by the cluster itself.
echo "Collecting ceph osd crush dump"
ceph osd crush dump > crushdump.json

# Cluster-wide state dumps, one output file per command. Each command is
# best-effort: its exit status is not checked, so a failing command does not
# stop the commands that follow.
echo "Collecting cluster status"
ceph -s -f json -o ceph_s.json

echo "Collecting health detail"
ceph -f json health detail -o ceph_health_detail.json

echo "Collecting monmap"
ceph mon dump -f json -o monmap.json

echo "Collecting ceph df"
ceph df -f json -o ceph_df.json

echo "Collecting ceph osd df"
ceph osd df -f json -o ceph_osd_df.json

echo "Collecting ceph osd dump"
ceph osd dump -f json -o ceph_osd_dump.json

echo "Collecting rados df"
rados df -f json > rados_df.json

echo "Collecting ceph report"
ceph report -o ceph_report.json

# Anonymise the auth listing: every token that starts with "AQ" and ends in
# "==" is replaced by the literal word KEY before it is written to disk.
echo "Collecting auth data anonymized"
ceph auth list -f json | sed 's/AQ[^=]*==/KEY/g' > ceph_auth_ls.json

echo "Collecting ceph pg dump"
ceph pg dump -f json -o ceph_pg_dump.json

echo "Collecting ceph pg autoscale"
ceph osd pool autoscale-status -f json -o ceph_pg_autoscale_status.json

echo "Collecting ceph running configuration"
ceph config dump -f json > ceph_config_dump.json

# Collected once; an identical second run used to overwrite the same file.
echo "Collecting ceph erasure code profiles"
ceph -f json osd erasure-code-profile ls > ceph_osd_erasure-code-profiles.json

# Long listing of rbd images (no pool is given on the command line).
# The sed expression rewrites the last character of the last output line
# to "}".
# NOTE(review): no format option is passed to rbd, so the intent of that
# rewrite and whether the result is really JSON are unclear - confirm.
echo "Collecting rbd ls -l"
rbd ls -l \
  | sed '$ s/.$/}/' \
  > rbd_ls_l.json

# BlueFS (block DB/WAL) statistics, one appended line per OSD in the form
#   "osd.<id>": <command output>
# The captured output is expanded unquoted on purpose: it is re-joined with
# single spaces so that every OSD occupies exactly one line.
# NOTE(review): the lines are appended without enclosing braces or
# separators, so the file as a whole is not a JSON document despite its name.
echo "Collecting block DB/WAL stats"
for osd_id in "${OSDS[@]}"; do
  bluefs_stats=$(ceph -f json tell "osd.$osd_id" bluefs stats)
  echo "\"osd.$osd_id\":" $bluefs_stats >> ceph_db_wal_stats.json
done

# Dump the definition of every erasure-code profile into its own file,
# named after the profile.
for ec_profile in $(ceph osd erasure-code-profile ls); do
  ec_outfile="ceph_osd_erasure-code-profile_${ec_profile}.json"
  ceph -f json -o "$ec_outfile" osd erasure-code-profile get "$ec_profile"
done

# Optional (-vol): per-pool counts of volumes, snapshots and clones.
#
# Pools are taken from the lines of `ceph osd pool ls detail` that contain
# "rbd"; the pool name is the third field with its single quotes removed.
# For each pool one JSON object is appended, one object per line:
#   { "name" : "<pool>", "volumes" : N, "snapshots" : N, "clones" : N }
# `rbd ls -l` is run once per pool and all three counts are derived from
# that single listing, so they always describe the same state.
#   volumes   = listing lines without an "@"
#   snapshots = lines whose first field contains "@"
#   clones    = lines whose fourth field contains "@"
# NOTE(review): if `rbd ls -l` prints a column header line, that line is
# counted as a volume - confirm and subtract if so.
# NOTE(review): using field 4 for the parent assumes the size column splits
# into two words (number and unit) - confirm for the rbd version in use.
# NOTE(review): pool names are not JSON-escaped; a name containing a double
# quote or backslash would produce an invalid object.
if [[ $VOLUMES = "true" ]]; then
  echo "Collecting ceph volumes and CoW clones per rbd pool"
  for pool in $(ceph osd pool ls detail | grep rbd | awk '{print $3}' | sed "s/'//g"); do
    listing=$(rbd ls -l "$pool")
    # printf '%s' (no added newline) keeps an empty listing at zero lines.
    volumes=$(printf '%s' "$listing" | grep -vc '@')
    snapshots=$(printf '%s' "$listing" | awk '{print $1}' | grep -c '@')
    clones=$(printf '%s' "$listing" | awk '{print $4}' | grep -c '@')
    printf '{ "name" : "%s", "volumes" : %s, "snapshots" : %s, "clones" : %s }\n' \
      "$pool" "$volumes" "$snapshots" "$clones"
  done >> volumes_per_pool.json
else
  echo "Volume collection disabled."
fi

# Take ten samples of `ceph osd perf` (files ..._0.json to ..._9.json),
# printing the sample number and pausing four seconds after each one.
echo "Collecting ceph osd perf"
for (( sample = 0; sample <= 9; sample++ )); do
  echo "$sample"
  ceph osd perf -f json -o "ceph_osd_perf_${sample}.json"
  sleep 4
done

# Device health: only attempted when `ceph device ls` itself succeeds.
# Lines containing "DEVICE" (the column header) are dropped, the first field
# of every remaining line is taken as the device id, and the health metrics
# of all devices are concatenated into a single file.
if ceph device ls &>/dev/null; then
  echo "Collecting device health information"
  ceph device ls \
    | awk '!/DEVICE/ {print $1}' \
    | xargs -n 1 ceph device get-health-metrics \
    > ceph_device_get_health_metrics.json
else
  echo "Device health check not supported"
fi

# Optional (-m): health metrics per device, one file per device named after
# the third column of `ceph device ls`.
#
# The listing is read line by line with `read`, which splits on whitespace:
# field 1 is the device id, field 3 names the output file. This avoids
# changing IFS for the rest of the script, which the previous
# `IFS=$'\n'` + for-loop form did and never undid.
# NOTE(review): two devices sharing the same third column write to the same
# file, the later one overwriting the earlier - confirm whether that matters.
if [[ $HEALTHMETRICS = "true" ]]; then
  echo "Collecting Ceph Health Metrics (-m option)"
  while read -r dev _ osd _; do
    # Skip blank lines.
    [[ -n $dev ]] || continue
    # stdin is detached so the command cannot consume the listing being read.
    ceph device get-health-metrics "$dev" < /dev/null > "ceph_health_$osd.json"
  done < <(ceph device ls | grep -v DEVICE)
fi

# OSD benchmark (on by default, disabled with -nb).
#
# Runs `ceph tell osd.<id> bench` with arguments 12000000 and 4096 on every
# OSD in OSDS and writes one JSON object to ceph_tell_bench.json:
#   {
#   "osd.<id>": <bench result> ,
#   ...
#   "osd.<last>": <bench result>
#   }
# An OSD whose bench command returns no output is recorded as null so the
# file stays valid JSON. The last entry is recognised by position, so the
# comma logic does not depend on the ids being numeric.
if [[ $BENCH = "true" ]]; then
  echo "Collecting Ceph Benchmark"
  {
    echo "{ "
    osd_total=${#OSDS[@]}
    osd_seen=0
    for i in "${OSDS[@]}"; do
      osd_seen=$(( osd_seen + 1 ))
      bench_result=$(ceph tell "osd.$i" bench -f json 12000000 4096)
      [[ -n $bench_result ]] || bench_result="null"
      if (( osd_seen < osd_total )); then
        echo "\"osd.$i\": $bench_result ,"
      else
        echo "\"osd.$i\": $bench_result"
      fi
    done
    echo "}"
  } > ceph_tell_bench.json
fi

# Grab the last 10000 entries of the cluster and audit logs. `|| true` marks
# these as best-effort: a failure here is deliberately ignored.
echo "Collecting Ceph Logs"
ceph log last 10000 cluster > ceph_cluster_log.json || true
ceph log last 10000 audit > ceph_audit_log.json || true

# Pack everything collected in the working directory into an archive placed
# next to it, then step back out. The archive path is quoted so customer or
# cluster names containing whitespace do not split it into several arguments.
tar czf "../$ARCHNAME" *
cd ..
| |