/**
 *
 * Replace a failed disk with a new disk
 *
 * Required parameters:
 *  SALT_MASTER_URL                 URL of Salt master
 *  SALT_MASTER_CREDENTIALS         Credentials to the Salt API
 *
 *  HOST                            Host (minion id) where the failed disk(s) will be replaced
 *  ADMIN_HOST                      Host (minion id) with admin keyring and /etc/ceph/crushmap file present
 *  OSD                             Failed OSD ids to be replaced (comma-separated list - 1,2,3)
 *  DEVICE                          Comma separated list of failed devices that will be replaced at HOST (/dev/sdb,/dev/sdc)
 *  JOURNAL_OR_BLOCKDB_PARTITION    Comma separated list of partitions where journal or block_db for the failed devices on this HOST were stored (/dev/sdh2,/dev/sdh3)
 *  ENFORCE_CRUSHMAP                Set to true if the prepared crush map should be enforced
 *  WAIT_FOR_PG_REBALANCE           Wait for PGs to rebalance after the OSDs are removed from the CRUSH map
 *  CLUSTER_FLAGS                   Comma separated list of flags to set on the cluster while the OSDs are replaced (unset again when done)
 *  WAIT_FOR_HEALTHY                Wait for the cluster to become healthy before stopping the OSD daemons
 *
 */
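
/**
 * Illustrative parameter values (hypothetical minion ids, adjust to the target environment):
 *
 *  HOST                            'cmp001*'
 *  ADMIN_HOST                      'cmn01*'
 *  OSD                             '1,2'
 *  DEVICE                          '/dev/sdb,/dev/sdc'
 *  JOURNAL_OR_BLOCKDB_PARTITION    '/dev/sdh2,/dev/sdh3'
 *  CLUSTER_FLAGS                   'noout'
 */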

common = new com.mirantis.mk.Common()
salt = new com.mirantis.mk.Salt()
def python = new com.mirantis.mk.Python()

def pepperEnv = "pepperEnv"
def flags = CLUSTER_FLAGS.tokenize(',')
def osds = OSD.tokenize(',')
def devices = DEVICE.tokenize(',')
def journals_blockdbs = JOURNAL_OR_BLOCKDB_PARTITION.tokenize(',')


def runCephCommand(master, target, cmd) {
    return salt.cmdRun(master, target, cmd)
}
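// Example usage (the pattern used throughout this pipeline):
//   runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')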

node("python") {

    // create connection to salt master
    python.setupPepperVirtualenv(pepperEnv, SALT_MASTER_URL, SALT_MASTER_CREDENTIALS)

    if (flags.size() > 0) {
        stage('Set cluster flags') {
            for (flag in flags) {
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd set ' + flag)
            }
        }
    }
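    // For example, CLUSTER_FLAGS='noout,norebalance' (an illustrative value) makes the stage
    // above run 'ceph osd set noout' and 'ceph osd set norebalance' on ADMIN_HOST; the same
    // flags are unset again near the end of the pipeline.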

    def osd_ids = []

    print("osds:")
    print(osds)

    // get list of osd disks of the host
    def ceph_disks = salt.getGrain(pepperEnv, HOST, 'ceph')['return'][0].values()[0].values()[0]['ceph_disk']
    common.prettyPrint(ceph_disks)

    for (i in ceph_disks) {
        def osd_id = i.getKey().toString()
        if (osd_id in osds || OSD == '*') {
            osd_ids.add('osd.' + osd_id)
            print("Will delete " + osd_id)
        } else {
            print("Skipping " + osd_id)
        }
    }
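    // At this point osd_ids holds entries such as ['osd.1', 'osd.2'] - every OSD from the
    // host's ceph_disk grain whose id was listed in OSD, or all of them when OSD='*'.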

    // `ceph osd out <id> <id>`
    stage('Set OSDs out') {
        runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd out ' + osd_ids.join(' '))
    }

    // wait for healthy cluster
    if (common.validInputParam('WAIT_FOR_HEALTHY') && WAIT_FOR_HEALTHY.toBoolean()) {
        stage('Waiting for healthy cluster') {
            while (true) {
                def health = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph health')['return'][0].values()[0]
                if (health.contains('HEALTH_OK')) {
                    common.infoMsg('Cluster is healthy')
                    break;
                }
                sleep(60)
            }
        }
    }

    // stop osd daemons
    stage('Stop OSD daemons') {
        for (i in osd_ids) {
            salt.runSaltProcessStep(pepperEnv, HOST, 'service.stop', ['ceph-osd@' + i.replaceAll('osd.', '')], null, true)
        }
    }

    // `ceph osd crush remove osd.2`
    stage('Remove OSDs from CRUSH') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd crush remove ' + i)
        }
    }

    // wait for pgs to rebalance
    if (WAIT_FOR_PG_REBALANCE.toBoolean() == true) {
        stage('Waiting for pgs to rebalance') {
            while (true) {
                def status = runCephCommand(pepperEnv, ADMIN_HOST, 'ceph -s')['return'][0].values()[0]
                if (!status.contains('degraded')) {
                    common.infoMsg('PGs rebalanced')
                    break;
                }
                sleep(3)
            }
        }
    }

    // remove keyring `ceph auth del osd.3`
    stage('Remove OSD keyrings from auth') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph auth del ' + i)
        }
    }

    // remove osd `ceph osd rm osd.3`
    stage('Remove OSDs') {
        for (i in osd_ids) {
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd rm ' + i)
        }
    }

    // remove cluster flags
    if (flags.size() > 0) {
        stage('Unset cluster flags') {
            for (flag in flags) {
                common.infoMsg('Removing flag ' + flag)
                runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd unset ' + flag)
            }
        }
    }

    // umount `umount /dev/sdi1`
    stage('Umount devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'umount ' + dev + '1')
        }
    }
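    // Note: the umount above assumes the OSD data partition is the first partition on each
    // listed device (e.g. DEVICE='/dev/sdb' results in 'umount /dev/sdb1').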

    // zap disks `ceph-disk zap /dev/sdi`
    stage('Zap devices') {
        for (dev in devices) {
            runCephCommand(pepperEnv, HOST, 'ceph-disk zap ' + dev)
        }
    }

    // remove journal or block_db partition `parted /dev/sdj rm 3`
    stage('Remove journal / block_db partitions') {
        for (journal_blockdb in journals_blockdbs) {
            if (journal_blockdb?.trim()) {
                // dev = /dev/sdi
                def dev = journal_blockdb.replaceAll("[0-9]", "")
                // part_id = 2
                def part_id = journal_blockdb.substring(journal_blockdb.lastIndexOf("/")+1).replaceAll("[^0-9]", "")
                runCephCommand(pepperEnv, HOST, "parted ${dev} rm ${part_id}")
            }
        }
    }
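    // Example: JOURNAL_OR_BLOCKDB_PARTITION='/dev/sdh2' is split into dev='/dev/sdh' and
    // part_id='2' above, so 'parted /dev/sdh rm 2' is executed on HOST.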

    // Deploy failed Ceph OSD
    stage('Deploy Ceph OSD') {
        salt.enforceState(pepperEnv, HOST, 'ceph.osd', true)
    }
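    // Applying the ceph.osd state on HOST is expected to re-provision OSDs on the freshly
    // zapped devices, based on the OSD definitions already present for that host.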


    if (ENFORCE_CRUSHMAP.toBoolean() == true) {

        // enforce crushmap `crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled; ceph osd setcrushmap -i /etc/ceph/crushmap.compiled`
        stage('Enforce crushmap') {

            stage('Ask for manual confirmation') {
                input message: "Are you sure that your ADMIN_HOST has the correct /etc/ceph/crushmap file? Click Proceed to compile and enforce the crushmap."
            }
            runCephCommand(pepperEnv, ADMIN_HOST, 'crushtool -c /etc/ceph/crushmap -o /etc/ceph/crushmap.compiled')
            runCephCommand(pepperEnv, ADMIN_HOST, 'ceph osd setcrushmap -i /etc/ceph/crushmap.compiled')
        }
    }
}