Max Rasskazov | 7f3c53c | 2014-12-09 14:19:17 +0300 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | #-*- coding: utf-8 -*- |
| 3 | |
| 4 | |
| 5 | import datetime |
Max Rasskazov | 11653ab | 2015-01-15 15:45:16 +0300 | [diff] [blame] | 6 | import logging |
Max Rasskazov | 7f3c53c | 2014-12-09 14:19:17 +0300 | [diff] [blame] | 7 | import os |
Max Rasskazov | e99d837 | 2015-01-13 20:07:01 +0300 | [diff] [blame] | 8 | import re |
Max Rasskazov | 7f3c53c | 2014-12-09 14:19:17 +0300 | [diff] [blame] | 9 | import subprocess |
| 10 | import tempfile |
Max Rasskazov | d5b47eb | 2015-05-27 22:26:54 +0300 | [diff] [blame] | 11 | import shutil |
Max Rasskazov | 7f3c53c | 2014-12-09 14:19:17 +0300 | [diff] [blame] | 12 | |
| 13 | |
# Logging is configured at import time; every message of this module goes
# through the 'rsync_staging' logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('rsync_staging')

# Naming scheme of snapshot directories: the strftime/strptime format of the
# timestamp suffix and a regular expression describing the same shape.
staging_snapshot_stamp_format = r'%Y-%m-%d-%H%M%S'
staging_snapshot_stamp_regexp = r'[0-9]{4}-[0-9]{2}-[0-9]{2}-[0-9]{6}'

# The reference time is taken once (UTC) when the module is imported, so the
# stamp below stays the same for the whole run.
now = datetime.datetime.utcnow()
staging_snapshot_stamp = datetime.datetime.strftime(
    now, staging_snapshot_stamp_format)
Max Rasskazov | 7f3c53c | 2014-12-09 14:19:17 +0300 | [diff] [blame] | 21 | |
| 22 | |
class RsyncHost(object):
    """Publish timestamped staging snapshots on a remote rsync module.

    All remote operations are performed by running the ``rsync`` command
    line against ``<host>::<module>``.  A snapshot is a directory named
    ``<mirror_name>-<stamp>`` inside ``<root_path>/<files_dir>``; a symlink
    named ``<mirror_name>-<staging_postfix>`` in the same directory is
    re-pointed at the newest snapshot after every successful transfer.

    The class relies on the module-level names ``logger``, ``now``,
    ``staging_snapshot_stamp``, ``staging_snapshot_stamp_format`` and
    ``staging_snapshot_stamp_regexp``.
    """

    def __init__(self,
                 mirror_name,
                 host,
                 module='mirror-sync',
                 root_path='fwm',
                 files_dir='files',
                 save_last_days=61,
                 rsync_extra_params='-v',
                 staging_postfix='staging',
                 init_directory_structure=True):
        """Store the connection settings and optionally prepare the remote.

        :param mirror_name: prefix of snapshot directories and of the link
        :param host: rsync host name (also used to build HTTP URLs)
        :param module: rsync module name on the host
        :param root_path: top directory inside the module
        :param files_dir: directory below ``root_path`` holding snapshots
        :param save_last_days: retention used by ``_remove_old_snapshots``
            (``0`` skips the cleanup, ``False`` removes every unlinked
            snapshot)
        :param rsync_extra_params: extra options added to each rsync call
        :param staging_postfix: suffix of the staging symlink name
        :param init_directory_structure: when exactly ``True``, make sure
            ``<root_path>/<files_dir>`` exists on the remote side
        :raises RuntimeError: if the module-level stamp does not match the
            module-level stamp regexp, or if an rsync call fails
        """
        self.mirror_name = mirror_name
        self.host = host
        self.module = module
        self.root_path = root_path
        self.files_dir = files_dir
        self.save_last_days = save_last_days
        self.rsync_extra_params = rsync_extra_params
        self.staging_postfix = staging_postfix
        self.staging_snapshot_stamp = staging_snapshot_stamp
        self.staging_snapshot_stamp_format = staging_snapshot_stamp_format
        # Sanity check: the regexp is later used to find snapshots on the
        # remote side, so it has to recognise the stamp produced now.
        if re.match(staging_snapshot_stamp_regexp,
                    self.staging_snapshot_stamp) is None:
            raise RuntimeError('Wrong regexp for staging_snapshot_stamp\n'
                               'staging_snapshot_stamp = "{}"\n'
                               'staging_snapshot_stamp_regexp = "{}"'.
                               format(staging_snapshot_stamp,
                                      staging_snapshot_stamp_regexp))
        self.staging_snapshot_stamp_regexp = staging_snapshot_stamp_regexp

        if init_directory_structure is True:
            self.init_directory_structure()

    @property
    def url(self):
        """``<host>::<module>`` address of the rsync module."""
        return '{}::{}'.format(self.host, self.module)

    @property
    def root_url(self):
        """rsync address of ``root_path`` inside the module."""
        return '{}/{}'.format(self.url, self.root_path)

    @property
    def files_path(self):
        """Module-relative path of the snapshots directory."""
        return '{}/{}'.format(self.root_path, self.files_dir)

    @property
    def files_url(self):
        """rsync address of the snapshots directory."""
        return '{}/{}'.format(self.root_url, self.files_dir)

    def http_url(self, path):
        """Return ``http://<host>/<path>``."""
        return 'http://{}/{}'.format(self.host, path)

    def html_link(self, path, link_name):
        """Return an HTML anchor labelled ``link_name`` for ``path``."""
        return '<a href="{}">{}</a>'.format(self.http_url(path), link_name)

    @property
    def staging_dir(self):
        """Name of the snapshot directory of this run."""
        return '{}-{}'.format(self.mirror_name, self.staging_snapshot_stamp)

    @property
    def staging_dir_path(self):
        """Module-relative path of the snapshot directory of this run."""
        return '{}/{}'.format(self.files_path, self.staging_dir)

    @property
    def staging_dir_url(self):
        """rsync address of the snapshot directory of this run."""
        return '{}/{}'.format(self.url, self.staging_dir_path)

    @property
    def staging_link(self):
        """Name of the staging symlink."""
        return '{}-{}'.format(self.mirror_name, self.staging_postfix)

    @property
    def staging_link_path(self):
        """Module-relative path of the staging symlink."""
        return '{}/{}'.format(self.files_path, self.staging_link)

    @property
    def staging_link_url(self):
        """rsync address of the staging symlink."""
        return '{}/{}'.format(self.url, self.staging_link_path)

    def init_directory_structure(self):
        """Create ``<root_path>/<files_dir>`` on the remote if missing.

        The remote listing is checked first; when either directory is
        absent, an empty local tree with the same layout is pushed with
        ``rsync -a``.

        :raises RuntimeError: if an rsync call fails
        """
        # Names are matched literally, hence re.escape().
        root_dirs = self.rsync_ls_dirs(
            '/',
            pattern=r'^{}$'.format(re.escape(self.root_path))
        )[1]
        if root_dirs:
            files_dirs = self.rsync_ls_dirs(
                '{}/'.format(self.root_path),
                pattern=r'^{}$'.format(re.escape(self.files_dir))
            )[1]
            if files_dirs:
                return

        dir_to_sync = tempfile.mkdtemp()
        try:
            os.makedirs('{}/{}'.format(dir_to_sync, self.files_path))
            self._do_rsync(
                source='{}/'.format(dir_to_sync),
                dest='{}/'.format(self.url),
                opts='-a'
            )
        finally:
            # Remove the scratch tree even when rsync fails.
            shutil.rmtree(dir_to_sync)

    @property
    def empty_dir(self):
        """Path of a local empty directory, created on first access.

        The directory is reused by the delete helpers and is not removed
        by this class.
        """
        if getattr(self, '_empty_dir', None) is None:
            self._empty_dir = tempfile.mkdtemp()
        return self._empty_dir

    def symlink_to(self, target):
        """Create a local symlink pointing at ``target``; return its path.

        The link lives in its own freshly created temporary directory, so
        its name cannot be claimed by another process beforehand (unlike
        a name obtained from ``tempfile.mktemp()``).
        """
        linkname = os.path.join(tempfile.mkdtemp(), 'symlink')
        os.symlink(target, linkname)
        return linkname

    @staticmethod
    def _discard_symlink(linkname):
        """Best-effort removal of a ``symlink_to`` link and its directory."""
        for remove, path in ((os.remove, linkname),
                             (os.rmdir, os.path.dirname(linkname))):
            try:
                remove(path)
            except OSError:
                # Deliberately ignored: leftovers in the temp area must not
                # turn a finished transfer into a failure.
                pass

    def _shell(self, cmd, raise_error=True):
        """Run ``cmd`` through the shell and collect its result.

        NOTE(review): ``cmd`` is a single string executed with
        ``shell=True``; every value interpolated into it by the callers
        has to be trusted.

        :param cmd: command line to execute
        :param raise_error: raise on a non-zero exit code when true
        :returns: tuple ``(exitcode, stdout, stderr)``
        :raises RuntimeError: on non-zero exit code if ``raise_error``
        """
        logger.info(cmd)
        # universal_newlines=True yields text output, so the callers'
        # string parsing also works where pipes deliver bytes by default.
        process = subprocess.Popen(cmd,
                                   stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE,
                                   shell=True,
                                   universal_newlines=True)
        out, err = process.communicate()
        logger.debug(out)
        exitcode = process.returncode
        if exitcode != 0 and raise_error:
            msg = ('"{cmd}" failed. Exit code == {exitcode}'
                   '\n\nSTDOUT: \n{out}'
                   '\n\nSTDERR: \n{err}'
                   .format(cmd=cmd, exitcode=exitcode, out=out, err=err))
            logger.error(msg)
            raise RuntimeError(msg)
        return exitcode, out, err

    def _do_rsync(self, source='', dest=None, opts='', extra=None):
        """Run ``rsync <opts> <extra> <source> <dest>``.

        :param extra: overrides ``self.rsync_extra_params`` when not None
        :returns: tuple ``(exitcode, stdout, stderr)``
        :raises RuntimeError: if rsync exits with a non-zero code
        """
        if extra is None:
            extra = self.rsync_extra_params
        cmd = 'rsync {opts} {extra} {source} {dest}'.format(
            opts=opts, extra=extra, source=source, dest=dest)
        return self._shell(cmd)

    def _rsync_ls(self, dirname=None, pattern=r'.*', opts=''):
        """List a remote directory and keep the matching raw lines.

        The last whitespace-separated token of every output line is
        treated as the entry name: lines whose token is ``.`` or does not
        match ``pattern`` (``re.match``) are dropped, and so are blank
        lines.

        :param dirname: module-relative directory, ``<root_path>/`` if None
        :returns: tuple ``(exitcode, lines, stderr)``
        """
        if dirname is None:
            dirname = '{}/'.format(self.root_path)
        dest = '{}/{}'.format(self.url, dirname)
        extra = self.rsync_extra_params + ' --no-v'
        exitcode, out, err = self._do_rsync(dest=dest, opts=opts, extra=extra)
        regexp = re.compile(pattern)
        lines = []
        for line in out.splitlines():
            fields = line.split()
            if not fields:
                # A blank line has no name token to inspect.
                continue
            name = fields[-1]
            if name != '.' and regexp.match(name) is not None:
                lines.append(line)
        return exitcode, lines, err

    def rsync_ls(self, dirname, pattern=r'.*'):
        """Return ``(exitcode, names, stderr)`` for entries of ``dirname``."""
        exitcode, out, err = self._rsync_ls(dirname, pattern=pattern)
        out = [_.split()[-1] for _ in out]
        return exitcode, out, err

    def rsync_ls_dirs(self, dirname, pattern=r'.*'):
        """Like ``rsync_ls`` but only for lines starting with ``d``."""
        exitcode, out, err = self._rsync_ls(dirname, pattern=pattern)
        out = [_.split()[-1] for _ in out if _.startswith('d')]
        return exitcode, out, err

    def rsync_ls_symlinks(self, dirname, pattern=r'.*'):
        """Return ``(exitcode, [[name, target], ...], stderr)``.

        Only lines starting with ``l`` are used; name and target are the
        third-from-last and the last token of such a line.  ``pattern`` is
        applied to the last token, i.e. to the link target.
        """
        exitcode, out, err = self._rsync_ls(dirname,
                                            pattern=pattern,
                                            opts='-l')
        out = [_.split()[-3:] for _ in out if _.startswith('l')]
        out = [[_[0], _[-1]] for _ in out]
        return exitcode, out, err

    def rsync_delete_file(self, filename):
        """Delete one remote entry by syncing an empty directory over it.

        The parent directory is synced from ``empty_dir`` with ``--delete``
        while only ``filename`` is included, so nothing else is touched.

        :returns: tuple ``(exitcode, stdout, stderr)``
        """
        dirname, filename = os.path.split(filename)
        source = '{}/'.format(self.empty_dir)
        dest = '{}/{}/'.format(self.url, dirname)
        opts = "-r --delete --include={} '--exclude=*'".format(filename)
        return self._do_rsync(source=source, dest=dest, opts=opts)

    def rsync_delete_dir(self, dirname):
        """Delete a remote directory: empty it first, then remove it.

        :returns: result tuple of the final ``rsync_delete_file`` call
        """
        self._do_rsync(source='{}/'.format(self.empty_dir),
                       dest='{}/{}/'.format(self.url, dirname),
                       opts="-a --delete")
        return self.rsync_delete_file(dirname)

    def rsync_staging_transfer(self, source, tgt_symlink_name=None):
        """Upload ``source`` as a new snapshot and publish it.

        Steps: sync into ``staging_dir_url`` (with ``--link-dest`` set to
        the current staging link), replace the staging symlink so that it
        points at the new snapshot, then remove outdated snapshots.

        :param source: local source passed to rsync
        :param tgt_symlink_name: accepted for compatibility; not used
        :returns: ``(exitcode, stdout, stderr)`` of the upload
        :raises RuntimeError: if any rsync call fails.  The new snapshot is
            deleted again only while it is not published yet; a failure of
            the later cleanup leaves it (and the link) in place.
        """
        opts = '--archive --force --ignore-errors '\
               '--delete-excluded --no-owner --no-group --delete '\
               '--link-dest=/{}'.format(self.staging_link_path)
        published = False
        local_link = None
        try:
            exitcode, out, err = self._do_rsync(source=source,
                                                dest=self.staging_dir_url,
                                                opts=opts)
            self.rsync_delete_file(self.staging_link_path)
            local_link = self.symlink_to(self.staging_dir)
            self._do_rsync(source=local_link,
                           dest=self.staging_link_url,
                           opts='-l')
            published = True
            # cleaning of old snapshots
            self._remove_old_snapshots()
            return exitcode, out, err
        except RuntimeError as e:
            logger.error(str(e))
            if not published:
                # Roll back the unpublished snapshot.  Once the link has
                # been switched, deleting it would leave the link dangling.
                self.rsync_delete_dir(self.staging_dir_path)
            raise
        finally:
            if local_link is not None:
                self._discard_symlink(local_link)

    def _snapshot_date(self, dirname):
        """Return the snapshot day (midnight) encoded in ``dirname``.

        ``None`` is returned when ``dirname`` is not exactly
        ``<mirror_name>-<stamp>`` with a stamp parseable by
        ``staging_snapshot_stamp_format``.
        """
        prefix = '{}-'.format(self.mirror_name)
        if not dirname.startswith(prefix):
            return None
        try:
            stamp = datetime.datetime.strptime(
                dirname[len(prefix):], self.staging_snapshot_stamp_format)
        except ValueError:
            return None
        return datetime.datetime.combine(stamp, datetime.time(0))

    def _remove_old_snapshots(self, save_last_days=None):
        """Delete snapshots older than the retention period.

        A snapshot is kept when any symlink found in ``root_path`` or
        ``files_path`` points at it.

        :param save_last_days: days to keep; ``None`` uses
            ``self.save_last_days``, ``0`` skips the cleanup, ``False``
            removes every snapshot that is not referenced by a symlink
        :raises RuntimeError: if an rsync call fails
        """
        if save_last_days is None:
            save_last_days = self.save_last_days
        if save_last_days is False:
            # delete all snapshots
            logger.info('Deletion all of the old snapshots '
                        '(save_last_days == {})'.format(save_last_days))
            save_last_days = -1
        elif save_last_days == 0:
            # skipping deletion
            logger.info('Skip deletion of old snapshots '
                        '(save_last_days == {})'.format(save_last_days))
            return
        warn_date = now - datetime.timedelta(days=save_last_days)
        warn_date = datetime.datetime.combine(warn_date, datetime.time(0))
        # Anchored at both ends with a literal mirror name: directories that
        # merely start like a snapshot are not candidates.
        dirs = self.rsync_ls_dirs(
            '{}/'.format(self.files_path),
            pattern='^{}-{}$'.format(re.escape(self.mirror_name),
                                     self.staging_snapshot_stamp_regexp)
        )[1]
        links = self.rsync_ls_symlinks('{}/'.format(self.root_path))[1]
        links += self.rsync_ls_symlinks('{}/'.format(self.files_path))[1]
        for d in dirs:
            dir_date = self._snapshot_date(d)
            if dir_date is None:
                # The name has the right shape but carries no valid date.
                logger.info('Skip "{}": unable to parse snapshot '
                            'date'.format(d))
                continue
            if dir_date >= warn_date:
                continue
            dir_links = [_[0] for _ in links
                         if _[1] == d
                         or _[1].endswith('/{}'.format(d))]
            if not dir_links:
                self.rsync_delete_dir('{}/{}'.format(self.files_path, d))
            else:
                logger.info('Skip deletion of "{}" because there are '
                            'symlinks found: {}'.format(d, dir_links))