blob: 15129e7ef37d9c9389f3bce1dbb01c5abd8f1343 [file] [log] [blame]
Alexd9fd85e2019-05-16 16:58:24 -05001import json
2import os
Alexd0391d42019-05-21 18:48:55 -05003import re
Alexd9fd85e2019-05-16 16:58:24 -05004from copy import deepcopy
5
Alex74dc1352019-05-17 13:18:24 -05006from cfg_checker.common import logger, logger_cli, nested_set
Alex0ed4f762019-05-17 17:55:33 -05007from cfg_checker.common.const import _mainteiners_index_filename
8from cfg_checker.common.const import _mirantis_versions_filename
9from cfg_checker.common.const import _other_versions_filename
Alexd9fd85e2019-05-16 16:58:24 -050010from cfg_checker.common.const import _pkg_desc_archive
11from cfg_checker.common.const import _repos_index_filename
12from cfg_checker.common.const import _repos_info_archive
13from cfg_checker.common.const import _repos_versions_archive
Alexd9fd85e2019-05-16 16:58:24 -050014from cfg_checker.common.const import ubuntu_releases
Alex7f69a6a2019-05-31 16:53:35 -050015from cfg_checker.common.file_utils import ensure_folder_exists
Alexd9fd85e2019-05-16 16:58:24 -050016from cfg_checker.common.file_utils import get_gzipped_file
17from cfg_checker.common.settings import pkg_dir
18from cfg_checker.helpers.console_utils import Progress
19from cfg_checker.helpers.tgz import TGZFile
20
21import requests
22from requests.exceptions import ConnectionError
23
# Extension used for the per-tag JSON blobs stored inside the TGZ archives
ext = ".json"
25
26
def get_tag_label(_tag, parsed=False):
    """Return a pretty, column-aligned label for a repo tag.

    Tags ending in '.update'/'.hotfix' are shown with a bracketed marker
    and the suffix stripped; a '+' prefix marks an already-parsed tag.
    """
    _label = "+ " if parsed else "  "
    if _tag.endswith(".update"):
        _label += "[updates] " + _tag.rsplit('.', 1)[0]
    elif _tag.endswith(".hotfix"):
        _label += " [hotfix] " + _tag.rsplit('.', 1)[0]
    else:
        # plain tag: pad to line up with the bracketed variants
        _label += " " * 10 + _tag
    return _label
42
43
Alex0ed4f762019-05-17 17:55:33 -050044def _get_value_index(_di, value, header=None):
Alex29ee76f2019-05-17 18:52:29 -050045 # Mainteiner names often uses specific chars
Alex3bc95f62020-03-05 17:00:04 -060046 # so make sure that value saved is str not str
47 # Python2
48 # _val = str(value, 'utf-8') if isinstance(value, str) else value
49 # Python3 has always utf-8 decoded value
50 _val = value
Alex0ed4f762019-05-17 17:55:33 -050051 if header:
Alex3bc95f62020-03-05 17:00:04 -060052 try:
53 _ = next(filter(lambda i: _di[i]["header"] == header, _di))
54 # iterator not empty, find index
55 for _k, _v in _di.items():
56 if _v["header"] == header:
57 _index = _k
58 except StopIteration:
59 _index = str(len(_di.keys()) + 1)
Alex0ed4f762019-05-17 17:55:33 -050060 _di[_index] = {
61 "header": header,
Alex29ee76f2019-05-17 18:52:29 -050062 "props": _val
Alex0ed4f762019-05-17 17:55:33 -050063 }
Alex3bc95f62020-03-05 17:00:04 -060064 finally:
65 return _index
Alex0ed4f762019-05-17 17:55:33 -050066 else:
Alex3bc95f62020-03-05 17:00:04 -060067 try:
68 _ = next(filter(lambda i: _di[i] == _val, _di))
69 # iterator not empty, find index
70 for _k, _v in _di.items():
Alex29ee76f2019-05-17 18:52:29 -050071 if _v == _val:
Alex0ed4f762019-05-17 17:55:33 -050072 _index = _k
Alex3bc95f62020-03-05 17:00:04 -060073 except StopIteration:
74 _index = str(len(_di.keys()) + 1)
75 # on save, cast it as str
76 _di[_index] = _val
77 finally:
78 return _index
Alex0ed4f762019-05-17 17:55:33 -050079
80
81def _safe_load(_f, _a):
82 if _f in _a.list_files():
Alexd0391d42019-05-21 18:48:55 -050083 logger_cli.debug(
84 "... loading '{}':'{}'".format(
Alex0ed4f762019-05-17 17:55:33 -050085 _a.basefile,
86 _f
87 )
88 )
Alex3bc95f62020-03-05 17:00:04 -060089 return json.loads(_a.get_file(_f, decode=True))
Alex0ed4f762019-05-17 17:55:33 -050090 else:
91 return {}
92
93
Alexd9fd85e2019-05-16 16:58:24 -050094def _n_url(url):
95 if url[-1] == '/':
96 return url
97 else:
98 return url + '/'
99
100
class ReposInfo(object):
    """Walks a Mirantis mirror tree and archives the links to 'Packages.gz'
    index files (per tag / subtag / ubuntu release) into a local TGZ
    archive, one JSON file per tag.

    Fixes applied:
    - '_ls_repo_page' used 'e.message', which does not exist on Python 3
      exceptions (the error handler itself would raise AttributeError);
    - '__call__' invoked 'self.__init__(self, ...)', passing the instance
      as 'arch_folder';
    - 'list_tags' re-imported 're' locally although it is imported at
      module level;
    - 'fetch_repos' mixed a literal '".json"' with the module-level 'ext'
      constant for the same purpose.
    """
    # set True once __init__ completes; __call__ uses it to avoid re-init
    init_done = False

    def _init_vars(self):
        # collected repo records
        self.repos = []

    def _init_folders(self, arch_folder=None):
        # archive folder defaults to '<pkg_dir>/versions'
        if arch_folder:
            self._arch_folder = arch_folder
            self._repofile = os.path.join(arch_folder, _repos_info_archive)
        else:
            self._arch_folder = os.path.join(pkg_dir, "versions")
            self._repofile = os.path.join(
                self._arch_folder,
                _repos_info_archive
            )

    def __init__(self, arch_folder=None):
        # perform inits
        self._init_vars()
        self._init_folders(arch_folder)
        self.init_done = True

    def __call__(self, *args, **kwargs):
        # return the ready instance; re-init only when needed
        if self.init_done:
            return self
        else:
            # Fix: original called self.__init__(self, ...), binding the
            # instance itself to 'arch_folder'
            self.__init__(*args, **kwargs)
            return self

    @staticmethod
    def _ls_repo_page(url):
        """Scrape a mirror HTML index page.

        Returns a (dirs, files) tuple of anchor targets. Directory anchors
        are recognized by a trailing '-' in the listing line.
        Yes, this is ugly. But it works ok for small HTMLs.
        """
        _a = "<a"
        _s = "href="
        _e = "\">"
        try:
            page = requests.get(url, timeout=60)
        except ConnectionError as e:
            # Fix: Python 3 exceptions have no '.message' attribute
            logger_cli.error("# ERROR: {}".format(e))
            return [], []
        _lines = page.text.splitlines()
        # Comprehension for dirs. Anchors for ends with '-'
        _dirs = [ln[ln.index(_s)+6:ln.index(_e)-1]
                 for ln in _lines if ln.startswith(_a) and ln.endswith('-')]
        # Comprehension for files. Anchors ends with size
        _files = [ln[ln.index(_s)+6:ln.index(_e)]
                  for ln in _lines if ln.startswith(_a) and not ln.endswith('-')]

        return _dirs, _files

    def search_pkg(self, url, _list):
        """Recursively walk the 'dists' tree collecting 'Packages.gz' URLs
        into '_list' (also returned for convenience)."""
        _dirs, _files = self._ls_repo_page(url)

        for _d in _dirs:
            # Search only in dists, ignore the rest
            if "dists" not in url and _d != "dists":
                continue
            _u = _n_url(url + _d)
            self.search_pkg(_u, _list)

        for _f in _files:
            if _f == "Packages.gz":
                _list.append(url + _f)
                logger.debug("... [F] '{}'".format(url + _f))

        return _list

    @staticmethod
    def _map_repo(_path_list, _r):
        """Convert 'Packages.gz' URLs into repo records appended to '_r'.

        Path layout assumed: .../dists/<release>/<type>/<binary-arch>/...
        """
        for _pkg_path in _path_list:
            _parts = _pkg_path.split('/')
            _kw = _parts[_parts.index('dists')+1:]
            _kw.reverse()
            _repo_item = {
                # "binary-<arch>" -> "<arch>"
                "arch": _kw[1][7:] if "binary" in _kw[1] else _kw[1],
                "type": _kw[2],
                "ubuntu-release": _kw[3],
                "filepath": _pkg_path
            }
            _r.append(_repo_item)

    def _find_tag(self, _t, _u, label=""):
        """Look for tag '_t' at url '_u' (optionally in a 'label'
        subfolder, e.g. 'hotfix'/'update').

        :return: '{<label>: {"baseurl": ..., "all": {}}}' skeleton when the
            tag is present, otherwise an empty dict
        """
        if label:
            _url = _n_url(_u + label)
            _label = _t + '.' + label
        else:
            _url = _u
            _label = _t
        _ts, _ = self._ls_repo_page(_url)
        if _t in _ts:
            logger.debug(
                "... found tag '{}' at '{}'".format(
                    _t,
                    _url
                )
            )
            return {
                _label: {
                    "baseurl": _n_url(_url + _t),
                    "all": {}
                }
            }
        else:
            return {}

    def fetch_repos(self, url, tag=None):
        """Gather links to 'Packages.gz' files for 'tag' (or for every tag
        found at 'url') and store per-tag JSON into the repo info archive.
        Tags already present in the archive are skipped.
        """
        base_url = _n_url(url)
        logger_cli.info("# Using '{}' as a repos source".format(base_url))

        logger_cli.info("# Gathering repos info (i.e. links to 'packages.gz')")
        # init repoinfo archive
        _repotgz = TGZFile(self._repofile)
        # prepare repo links
        _repos = {}
        if tag:
            # only one tag to process
            _repos.update(self._find_tag(tag, base_url))
            _repos.update(self._find_tag(tag, base_url, label="hotfix"))
            _repos.update(self._find_tag(tag, base_url, label="update"))
        else:
            # gather all of them
            _tags, _ = self._ls_repo_page(base_url)
            if "hotfix" in _tags:
                _tags.remove('hotfix')
            if "update" in _tags:
                _tags.remove('update')
            # search tags in subfolders
            _h_tags, _ = self._ls_repo_page(base_url + 'hotfix')
            _u_tags, _ = self._ls_repo_page(base_url + 'update')
            _tags.extend([t for t in _h_tags if t not in _tags])
            _tags.extend([t for t in _u_tags if t not in _tags])
            _progress = Progress(len(_tags))
            _index = 0
            for _tag in _tags:
                _repos.update(self._find_tag(_tag, base_url))
                _repos.update(self._find_tag(_tag, base_url, label="hotfix"))
                _repos.update(self._find_tag(_tag, base_url, label="update"))
                _index += 1
                _progress.write_progress(_index)
            _progress.end()

        # parse subtags
        for _label in _repos.keys():
            logger_cli.info("-> processing tag '{}'".format(_label))
            # use the module-level 'ext' consistently (was a literal here)
            _name = _label + ext
            if _repotgz.has_file(_name):
                logger_cli.info(
                    "-> skipping, '{}' already has '{}'".format(
                        _repos_info_archive,
                        _name
                    )
                )
                continue
            # process the tag
            _repo = _repos[_label]
            _baseurl = _repos[_label]["baseurl"]
            # get the subtags
            _sub_tags, _ = self._ls_repo_page(_baseurl)
            _total_index = len(_sub_tags)
            _index = 0
            _progress = Progress(_total_index)
            logger.debug(
                "... found {} subtags for '{}'".format(
                    len(_sub_tags),
                    _label
                )
            )
            # save the url and start search
            for _stag in _sub_tags:
                _u = _baseurl + _stag
                _index += 1
                logger.debug(
                    "... searching repos in '{}/{}'".format(
                        _label,
                        _stag
                    )
                )

                # Searching Package collections
                if _stag in ubuntu_releases:
                    # if stag is the release, this is all packages
                    _repo["all"][_stag] = []
                    _repo["all"]["url"] = _n_url(_u)
                    _path_list = self.search_pkg(_n_url(_u), [])
                    self._map_repo(_path_list, _repo["all"][_stag])
                    logger.info(
                        "-> found {} dists".format(
                            len(_repo["all"][_stag])
                        )
                    )

                else:
                    # each subtag might have any ubuntu release
                    # so iterate them
                    _repo[_stag] = {
                        "url": _n_url(_u)
                    }
                    _releases, _ = self._ls_repo_page(_n_url(_u))
                    for _rel in _releases:
                        if _rel not in ubuntu_releases:
                            logger.debug(
                                "... skipped unknown ubuntu release: "
                                "'{}' in '{}'".format(
                                    _rel,
                                    _u
                                )
                            )
                        else:
                            _rel_u = _n_url(_u) + _rel
                            _repo[_stag][_rel] = []
                            _path_list = self.search_pkg(_n_url(_rel_u), [])
                            self._map_repo(
                                _path_list,
                                _repo[_stag][_rel]
                            )
                            logger.info(
                                "-> found {} dists for '{}'".format(
                                    len(_repo[_stag][_rel]),
                                    _rel
                                )
                            )
                _progress.write_progress(_index)

            _progress.end()
            _repotgz.add_file(_name, buf=json.dumps(_repo, indent=2))
            logger_cli.info(
                "-> archive '{}' updated with '{}'".format(
                    self._repofile,
                    _name
                )
            )

        return

    def list_tags(self, splitted=False):
        """List tags stored in the repo info archive.

        :param splitted: when True, return (major, updates, hotfix) where
            major is a set and updates/hotfix are lists of base tag names;
            otherwise return one list, numeric tags (version-sorted) first,
            then lexical tags
        """
        _files = TGZFile(self._repofile).list_files()
        # all files in archive with no '.json' part
        _all = set([f.rsplit('.', 1)[0] for f in _files])
        if splitted:
            # files that ends with '.update'
            _updates = set([f for f in _all if f.find('update') >= 0])
            # files that ends with '.hotfix'
            _hotfix = set([f for f in _all if f.find('hotfix') >= 0])
            # remove updates and hotfix tags from all. The true magic of SETs
            _all = _all - _updates - _hotfix
            # cut updates and hotfix endings
            _updates = [f.rsplit('.', 1)[0] for f in _updates]
            _hotfix = [f.rsplit('.', 1)[0] for f in _hotfix]

            return _all, _updates, _hotfix
        else:
            # 're' is imported at module level; the old local import removed
            _all = list(_all)
            # lexical tags
            _lex = [s for s in _all if not s[0].isdigit()]
            _lex.sort()
            # tags with digits, sorted by up to three numeric components
            _dig = [s for s in _all if s[0].isdigit()]
            _dig = sorted(
                _dig,
                key=lambda x: tuple(int(i) for i in re.findall(r"\d+", x)[:3])
            )

            return _dig + _lex

    def get_repoinfo(self, tag):
        """Load previously saved repo info JSON for 'tag' from the archive."""
        _tgz = TGZFile(self._repofile)
        _buf = _tgz.get_file(tag + ext, decode=True)
        return json.loads(_buf)
373
374
375class RepoManager(object):
Alex3bc95f62020-03-05 17:00:04 -0600376 init_done = False
Alexd9fd85e2019-05-16 16:58:24 -0500377
Alex3bc95f62020-03-05 17:00:04 -0600378 def _init_folders(self, arch_folder=None):
Alex9a4ad212020-10-01 18:04:25 -0500379 logger_cli.info("# Loading package versions data")
Alex3bc95f62020-03-05 17:00:04 -0600380 # overide arch folder if needed
381 if arch_folder:
382 self._arch_folder = arch_folder
383 else:
384 self._arch_folder = os.path.join(pkg_dir, "versions")
Alexd9fd85e2019-05-16 16:58:24 -0500385
Alex3bc95f62020-03-05 17:00:04 -0600386 self._versions_arch = os.path.join(
387 self._arch_folder,
388 _repos_versions_archive
389 )
390 self._desc_arch = os.path.join(self._arch_folder, _pkg_desc_archive)
Alexd0391d42019-05-21 18:48:55 -0500391
Alex3bc95f62020-03-05 17:00:04 -0600392 def _init_vars(self, info_class):
393 # RepoInfo instance init
394 if info_class:
395 self._info_class = info_class
396 else:
397 self._info_class = ReposInfo()
398 # archives
399 self._apps_filename = "apps.json"
Alexd9fd85e2019-05-16 16:58:24 -0500400
Alex3bc95f62020-03-05 17:00:04 -0600401 # repository index
402 self._repo_index = {}
403 self._mainteiners_index = {}
404
405 self._apps = {}
406
407 # init package versions storage
408 self._versions_mirantis = {}
409 self._versions_other = {}
410
411 def _init_archives(self):
Alexd9fd85e2019-05-16 16:58:24 -0500412 # Init version files
413 self.versionstgz = TGZFile(
414 self._versions_arch,
415 label="MCP Configuration Checker: Package versions archive"
416 )
417 self.desctgz = TGZFile(
418 self._desc_arch,
419 label="MCP Configuration Checker: Package descriptions archive"
420 )
Alexd0391d42019-05-21 18:48:55 -0500421
422 # section / app
423 self._apps = _safe_load(
424 self._apps_filename,
425 self.desctgz
426 )
427
Alex0ed4f762019-05-17 17:55:33 -0500428 # indices
429 self._repo_index = _safe_load(
430 _repos_index_filename,
431 self.versionstgz
432 )
433 self._mainteiners_index = _safe_load(
434 _mainteiners_index_filename,
435 self.versionstgz
436 )
Alexd9fd85e2019-05-16 16:58:24 -0500437
Alex0ed4f762019-05-17 17:55:33 -0500438 # versions
439 self._versions_mirantis = _safe_load(
440 _mirantis_versions_filename,
441 self.versionstgz
442 )
443 self._versions_other = _safe_load(
444 _other_versions_filename,
445 self.versionstgz
446 )
Alexd9fd85e2019-05-16 16:58:24 -0500447
Alex3bc95f62020-03-05 17:00:04 -0600448 def __init__(self, arch_folder=None, info_class=None):
449 # Perform inits
450 self._init_vars(info_class)
451 self._init_folders(arch_folder)
452 # Ensure that versions folder exists
453 logger_cli.debug(ensure_folder_exists(self._arch_folder))
454 # Preload/create archives
455 self._init_archives()
456 self.init_done = True
457
458 def __call__(self, *args, **kwargs):
459 if self.init_done:
460 return self
461 else:
462 return self.__init__(self, *args, **kwargs)
463
Alexd9fd85e2019-05-16 16:58:24 -0500464 def _create_repo_header(self, p):
465 _header = "_".join([
466 p['tag'],
467 p['subset'],
468 p['release'],
469 p['ubuntu-release'],
470 p['type'],
471 p['arch']
472 ])
Alex0ed4f762019-05-17 17:55:33 -0500473 return _get_value_index(self._repo_index, p, header=_header)
Alexd9fd85e2019-05-16 16:58:24 -0500474
Alex0ed4f762019-05-17 17:55:33 -0500475 def _get_indexed_values(self, pair):
476 _h, _m = pair.split('-')
477 return self._repo_index[_h], self._mainteiners_index[_m]
Alexd9fd85e2019-05-16 16:58:24 -0500478
Alexd0391d42019-05-21 18:48:55 -0500479 def _update_pkg_version(self, _d, n, v, md5, s, a, h_index, m_index):
Alexd9fd85e2019-05-16 16:58:24 -0500480 """Method updates package version record in global dict
481 """
482 # 'if'*4 operation is pretty expensive when using it 100k in a row
483 # so try/except is a better way to go, even faster than 'reduce'
Alex0ed4f762019-05-17 17:55:33 -0500484 _pair = "-".join([h_index, m_index])
Alexd0391d42019-05-21 18:48:55 -0500485 _info = {
486 'repo': [_pair],
487 'section': s,
488 'app': a
489 }
Alexd9fd85e2019-05-16 16:58:24 -0500490 try:
491 # try to load list
Alexd0391d42019-05-21 18:48:55 -0500492 _list = _d[n][v][md5]['repo']
Alexd9fd85e2019-05-16 16:58:24 -0500493 # cast it as set() and union()
Alex0ed4f762019-05-17 17:55:33 -0500494 _list = set(_list).union([_pair])
Alexd9fd85e2019-05-16 16:58:24 -0500495 # cast back as set() is not serializeable
Alexd0391d42019-05-21 18:48:55 -0500496 _d[n][v][md5]['repo'] = list(_list)
Alexd9fd85e2019-05-16 16:58:24 -0500497 return False
498 except KeyError:
499 # ok, this is fresh pkg. Do it slow way.
Alex0ed4f762019-05-17 17:55:33 -0500500 if n in _d:
Alexd9fd85e2019-05-16 16:58:24 -0500501 # there is such pkg already
Alex0ed4f762019-05-17 17:55:33 -0500502 if v in _d[n]:
Alexd9fd85e2019-05-16 16:58:24 -0500503 # there is such version, check md5
Alex0ed4f762019-05-17 17:55:33 -0500504 if md5 in _d[n][v]:
Alexd9fd85e2019-05-16 16:58:24 -0500505 # just add new repo header
Alexd0391d42019-05-21 18:48:55 -0500506 if _pair not in _d[n][v][md5]['repo']:
507 _d[n][v][md5]['repo'].append(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500508 else:
509 # check if such index is here...
510 _existing = filter(
Alexd0391d42019-05-21 18:48:55 -0500511 lambda i: _pair in _d[n][v][i]['repo'],
Alex0ed4f762019-05-17 17:55:33 -0500512 _d[n][v]
Alexd9fd85e2019-05-16 16:58:24 -0500513 )
514 if _existing:
515 # Yuck! Same version had different MD5
Alex0ed4f762019-05-17 17:55:33 -0500516 _r, _m = self._get_indexed_values(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500517 logger_cli.error(
518 "# ERROR: Package version has multiple MD5s "
519 "in '{}': {}:{}:{}".format(
Alex0ed4f762019-05-17 17:55:33 -0500520 _r,
Alexd9fd85e2019-05-16 16:58:24 -0500521 n,
522 v,
523 md5
524 )
525 )
Alexd0391d42019-05-21 18:48:55 -0500526 _d[n][v][md5] = _info
Alexd9fd85e2019-05-16 16:58:24 -0500527 else:
528 # this is new version for existing package
Alex0ed4f762019-05-17 17:55:33 -0500529 _d[n][v] = {
Alexd0391d42019-05-21 18:48:55 -0500530 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500531 }
532 return False
533 else:
534 # this is new pakcage
Alex0ed4f762019-05-17 17:55:33 -0500535 _d[n] = {
Alexd9fd85e2019-05-16 16:58:24 -0500536 v: {
Alexd0391d42019-05-21 18:48:55 -0500537 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500538 }
539 }
540 return True
541
542 def _save_repo_descriptions(self, repo_props, desc):
543 # form the filename for the repo and save it
544 self.desctgz.add_file(
545 self._create_repo_header(repo_props),
546 json.dumps(desc)
547 )
548
549 # def get_description(self, repo_props, name, md5=None):
550 # """Gets target description
551 # """
552 # _filename = self._create_repo_header(repo_props)
553 # # check if it is present in cache
554 # if _filename in self._desc_cache:
555 # _descs = self._desc_cache[_filename]
556 # else:
557 # # load data
558 # _descs = self.desctgz.get_file(_filename)
559 # # Serialize it
560 # _descs = json.loads(_descs)
561 # self._desc_cache[_filename] = _descs
562 # # return target desc
563 # if name in _descs and md5 in _descs[name]:
564 # return _descs[name][md5]
565 # else:
566 # return None
567
    def parse_tag(self, tag, descriptions=False, apps=False):
        """Download and parse Package.gz files for specific tag
        By default, descriptions not saved
        due to huge resulting file size and slow processing

        :param tag: tag label previously stored by ReposInfo.fetch_repos
        :param descriptions: also archive full package descriptions
            (disk-space heavy)
        :param apps: also maintain the section/app/package mapping
        """
        # load the per-tag repo layout saved by ReposInfo
        _info = self._info_class.get_repoinfo(tag)
        # calculate Packages.gz files to process
        _baseurl = _info.pop("baseurl")
        # NOTE(review): 'baseurl' was already popped above, so the extra -1
        # looks like an off-by-one — confirm (value is only used in the log)
        _total_components = len(_info.keys()) - 1
        _ubuntu_package_repos = 0
        _other_repos = 0
        # first pass: only count repos so the progress bar has a total
        for _c, _d in _info.items():
            for _ur, _l in _d.items():
                if _ur in ubuntu_releases:
                    _ubuntu_package_repos += len(_l)
                elif _ur != 'url':
                    _other_repos += len(_l)
        logger_cli.info(
            "-> loaded repository info for '{}'.\n"
            "   '{}', {} components, {} ubuntu repos, {} other/uknown".format(
                _baseurl,
                tag,
                _total_components,
                _ubuntu_package_repos,
                _other_repos
            )
        )
        # init progress bar
        _progress = Progress(_ubuntu_package_repos)
        _index = 0
        _processed = 0
        _new = 0
        # second pass: download and parse every Packages.gz
        for _c, _d in _info.items():
            # we do not need url here, just get rid of it
            if 'url' in _d:
                _d.pop('url')
            # _url = if 'url' in _d else _baseurl + _c
            for _ur, _l in _d.items():
                # iterate package collections
                for _p in _l:
                    # descriptions
                    if descriptions:
                        _descriptions = {}
                    # download and unzip
                    _index += 1
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, GET 'Packages.gz'".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch']
                        )
                    )
                    _raw = get_gzipped_file(_p['filepath'])
                    if not _raw:
                        # empty repo...
                        _progress.clearline()
                        logger_cli.warning(
                            "# WARNING: Empty file: '{}'".format(
                                _p['filepath']
                            )
                        )
                        continue
                    else:
                        # bytes from the gzip stream -> text
                        _raw = _raw.decode("utf-8")
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, {}/{}".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch'],
                            _processed,
                            _new
                        )
                    )
                    _lines = _raw.splitlines()
                    # break lines collection into isolated pkg data
                    _pkg = {
                        "tag": tag,
                        "subset": _c,
                        "release": _ur
                    }
                    _pkg.update(_p)
                    _desc = {}
                    _key = _value = ""
                    # if there is no empty line at end, add it
                    # (an empty line is the stanza terminator below)
                    if _lines[-1] != '':
                        _lines.append('')
                    # Process lines
                    for _line in _lines:
                        if not _line:
                            # if the line is empty, process pkg data gathered
                            _name = _desc['package']
                            _md5 = _desc['md5sum']
                            _version = _desc['version']
                            _mainteiner = _desc['maintainer']

                            # 'source' field names the app; '-' when absent
                            if 'source' in _desc:
                                _ap = _desc['source'].lower()
                            else:
                                _ap = "-"

                            if apps:
                                # insert app
                                _sc = _desc['section'].lower()
                                if 'source' in _desc:
                                    _ap = _desc['source'].lower()
                                else:
                                    _ap = "-"

                                try:
                                    # merge this arch into the existing list
                                    _tmp = set(self._apps[_sc][_ap][_name])
                                    _tmp.add(_desc['architecture'])
                                    self._apps[_sc][_ap][_name] = list(_tmp)
                                except KeyError:
                                    # first time seen: create the nested path
                                    nested_set(
                                        self._apps,
                                        [_sc, _ap, _name],
                                        [_desc['architecture']]
                                    )

                            # Check is mainteiner is Mirantis
                            if _mainteiner.endswith("@mirantis.com>"):
                                # update mirantis versions
                                if self._update_pkg_version(
                                    self._versions_mirantis,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1
                            else:
                                # update other versions
                                if self._update_pkg_version(
                                    self._versions_other,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1

                            if descriptions:
                                _d_new = {
                                    _md5: deepcopy(_desc)
                                }
                                try:
                                    _descriptions[_name].update(_d_new)
                                except KeyError:
                                    _descriptions[_name] = _d_new
                            # clear the data for next pkg
                            _processed += 1
                            _desc = {}
                            _key = ""
                            _value = ""
                        elif _line.startswith(' '):
                            # continuation line: append to the previous field
                            _desc[_key] += "\n{}".format(_line)
                        else:
                            # NOTE(review): assumes well-formed 'Key: value'
                            # stanza lines; a line without ': ' would raise
                            # ValueError here — confirm inputs
                            _key, _value = _line.split(': ', 1)
                            _key = _key.lower()

                            _desc[_key] = _value
                    # save descriptions if needed
                    if descriptions:
                        _progress.clearline()
                        self._save_repo_descriptions(_pkg, _descriptions)

        _progress.end()
        # backup headers to disk
        self.versionstgz.add_file(
            _repos_index_filename,
            json.dumps(self._repo_index),
            replace=True
        )
        self.versionstgz.add_file(
            _mainteiners_index_filename,
            json.dumps(self._mainteiners_index),
            replace=True
        )
        if apps:
            self.desctgz.add_file(
                self._apps_filename,
                json.dumps(self._apps),
                replace=True
            )

        return
773
Alexd0391d42019-05-21 18:48:55 -0500774 def fetch_versions(self, tag, descriptions=False, apps=False):
Alexd9fd85e2019-05-16 16:58:24 -0500775 """Executes parsing for specific tag
776 """
777 if descriptions:
778 logger_cli.warning(
779 "\n\n# !!! WARNING: Saving repo descriptions "
780 "consumes huge amount of disk space\n\n"
781 )
782 # if there is no such tag, parse it from repoinfo
Alexd9fd85e2019-05-16 16:58:24 -0500783 logger_cli.info("# Fetching versions for {}".format(tag))
Alexd0391d42019-05-21 18:48:55 -0500784 self.parse_tag(tag, descriptions=descriptions, apps=apps)
Alex0ed4f762019-05-17 17:55:33 -0500785 logger_cli.info("-> saving updated versions")
786 self.versionstgz.add_file(
787 _mirantis_versions_filename,
788 json.dumps(self._versions_mirantis),
789 replace=True
790 )
791 self.versionstgz.add_file(
792 _other_versions_filename,
793 json.dumps(self._versions_other),
794 replace=True
795 )
Alexd9fd85e2019-05-16 16:58:24 -0500796
797 def build_repos(self, url, tag=None):
798 """Builds versions data for selected tag, or for all of them
799 """
Alexd9fd85e2019-05-16 16:58:24 -0500800 # recoursively walk the mirrors
801 # and gather all of the repos for 'tag' or all of the tags
Alex3bc95f62020-03-05 17:00:04 -0600802 self._info_class.fetch_repos(url, tag=tag)
Alexd9fd85e2019-05-16 16:58:24 -0500803
Alex74dc1352019-05-17 13:18:24 -0500804 def _build_action(self, url, tags):
805 for t in tags:
Alex6df29ad2019-05-31 17:55:32 -0500806 logger_cli.info("# Building repo info for '{}'".format(t))
Alex74dc1352019-05-17 13:18:24 -0500807 self.build_repos(url, tag=t)
808
Alexd0391d42019-05-21 18:48:55 -0500809 def get_available_tags(self, tag=None):
810 # Populate action tags
Alex3bc95f62020-03-05 17:00:04 -0600811 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd0391d42019-05-21 18:48:55 -0500812
813 _tags = []
814 if tag in major:
815 _tags.append(tag)
816 if tag in updates:
817 _tags.append(tag + ".update")
818 if tag in hotfix:
819 _tags.append(tag + ".hotfix")
820
821 return _tags
822
Alexd9fd85e2019-05-16 16:58:24 -0500823 def action_for_tag(
824 self,
825 url,
826 tag,
827 action=None,
Alexd0391d42019-05-21 18:48:55 -0500828 descriptions=None,
829 apps=None
Alexd9fd85e2019-05-16 16:58:24 -0500830 ):
831 """Executes action for every tag from all collections
832 """
833 if not action:
834 logger_cli.info("# No action set, nothing to do")
Alex74dc1352019-05-17 13:18:24 -0500835 # See if this is a list action
Alexd9fd85e2019-05-16 16:58:24 -0500836 if action == "list":
Alex3bc95f62020-03-05 17:00:04 -0600837 _all = self._info_class.list_tags()
Alex6df29ad2019-05-31 17:55:32 -0500838 if _all:
839 # Print pretty list and exit
840 logger_cli.info("# Tags available at '{}':".format(url))
841 for t in _all:
842 _ri = self._repo_index
843 _isparsed = any(
Alex3bc95f62020-03-05 17:00:04 -0600844 [k for k, v in _ri.items()
Alex6df29ad2019-05-31 17:55:32 -0500845 if v['props']['tag'] == t]
846 )
847 if _isparsed:
848 logger_cli.info(get_tag_label(t, parsed=True))
849 else:
850 logger_cli.info(get_tag_label(t))
851 else:
852 logger_cli.info("# Not tags parsed yet for '{}':".format(url))
853
Alex74dc1352019-05-17 13:18:24 -0500854 # exit
Alexd9fd85e2019-05-16 16:58:24 -0500855 return
Alex74dc1352019-05-17 13:18:24 -0500856
Alex6df29ad2019-05-31 17:55:32 -0500857 if action == "build":
858 self._build_action(url, [tag])
859
Alexd0391d42019-05-21 18:48:55 -0500860 # Populate action tags
861 _action_tags = self.get_available_tags(tag)
862
Alexd9fd85e2019-05-16 16:58:24 -0500863 if not _action_tags:
864 logger_cli.info(
865 "# Tag of '{}' not found. "
866 "Consider rebuilding repos info.".format(tag)
867 )
Alex74dc1352019-05-17 13:18:24 -0500868 else:
Alexd9fd85e2019-05-16 16:58:24 -0500869 logger_cli.info(
Alex74dc1352019-05-17 13:18:24 -0500870 "-> tags to process: {}".format(
Alexd9fd85e2019-05-16 16:58:24 -0500871 ", ".join(_action_tags)
872 )
873 )
Alex74dc1352019-05-17 13:18:24 -0500874 # Execute actions
Alex6df29ad2019-05-31 17:55:32 -0500875 if action == "fetch":
Alexd9fd85e2019-05-16 16:58:24 -0500876 for t in _action_tags:
Alexd0391d42019-05-21 18:48:55 -0500877 self.fetch_versions(t, descriptions=descriptions, apps=apps)
Alexd9fd85e2019-05-16 16:58:24 -0500878
879 logger_cli.info("# Done.")
880
    def show_package(self, name):
        """Pretty-print all known versions/md5s/repos for package 'name'.

        NOTE(review): relies on self.get_package_versions (defined elsewhere
        in this module); presumably it returns a nested dict keyed as
        section -> app -> version -> md5 -> repo-header — confirm there.
        """
        # get the package data
        _p = self.get_package_versions(name)
        if not _p:
            logger_cli.warning(
                "# WARNING: Package '{}' not found".format(name)
            )
        else:
            # print package info using sorted tags from headers
            # Package: name
            # [u/h] tag \t <version>
            # \t <version>
            # <10symbols> \t <md5> \t sorted headers with no tag
            # ...
            # section
            for _s in sorted(_p):
                # app
                for _a in sorted(_p[_s]):
                    # _o accumulates the indented version/md5/repo listing;
                    # _mm collects distinct maintainers for the header line
                    _o = ""
                    _mm = []
                    # get and sort versions
                    for _v in sorted(_p[_s][_a]):
                        _o += "\n" + " "*8 + _v + ':\n'
                        # get and sort md5s
                        for _md5 in sorted(_p[_s][_a][_v]):
                            _o += " "*16 + _md5 + "\n"
                            # get and sort repo headers
                            for _r in sorted(_p[_s][_a][_v][_md5]):
                                # headers are '_'-joined; show space-joined
                                _o += " "*24 + _r.replace('_', ' ') + '\n'
                                _m = _p[_s][_a][_v][_md5][_r]["maintainer"]
                                if _m not in _mm:
                                    _mm.append(_m)

                    logger_cli.info(
                        "\n# Package: {}/{}/{}\nMaintainers: {}".format(
                            _s,
                            _a,
                            name,
                            ", ".join(_mm)
                        )
                    )

                    logger_cli.info(_o)
Alex74dc1352019-05-17 13:18:24 -0500924
Alexd0391d42019-05-21 18:48:55 -0500925 @staticmethod
926 def get_apps(versions, name):
927 _all = True if name == '*' else False
Alexcf91b182019-05-31 11:57:07 -0500928 _s_max = _a_max = _p_max = _v_max = 0
Alexd0391d42019-05-21 18:48:55 -0500929 _rows = []
930 for _p in versions.keys():
931 _vs = versions[_p]
Alex3bc95f62020-03-05 17:00:04 -0600932 for _v, _d1 in _vs.items():
933 for _md5, _info in _d1.items():
Alexd0391d42019-05-21 18:48:55 -0500934 if _all or name == _info['app']:
935 _s_max = max(len(_info['section']), _s_max)
936 _a_max = max(len(_info['app']), _a_max)
Alexcf91b182019-05-31 11:57:07 -0500937 _p_max = max(len(_p), _p_max)
938 _v_max = max(len(_v), _v_max)
Alexd0391d42019-05-21 18:48:55 -0500939 _rows.append([
940 _info['section'],
941 _info['app'],
Alexcf91b182019-05-31 11:57:07 -0500942 _p,
943 _v,
944 _md5,
945 len(_info['repo'])
Alexd0391d42019-05-21 18:48:55 -0500946 ])
Alexcf91b182019-05-31 11:57:07 -0500947 # format columns
948 # section
949 _fmt = "{:"+str(_s_max)+"} "
950 # app
951 _fmt += "{:"+str(_a_max)+"} "
952 # package name
953 _fmt += "{:"+str(_p_max)+"} "
954 # version
955 _fmt += "{:"+str(_v_max)+"} "
956 # md5 and number of repos is fixed
957 _fmt += "{} in {} repos"
958
959 # fill rows
960 _rows = [_fmt.format(s, a, p, v, m, l) for s, a, p, v, m, l in _rows]
Alexd0391d42019-05-21 18:48:55 -0500961 _rows.sort()
962 return _rows
963
964 def show_app(self, name):
965 c = 0
966 rows = self.get_apps(self._versions_mirantis, name)
967 if rows:
Alexcf91b182019-05-31 11:57:07 -0500968 logger_cli.info("\n# Mirantis packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500969 logger_cli.info("\n".join(rows))
970 c += 1
971 rows = self.get_apps(self._versions_other, name)
972 if rows:
Alexcf91b182019-05-31 11:57:07 -0500973 logger_cli.info("\n# Other packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500974 logger_cli.info("\n".join(rows))
975 c += 1
976 if c == 0:
977 logger_cli.info("\n# No app found for '{}'".format(name))
978
979 def get_mirantis_pkg_names(self):
980 # Mirantis maintainers only
981 return set(
982 self._versions_mirantis.keys()
983 ) - set(
984 self._versions_other.keys()
985 )
986
987 def get_other_pkg_names(self):
988 # Non-mirantis Maintainers
989 return set(
990 self._versions_other.keys()
991 ) - set(
992 self._versions_mirantis.keys()
993 )
994
995 def get_mixed_pkg_names(self):
996 # Mixed maintainers
997 return set(
998 self._versions_mirantis.keys()
999 ).intersection(set(
1000 self._versions_other.keys()
1001 ))
1002
1003 def is_mirantis(self, name, tag=None):
1004 """Method checks if this package is mainteined
1005 by mirantis in target tag repo
1006 """
1007 if name in self._versions_mirantis:
1008 # check tag
1009 if tag:
1010 _pkg = self.get_package_versions(
1011 name,
1012 tagged=True
1013 )
1014 _tags = []
1015 for s in _pkg.keys():
1016 for a in _pkg[s].keys():
1017 for t in _pkg[s][a].keys():
1018 _tags.append(t)
1019 if any([t.startswith(tag) for t in _tags]):
1020 return True
1021 else:
1022 return None
1023 else:
1024 return True
1025 elif name in self._versions_other:
1026 # check tag
1027 if tag:
1028 _pkg = self.get_package_versions(
1029 name,
1030 tagged=True
1031 )
1032 _tags = []
1033 for s in _pkg.keys():
1034 for a in _pkg[s].keys():
1035 for t in _pkg[s][a].keys():
1036 _tags.append(t)
1037 if any([t.startswith(tag) for t in _tags]):
1038 return False
1039 else:
1040 return None
1041 else:
1042 return False
1043 else:
1044 logger.error(
1045 "# ERROR: package '{}' not found "
1046 "while determining maintainer".format(
1047 name
1048 )
1049 )
1050 return None
1051
1052 def get_filtered_versions(
1053 self,
1054 name,
1055 tag=None,
1056 include=None,
1057 exclude=None
1058 ):
1059 """Method gets all the versions for the package
1060 and filters them using keys above
1061 """
1062 if tag:
Alex3bc95f62020-03-05 17:00:04 -06001063 tag = str(tag) if not isinstance(tag, str) else tag
Alexd0391d42019-05-21 18:48:55 -05001064 _out = {}
1065 _vs = self.get_package_versions(name, tagged=True)
1066 # iterate to filter out keywords
Alex3bc95f62020-03-05 17:00:04 -06001067 for s, apps in _vs.items():
1068 for a, _tt in apps.items():
1069 for t, vs in _tt.items():
Alexd0391d42019-05-21 18:48:55 -05001070 # filter tags
1071 if tag and t != tag and t.rsplit('.', 1)[0] != tag:
1072 continue
1073 # Skip hotfix tag
1074 if t == tag + ".hotfix":
1075 continue
Alex3bc95f62020-03-05 17:00:04 -06001076 for v, rp in vs.items():
1077 for h, p in rp.items():
Alexd0391d42019-05-21 18:48:55 -05001078 # filter headers with all keywords matching
1079 _h = re.split(r"[\-\_]+", h)
1080 _included = all([kw in _h for kw in include])
1081 _excluded = any([kw in _h for kw in exclude])
1082 if not _included or _excluded:
1083 continue
1084 else:
1085 nested_set(_out, [s, a, v], [])
1086 _dat = {
1087 "header": h
1088 }
1089 _dat.update(p)
1090 _out[s][a][v].append(_dat)
1091 return _out
1092
    def get_package_versions(self, name, tagged=False):
        """Build the full version structure for a single package,
        with repository properties merged in.

        Version data from the Mirantis and non-Mirantis indexes is
        merged into one lookup before processing.

        :param name: package name to look up
        :param tagged: when True, group by repo tag:
            section -> app -> tag -> version -> header (tag part stripped);
            when False, group by version:
            section -> app -> version -> md5 -> full repo header
        :return: nested dict as described above; empty when name is unknown
        """
        # merge version data from both maintainer indexes
        _vs = {}

        if name in self._versions_mirantis:
            _vs.update(self._versions_mirantis[name])
        if name in self._versions_other:
            _vs.update(self._versions_other[name])

        # insert repo data, insert props into headers place
        _package = {}
        if tagged:
            for _v, _d1 in _vs.items():
                # use tag as a next step
                for _md5, _info in _d1.items():
                    _s = _info['section']
                    _a = _info['app']
                    for _pair in _info['repo']:
                        _rp = {}
                        # extract props for a repo
                        _r, _m = self._get_indexed_values(_pair)
                        # get tag
                        _tag = _r["props"]["tag"]
                        # cut tag from the header
                        # (header format presumably '<tag>_<rest>'; only the
                        # part after the first '_' is kept as the key)
                        _cut_head = _r["header"].split("_", 1)[1]
                        # populate a fresh per-repo dict
                        _rp["maintainer"] = _m
                        _rp["md5"] = _md5
                        _rp.update(_r["props"])
                        nested_set(
                            _package,
                            [_s, _a, _tag, _v, _cut_head],
                            _rp
                        )
        else:
            for _v, _d1 in _vs.items():
                for _md5, _info in _d1.items():
                    _s = _info['section']
                    _a = _info['app']
                    for _pair in _info['repo']:
                        _r, _m = self._get_indexed_values(_pair)
                        # NOTE(review): unlike the tagged branch, this updates
                        # _info in place, so the stored version record gains
                        # maintainer/props keys as a side effect of this call
                        _info["maintainer"] = _m
                        _info.update(_r["props"])
                        nested_set(
                            _package,
                            [_s, _a, _v, _md5, _r["header"]],
                            _info
                        )

        return _package
1146
Alexd9fd85e2019-05-16 16:58:24 -05001147 def parse_repos(self):
1148 # all tags to check
Alex3bc95f62020-03-05 17:00:04 -06001149 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd9fd85e2019-05-16 16:58:24 -05001150
1151 # major tags
1152 logger_cli.info("# Processing major tags")
1153 for _tag in major:
1154 self.fetch_versions(_tag)
1155
1156 # updates tags
1157 logger_cli.info("# Processing update tags")
1158 for _tag in updates:
1159 self.fetch_versions(_tag + ".update")
1160
1161 # hotfix tags
1162 logger_cli.info("# Processing hotfix tags")
1163 for _tag in hotfix:
1164 self.fetch_versions(_tag + ".hotfix")