blob: 824275deb5ad6b3a96a589ca654a3a074b9c921e [file] [log] [blame]
Alexd9fd85e2019-05-16 16:58:24 -05001import json
2import os
Alexd0391d42019-05-21 18:48:55 -05003import re
Alexd9fd85e2019-05-16 16:58:24 -05004from copy import deepcopy
5
Alex74dc1352019-05-17 13:18:24 -05006from cfg_checker.common import logger, logger_cli, nested_set
Alex0ed4f762019-05-17 17:55:33 -05007from cfg_checker.common.const import _mainteiners_index_filename
8from cfg_checker.common.const import _mirantis_versions_filename
9from cfg_checker.common.const import _other_versions_filename
Alexd9fd85e2019-05-16 16:58:24 -050010from cfg_checker.common.const import _pkg_desc_archive
11from cfg_checker.common.const import _repos_index_filename
12from cfg_checker.common.const import _repos_info_archive
13from cfg_checker.common.const import _repos_versions_archive
Alexd9fd85e2019-05-16 16:58:24 -050014from cfg_checker.common.const import ubuntu_releases
Alex7f69a6a2019-05-31 16:53:35 -050015from cfg_checker.common.file_utils import ensure_folder_exists
Alexd9fd85e2019-05-16 16:58:24 -050016from cfg_checker.common.file_utils import get_gzipped_file
17from cfg_checker.common.settings import pkg_dir
18from cfg_checker.helpers.console_utils import Progress
19from cfg_checker.helpers.tgz import TGZFile
20
21import requests
22from requests.exceptions import ConnectionError
23
24ext = ".json"
25
26
def get_tag_label(_tag, parsed=False):
    """Build a pretty, aligned one-line label for a tag.

    Tags already parsed into the index are prefixed with '+ ',
    others with two spaces; '.update'/'.hotfix' suffixes are shown
    as bracketed markers before the bare tag name.
    """
    _prefix = "+ " if parsed else "  "
    if _tag.endswith(".update"):
        _body = "[updates] " + _tag.rsplit('.', 1)[0]
    elif _tag.endswith(".hotfix"):
        _body = " [hotfix] " + _tag.rsplit('.', 1)[0]
    else:
        # plain tag: pad to align with the bracketed variants
        _body = " "*10 + _tag

    return _prefix + _body
42
43
Alex0ed4f762019-05-17 17:55:33 -050044def _get_value_index(_di, value, header=None):
Alex29ee76f2019-05-17 18:52:29 -050045 # Mainteiner names often uses specific chars
Alex3bc95f62020-03-05 17:00:04 -060046 # so make sure that value saved is str not str
47 # Python2
48 # _val = str(value, 'utf-8') if isinstance(value, str) else value
49 # Python3 has always utf-8 decoded value
50 _val = value
Alex0ed4f762019-05-17 17:55:33 -050051 if header:
Alex3bc95f62020-03-05 17:00:04 -060052 try:
53 _ = next(filter(lambda i: _di[i]["header"] == header, _di))
54 # iterator not empty, find index
55 for _k, _v in _di.items():
56 if _v["header"] == header:
57 _index = _k
58 except StopIteration:
59 _index = str(len(_di.keys()) + 1)
Alex0ed4f762019-05-17 17:55:33 -050060 _di[_index] = {
61 "header": header,
Alex29ee76f2019-05-17 18:52:29 -050062 "props": _val
Alex0ed4f762019-05-17 17:55:33 -050063 }
Alex3bc95f62020-03-05 17:00:04 -060064 finally:
65 return _index
Alex0ed4f762019-05-17 17:55:33 -050066 else:
Alex3bc95f62020-03-05 17:00:04 -060067 try:
68 _ = next(filter(lambda i: _di[i] == _val, _di))
69 # iterator not empty, find index
70 for _k, _v in _di.items():
Alex29ee76f2019-05-17 18:52:29 -050071 if _v == _val:
Alex0ed4f762019-05-17 17:55:33 -050072 _index = _k
Alex3bc95f62020-03-05 17:00:04 -060073 except StopIteration:
74 _index = str(len(_di.keys()) + 1)
75 # on save, cast it as str
76 _di[_index] = _val
77 finally:
78 return _index
Alex0ed4f762019-05-17 17:55:33 -050079
80
81def _safe_load(_f, _a):
82 if _f in _a.list_files():
Alexd0391d42019-05-21 18:48:55 -050083 logger_cli.debug(
84 "... loading '{}':'{}'".format(
Alex0ed4f762019-05-17 17:55:33 -050085 _a.basefile,
86 _f
87 )
88 )
Alex3bc95f62020-03-05 17:00:04 -060089 return json.loads(_a.get_file(_f, decode=True))
Alex0ed4f762019-05-17 17:55:33 -050090 else:
91 return {}
92
93
Alexd9fd85e2019-05-16 16:58:24 -050094def _n_url(url):
95 if url[-1] == '/':
96 return url
97 else:
98 return url + '/'
99
100
class ReposInfo(object):
    """Discovers and caches the repository structure of a mirror.

    Walks the mirror's HTML index pages, locates 'Packages.gz' links per
    tag / subtag / ubuntu release, and stores the gathered structure as
    one JSON file per tag inside a tgz archive (`_repos_info_archive`).
    """
    # guards against re-running __init__ via __call__
    init_done = False

    def _init_vars(self):
        # collected repo descriptors
        self.repos = []

    def _init_folders(self, arch_folder=None):
        # resolve archive folder and the repo info archive path inside it
        if arch_folder:
            self._arch_folder = arch_folder
            self._repofile = os.path.join(arch_folder, _repos_info_archive)
        else:
            self._arch_folder = os.path.join(pkg_dir, "versions")
            self._repofile = os.path.join(
                self._arch_folder,
                _repos_info_archive
            )

    def __init__(self, arch_folder=None):
        # perform inits
        self._init_vars()
        self._init_folders(arch_folder)
        self.init_done = True

    def __call__(self, *args, **kwargs):
        # Lazy (re)init: run __init__ only if it never completed.
        # Fix: previous version called self.__init__(self, ...) -- passing
        # 'self' as arch_folder -- and returned __init__'s None.
        if not self.init_done:
            self.__init__(*args, **kwargs)
        return self

    @staticmethod
    def _ls_repo_page(url):
        """Scrape one HTML index page into ([dir names], [file names]).

        Yes, this is ugly. But it works ok for small HTMLs.
        """
        _a = "<a"
        _s = "href="
        _e = "\">"
        try:
            page = requests.get(url, timeout=60)
        except ConnectionError as e:
            # Python3 exceptions have no '.message'; format the exception
            logger_cli.error("# ERROR: {}".format(e))
            return [], []
        a = page.text.splitlines()
        # Comprehension for dirs. Anchors for ends with '-'
        _dirs = [ln[ln.index(_s)+6:ln.index(_e)-1]
                 for ln in a if ln.startswith(_a) and ln.endswith('-')]
        # Comprehension for files. Anchors ends with size
        _files = [ln[ln.index(_s)+6:ln.index(_e)]
                  for ln in a if ln.startswith(_a) and not ln.endswith('-')]

        return _dirs, _files

    def search_pkg(self, url, _list):
        """Recursively walk the 'dists' tree, appending every
        'Packages.gz' URL found to *_list* (also returned)."""
        _dirs, _files = self._ls_repo_page(url)

        for _d in _dirs:
            # Search only in dists, ignore the rest
            if "dists" not in url and _d != "dists":
                continue
            _u = _n_url(url + _d)
            self.search_pkg(_u, _list)

        for _f in _files:
            if _f == "Packages.gz":
                _list.append(url + _f)
                logger.debug("... [F] '{}'".format(url + _f))

        return _list

    @staticmethod
    def _map_repo(_path_list, _r):
        """Map each Packages.gz path onto a repo descriptor dict
        (arch/type/ubuntu-release/filepath) appended to *_r*."""
        for _pkg_path in _path_list:
            _l = _pkg_path.split('/')
            # path components after 'dists', reversed:
            # [Packages.gz, binary-<arch>, <type>, <release>, ...]
            _kw = _l[_l.index('dists')+1:]
            _kw.reverse()
            _repo_item = {
                # strip the 'binary-' prefix when present
                "arch": _kw[1][7:] if "binary" in _kw[1] else _kw[1],
                "type": _kw[2],
                "ubuntu-release": _kw[3],
                "filepath": _pkg_path
            }
            _r.append(_repo_item)

    def _find_tag(self, _t, _u, label=""):
        """Check whether tag *_t* exists at *_u* (optionally inside the
        'hotfix'/'update' subfolder given by *label*) and return the
        {label: {baseurl, all}} skeleton, or {} when absent."""
        if label:
            _url = _n_url(_u + label)
            _label = _t + '.' + label
        else:
            _url = _u
            _label = _t
        _ts, _ = self._ls_repo_page(_url)
        if _t in _ts:
            logger.debug(
                "... found tag '{}' at '{}'".format(
                    _t,
                    _url
                )
            )
            return {
                _label: {
                    "baseurl": _n_url(_url + _t),
                    "all": {}
                }
            }
        else:
            return {}

    def fetch_repos(self, url, tag=None):
        """Gather Packages.gz links for *tag* (or all tags) under *url*
        and save one JSON file per tag into the repo info archive.
        Tags already present in the archive are skipped."""
        base_url = _n_url(url)
        logger_cli.info("# Using '{}' as a repos source".format(base_url))

        logger_cli.info("# Gathering repos info (i.e. links to 'packages.gz')")
        # init repoinfo archive
        _repotgz = TGZFile(self._repofile)
        # prepare repo links
        _repos = {}
        if tag:
            # only one tag to process
            _repos.update(self._find_tag(tag, base_url))
            _repos.update(self._find_tag(tag, base_url, label="hotfix"))
            _repos.update(self._find_tag(tag, base_url, label="update"))
        else:
            # gather all of them
            _tags, _ = self._ls_repo_page(base_url)
            if "hotfix" in _tags:
                _tags.remove('hotfix')
            if "update" in _tags:
                _tags.remove('update')
            # search tags in subfolders
            _h_tags, _ = self._ls_repo_page(base_url + 'hotfix')
            _u_tags, _ = self._ls_repo_page(base_url + 'update')
            _tags.extend([t for t in _h_tags if t not in _tags])
            _tags.extend([t for t in _u_tags if t not in _tags])
            _progress = Progress(len(_tags))
            _index = 0
            for _tag in _tags:
                _repos.update(self._find_tag(_tag, base_url))
                _repos.update(self._find_tag(_tag, base_url, label="hotfix"))
                _repos.update(self._find_tag(_tag, base_url, label="update"))
                _index += 1
                _progress.write_progress(_index)
            _progress.end()

        # parse subtags
        for _label in _repos.keys():
            logger_cli.info("-> processing tag '{}'".format(_label))
            _name = _label + ".json"
            if _repotgz.has_file(_name):
                logger_cli.info(
                    "-> skipping, '{}' already has '{}'".format(
                        _repos_info_archive,
                        _name
                    )
                )
                continue
            # process the tag
            _repo = _repos[_label]
            _baseurl = _repos[_label]["baseurl"]
            # get the subtags
            _sub_tags, _ = self._ls_repo_page(_baseurl)
            _total_index = len(_sub_tags)
            _index = 0
            _progress = Progress(_total_index)
            logger.debug(
                "... found {} subtags for '{}'".format(
                    len(_sub_tags),
                    _label
                )
            )
            # save the url and start search
            for _stag in _sub_tags:
                _u = _baseurl + _stag
                _index += 1
                logger.debug(
                    "... searching repos in '{}/{}'".format(
                        _label,
                        _stag
                    )
                )

                # Searching Package collections
                if _stag in ubuntu_releases:
                    # if stag is the release, this is all packages
                    _repo["all"][_stag] = []
                    _repo["all"]["url"] = _n_url(_u)
                    _path_list = self.search_pkg(_n_url(_u), [])
                    self._map_repo(_path_list, _repo["all"][_stag])
                    logger.info(
                        "-> found {} dists".format(
                            len(_repo["all"][_stag])
                        )
                    )

                else:
                    # each subtag might have any ubuntu release
                    # so iterate them
                    _repo[_stag] = {
                        "url": _n_url(_u)
                    }
                    _releases, _ = self._ls_repo_page(_n_url(_u))
                    for _rel in _releases:
                        if _rel not in ubuntu_releases:
                            logger.debug(
                                "... skipped unknown ubuntu release: "
                                "'{}' in '{}'".format(
                                    _rel,
                                    _u
                                )
                            )
                        else:
                            _rel_u = _n_url(_u) + _rel
                            _repo[_stag][_rel] = []
                            _path_list = self.search_pkg(_n_url(_rel_u), [])
                            self._map_repo(
                                _path_list,
                                _repo[_stag][_rel]
                            )
                            logger.info(
                                "-> found {} dists for '{}'".format(
                                    len(_repo[_stag][_rel]),
                                    _rel
                                )
                            )
                _progress.write_progress(_index)

            _progress.end()
            _name = _label + ext
            _repotgz.add_file(_name, buf=json.dumps(_repo, indent=2))
            logger_cli.info(
                "-> archive '{}' updated with '{}'".format(
                    self._repofile,
                    _name
                )
            )

        return

    def list_tags(self, splitted=False):
        """List tags stored in the repo info archive.

        With splitted=True returns (major, updates, hotfix) sets/lists;
        otherwise a single list sorted numerically-first, then lexically.
        """
        _files = TGZFile(self._repofile).list_files()
        # all files in archive with no '.json' part
        _all = set([f.rsplit('.', 1)[0] for f in _files])
        if splitted:
            # files that ends with '.update'
            _updates = set([f for f in _all if f.find('update') >= 0])
            # files that ends with '.hotfix'
            _hotfix = set([f for f in _all if f.find('hotfix') >= 0])
            # remove updates and hotfix tags from all. The true magic of SETs
            _all = _all - _updates - _hotfix
            # cut updates and hotfix endings
            _updates = [f.rsplit('.', 1)[0] for f in _updates]
            _hotfix = [f.rsplit('.', 1)[0] for f in _hotfix]

            return _all, _updates, _hotfix
        else:
            # 're' is imported at module level; the old in-function
            # "dynamic import" was redundant and has been removed
            _all = list(_all)
            # lexical tags
            _lex = [s for s in _all if not s[0].isdigit()]
            _lex.sort()
            # tags with digits, sorted by up to three numeric components
            _dig = [s for s in _all if s[0].isdigit()]
            _dig = sorted(
                _dig,
                key=lambda x: tuple(int(i) for i in re.findall(r"\d+", x)[:3])
            )

            return _dig + _lex

    def get_repoinfo(self, tag):
        """Load and return the JSON repo structure stored for *tag*."""
        _tgz = TGZFile(self._repofile)
        _buf = _tgz.get_file(tag + ext, decode=True)
        return json.loads(_buf)
373
374
375class RepoManager(object):
Alex3bc95f62020-03-05 17:00:04 -0600376 init_done = False
Alexd9fd85e2019-05-16 16:58:24 -0500377
Alex3bc95f62020-03-05 17:00:04 -0600378 def _init_folders(self, arch_folder=None):
Alex9a4ad212020-10-01 18:04:25 -0500379 logger_cli.info("# Loading package versions data")
Alex3bc95f62020-03-05 17:00:04 -0600380 # overide arch folder if needed
381 if arch_folder:
382 self._arch_folder = arch_folder
383 else:
384 self._arch_folder = os.path.join(pkg_dir, "versions")
Alexd9fd85e2019-05-16 16:58:24 -0500385
Alex3bc95f62020-03-05 17:00:04 -0600386 self._versions_arch = os.path.join(
387 self._arch_folder,
388 _repos_versions_archive
389 )
390 self._desc_arch = os.path.join(self._arch_folder, _pkg_desc_archive)
Alexd0391d42019-05-21 18:48:55 -0500391
Alex3bc95f62020-03-05 17:00:04 -0600392 def _init_vars(self, info_class):
393 # RepoInfo instance init
394 if info_class:
395 self._info_class = info_class
396 else:
397 self._info_class = ReposInfo()
398 # archives
399 self._apps_filename = "apps.json"
Alexd9fd85e2019-05-16 16:58:24 -0500400
Alex3bc95f62020-03-05 17:00:04 -0600401 # repository index
402 self._repo_index = {}
403 self._mainteiners_index = {}
404
405 self._apps = {}
406
407 # init package versions storage
408 self._versions_mirantis = {}
409 self._versions_other = {}
410
411 def _init_archives(self):
Alexd9fd85e2019-05-16 16:58:24 -0500412 # Init version files
413 self.versionstgz = TGZFile(
414 self._versions_arch,
415 label="MCP Configuration Checker: Package versions archive"
416 )
417 self.desctgz = TGZFile(
418 self._desc_arch,
419 label="MCP Configuration Checker: Package descriptions archive"
420 )
Alexd0391d42019-05-21 18:48:55 -0500421
422 # section / app
423 self._apps = _safe_load(
424 self._apps_filename,
425 self.desctgz
426 )
427
Alex0ed4f762019-05-17 17:55:33 -0500428 # indices
429 self._repo_index = _safe_load(
430 _repos_index_filename,
431 self.versionstgz
432 )
433 self._mainteiners_index = _safe_load(
434 _mainteiners_index_filename,
435 self.versionstgz
436 )
Alexd9fd85e2019-05-16 16:58:24 -0500437
Alex0ed4f762019-05-17 17:55:33 -0500438 # versions
439 self._versions_mirantis = _safe_load(
440 _mirantis_versions_filename,
441 self.versionstgz
442 )
443 self._versions_other = _safe_load(
444 _other_versions_filename,
445 self.versionstgz
446 )
Alexd9fd85e2019-05-16 16:58:24 -0500447
Alex3bc95f62020-03-05 17:00:04 -0600448 def __init__(self, arch_folder=None, info_class=None):
449 # Perform inits
450 self._init_vars(info_class)
451 self._init_folders(arch_folder)
452 # Ensure that versions folder exists
453 logger_cli.debug(ensure_folder_exists(self._arch_folder))
454 # Preload/create archives
455 self._init_archives()
456 self.init_done = True
457
458 def __call__(self, *args, **kwargs):
459 if self.init_done:
460 return self
461 else:
462 return self.__init__(self, *args, **kwargs)
463
Alexd9fd85e2019-05-16 16:58:24 -0500464 def _create_repo_header(self, p):
465 _header = "_".join([
466 p['tag'],
467 p['subset'],
468 p['release'],
469 p['ubuntu-release'],
470 p['type'],
471 p['arch']
472 ])
Alex0ed4f762019-05-17 17:55:33 -0500473 return _get_value_index(self._repo_index, p, header=_header)
Alexd9fd85e2019-05-16 16:58:24 -0500474
Alex0ed4f762019-05-17 17:55:33 -0500475 def _get_indexed_values(self, pair):
476 _h, _m = pair.split('-')
477 return self._repo_index[_h], self._mainteiners_index[_m]
Alexd9fd85e2019-05-16 16:58:24 -0500478
Alexd0391d42019-05-21 18:48:55 -0500479 def _update_pkg_version(self, _d, n, v, md5, s, a, h_index, m_index):
Alexd9fd85e2019-05-16 16:58:24 -0500480 """Method updates package version record in global dict
481 """
482 # 'if'*4 operation is pretty expensive when using it 100k in a row
483 # so try/except is a better way to go, even faster than 'reduce'
Alex0ed4f762019-05-17 17:55:33 -0500484 _pair = "-".join([h_index, m_index])
Alexd0391d42019-05-21 18:48:55 -0500485 _info = {
486 'repo': [_pair],
487 'section': s,
488 'app': a
489 }
Alexd9fd85e2019-05-16 16:58:24 -0500490 try:
491 # try to load list
Alexd0391d42019-05-21 18:48:55 -0500492 _list = _d[n][v][md5]['repo']
Alexd9fd85e2019-05-16 16:58:24 -0500493 # cast it as set() and union()
Alex0ed4f762019-05-17 17:55:33 -0500494 _list = set(_list).union([_pair])
Alexd9fd85e2019-05-16 16:58:24 -0500495 # cast back as set() is not serializeable
Alexd0391d42019-05-21 18:48:55 -0500496 _d[n][v][md5]['repo'] = list(_list)
Alexd9fd85e2019-05-16 16:58:24 -0500497 return False
498 except KeyError:
499 # ok, this is fresh pkg. Do it slow way.
Alex0ed4f762019-05-17 17:55:33 -0500500 if n in _d:
Alexd9fd85e2019-05-16 16:58:24 -0500501 # there is such pkg already
Alex0ed4f762019-05-17 17:55:33 -0500502 if v in _d[n]:
Alexd9fd85e2019-05-16 16:58:24 -0500503 # there is such version, check md5
Alex0ed4f762019-05-17 17:55:33 -0500504 if md5 in _d[n][v]:
Alexd9fd85e2019-05-16 16:58:24 -0500505 # just add new repo header
Alexd0391d42019-05-21 18:48:55 -0500506 if _pair not in _d[n][v][md5]['repo']:
507 _d[n][v][md5]['repo'].append(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500508 else:
509 # check if such index is here...
510 _existing = filter(
Alexd0391d42019-05-21 18:48:55 -0500511 lambda i: _pair in _d[n][v][i]['repo'],
Alex0ed4f762019-05-17 17:55:33 -0500512 _d[n][v]
Alexd9fd85e2019-05-16 16:58:24 -0500513 )
514 if _existing:
515 # Yuck! Same version had different MD5
Alex0ed4f762019-05-17 17:55:33 -0500516 _r, _m = self._get_indexed_values(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500517 logger_cli.error(
518 "# ERROR: Package version has multiple MD5s "
519 "in '{}': {}:{}:{}".format(
Alex0ed4f762019-05-17 17:55:33 -0500520 _r,
Alexd9fd85e2019-05-16 16:58:24 -0500521 n,
522 v,
523 md5
524 )
525 )
Alexd0391d42019-05-21 18:48:55 -0500526 _d[n][v][md5] = _info
Alexd9fd85e2019-05-16 16:58:24 -0500527 else:
528 # this is new version for existing package
Alex0ed4f762019-05-17 17:55:33 -0500529 _d[n][v] = {
Alexd0391d42019-05-21 18:48:55 -0500530 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500531 }
532 return False
533 else:
534 # this is new pakcage
Alex0ed4f762019-05-17 17:55:33 -0500535 _d[n] = {
Alexd9fd85e2019-05-16 16:58:24 -0500536 v: {
Alexd0391d42019-05-21 18:48:55 -0500537 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500538 }
539 }
540 return True
541
542 def _save_repo_descriptions(self, repo_props, desc):
543 # form the filename for the repo and save it
544 self.desctgz.add_file(
545 self._create_repo_header(repo_props),
546 json.dumps(desc)
547 )
548
549 # def get_description(self, repo_props, name, md5=None):
550 # """Gets target description
551 # """
552 # _filename = self._create_repo_header(repo_props)
553 # # check if it is present in cache
554 # if _filename in self._desc_cache:
555 # _descs = self._desc_cache[_filename]
556 # else:
557 # # load data
558 # _descs = self.desctgz.get_file(_filename)
559 # # Serialize it
560 # _descs = json.loads(_descs)
561 # self._desc_cache[_filename] = _descs
562 # # return target desc
563 # if name in _descs and md5 in _descs[name]:
564 # return _descs[name][md5]
565 # else:
566 # return None
567
    def parse_tag(self, tag, descriptions=False, apps=False):
        """Download and parse Package.gz files for specific tag
        By default, descriptions not saved
        due to huge resulting file size and slow processing

        :param tag: tag whose repo info was previously gathered
        :param descriptions: also store full package descriptions (huge)
        :param apps: also update the section/app -> package-name mapping
        """
        # init gzip and downloader
        _info = self._info_class.get_repoinfo(tag)
        # calculate Packages.gz files to process
        _baseurl = _info.pop("baseurl")
        # NOTE(review): '-1' presumably compensates for one non-component
        # key remaining in _info after popping 'baseurl' -- confirm
        # against the stored repo JSON structure
        _total_components = len(_info.keys()) - 1
        _ubuntu_package_repos = 0
        _other_repos = 0
        # pre-count repos so the progress bar can be sized
        for _c, _d in _info.items():
            for _ur, _l in _d.items():
                if _ur in ubuntu_releases:
                    _ubuntu_package_repos += len(_l)
                elif _ur != 'url':
                    _other_repos += len(_l)
        logger_cli.info(
            "-> loaded repository info for '{}'.\n"
            "  '{}', {} components, {} ubuntu repos, {} other/uknown".format(
                _baseurl,
                tag,
                _total_components,
                _ubuntu_package_repos,
                _other_repos
            )
        )
        # init progress bar
        _progress = Progress(_ubuntu_package_repos)
        _index = 0
        _processed = 0
        _new = 0
        for _c, _d in _info.items():
            # we do not need url here, just get rid of it
            if 'url' in _d:
                _d.pop('url')
            # _url =  if 'url' in _d else _baseurl + _c
            for _ur, _l in _d.items():
                # iterate package collections
                for _p in _l:
                    # descriptions
                    if descriptions:
                        _descriptions = {}
                    # download and unzip
                    _index += 1
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, GET 'Packages.gz'".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch']
                        )
                    )
                    _raw = get_gzipped_file(_p['filepath'])
                    if not _raw:
                        # empty repo...
                        _progress.clearline()
                        logger_cli.warning(
                            "# WARNING: Empty file: '{}'".format(
                                _p['filepath']
                            )
                        )
                        continue
                    else:
                        # Python3: gunzipped payload is bytes; decode it
                        _raw = _raw.decode("utf-8")
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, {}/{}".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch'],
                            _processed,
                            _new
                        )
                    )
                    _lines = _raw.splitlines()
                    # break lines collection into isolated pkg data
                    _pkg = {
                        "tag": tag,
                        "subset": _c,
                        "release": _ur
                    }
                    _pkg.update(_p)
                    _desc = {}
                    _key = _value = ""
                    # if there is no empty line at end, add it
                    # (an empty line triggers the flush branch below)
                    if _lines[-1] != '':
                        _lines.append('')
                    # Process lines
                    for _line in _lines:
                        if not _line:
                            # if the line is empty, process pkg data gathered
                            _name = _desc['package']
                            _md5 = _desc['md5sum']
                            _version = _desc['version']
                            _mainteiner = _desc['maintainer']

                            # app defaults to source package name, or '-'
                            if 'source' in _desc:
                                _ap = _desc['source'].lower()
                            else:
                                _ap = "-"

                            if apps:
                                # insert app
                                _sc = _desc['section'].lower()
                                if 'source' in _desc:
                                    _ap = _desc['source'].lower()
                                else:
                                    _ap = "-"

                                try:
                                    # merge this arch into the existing list
                                    _tmp = set(self._apps[_sc][_ap][_name])
                                    _tmp.add(_desc['architecture'])
                                    self._apps[_sc][_ap][_name] = list(_tmp)
                                except KeyError:
                                    nested_set(
                                        self._apps,
                                        [_sc, _ap, _name],
                                        [_desc['architecture']]
                                    )

                            # Check is mainteiner is Mirantis
                            if _mainteiner.endswith("@mirantis.com>"):
                                # update mirantis versions
                                if self._update_pkg_version(
                                    self._versions_mirantis,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1
                            else:
                                # update other versions
                                if self._update_pkg_version(
                                    self._versions_other,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1

                            if descriptions:
                                _d_new = {
                                    _md5: deepcopy(_desc)
                                }
                                try:
                                    _descriptions[_name].update(_d_new)
                                except KeyError:
                                    _descriptions[_name] = _d_new
                            # clear the data for next pkg
                            _processed += 1
                            _desc = {}
                            _key = ""
                            _value = ""
                        elif _line.startswith(' '):
                            # continuation line: append to the last field
                            _desc[_key] += "\n{}".format(_line)
                        else:
                            # 'Key: value' line starts a new field
                            _key = _line[:_line.index(':')]
                            _value = _line[_line.index(':')+1:]
                            _key = _key.lower()

                            _desc[_key] = _value
                    # save descriptions if needed
                    if descriptions:
                        _progress.clearline()
                        self._save_repo_descriptions(_pkg, _descriptions)

        _progress.end()
        # backup headers to disk
        self.versionstgz.add_file(
            _repos_index_filename,
            json.dumps(self._repo_index),
            replace=True
        )
        self.versionstgz.add_file(
            _mainteiners_index_filename,
            json.dumps(self._mainteiners_index),
            replace=True
        )
        if apps:
            self.desctgz.add_file(
                self._apps_filename,
                json.dumps(self._apps),
                replace=True
            )

        return
774
Alexd0391d42019-05-21 18:48:55 -0500775 def fetch_versions(self, tag, descriptions=False, apps=False):
Alexd9fd85e2019-05-16 16:58:24 -0500776 """Executes parsing for specific tag
777 """
778 if descriptions:
779 logger_cli.warning(
780 "\n\n# !!! WARNING: Saving repo descriptions "
781 "consumes huge amount of disk space\n\n"
782 )
783 # if there is no such tag, parse it from repoinfo
Alexd9fd85e2019-05-16 16:58:24 -0500784 logger_cli.info("# Fetching versions for {}".format(tag))
Alexd0391d42019-05-21 18:48:55 -0500785 self.parse_tag(tag, descriptions=descriptions, apps=apps)
Alex0ed4f762019-05-17 17:55:33 -0500786 logger_cli.info("-> saving updated versions")
787 self.versionstgz.add_file(
788 _mirantis_versions_filename,
789 json.dumps(self._versions_mirantis),
790 replace=True
791 )
792 self.versionstgz.add_file(
793 _other_versions_filename,
794 json.dumps(self._versions_other),
795 replace=True
796 )
Alexd9fd85e2019-05-16 16:58:24 -0500797
798 def build_repos(self, url, tag=None):
799 """Builds versions data for selected tag, or for all of them
800 """
Alexd9fd85e2019-05-16 16:58:24 -0500801 # recoursively walk the mirrors
802 # and gather all of the repos for 'tag' or all of the tags
Alex3bc95f62020-03-05 17:00:04 -0600803 self._info_class.fetch_repos(url, tag=tag)
Alexd9fd85e2019-05-16 16:58:24 -0500804
Alex74dc1352019-05-17 13:18:24 -0500805 def _build_action(self, url, tags):
806 for t in tags:
Alex6df29ad2019-05-31 17:55:32 -0500807 logger_cli.info("# Building repo info for '{}'".format(t))
Alex74dc1352019-05-17 13:18:24 -0500808 self.build_repos(url, tag=t)
809
Alexd0391d42019-05-21 18:48:55 -0500810 def get_available_tags(self, tag=None):
811 # Populate action tags
Alex3bc95f62020-03-05 17:00:04 -0600812 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd0391d42019-05-21 18:48:55 -0500813
814 _tags = []
815 if tag in major:
816 _tags.append(tag)
817 if tag in updates:
818 _tags.append(tag + ".update")
819 if tag in hotfix:
820 _tags.append(tag + ".hotfix")
821
822 return _tags
823
    def action_for_tag(
        self,
        url,
        tag,
        action=None,
        descriptions=None,
        apps=None
    ):
        """Executes action for every tag from all collections

        :param url: mirror base url
        :param tag: tag to act on (ignored by the 'list' action)
        :param action: one of None, 'list', 'build', 'fetch'
        :param descriptions: passed through to fetch_versions
        :param apps: passed through to fetch_versions
        """
        if not action:
            logger_cli.info("# No action set, nothing to do")
        # See if this is a list action
        if action == "list":
            _all = self._info_class.list_tags()
            if _all:
                # Print pretty list and exit
                logger_cli.info("# Tags available at '{}':".format(url))
                for t in _all:
                    _ri = self._repo_index
                    # a tag counts as parsed when any indexed repo
                    # entry carries it in its props
                    _isparsed = any(
                        [k for k, v in _ri.items()
                         if v['props']['tag'] == t]
                    )
                    if _isparsed:
                        logger_cli.info(get_tag_label(t, parsed=True))
                    else:
                        logger_cli.info(get_tag_label(t))
            else:
                logger_cli.info("# Not tags parsed yet for '{}':".format(url))

            # exit
            return

        if action == "build":
            self._build_action(url, [tag])

        # Populate action tags
        _action_tags = self.get_available_tags(tag)

        if not _action_tags:
            logger_cli.info(
                "# Tag of '{}' not found. "
                "Consider rebuilding repos info.".format(tag)
            )
        else:
            logger_cli.info(
                "-> tags to process: {}".format(
                    ", ".join(_action_tags)
                )
            )
            # Execute actions
            if action == "fetch":
                for t in _action_tags:
                    self.fetch_versions(t, descriptions=descriptions, apps=apps)

        logger_cli.info("# Done.")
881
    def show_package(self, name):
        """Pretty-print all known versions/md5s/repos for package *name*,
        grouped by section and app, with the collected maintainers."""
        # get the package data
        _p = self.get_package_versions(name)
        if not _p:
            logger_cli.warning(
                "# WARNING: Package '{}' not found".format(name)
            )
        else:
            # print package info using sorted tags from headers
            # Package: name
            # [u/h] tag \t <version>
            #  \t <version>
            # <10symbols> \t <md5> \t sorted headers with no tag
            # ...
            # section
            for _s in sorted(_p):
                # app
                for _a in sorted(_p[_s]):
                    _o = ""
                    _mm = []
                    # get and sort versions
                    for _v in sorted(_p[_s][_a]):
                        _o += "\n" + " "*8 + _v + ':\n'
                        # get and sort md5 sums
                        for _md5 in sorted(_p[_s][_a][_v]):
                            _o += " "*16 + _md5 + "\n"
                            # get and sort repo headers
                            for _r in sorted(_p[_s][_a][_v][_md5]):
                                _o += " "*24 + _r.replace('_', ' ') + '\n'
                                # collect unique maintainers for the summary
                                _m = _p[_s][_a][_v][_md5][_r]["maintainer"]
                                if _m not in _mm:
                                    _mm.append(_m)

                    logger_cli.info(
                        "\n# Package: {}/{}/{}\nMaintainers: {}".format(
                            _s,
                            _a,
                            name,
                            ", ".join(_mm)
                        )
                    )

                    logger_cli.info(_o)
Alex74dc1352019-05-17 13:18:24 -0500925
Alexd0391d42019-05-21 18:48:55 -0500926 @staticmethod
927 def get_apps(versions, name):
928 _all = True if name == '*' else False
Alexcf91b182019-05-31 11:57:07 -0500929 _s_max = _a_max = _p_max = _v_max = 0
Alexd0391d42019-05-21 18:48:55 -0500930 _rows = []
931 for _p in versions.keys():
932 _vs = versions[_p]
Alex3bc95f62020-03-05 17:00:04 -0600933 for _v, _d1 in _vs.items():
934 for _md5, _info in _d1.items():
Alexd0391d42019-05-21 18:48:55 -0500935 if _all or name == _info['app']:
936 _s_max = max(len(_info['section']), _s_max)
937 _a_max = max(len(_info['app']), _a_max)
Alexcf91b182019-05-31 11:57:07 -0500938 _p_max = max(len(_p), _p_max)
939 _v_max = max(len(_v), _v_max)
Alexd0391d42019-05-21 18:48:55 -0500940 _rows.append([
941 _info['section'],
942 _info['app'],
Alexcf91b182019-05-31 11:57:07 -0500943 _p,
944 _v,
945 _md5,
946 len(_info['repo'])
Alexd0391d42019-05-21 18:48:55 -0500947 ])
Alexcf91b182019-05-31 11:57:07 -0500948 # format columns
949 # section
950 _fmt = "{:"+str(_s_max)+"} "
951 # app
952 _fmt += "{:"+str(_a_max)+"} "
953 # package name
954 _fmt += "{:"+str(_p_max)+"} "
955 # version
956 _fmt += "{:"+str(_v_max)+"} "
957 # md5 and number of repos is fixed
958 _fmt += "{} in {} repos"
959
960 # fill rows
961 _rows = [_fmt.format(s, a, p, v, m, l) for s, a, p, v, m, l in _rows]
Alexd0391d42019-05-21 18:48:55 -0500962 _rows.sort()
963 return _rows
964
965 def show_app(self, name):
966 c = 0
967 rows = self.get_apps(self._versions_mirantis, name)
968 if rows:
Alexcf91b182019-05-31 11:57:07 -0500969 logger_cli.info("\n# Mirantis packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500970 logger_cli.info("\n".join(rows))
971 c += 1
972 rows = self.get_apps(self._versions_other, name)
973 if rows:
Alexcf91b182019-05-31 11:57:07 -0500974 logger_cli.info("\n# Other packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500975 logger_cli.info("\n".join(rows))
976 c += 1
977 if c == 0:
978 logger_cli.info("\n# No app found for '{}'".format(name))
979
980 def get_mirantis_pkg_names(self):
981 # Mirantis maintainers only
982 return set(
983 self._versions_mirantis.keys()
984 ) - set(
985 self._versions_other.keys()
986 )
987
988 def get_other_pkg_names(self):
989 # Non-mirantis Maintainers
990 return set(
991 self._versions_other.keys()
992 ) - set(
993 self._versions_mirantis.keys()
994 )
995
996 def get_mixed_pkg_names(self):
997 # Mixed maintainers
998 return set(
999 self._versions_mirantis.keys()
1000 ).intersection(set(
1001 self._versions_other.keys()
1002 ))
1003
1004 def is_mirantis(self, name, tag=None):
1005 """Method checks if this package is mainteined
1006 by mirantis in target tag repo
1007 """
1008 if name in self._versions_mirantis:
1009 # check tag
1010 if tag:
1011 _pkg = self.get_package_versions(
1012 name,
1013 tagged=True
1014 )
1015 _tags = []
1016 for s in _pkg.keys():
1017 for a in _pkg[s].keys():
1018 for t in _pkg[s][a].keys():
1019 _tags.append(t)
1020 if any([t.startswith(tag) for t in _tags]):
1021 return True
1022 else:
1023 return None
1024 else:
1025 return True
1026 elif name in self._versions_other:
1027 # check tag
1028 if tag:
1029 _pkg = self.get_package_versions(
1030 name,
1031 tagged=True
1032 )
1033 _tags = []
1034 for s in _pkg.keys():
1035 for a in _pkg[s].keys():
1036 for t in _pkg[s][a].keys():
1037 _tags.append(t)
1038 if any([t.startswith(tag) for t in _tags]):
1039 return False
1040 else:
1041 return None
1042 else:
1043 return False
1044 else:
1045 logger.error(
1046 "# ERROR: package '{}' not found "
1047 "while determining maintainer".format(
1048 name
1049 )
1050 )
1051 return None
1052
1053 def get_filtered_versions(
1054 self,
1055 name,
1056 tag=None,
1057 include=None,
1058 exclude=None
1059 ):
1060 """Method gets all the versions for the package
1061 and filters them using keys above
1062 """
1063 if tag:
Alex3bc95f62020-03-05 17:00:04 -06001064 tag = str(tag) if not isinstance(tag, str) else tag
Alexd0391d42019-05-21 18:48:55 -05001065 _out = {}
1066 _vs = self.get_package_versions(name, tagged=True)
1067 # iterate to filter out keywords
Alex3bc95f62020-03-05 17:00:04 -06001068 for s, apps in _vs.items():
1069 for a, _tt in apps.items():
1070 for t, vs in _tt.items():
Alexd0391d42019-05-21 18:48:55 -05001071 # filter tags
1072 if tag and t != tag and t.rsplit('.', 1)[0] != tag:
1073 continue
1074 # Skip hotfix tag
1075 if t == tag + ".hotfix":
1076 continue
Alex3bc95f62020-03-05 17:00:04 -06001077 for v, rp in vs.items():
1078 for h, p in rp.items():
Alexd0391d42019-05-21 18:48:55 -05001079 # filter headers with all keywords matching
1080 _h = re.split(r"[\-\_]+", h)
1081 _included = all([kw in _h for kw in include])
1082 _excluded = any([kw in _h for kw in exclude])
1083 if not _included or _excluded:
1084 continue
1085 else:
1086 nested_set(_out, [s, a, v], [])
1087 _dat = {
1088 "header": h
1089 }
1090 _dat.update(p)
1091 _out[s][a][v].append(_dat)
1092 return _out
1093
1094 def get_package_versions(self, name, tagged=False):
Alex74dc1352019-05-17 13:18:24 -05001095 """Method builds package version structure
1096 with repository properties included
1097 """
1098 # get data
Alexd0391d42019-05-21 18:48:55 -05001099 _vs = {}
1100
1101 if name in self._versions_mirantis:
1102 _vs.update(self._versions_mirantis[name])
1103 if name in self._versions_other:
1104 _vs.update(self._versions_other[name])
Alex0ed4f762019-05-17 17:55:33 -05001105
Alex74dc1352019-05-17 13:18:24 -05001106 # insert repo data, insert props into headers place
1107 _package = {}
1108 if tagged:
Alex3bc95f62020-03-05 17:00:04 -06001109 for _v, _d1 in _vs.items():
Alex74dc1352019-05-17 13:18:24 -05001110 # use tag as a next step
Alex3bc95f62020-03-05 17:00:04 -06001111 for _md5, _info in _d1.items():
Alexd0391d42019-05-21 18:48:55 -05001112 _s = _info['section']
1113 _a = _info['app']
1114 for _pair in _info['repo']:
1115 _rp = {}
Alex74dc1352019-05-17 13:18:24 -05001116 # extract props for a repo
Alex0ed4f762019-05-17 17:55:33 -05001117 _r, _m = self._get_indexed_values(_pair)
Alex74dc1352019-05-17 13:18:24 -05001118 # get tag
Alex0ed4f762019-05-17 17:55:33 -05001119 _tag = _r["props"]["tag"]
Alex74dc1352019-05-17 13:18:24 -05001120 # cut tag from the header
Alex0ed4f762019-05-17 17:55:33 -05001121 _cut_head = _r["header"].split("_", 1)[1]
Alex74dc1352019-05-17 13:18:24 -05001122 # populate dict
Alexd0391d42019-05-21 18:48:55 -05001123 _rp["maintainer"] = _m
1124 _rp["md5"] = _md5
1125 _rp.update(_r["props"])
Alex74dc1352019-05-17 13:18:24 -05001126 nested_set(
1127 _package,
Alexd0391d42019-05-21 18:48:55 -05001128 [_s, _a, _tag, _v, _cut_head],
1129 _rp
Alex74dc1352019-05-17 13:18:24 -05001130 )
1131 else:
Alex3bc95f62020-03-05 17:00:04 -06001132 for _v, _d1 in _vs.items():
1133 for _md5, _info in _d1.items():
Alexd0391d42019-05-21 18:48:55 -05001134 _s = _info['section']
1135 _a = _info['app']
1136 for _pair in _info['repo']:
Alex0ed4f762019-05-17 17:55:33 -05001137 _r, _m = self._get_indexed_values(_pair)
Alexd0391d42019-05-21 18:48:55 -05001138 _info["maintainer"] = _m
1139 _info.update(_r["props"])
Alex74dc1352019-05-17 13:18:24 -05001140 nested_set(
1141 _package,
Alexd0391d42019-05-21 18:48:55 -05001142 [_s, _a, _v, _md5, _r["header"]],
1143 _info
Alex74dc1352019-05-17 13:18:24 -05001144 )
1145
1146 return _package
1147
Alexd9fd85e2019-05-16 16:58:24 -05001148 def parse_repos(self):
1149 # all tags to check
Alex3bc95f62020-03-05 17:00:04 -06001150 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd9fd85e2019-05-16 16:58:24 -05001151
1152 # major tags
1153 logger_cli.info("# Processing major tags")
1154 for _tag in major:
1155 self.fetch_versions(_tag)
1156
1157 # updates tags
1158 logger_cli.info("# Processing update tags")
1159 for _tag in updates:
1160 self.fetch_versions(_tag + ".update")
1161
1162 # hotfix tags
1163 logger_cli.info("# Processing hotfix tags")
1164 for _tag in hotfix:
1165 self.fetch_versions(_tag + ".hotfix")