blob: 57d8b9ed28dcd25136e3f2b7d8b9b4b564ece87c [file] [log] [blame]
Alexd9fd85e2019-05-16 16:58:24 -05001import json
2import os
Alexd0391d42019-05-21 18:48:55 -05003import re
Alexd9fd85e2019-05-16 16:58:24 -05004from copy import deepcopy
5
Alex74dc1352019-05-17 13:18:24 -05006from cfg_checker.common import logger, logger_cli, nested_set
Alex0ed4f762019-05-17 17:55:33 -05007from cfg_checker.common.const import _mainteiners_index_filename
8from cfg_checker.common.const import _mirantis_versions_filename
9from cfg_checker.common.const import _other_versions_filename
Alexd9fd85e2019-05-16 16:58:24 -050010from cfg_checker.common.const import _pkg_desc_archive
11from cfg_checker.common.const import _repos_index_filename
12from cfg_checker.common.const import _repos_info_archive
13from cfg_checker.common.const import _repos_versions_archive
Alexd9fd85e2019-05-16 16:58:24 -050014from cfg_checker.common.const import ubuntu_releases
Alex7f69a6a2019-05-31 16:53:35 -050015from cfg_checker.common.file_utils import ensure_folder_exists
Alexd9fd85e2019-05-16 16:58:24 -050016from cfg_checker.common.file_utils import get_gzipped_file
17from cfg_checker.common.settings import pkg_dir
18from cfg_checker.helpers.console_utils import Progress
19from cfg_checker.helpers.tgz import TGZFile
20
21import requests
22from requests.exceptions import ConnectionError
23
# File extension used for per-tag repo info members stored in the archives
ext = ".json"
25
26
def get_tag_label(_tag, parsed=False):
    """Return a printable, aligned label for a repo tag.

    Tags ending in '.update'/'.hotfix' get a bracketed marker; plain
    tags are padded to line up with marked ones. A leading '+' marks
    tags that have already been parsed.
    """
    prefix = "+ " if parsed else "  "
    if _tag.endswith(".update"):
        body = "[updates] " + _tag.rsplit('.', 1)[0]
    elif _tag.endswith(".hotfix"):
        body = " [hotfix] " + _tag.rsplit('.', 1)[0]
    else:
        body = " " * 10 + _tag
    return prefix + body
42
43
Alex0ed4f762019-05-17 17:55:33 -050044def _get_value_index(_di, value, header=None):
Alex29ee76f2019-05-17 18:52:29 -050045 # Mainteiner names often uses specific chars
Alex3bc95f62020-03-05 17:00:04 -060046 # so make sure that value saved is str not str
47 # Python2
48 # _val = str(value, 'utf-8') if isinstance(value, str) else value
49 # Python3 has always utf-8 decoded value
50 _val = value
Alex0ed4f762019-05-17 17:55:33 -050051 if header:
Alex3bc95f62020-03-05 17:00:04 -060052 try:
53 _ = next(filter(lambda i: _di[i]["header"] == header, _di))
54 # iterator not empty, find index
55 for _k, _v in _di.items():
56 if _v["header"] == header:
57 _index = _k
58 except StopIteration:
59 _index = str(len(_di.keys()) + 1)
Alex0ed4f762019-05-17 17:55:33 -050060 _di[_index] = {
61 "header": header,
Alex29ee76f2019-05-17 18:52:29 -050062 "props": _val
Alex0ed4f762019-05-17 17:55:33 -050063 }
Alex3bc95f62020-03-05 17:00:04 -060064 finally:
65 return _index
Alex0ed4f762019-05-17 17:55:33 -050066 else:
Alex3bc95f62020-03-05 17:00:04 -060067 try:
68 _ = next(filter(lambda i: _di[i] == _val, _di))
69 # iterator not empty, find index
70 for _k, _v in _di.items():
Alex29ee76f2019-05-17 18:52:29 -050071 if _v == _val:
Alex0ed4f762019-05-17 17:55:33 -050072 _index = _k
Alex3bc95f62020-03-05 17:00:04 -060073 except StopIteration:
74 _index = str(len(_di.keys()) + 1)
75 # on save, cast it as str
76 _di[_index] = _val
77 finally:
78 return _index
Alex0ed4f762019-05-17 17:55:33 -050079
80
81def _safe_load(_f, _a):
82 if _f in _a.list_files():
Alexd0391d42019-05-21 18:48:55 -050083 logger_cli.debug(
84 "... loading '{}':'{}'".format(
Alex0ed4f762019-05-17 17:55:33 -050085 _a.basefile,
86 _f
87 )
88 )
Alex3bc95f62020-03-05 17:00:04 -060089 return json.loads(_a.get_file(_f, decode=True))
Alex0ed4f762019-05-17 17:55:33 -050090 else:
91 return {}
92
93
Alexd9fd85e2019-05-16 16:58:24 -050094def _n_url(url):
95 if url[-1] == '/':
96 return url
97 else:
98 return url + '/'
99
100
101class ReposInfo(object):
Alex3bc95f62020-03-05 17:00:04 -0600102 init_done = False
103
104 def _init_vars(self):
105 self.repos = []
106
107 def _init_folders(self, arch_folder=None):
108 if arch_folder:
109 self._arch_folder = arch_folder
110 self._repofile = os.path.join(arch_folder, _repos_info_archive)
111 else:
112 self._arch_folder = os.path.join(pkg_dir, "versions")
113 self._repofile = os.path.join(
114 self._arch_folder,
115 _repos_info_archive
116 )
117
118 def __init__(self, arch_folder=None):
119 # perform inits
120 self._init_vars()
121 self._init_folders(arch_folder)
122 self.init_done = True
123
124 def __call__(self, *args, **kwargs):
125 if self.init_done:
126 return self
127 else:
128 return self.__init__(self, *args, **kwargs)
Alexd9fd85e2019-05-16 16:58:24 -0500129
130 @staticmethod
131 def _ls_repo_page(url):
132 # Yes, this is ugly. But it works ok for small HTMLs.
133 _a = "<a"
134 _s = "href="
135 _e = "\">"
136 try:
137 page = requests.get(url, timeout=60)
138 except ConnectionError as e:
139 logger_cli.error("# ERROR: {}".format(e.message))
140 return [], []
141 a = page.text.splitlines()
142 # Comprehension for dirs. Anchors for ends with '-'
143 _dirs = [l[l.index(_s)+6:l.index(_e)-1]
144 for l in a if l.startswith(_a) and l.endswith('-')]
145 # Comprehension for files. Anchors ends with size
146 _files = [l[l.index(_s)+6:l.index(_e)]
147 for l in a if l.startswith(_a) and not l.endswith('-')]
148
149 return _dirs, _files
150
151 def search_pkg(self, url, _list):
152 # recoursive method to walk dists tree
153 _dirs, _files = self._ls_repo_page(url)
154
155 for _d in _dirs:
156 # Search only in dists, ignore the rest
157 if "dists" not in url and _d != "dists":
158 continue
159 _u = _n_url(url + _d)
160 self.search_pkg(_u, _list)
161
162 for _f in _files:
163 if _f == "Packages.gz":
164 _list.append(url + _f)
165 logger.debug("... [F] '{}'".format(url + _f))
166
167 return _list
168
169 @staticmethod
170 def _map_repo(_path_list, _r):
171 for _pkg_path in _path_list:
172 _l = _pkg_path.split('/')
173 _kw = _l[_l.index('dists')+1:]
174 _kw.reverse()
175 _repo_item = {
176 "arch": _kw[1][7:] if "binary" in _kw[1] else _kw[1],
177 "type": _kw[2],
178 "ubuntu-release": _kw[3],
179 "filepath": _pkg_path
180 }
181 _r.append(_repo_item)
182
183 def _find_tag(self, _t, _u, label=""):
184 if label:
185 _url = _n_url(_u + label)
186 _label = _t + '.' + label
187 else:
188 _url = _u
189 _label = _t
190 _ts, _ = self._ls_repo_page(_url)
191 if _t in _ts:
192 logger.debug(
193 "... found tag '{}' at '{}'".format(
194 _t,
195 _url
196 )
197 )
198 return {
199 _label: {
200 "baseurl": _n_url(_url + _t),
201 "all": {}
202 }
203 }
204 else:
205 return {}
206
    def fetch_repos(self, url, tag=None):
        """Walk a mirror tree and store links to all 'Packages.gz' files.

        For every discovered tag (or only *tag* when given, including its
        '.hotfix'/'.update' variants) a '<tag>.json' member with the mapped
        repo structure is added to the repo info archive. Tags already
        present in the archive are skipped.

        :param url: root url of the repos mirror
        :param tag: optional single tag to process
        """
        base_url = _n_url(url)
        logger_cli.info("# Using '{}' as a repos source".format(base_url))

        logger_cli.info("# Gathering repos info (i.e. links to 'packages.gz')")
        # init repoinfo archive
        _repotgz = TGZFile(self._repofile)
        # prepare repo links
        _repos = {}
        if tag:
            # only one tag to process
            _repos.update(self._find_tag(tag, base_url))
            _repos.update(self._find_tag(tag, base_url, label="hotfix"))
            _repos.update(self._find_tag(tag, base_url, label="update"))
        else:
            # gather all of them
            _tags, _ = self._ls_repo_page(base_url)
            # 'hotfix'/'update' are subfolders with their own tags, not tags
            if "hotfix" in _tags:
                _tags.remove('hotfix')
            if "update" in _tags:
                _tags.remove('update')
            # search tags in subfolders
            _h_tags, _ = self._ls_repo_page(base_url + 'hotfix')
            _u_tags, _ = self._ls_repo_page(base_url + 'update')
            _tags.extend([t for t in _h_tags if t not in _tags])
            _tags.extend([t for t in _u_tags if t not in _tags])
            _progress = Progress(len(_tags))
            _index = 0
            for _tag in _tags:
                _repos.update(self._find_tag(_tag, base_url))
                _repos.update(self._find_tag(_tag, base_url, label="hotfix"))
                _repos.update(self._find_tag(_tag, base_url, label="update"))
                _index += 1
                _progress.write_progress(_index)
            _progress.end()

        # parse subtags
        for _label in _repos.keys():
            logger_cli.info("-> processing tag '{}'".format(_label))
            _name = _label + ".json"
            if _repotgz.has_file(_name):
                logger_cli.info(
                    "-> skipping, '{}' already has '{}'".format(
                        _repos_info_archive,
                        _name
                    )
                )
                continue
            # process the tag
            _repo = _repos[_label]
            _baseurl = _repos[_label]["baseurl"]
            # get the subtags
            _sub_tags, _ = self._ls_repo_page(_baseurl)
            _total_index = len(_sub_tags)
            _index = 0
            _progress = Progress(_total_index)
            logger.debug(
                "... found {} subtags for '{}'".format(
                    len(_sub_tags),
                    _label
                )
            )
            # save the url and start search
            for _stag in _sub_tags:
                _u = _baseurl + _stag
                _index += 1
                logger.debug(
                    "... searching repos in '{}/{}'".format(
                        _label,
                        _stag
                    )
                )

                # Searching Package collections
                if _stag in ubuntu_releases:
                    # if stag is the release, this is all packages
                    _repo["all"][_stag] = []
                    _repo["all"]["url"] = _n_url(_u)
                    _path_list = self.search_pkg(_n_url(_u), [])
                    self._map_repo(_path_list, _repo["all"][_stag])
                    logger.info(
                        "-> found {} dists".format(
                            len(_repo["all"][_stag])
                        )
                    )

                else:
                    # each subtag might have any ubuntu release
                    # so iterate them
                    _repo[_stag] = {
                        "url": _n_url(_u)
                    }
                    _releases, _ = self._ls_repo_page(_n_url(_u))
                    for _rel in _releases:
                        if _rel not in ubuntu_releases:
                            logger.debug(
                                "... skipped unknown ubuntu release: "
                                "'{}' in '{}'".format(
                                    _rel,
                                    _u
                                )
                            )
                        else:
                            _rel_u = _n_url(_u) + _rel
                            _repo[_stag][_rel] = []
                            _path_list = self.search_pkg(_n_url(_rel_u), [])
                            self._map_repo(
                                _path_list,
                                _repo[_stag][_rel]
                            )
                            logger.info(
                                "-> found {} dists for '{}'".format(
                                    len(_repo[_stag][_rel]),
                                    _rel
                                )
                            )
                _progress.write_progress(_index)

            _progress.end()
            # NOTE: '_label + ext' resolves to the same '<label>.json'
            # used for the has_file() check above
            _name = _label + ext
            _repotgz.add_file(_name, buf=json.dumps(_repo, indent=2))
            logger_cli.info(
                "-> archive '{}' updated with '{}'".format(
                    self._repofile,
                    _name
                )
            )

        return
336
Alex74dc1352019-05-17 13:18:24 -0500337 def list_tags(self, splitted=False):
Alexd9fd85e2019-05-16 16:58:24 -0500338 _files = TGZFile(self._repofile).list_files()
339 # all files in archive with no '.json' part
340 _all = set([f.rsplit('.', 1)[0] for f in _files])
Alex74dc1352019-05-17 13:18:24 -0500341 if splitted:
342 # files that ends with '.update'
343 _updates = set([f for f in _all if f.find('update') >= 0])
344 # files that ends with '.hotfix'
345 _hotfix = set([f for f in _all if f.find('hotfix') >= 0])
346 # remove updates and hotfix tags from all. The true magic of SETs
347 _all = _all - _updates - _hotfix
348 # cut updates and hotfix endings
349 _updates = [f.rsplit('.', 1)[0] for f in _updates]
350 _hotfix = [f.rsplit('.', 1)[0] for f in _hotfix]
Alexd9fd85e2019-05-16 16:58:24 -0500351
Alex74dc1352019-05-17 13:18:24 -0500352 return _all, _updates, _hotfix
353 else:
354 # dynamic import
355 import re
356 _all = list(_all)
357 # lexical tags
358 _lex = [s for s in _all if not s[0].isdigit()]
359 _lex.sort()
360 # tags with digits
361 _dig = [s for s in _all if s[0].isdigit()]
362 _dig = sorted(
363 _dig,
Alexd0391d42019-05-21 18:48:55 -0500364 key=lambda x: tuple(int(i) for i in re.findall(r"\d+", x)[:3])
Alex74dc1352019-05-17 13:18:24 -0500365 )
366
367 return _dig + _lex
Alexd9fd85e2019-05-16 16:58:24 -0500368
369 def get_repoinfo(self, tag):
370 _tgz = TGZFile(self._repofile)
Alex3bc95f62020-03-05 17:00:04 -0600371 _buf = _tgz.get_file(tag + ext, decode=True)
Alexd9fd85e2019-05-16 16:58:24 -0500372 return json.loads(_buf)
373
374
375class RepoManager(object):
Alex3bc95f62020-03-05 17:00:04 -0600376 init_done = False
Alexd9fd85e2019-05-16 16:58:24 -0500377
Alex3bc95f62020-03-05 17:00:04 -0600378 def _init_folders(self, arch_folder=None):
379 # overide arch folder if needed
380 if arch_folder:
381 self._arch_folder = arch_folder
382 else:
383 self._arch_folder = os.path.join(pkg_dir, "versions")
Alexd9fd85e2019-05-16 16:58:24 -0500384
Alex3bc95f62020-03-05 17:00:04 -0600385 self._versions_arch = os.path.join(
386 self._arch_folder,
387 _repos_versions_archive
388 )
389 self._desc_arch = os.path.join(self._arch_folder, _pkg_desc_archive)
Alexd0391d42019-05-21 18:48:55 -0500390
Alex3bc95f62020-03-05 17:00:04 -0600391 def _init_vars(self, info_class):
392 # RepoInfo instance init
393 if info_class:
394 self._info_class = info_class
395 else:
396 self._info_class = ReposInfo()
397 # archives
398 self._apps_filename = "apps.json"
Alexd9fd85e2019-05-16 16:58:24 -0500399
Alex3bc95f62020-03-05 17:00:04 -0600400 # repository index
401 self._repo_index = {}
402 self._mainteiners_index = {}
403
404 self._apps = {}
405
406 # init package versions storage
407 self._versions_mirantis = {}
408 self._versions_other = {}
409
410 def _init_archives(self):
Alexd9fd85e2019-05-16 16:58:24 -0500411 # Init version files
412 self.versionstgz = TGZFile(
413 self._versions_arch,
414 label="MCP Configuration Checker: Package versions archive"
415 )
416 self.desctgz = TGZFile(
417 self._desc_arch,
418 label="MCP Configuration Checker: Package descriptions archive"
419 )
Alexd0391d42019-05-21 18:48:55 -0500420
421 # section / app
422 self._apps = _safe_load(
423 self._apps_filename,
424 self.desctgz
425 )
426
Alex0ed4f762019-05-17 17:55:33 -0500427 # indices
428 self._repo_index = _safe_load(
429 _repos_index_filename,
430 self.versionstgz
431 )
432 self._mainteiners_index = _safe_load(
433 _mainteiners_index_filename,
434 self.versionstgz
435 )
Alexd9fd85e2019-05-16 16:58:24 -0500436
Alex0ed4f762019-05-17 17:55:33 -0500437 # versions
438 self._versions_mirantis = _safe_load(
439 _mirantis_versions_filename,
440 self.versionstgz
441 )
442 self._versions_other = _safe_load(
443 _other_versions_filename,
444 self.versionstgz
445 )
Alexd9fd85e2019-05-16 16:58:24 -0500446
Alex3bc95f62020-03-05 17:00:04 -0600447 def __init__(self, arch_folder=None, info_class=None):
448 # Perform inits
449 self._init_vars(info_class)
450 self._init_folders(arch_folder)
451 # Ensure that versions folder exists
452 logger_cli.debug(ensure_folder_exists(self._arch_folder))
453 # Preload/create archives
454 self._init_archives()
455 self.init_done = True
456
457 def __call__(self, *args, **kwargs):
458 if self.init_done:
459 return self
460 else:
461 return self.__init__(self, *args, **kwargs)
462
Alexd9fd85e2019-05-16 16:58:24 -0500463 def _create_repo_header(self, p):
464 _header = "_".join([
465 p['tag'],
466 p['subset'],
467 p['release'],
468 p['ubuntu-release'],
469 p['type'],
470 p['arch']
471 ])
Alex0ed4f762019-05-17 17:55:33 -0500472 return _get_value_index(self._repo_index, p, header=_header)
Alexd9fd85e2019-05-16 16:58:24 -0500473
Alex0ed4f762019-05-17 17:55:33 -0500474 def _get_indexed_values(self, pair):
475 _h, _m = pair.split('-')
476 return self._repo_index[_h], self._mainteiners_index[_m]
Alexd9fd85e2019-05-16 16:58:24 -0500477
Alexd0391d42019-05-21 18:48:55 -0500478 def _update_pkg_version(self, _d, n, v, md5, s, a, h_index, m_index):
Alexd9fd85e2019-05-16 16:58:24 -0500479 """Method updates package version record in global dict
480 """
481 # 'if'*4 operation is pretty expensive when using it 100k in a row
482 # so try/except is a better way to go, even faster than 'reduce'
Alex0ed4f762019-05-17 17:55:33 -0500483 _pair = "-".join([h_index, m_index])
Alexd0391d42019-05-21 18:48:55 -0500484 _info = {
485 'repo': [_pair],
486 'section': s,
487 'app': a
488 }
Alexd9fd85e2019-05-16 16:58:24 -0500489 try:
490 # try to load list
Alexd0391d42019-05-21 18:48:55 -0500491 _list = _d[n][v][md5]['repo']
Alexd9fd85e2019-05-16 16:58:24 -0500492 # cast it as set() and union()
Alex0ed4f762019-05-17 17:55:33 -0500493 _list = set(_list).union([_pair])
Alexd9fd85e2019-05-16 16:58:24 -0500494 # cast back as set() is not serializeable
Alexd0391d42019-05-21 18:48:55 -0500495 _d[n][v][md5]['repo'] = list(_list)
Alexd9fd85e2019-05-16 16:58:24 -0500496 return False
497 except KeyError:
498 # ok, this is fresh pkg. Do it slow way.
Alex0ed4f762019-05-17 17:55:33 -0500499 if n in _d:
Alexd9fd85e2019-05-16 16:58:24 -0500500 # there is such pkg already
Alex0ed4f762019-05-17 17:55:33 -0500501 if v in _d[n]:
Alexd9fd85e2019-05-16 16:58:24 -0500502 # there is such version, check md5
Alex0ed4f762019-05-17 17:55:33 -0500503 if md5 in _d[n][v]:
Alexd9fd85e2019-05-16 16:58:24 -0500504 # just add new repo header
Alexd0391d42019-05-21 18:48:55 -0500505 if _pair not in _d[n][v][md5]['repo']:
506 _d[n][v][md5]['repo'].append(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500507 else:
508 # check if such index is here...
509 _existing = filter(
Alexd0391d42019-05-21 18:48:55 -0500510 lambda i: _pair in _d[n][v][i]['repo'],
Alex0ed4f762019-05-17 17:55:33 -0500511 _d[n][v]
Alexd9fd85e2019-05-16 16:58:24 -0500512 )
513 if _existing:
514 # Yuck! Same version had different MD5
Alex0ed4f762019-05-17 17:55:33 -0500515 _r, _m = self._get_indexed_values(_pair)
Alexd9fd85e2019-05-16 16:58:24 -0500516 logger_cli.error(
517 "# ERROR: Package version has multiple MD5s "
518 "in '{}': {}:{}:{}".format(
Alex0ed4f762019-05-17 17:55:33 -0500519 _r,
Alexd9fd85e2019-05-16 16:58:24 -0500520 n,
521 v,
522 md5
523 )
524 )
Alexd0391d42019-05-21 18:48:55 -0500525 _d[n][v][md5] = _info
Alexd9fd85e2019-05-16 16:58:24 -0500526 else:
527 # this is new version for existing package
Alex0ed4f762019-05-17 17:55:33 -0500528 _d[n][v] = {
Alexd0391d42019-05-21 18:48:55 -0500529 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500530 }
531 return False
532 else:
533 # this is new pakcage
Alex0ed4f762019-05-17 17:55:33 -0500534 _d[n] = {
Alexd9fd85e2019-05-16 16:58:24 -0500535 v: {
Alexd0391d42019-05-21 18:48:55 -0500536 md5: _info
Alexd9fd85e2019-05-16 16:58:24 -0500537 }
538 }
539 return True
540
541 def _save_repo_descriptions(self, repo_props, desc):
542 # form the filename for the repo and save it
543 self.desctgz.add_file(
544 self._create_repo_header(repo_props),
545 json.dumps(desc)
546 )
547
548 # def get_description(self, repo_props, name, md5=None):
549 # """Gets target description
550 # """
551 # _filename = self._create_repo_header(repo_props)
552 # # check if it is present in cache
553 # if _filename in self._desc_cache:
554 # _descs = self._desc_cache[_filename]
555 # else:
556 # # load data
557 # _descs = self.desctgz.get_file(_filename)
558 # # Serialize it
559 # _descs = json.loads(_descs)
560 # self._desc_cache[_filename] = _descs
561 # # return target desc
562 # if name in _descs and md5 in _descs[name]:
563 # return _descs[name][md5]
564 # else:
565 # return None
566
    def parse_tag(self, tag, descriptions=False, apps=False):
        """Download and parse Package.gz files for specific tag
        By default, descriptions not saved
        due to huge resulting file size and slow processing

        Updates self._versions_mirantis / self._versions_other (and,
        when *apps* is set, self._apps), then persists the repo and
        mainteiner indices into the versions archive.

        :param tag: tag to process (repo info must already be fetched)
        :param descriptions: also save full package descriptions
        :param apps: also collect section/app/package arch mapping
        """
        # init gzip and downloader
        _info = self._info_class.get_repoinfo(tag)
        # calculate Packages.gz files to process
        _baseurl = _info.pop("baseurl")
        _total_components = len(_info.keys()) - 1
        _ubuntu_package_repos = 0
        _other_repos = 0
        # pre-count repos so the progress bar has a total
        for _c, _d in _info.items():
            for _ur, _l in _d.items():
                if _ur in ubuntu_releases:
                    _ubuntu_package_repos += len(_l)
                elif _ur != 'url':
                    _other_repos += len(_l)
        logger_cli.info(
            "-> loaded repository info for '{}'.\n"
            " '{}', {} components, {} ubuntu repos, {} other/uknown".format(
                _baseurl,
                tag,
                _total_components,
                _ubuntu_package_repos,
                _other_repos
            )
        )
        # init progress bar
        _progress = Progress(_ubuntu_package_repos)
        _index = 0
        _processed = 0
        _new = 0
        for _c, _d in _info.items():
            # we do not need url here, just get rid of it
            if 'url' in _d:
                _d.pop('url')
            # _url = if 'url' in _d else _baseurl + _c
            for _ur, _l in _d.items():
                # iterate package collections
                for _p in _l:
                    # descriptions
                    if descriptions:
                        _descriptions = {}
                    # download and unzip
                    _index += 1
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, GET 'Packages.gz'".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch']
                        )
                    )
                    _raw = get_gzipped_file(_p['filepath'])
                    if not _raw:
                        # empty repo...
                        _progress.clearline()
                        logger_cli.warning(
                            "# WARNING: Empty file: '{}'".format(
                                _p['filepath']
                            )
                        )
                        continue
                    else:
                        _raw = _raw.decode("utf-8")
                    _progress.write_progress(
                        _index,
                        note="/ {} {} {} {} {}, {}/{}".format(
                            _c,
                            _ur,
                            _p['ubuntu-release'],
                            _p['type'],
                            _p['arch'],
                            _processed,
                            _new
                        )
                    )
                    _lines = _raw.splitlines()
                    # break lines collection into isolated pkg data
                    _pkg = {
                        "tag": tag,
                        "subset": _c,
                        "release": _ur
                    }
                    _pkg.update(_p)
                    _desc = {}
                    _key = _value = ""
                    # if there is no empty line at end, add it
                    if _lines[-1] != '':
                        _lines.append('')
                    # Process lines
                    for _line in _lines:
                        if not _line:
                            # if the line is empty, process pkg data gathered
                            _name = _desc['package']
                            _md5 = _desc['md5sum']
                            _version = _desc['version']
                            _mainteiner = _desc['maintainer']

                            # 'source' is the app/source package name
                            if 'source' in _desc:
                                _ap = _desc['source'].lower()
                            else:
                                _ap = "-"

                            if apps:
                                # insert app
                                _sc = _desc['section'].lower()
                                if 'source' in _desc:
                                    _ap = _desc['source'].lower()
                                else:
                                    _ap = "-"

                                try:
                                    _tmp = set(self._apps[_sc][_ap][_name])
                                    _tmp.add(_desc['architecture'])
                                    self._apps[_sc][_ap][_name] = list(_tmp)
                                except KeyError:
                                    nested_set(
                                        self._apps,
                                        [_sc, _ap, _name],
                                        [_desc['architecture']]
                                    )

                            # Check is mainteiner is Mirantis
                            if _mainteiner.endswith("@mirantis.com>"):
                                # update mirantis versions
                                if self._update_pkg_version(
                                    self._versions_mirantis,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1
                            else:
                                # update other versions
                                if self._update_pkg_version(
                                    self._versions_other,
                                    _name,
                                    _version,
                                    _md5,
                                    _desc['section'].lower(),
                                    _ap,
                                    self._create_repo_header(_pkg),
                                    _get_value_index(
                                        self._mainteiners_index,
                                        _mainteiner
                                    )
                                ):
                                    _new += 1

                            if descriptions:
                                _d_new = {
                                    _md5: deepcopy(_desc)
                                }
                                try:
                                    _descriptions[_name].update(_d_new)
                                except KeyError:
                                    _descriptions[_name] = _d_new
                            # clear the data for next pkg
                            _processed += 1
                            _desc = {}
                            _key = ""
                            _value = ""
                        elif _line.startswith(' '):
                            # continuation of a multi-line field value
                            _desc[_key] += "\n{}".format(_line)
                        else:
                            # new 'Key: value' field
                            _key, _value = _line.split(': ', 1)
                            _key = _key.lower()

                            _desc[_key] = _value
                    # save descriptions if needed
                    if descriptions:
                        _progress.clearline()
                        self._save_repo_descriptions(_pkg, _descriptions)

        _progress.end()
        # backup headers to disk
        self.versionstgz.add_file(
            _repos_index_filename,
            json.dumps(self._repo_index),
            replace=True
        )
        self.versionstgz.add_file(
            _mainteiners_index_filename,
            json.dumps(self._mainteiners_index),
            replace=True
        )
        if apps:
            self.desctgz.add_file(
                self._apps_filename,
                json.dumps(self._apps),
                replace=True
            )

        return
772
Alexd0391d42019-05-21 18:48:55 -0500773 def fetch_versions(self, tag, descriptions=False, apps=False):
Alexd9fd85e2019-05-16 16:58:24 -0500774 """Executes parsing for specific tag
775 """
776 if descriptions:
777 logger_cli.warning(
778 "\n\n# !!! WARNING: Saving repo descriptions "
779 "consumes huge amount of disk space\n\n"
780 )
781 # if there is no such tag, parse it from repoinfo
Alexd9fd85e2019-05-16 16:58:24 -0500782 logger_cli.info("# Fetching versions for {}".format(tag))
Alexd0391d42019-05-21 18:48:55 -0500783 self.parse_tag(tag, descriptions=descriptions, apps=apps)
Alex0ed4f762019-05-17 17:55:33 -0500784 logger_cli.info("-> saving updated versions")
785 self.versionstgz.add_file(
786 _mirantis_versions_filename,
787 json.dumps(self._versions_mirantis),
788 replace=True
789 )
790 self.versionstgz.add_file(
791 _other_versions_filename,
792 json.dumps(self._versions_other),
793 replace=True
794 )
Alexd9fd85e2019-05-16 16:58:24 -0500795
796 def build_repos(self, url, tag=None):
797 """Builds versions data for selected tag, or for all of them
798 """
Alexd9fd85e2019-05-16 16:58:24 -0500799 # recoursively walk the mirrors
800 # and gather all of the repos for 'tag' or all of the tags
Alex3bc95f62020-03-05 17:00:04 -0600801 self._info_class.fetch_repos(url, tag=tag)
Alexd9fd85e2019-05-16 16:58:24 -0500802
Alex74dc1352019-05-17 13:18:24 -0500803 def _build_action(self, url, tags):
804 for t in tags:
Alex6df29ad2019-05-31 17:55:32 -0500805 logger_cli.info("# Building repo info for '{}'".format(t))
Alex74dc1352019-05-17 13:18:24 -0500806 self.build_repos(url, tag=t)
807
Alexd0391d42019-05-21 18:48:55 -0500808 def get_available_tags(self, tag=None):
809 # Populate action tags
Alex3bc95f62020-03-05 17:00:04 -0600810 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd0391d42019-05-21 18:48:55 -0500811
812 _tags = []
813 if tag in major:
814 _tags.append(tag)
815 if tag in updates:
816 _tags.append(tag + ".update")
817 if tag in hotfix:
818 _tags.append(tag + ".hotfix")
819
820 return _tags
821
Alexd9fd85e2019-05-16 16:58:24 -0500822 def action_for_tag(
823 self,
824 url,
825 tag,
826 action=None,
Alexd0391d42019-05-21 18:48:55 -0500827 descriptions=None,
828 apps=None
Alexd9fd85e2019-05-16 16:58:24 -0500829 ):
830 """Executes action for every tag from all collections
831 """
832 if not action:
833 logger_cli.info("# No action set, nothing to do")
Alex74dc1352019-05-17 13:18:24 -0500834 # See if this is a list action
Alexd9fd85e2019-05-16 16:58:24 -0500835 if action == "list":
Alex3bc95f62020-03-05 17:00:04 -0600836 _all = self._info_class.list_tags()
Alex6df29ad2019-05-31 17:55:32 -0500837 if _all:
838 # Print pretty list and exit
839 logger_cli.info("# Tags available at '{}':".format(url))
840 for t in _all:
841 _ri = self._repo_index
842 _isparsed = any(
Alex3bc95f62020-03-05 17:00:04 -0600843 [k for k, v in _ri.items()
Alex6df29ad2019-05-31 17:55:32 -0500844 if v['props']['tag'] == t]
845 )
846 if _isparsed:
847 logger_cli.info(get_tag_label(t, parsed=True))
848 else:
849 logger_cli.info(get_tag_label(t))
850 else:
851 logger_cli.info("# Not tags parsed yet for '{}':".format(url))
852
Alex74dc1352019-05-17 13:18:24 -0500853 # exit
Alexd9fd85e2019-05-16 16:58:24 -0500854 return
Alex74dc1352019-05-17 13:18:24 -0500855
Alex6df29ad2019-05-31 17:55:32 -0500856 if action == "build":
857 self._build_action(url, [tag])
858
Alexd0391d42019-05-21 18:48:55 -0500859 # Populate action tags
860 _action_tags = self.get_available_tags(tag)
861
Alexd9fd85e2019-05-16 16:58:24 -0500862 if not _action_tags:
863 logger_cli.info(
864 "# Tag of '{}' not found. "
865 "Consider rebuilding repos info.".format(tag)
866 )
Alex74dc1352019-05-17 13:18:24 -0500867 else:
Alexd9fd85e2019-05-16 16:58:24 -0500868 logger_cli.info(
Alex74dc1352019-05-17 13:18:24 -0500869 "-> tags to process: {}".format(
Alexd9fd85e2019-05-16 16:58:24 -0500870 ", ".join(_action_tags)
871 )
872 )
Alex74dc1352019-05-17 13:18:24 -0500873 # Execute actions
Alex6df29ad2019-05-31 17:55:32 -0500874 if action == "fetch":
Alexd9fd85e2019-05-16 16:58:24 -0500875 for t in _action_tags:
Alexd0391d42019-05-21 18:48:55 -0500876 self.fetch_versions(t, descriptions=descriptions, apps=apps)
Alexd9fd85e2019-05-16 16:58:24 -0500877
878 logger_cli.info("# Done.")
879
    def show_package(self, name):
        """Pretty-print all known versions/md5s/repo headers of *name*,
        grouped by section and app, with the collected maintainers.

        :param name: package name to show
        """
        # get the package data
        _p = self.get_package_versions(name)
        if not _p:
            logger_cli.warning(
                "# WARNING: Package '{}' not found".format(name)
            )
        else:
            # print package info using sorted tags from headers
            # Package: name
            # [u/h] tag \t <version>
            #           \t <version>
            # <10symbols> \t <md5> \t sorted headers with no tag
            # ...
            # section
            for _s in sorted(_p):
                # app
                for _a in sorted(_p[_s]):
                    _o = ""
                    _mm = []
                    # get and sort versions
                    for _v in sorted(_p[_s][_a]):
                        _o += "\n" + " "*8 + _v + ':\n'
                        # get and sort md5 sums
                        for _md5 in sorted(_p[_s][_a][_v]):
                            _o += " "*16 + _md5 + "\n"
                            # get and sort repo headers
                            for _r in sorted(_p[_s][_a][_v][_md5]):
                                _o += " "*24 + _r.replace('_', ' ') + '\n'
                                _m = _p[_s][_a][_v][_md5][_r]["maintainer"]
                                if _m not in _mm:
                                    _mm.append(_m)

                    logger_cli.info(
                        "\n# Package: {}/{}/{}\nMaintainers: {}".format(
                            _s,
                            _a,
                            name,
                            ", ".join(_mm)
                        )
                    )

                    logger_cli.info(_o)
Alex74dc1352019-05-17 13:18:24 -0500923
Alexd0391d42019-05-21 18:48:55 -0500924 @staticmethod
925 def get_apps(versions, name):
926 _all = True if name == '*' else False
Alexcf91b182019-05-31 11:57:07 -0500927 _s_max = _a_max = _p_max = _v_max = 0
Alexd0391d42019-05-21 18:48:55 -0500928 _rows = []
929 for _p in versions.keys():
930 _vs = versions[_p]
Alex3bc95f62020-03-05 17:00:04 -0600931 for _v, _d1 in _vs.items():
932 for _md5, _info in _d1.items():
Alexd0391d42019-05-21 18:48:55 -0500933 if _all or name == _info['app']:
934 _s_max = max(len(_info['section']), _s_max)
935 _a_max = max(len(_info['app']), _a_max)
Alexcf91b182019-05-31 11:57:07 -0500936 _p_max = max(len(_p), _p_max)
937 _v_max = max(len(_v), _v_max)
Alexd0391d42019-05-21 18:48:55 -0500938 _rows.append([
939 _info['section'],
940 _info['app'],
Alexcf91b182019-05-31 11:57:07 -0500941 _p,
942 _v,
943 _md5,
944 len(_info['repo'])
Alexd0391d42019-05-21 18:48:55 -0500945 ])
Alexcf91b182019-05-31 11:57:07 -0500946 # format columns
947 # section
948 _fmt = "{:"+str(_s_max)+"} "
949 # app
950 _fmt += "{:"+str(_a_max)+"} "
951 # package name
952 _fmt += "{:"+str(_p_max)+"} "
953 # version
954 _fmt += "{:"+str(_v_max)+"} "
955 # md5 and number of repos is fixed
956 _fmt += "{} in {} repos"
957
958 # fill rows
959 _rows = [_fmt.format(s, a, p, v, m, l) for s, a, p, v, m, l in _rows]
Alexd0391d42019-05-21 18:48:55 -0500960 _rows.sort()
961 return _rows
962
963 def show_app(self, name):
964 c = 0
965 rows = self.get_apps(self._versions_mirantis, name)
966 if rows:
Alexcf91b182019-05-31 11:57:07 -0500967 logger_cli.info("\n# Mirantis packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500968 logger_cli.info("\n".join(rows))
969 c += 1
970 rows = self.get_apps(self._versions_other, name)
971 if rows:
Alexcf91b182019-05-31 11:57:07 -0500972 logger_cli.info("\n# Other packages for '{}'".format(name))
Alexd0391d42019-05-21 18:48:55 -0500973 logger_cli.info("\n".join(rows))
974 c += 1
975 if c == 0:
976 logger_cli.info("\n# No app found for '{}'".format(name))
977
978 def get_mirantis_pkg_names(self):
979 # Mirantis maintainers only
980 return set(
981 self._versions_mirantis.keys()
982 ) - set(
983 self._versions_other.keys()
984 )
985
986 def get_other_pkg_names(self):
987 # Non-mirantis Maintainers
988 return set(
989 self._versions_other.keys()
990 ) - set(
991 self._versions_mirantis.keys()
992 )
993
994 def get_mixed_pkg_names(self):
995 # Mixed maintainers
996 return set(
997 self._versions_mirantis.keys()
998 ).intersection(set(
999 self._versions_other.keys()
1000 ))
1001
1002 def is_mirantis(self, name, tag=None):
1003 """Method checks if this package is mainteined
1004 by mirantis in target tag repo
1005 """
1006 if name in self._versions_mirantis:
1007 # check tag
1008 if tag:
1009 _pkg = self.get_package_versions(
1010 name,
1011 tagged=True
1012 )
1013 _tags = []
1014 for s in _pkg.keys():
1015 for a in _pkg[s].keys():
1016 for t in _pkg[s][a].keys():
1017 _tags.append(t)
1018 if any([t.startswith(tag) for t in _tags]):
1019 return True
1020 else:
1021 return None
1022 else:
1023 return True
1024 elif name in self._versions_other:
1025 # check tag
1026 if tag:
1027 _pkg = self.get_package_versions(
1028 name,
1029 tagged=True
1030 )
1031 _tags = []
1032 for s in _pkg.keys():
1033 for a in _pkg[s].keys():
1034 for t in _pkg[s][a].keys():
1035 _tags.append(t)
1036 if any([t.startswith(tag) for t in _tags]):
1037 return False
1038 else:
1039 return None
1040 else:
1041 return False
1042 else:
1043 logger.error(
1044 "# ERROR: package '{}' not found "
1045 "while determining maintainer".format(
1046 name
1047 )
1048 )
1049 return None
1050
1051 def get_filtered_versions(
1052 self,
1053 name,
1054 tag=None,
1055 include=None,
1056 exclude=None
1057 ):
1058 """Method gets all the versions for the package
1059 and filters them using keys above
1060 """
1061 if tag:
Alex3bc95f62020-03-05 17:00:04 -06001062 tag = str(tag) if not isinstance(tag, str) else tag
Alexd0391d42019-05-21 18:48:55 -05001063 _out = {}
1064 _vs = self.get_package_versions(name, tagged=True)
1065 # iterate to filter out keywords
Alex3bc95f62020-03-05 17:00:04 -06001066 for s, apps in _vs.items():
1067 for a, _tt in apps.items():
1068 for t, vs in _tt.items():
Alexd0391d42019-05-21 18:48:55 -05001069 # filter tags
1070 if tag and t != tag and t.rsplit('.', 1)[0] != tag:
1071 continue
1072 # Skip hotfix tag
1073 if t == tag + ".hotfix":
1074 continue
Alex3bc95f62020-03-05 17:00:04 -06001075 for v, rp in vs.items():
1076 for h, p in rp.items():
Alexd0391d42019-05-21 18:48:55 -05001077 # filter headers with all keywords matching
1078 _h = re.split(r"[\-\_]+", h)
1079 _included = all([kw in _h for kw in include])
1080 _excluded = any([kw in _h for kw in exclude])
1081 if not _included or _excluded:
1082 continue
1083 else:
1084 nested_set(_out, [s, a, v], [])
1085 _dat = {
1086 "header": h
1087 }
1088 _dat.update(p)
1089 _out[s][a][v].append(_dat)
1090 return _out
1091
    def get_package_versions(self, name, tagged=False):
        """Method builds package version structure
        with repository properties included

        Merges the per-version data for *name* from both the Mirantis
        and the 'other' maintainer stores, then re-keys it by section
        and app, resolving each indexed repo/maintainer pair.

        :param name: package name to look up
        :param tagged: when True the result is keyed
            [section][app][tag][version][header-without-tag];
            when False it is keyed [section][app][version][md5][header]
        :return: nested dict as described above (empty if name unknown)
        """
        # get data
        _vs = {}

        # a name present in both stores gets its versions merged,
        # with the 'other' store winning on duplicate version keys
        if name in self._versions_mirantis:
            _vs.update(self._versions_mirantis[name])
        if name in self._versions_other:
            _vs.update(self._versions_other[name])

        # insert repo data, insert props into headers place
        _package = {}
        if tagged:
            for _v, _d1 in _vs.items():
                # use tag as a next step
                for _md5, _info in _d1.items():
                    _s = _info['section']
                    _a = _info['app']
                    for _pair in _info['repo']:
                        # fresh dict per repo pair, so entries do not share
                        _rp = {}
                        # extract props for a repo
                        _r, _m = self._get_indexed_values(_pair)
                        # get tag
                        _tag = _r["props"]["tag"]
                        # cut tag from the header
                        # NOTE(review): assumes headers always contain '_';
                        # split(...)[1] would raise IndexError otherwise
                        _cut_head = _r["header"].split("_", 1)[1]
                        # populate dict
                        _rp["maintainer"] = _m
                        _rp["md5"] = _md5
                        _rp.update(_r["props"])
                        nested_set(
                            _package,
                            [_s, _a, _tag, _v, _cut_head],
                            _rp
                        )
        else:
            for _v, _d1 in _vs.items():
                for _md5, _info in _d1.items():
                    _s = _info['section']
                    _a = _info['app']
                    for _pair in _info['repo']:
                        _r, _m = self._get_indexed_values(_pair)
                        # NOTE(review): this mutates the stored _info dict
                        # in place (it is shared with the versions store),
                        # and repeated pairs overwrite each other's props;
                        # the same _info object is then stored under every
                        # header -- confirm this is intentional
                        _info["maintainer"] = _m
                        _info.update(_r["props"])
                        nested_set(
                            _package,
                            [_s, _a, _v, _md5, _r["header"]],
                            _info
                        )

        return _package
1145
Alexd9fd85e2019-05-16 16:58:24 -05001146 def parse_repos(self):
1147 # all tags to check
Alex3bc95f62020-03-05 17:00:04 -06001148 major, updates, hotfix = self._info_class.list_tags(splitted=True)
Alexd9fd85e2019-05-16 16:58:24 -05001149
1150 # major tags
1151 logger_cli.info("# Processing major tags")
1152 for _tag in major:
1153 self.fetch_versions(_tag)
1154
1155 # updates tags
1156 logger_cli.info("# Processing update tags")
1157 for _tag in updates:
1158 self.fetch_versions(_tag + ".update")
1159
1160 # hotfix tags
1161 logger_cli.info("# Processing hotfix tags")
1162 for _tag in hotfix:
1163 self.fetch_versions(_tag + ".hotfix")