| #! /usr/bin/env python |
| # Copyright (C) 2011 OpenStack, LLC. |
| # Copyright (C) 2013 Hewlett-Packard Development Company, L.P. |
| # Copyright (C) 2013 OpenStack Foundation |
| # |
| # Licensed under the Apache License, Version 2.0 (the "License"); |
| # you may not use this file except in compliance with the License. |
| # You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| # License for the specific language governing permissions and limitations |
| # under the License. |
| |
| # run_mirror reads a YAML config file like: |
| # cache-root: /tmp/cache |
| # |
| # mirrors: |
| # - name: openstack |
| # projects: |
| # - https://github.com/openstack/requirements |
| # output: /tmp/mirror/openstack |
| # |
| # - name: openstack-infra |
| # projects: |
| # - https://github.com/openstack-infra/config |
| # output: /tmp/mirror/openstack-infra |
| # |
| # The algorithm it attempts to follow is: |
| # |
| # for each project: |
| # clone if necessary and fetch origin |
| # for each project-branch: |
| # create new virtualenv |
| # pip install reqs into virtualenv |
| # if installation succeeds: |
| # pip freeze > full-reqs |
| # create new virtualenv |
| # pip install (download only) full-reqs into virtualenv |
| # |
| # By default only summary information is printed on stdout (see the |
| # -d command line option to get more debug info). |
| # |
| # If "pip install" for a branch's requirements fails to complete |
| # (based on parsing of its output), that output will be copied to |
| # stderr and the script will skip ahead to the next branch. This |
| # makes it suitable for running in a cron job with only stdout |
| # redirected to a log, and also avoids one broken project preventing |
| # caching of requirements for others. |
| |
| import argparse |
| import datetime |
| import md5 |
| import os |
| import pkginfo |
| import re |
| import shlex |
| import shutil |
| import subprocess |
| import sys |
| import tempfile |
| import urllib |
| import yaml |
| |
| |
| class Mirror(object): |
| def __init__(self): |
| parser = argparse.ArgumentParser( |
| description='Build a pypi mirror from requirements') |
| parser.add_argument('-b', dest='branch', |
| help='restrict run to a specified branch') |
| parser.add_argument('-c', dest='config', |
| help='specify the config file') |
| parser.add_argument('-n', dest='noop', action='store_true', |
| help='do not run any commands') |
| parser.add_argument('--no-pip', dest='no_pip', action='store_true', |
| help='do not run any pip commands') |
| parser.add_argument('--verbose', dest='debug', action='store_true', |
| help='output verbose debug information') |
| parser.add_argument('--no-download', dest='no_download', |
| action='store_true', |
| help='only process the pip cache into a mirror ' |
| '(do not download)') |
| parser.add_argument('--no-process', dest='no_process', |
| action='store_true', |
| help='only download into the pip cache ' |
| '(do not process the cache into a mirror)') |
| parser.add_argument('--no-update', dest='no_update', |
| action='store_true', |
| help='do not update any git repos') |
| self.args = parser.parse_args() |
| self.config = yaml.load(open(self.args.config)) |
| |
| def run_command(self, cmd): |
| cmd_list = shlex.split(str(cmd)) |
| self.debug("Run: %s" % cmd) |
| if self.args.noop: |
| return '' |
| if self.args.no_pip and cmd_list[0].endswith('pip'): |
| return '' |
| p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE, |
| stderr=subprocess.STDOUT) |
| (out, nothing) = p.communicate() |
| out = out.strip() |
| self.debug(out) |
| return out |
| |
| def run(self): |
| for mirror in self.config['mirrors']: |
| if not self.args.no_download: |
| self.build_mirror(mirror) |
| if not self.args.no_process: |
| self.process_cache(mirror) |
| |
| def chdir(self, dest): |
| self.debug("cd %s" % dest) |
| if not self.args.noop: |
| os.chdir(dest) |
| |
| def debug(self, msg): |
| if self.args.debug: |
| print msg |
| |
| def process_http_requirements(self, reqlist, pip_cache_dir, pip): |
| new_reqs = [] |
| for reqfile in reqlist: |
| for req in open(reqfile): |
| req = req.strip() |
| # Handle http://, https://, and git+https?:// |
| if not re.search('https?://', req): |
| new_reqs.append(req) |
| continue |
| target_url = req.split('#', 1)[0] |
| target_file = os.path.join(pip_cache_dir, |
| urllib.quote(target_url, '')) |
| if os.path.exists(target_file): |
| self.debug("Unlink: %s" % target_file) |
| os.unlink(target_file) |
| if os.path.exists(target_file + '.content-type'): |
| self.debug("Unlink: %s.content-type" % target_file) |
| os.unlink(target_file + '.content-type') |
| return new_reqs |
| |
| def find_pkg_info(self, path): |
| versions = set() |
| for root, dirs, files in os.walk(path): |
| if not root.endswith('.egg'): |
| continue |
| if not os.path.exists(os.path.join(root, 'EGG-INFO', 'PKG-INFO')): |
| continue |
| package = pkginfo.Develop(root) |
| versions.add('%s==%s' % (package.name, package.version)) |
| return versions |
| |
| def build_mirror(self, mirror): |
| print("Building mirror: %s" % mirror['name']) |
| pip_format = ("%s install -M -U %s --exists-action=w " |
| "--download-cache=%s --build %s -r %s") |
| venv_format = ("virtualenv --clear --extra-search-dir=%s %s") |
| |
| workdir = tempfile.mkdtemp() |
| reqs = os.path.join(workdir, "reqs") |
| venv = os.path.join(workdir, "venv") |
| build = os.path.join(workdir, "build") |
| pip = os.path.join(venv, "bin", "pip") |
| |
| project_cache_dir = os.path.join(self.config['cache-root'], |
| 'projects') |
| pip_cache_dir = os.path.join(self.config['cache-root'], |
| 'pip', mirror['name']) |
| if not self.args.noop: |
| if not os.path.exists(project_cache_dir): |
| os.makedirs(project_cache_dir) |
| if not os.path.exists(pip_cache_dir): |
| os.makedirs(pip_cache_dir) |
| |
| for project in mirror['projects']: |
| print("Updating repository: %s" % project) |
| self.chdir(project_cache_dir) |
| short_project = project.split('/')[-1] |
| if short_project.endswith('.git'): |
| short_project = short_project[:-4] |
| if not os.path.isdir(short_project): |
| out = self.run_command("git clone %s %s" % |
| (project, short_project)) |
| self.chdir(os.path.join(project_cache_dir, |
| short_project)) |
| out = self.run_command("git fetch -p origin") |
| |
| if self.args.branch: |
| branches = [self.args.branch] |
| else: |
| branches = self.run_command("git branch -a").split("\n") |
| for branch in branches: |
| branch = branch.strip() |
| if (not branch.startswith("remotes/origin") |
| or "origin/HEAD" in branch): |
| continue |
| print("Fetching pip requires for %s:%s" % |
| (project, branch)) |
| if not self.args.no_update: |
| out = self.run_command("git reset --hard %s" % branch) |
| out = self.run_command("git clean -x -f -d -q") |
| reqlist = [] |
| for requires_file in ("requirements.txt", |
| "test-requirements.txt", |
| "tools/pip-requires", |
| "tools/test-requires"): |
| if os.path.exists(requires_file): |
| reqlist.append(requires_file) |
| if reqlist: |
| out = self.run_command(venv_format % |
| (pip_cache_dir, venv)) |
| if os.path.exists(build): |
| shutil.rmtree(build) |
| new_reqs = self.process_http_requirements(reqlist, |
| pip_cache_dir, |
| pip) |
| (reqfp, reqfn) = tempfile.mkstemp() |
| os.write(reqfp, '\n'.join(new_reqs)) |
| os.close(reqfp) |
| out = self.run_command(pip_format % |
| (pip, "", pip_cache_dir, |
| build, reqfn)) |
| if "\nSuccessfully installed " not in out: |
| sys.stderr.write("Installing pip requires for %s:%s " |
| "failed.\n%s\n" % |
| (project, branch, out)) |
| print("pip install did not indicate success") |
| else: |
| freeze = self.run_command("%s freeze -l" % pip) |
| requires = self.find_pkg_info(build) |
| reqfd = open(reqs, "w") |
| for line in freeze.split("\n"): |
| if line.startswith("-e ") or ( |
| "==" in line and " " not in line): |
| requires.add(line) |
| for r in requires: |
| reqfd.write(r + "\n") |
| reqfd.close() |
| out = self.run_command(venv_format % |
| (pip_cache_dir, venv)) |
| if os.path.exists(build): |
| shutil.rmtree(build) |
| out = self.run_command(pip_format % |
| (pip, "--no-install", |
| pip_cache_dir, build, reqs)) |
| if "\nSuccessfully downloaded " not in out: |
| sys.stderr.write("Downloading pip requires for " |
| "%s:%s failed.\n%s\n" % |
| (project, branch, out)) |
| print("pip install did not indicate success") |
| print("cached:\n%s" % freeze) |
| else: |
| print("no requirements") |
| shutil.rmtree(workdir) |
| |
| def process_cache(self, mirror): |
| if self.args.noop: |
| return |
| |
| pip_cache_dir = os.path.join(self.config['cache-root'], |
| 'pip', mirror['name']) |
| destination_mirror = mirror['output'] |
| |
| PACKAGE_VERSION_RE = re.compile(r'(.*)-[0-9]') |
| full_html_line = "<a href='{dir}/{name}'>{name}</a><br />\n" |
| |
| packages = {} |
| package_count = 0 |
| |
| if not os.path.exists(destination_mirror): |
| os.makedirs(destination_mirror) |
| |
| for filename in os.listdir(pip_cache_dir): |
| if filename.endswith('content-type'): |
| continue |
| |
| realname = urllib.unquote(filename) |
| # The ? accounts for sourceforge downloads |
| tarball = os.path.basename(realname).split("?")[0] |
| name_match = PACKAGE_VERSION_RE.search(tarball) |
| |
| if name_match is None: |
| continue |
| package_name = name_match.group(1) |
| |
| version_list = packages.get(package_name, {}) |
| version_list[tarball] = filename |
| packages[package_name] = version_list |
| package_count = package_count + 1 |
| |
| full_html = open(os.path.join(destination_mirror, ".full.html"), 'w') |
| simple_html = open(os.path.join(destination_mirror, ".index.html"), |
| 'w') |
| |
| header = ("<html><head><title>PyPI Mirror</title></head>" |
| "<body><h1>PyPI Mirror</h1><h2>Last update: %s</h2>\n\n" |
| % datetime.datetime.utcnow().strftime("%c UTC")) |
| full_html.write(header) |
| simple_html.write(header) |
| |
| for package_name, versions in packages.items(): |
| destination_dir = os.path.join(destination_mirror, package_name) |
| if not os.path.isdir(destination_dir): |
| os.makedirs(destination_dir) |
| safe_dir = urllib.quote(package_name) |
| simple_html.write("<a href='%s'>%s</a><br />\n" % |
| (safe_dir, safe_dir)) |
| with open(os.path.join(destination_dir, ".index.html"), |
| 'w') as index: |
| index.write("""<html><head> |
| <title>%s – PyPI Mirror</title> |
| </head><body>\n""" % package_name) |
| for tarball, filename in versions.items(): |
| source_path = os.path.join(pip_cache_dir, filename) |
| destination_path = os.path.join(destination_dir, |
| tarball) |
| dot_destination_path = os.path.join(destination_dir, |
| '.' + tarball) |
| with open(dot_destination_path, 'w') as dest: |
| src = open(source_path, 'r').read() |
| md5sum = md5.md5(src).hexdigest() |
| dest.write(src) |
| |
| safe_name = urllib.quote(tarball) |
| |
| full_html.write(full_html_line.format(dir=safe_dir, |
| name=safe_name)) |
| index.write("<a href='%s#md5=%s'>%s</a>\n" % |
| (safe_name, md5sum, safe_name)) |
| os.rename(dot_destination_path, destination_path) |
| index.write("</body></html>\n") |
| os.rename(os.path.join(destination_dir, ".index.html"), |
| os.path.join(destination_dir, "index.html")) |
| footer = """<p class='footer'>Generated by process_cache.py; %d |
| packages mirrored. </p> |
| </body></html>\n""" % package_count |
| full_html.write(footer) |
| full_html.close() |
| os.rename(os.path.join(destination_mirror, ".full.html"), |
| os.path.join(destination_mirror, "full.html")) |
| simple_html.write(footer) |
| simple_html.close() |
| os.rename(os.path.join(destination_mirror, ".index.html"), |
| os.path.join(destination_mirror, "index.html")) |
| |
| |
| def main(): |
| mb = Mirror() |
| mb.run() |
| |
| |
| if __name__ == "__main__": |
| main() |