Monty Taylor | a140e1a | 2012-11-22 10:00:22 -0800 | [diff] [blame] | 1 | # vim: tabstop=4 shiftwidth=4 softtabstop=4 |
| 2 | |
| 3 | # Copyright 2012 Hewlett-Packard Development Company, L.P. |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); you may |
| 6 | # not use this file except in compliance with the License. You may obtain |
| 7 | # a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 13 | # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 14 | # License for the specific language governing permissions and limitations |
| 15 | # under the License. |
| 16 | |
| 17 | import os |
| 18 | import urllib |
| 19 | import datetime |
| 20 | import sys |
| 21 | import re |
| 22 | import md5 |
| 23 | |
| 24 | |
def _copy_with_md5(source_path, destination_path, chunk_size=1024 * 1024):
    """Copy a file byte-for-byte and return the MD5 hex digest of its content.

    Both files are opened in binary mode so archives are never subjected to
    newline translation or text decoding, and the data is streamed in
    ``chunk_size`` pieces so large tarballs are not held in memory at once.
    """
    # Function-scope import: the file header only imports the Python 2-only
    # ``md5`` module; ``hashlib`` is available on Python 2.5+ and Python 3.
    import hashlib

    digest = hashlib.md5()
    with open(source_path, 'rb') as src:
        with open(destination_path, 'wb') as dest:
            while True:
                chunk = src.read(chunk_size)
                if not chunk:
                    break
                dest.write(chunk)
                digest.update(chunk)
    return digest.hexdigest()


def _mirror_package(source_cache, destination_dir, package_name, versions,
                    quote):
    """Populate one package directory and write its ``index.html``.

    ``versions`` maps a tarball name to the cache file name it is copied
    from.  Every tarball is copied into ``destination_dir`` and linked from
    the package index with an ``#md5=`` fragment.  Returns the URL-quoted
    tarball names in the order they were written.
    """
    if not os.path.isdir(destination_dir):
        os.makedirs(destination_dir)

    safe_names = []
    with open(os.path.join(destination_dir, "index.html"), 'w') as index:
        # ``&ndash;`` keeps this source file pure ASCII; it renders as an
        # en dash in the page title.
        index.write("<html><head>\n"
                    "<title>%s &ndash; PyPI Mirror</title>\n"
                    "</head><body>\n" % package_name)
        # Sorted so the generated page is stable from run to run.
        for tarball in sorted(versions):
            md5sum = _copy_with_md5(
                os.path.join(source_cache, versions[tarball]),
                os.path.join(destination_dir, tarball))
            safe_name = quote(tarball)
            index.write("<a href='%s#md5=%s'>%s</a>\n" % (safe_name,
                                                          md5sum,
                                                          safe_name))
            safe_names.append(safe_name)
        index.write("</body></html>\n")
    return safe_names


def main():
    """Turn a download cache directory into a browsable mirror tree.

    Reads two command-line arguments: ``sys.argv[1]`` is the source cache
    directory and ``sys.argv[2]`` is the destination mirror directory
    (indexing ``sys.argv`` raises ``IndexError`` when either is missing).

    Cache entries whose name ends in ``content-type`` are skipped.  Every
    other entry is URL-unquoted, reduced to its base name (dropping any
    ``?query`` suffix) and grouped under the text preceding the last
    ``-<digit>`` in that name; entries without such a marker are skipped.

    Written to the destination directory:
      * ``index.html`` - one link per package directory,
      * ``full.html`` - one link per mirrored tarball,
      * ``<package>/index.html`` plus a copy of each tarball, linked with
        its MD5 checksum.
    """
    import time

    # urllib's quoting helpers live in different places on Python 2 and 3.
    try:
        from urllib import quote, unquote
    except ImportError:
        from urllib.parse import quote, unquote

    source_cache = sys.argv[1]
    destination_mirror = sys.argv[2]

    package_version_re = re.compile(r'(.*)-[0-9]')
    full_html_line = "<a href='{dir}/{name}'>{name}</a><br />\n"

    packages = {}
    package_count = 0

    for filename in os.listdir(source_cache):
        if filename.endswith('content-type'):
            continue

        realname = unquote(filename)
        # The ? accounts for sourceforge downloads
        tarball = os.path.basename(realname).split("?")[0]
        name_match = package_version_re.search(tarball)
        if name_match is None:
            continue

        packages.setdefault(name_match.group(1), {})[tarball] = filename
        # Counts every matching cache entry, including entries that resolve
        # to a tarball name that was already seen.
        package_count += 1

    header = ("<html><head><title>PyPI Mirror</title></head>"
              "<body><h1>PyPI Mirror</h1><h2>Last update: %s</h2>\n\n"
              % time.strftime("%c UTC", time.gmtime()))
    footer = ("<p class='footer'>Generated by process_cache.py; %d\n"
              "packages mirrored. </p>\n"
              "</body></html>\n" % package_count)

    # Nested ``with`` blocks guarantee both top-level pages are closed even
    # if copying a tarball fails part-way through.
    with open(os.path.join(destination_mirror, "full.html"), 'w') as full_html:
        with open(os.path.join(destination_mirror, "index.html"),
                  'w') as simple_html:
            full_html.write(header)
            simple_html.write(header)

            # Sorted so the listings are alphabetical and reproducible.
            for package_name in sorted(packages):
                safe_dir = quote(package_name)
                simple_html.write("<a href='%s'>%s</a><br />\n"
                                  % (safe_dir, safe_dir))
                safe_names = _mirror_package(
                    source_cache,
                    os.path.join(destination_mirror, package_name),
                    package_name,
                    packages[package_name],
                    quote)
                for safe_name in safe_names:
                    full_html.write(full_html_line.format(dir=safe_dir,
                                                          name=safe_name))

            full_html.write(footer)
            simple_html.write(footer)