Merge "Make protocol configurable"
diff --git a/jeepyb/cmd/create_cgitrepos.py b/jeepyb/cmd/create_cgitrepos.py
index 6950655..8294f09 100644
--- a/jeepyb/cmd/create_cgitrepos.py
+++ b/jeepyb/cmd/create_cgitrepos.py
@@ -34,6 +34,7 @@
 SCRATCH_GROUP = os.environ.get('SCRATCH_GROUP', 'scratch')
 CGIT_USER = os.environ.get('CGIT_USER', 'cgit')
 CGIT_GROUP = os.environ.get('CGIT_GROUP', 'cgit')
+DEFAULT_ORG = os.environ.get('DEFAULT_ORG', None)
 
 
 def main():
@@ -42,7 +43,13 @@
     names = set()
     for entry in registry.configs_list:
         project = entry['project']
-        (org, name) = project.split('/')
+        if '/' in project:
+            (org, name) = project.split('/')
+        else:
+            if DEFAULT_ORG is None:
+                raise RuntimeError('No org specified for project %s and no '
+                                   'DEFAULT_ORG is set.' % project)
+            (org, name) = (DEFAULT_ORG, project)
         description = entry.get('description', name)
         assert project not in names
         names.add(project)
diff --git a/jeepyb/cmd/expire_old_reviews.py b/jeepyb/cmd/expire_old_reviews.py
index 9ec1064..5a62c05 100644
--- a/jeepyb/cmd/expire_old_reviews.py
+++ b/jeepyb/cmd/expire_old_reviews.py
@@ -74,7 +74,7 @@
 
     for line in stdout:
         row = json.loads(line)
-        if 'rowCount' not in row:
+        if 'rowCount' not in row and 'open' in row and row['open']:
             # Search for negative approvals
             for approval in row['currentPatchSet']['approvals']:
                 if approval['value'] in ('-1', '-2'):
diff --git a/jeepyb/cmd/manage_projects.py b/jeepyb/cmd/manage_projects.py
index 878a4a8..36c3b89 100644
--- a/jeepyb/cmd/manage_projects.py
+++ b/jeepyb/cmd/manage_projects.py
@@ -52,12 +52,13 @@
 
 import argparse
 import ConfigParser
+import glob
+import hashlib
+import json
 import logging
 import os
 import re
-import shlex
-import subprocess
-import tempfile
+import shutil
 import time
 
 import gerritlib.gerrit
@@ -70,6 +71,7 @@
 registry = u.ProjectsRegistry()
 
 log = logging.getLogger("manage_projects")
+orgs = None
 
 # Gerrit system groups as defined:
 # https://review.openstack.org/Documentation/access-control.html#system_groups
@@ -94,51 +96,15 @@
     pass
 
 
-def run_command(cmd, status=False, env=None):
-    env = env or {}
-    cmd_list = shlex.split(str(cmd))
-    newenv = os.environ
-    newenv.update(env)
-    log.info("Executing command: %s" % " ".join(cmd_list))
-    p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
-                         stderr=subprocess.STDOUT, env=newenv)
-    (out, nothing) = p.communicate()
-    log.debug("Return code: %s" % p.returncode)
-    log.debug("Command said: %s" % out.strip())
-    if status:
-        return (p.returncode, out.strip())
-    return out.strip()
-
-
-def run_command_status(cmd, env=None):
-    env = env or {}
-    return run_command(cmd, True, env)
-
-
-def git_command(repo_dir, sub_cmd, env=None):
-    env = env or {}
-    git_dir = os.path.join(repo_dir, '.git')
-    cmd = "git --git-dir=%s --work-tree=%s %s" % (git_dir, repo_dir, sub_cmd)
-    status, _ = run_command(cmd, True, env)
-    return status
-
-
-def git_command_output(repo_dir, sub_cmd, env=None):
-    env = env or {}
-    git_dir = os.path.join(repo_dir, '.git')
-    cmd = "git --git-dir=%s --work-tree=%s %s" % (git_dir, repo_dir, sub_cmd)
-    status, out = run_command(cmd, True, env)
-    return (status, out)
-
-
 def fetch_config(project, remote_url, repo_path, env=None):
     env = env or {}
     # Poll for refs/meta/config as gerrit may not have written it out for
     # us yet.
     for x in range(10):
-        status = git_command(repo_path, "fetch %s +refs/meta/config:"
-                             "refs/remotes/gerrit-meta/config" %
-                             remote_url, env)
+        status = u.git_command(
+            repo_path,
+            "fetch %s +refs/meta/config:refs/remotes/gerrit-meta/config"
+            % remote_url, env)
         if status == 0:
             break
         else:
@@ -153,13 +119,13 @@
     # one yet.
     output = ""
     for x in range(10):
-        status = git_command(repo_path, "remote update --prune", env)
+        status = u.git_command(repo_path, "remote update --prune", env)
         if status != 0:
             log.error("Failed to update remote: %s" % remote_url)
             time.sleep(2)
             continue
         else:
-            status, output = git_command_output(
+            status, output = u.git_command_output(
                 repo_path, "ls-files --with-tree=remotes/gerrit-meta/config "
                 "project.config", env)
         if output.strip() != "project.config" or status != 0:
@@ -174,8 +140,8 @@
 
     # Because the following fails if executed more than once you should only
     # run fetch_config once in each repo.
-    status = git_command(repo_path, "checkout -B config "
-                         "remotes/gerrit-meta/config")
+    status = u.git_command(
+        repo_path, "checkout -B config remotes/gerrit-meta/config")
     if status != 0:
         log.error("Failed to checkout config for project: %s" % project)
         raise FetchConfigException()
@@ -186,32 +152,31 @@
         raise CopyACLException()
 
     acl_dest = os.path.join(repo_path, "project.config")
-    status, _ = run_command("cp %s %s" %
-                            (acl_config, acl_dest), status=True)
+    status, _ = u.run_command(
+        "cp %s %s" % (acl_config, acl_dest), status=True)
     if status != 0:
         raise CopyACLException()
 
-    status = git_command(repo_path, "diff --quiet")
+    status = u.git_command(repo_path, "diff --quiet")
     return status != 0
 
 
 def push_acl_config(project, remote_url, repo_path, gitid, env=None):
     env = env or {}
     cmd = "commit -a -m'Update project config.' --author='%s'" % gitid
-    status = git_command(repo_path, cmd)
+    status = u.git_command(repo_path, cmd)
     if status != 0:
         log.error("Failed to commit config for project: %s" % project)
         return False
-    status, out = git_command_output(repo_path,
-                                     "push %s HEAD:refs/meta/config" %
-                                     remote_url, env)
+    status, out = u.git_command_output(
+        repo_path, "push %s HEAD:refs/meta/config" % remote_url, env)
     if status != 0:
         log.error("Failed to push config for project: %s" % project)
         return False
     return True
 
 
-def _get_group_uuid(group):
+def _get_group_uuid(group, retries=10):
     """
     Gerrit keeps internal user groups in the DB while it keeps systems
     groups in All-Projects groups file (in refs/meta/config).  This
@@ -224,7 +189,7 @@
     """
     query = "SELECT group_uuid FROM account_groups WHERE name = %s"
     con = jeepyb.gerritdb.connect()
-    for x in range(10):
+    for x in range(retries):
         cursor = con.cursor()
         cursor.execute(query, (group,))
         data = cursor.fetchone()
@@ -232,12 +197,13 @@
         con.commit()
         if data:
             return data[0]
-        time.sleep(1)
+        if retries > 1:
+            time.sleep(1)
     return None
 
 
 def get_group_uuid(gerrit, group):
-    uuid = _get_group_uuid(group)
+    uuid = _get_group_uuid(group, retries=1)
     if uuid:
         return uuid
     if group in GERRIT_SYSTEM_GROUPS:
@@ -269,40 +235,48 @@
         with open(group_file, 'w') as fp:
             for group, uuid in uuids.items():
                 fp.write("%s\t%s\n" % (uuid, group))
-        status = git_command(repo_path, "add groups")
+        status = u.git_command(repo_path, "add groups")
         if status != 0:
             log.error("Failed to add groups file for project: %s" % project)
             raise CreateGroupException()
 
 
-def make_ssh_wrapper(gerrit_user, gerrit_key):
-    (fd, name) = tempfile.mkstemp(text=True)
-    os.write(fd, '#!/bin/bash\n')
-    os.write(fd,
-             'ssh -i %s -l %s -o "StrictHostKeyChecking no" $@\n' %
-             (gerrit_key, gerrit_user))
-    os.close(fd)
-    os.chmod(name, 0o755)
-    return dict(GIT_SSH=name)
-
-
-def create_github_project(
+def create_update_github_project(
         default_has_issues, default_has_downloads, default_has_wiki,
-        github_secure_config, options, project, description, homepage):
+        github_secure_config, options, project, description, homepage,
+        cache):
     created = False
     has_issues = 'has-issues' in options or default_has_issues
     has_downloads = 'has-downloads' in options or default_has_downloads
     has_wiki = 'has-wiki' in options or default_has_wiki
 
+    needs_update = False
+    if not cache.get('created-in-github', False):
+        needs_update = True
+    if not cache.get('gerrit-in-team', False):
+        needs_update = True
+    if cache.get('has_issues', default_has_issues) != has_issues:
+        needs_update = True
+    if cache.get('has_downloads', default_has_downloads) != has_downloads:
+        needs_update = True
+    if cache.get('has_wiki', default_has_wiki) != has_wiki:
+        needs_update = True
+    if not needs_update:
+        return False
+
     secure_config = ConfigParser.ConfigParser()
     secure_config.read(github_secure_config)
 
-    if secure_config.has_option("github", "oauth_token"):
-        ghub = github.Github(secure_config.get("github", "oauth_token"))
-    else:
-        ghub = github.Github(secure_config.get("github", "username"),
-                             secure_config.get("github", "password"))
-    orgs = ghub.get_user().get_orgs()
+    global orgs
+    if orgs is None:
+        if secure_config.has_option("github", "oauth_token"):
+            ghub = github.Github(secure_config.get("github", "oauth_token"))
+        else:
+            ghub = github.Github(secure_config.get("github", "username"),
+                                 secure_config.get("github", "password"))
+
+        log.info('Fetching github org list')
+        orgs = ghub.get_user().get_orgs()
     orgs_dict = dict(zip([o.login.lower() for o in orgs], orgs))
 
     # Find the project's repo
@@ -318,26 +292,50 @@
     except KeyError:
         # We do not have control of this github org ignore the project.
         return False
+
     try:
+        log.info("Fetching github info about %s", repo_name)
         repo = org.get_repo(repo_name)
+
     except github.GithubException:
+        log.info("Creating %s in github", repo_name)
         repo = org.create_repo(repo_name,
                                homepage=homepage,
                                has_issues=has_issues,
                                has_downloads=has_downloads,
                                has_wiki=has_wiki)
-        if description:
-            repo.edit(repo_name, description=description)
-        if homepage:
-            repo.edit(repo_name, homepage=homepage)
-        repo.edit(repo_name, has_issues=has_issues,
-                  has_downloads=has_downloads,
-                  has_wiki=has_wiki)
+        created = True
 
+    cache['created-in-github'] = True
+    cache['has_wiki'] = has_wiki
+    cache['has_downloads'] = has_downloads
+    cache['has_issues'] = has_issues
+
+    kwargs = {}
+    # If necessary, update project on Github
+    if description and description != repo.description:
+        kwargs['description'] = description
+    if homepage and homepage != repo.homepage:
+        kwargs['homepage'] = homepage
+    if has_issues != repo.has_issues:
+        kwargs['has_issues'] = has_issues
+    if has_downloads != repo.has_downloads:
+        kwargs['has_downloads'] = has_downloads
+    if has_wiki != repo.has_wiki:
+        kwargs['has_wiki'] = has_wiki
+
+    if kwargs:
+        log.info("Updating github repo info about %s", repo_name)
+        repo.edit(repo_name, **kwargs)
+    cache.update(kwargs)
+
+    if not cache.get('gerrit-in-team', False):
         if 'gerrit' not in [team.name for team in repo.get_teams()]:
+            log.info("Adding gerrit to github team for %s", repo_name)
             teams = org.get_teams()
             teams_dict = dict(zip([t.name.lower() for t in teams], teams))
             teams_dict['gerrit'].add_to_repos(repo)
+        cache['gerrit-in-team'] = True
         created = True
 
     return created
@@ -349,131 +347,33 @@
     return None
 
 
-def make_local_copy(repo_path, project, project_list,
-                    git_opts, ssh_env, upstream, GERRIT_HOST, GERRIT_PORT,
-                    project_git, GERRIT_GITID):
-
-    # Ensure that the base location exists
-    if not os.path.exists(os.path.dirname(repo_path)):
-        os.makedirs(os.path.dirname(repo_path))
-
-    # Three choices
-    #  - If gerrit has it, get from gerrit
-    #  - If gerrit doesn't have it:
-    #    - If it has an upstream, clone that
-    #    - If it doesn't, create it
-
-    # Gerrit knows about the project, clone it
-    # TODO(mordred): there is a possible failure condition here
-    #                we should consider 'gerrit has it' to be
-    #                'gerrit repo has a master branch'
-    if project in project_list:
-        run_command(
-            "git clone %(remote_url)s %(repo_path)s" % git_opts,
-            env=ssh_env)
-        if upstream:
-            git_command(
-                repo_path,
-                "remote add -f upstream %(upstream)s" % git_opts)
-        return None
-
-    # Gerrit doesn't have it, but it has an upstream configured
-    # We're probably importing it for the first time, clone
-    # upstream, but then ongoing we want gerrit to ge origin
-    # and upstream to be only there for ongoing tracking
-    # purposes, so rename origin to upstream and add a new
-    # origin remote that points at gerrit
-    elif upstream:
-        run_command(
-            "git clone %(upstream)s %(repo_path)s" % git_opts,
-            env=ssh_env)
-        git_command(
-            repo_path,
-            "fetch origin +refs/heads/*:refs/copy/heads/*",
-            env=ssh_env)
-        git_command(repo_path, "remote rename origin upstream")
-        git_command(
-            repo_path,
-            "remote add origin %(remote_url)s" % git_opts)
-        return "push %s +refs/copy/heads/*:refs/heads/*"
-
-    # Neither gerrit has it, nor does it have an upstream,
-    # just create a whole new one
-    else:
-        run_command("git init %s" % repo_path)
-        git_command(
-            repo_path,
-            "remote add origin %(remote_url)s" % git_opts)
-        with open(os.path.join(repo_path,
-                               ".gitreview"),
-                  'w') as gitreview:
-            gitreview.write("""[gerrit]
-host=%s
-port=%s
-project=%s
-""" % (GERRIT_HOST, GERRIT_PORT, project_git))
-        git_command(repo_path, "add .gitreview")
-        cmd = ("commit -a -m'Added .gitreview' --author='%s'"
-               % GERRIT_GITID)
-        git_command(repo_path, cmd)
-        return "push %s HEAD:refs/heads/master"
-
-
-def update_local_copy(repo_path, track_upstream, git_opts, ssh_env):
-    # first do a clean of the branch to prevent possible
-    # problems due to previous runs
-    git_command(repo_path, "clean -fdx")
-
-    has_upstream_remote = (
-        'upstream' in git_command_output(repo_path, 'remote')[1])
-    if track_upstream:
-        # If we're configured to track upstream but the repo
-        # does not have an upstream remote, add one
-        if not has_upstream_remote:
-            git_command(
-                repo_path,
-                "remote add upstream %(upstream)s" % git_opts)
-
-        # If we're configured to track upstream, make sure that
-        # the upstream URL matches the config
-        else:
-            git_command(
-                repo_path,
-                "remote set-url upstream %(upstream)s" % git_opts)
-
-        # Now that we have any upstreams configured, fetch all of the refs
-        # we might need, pruning remote branches that no longer exist
-        git_command(
-            repo_path, "remote update --prune", env=ssh_env)
-    else:
-        # If we are not tracking upstream, then we do not need
-        # an upstream remote configured
-        if has_upstream_remote:
-            git_command(repo_path, "remote rm upstream")
-
-    # TODO(mordred): This is here so that later we can
-    # inspect the master branch for meta-info
-    # Checkout master and reset to the state of origin/master
-    git_command(repo_path, "checkout -B master origin/master")
+def fsck_repo(repo_path):
+    """Run 'git fsck --full' on repo_path; raise Exception on a non-zero
+    return code or on a zeroPaddedFilemode warning, which is accepted by
+    C git but will not be accepted by Gerrit/jgit."""
+    rc, out = u.git_command_output(repo_path, 'fsck --full')
+    if rc != 0 or 'zeroPaddedFilemode' in out:
+        log.error('git fsck of %s failed:\n%s' % (repo_path, out))
+        raise Exception('git fsck failed not importing')
 
 
 def push_to_gerrit(repo_path, project, push_string, remote_url, ssh_env):
     try:
-        git_command(repo_path, push_string % remote_url, env=ssh_env)
-        git_command(repo_path, "push --tags %s" % remote_url, env=ssh_env)
+        u.git_command(repo_path, push_string % remote_url, env=ssh_env)
+        u.git_command(repo_path, "push --tags %s" % remote_url, env=ssh_env)
     except Exception:
         log.exception(
             "Error pushing %s to Gerrit." % project)
 
 
 def sync_upstream(repo_path, project, ssh_env, upstream_prefix):
-    git_command(
+    u.git_command(
         repo_path,
         "remote update upstream --prune", env=ssh_env)
     # Any branch that exists in the upstream remote, we want
     # a local branch of, optionally prefixed with the
     # upstream prefix value
-    for branch in git_command_output(
+    for branch in u.git_command_output(
             repo_path, "branch -a")[1].split('\n'):
         if not branch.strip().startswith("remotes/upstream"):
             continue
@@ -486,17 +386,17 @@
 
         # Check out an up to date copy of the branch, so that
         # we can push it and it will get picked up below
-        git_command(repo_path, "checkout -B %s %s" % (
+        u.git_command(repo_path, "checkout -B %s %s" % (
             local_branch, branch))
 
     try:
         # Push all of the local branches to similarly named
         # Branches on gerrit. Also, push all of the tags
-        git_command(
+        u.git_command(
             repo_path,
             "push origin refs/heads/*:refs/heads/*",
             env=ssh_env)
-        git_command(repo_path, 'push origin --tags', env=ssh_env)
+        u.git_command(repo_path, 'push origin --tags', env=ssh_env)
     except Exception:
         log.exception(
             "Error pushing %s to Gerrit." % project)
@@ -518,9 +418,9 @@
         log.exception(
             "Exception processing ACLS for %s." % project)
     finally:
-        git_command(repo_path, 'reset --hard')
-        git_command(repo_path, 'checkout master')
-        git_command(repo_path, 'branch -D config')
+        u.git_command(repo_path, 'reset --hard')
+        u.git_command(repo_path, 'checkout master')
+        u.git_command(repo_path, 'branch -D config')
 
 
 def create_gerrit_project(project, project_list, gerrit):
@@ -540,14 +440,14 @@
 
     git_mirror_path = os.path.join(local_git_dir, project_git)
     if not os.path.exists(git_mirror_path):
-        (ret, output) = run_command_status(
+        (ret, output) = u.run_command_status(
             "git --bare init %s" % git_mirror_path)
         if ret:
-            run_command("rm -rf git_mirror_path")
+            u.run_command("rm -rf %s" % git_mirror_path)
             raise Exception(output)
-        run_command("chown -R %s:%s %s"
-                    % (gerrit_system_user, gerrit_system_group,
-                       git_mirror_path))
+        u.run_command(
+            "chown -R %s:%s %s" % (
+                gerrit_system_user, gerrit_system_group, git_mirror_path))
 
 
 def main():
@@ -583,15 +483,23 @@
     GITHUB_SECURE_CONFIG = registry.get_defaults(
         'github-config',
         '/etc/github/github-projects.secure.config')
+    PROJECT_CACHE_FILE = os.path.join(JEEPYB_CACHE_DIR, 'project.cache')
+    project_cache = {}
+    if os.path.exists(PROJECT_CACHE_FILE):
+        project_cache = json.loads(open(PROJECT_CACHE_FILE, 'r').read())
+    acl_cache = {}
+    for acl_file in glob.glob(os.path.join(ACL_DIR, '*/*.config')):
+        sha256 = hashlib.sha256()
+        sha256.update(open(acl_file, 'r').read())
+        acl_cache[acl_file] = sha256.hexdigest()
 
     gerrit = gerritlib.gerrit.Gerrit(GERRIT_HOST,
                                      GERRIT_USER,
                                      GERRIT_PORT,
                                      GERRIT_KEY)
     project_list = gerrit.listProjects()
-    ssh_env = make_ssh_wrapper(GERRIT_USER, GERRIT_KEY)
+    ssh_env = u.make_ssh_wrapper(GERRIT_USER, GERRIT_KEY)
     try:
-
         for section in registry.configs_list:
             project = section['project']
             if args.projects and project not in args.projects:
@@ -605,8 +513,6 @@
                 description = section.get('description', None)
                 homepage = section.get('homepage', DEFAULT_HOMEPAGE)
                 upstream = section.get('upstream', None)
-                upstream_prefix = section.get('upstream-prefix', None)
-                track_upstream = 'track-upstream' in options
                 repo_path = os.path.join(JEEPYB_CACHE_DIR, project)
 
                 # If this project doesn't want to use gerrit, exit cleanly.
@@ -624,57 +530,72 @@
                 acl_config = section.get(
                     'acl-config',
                     '%s.config' % os.path.join(ACL_DIR, project))
+                project_cache.setdefault(project, {})
 
                 # Create the project in Gerrit first, since it will fail
                 # spectacularly if its project directory or local replica
                 # already exist on disk
-                project_created = create_gerrit_project(
-                    project, project_list, gerrit)
+                project_created = project_cache[project].get(
+                    'project-created', False)
+                if not project_created:
+                    try:
+                        project_created = create_gerrit_project(
+                            project, project_list, gerrit)
+                        project_cache[project]['project-created'] = True
+                    except Exception:
+                        project_cache[project]['project-created'] = False
+                        continue
+
+                pushed_to_gerrit = project_cache[project].get(
+                    'pushed-to-gerrit', False)
+                if not pushed_to_gerrit:
+                    # We haven't pushed to gerrit, so grab the repo again
+                    if os.path.exists(repo_path):
+                        shutil.rmtree(repo_path)
+
+                    # Make Local repo
+                    push_string = u.make_local_copy(
+                        repo_path, project, project_list,
+                        git_opts, ssh_env, upstream, GERRIT_HOST,
+                        GERRIT_PORT, project_git, GERRIT_GITID)
+
+                    description = (
+                        find_description_override(repo_path)
+                        or description)
+
+                    fsck_repo(repo_path)
+
+                    if push_string:
+                        push_to_gerrit(
+                            repo_path, project, push_string,
+                            remote_url, ssh_env)
+                    project_cache[project]['pushed-to-gerrit'] = True
+                    if GERRIT_REPLICATE:
+                        gerrit.replicate(project)
 
                 # Create the repo for the local git mirror
                 create_local_mirror(
                     LOCAL_GIT_DIR, project_git,
                     GERRIT_OS_SYSTEM_USER, GERRIT_OS_SYSTEM_GROUP)
 
-                if not os.path.exists(repo_path) or project_created:
-                    # We don't have a local copy already, get one
-
-                    # Make Local repo
-                    push_string = make_local_copy(
-                        repo_path, project, project_list,
-                        git_opts, ssh_env, upstream, GERRIT_HOST, GERRIT_PORT,
-                        project_git, GERRIT_GITID)
-                else:
-                    # We do have a local copy of it already, make sure it's
-                    # in shape to have work done.
-                    update_local_copy(
-                        repo_path, track_upstream, git_opts, ssh_env)
-
-                description = (
-                    find_description_override(repo_path) or description)
-
-                if project_created:
-                    push_to_gerrit(
-                        repo_path, project, push_string, remote_url, ssh_env)
-                    if GERRIT_REPLICATE:
-                        gerrit.replicate(project)
-
-                # If we're configured to track upstream, make sure we have
-                # upstream's refs, and then push them to the appropriate
-                # branches in gerrit
-                if track_upstream:
-                    sync_upstream(repo_path, project, ssh_env, upstream_prefix)
-
                 if acl_config:
-                    process_acls(
-                        acl_config, project, ACL_DIR, section,
-                        remote_url, repo_path, ssh_env, gerrit, GERRIT_GITID)
+                    acl_sha = acl_cache.get(acl_config)
+                    if project_cache[project].get('acl-sha') != acl_sha:
+                        process_acls(
+                            acl_config, project, ACL_DIR, section,
+                            remote_url, repo_path, ssh_env, gerrit,
+                            GERRIT_GITID)
+                        project_cache[project]['acl-sha'] = acl_sha
+                    else:
+                        log.info("%s has matching sha, skipping ACLs",
+                                 project)
 
                 if 'has-github' in options or default_has_github:
-                    created = create_github_project(
+                    created = create_update_github_project(
                         DEFAULT_HAS_ISSUES, DEFAULT_HAS_DOWNLOADS,
                         DEFAULT_HAS_WIKI, GITHUB_SECURE_CONFIG,
-                        options, project, description, homepage)
+                        options, project, description, homepage,
+                        project_cache[project])
                     if created and GERRIT_REPLICATE:
                         gerrit.replicate(project)
 
@@ -682,7 +603,15 @@
                 log.exception(
                     "Problems creating %s, moving on." % project)
                 continue
+            finally:
+                # Clean up after ourselves - this repo has no use
+                if os.path.exists(repo_path):
+                    shutil.rmtree(repo_path)
     finally:
+        with open(PROJECT_CACHE_FILE, 'w') as cache_out:
+            log.info("Writing cache file %s", PROJECT_CACHE_FILE)
+            cache_out.write(json.dumps(
+                project_cache, sort_keys=True, indent=2))
         os.unlink(ssh_env['GIT_SSH'])
 
 if __name__ == "__main__":
diff --git a/jeepyb/cmd/track_upstream.py b/jeepyb/cmd/track_upstream.py
new file mode 100644
index 0000000..24e6245
--- /dev/null
+++ b/jeepyb/cmd/track_upstream.py
@@ -0,0 +1,249 @@
+#! /usr/bin/env python
+# Copyright (C) 2011 OpenStack, LLC.
+# Copyright (c) 2012 Hewlett-Packard Development Company, L.P.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+
+# track_upstream.py reads a config file called projects.ini
+# It should look like:
+
+# [projects]
+# homepage=http://openstack.org
+# gerrit-host=review.openstack.org
+# local-git-dir=/var/lib/git
+# gerrit-key=/home/gerrit2/review_site/etc/ssh_host_rsa_key
+# gerrit-committer=Project Creator <openstack-infra@lists.openstack.org>
+# gerrit-replicate=True
+# has-github=True
+# has-wiki=False
+# has-issues=False
+# has-downloads=False
+# acl-dir=/home/gerrit2/acls
+# acl-base=/home/gerrit2/acls/project.config
+#
+# track_upstream.py reads a project listing file called projects.yaml
+# It should look like:
+# - project: PROJECT_NAME
+#   options:
+#    - has-wiki
+#    - has-issues
+#    - has-downloads
+#    - has-pull-requests
+#    - track-upstream
+#   homepage: Some homepage that isn't http://openstack.org
+#   description: This is a great project
+#   upstream: https://gerrit.googlesource.com/gerrit
+#   upstream-prefix: upstream
+#   acl-config: /path/to/gerrit/project.config
+#   acl-append:
+#     - /path/to/gerrit/project.config
+#   acl-parameters:
+#     project: OTHER_PROJECT_NAME
+
+import argparse
+import json
+import logging
+import os
+
+import gerritlib.gerrit
+
+import jeepyb.log as l
+import jeepyb.utils as u
+
+registry = u.ProjectsRegistry()
+
+log = logging.getLogger("track_upstream")
+orgs = None
+
+
+def update_local_copy(repo_path, track_upstream, git_opts, ssh_env):
+    """Refresh an existing clone at repo_path and sync its upstream remote.
+
+    The work tree is cleaned, the 'upstream' remote is added, re-pointed
+    or removed depending on track_upstream, and master is reset to
+    origin/master.  git_opts must supply the 'upstream' URL and ssh_env
+    is the environment used for the network fetch.
+    """
+    # Clean first so leftovers from a previous run cannot interfere.
+    u.git_command(repo_path, "clean -fdx")
+
+    # 'git remote' prints one name per line; compare whole names so that
+    # a remote such as 'upstream-mirror' is not mistaken for 'upstream'.
+    remotes = u.git_command_output(repo_path, 'remote')[1].split()
+    has_upstream_remote = 'upstream' in remotes
+    if track_upstream:
+        # Add the remote when missing, otherwise make sure its URL
+        # matches the configuration.
+        if has_upstream_remote:
+            remote_cmd = "remote set-url upstream %(upstream)s"
+        else:
+            remote_cmd = "remote add upstream %(upstream)s"
+        u.git_command(repo_path, remote_cmd % git_opts)
+
+        # Fetch every ref we might need, pruning remote branches that
+        # no longer exist.
+        u.git_command(
+            repo_path, "remote update --prune", env=ssh_env)
+    elif has_upstream_remote:
+        # Not tracking upstream, so the remote is not needed.
+        u.git_command(repo_path, "remote rm upstream")
+
+    # TODO(mordred): This is here so that later we can
+    # inspect the master branch for meta-info
+    # Checkout master and reset to the state of origin/master
+    u.git_command(repo_path, "checkout -B master origin/master")
+
+
+def fsck_repo(repo_path):
+    """Raise Exception if 'git fsck --full' reports a problem in repo_path."""
+    status, report = u.git_command_output(repo_path, 'fsck --full')
+    # zeroPaddedFilemode is only a warning for C git, but Gerrit/jgit
+    # rejects such objects, so it is treated like a non-zero exit.
+    if not (status == 0 and 'zeroPaddedFilemode' not in report):
+        log.error('git fsck of %s failed:\n%s' % (repo_path, report))
+        raise Exception('git fsck failed not importing')
+
+
+def push_to_gerrit(repo_path, project, push_string, remote_url, ssh_env):
+    """Run push_string against remote_url, then push tags; log exceptions."""
+    try:
+        for template in (push_string, "push --tags %s"):
+            u.git_command(repo_path, template % remote_url, env=ssh_env)
+    except Exception:
+        log.exception("Error pushing %s to Gerrit." % project)
+
+
+def sync_upstream(repo_path, project, ssh_env, upstream_prefix):
+    """Mirror every branch of the 'upstream' remote to origin.
+
+    Each remotes/upstream/<name> branch is checked out locally as <name>
+    (or <upstream_prefix>/<name> when a prefix is given); then all local
+    branches and tags are pushed to origin.  Exceptions raised while
+    pushing are logged rather than propagated.
+    """
+    u.git_command(
+        repo_path, "remote update upstream --prune", env=ssh_env)
+    remote_ns = 'remotes/upstream/'
+    listing = u.git_command_output(repo_path, "branch -a")[1]
+    for line in listing.split('\n'):
+        branch = line.strip()
+        # Match the whole 'upstream/' namespace so a remote such as
+        # 'upstream-old' is not picked up; skip symbolic refs ("->").
+        if not branch.startswith(remote_ns) or "->" in branch:
+            continue
+        local_branch = branch.split()[0][len(remote_ns):]
+        if upstream_prefix:
+            local_branch = "%s/%s" % (upstream_prefix, local_branch)
+        # Check out an up to date copy of the branch, so that
+        # we can push it and it will get picked up below
+        u.git_command(
+            repo_path, "checkout -B %s %s" % (local_branch, branch))
+
+    try:
+        # Push all of the local branches to similarly named
+        # branches on gerrit. Also, push all of the tags
+        u.git_command(
+            repo_path, "push origin refs/heads/*:refs/heads/*", env=ssh_env)
+        u.git_command(repo_path, 'push origin --tags', env=ssh_env)
+    except Exception:
+        log.exception("Error pushing %s to Gerrit." % project)
+
+
+def main():
+    """Sync gerrit with upstream for every 'track-upstream' project.
+
+    Projects are skipped when they are not among the names given on the
+    command line, opt out with 'no-gerrit', or are not recorded as
+    pushed to gerrit in the project cache.  An error in one project is
+    logged and processing continues with the next.
+    """
+    parser = argparse.ArgumentParser(description='Manage projects')
+    l.setup_logging_arguments(parser)
+    parser.add_argument('--nocleanup', action='store_true',
+                        help='do not remove temp directories')
+    parser.add_argument('projects', metavar='project', nargs='*',
+                        help='name of project(s) to process')
+    args = parser.parse_args()
+    l.configure_logging(args)
+
+    JEEPYB_CACHE_DIR = registry.get_defaults('jeepyb-cache-dir',
+                                             '/var/lib/jeepyb')
+    IMPORT_DIR = os.path.join(JEEPYB_CACHE_DIR, 'import')
+    GERRIT_HOST = registry.get_defaults('gerrit-host')
+    GERRIT_PORT = int(registry.get_defaults('gerrit-port', '29418'))
+    GERRIT_USER = registry.get_defaults('gerrit-user')
+    GERRIT_KEY = registry.get_defaults('gerrit-key')
+    GERRIT_GITID = registry.get_defaults('gerrit-committer')
+
+    PROJECT_CACHE_FILE = os.path.join(JEEPYB_CACHE_DIR, 'project.cache')
+    project_cache = {}
+    if os.path.exists(PROJECT_CACHE_FILE):
+        with open(PROJECT_CACHE_FILE, 'r') as cache_in:
+            project_cache = json.load(cache_in)
+
+    gerrit = gerritlib.gerrit.Gerrit(
+        GERRIT_HOST, GERRIT_USER, GERRIT_PORT, GERRIT_KEY)
+    project_list = gerrit.listProjects()
+    ssh_env = u.make_ssh_wrapper(GERRIT_USER, GERRIT_KEY)
+    try:
+        for section in registry.configs_list:
+            project = section['project']
+            if args.projects and project not in args.projects:
+                continue
+
+            try:
+                log.info("Processing project: %s" % project)
+                # Only gerrit-hosted projects tracking an upstream matter.
+                options = section.get('options', dict())
+                track_upstream = 'track-upstream' in options
+                if not track_upstream or 'no-gerrit' in options:
+                    continue
+
+                upstream = section.get('upstream', None)
+                upstream_prefix = section.get('upstream-prefix', None)
+                repo_path = os.path.join(IMPORT_DIR, project)
+
+                project_git = "%s.git" % project
+                remote_url = "ssh://%s:%s/%s" % (
+                    GERRIT_HOST, GERRIT_PORT, project)
+                git_opts = dict(upstream=upstream,
+                                repo_path=repo_path,
+                                remote_url=remote_url)
+
+                # A project new to the cache has no 'pushed-to-gerrit'
+                # key yet: treat that as "not pushed", not a KeyError.
+                cache_entry = project_cache.setdefault(project, {})
+                if not cache_entry.get('pushed-to-gerrit'):
+                    continue
+
+                # Make Local repo
+                if not os.path.exists(repo_path):
+                    u.make_local_copy(
+                        repo_path, project, project_list,
+                        git_opts, ssh_env, upstream, GERRIT_HOST,
+                        GERRIT_PORT, project_git, GERRIT_GITID)
+                else:
+                    update_local_copy(
+                        repo_path, track_upstream, git_opts, ssh_env)
+
+                fsck_repo(repo_path)
+                sync_upstream(repo_path, project, ssh_env, upstream_prefix)
+
+            except Exception:
+                log.exception("Problems creating %s, moving on." % project)
+                continue
+    finally:
+        os.unlink(ssh_env['GIT_SSH'])
+
+if __name__ == "__main__":
+    main()
diff --git a/jeepyb/cmd/update_bug.py b/jeepyb/cmd/update_bug.py
index 5f457f5..aaf19b8 100644
--- a/jeepyb/cmd/update_bug.py
+++ b/jeepyb/cmd/update_bug.py
@@ -100,7 +100,7 @@
     # +-----------------+-----------------------------------------+
     # | email_address   | external_id                             |
     # +-----------------+-----------------------------------------+
-    # | plugh@xyzzy.com | https://login.launchpad.net/+id/fR0bnU1 |
+    # | plugh@xyzzy.com | https://login.ubuntu.com/+id/fR0bnU1    |
     # | bar@foo.org     | mailto:bar@foo.org                      |
     # | NULL            | username:quux                           |
     # +-----------------+-----------------------------------------+
@@ -112,7 +112,7 @@
                 SELECT t.account_id FROM account_external_ids t
                 WHERE t.email_address = %s )
             original ON t.account_id = original.account_id
-            AND t.external_id LIKE 'https://login.launchpad.net%%'"""
+            AND t.external_id LIKE 'https://login.ubuntu.com%%'"""
 
     cursor = jeepyb.gerritdb.connect().cursor()
     cursor.execute(query, searchkey)
diff --git a/jeepyb/utils.py b/jeepyb/utils.py
index 7b04dc1..f79257d 100644
--- a/jeepyb/utils.py
+++ b/jeepyb/utils.py
@@ -13,18 +13,147 @@
 # under the License.
 
 import ConfigParser
+import logging
 import os
+import shlex
+import subprocess
+import tempfile
 import yaml
 
 PROJECTS_INI = os.environ.get('PROJECTS_INI', '/home/gerrit2/projects.ini')
 PROJECTS_YAML = os.environ.get('PROJECTS_YAML', '/home/gerrit2/projects.yaml')
 
+log = logging.getLogger("jeepyb.utils")
+
 
 def short_project_name(full_project_name):
     """Return the project part of the git repository name."""
     return full_project_name.split('/')[-1]
 
 
+def run_command(cmd, status=False, env=None):
+    """Run cmd; return its stripped output, or (rc, output) if status."""
+    cmd_list = shlex.split(str(cmd))
+    # Work on a copy: updating os.environ in place would leak the
+    # overrides (e.g. GIT_SSH) into every later command.
+    newenv = dict(os.environ)
+    newenv.update(env or {})
+    log.info("Executing command: %s" % " ".join(cmd_list))
+    p = subprocess.Popen(cmd_list, stdout=subprocess.PIPE,
+                         stderr=subprocess.STDOUT, env=newenv)
+    out = p.communicate()[0].strip()
+    log.debug("Return code: %s" % p.returncode)
+    log.debug("Command said: %s" % out)
+    return (p.returncode, out) if status else out
+
+
+def run_command_status(cmd, env=None):
+    """Run cmd through run_command and return (returncode, output)."""
+    return run_command(cmd, status=True, env=env or {})
+
+
+def git_command(repo_dir, sub_cmd, env=None):
+    """Run git sub_cmd on the repo at repo_dir; return the exit status."""
+    # Point git at the repository explicitly so the cwd does not matter.
+    git_invocation = "git --git-dir=%s --work-tree=%s %s" % (
+        os.path.join(repo_dir, '.git'), repo_dir, sub_cmd)
+    return run_command(git_invocation, True, env or {})[0]
+
+
+def git_command_output(repo_dir, sub_cmd, env=None):
+    """Run git sub_cmd on the repo at repo_dir; return (status, output)."""
+    git_args = (os.path.join(repo_dir, '.git'), repo_dir, sub_cmd)
+    rc, text = run_command(
+        "git --git-dir=%s --work-tree=%s %s" % git_args, True, env or {})
+    return (rc, text)
+
+
+def make_ssh_wrapper(gerrit_user, gerrit_key):
+    """Create an executable ssh wrapper script; return {'GIT_SSH': path}."""
+    (fd, name) = tempfile.mkstemp(text=True)
+    ssh_line = 'ssh -i %s -l %s -o "StrictHostKeyChecking no" $@\n' % (
+        gerrit_key, gerrit_user)
+    os.write(fd, '#!/bin/bash\n' + ssh_line)
+    os.close(fd)
+    os.chmod(name, 0o755)
+    return {'GIT_SSH': name}
+
+
+def make_local_copy(repo_path, project, project_list,
+                    git_opts, ssh_env, upstream, GERRIT_HOST, GERRIT_PORT,
+                    project_git, GERRIT_GITID):
+    """Create a local working copy of project at repo_path.
+
+    Returns None when the repository was cloned from gerrit, otherwise
+    a push command template whose %s takes the remote URL.
+    """
+    # Ensure that the base location exists
+    base_dir = os.path.dirname(repo_path)
+    if not os.path.exists(base_dir):
+        os.makedirs(base_dir)
+
+    # Three choices
+    #  - If gerrit has it, get from gerrit
+    #  - If gerrit doesn't have it:
+    #    - If it has an upstream, clone that
+    #    - If it doesn't, create it
+
+    # Gerrit knows about the project, clone it
+    # TODO(mordred): there is a possible failure condition here
+    #                we should consider 'gerrit has it' to be
+    #                'gerrit repo has a master branch'
+    if project in project_list:
+        try:
+            run_command(
+                "git clone %(remote_url)s %(repo_path)s" % git_opts,
+                env=ssh_env)
+            if upstream:
+                git_command(
+                    repo_path,
+                    "remote add -f upstream %(upstream)s" % git_opts)
+            return None
+        except Exception:
+            # Fall back to upstream / a fresh repo below, but record
+            # the error instead of discarding it silently.
+            log.exception("Cloning %s from gerrit failed." % project)
+
+    # Gerrit doesn't have it, but it has an upstream configured
+    # We're probably importing it for the first time, clone
+    # upstream, but then ongoing we want gerrit to be origin
+    # and upstream to be only there for ongoing tracking
+    # purposes, so rename origin to upstream and add a new
+    # origin remote that points at gerrit
+    if upstream:
+        run_command(
+            "git clone %(upstream)s %(repo_path)s" % git_opts,
+            env=ssh_env)
+        git_command(
+            repo_path,
+            "fetch origin +refs/heads/*:refs/copy/heads/*",
+            env=ssh_env)
+        git_command(repo_path, "remote rename origin upstream")
+        git_command(
+            repo_path,
+            "remote add origin %(remote_url)s" % git_opts)
+        return "push %s +refs/copy/heads/*:refs/heads/*"
+
+    # Neither gerrit has it, nor does it have an upstream,
+    # just create a whole new one
+    run_command("git init %s" % repo_path)
+    git_command(
+        repo_path, "remote add origin %(remote_url)s" % git_opts)
+    with open(os.path.join(repo_path, ".gitreview"), 'w') as gitreview:
+        gitreview.write("""[gerrit]
+host=%s
+port=%s
+project=%s
+""" % (GERRIT_HOST, GERRIT_PORT, project_git))
+    git_command(repo_path, "add .gitreview")
+    cmd = "commit -a -m'Added .gitreview' --author='%s'" % GERRIT_GITID
+    git_command(repo_path, cmd)
+    return "push %s HEAD:refs/heads/master"
+
+
 class ProjectsRegistry(object):
     """read config from ini or yaml file.
 
diff --git a/setup.cfg b/setup.cfg
index 8cc0e28..86ffa5a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -27,6 +27,7 @@
     openstackwatch = jeepyb.cmd.openstackwatch:main
     process-cache = jeepyb.cmd.process_cache:main
     register-zanata-projects = jeepyb.cmd.register_zanata_projects:main
+    track-upstream = jeepyb.cmd.track_upstream:main
     trivial-rebase = jeepyb.cmd.trivial_rebase:main
     update-blueprint = jeepyb.cmd.update_blueprint:main
     update-bug = jeepyb.cmd.update_bug:main