Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RFR] Avoid contacting twice (Fixes #28) #74

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 133 additions & 40 deletions git_aggregator/repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
# License AGPLv3 (http://www.gnu.org/licenses/agpl-3.0-standalone.html)
# Parts of the code comes from ANYBOX
# https://github.com/anybox/anybox.recipe.odoo

from __future__ import unicode_literals

import os
import logging
import re
import subprocess
import collections

import requests

Expand Down Expand Up @@ -131,24 +134,40 @@ def init_git_version(cls, v_str):
"report this" % v_str)
return version

def query_remote_ref(self, remote, ref):
"""Query remote repo about given ref.
:return: ``('tag', sha)`` if ref is a tag in remote
def query_remote(self, remote, refs=None):
"""Query remote repo optionaly about given refs

:return: iterator of
``('tag', sha)`` if ref is a tag in remote
``('branch', sha)`` if ref is branch (aka "head") in remote
``(None, ref)`` if ref does not exist in remote. This happens
notably if ref if a commit sha (they can't be queried)
"""
out = self.log_call(['git', 'ls-remote', remote, ref],
cmd = ['git', 'ls-remote', remote]
if refs is not None:
if isinstance(refs, str):
refs = [refs]
cmd += refs
out = self.log_call(cmd,
cwd=self.cwd if os.path.exists(self.cwd) else None,
callwith=subprocess.check_output).strip()
if len(out) == 0:
for ref in refs:
yield None, ref, ref
return
for sha, fullref in (line.split() for line in out.splitlines()):
if fullref == 'refs/heads/' + ref:
return 'branch', sha
elif fullref == 'refs/tags/' + ref:
return 'tag', sha
elif fullref == ref and ref == 'HEAD':
return 'HEAD', sha
return None, ref
if fullref.startswith('refs/heads/'):
ref = fullref.split("/")[-1]
yield 'branch', ref, sha
elif fullref.startswith('refs/tags/'):
ref = fullref.split("/")[-1]
yield 'tag', ref, sha
elif fullref == 'HEAD':
yield 'HEAD', fullref, sha
else:
raise GitAggregatorException(
"Unrecognized type for value from ls-remote: %r"
% (fullref, ))

def log_call(self, cmd, callwith=subprocess.check_call,
log_level=logging.DEBUG, **kw):
Expand All @@ -166,9 +185,10 @@ def log_call(self, cmd, callwith=subprocess.check_call,
return ret

def aggregate(self):
""" Aggregate all merges into the target branch
If the target_dir doesn't exist, create an empty git repo otherwise
clean it, add all remotes , and merge all merges.
"""Aggregate all merges into the target branch

If the target_dir doesn't exist, create an empty git repo
otherwise clean it, add all remotes, and merge all merges.
"""
logger.info('Start aggregation of %s', self.cwd)
target_dir = self.cwd
Expand All @@ -177,18 +197,26 @@ def aggregate(self):
if is_new:
cloned = self.init_repository(target_dir)

self._switch_to_branch(self.target['branch'])
for r in self.remotes:
self._set_remote(**r)
self.fetch()
fetch_heads = self.fetch()
logger.debug("fetch_heads: %r", fetch_heads)
merges = self.merges
if not is_new or cloned:
origin = merges[0]
origin_sha1 = fetch_heads[(origin["remote"],
origin["ref"])]
merges = merges[1:]
if is_new and not cloned:
self._switch_to_branch(
self.target['branch'],
origin_sha1)
else:
# reset to the first merge
origin = merges[0]
merges = merges[1:]
self._reset_to(origin["remote"], origin["ref"])
self._reset_to(origin_sha1)
for merge in merges:
self._merge(merge)
logger.info("Merge %s, %s", merge["remote"], merge["ref"])
self._merge(fetch_heads[(merge["remote"],
merge["ref"])])
self._execute_shell_command_after()
logger.info('End aggregation of %s', self.cwd)

Expand Down Expand Up @@ -229,7 +257,7 @@ def init_repository(self, target_dir):
# repository
cmd += ('--filter=blob:none',)
# Try to clone target branch, if it exists
rtype, _sha = self.query_remote_ref(repository, branch)
rtype, _ref, _sha = list(self.query_remote(repository, branch))[0]
if rtype in {'branch', 'tag'}:
cmd += ('-b', branch)
# Emtpy fetch options to use global default for 1st clone
Expand All @@ -239,13 +267,74 @@ def init_repository(self, target_dir):
return True

def fetch(self):
"""Fetch all given (remote, ref) and associate their SHA

Will query and fetch all (remote, ref) in current git repository,
it'll take care to resolve each tuple in the local commit's SHA1

It returns a dict structure associating each (remote, ref) to their
SHA in local repository.
"""
merges_requested = [(m["remote"], m["ref"])
for m in self.merges]
basecmd = ("git", "fetch")
logger.info("Fetching required remotes")
for merge in self.merges:
cmd = basecmd + self._fetch_options(merge) + (merge["remote"],)
if merge["remote"] not in self.fetch_all:
cmd += (merge["ref"],)
fetch_heads = {}
ls_remote_refs = collections.defaultdict(list) # to ls-query
while merges_requested:
remote, ref = merges_requested[0]
merges_requested = merges_requested[1:]
cmd = (
basecmd +
self._fetch_options({"remote": remote, "ref": ref}) +
(remote,))
if remote not in self.fetch_all:
cmd += (ref, )
else:
ls_remote_refs[remote].append(ref)
self.log_call(cmd, cwd=self.cwd)
with open(os.path.join(self.cwd, ".git", "FETCH_HEAD"), "r") as f:
for line in f:
fetch_head, for_merge, _ = line.split("\t")
if for_merge == "not-for-merge":
continue
break
fetch_heads[(remote, ref)] = fetch_head
if self.fetch_all:
if self.fetch_all is True:
remotes = self.remotes
else:
remotes = [r for r in self.remotes
if r["name"] in self.fetch_all]
for remote in remotes:
refs = self.query_remote(
remote["url"],
ls_remote_refs[remote["name"]])
for _, ref, sha in refs:
if (remote["name"], ref) in merges_requested:
merges_requested.remove((remote["name"], ref))
fetch_heads[(remote["name"], ref)] = sha
if len(merges_requested):
# Last case: our ref is a sha and remote git repository does
# not support querying commit directly by SHA. In this case
# we need just to check if ref is actually SHA, and if we have
# this SHA locally.
for remote, ref in merges_requested:
if not re.search("[0-9a-f]{4,}", ref):
raise ValueError("Could not resolv ref %r on remote %r"
% (ref, remote))
valid_local_shas = self.log_call(
['git', 'rev-parse', '-v'] + [sha
for _r, sha in merges_requested],
cwd=self.cwd, callwith=subprocess.check_output
).strip().splitlines()
for remote, sha in merges_requested:
if sha not in valid_local_shas:
raise ValueError(
"Could not find SHA ref %r after fetch on remote %r"
% (ref, remote))
fetch_heads[(remote["name"], sha)] = sha
return fetch_heads

def push(self):
remote = self.target['remote']
Expand All @@ -255,7 +344,10 @@ def push(self):
"Cannot push %s, no target remote configured" % branch
)
logger.info("Push %s to %s", branch, remote)
self.log_call(['git', 'push', '-f', remote, branch], cwd=self.cwd)
self.log_call(
['git', 'push', '-f', remote, "HEAD:%s" % branch],
cwd=self.cwd
)

def _check_status(self):
"""Check repo status and except if dirty."""
Expand All @@ -277,42 +369,43 @@ def _fetch_options(self, merge):
cmd += ("--%s" % option, str(value))
return cmd

def _reset_to(self, remote, ref):
def _reset_to(self, ref):
if not self.force:
self._check_status()
logger.info('Reset branch to %s %s', remote, ref)
rtype, sha = self.query_remote_ref(remote, ref)
if rtype is None and not ishex(ref):
raise GitAggregatorException(
'Could not reset %s to %s. No commit found for %s '
% (remote, ref, ref))
cmd = ['git', 'reset', '--hard', sha]
logger.info('Reset branch to %s', ref)
cmd = ['git', 'reset', '--hard', ref]
if logger.getEffectiveLevel() != logging.DEBUG:
cmd.insert(2, '--quiet')
self.log_call(cmd, cwd=self.cwd)
self.log_call(['git', 'clean', '-ffd'], cwd=self.cwd)

def _switch_to_branch(self, branch_name):
def _switch_to_branch(self, branch_name, ref=None):
# check if the branch already exists
logger.info("Switch to branch %s", branch_name)
self.log_call(['git', 'checkout', '-B', branch_name], cwd=self.cwd)
cmd = ['git', 'checkout', '-B', branch_name]
if ref is not None:
sha1 = self.log_call(
['git', 'rev-parse', ref],
callwith=subprocess.check_output,
cwd=self.cwd).strip()
cmd.append(sha1)
self.log_call(cmd, cwd=self.cwd)

def _execute_shell_command_after(self):
logger.info('Execute shell after commands')
for cmd in self.shell_command_after:
self.log_call(cmd, shell=True, cwd=self.cwd)

def _merge(self, merge):
logger.info("Pull %s, %s", merge["remote"], merge["ref"])
cmd = ("git", "pull", "--ff", "--no-rebase")
cmd = ("git", "merge", "--ff")
if self.git_version >= (1, 7, 10):
# --edit and --no-edit appear with Git 1.7.10
# see Documentation/RelNotes/1.7.10.txt of Git
# (https://git.kernel.org/cgit/git/git.git/tree)
cmd += ('--no-edit',)
if logger.getEffectiveLevel() != logging.DEBUG:
cmd += ('--quiet',)
cmd += self._fetch_options(merge) + (merge["remote"], merge["ref"])
cmd += (merge,)
self.log_call(cmd, cwd=self.cwd)

def _get_remotes(self):
Expand Down
58 changes: 55 additions & 3 deletions tests/test_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ def setUp(self):
commit 1 -> fork after -> remote 2
tag1
commit 2
*remote2 (clone remote 1)
annotated tag2
* remote2 (clone remote 1)
commit 1
commit 3
branch b2
Expand All @@ -94,6 +95,8 @@ def setUp(self):
subprocess.check_call(['git', 'tag', 'tag1'], cwd=self.remote1)
self.commit_2_sha = git_write_commit(
self.remote1, 'tracked', "last", msg="last commit")
subprocess.check_call(['git', 'tag', '-am', 'foo', 'tag2'],
cwd=self.remote1)
self.commit_3_sha = git_write_commit(
self.remote2, 'tracked2', "remote2", msg="new commit")
subprocess.check_call(['git', 'checkout', '-b', 'b2'],
Expand Down Expand Up @@ -121,6 +124,24 @@ def test_minimal(self):
last_rev = git_get_last_rev(self.cwd)
self.assertEqual(last_rev, self.commit_1_sha)

def test_annotated_tag(self):
remotes = [{
'name': 'r1',
'url': self.url_remote1
}]
merges = [{
'remote': 'r1',
'ref': 'tag2'
}]
target = {
'remote': 'r1',
'branch': 'agg1'
}
repo = Repo(self.cwd, remotes, merges, target)
repo.aggregate()
last_rev = git_get_last_rev(self.cwd)
self.assertEqual(last_rev, self.commit_2_sha)

def test_simple_merge(self):
remotes = [{
'name': 'r1',
Expand All @@ -146,8 +167,39 @@ def test_simple_merge(self):
self.assertEqual(last_rev, self.commit_3_sha)
# push
repo.push()
rtype, sha = repo.query_remote_ref('r1', 'agg')
self.assertEqual(rtype, 'branch')
rtype, ref, sha = list(repo.query_remote('r1', 'agg'))[0]
self.assertEquals(rtype, 'branch')
self.assertTrue(sha)

def test_simple_merge_2(self):
# Launched from an existing git repository
remotes = [{
'name': 'r1',
'url': self.url_remote1
}, {
'name': 'r2',
'url': self.url_remote2
}]
merges = [{
'remote': 'r1',
'ref': 'tag1'
}, {
'remote': 'r2',
'ref': self.commit_3_sha
}]
target = {
'remote': 'r1',
'branch': 'agg'
}
subprocess.call(['git', 'init', self.cwd])
repo = Repo(self.cwd, remotes, merges, target, fetch_all=True)
repo.aggregate()
last_rev = git_get_last_rev(self.cwd)
self.assertEqual(last_rev, self.commit_3_sha)
# push
repo.push()
rtype, ref, sha = list(repo.query_remote('r1', 'agg'))[0]
self.assertEquals(rtype, 'branch')
self.assertTrue(sha)

def test_push_missing_remote(self):
Expand Down