diff --git a/git_aggregator/repo.py b/git_aggregator/repo.py index cb49fd4..0428ec1 100644 --- a/git_aggregator/repo.py +++ b/git_aggregator/repo.py @@ -3,11 +3,14 @@ # License AGPLv3 (http://www.gnu.org/licenses/agpl-3.0-standalone.html) # Parts of the code comes from ANYBOX # https://github.com/anybox/anybox.recipe.odoo + from __future__ import unicode_literals + import os import logging import re import subprocess +import collections import requests @@ -131,24 +134,38 @@ def init_git_version(cls, v_str): "report this" % v_str) return version - def query_remote_ref(self, remote, ref): - """Query remote repo about given ref. - :return: ``('tag', sha)`` if ref is a tag in remote + def query_remote(self, remote, refs=None): + """Query remote repo optionaly about given refs + + :return: iterator of + ``('tag', sha)`` if ref is a tag in remote ``('branch', sha)`` if ref is branch (aka "head") in remote ``(None, ref)`` if ref does not exist in remote. This happens notably if ref if a commit sha (they can't be queried) """ - out = self.log_call(['git', 'ls-remote', remote, ref], + cmd = ['git', 'ls-remote', remote] + if refs is not None: + if isinstance(refs, str): + refs = [refs] + cmd += refs + out = self.log_call(cmd, cwd=self.cwd, callwith=subprocess.check_output).strip() + if len(out) == 0: + for ref in refs: + yield None, ref, ref + return for sha, fullref in (l.split() for l in out.splitlines()): - if fullref == 'refs/heads/' + ref: - return 'branch', sha - elif fullref == 'refs/tags/' + ref: - return 'tag', sha - elif fullref == ref and ref == 'HEAD': - return 'HEAD', sha - return None, ref + if fullref.startswith('refs/heads/'): + yield 'branch', fullref, sha + elif fullref.startswith('refs/tags/'): + yield 'tag', fullref, sha + elif fullref == 'HEAD': + yield 'HEAD', fullref, sha + else: + raise GitAggregatorException( + "Unrecognized type for value from ls-remote: %r" + % (fullref, )) def log_call(self, cmd, callwith=subprocess.check_call, log_level=logging.DEBUG, **kw): @@ -162,9 +179,10 @@ def log_call(self, cmd, callwith=subprocess.check_call, return ret def aggregate(self): - """ Aggregate all merges into the target branch - If the target_dir doesn't exist, create an empty git repo otherwise - clean it, add all remotes , and merge all merges. + """Aggregate all merges into the target branch + + If the target_dir doesn't exist, create an empty git repo + otherwise clean it, add all remotes, and merge all merges. """ logger.info('Start aggregation of %s', self.cwd) target_dir = self.cwd @@ -173,18 +191,26 @@ def aggregate(self): if is_new: self.init_repository(target_dir) - self._switch_to_branch(self.target['branch']) for r in self.remotes: self._set_remote(**r) - self.fetch() + fetch_heads = self.fetch() + logger.debug("fetch_heads: %r", fetch_heads) merges = self.merges - if not is_new: + origin = merges[0] + origin_sha1 = fetch_heads[(origin["remote"], + origin["ref"])] + merges = merges[1:] + if is_new: + self._switch_to_branch( + self.target['branch'], + origin_sha1) + else: # reset to the first merge - origin = merges[0] - merges = merges[1:] - self._reset_to(origin["remote"], origin["ref"]) + self._reset_to(origin_sha1) for merge in merges: - self._merge(merge) + logger.info("Merge %s, %s", merge["remote"], merge["ref"]) + self._merge(fetch_heads[(merge["remote"], + merge["ref"])]) self._execute_shell_command_after() logger.info('End aggregation of %s', self.cwd) @@ -193,19 +219,81 @@ def init_repository(self, target_dir): self.log_call(['git', 'init', target_dir]) def fetch(self): + """Fetch all given (remote, ref) and associate their SHA + + Will query and fetch all (remote, ref) in current git repository, + it'll take care to resolve each tuple in the local commit's SHA1 + + It returns a dict structure associating each (remote, ref) to their + SHA in local repository. + """ + merges_requested = [(m["remote"], m["ref"]) + for m in self.merges] basecmd = ("git", "fetch") logger.info("Fetching required remotes") - for merge in self.merges: - cmd = basecmd + self._fetch_options(merge) + (merge["remote"],) - if merge["remote"] not in self.fetch_all: - cmd += (merge["ref"],) + fetch_heads = {} + ls_remote_refs = collections.defaultdict(list) # to ls-query + while merges_requested: + remote, ref = merges_requested[0] + merges_requested = merges_requested[1:] + cmd = ( + basecmd + + self._fetch_options({"remote": remote, "ref": ref}) + + (remote,)) + if remote not in self.fetch_all: + cmd += (ref, ) + else: + ls_remote_refs[remote].append(ref) self.log_call(cmd, cwd=self.cwd) + with open(os.path.join(self.cwd, ".git", "FETCH_HEAD"), "r") as f: + for line in f: + fetch_head, for_merge, _ = line.split("\t") + if for_merge == "not-for-merge": + continue + break + fetch_heads[(remote, ref)] = fetch_head + if self.fetch_all: + if self.fetch_all is True: + remotes = self.remotes + else: + remotes = [r for r in self.remotes + if r["name"] in self.fetch_all] + for remote in remotes: + refs = self.query_remote( + remote["url"], + ls_remote_refs[remote["name"]]) + for _, ref, sha in refs: + if (remote["name"], ref) in merges_requested: + merges_requested.remove((remote["name"], ref)) + fetch_heads[(remote["name"], ref)] = sha + if len(merges_requested): + # Last case: our ref is a sha and remote git repository does + # not support querying commit directly by SHA. In this case + # we need just to check if ref is actually SHA, and if we have + # this SHA locally. + for remote, ref in merges_requested: + if not re.search("[0-9a-f]{4,}", ref): + raise ValueError("Could not resolv ref %r on remote %r" + % (ref, remote)) + valid_local_shas = self.log_call( + ['git', 'rev-parse', '-v'] + [sha + for _r, sha in merges_requested], + cwd=self.cwd, callwith=subprocess.check_output + ).strip().splitlines() + for remote, sha in merges_requested: + if sha not in valid_local_shas: + raise ValueError( + "Could not find SHA ref %r after fetch on remote %r" + % (ref, remote)) + fetch_heads[(remote["name"], sha)] = sha + return fetch_heads def push(self): remote = self.target['remote'] branch = self.target['branch'] logger.info("Push %s to %s", branch, remote) - self.log_call(['git', 'push', '-f', remote, branch], cwd=self.cwd) + self.log_call(['git', 'push', '-f', remote, "HEAD:%s" % branch], + cwd=self.cwd) def _check_status(self): """Check repo status and except if dirty.""" @@ -227,25 +315,27 @@ def _fetch_options(self, merge): cmd += ("--%s" % option, str(value)) return cmd - def _reset_to(self, remote, ref): + def _reset_to(self, ref): if not self.force: self._check_status() - logger.info('Reset branch to %s %s', remote, ref) - rtype, sha = self.query_remote_ref(remote, ref) - if rtype is None and not ishex(ref): - raise GitAggregatorException( - 'Could not reset %s to %s. No commit found for %s ' - % (remote, ref, ref)) - cmd = ['git', 'reset', '--hard', sha] + logger.info('Reset branch to %s', ref) + cmd = ['git', 'reset', '--hard', ref] if logger.getEffectiveLevel() != logging.DEBUG: cmd.insert(2, '--quiet') self.log_call(cmd, cwd=self.cwd) self.log_call(['git', 'clean', '-ffd'], cwd=self.cwd) - def _switch_to_branch(self, branch_name): + def _switch_to_branch(self, branch_name, ref=None): # check if the branch already exists logger.info("Switch to branch %s", branch_name) - self.log_call(['git', 'checkout', '-B', branch_name], cwd=self.cwd) + cmd = ['git', 'checkout', '-B', branch_name] + if ref is not None: + sha1 = self.log_call( + ['git', 'rev-parse', ref], + callwith=subprocess.check_output, + cwd=self.cwd).strip() + cmd.append(sha1) + self.log_call(cmd, cwd=self.cwd) def _execute_shell_command_after(self): logger.info('Execute shell after commands') @@ -253,8 +343,7 @@ def _execute_shell_command_after(self): self.log_call(cmd, shell=True, cwd=self.cwd) def _merge(self, merge): - logger.info("Pull %s, %s", merge["remote"], merge["ref"]) - cmd = ("git", "pull") + cmd = ("git", "merge") if self.git_version >= (1, 7, 10): # --edit and --no-edit appear with Git 1.7.10 # see Documentation/RelNotes/1.7.10.txt of Git @@ -262,7 +351,7 @@ def _merge(self, merge): cmd += ('--no-edit',) if logger.getEffectiveLevel() != logging.DEBUG: cmd += ('--quiet',) - cmd += self._fetch_options(merge) + (merge["remote"], merge["ref"]) + cmd += (merge,) self.log_call(cmd, cwd=self.cwd) def _get_remotes(self): diff --git a/tests/test_repo.py b/tests/test_repo.py index dddea45..a819a6e 100644 --- a/tests/test_repo.py +++ b/tests/test_repo.py @@ -73,7 +73,8 @@ def setUp(self): commit 1 -> fork after -> remote 2 tag1 commit 2 - *remote2 (clone remote 1) + annotated tag2 + * remote2 (clone remote 1) commit 1 commit 3 branch b2 @@ -94,6 +95,8 @@ def setUp(self): subprocess.check_call(['git', 'tag', 'tag1'], cwd=self.remote1) self.commit_2_sha = git_write_commit( self.remote1, 'tracked', "last", msg="last commit") + subprocess.check_call(['git', 'tag', '-am', 'foo', 'tag2'], + cwd=self.remote1) self.commit_3_sha = git_write_commit( self.remote2, 'tracked2', "remote2", msg="new commit") subprocess.check_call(['git', 'checkout', '-b', 'b2'], @@ -121,6 +124,24 @@ def test_minimal(self): last_rev = git_get_last_rev(self.cwd) self.assertEqual(last_rev, self.commit_1_sha) + def test_annotated_tag(self): + remotes = [{ + 'name': 'r1', + 'url': self.url_remote1 + }] + merges = [{ + 'remote': 'r1', + 'ref': 'tag2' + }] + target = { + 'remote': 'r1', + 'branch': 'agg1' + } + repo = Repo(self.cwd, remotes, merges, target) + repo.aggregate() + last_rev = git_get_last_rev(self.cwd) + self.assertEqual(last_rev, self.commit_2_sha) + def test_simple_merge(self): remotes = [{ 'name': 'r1', @@ -146,7 +167,38 @@ def test_simple_merge(self): self.assertEqual(last_rev, self.commit_3_sha) # push repo.push() - rtype, sha = repo.query_remote_ref('r1', 'agg') + rtype, ref, sha = list(repo.query_remote('r1', 'agg'))[0] + self.assertEquals(rtype, 'branch') + self.assertTrue(sha) + + def test_simple_merge_from_existing_git(self): + # Launched from an existing git repository + remotes = [{ + 'name': 'r1', + 'url': self.url_remote1 + }, { + 'name': 'r2', + 'url': self.url_remote2 + }] + merges = [{ + 'remote': 'r1', + 'ref': 'tag1' + }, { + 'remote': 'r2', + 'ref': self.commit_3_sha + }] + target = { + 'remote': 'r1', + 'branch': 'agg' + } + subprocess.call(['git', 'init', self.cwd]) + repo = Repo(self.cwd, remotes, merges, target, fetch_all=True) + repo.aggregate() + last_rev = git_get_last_rev(self.cwd) + self.assertEqual(last_rev, self.commit_3_sha) + # push + repo.push() + rtype, ref, sha = list(repo.query_remote('r1', 'agg'))[0] self.assertEquals(rtype, 'branch') self.assertTrue(sha)