Skip to content

Commit

Permalink
[elk] Handle private repos in git studies
Browse files Browse the repository at this point in the history
This code prevents the credentials of private git
repos from being stored in the studies indexes
(aoc and git branches).

This change also requires improving the way the
project attribute is assigned to a repository. In the
case of a private repo, the corresponding data source
in the projects.json is accessed, each repo is anonymized
and the output is compared with the current repo (whose value
is stored in the indexes).

Tests have been updated accordingly.

Signed-off-by: Valerio Cosentino <[email protected]>
  • Loading branch information
valeriocos authored and zhquan committed May 18, 2020
1 parent b15814e commit 8ce8c34
Show file tree
Hide file tree
Showing 8 changed files with 208 additions and 53 deletions.
12 changes: 11 additions & 1 deletion grimoire_elk/enriched/enrich.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def find_item_project(self, eitem):
# elk.enrich_backend)
if self.projects_json_repo:
project = self.prjs_map[ds_name][self.projects_json_repo]
# if `projects_json_repo`, which shouldn't never happen, use the
# if `projects_json_repo` is not set (e.g., AOC study), use the
# method `get_project_repository` (defined in each enricher)
else:
repository = self.get_project_repository(eitem)
Expand All @@ -561,6 +561,16 @@ def find_item_project(self, eitem):
fltr = eitem['origin'] + ' --filter-raw=' + self.filter_raw
if ds_name in self.prjs_map and fltr in self.prjs_map[ds_name]:
project = self.prjs_map[ds_name][fltr]
elif ds_name in self.prjs_map:
# this code is executed to retrieve the project of private repositories (in particular Git ones)
# the URLs in the prjs_map are retrieved, anonymized and compared with the value
# returned by `get_project_repository`
repository = self.get_project_repository(eitem)
for r in self.prjs_map[ds_name]:
anonymized_repo = anonymize_url(r)
if repository == anonymized_repo:
project = self.prjs_map[ds_name][r]
break

if project == UNKNOWN_PROJECT:
return None
Expand Down
11 changes: 6 additions & 5 deletions grimoire_elk/enriched/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,10 @@ def enrich_areas_of_code(self, ocean_backend, enrich_backend, no_incremental=Fal
repos.extend(items)

for repo in repos:
logger.info("{} Processing repo: {}".format(log_prefix, repo))
in_conn.update_repo(repo)
out_conn.update_repo(repo)
anonymize_repo = anonymize_url(repo)
logger.info("{} Processing repo: {}".format(log_prefix, anonymize_repo))
in_conn.update_repo(anonymize_repo)
out_conn.update_repo(anonymize_repo)
areas_of_code(git_enrich=enrich_backend, in_conn=in_conn, out_conn=out_conn)

# Create alias if output index exists and alias does not
Expand Down Expand Up @@ -776,7 +777,7 @@ def delete_commit_branches(self, git_repo, enrich_backend):
}
}
]
""" % git_repo.uri
""" % anonymize_url(git_repo.uri)

# reset references in enrich index
es_query = """
Expand Down Expand Up @@ -856,7 +857,7 @@ def __process_commits_in_branch(self, enrich_backend, repo_origin, branch_name,
logger.warning("[git] Change branch name from {} to {}".format(branch_name, digested_branch_name))

# update enrich index
fltr = self.__prepare_filter("hash", commits_str, repo_origin)
fltr = self.__prepare_filter("hash", commits_str, anonymize_url(repo_origin))

es_query = """
{
Expand Down
10 changes: 8 additions & 2 deletions grimoire_elk/enriched/graal_study_evolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
# Nishchith Shetty <[email protected]>
#

from grimoirelab_toolkit.datetime import str_to_datetime
from grimoirelab_toolkit.datetime import str_to_datetime, unixtime_to_datetime


def get_unique_repository():
Expand Down Expand Up @@ -178,9 +178,15 @@ def get_to_date(es_in, in_index, out_index, repository_url, interval):
index=out_index,
body=get_last_study_date(repository_url, interval))["aggregations"]["1"]

if last_study_date["value"] is not None:
if "value_as_string" in last_study_date and last_study_date["value_as_string"]:
study_data_available = True
to_date = str_to_datetime(last_study_date["value_as_string"])
elif "value" in last_study_date and last_study_date["value"]:
study_data_available = True
try:
to_date = unixtime_to_datetime(last_study_date["value"])
except Exception:
to_date = unixtime_to_datetime(last_study_date["value"] / 1000)

if not study_data_available:
first_item_date = es_in.search(
Expand Down
1 change: 1 addition & 0 deletions grimoire_elk/raw/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ class GitOcean(ElasticOcean):

def _fix_item(self, item):
item['origin'] = anonymize_url(item['origin'])
item['tag'] = anonymize_url(item['tag'])

@classmethod
def get_perceval_params_from_url(cls, url):
Expand Down
12 changes: 12 additions & 0 deletions releases/unreleased/support-for-git-private-repos.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
---
title: Support for Git private repos
category: added
author: Valerio Cosentino <[email protected]>
issue: 873
notes: >
  Git private repos can now be handled by
  ELK, which now deals with the credentials that
  appear in the repo URLs passed via the projects.json.
  These URLs are processed when storing/retrieving the
  data in the raw, enriched and studies indexes to make
  sure that the credentials are neither included in the
  indexes nor visible on the dashboards.
12 changes: 11 additions & 1 deletion tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ def _test_refresh_project(self):
total = refresh_projects(self.enrich_backend)
return total

def _test_study(self, test_study):
def _test_study(self, test_study, projects_json_repo=None, projects_json=None, prjs_map=None):
"""Test the execution of a study"""

# populate raw index
Expand All @@ -280,6 +280,16 @@ def _test_study(self, test_study):

elastic_enrich = get_elastic(self.es_con, self.enrich_index, clean, self.enrich_backend)
self.enrich_backend.set_elastic(elastic_enrich)

if projects_json:
self.enrich_backend.json_projects = projects_json

if projects_json_repo:
self.enrich_backend.projects_json_repo = projects_json_repo

if prjs_map:
self.enrich_backend.prjs_map = prjs_map

self.enrich_backend.enrich_items(self.ocean_backend)

for study in self.enrich_backend.studies:
Expand Down
2 changes: 1 addition & 1 deletion tests/data/git.json
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@
}
]
}
},{
},{
"backend_name": "Git",
"backend_version": "0.12.0",
"perceval_version": "0.14.0",
Expand Down
Loading

0 comments on commit 8ce8c34

Please sign in to comment.