Skip to content

Commit

Permalink
Merge pull request #57 from DLR-SC/v1.0
Browse files Browse the repository at this point in the history
Incremental update for v1.0 release candidate
cdboer authored May 30, 2022

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
2 parents bf61a9e + 010e9f5 commit a80db75
Showing 4 changed files with 141 additions and 52 deletions.
20 changes: 20 additions & 0 deletions config/example.ini
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
# This is an example of a configuration file as used by gitlab2prov.
# Each configuration option has the same function as the corresponding command line flag.

[GITLAB]
# GitLab project URLs as a comma-separated list.
project_urls = project_a_url, project_b_url

# Gitlab personal access token.
# More about tokens and how to create them:
# https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html#create-a-personal-access-token
token = token

[OUTPUT]
# Provenance serialization format.
# Supported formats: json, rdf, xml, provn, dot
format = json

[MISC]
# Enables/Disables profiling using Python's cProfile module.
# The runtime profile is written to a file called gitlab2prov-run-$TIMESTAMP.profile
# where $TIMESTAMP is the current time in 'YYYY-MM-DD-hh-mm-ss' format.
# The profile can be visualized using tools such as snakeviz.
profile = False

# Enables/Disables verbose output (DEBUG mode logging to stdout)
verbose = False

# Path to double agent mapping to unify duplicated agents.
double_agents = path/to/alias/mapping

# Enables/Disables agent pseudonymization by enumeration.
pseudonymous = False
50 changes: 38 additions & 12 deletions gitlab2prov/config.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,15 @@ def convert_csv(csv_string: str) -> list[str]:
return urls


def read_config():
    """Return the effective configuration for this run.

    The command line is parsed first via ``read_cli``, which yields a
    ``(config, config_file_path)`` pair. When a config file path was
    supplied, the configuration is loaded from that file with
    ``read_file``; otherwise the configuration built from the command
    line is returned as-is. That value is ``None`` when ``read_cli``
    produced no configuration.
    """
    cli_config, config_path = read_cli()
    # A config file, when given, takes the place of the CLI-built config.
    return read_file(config_path) if config_path else cli_config


def read_file(config_file: str) -> Config:
config = configparser.ConfigParser(
converters={"string": convert_string, "csv": convert_csv}
@@ -48,7 +57,15 @@ def read_file(config_file: str) -> Config:
)


def read_cli() -> Config:
def token_required(argv):
    """Tell whether the GitLab options must be supplied on the command line.

    The result is used as the ``required=`` value of the ``--project-urls``
    and ``--token`` arguments. They are only mandatory when the user passed
    arguments but did not point to a config file.

    Args:
        argv: The full argument vector, program name first (``sys.argv``).

    Returns:
        ``False`` when no arguments were given or when a config file option
        is present in any spelling argparse accepts (``-c PATH``, ``-cPATH``,
        ``--config-file PATH``, ``--config-file=PATH``); ``True`` otherwise.
    """
    # Skip argv[0]: the program name is not an argument (it is literally
    # "-c" when the interpreter is started with ``python -c``).
    cli_args = argv[1:]
    if not cli_args:
        return False
    for arg in cli_args:
        # Long form, either as its own token or with the value attached by "=".
        if arg == "--config-file" or arg.startswith("--config-file="):
            return False
        # Short form, either as its own token or with the value attached.
        if arg.startswith("-c") and not arg.startswith("--"):
            return False
    return True


def read_cli() -> tuple[Optional[Config], Optional[str]]:
parser = argparse.ArgumentParser(
prog="gitlab2prov",
description="Extract provenance information from GitLab projects.",
@@ -58,13 +75,13 @@ def read_cli() -> Config:
"--project-urls",
help="gitlab project urls",
nargs="+",
required="--config-file" not in sys.argv and "-c" not in sys.argv,
required=token_required(sys.argv),
)
parser.add_argument(
"-t",
"--token",
help="gitlab api access token",
required="--config-file" not in sys.argv and "-c" not in sys.argv,
required=token_required(sys.argv),
)
parser.add_argument("-c", "--config-file", help="config file path")
parser.add_argument(
@@ -98,15 +115,24 @@ def read_cli() -> Config:
action="store_true",
default=False,
)

if not sys.argv[1:]:
print(parser.format_help())
return None, None

args = parser.parse_args()
if args.config_file:
return read_file(args.config_file)
return Config(
args.project_urls,
args.token,
args.format,
args.pseudonymous,
args.verbose,
args.profile,
args.double_agents,
return None, args.config_file

return (
Config(
args.project_urls,
args.token,
args.format,
args.pseudonymous,
args.verbose,
args.profile,
args.double_agents,
),
None,
)
26 changes: 15 additions & 11 deletions gitlab2prov/entrypoints/cli.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
from gitlab2prov import bootstrap
from gitlab2prov.config import read_cli
from gitlab2prov.config import read_config
from gitlab2prov.domain import commands
from gitlab2prov.log import create_logger
from gitlab2prov.profile import profiling


config = read_cli()
def main():
config = read_config()
if config is None:
return

@profiling(enabled=config.profile)
def run():
bus = bootstrap.bootstrap()

@profiling(enabled=config.profile)
def main():
bus = bootstrap.bootstrap()
if config.verbose:
create_logger()

if config.verbose:
create_logger()
for url in config.project_urls:
cmd = commands.Fetch(url, config.token)
bus.handle(cmd)

for url in config.project_urls:
cmd = commands.Fetch(url, config.token)
cmd = commands.Serialize(config.format, config.pseudonymous, config.double_agents)
bus.handle(cmd)

cmd = commands.Serialize(config.format, config.pseudonymous, config.double_agents)
bus.handle(cmd)
run()


if __name__ == "__main__":
97 changes: 68 additions & 29 deletions gitlab2prov/prov/model.py
Original file line number Diff line number Diff line change
@@ -29,6 +29,8 @@ def git_commit_model(
parents = [parent for parent in parents if parent is not None]
for rev in file_revisions:
model = choose_rev_model(rev)
if model is None:
continue
graph.update(model(commit, parents, rev))
return graph

@@ -46,6 +48,7 @@ def choose_rev_model(rev: FileRevision):
return modification
if rev.change_type == ChangeType.DELETED:
return deletion
return None


def addition(
@@ -59,21 +62,27 @@ def addition(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))

f = graph.entity(*rev.original)
f.wasAttributedTo(at)
f.wasGeneratedBy(c, c.get_startTime(), [(PROV_ROLE, ProvRole.FILE)])
f.wasGeneratedBy(c, time=c.get_startTime(), attributes=[(PROV_ROLE, ProvRole.FILE)])

rev = graph.entity(*rev)
rev.wasAttributedTo(at)
rev.specializationOf(f)
rev.wasGeneratedBy(
c, c.get_startTime(), [(PROV_ROLE, ProvRole.FILE_REVISION_AT_POINT_OF_ADDITION)]
c,
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.FILE_REVISION_AT_POINT_OF_ADDITION)],
)
return graph

@@ -88,8 +97,12 @@ def modification(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))
@@ -105,8 +118,8 @@ def modification(
) # NOTE: rev.wasRevisionOf(prev) is not impl in prov pkg
rev.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.FILE_REVISION_AFTER_MODIFICATION)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.FILE_REVISION_AFTER_MODIFICATION)],
)
c.used(
prev,
@@ -126,8 +139,12 @@ def deletion(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))
@@ -178,15 +195,17 @@ def commit_creation(

resource.wasAttributedTo(author)
creation.wasAssociatedWith(
author, None, [(PROV_ROLE, ProvRole.AUTHOR_GITLAB_COMMIT)]
author, plan=None, attributes=[(PROV_ROLE, ProvRole.AUTHOR_GITLAB_COMMIT)]
)
resource.wasGeneratedBy(
creation, creation.get_startTime(), [(PROV_ROLE, ProvRole.RESOURCE)]
creation,
time=creation.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE)],
)
first_version.wasGeneratedBy(
creation,
creation.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
time=creation.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
)
first_version.specializationOf(resource)
first_version.wasAttributedTo(author)
@@ -196,7 +215,9 @@ def commit_creation(

commit = graph.activity(*git_commit)
committer = graph.agent(*git_commit.committer)
commit.wasAssociatedWith(committer, None, [(PROV_ROLE, ProvRole.COMMITTER)])
commit.wasAssociatedWith(
committer, plan=None, attributes=[(PROV_ROLE, ProvRole.COMMITTER)]
)
creation.wasInformedBy(commit)

return graph
@@ -208,20 +229,24 @@ def resource_creation(resource: Resource, graph: ProvDocument = graph_factory())
rv = graph.entity(*resource.first_version)
at = graph.agent(*resource.author)

c.wasAssociatedWith(at, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at,
plan=None,
attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])],
)

r.wasAttributedTo(at)
rv.wasAttributedTo(at)
rv.specializationOf(r)
r.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE)],
)
rv.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
)
return graph

@@ -247,7 +272,9 @@ def annotation_chain(resource, graph=graph_factory()):
annot_ver.specializationOf(r)

annot.wasAssociatedWith(
annotator, None, [(PROV_ROLE, list(annotator.get_attribute(PROV_ROLE))[0])]
annotator,
plan=None,
attributes=[(PROV_ROLE, list(annotator.get_attribute(PROV_ROLE))[0])],
)

annot.used(
@@ -257,8 +284,8 @@ def annotation_chain(resource, graph=graph_factory()):
)
annot_ver.wasGeneratedBy(
annot,
annot.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AFTER_ANNOTATION)],
time=annot.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AFTER_ANNOTATION)],
)
prev_annot = annot
prev_annot_ver = annot_ver
@@ -285,7 +312,9 @@ def release_and_tag(
r = graph.collection(*release)
c = graph.activity(*release.creation)
t.hadMember(r)
r.wasGeneratedBy(c, c.get_startTime(), [(PROV_ROLE, ProvRole.RELEASE)])
r.wasGeneratedBy(
c, time=c.get_startTime(), attributes=[(PROV_ROLE, ProvRole.RELEASE)]
)
for asset in release.assets:
graph.entity(*asset).hadMember(graph.entity(*release))
for evidence in release.evidences:
@@ -296,7 +325,9 @@ def release_and_tag(

at = graph.agent(*release.author)
r.wasAttributedTo(at)
c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

return graph

@@ -308,8 +339,12 @@ def tag_and_commit(
tc = graph.activity(*tag.creation)
at = graph.agent(*tag.author)
t.wasAttributedTo(at)
t.wasGeneratedBy(tc, tc.get_startTime(), [(PROV_ROLE, ProvRole.TAG)])
tc.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
t.wasGeneratedBy(
tc, time=tc.get_startTime(), attributes=[(PROV_ROLE, ProvRole.TAG)]
)
tc.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

if commit is None:
return graph
@@ -319,8 +354,12 @@ def tag_and_commit(
at = graph.agent(*commit.author)
cmt.hadMember(t)
cmt.wasAttributedTo(at)
cmt.wasGeneratedBy(cc, cc.get_startTime(), [(PROV_ROLE, ProvRole.GIT_COMMIT)])
cc.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
cmt.wasGeneratedBy(
cc, time=cc.get_startTime(), attributes=[(PROV_ROLE, ProvRole.GIT_COMMIT)]
)
cc.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

return graph

0 comments on commit a80db75

Please sign in to comment.