Skip to content

Commit

Permalink
Merge pull request #57 from DLR-SC/v1.0
Browse files Browse the repository at this point in the history
Incremental update for v1.0 release candidate
cdboer authored May 30, 2022
2 parents bf61a9e + 010e9f5 commit a80db75
Showing 4 changed files with 141 additions and 52 deletions.
20 changes: 20 additions & 0 deletions config/example.ini
Original file line number Diff line number Diff line change
@@ -1,12 +1,32 @@
# This is an example of a configuration file as used by gitlab2prov.
# Each configuration option has the same function as the corresponding command line flag.

[GITLAB]
# Gitlab project urls as a comma-separated list.
project_urls = project_a_url, project_b_url

# Gitlab personal access token.
# More about tokens and how to create them:
# https://docs.gitlab.com/ee/user/profile/personal_access_tokens.html#create-a-personal-access-token
token = token

[OUTPUT]
# Provenance serialization format.
# Supported formats: json, rdf, xml, provn, dot
format = json

[MISC]
# Enables/Disables profiling using the cProfile module.
# The runtime profile is written to a file called gitlab2prov-run-$TIMESTAMP.profile
# where $TIMESTAMP is the current time in 'YYYY-MM-DD-hh-mm-ss' format.
# The profile can be visualized using tools such as snakeviz.
profile = False

# Enables/Disables verbose output (DEBUG mode logging to stdout)
verbose = False

# Path to double agent mapping to unify duplicated agents.
double_agents = path/to/alias/mapping

# Enables/Disables agent pseudonymization by enumeration.
pseudonymous = False
50 changes: 38 additions & 12 deletions gitlab2prov/config.py
Original file line number Diff line number Diff line change
@@ -32,6 +32,15 @@ def convert_csv(csv_string: str) -> list[str]:
return urls


def read_config():
    """Return the effective configuration for this run.

    The command line is parsed first via ``read_cli``, which yields a
    ``(config, config_file_path)`` pair. When a config file path is present,
    the configuration is loaded from that file with ``read_file`` and the
    command line configuration is discarded. Otherwise the command line
    configuration is returned unchanged, which may be ``None``.
    """
    cli_config, config_path = read_cli()
    if not config_path:
        # No config file requested: hand back whatever the CLI produced.
        return cli_config
    return read_file(config_path)


def read_file(config_file: str) -> Config:
config = configparser.ConfigParser(
converters={"string": convert_string, "csv": convert_csv}
@@ -48,7 +57,15 @@ def read_file(config_file: str) -> Config:
)


def read_cli() -> Config:
def token_required(argv):
    """Tell whether token/project-url options must be supplied on the CLI.

    Args:
        argv: The full argument vector, with the program name at index 0
            (the shape of ``sys.argv``).

    Returns:
        ``False`` when no arguments follow the program name, or when a
        config file is selected through ``-c``, ``--config-file`` or
        ``--config-file=PATH``. ``True`` in every other case.
    """
    # Skip argv[0]: it is the program name, and it is literally "-c" when the
    # interpreter was started as ``python -c ...``, which must not be mistaken
    # for the config file flag.
    cli_args = argv[1:]
    if not cli_args:
        return False
    # argparse also accepts the ``--config-file=PATH`` spelling, so an exact
    # membership test alone would miss it.
    return not any(
        arg in ("-c", "--config-file") or arg.startswith("--config-file=")
        for arg in cli_args
    )


def read_cli() -> tuple[Optional[Config], Optional[str]]:
parser = argparse.ArgumentParser(
prog="gitlab2prov",
description="Extract provenance information from GitLab projects.",
@@ -58,13 +75,13 @@ def read_cli() -> Config:
"--project-urls",
help="gitlab project urls",
nargs="+",
required="--config-file" not in sys.argv and "-c" not in sys.argv,
required=token_required(sys.argv),
)
parser.add_argument(
"-t",
"--token",
help="gitlab api access token",
required="--config-file" not in sys.argv and "-c" not in sys.argv,
required=token_required(sys.argv),
)
parser.add_argument("-c", "--config-file", help="config file path")
parser.add_argument(
@@ -98,15 +115,24 @@ def read_cli() -> Config:
action="store_true",
default=False,
)

if not sys.argv[1:]:
print(parser.format_help())
return None, None

args = parser.parse_args()
if args.config_file:
return read_file(args.config_file)
return Config(
args.project_urls,
args.token,
args.format,
args.pseudonymous,
args.verbose,
args.profile,
args.double_agents,
return None, args.config_file

return (
Config(
args.project_urls,
args.token,
args.format,
args.pseudonymous,
args.verbose,
args.profile,
args.double_agents,
),
None,
)
26 changes: 15 additions & 11 deletions gitlab2prov/entrypoints/cli.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,30 @@
from gitlab2prov import bootstrap
from gitlab2prov.config import read_cli
from gitlab2prov.config import read_config
from gitlab2prov.domain import commands
from gitlab2prov.log import create_logger
from gitlab2prov.profile import profiling


config = read_cli()
def main():
config = read_config()
if config is None:
return

@profiling(enabled=config.profile)
def run():
bus = bootstrap.bootstrap()

@profiling(enabled=config.profile)
def main():
bus = bootstrap.bootstrap()
if config.verbose:
create_logger()

if config.verbose:
create_logger()
for url in config.project_urls:
cmd = commands.Fetch(url, config.token)
bus.handle(cmd)

for url in config.project_urls:
cmd = commands.Fetch(url, config.token)
cmd = commands.Serialize(config.format, config.pseudonymous, config.double_agents)
bus.handle(cmd)

cmd = commands.Serialize(config.format, config.pseudonymous, config.double_agents)
bus.handle(cmd)
run()


if __name__ == "__main__":
97 changes: 68 additions & 29 deletions gitlab2prov/prov/model.py
Original file line number Diff line number Diff line change
@@ -29,6 +29,8 @@ def git_commit_model(
parents = [parent for parent in parents if parent is not None]
for rev in file_revisions:
model = choose_rev_model(rev)
if model is None:
continue
graph.update(model(commit, parents, rev))
return graph

@@ -46,6 +48,7 @@ def choose_rev_model(rev: FileRevision):
return modification
if rev.change_type == ChangeType.DELETED:
return deletion
return None


def addition(
@@ -59,21 +62,27 @@ def addition(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))

f = graph.entity(*rev.original)
f.wasAttributedTo(at)
f.wasGeneratedBy(c, c.get_startTime(), [(PROV_ROLE, ProvRole.FILE)])
f.wasGeneratedBy(c, time=c.get_startTime(), attributes=[(PROV_ROLE, ProvRole.FILE)])

rev = graph.entity(*rev)
rev.wasAttributedTo(at)
rev.specializationOf(f)
rev.wasGeneratedBy(
c, c.get_startTime(), [(PROV_ROLE, ProvRole.FILE_REVISION_AT_POINT_OF_ADDITION)]
c,
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.FILE_REVISION_AT_POINT_OF_ADDITION)],
)
return graph

@@ -88,8 +97,12 @@ def modification(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))
@@ -105,8 +118,8 @@ def modification(
) # NOTE: rev.wasRevisionOf(prev) is not impl in prov pkg
rev.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.FILE_REVISION_AFTER_MODIFICATION)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.FILE_REVISION_AFTER_MODIFICATION)],
)
c.used(
prev,
@@ -126,8 +139,12 @@ def deletion(
at = graph.agent(*commit.author)
ct = graph.agent(*commit.committer)

c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(ct, None, [(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)
c.wasAssociatedWith(
ct, plan=None, attributes=[(PROV_ROLE, list(ct.get_attribute(PROV_ROLE))[0])]
)

for parent in parents:
graph.activity(*commit).wasInformedBy(graph.activity(*parent))
@@ -178,15 +195,17 @@ def commit_creation(

resource.wasAttributedTo(author)
creation.wasAssociatedWith(
author, None, [(PROV_ROLE, ProvRole.AUTHOR_GITLAB_COMMIT)]
author, plan=None, attributes=[(PROV_ROLE, ProvRole.AUTHOR_GITLAB_COMMIT)]
)
resource.wasGeneratedBy(
creation, creation.get_startTime(), [(PROV_ROLE, ProvRole.RESOURCE)]
creation,
time=creation.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE)],
)
first_version.wasGeneratedBy(
creation,
creation.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
time=creation.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
)
first_version.specializationOf(resource)
first_version.wasAttributedTo(author)
@@ -196,7 +215,9 @@ def commit_creation(

commit = graph.activity(*git_commit)
committer = graph.agent(*git_commit.committer)
commit.wasAssociatedWith(committer, None, [(PROV_ROLE, ProvRole.COMMITTER)])
commit.wasAssociatedWith(
committer, plan=None, attributes=[(PROV_ROLE, ProvRole.COMMITTER)]
)
creation.wasInformedBy(commit)

return graph
@@ -208,20 +229,24 @@ def resource_creation(resource: Resource, graph: ProvDocument = graph_factory())
rv = graph.entity(*resource.first_version)
at = graph.agent(*resource.author)

c.wasAssociatedWith(at, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at,
plan=None,
attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])],
)

r.wasAttributedTo(at)
rv.wasAttributedTo(at)
rv.specializationOf(r)
r.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE)],
)
rv.wasGeneratedBy(
c,
c.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
time=c.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AT_POINT_OF_CREATION)],
)
return graph

@@ -247,7 +272,9 @@ def annotation_chain(resource, graph=graph_factory()):
annot_ver.specializationOf(r)

annot.wasAssociatedWith(
annotator, None, [(PROV_ROLE, list(annotator.get_attribute(PROV_ROLE))[0])]
annotator,
plan=None,
attributes=[(PROV_ROLE, list(annotator.get_attribute(PROV_ROLE))[0])],
)

annot.used(
@@ -257,8 +284,8 @@ def annotation_chain(resource, graph=graph_factory()):
)
annot_ver.wasGeneratedBy(
annot,
annot.get_startTime(),
[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AFTER_ANNOTATION)],
time=annot.get_startTime(),
attributes=[(PROV_ROLE, ProvRole.RESOURCE_VERSION_AFTER_ANNOTATION)],
)
prev_annot = annot
prev_annot_ver = annot_ver
@@ -285,7 +312,9 @@ def release_and_tag(
r = graph.collection(*release)
c = graph.activity(*release.creation)
t.hadMember(r)
r.wasGeneratedBy(c, c.get_startTime(), [(PROV_ROLE, ProvRole.RELEASE)])
r.wasGeneratedBy(
c, time=c.get_startTime(), attributes=[(PROV_ROLE, ProvRole.RELEASE)]
)
for asset in release.assets:
graph.entity(*asset).hadMember(graph.entity(*release))
for evidence in release.evidences:
@@ -296,7 +325,9 @@ def release_and_tag(

at = graph.agent(*release.author)
r.wasAttributedTo(at)
c.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
c.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

return graph

@@ -308,8 +339,12 @@ def tag_and_commit(
tc = graph.activity(*tag.creation)
at = graph.agent(*tag.author)
t.wasAttributedTo(at)
t.wasGeneratedBy(tc, tc.get_startTime(), [(PROV_ROLE, ProvRole.TAG)])
tc.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
t.wasGeneratedBy(
tc, time=tc.get_startTime(), attributes=[(PROV_ROLE, ProvRole.TAG)]
)
tc.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

if commit is None:
return graph
@@ -319,8 +354,12 @@ def tag_and_commit(
at = graph.agent(*commit.author)
cmt.hadMember(t)
cmt.wasAttributedTo(at)
cmt.wasGeneratedBy(cc, cc.get_startTime(), [(PROV_ROLE, ProvRole.GIT_COMMIT)])
cc.wasAssociatedWith(at, None, [(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])])
cmt.wasGeneratedBy(
cc, time=cc.get_startTime(), attributes=[(PROV_ROLE, ProvRole.GIT_COMMIT)]
)
cc.wasAssociatedWith(
at, plan=None, attributes=[(PROV_ROLE, list(at.get_attribute(PROV_ROLE))[0])]
)

return graph

0 comments on commit a80db75

Please sign in to comment.