Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[run-task] Add the ability to configure submodule checkouts. #512

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
58 changes: 42 additions & 16 deletions src/taskgraph/run-task/run-task
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ import urllib.error
import urllib.request
from pathlib import Path
from threading import Thread
from typing import Optional
from typing import Optional, Union

SECRET_BASEURL_TPL = "http://taskcluster/secrets/v1/secret/{}"

Expand Down Expand Up @@ -598,6 +598,7 @@ def git_checkout(
commit: Optional[str],
ssh_key_file: Optional[Path],
ssh_known_hosts_file: Optional[Path],
submodules: Union[str, bool, None],
):
env = {
# abort if transfer speed is lower than 1kB/s for 1 minute
Expand Down Expand Up @@ -705,23 +706,40 @@ def git_checkout(

run_required_command(b"vcs", args, cwd=destination_path)

if os.path.exists(os.path.join(destination_path, ".gitmodules")):
args = [
"git",
"submodule",
"init",
]

run_required_command(b"vcs", args, cwd=destination_path)

args = [
"git",
"submodule",
"update",
"--force", # Overrides any potential local changes
if submodules is not None:
def _filter_submodule(status):
# A boolean value selects/disables all submodules.
if isinstance(submodules, bool):
return submodules

# Otherwise, a colon-separated stringlist of selected submodules.
smpath = status.split()[1]
return smpath in submodules.split(':')

submodpaths = [
status.split()[1]
for status in subprocess.check_output(["git", "submodule", "status"],
cwd=destination_path, universal_newlines=True).splitlines()
if _filter_submodule(status)
]

run_required_command(b"vcs", args, cwd=destination_path)
for p in submodpaths:
args = [
"git",
"submodule",
"init",
p
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Checkouts are often cached on individual workers. Will this work correctly if a checkout already contains an initialized submodule at the same (or different) revision?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would be technically possible to have different submodules checked out at different revisions, but technically a task should only rely on the submodules that it declares, and those would all get the git submodule update treatment below.

That being said, I'm starting to think we should default to updating all submodules if SUBMODULES is not set. I don't think it's going to be easy to audit tasks to tell whether / which submodules they may or may not be using. So preserving backwards compatibility by defaulting to all of them will probably save us some headache.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will take a little bit of rethinking how what the env variables should mean to properly convey that (default disabled is easy because an unset environment variable is read as an empty string), but I will see what I can do.

]
run_required_command(b"vcs", args, cwd=destination_path)

args = [
"git",
"submodule",
"update",
"--force", # Overrides any potential local changes
p
]
run_required_command(b"vcs", args, cwd=destination_path)

_clean_git_checkout(destination_path)

Expand Down Expand Up @@ -894,6 +912,12 @@ def collect_vcs_options(args, project, name):
ref = os.environ.get("%s_HEAD_REF" % env_prefix)
pip_requirements = os.environ.get("%s_PIP_REQUIREMENTS" % env_prefix)
private_key_secret = os.environ.get("%s_SSH_SECRET_NAME" % env_prefix)
submodules = os.environ.get("%s_SUBMODULES" % env_prefix)

# Some special values can be used to request all submodules.
if submodules is not None:
if submodules.lower() in ["auto", "true", "yes"]:
submodules = True

store_path = os.environ.get("HG_STORE_PATH")

Expand Down Expand Up @@ -928,6 +952,7 @@ def collect_vcs_options(args, project, name):
"repo-type": repo_type,
"ssh-secret-name": private_key_secret,
"pip-requirements": pip_requirements,
"submodules": submodules,
}


Expand Down Expand Up @@ -976,6 +1001,7 @@ def vcs_checkout_from_args(options):
revision,
ssh_key_file,
ssh_known_hosts_file,
options["submodules"],
)
elif options["repo-type"] == "hg":
if not revision and not ref:
Expand Down
1 change: 1 addition & 0 deletions src/taskgraph/transforms/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class RepoConfig:
path: str = ""
head_rev: Union[str, None] = None
ssh_secret_name: Union[str, None] = None
submodules: Union[bool, List[str], None] = None


@dataclass(frozen=True, eq=False)
Expand Down
6 changes: 6 additions & 0 deletions src/taskgraph/transforms/run/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False):
}
)
for repo_config in repo_configs.values():
if isinstance(repo_config.submodules, list):
repo_submods = ":".join(repo_config.submodules)
else:
repo_submods = "auto" if repo_config.submodules else None

env.update(
{
f"{repo_config.prefix.upper()}_{key}": value
Expand All @@ -153,6 +158,7 @@ def support_vcs_checkout(config, task, taskdesc, repo_configs, sparse=False):
"HEAD_REF": repo_config.head_ref,
"REPOSITORY_TYPE": repo_config.type,
"SSH_SECRET_NAME": repo_config.ssh_secret_name,
"SUBMODULES": repo_submods,
}.items()
if value is not None
}
Expand Down
161 changes: 140 additions & 21 deletions test/test_scripts_run_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,41 @@ def test_install_pip_requirements(
{
"base-repo": "https://hg.mozilla.org/mozilla-unified",
},
)
),
pytest.param(
{
"REPOSITORY_TYPE": "git",
"BASE_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REV": "abcdef",
"SUBMODULES": "3rdparty/i18n",
},
{},
),
pytest.param(
{
"REPOSITORY_TYPE": "git",
"BASE_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REV": "abcdef",
"SUBMODULES": "auto",
},
{
"submodules": True,
},
),
pytest.param(
{
"REPOSITORY_TYPE": "git",
"BASE_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REPOSITORY": "https://github.com/mozilla-mobile/mozilla-vpn-client.git",
"HEAD_REV": "abcdef",
"SUBMODULES": "yes",
},
{
"submodules": True,
},
),
],
)
def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
Expand Down Expand Up @@ -159,6 +193,7 @@ def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected):
"ssh-secret-name": env.get("SSH_SECRET_NAME"),
"sparse-profile": False,
"store-path": env.get("HG_STORE_PATH"),
"submodules": env.get("SUBMODULES"),
}
if "PIP_REQUIREMENTS" in env:
expected["pip-requirements"] = os.path.join(
Expand Down Expand Up @@ -283,54 +318,135 @@ def git_current_rev(cwd):
).strip()


@pytest.fixture()
def mock_homedir(monkeypatch):
"Mock home directory to isolate git config changes"
with tempfile.TemporaryDirectory() as fakehome:
env = os.environ.copy()
env["HOME"] = str(fakehome)
monkeypatch.setattr(os, "environ", env)

# Enable the git file protocol for testing.
subprocess.check_call(
["git", "config", "--global", "protocol.file.allow", "always"], env=env
)
yield str(fakehome)


@pytest.fixture(scope="session") # Tests shouldn't change this repo
def mock_git_repo():
"Mock repository with files, commits and branches for using as source"

def _create_empty_repo(path):
subprocess.check_call(["git", "init", "-b", "main", path])
subprocess.check_call(["git", "config", "user.name", "pytest"], cwd=path)
subprocess.check_call(["git", "config", "user.email", "[email protected]"], cwd=path)

def _commit_file(message, filename, path):
with open(os.path.join(path, filename), "w") as fout:
fout.write("test file content")
subprocess.check_call(["git", "add", filename], cwd=path)
subprocess.check_call(["git", "commit", "-m", message], cwd=path)
return git_current_rev(path)

with tempfile.TemporaryDirectory() as repo:
repo_path = str(repo)
# Init git repo and setup user config
subprocess.check_call(["git", "init", "-b", "main"], cwd=repo_path)
subprocess.check_call(["git", "config", "user.name", "pytest"], cwd=repo_path)
# Create the submodule repositories
sm1_path = os.path.join(str(repo), "submodule1")
_create_empty_repo(sm1_path)
_commit_file("The first submodule", "first", sm1_path)

sm2_path = os.path.join(str(repo), "submodule2")
_create_empty_repo(sm2_path)
_commit_file("The second submodule", "second", sm2_path)

# Create the testing repository.
repo_path = os.path.join(str(repo), "repository")
_create_empty_repo(repo_path)

# Add submodules (to main branch)
subprocess.check_call(
["git", "config", "user.email", "[email protected]"], cwd=repo_path
[
"git",
"-c",
"protocol.file.allow=always",
"submodule",
"add",
f"file://{os.path.abspath(sm1_path)}",
"sm1",
],
cwd=repo_path,
)

def _commit_file(message, filename):
with open(os.path.join(repo, filename), "w") as fout:
fout.write("test file content")
subprocess.check_call(["git", "add", filename], cwd=repo_path)
subprocess.check_call(["git", "commit", "-m", message], cwd=repo_path)
return git_current_rev(repo_path)
subprocess.check_call(
[
"git",
"-c",
"protocol.file.allow=always",
"submodule",
"add",
f"file://{os.path.abspath(sm2_path)}",
"sm2",
],
cwd=repo_path,
)
subprocess.check_call(["git", "add", "sm1"], cwd=repo_path)
subprocess.check_call(["git", "add", "sm2"], cwd=repo_path)
subprocess.check_call(["git", "commit", "-m", "Add submodules"], cwd=repo_path)

# Commit mainfile (to main branch)
main_commit = _commit_file("Initial commit", "mainfile")
main_commit = _commit_file("Initial commit", "mainfile", repo_path)

# New branch mybranch
subprocess.check_call(["git", "checkout", "-b", "mybranch"], cwd=repo_path)
# Commit branchfile to mybranch branch
branch_commit = _commit_file("File in mybranch", "branchfile")
branch_commit = _commit_file("File in mybranch", "branchfile", repo_path)

# Set current branch back to main
subprocess.check_call(["git", "checkout", "main"], cwd=repo_path)
yield {"path": repo_path, "main": main_commit, "branch": branch_commit}


@pytest.mark.parametrize(
"base_ref,ref,files,hash_key",
"base_ref,ref,submodules,files,hash_key",
[
(None, None, ["mainfile"], "main"),
(None, "main", ["mainfile"], "main"),
(None, "mybranch", ["mainfile", "branchfile"], "branch"),
("main", "main", ["mainfile"], "main"),
("main", "mybranch", ["mainfile", "branchfile"], "branch"),
# Check out the repository in a bunch of states.
(None, None, None, ["mainfile"], "main"),
(None, "main", None, ["mainfile"], "main"),
(None, "mybranch", None, ["mainfile", "branchfile"], "branch"),
("main", "main", None, ["mainfile"], "main"),
("main", "mybranch", None, ["mainfile", "branchfile"], "branch"),
# Same tests again - but with submodules.
(None, None, True, ["mainfile", "sm1/first", "sm2/second"], "main"),
(None, "main", True, ["mainfile", "sm1/first", "sm2/second"], "main"),
(
None,
"mybranch",
True,
["mainfile", "branchfile", "sm1/first", "sm2/second"],
"branch",
),
("main", "main", True, ["mainfile", "sm1/first", "sm2/second"], "main"),
(
"main",
"mybranch",
True,
["mainfile", "branchfile", "sm1/first", "sm2/second"],
"branch",
),
# Tests for submodule matching rules.
(None, "main", "sm1", ["mainfile", "sm1/first"], "main"),
(None, "main", "sm2", ["mainfile", "sm2/second"], "main"),
(None, "main", "one:two:three", ["mainfile"], "main"),
(None, "main", "one:two:sm1", ["mainfile", "sm1/first"], "main"),
],
)
def test_git_checkout(
mock_stdin,
mock_homedir,
run_task_mod,
mock_git_repo,
base_ref,
ref,
submodules,
files,
hash_key,
):
Expand All @@ -346,6 +462,7 @@ def test_git_checkout(
commit=None,
ssh_key_file=None,
ssh_known_hosts_file=None,
submodules=submodules,
)

# Check desired files exist
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's an assert further down to check whether the outer repo is at the right revision. There's an assert checking for expected files makes sure something was checked out for the submodules, which I guess is good enough here, since the submodules themselves control which revs are used?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is correct, a submodule in the superproject specifies the exact hash of child project to be checked out, and the git submodule update --force flag will ensure that the revision specified by the superproject is checked out.

Expand All @@ -367,6 +484,7 @@ def test_git_checkout(

def test_git_checkout_with_commit(
mock_stdin,
mock_homedir,
run_task_mod,
mock_git_repo,
):
Expand All @@ -382,6 +500,7 @@ def test_git_checkout_with_commit(
commit=mock_git_repo["branch"],
ssh_key_file=None,
ssh_known_hosts_file=None,
submodules=None,
)


Expand Down
Loading
Loading