Skip to content

Commit

Permalink
add a more robust computation of directory change
Browse files Browse the repository at this point in the history
add a check for "important_files"

We ran into a case where a change in a "subdirectory" of a watched directory (e.g. my/path, where we watch my/) would not get built. Similarly, changing build infrastructure such as our "version script" would not trigger new builds either.
  • Loading branch information
colearendt committed May 19, 2022
1 parent 679083a commit 828ec22
Showing 1 changed file with 66 additions and 9 deletions.
75 changes: 66 additions & 9 deletions get-diffs.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
# --all, -a Whether to bypass checks and return the input
# --dirs DIRS [DIRS ...], -d DIRS [DIRS ...]
# A subset of directories to check for differences

import pathlib
import re
import json
import argparse
Expand All @@ -41,15 +41,15 @@ def uniq_list(input: typing.List[typing.Any]) -> typing.List:
return output


def get_changed_dirs(commit : str, dirs=None) -> typing.List[str]:
def get_changed_dirs(commit: str, dirs: typing.List[str] = None) -> typing.List[str]:
    """Return the parent directories of the files changed relative to ``commit``.

    :param commit: commit reference handed to ``get_changed_files``
    :param dirs: paths to restrict the lookup to; ``None`` is treated as an
        empty list
    :return: the ``os.path.dirname`` of every changed file, passed through
        ``uniq_list``
    """
    search_dirs = [] if dirs is None else dirs
    parent_dirs = []
    for changed_path in get_changed_files(commit, search_dirs):
        # a file at the repository root yields '' here
        parent_dirs.append(os.path.dirname(changed_path))
    return uniq_list(parent_dirs)


def get_changed_files(commit : str, dirs=None) -> typing.List[str]:
def get_changed_files(commit: str, dirs: typing.List[str] = None) -> typing.List[str]:
if dirs is None:
dirs = []
command = ['git', 'diff', '--name-only', '--find-renames', commit, '--'] + dirs
Expand All @@ -64,17 +64,38 @@ def get_current_commit() -> str:
return res.decode('utf-8').strip().replace("'", '')


def get_merge_base(commit1 : str, commit2 : str) -> str:
def get_merge_base(commit1: str, commit2: str) -> str:
    """Return the output of ``git merge-base commit1 commit2``.

    ``commit2`` is echoed to stderr before git is invoked.

    :param commit1: first commit reference
    :param commit2: second commit reference
    :return: git's output decoded as UTF-8 with surrounding whitespace stripped
    :raises subprocess.CalledProcessError: if git exits with a non-zero status
    """
    print(commit2, file=sys.stderr)
    command = ['git', 'merge-base', commit1, commit2]
    raw_output = subprocess.check_output(command)
    merge_base = raw_output.decode('utf-8')
    return merge_base.strip()


def filter_json_by_dirs(json_input: typing.List[typing.Dict], dirs=None) -> typing.List[dict]:
def is_dir_changed(input_dir: str, changed_dirs: typing.List[str] = None, include_parent: bool = False) -> bool:
    """Return whether ``input_dir`` is affected by any entry of ``changed_dirs``.

    ``input_dir`` counts as changed when a changed directory is ``input_dir``
    itself or lies underneath it (e.g. ``my/path`` changed while ``my`` is
    watched). With ``include_parent=True`` it also counts as changed when a
    changed directory is an ancestor of ``input_dir``.

    Paths are compared component-wise with ``pathlib.PurePath``, so ``mydir``
    is not treated as being inside ``my``.

    :param input_dir: the watched directory
    :param changed_dirs: directories that contain changes; ``None`` or an
        empty list means nothing changed
    :param include_parent: also match when a parent of ``input_dir`` changed
    :return: ``True`` if ``input_dir`` should be considered changed
    """
    # Guard on the actual parameter, not on an unrelated module-level name.
    if not changed_dirs:
        return False

    # NOTE: PurePath.is_relative_to is a python3.9+ feature
    watched = pathlib.PurePath(input_dir)
    for changed_dir in changed_dirs:
        changed = pathlib.PurePath(changed_dir)

        # the watched directory itself, or a subdirectory of it, changed
        if changed.is_relative_to(watched):
            return True

        # optionally: a parent of the watched directory changed
        if include_parent and watched.is_relative_to(changed):
            return True
    return False


def filter_json_by_dirs(json_input: typing.List[typing.Dict], dirs: typing.List[str] = None) -> typing.List[dict]:
    """Keep only the entries of ``json_input`` whose ``'dir'`` has changed.

    :param json_input: list of dicts, each read via its ``'dir'`` key
    :param dirs: changed directories; ``None`` is treated as an empty list
    :return: the entries for which ``is_dir_changed(entry['dir'], dirs)`` is
        true, in their original order
    """
    if dirs is None:
        dirs = []
    return [entry for entry in json_input if is_dir_changed(entry['dir'], dirs)]

Expand All @@ -84,6 +105,21 @@ def get_dirs_from_json(json_input: typing.List[typing.Dict]) -> typing.List[str]
return uniq_list(base_data)


def any_important_changed_files(commit: str, important_files: typing.List[str]) -> bool:
    """Return whether any "important" path changed relative to ``commit``.

    An entry of ``important_files`` is considered changed when at least one
    changed file equals it or lies underneath it, so an entry may name either
    a single file or a directory. The changed files, and any important paths
    that matched, are logged to stderr.

    :param commit: commit reference to diff against
    :param important_files: file or directory paths to look for
    :return: ``True`` if at least one important path changed
    """
    # Diff against the commit we were given rather than a module-level global.
    all_changed_files = get_changed_files(commit, ['.'])
    print(f"Changed files: {all_changed_files}", file=sys.stderr)

    # NOTE: PurePath.is_relative_to is a python3.9+ feature
    changed_paths = [pathlib.PurePath(f) for f in all_changed_files]
    important_changed_files = [
        i_file
        for i_file in important_files
        if any(changed.is_relative_to(i_file) for changed in changed_paths)
    ]

    if important_changed_files:
        print(f"Important changed files. Returning all diffs: {important_changed_files}", file=sys.stderr)
        return True
    return False


if __name__ == "__main__":

# ------------------------------------------
Expand All @@ -106,6 +142,13 @@ def get_dirs_from_json(json_input: typing.List[typing.Dict]) -> typing.List[str]
action="store_true",
help="Whether to bypass checks and return the input",
)
parser.add_argument(
"--target", "-t",
type=str,
nargs=1,
default=['main'],
help="The merge target to reference",
)
parser.add_argument(
"--dirs", "-d",
type=str,
Expand All @@ -118,6 +161,7 @@ def get_dirs_from_json(json_input: typing.List[typing.Dict]) -> typing.List[str]

file = args.file
dirs = args.dirs
target = args.target[0]
return_all = args.all
read_stdin = args.stdin

Expand Down Expand Up @@ -167,7 +211,7 @@ def get_dirs_from_json(json_input: typing.List[typing.Dict]) -> typing.List[str]
# ----------------------------------------------------------

cc = get_current_commit()
mb = get_merge_base(cc, 'main')
mb = get_merge_base(cc, target)
print(f'Current commit: {cc}', file=sys.stderr)
print(f"Merge Base: {mb}", file=sys.stderr)
changed_directories = get_changed_dirs(mb, directories_base)
Expand All @@ -176,6 +220,19 @@ def get_dirs_from_json(json_input: typing.List[typing.Dict]) -> typing.List[str]
changed_directories_no_root = [d for d in changed_directories if len(d) > 0]
print(f"Changed directories: {changed_directories_no_root}", file=sys.stderr)

output_structure = []
print(filter_json_by_dirs(matrix_data, changed_directories_no_root))
# ----------------------------------------------------------
# Determine if any important diffs in the root directory (ci, etc.)
# ----------------------------------------------------------

# these are "shared resources" that get used in the build pipeline
important_files = [
'.github/workflows',
'get-diffs.py', 'get-version.py',
'matrix-preview.json', 'matrix-latest.json',
'content/matrix.json'
]
if any_important_changed_files(mb, important_files):
print(matrix_data)
else:
print(filter_json_by_dirs(matrix_data, changed_directories))
exit(0)

0 comments on commit 828ec22

Please sign in to comment.