Commit fbcab15: validate tests

Signed-off-by: Samhita Alla <[email protected]>
samhita-alla committed Nov 28, 2023
1 parent 3bf2d1b commit fbcab15
Showing 7 changed files with 156 additions and 56 deletions.
9 changes: 3 additions & 6 deletions .github/workflows/checks.yml
@@ -147,7 +147,6 @@ jobs:
          for i in */; do tar -czvf "../release-snacks/${i%/}.tar.gz" "$i" & done; wait
          cd .. && sudo rm -rf download-artifact/
          cp flyte_tests_manifest.json release-snacks/flyte_tests_manifest.json
-         python flyte_tests_generator.py >> flyte_tests.txt
          cp flyte_tests.txt release-snacks/flyte_tests.txt
      - name: Release test manifest
        uses: goreleaser/goreleaser-action@v2
@@ -219,8 +218,9 @@ jobs:
        with:
          repository: flyteorg/flytesnacks
          path: flytesnacks
-     - name: Generate tests
-       run: python flyte_tests_generator.py >> flyte_tests.txt
+     - name: Validate if the tests exist
+       run: |
+         python flyte_tests_validate.py
      - name: Register specific tests
        run: |
          while read -r line;
@@ -234,6 +234,3 @@
            --version ${{ env.FLYTESNACKS_VERSION }} \
            flytesnacks/$line;
          done < flyte_tests.txt
-     - name: End2End
-       run: |
-         make end2end_execute
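
For context (not part of the diff): a rough Python equivalent of the registration loop above, assuming the elided command is `flytectl register files` (only the `--version` flag and the file argument are visible in this hunk); the version value is a placeholder.

import subprocess

FLYTESNACKS_VERSION = "v1.0.0"  # placeholder for ${{ env.FLYTESNACKS_VERSION }}

with open("flyte_tests.txt") as tests:
    for line in tests:
        example = line.strip()
        # One registration call per listed example file, mirroring the shell loop.
        subprocess.run(
            ["flytectl", "register", "files", "--version", FLYTESNACKS_VERSION, f"flytesnacks/{example}"],
            check=True,
        )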
2 changes: 2 additions & 0 deletions (example file, name not shown)
@@ -29,6 +29,7 @@
# %% [markdown]
# A simple split function that divides a list into two halves.


# %%
@task
def split(numbers: typing.List[int]) -> Tuple[typing.List[int], typing.List[int], int, int]:
@@ -65,6 +66,7 @@ def merge(sorted_list1: typing.List[int], sorted_list2: typing.List[int]) -> typ
# Generally speaking, the algorithm recurses through the list, splitting it in half until it reaches a size that we
# know is efficient enough to sort locally, at which point it uses the Python built-in sorted function.


# %% [markdown]
# This runs the sorting completely locally. It's faster and more efficient to do so if the entire list fits in memory.
# %%
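For context (not part of the diff): the split/local-sort pattern described above, as a minimal flytekit sketch. The split body and the sort_locally task are illustrative reconstructions from the signature shown, not code from this commit.

import typing
from typing import Tuple

from flytekit import task


@task
def split(numbers: typing.List[int]) -> Tuple[typing.List[int], typing.List[int], int, int]:
    # Divide the list into two halves, returning both halves and their sizes.
    middle = len(numbers) // 2
    return numbers[:middle], numbers[middle:], middle, len(numbers) - middle


@task
def sort_locally(numbers: typing.List[int]) -> typing.List[int]:
    # Below the recursion threshold, fall back to Python's built-in sort.
    return sorted(numbers)
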
2 changes: 2 additions & 0 deletions (example file, name not shown)
@@ -87,6 +87,7 @@ def parent_wf(a: int) -> Tuple[int, str, str]:
if __name__ == "__main__":
print(f"Running parent_wf(a=3) {parent_wf(a=3)}")


# %% [markdown]
# Interestingly, we can nest a workflow that has a subworkflow within another workflow.
# Workflows can simply be composed of other workflows, even if they are standalone entities. Each of the
@@ -164,6 +165,7 @@ def ext_workflow(my_input: str) -> Dict:
"parent_workflow_execution",
)


# %% [markdown]
# Define another task that returns the repeated keys (in our case, words) from a dictionary.
# %%
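For context (not part of the diff): a minimal flytekit sketch of the nesting described above; all names here are illustrative, not taken from this file.

from flytekit import task, workflow


@task
def double(a: int) -> int:
    return a * 2


@workflow
def child_wf(a: int) -> int:
    return double(a=a)


@workflow
def parent_with_sub_wf(a: int) -> int:
    # child_wf runs as a subworkflow: its nodes are embedded in this graph.
    return child_wf(a=a)


@workflow
def nested_wf(a: int) -> int:
    # A workflow that itself contains a subworkflow can be nested further.
    return parent_with_sub_wf(a=a)
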
17 changes: 17 additions & 0 deletions flyte_tests.txt
@@ -0,0 +1,17 @@
examples/advanced_composition/advanced_composition/chain_entities.py
examples/advanced_composition/advanced_composition/conditions.py
examples/advanced_composition/advanced_composition/decorating_tasks.py
examples/advanced_composition/advanced_composition/decorating_workflows.py
examples/advanced_composition/advanced_composition/dynamics.py
examples/advanced_composition/advanced_composition/map_task.py
examples/advanced_composition/advanced_composition/waiting_for_external_inputs.py
examples/basics/basics/documenting_workflows.py
examples/basics/basics/hello_world.py
examples/basics/basics/named_outputs.py
examples/basics/basics/shell_task.py
examples/basics/basics/workflow.py
examples/data_types_and_io/data_types_and_io/dataclass.py
examples/data_types_and_io/data_types_and_io/enum_type.py
examples/data_types_and_io/data_types_and_io/file.py
examples/data_types_and_io/data_types_and_io/folder.py
examples/data_types_and_io/data_types_and_io/structured_dataset.py
14 changes: 0 additions & 14 deletions flyte_tests_generator.py

This file was deleted.

108 changes: 72 additions & 36 deletions flyte_tests_manifest.json
@@ -3,6 +3,57 @@
     "name": "core",
     "priority": "P0",
     "path": "core",
+    "examples": [
+      ["advanced_composition.chain_entities.chain_workflows_wf", {}],
+      ["advanced_composition.conditions.consume_outputs", { "my_input": 10.0 }],
+      ["advanced_composition.decorating_tasks.wf", { "x": 10 }],
+      ["advanced_composition.decorating_workflows.wf", { "x": 19.8 }],
+      ["advanced_composition.dynamics.wf", { "s1": "Pear", "s2": "Earth" }],
+      [
+        "advanced_composition.map_task.my_map_workflow",
+        { "a": [1, 2, 3, 4, 5] }
+      ],
+      [
+        "advanced_composition.waiting_for_external_inputs.sleep_wf",
+        { "num": 5 }
+      ],
+      ["basics.documenting_workflows.sphinx_docstring_wf", {}],
+      ["basics.hello_world.hello_world_wf", {}],
+      ["basics.named_outputs.simple_wf_with_named_outputs", {}],
+      ["basics.shell_task.shell_task_wf", {}],
+      ["basics.workflow.simple_wf", { "x": [1, 2, 3], "y": [1, 2, 3] }],
+      ["data_types_and_io.dataclass.dataclass_wf", { "x": 10, "y": 20 }],
+      ["data_types_and_io.enum_type.coffee_maker", { "coffee": "latte" }],
+      [
+        "data_types_and_io.file.normalize_csv_file",
+        {
+          "csv_url": "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv",
+          "column_names": [
+            "Name",
+            "Sex",
+            "Age",
+            "Heights (in)",
+            "Weight (lbs)"
+          ],
+          "columns_to_normalize": ["Age"]
+        }
+      ],
+      [
+        "data_types_and_io.folder.download_and_normalize_csv_files",
+        {
+          "csv_urls": [
+            "https://people.sc.fsu.edu/~jburkardt/data/csv/biostats.csv",
+            "https://people.sc.fsu.edu/~jburkardt/data/csv/faithful.csv"
+          ],
+          "columns_metadata": [
+            ["Name", "Sex", "Age", "Heights (in)", "Weight (lbs)"],
+            ["Index", "Eruption length (mins)", "Eruption wait (mins)"]
+          ],
+          "columns_to_normalize_metadata": [["Age"], ["Eruption length (mins)"]]
+        }
+      ],
+      ["data_types_and_io.structured_dataset.simple_sd_wf", { "a": 42 }]
+    ],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -21,6 +72,12 @@
     "name": "integrations-k8s-spark",
     "priority": "P1",
     "path": "examples/k8s_spark_plugin",
+    "examples": [
+      [
+        "k8s_spark_plugin.pyspark_pi.my_spark",
+        { "triggered_date": "2023-11-21T18:58:01" }
+      ]
+    ],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -30,6 +87,7 @@
     "name": "integrations-kfpytorch",
     "priority": "P1",
     "path": "examples/kfpytorch_plugin",
+    "examples": [["kfpytorch_plugin.pytorch_mnist.pytorch_training_wf", {}]],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -39,15 +97,9 @@
     "name": "integrations-kftensorflow",
     "priority": "P1",
     "path": "examples/kftensorflow_plugin",
-    "exitCondition": {
-      "exit_success": true,
-      "exit_message": ""
-    }
-  },
-  {
-    "name": "integrations-pod",
-    "priority": "P1",
-    "path": "examples/k8s_pod_plugin",
+    "examples": [
+      ["kftensorflow_plugin.tf_mnist.mnist_tensorflow_workflow", {}]
+    ],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -57,15 +109,7 @@
     "name": "integrations-pandera",
     "priority": "P1",
     "path": "examples/pandera_plugin",
-    "exitCondition": {
-      "exit_success": true,
-      "exit_message": ""
-    }
-  },
-  {
-    "name": "integrations-whylogs",
-    "priority": "P1",
-    "path": "examples/whylogs_plugin",
+    "examples": [["pandera_plugin.basic_schema_example.process_data", {}]],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -75,6 +119,7 @@
     "name": "integrations-modin",
     "priority": "P1",
     "path": "examples/modin_plugin",
+    "examples": [["modin_plugin.knn_classifier.pipeline", {}]],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -84,6 +129,9 @@
     "name": "integrations-papermill",
     "priority": "P1",
     "path": "examples/papermill_plugin",
+    "examples": [
+      ["papermill_plugin.simple.nb_to_python_wf", { "f": 3.1415926535 }]
+    ],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
@@ -93,24 +141,12 @@
     "name": "integrations-greatexpectations",
     "priority": "P1",
     "path": "examples/greatexpectations_plugin",
-    "exitCondition": {
-      "exit_success": true,
-      "exit_message": ""
-    }
-  },
-  {
-    "name": "integrations-sagemaker-pytorch",
-    "priority": "P1",
-    "path": "examples/sagemaker_pytorch_plugin",
-    "exitCondition": {
-      "exit_success": true,
-      "exit_message": ""
-    }
-  },
-  {
-    "name": "integrations-sagemaker-training",
-    "priority": "P1",
-    "path": "examples/sagemaker_training_plugin",
+    "examples": [
+      ["greatexpectations_plugin.task_example.simple_wf", {}],
+      ["greatexpectations_plugin.task_example.file_wf", {}],
+      ["greatexpectations_plugin.task_example.schema_wf", {}],
+      ["greatexpectations_plugin.task_example.runtime_wf", {}]
+    ],
     "exitCondition": {
       "exit_success": true,
       "exit_message": ""
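For context (not part of the diff): each `examples` entry pairs a fully qualified workflow name with its launch inputs. A minimal sketch of reading the manifest (the print format is an assumption, not CI behavior):

import json

with open("flyte_tests_manifest.json") as f:
    manifest = json.load(f)

for entry in manifest:
    for workflow, params in entry.get("examples", []):
        # e.g. core: basics.workflow.simple_wf with inputs {'x': [1, 2, 3], 'y': [1, 2, 3]}
        print(f"{entry['name']}: {workflow} with inputs {params}")
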
60 changes: 60 additions & 0 deletions flyte_tests_validate.py
@@ -0,0 +1,60 @@
import json
import os
import re
import subprocess

file_list = "flyte_tests.txt"

with open("flyte_tests_manifest.json", "r") as file:
    data = json.load(file)

examples = [(example[0], example[1]) for entry in data for example in entry.get("examples", []) if len(example) >= 2]

with open(file_list, "r") as f:
    file_names = [line.strip() for line in f]

for file_name in file_names:
    print(f"Processing file: {file_name}")

    # Build the dotted module path from the file's immediate parent directory and its base name,
    # e.g. examples/basics/basics/hello_world.py -> basics.hello_world
    directory_path = os.path.dirname(file_name).split(os.path.sep)[-1:]
    file_path = ".".join(directory_path + [os.path.splitext(os.path.basename(file_name))[0]])

    # Collect the manifest workflows defined in this file
    workflows = [(workflow, params) for workflow, params in examples if file_path in workflow]
    if not workflows:
        raise Exception("No workflows are defined in the given file.")

    for workflow, params in workflows:
        # Run `pyflyte run` on the file to list the tasks and workflows it defines
        output_string = subprocess.run(["pyflyte", "run", file_name], capture_output=True, text=True).stdout

        # Regular expression that matches the entity names in the pyflyte run output table
        pattern = re.compile(r"^\│\s+(\w+)\s+", re.MULTILINE)

        # Extract the available command names
        commands = re.findall(pattern, output_string)

        # Check whether the specified workflow is present in the pyflyte run output
        just_the_workflow = workflow.split(".")[2]  # e.g. basics.hello_world.hello_world_wf -> hello_world_wf
        if just_the_workflow in commands:
            print("Workflow found in the pyflyte run output!")
        else:
            raise Exception("Workflow not found in the pyflyte run output.")

        # Check whether the specified parameters are valid
        options_output = subprocess.run(
            ["pyflyte", "run", file_name, just_the_workflow, "--help"], capture_output=True, text=True
        ).stdout

        # Collect the option names advertised by the workflow's --help output
        options = [option.replace("--", "") for option in re.compile(r"--\w+").findall(options_output)]

        # Validate that the provided params are a subset of the supported params
        if set(params).issubset(set(options)):
            print("All parameters found!")
        else:
            raise Exception("There's a mismatch between the values accepted by the workflow and the ones you provided.")
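
For context (not part of the diff): how the pattern above pulls entity names out of the table that `pyflyte run <file>` prints. The sample output below is illustrative, not captured from a real run.

import re

sample_output = """\
│ hello_world_wf   Workflow │
│ say_hello        Task     │
"""

pattern = re.compile(r"^\│\s+(\w+)\s+", re.MULTILINE)
print(re.findall(pattern, sample_output))  # ['hello_world_wf', 'say_hello']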
