From 76b6088ca0d59ac74c2c51caf41a0877fa1f0f62 Mon Sep 17 00:00:00 2001
From: Jordan Laser
Date: Fri, 13 Sep 2024 08:21:34 -0600
Subject: [PATCH] update workflows

---
 .github/workflows/cloud-infra.yaml         | 43 -------------
 .github/workflows/datastream_python.yml    | 16 +++--
 .github/workflows/forcingprocessor.yml     | 25 +++++---
 .../research_datastream_terraform.yml      | 34 ++++++++++
 .../tests/test_forcingprocessor.py         |  7 +--
 .../src/python_tools/ngen_configs_gen.py   | 11 +++-
 .../src/python_tools/run_validator.py      | 16 +++--
 .../tests/test_bmi_config_generation.py    | 58 +++++++++++++++++
 python_tools/tests/test_configurer.py      | 10 +++---
 python_tools/tests/test_validator.py       | 63 +++++++++++++++++++
 .../terraform/variables.tfvars             |  2 +-
 .../terraform/variables_gitactions.tfvars  | 14 +++++
 12 files changed, 224 insertions(+), 75 deletions(-)
 delete mode 100644 .github/workflows/cloud-infra.yaml
 create mode 100644 .github/workflows/research_datastream_terraform.yml
 create mode 100644 python_tools/tests/test_bmi_config_generation.py
 create mode 100644 python_tools/tests/test_validator.py
 create mode 100644 research_datastream/terraform/variables_gitactions.tfvars

diff --git a/.github/workflows/cloud-infra.yaml b/.github/workflows/cloud-infra.yaml
deleted file mode 100644
index 620ee7ca..00000000
--- a/.github/workflows/cloud-infra.yaml
+++ /dev/null
@@ -1,43 +0,0 @@
-name: Datastream Cloud Workflow
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  build:
-    runs-on: ubuntu-latest
-
-    steps:
-    - name: Checkout code
-      uses: actions/checkout@v2
-
-    - name: Install AWS CLI
-      run: |
-        sudo apt-get update
-        sudo apt-get install -y awscli
-        aws configure set aws_access_key_id ${{ secrets.aws_access_key_id }}
-        aws configure set aws_secret_access_key ${{ secrets.aws_secret_access_key }}
-
-    # uncomment when tested
-    # - name: Read JSON file
-    #   run: |
-    #     curl -L -O https://github.com/CIROH-UA/ngen-datastream/blob/main/cloud/AWS/datastream/execution_dailyrun.json
-    #     INSTANCE_TYPE=$(jq -r '.isntance_paramters.InstanceType' ./execution_dailyrun.json)
-    #     SECURITY_GRP=$(jq -r '.isntance_paramters.InstanceType' ./execution_dailyrun.json)
-    #     ROLE=$(jq -r '.isntance_paramters.IamInstanceProfile' ./execution_dailyrun.json)
-    #     BLOCK_MAPPINGS=$(jq -r '.isntance_paramters.BlockDeviceMappings' ./execution_dailyrun.json)
-    #     REGION=$(jq -r '.region' execution_dailyrun.json)
-
-    # - name: Create AMI
-    #   run: |
-    #     curl -L -O https://github.com/CIROH-UA/ngen-datastream/blob/main/cloud/AWS/startup_ec2_gh.sh
-    #     tag_str="ResourceType=instance,Tags=[{Key=Name,Value="ngen-datastream_$(env TZ=US/Eastern date +'%Y%m%d%H%M')"}]"
-    #     output=$(aws ec2 run-instances --image-id resolve:ssm:/aws/service/ami-amazon-linux-latest/al2023-ami-kernel-default-x86_64 --instance-type $INSTANCE_TYPE --region $REGION --key-name datastream-github --security-group-ids $SECURITY_GRP --tag-specifications tag_str --user-data file://startup_ec2_gh.sh --iam-instance-profile Name=$ROLE --block-device-mappings $BLOCK_MAPPINGS)
-    #     instance_id=$(echo $output | jq -r '.Instances[0].InstanceId')
-    #     aws ec2 wait instance-running --instance-ids $instance_id
-    #     ami_output=$(aws ec2 create-image --instance-id $instance_id --name "My_AMI_Name" --region $REGION --description "My AMI Description")
-    #     ami_id=$(echo $ami_output | jq -r '.ImageId')
-    #     aws ec2 stop-instances --instance-ids $instance_id
-    #     echo "Created AMI: $AMI_ID"
diff --git a/.github/workflows/datastream_python.yml b/.github/workflows/datastream_python.yml
index 82507d07..39272ff2 100644
--- a/.github/workflows/datastream_python.yml
+++ b/.github/workflows/datastream_python.yml
@@ -4,18 +4,22 @@ name: Test Datastream Python
 
 on:
   push:
-    branches: [ "main" ]
+    branches:
+      - main
+    paths:
+      - 'python_tools/**'
+      - '.github/workflows/datastream_python.yml'
   pull_request:
-    branches: [ "main" ]
-
+    branches:
+      - main
+    paths:
+      - 'python_tools/**'
+      - '.github/workflows/datastream_python.yml'
 permissions:
   contents: read
-
 jobs:
   build:
-
     runs-on: ubuntu-latest
-
     steps:
     - uses: actions/checkout@v3
     - name: Set up Python 3.10
diff --git a/.github/workflows/forcingprocessor.yml b/.github/workflows/forcingprocessor.yml
index 51c2df3e..9118ae36 100644
--- a/.github/workflows/forcingprocessor.yml
+++ b/.github/workflows/forcingprocessor.yml
@@ -4,18 +4,24 @@ name: Test Forcing Processor
 
 on:
   push:
-    branches: [ "main" ]
+    branches:
+      - main
+    paths:
+      - 'forcingprocessor/**'
+      - '.github/workflows/forcingprocessor.yml'
   pull_request:
-    branches: [ "main" ]
-
+    branches:
+      - main
+    paths:
+      - 'forcingprocessor/**'
+      - '.github/workflows/forcingprocessor.yml'
 permissions:
   contents: read
-
 jobs:
   build:
 
     runs-on: ubuntu-latest
-
+    timeout-minutes: 10
     steps:
     - uses: actions/checkout@v3
    - name: Set up Python 3.10
@@ -29,7 +35,8 @@ jobs:
         pip install pytest
     - name: Test with pytest
       run: |
-        python -m pytest -vv forcingprocessor/ --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_google_cloud_storage" --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_gcs" --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_gs" --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_ciroh_zarr" --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_nomads_post_processed" --deselect="forcingprocessor/tests/test_forcingprocessor.py::test_retro_ciroh_zarr"
-        python -m pytest -vv forcingprocessor/ -k test_google_cloud_storage
-        python -m pytest -vv forcingprocessor/ -k test_gs
-        python -m pytest -vv forcingprocessor/ -k test_gcs
+        cd forcingprocessor
+        python -m pytest -vv --deselect="tests/test_forcingprocessor.py::test_google_cloud_storage" --deselect="tests/test_forcingprocessor.py::test_gcs" --deselect="tests/test_forcingprocessor.py::test_gs" --deselect="tests/test_forcingprocessor.py::test_ciroh_zarr" --deselect="tests/test_forcingprocessor.py::test_nomads_post_processed" --deselect="tests/test_forcingprocessor.py::test_retro_ciroh_zarr"
+        python -m pytest -vv -k test_google_cloud_storage
+        python -m pytest -vv -k test_gs
+        python -m pytest -vv -k test_gcs
diff --git a/.github/workflows/research_datastream_terraform.yml b/.github/workflows/research_datastream_terraform.yml
new file mode 100644
index 00000000..92bad5b9
--- /dev/null
+++ b/.github/workflows/research_datastream_terraform.yml
@@ -0,0 +1,34 @@
+name: Validate research datastream terraform
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'research_datastream/terraform/**'
+      - '.github/workflows/research_datastream_terraform.yml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'research_datastream/terraform/**'
+      - '.github/workflows/research_datastream_terraform.yml'
+jobs:
+  validate-terraform:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v2
+      - name: Setup Terraform
+        uses: hashicorp/setup-terraform@v1
+      - name: Configure AWS
+        run: |
+          aws configure set aws_access_key_id ${{ secrets.aws_access_key_id }}
+          aws configure set aws_secret_access_key ${{ secrets.aws_secret_access_key }}
+          aws configure set region us-east-1
+      - name: Validate Terraform
+        run: |
+          cd research_datastream/terraform
+          terraform init
+          terraform validate
+          terraform apply -var-file=./variables_gitactions.tfvars -auto-approve
+          terraform destroy -var-file=./variables_gitactions.tfvars -auto-approve
diff --git a/forcingprocessor/tests/test_forcingprocessor.py b/forcingprocessor/tests/test_forcingprocessor.py
index 08c884aa..0bdb7c30 100644
--- a/forcingprocessor/tests/test_forcingprocessor.py
+++ b/forcingprocessor/tests/test_forcingprocessor.py
@@ -36,7 +36,7 @@
 
     "run" : {
         "verbose" : False,
-        "collect_stats" : True,
+        "collect_stats" : False,
         "nprocs" : 1
     }
 }
@@ -116,7 +116,6 @@ def test_gs():
     os.remove(parquet)
 
 def test_gcs():
-    # assert False, f'hangs in pytest, but should work'
     nwmurl_conf['start_date'] = "202407100100"
     nwmurl_conf['end_date'] = "202407100100"
     nwmurl_conf["urlbaseinput"] = 6
@@ -126,7 +125,7 @@ def test_gcs():
     assert parquet.exists()
     os.remove(parquet)
 
-def test_noaa_nwm_pds():
+def test_noaa_nwm_pds_https():
     nwmurl_conf['start_date'] = date + hourminute
     nwmurl_conf['end_date'] = date + hourminute
     nwmurl_conf["urlbaseinput"] = 7
@@ -157,7 +156,7 @@ def test_ciroh_zarr():
     assert parquet.exists()
     os.remove(parquet)
 
-def test_retro_2_1():
+def test_retro_2_1_https():
     conf['forcing']['nwm_file'] = retro_filenamelist
     nwmurl_conf_retro["urlbaseinput"] = 1
     generate_nwmfiles(nwmurl_conf_retro)
diff --git a/python_tools/src/python_tools/ngen_configs_gen.py b/python_tools/src/python_tools/ngen_configs_gen.py
index f007fd89..f58fc248 100644
--- a/python_tools/src/python_tools/ngen_configs_gen.py
+++ b/python_tools/src/python_tools/ngen_configs_gen.py
@@ -72,7 +72,12 @@ def generate_troute_conf(out_dir,start,max_loop_size,geo_file_path):
     with open(Path(out_dir,"ngen.yaml"),'w') as fp:
         fp.writelines(troute_conf_str)
 
-def gen_petAORcfe(hf_file,out,models,include):
+def gen_petAORcfe(hf_file,out,include):
+    models = []
+    if 'PET' in include:
+        models.append(Pet)
+    if 'CFE' in include:
+        models.append(Cfe)
     for j, jmodel in enumerate(include):
         hf: gpd.GeoDataFrame = gpd.read_file(hf_file, layer="divides")
         hf_lnk_data: pd.DataFrame = gpd.read_file(hf_file,layer="model-attributes")
@@ -187,14 +192,14 @@
             print(f'ignoring CFE')
         else:
             print(f'Generating CFE configs from pydantic models',flush = True)
-            gen_petAORcfe(args.hf_file,args.outdir,[Cfe],["CFE"])
+            gen_petAORcfe(args.hf_file,args.outdir,["CFE"])
 
     if "PET" in model_names:
         if "PET" in ignore:
             print(f'ignoring PET')
         else:
             print(f'Generating PET configs from pydantic models',flush = True)
-            gen_petAORcfe(args.hf_file,args.outdir,[Pet],["PET"])
+            gen_petAORcfe(args.hf_file,args.outdir,["PET"])
 
     globals = [x[0] for x in serialized_realization]
     if serialized_realization.routing is not None:
diff --git a/python_tools/src/python_tools/run_validator.py b/python_tools/src/python_tools/run_validator.py
index afa592f0..443d3924 100644
--- a/python_tools/src/python_tools/run_validator.py
+++ b/python_tools/src/python_tools/run_validator.py
@@ -61,6 +61,8 @@ def validate_catchment_files(validations, catchments):
         if jval == "forcing":
             if files[0].endswith(".nc"):
                 nc_file = files[0]
+                if not os.path.exists(nc_file):
+                    raise Exception(f"Forcings file not found!")
                 with xr.open_dataset(os.path.join(forcing_dir,nc_file)) as ngen_forcings:
                     df = ngen_forcings['precip_rate']
                     forcings_start = datetime.fromtimestamp(ngen_forcings.Time.values[0,0],timezone.utc)
@@ -73,7 +75,8 @@ def validate_catchment_files(validations, catchments):
             compiled = re.compile(jcatch_pattern)
 
             jfile = files[j]
-            assert bool(compiled.match(jfile)), f"{jcatch} -> File {jfile} does not match pattern specified {pattern}"
+            if not bool(compiled.match(jfile)):
+                raise Exception(f"{jcatch} -> File {jfile} does not match pattern specified {pattern}")
 
             if jval == "forcing":
                 if j == 0:
@@ -102,9 +105,13 @@ def validate_data_dir(data_dir):
         else:
             raise Exception('This run directory contains more than a single geopackage file, remove all but one.')
 
-    if realization_file is None: raise Exception(f"Did not find realization file in ngen-run/config!!!")
+    if realization_file is None:
+        raise Exception(f"Did not find realization file in ngen-run/config!!!")
     print(f'Realization found! Retrieving catchment data...',flush = True)
 
+    if geopackage_file is None:
+        raise Exception(f"Did not find geopackage file in ngen-run/config!!!")
+
     catchments = geopandas.read_file(geopackage_file, layer='divides')
     catchment_list = sorted(list(catchments['divide_id']))
 
@@ -117,7 +124,8 @@ def validate_data_dir(data_dir):
     config_dir = os.path.join(data_dir,"config","cat_config")
     if os.path.isdir(forcing_dir):
         forcing_files = [x for _,_,x in os.walk(forcing_dir)]
-        if len(forcing_files) == 0: raise Exception(f"No forcing files in {forcing_dir}")
+        if len(forcing_files) == 0:
+            raise Exception(f"No forcing files in {forcing_dir}")
         forcing_files = sorted(forcing_files[0])
     else:
         forcing_files = [forcing_dir]
@@ -143,7 +151,7 @@ def validate_data_dir(data_dir):
 
     if serialized_realization.routing:
         troute_path = os.path.join(data_dir,serialized_realization.routing.config)
-        assert os.path.exists(troute_path), "t-route specified in config, but not found in"
+        assert os.path.exists(troute_path), "t-route specified in config, but not found"
 
     nprocs = os.cpu_count()
     val_dict_list = []
diff --git a/python_tools/tests/test_bmi_config_generation.py b/python_tools/tests/test_bmi_config_generation.py
new file mode 100644
index 00000000..a4a04360
--- /dev/null
+++ b/python_tools/tests/test_bmi_config_generation.py
@@ -0,0 +1,58 @@
+import pytest, os
+from python_tools.ngen_configs_gen import gen_noah_owp_confs_from_pkl, gen_petAORcfe, generate_troute_conf
+from python_tools.noahowp_pkl import gen_noah_owp_pkl
+import datetime as dt
+
+TEST_DIR = os.path.dirname(os.path.realpath(__file__))
+DATA_DIR = os.path.join(TEST_DIR,'data')
+if os.path.exists(DATA_DIR):
+    os.system(f'rm -rf {str(DATA_DIR)}')
+os.system(f'mkdir {str(DATA_DIR)}')
+CONF_DIR = os.path.join(DATA_DIR,'cat_config')
+NOAH_DIR = os.path.join(CONF_DIR,'NOAH-OWP-M')
+CFE_DIR = os.path.join(CONF_DIR,'CFE')
+PET_DIR = os.path.join(CONF_DIR,'PET')
+GEOPACKAGE_NAME = "palisade.gpkg"
+GEOPACKAGE_PATH = os.path.join(DATA_DIR,GEOPACKAGE_NAME)
+os.system(f"curl -o {GEOPACKAGE_PATH} -L https://ngen-datastream.s3.us-east-2.amazonaws.com/{GEOPACKAGE_NAME}")
+PKL_FILE = os.path.join(DATA_DIR,"noah-owp-modular-init.namelist.input.pkl")
+START = dt.datetime.strptime("202006200100",'%Y%m%d%H%M')
+END = dt.datetime.strptime("202006200100",'%Y%m%d%H%M')
+
+
+@pytest.fixture(autouse=True)
+def clean_dir():
+    if os.path.exists(CONF_DIR):
+        os.system(f'rm -rf {str(CONF_DIR)}')
+    os.system(f'mkdir {str(CONF_DIR)}')
+
+def test_pkl():
+    gen_noah_owp_pkl(GEOPACKAGE_PATH,DATA_DIR)
+    assert os.path.exists(PKL_FILE)
+
+def test_noah_owp_m():
+    os.system(f'mkdir -p {NOAH_DIR}')
+    gen_noah_owp_confs_from_pkl(PKL_FILE, NOAH_DIR, START, END)
+    noah_config_example = os.path.join(NOAH_DIR,"noah-owp-modular-init-cat-2586011.namelist.input")
+    assert os.path.exists(noah_config_example)
+
+def test_cfe():
+    os.system(f'mkdir -p {CFE_DIR}')
+    gen_petAORcfe(GEOPACKAGE_PATH,DATA_DIR,["CFE"])
+    cfe_example = os.path.join(CFE_DIR,"CFE_cat-2586011.ini")
+    assert os.path.exists(cfe_example)
+
+def test_pet():
+    os.system(f'mkdir -p {PET_DIR}')
+    gen_petAORcfe(GEOPACKAGE_PATH,DATA_DIR,["PET"])
+    pet_example = os.path.join(PET_DIR,"PET_cat-2586011.ini")
+    assert os.path.exists(pet_example)
+
+def test_routing():
+    max_loop_size = (END - START + dt.timedelta(hours=1)).total_seconds() / (3600)
+    generate_troute_conf(DATA_DIR,START,max_loop_size,GEOPACKAGE_PATH)
+    yml_example = os.path.join(DATA_DIR,'ngen.yaml')
+    assert os.path.exists(yml_example)
+
+
+
diff --git a/python_tools/tests/test_configurer.py b/python_tools/tests/test_configurer.py
index f52c0463..93e7524a 100644
--- a/python_tools/tests/test_configurer.py
+++ b/python_tools/tests/test_configurer.py
@@ -1,6 +1,6 @@
 import os, pytest, json
 from datetime import datetime
-from python_tools.configure_datastream import config_class2dict, create_confs
+from python_tools.configure_datastream import create_confs
 
 SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
 DATA_DIR = os.path.join(SCRIPT_DIR,'data')
@@ -71,12 +71,12 @@ def __init__(self,
     )
 
-@pytest.fixture
-def clean_dir():
+@pytest.fixture(autouse=True)
+def clean_dir():
     if os.path.exists(DATA_DIR):
         os.system(f'rm -rf {str(DATA_DIR)}')
     os.system(f'mkdir {str(DATA_DIR)}')
 
-def test_conf_basic(clean_dir):
+def test_conf_basic():
     create_confs(inputs)
     assert os.path.exists(CONF_NWM)
     assert os.path.exists(CONF_FP)
@@ -85,7 +85,7 @@
     assert os.path.exists(REALIZATION_META_DS)
     assert os.path.exists(REALIZATION_RUN)
 
-def test_conf_daily(clean_dir):
+def test_conf_daily():
     inputs.start_date = "DAILY"
     inputs.end_date = ""
     create_confs(inputs)
diff --git a/python_tools/tests/test_validator.py b/python_tools/tests/test_validator.py
new file mode 100644
index 00000000..52697eda
--- /dev/null
+++ b/python_tools/tests/test_validator.py
@@ -0,0 +1,63 @@
+import pytest, os
+from python_tools.run_validator import validate_data_dir
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+DATA_DIR = os.path.join(SCRIPT_DIR,'data')
+DATA_PACKAGE = "https://ngen-datastream.s3.us-east-2.amazonaws.com/validator_pytest.tar.gz"
+ORIGINAL_TAR = "validator_test_original.tar.gz"
+ORIGINAL_TAR_PATH = os.path.join(DATA_DIR,ORIGINAL_TAR)
+TEST_DIR = os.path.join(DATA_DIR,"test_dir")
+TEST_DATA_DIR = os.path.join(TEST_DIR,"ngen-run")
+os.system(f"curl -o {ORIGINAL_TAR_PATH} -L {DATA_PACKAGE}")
+
+@pytest.fixture(autouse=True)
+def ready_test_folder():
+    if os.path.exists(TEST_DIR):
+        os.system(f"rm -rf {TEST_DIR}")
+    os.system(f'mkdir {TEST_DIR}')
+    os.system(f"tar -xf {ORIGINAL_TAR_PATH} -C {TEST_DIR}")
+
+def test_missing_geopackage():
+    del_file = str(TEST_DATA_DIR) + '/config/*.gpkg'
+    os.system(f"rm {del_file}")
+    try:
+        validate_data_dir(TEST_DATA_DIR)
+        assert False
+    except Exception as inst:
+        assert inst.__str__() == "Did not find geopackage file in ngen-run/config!!!"
+
+def test_missing_realization():
+    del_file = str(TEST_DATA_DIR) + '/config/*realization*.json'
+    os.system(f"rm {del_file}")
+    try:
+        validate_data_dir(TEST_DATA_DIR)
+        assert False
+    except Exception as inst:
+        assert inst.__str__() == "Did not find realization file in ngen-run/config!!!"
+
+def test_missing_bmi_config():
+    del_file = str(TEST_DATA_DIR) + '/config/cat_config/CFE/CFE_cat-2586011.ini'
+    os.system(f"rm {del_file}")
+    try:
+        validate_data_dir(TEST_DATA_DIR)
+        assert False
+    except Exception as inst:
+        assert inst.__str__() == "cat-2586011 -> File config/cat_config/CFE/CFE_cat-2586012.ini does not match pattern specified config/cat_config/CFE/CFE_{{id}}.ini"
+
+def test_missing_forcings():
+    del_file = str(TEST_DATA_DIR) + '/forcings/*.nc'
+    os.system(f"rm {del_file}")
+    try:
+        validate_data_dir(TEST_DATA_DIR)
+        assert False
+    except Exception as inst:
+        assert inst.__str__() == f"Forcings file not found!"
+
+def test_missing_troute_config():
+    del_file = str(TEST_DATA_DIR) + '/config/ngen.yaml'
+    os.system(f"rm {del_file}")
+    try:
+        validate_data_dir(TEST_DATA_DIR)
+        assert False
+    except Exception as inst:
+        assert inst.__str__() == "t-route specified in config, but not found"
\ No newline at end of file
diff --git a/research_datastream/terraform/variables.tfvars b/research_datastream/terraform/variables.tfvars
index f316f7a3..4469f95e 100644
--- a/research_datastream/terraform/variables.tfvars
+++ b/research_datastream/terraform/variables.tfvars
@@ -1,4 +1,4 @@
-region = "us-west-2"
+region = "us-east-1"
 sm_name = "datastreamconductor_test"
 sm_role_name = "datastreamconductor_role_test"
 starter_lambda_name = "ec2start_ami_test"
diff --git a/research_datastream/terraform/variables_gitactions.tfvars b/research_datastream/terraform/variables_gitactions.tfvars
new file mode 100644
index 00000000..bb071677
--- /dev/null
+++ b/research_datastream/terraform/variables_gitactions.tfvars
@@ -0,0 +1,14 @@
+region = "us-east-1"
+sm_name = "datastreamconductor_github_actions"
+sm_role_name = "datastreamconductor_role_github_actions"
+starter_lambda_name = "ec2start_ami_github_actions"
+commander_lambda_name = "ec2commander_github_actions"
+poller_lambda_name = "CommandPoller_github_actions"
+checker_lambda_name = "S3ObjectChecker_github_actions"
+stopper_lambda_name = "EC2Stopper_github_actions"
+lambda_policy_name = "datastream_lambda_policy_github_actions"
+lambda_role_name = "datastream_lambda_role_github_actions"
+lambda_invoke_policy_name = "datastream_lambda_invoke_policy_github_actions"
+ec2_role = "datastream_ec2_role_github_actions"
+ec2_policy_name = "datastream_ec2_policy_github_actions"
+profile_name = "datastream_ec2_profile_github_actions"