Skip to content

Commit

Permalink
added logging in script
Browse files Browse the repository at this point in the history
  • Loading branch information
HarishC727 committed Jan 22, 2025
1 parent 85e43fd commit d118b27
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 17 deletions.
3 changes: 1 addition & 2 deletions scripts/noaa/gpcc_spi/gpcc_spi_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,14 @@

import pandas as pd
import json
from typing import List
import logging
from datetime import datetime
import os
import glob
from pathlib import Path

from absl import flags
from absl import app
from absl import logging

_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
FLAGS = flags.FLAGS
Expand Down
12 changes: 6 additions & 6 deletions scripts/noaa/gpcc_spi/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,27 +49,27 @@
],
"import_inputs": [
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_01_agg.csv"
},
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_03_agg.csv"
},
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_06_agg.csv"
},
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_09_agg.csv"
},
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_36_agg.csv"
},
{
"template_mcf": "gpcc_spi.tmcf",
"template_mcf": "gpcc_spi_aggregation.tmcf",
"cleaned_csv": "output_files/aggregations/gpcc_spi_pearson_72_agg.csv"
}
],
Expand Down
8 changes: 3 additions & 5 deletions scripts/noaa/gpcc_spi/preprocess_gpcc_spi.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
import glob
import xarray
from datetime import datetime
import logging
import os
import sys
from pathlib import Path
Expand All @@ -26,6 +25,7 @@

from absl import flags
from absl import app
from absl import logging

# Allows the following module imports to work when running as a script
_SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -60,6 +60,7 @@ def to_one_degree_grid_place(latlon):

def nc_to_df(nc_path, period, spi_col, start_date, end_date):
"""Read a NetCDF file and parse it into a DataFrame."""
logging.info("Read a netcdf and parse to df.")
ds = xarray.open_dataset(nc_path, engine='netcdf4')
df = ds.to_dataframe()
df = df[(df.index.get_level_values('time') >= start_date) &
Expand Down Expand Up @@ -92,9 +93,7 @@ def preprocess_one(start_date,
"""Create a single csv file from a single input nc file."""
logging.info('processing file: %s: %s', in_file,
datetime.now().strftime('%H:%M:%S'))
print("################in_file", in_file)
path = Path(in_file)
print("################path", path)
period = int(path.stem.split('_')[-1])
spi_col = f"spi_{path.stem.split('_')[-1]}"

Expand Down Expand Up @@ -129,13 +128,12 @@ def preprocess_gpcc_spi(start_date, end_date, in_pattern,
full_pattern = os.path.join(os.path.dirname(os.path.abspath(__file__)),
in_pattern)
for file in sorted(glob.glob(full_pattern)):
print("#################3file", file)
preprocess_one(start_date, end_date, file, preprocessed_dir)


def main(_):
"""Run SPI preprocessing with flags."""
print(FLAGS.gpcc_spi_input_pattern)
logging.info(FLAGS.gpcc_spi_input_pattern)
preprocess_gpcc_spi(FLAGS.start_date, FLAGS.end_date,
FLAGS.gpcc_spi_input_pattern,
FLAGS.gpcc_spi_preprocessed_dir)
Expand Down
12 changes: 8 additions & 4 deletions scripts/noaa/gpcc_spi/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,18 @@ config=spi_9m_polygon
set -x


# #Run download script
# python3 download.py
#Run download script
python3 download.py

# # Run NOAA_GPCC_StandardardizedPrecipitationIndex script
# python3 preprocess_gpcc_spi.py
# Run NOAA_GPCC_StandardardizedPrecipitationIndex script
python3 preprocess_gpcc_spi.py
echo "Created intermediate files for processing"

# shard data by year
echo "Creating a directory for the sharded files"
mkdir -p output_files/shard

echo "Sharding pcc_spi_pearson_09.csv based on year and writing them to the shard folder."
pd_csv -i output_files/gpcc_spi_pearson_09.csv -o output_files/shard/gpcc_spi_pearson_09 -sort time "df['year']=df['time'].str.slice(0,4)" -shard year


Expand Down

0 comments on commit d118b27

Please sign in to comment.