Skip to content

Commit

Permalink
GITC-5184 GITC-5181 periods.lua DETECT fix
Browse files Browse the repository at this point in the history
Merge in GITC/onearth from GITC-5184_GITC-5181 to develop

* commit '4ff31e17736bf10bddaaabcb547127f5d77a1f43':
  Made periods.lua more efficient
  Added periods.lua DETECT/force_start tests
  fixed DETECT behavior with forced start dates
  • Loading branch information
Matthew Graber committed Nov 15, 2023
2 parents 6257fd3 + 4ff31e1 commit 72a8b09
Show file tree
Hide file tree
Showing 2 changed files with 185 additions and 31 deletions.
74 changes: 45 additions & 29 deletions src/modules/time_service/utils/periods.lua
Original file line number Diff line number Diff line change
Expand Up @@ -360,12 +360,12 @@ end
-- end
--end

local function findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods)
local function findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods, start_idx, end_idx)
-- Loop through the dates from start_idx to end_idx (inclusive) and keep track of when periods begin and end
local interval = getIntervalLetter(unit)
if isValidPeriod(size, unit) then
local dateList = {}
for i = 1, #dates do
for i = start_idx, end_idx do
if not datesInPeriods[dates[i]] then
dateList[#dateList + 1] = dates[i]
if (dates[i+1] == nil) or
Expand Down Expand Up @@ -441,6 +441,22 @@ local function calculatePeriods(dates, config)
force_end = dates[#dates]
end

-- Filter out any dates that occur before force_start or after force_end
local start_idx = 1
local end_idx = #dates
if force_start ~= 'DETECT' then
local start_epoch = dateToEpoch(force_start)
while start_epoch > dateToEpoch(dates[start_idx]) do
start_idx = start_idx + 1
end
end
if force_end ~= 'DETECT' then
local end_epoch = dateToEpoch(force_end)
while end_epoch < dateToEpoch(dates[end_idx]) do
end_idx = end_idx - 1
end
end

if force_start ~= 'DETECT' and force_end ~= 'DETECT' and force_period ~= 'DETECT' then
-- Skip DETECT if all forced values are provided
local dateList = {force_start, force_end}
Expand All @@ -453,13 +469,13 @@ local function calculatePeriods(dates, config)

-- Check for year matches
local annual = false
if dates[3] ~= nil then
local tail1 = dates[1]:sub(5)
local baseYear = tonumber(dates[1]:sub(1, 4))
local tail2 = dates[2]:sub(5)
local date2Year = tonumber(dates[2]:sub(1, 4))
local tail3 = dates[3]:sub(5)
local date3Year = tonumber(dates[3]:sub(1, 4))
if dates[start_idx + 2] ~= nil and start_idx + 2 <= end_idx then
local tail1 = dates[start_idx]:sub(5)
local baseYear = tonumber(dates[start_idx]:sub(1, 4))
local tail2 = dates[start_idx + 1]:sub(5)
local date2Year = tonumber(dates[start_idx + 1]:sub(1, 4))
local tail3 = dates[start_idx + 2]:sub(5)
local date3Year = tonumber(dates[start_idx + 2]:sub(1, 4))

local interval = date2Year - baseYear
if tail1 == tail2
Expand All @@ -468,13 +484,13 @@ local function calculatePeriods(dates, config)
and date2Year + interval == date3Year
then
-- We've found 3 dates at this interval, so it's a valid period. Now find the rest.
local dateList = {dates[1], dates[2]}
datesInPeriods[dates[1]] = true
datesInPeriods[dates[2]] = true
local dateList = {dates[start_idx], dates[start_idx + 1]}
datesInPeriods[dates[start_idx]] = true
datesInPeriods[dates[start_idx + 1]] = true

local prevTail = tail2
local prevYear =date2Year
for i = 3, #dates do
for i = start_idx + 2, end_idx do
local tailI = dates[i]:sub(5)
local dateIYear = tonumber(dates[i]:sub(1, 4))

Expand All @@ -493,25 +509,25 @@ local function calculatePeriods(dates, config)
periods[#periods + 1] = {size=interval, dates=dateList, unit="year"}
annual = true
end
elseif force_period ~= 'DETECT' and dates[2] ~= nil then
elseif force_period ~= 'DETECT' and dates[start_idx + 1] ~= nil and start_idx + 1 <= end_idx then
-- only 2 dates, check if they're in the same period
local size = tonumber(string.match(force_period, "%d+"))
local unit = getIntervalUnit(force_period)
local interval = getIntervalLetter(unit)
if isValidPeriod(size, unit) then
if (calcEpochDiff(dates[1], size, interval) == dateToEpoch(dates[2])) then
periods[#periods + 1] = {size=size, dates=dates, unit=unit}
datesInPeriods[dates[1]] = true
datesInPeriods[dates[2]] = true
if (calcEpochDiff(dates[start_idx], size, interval) == dateToEpoch(dates[start_idx + 1])) then
periods[#periods + 1] = {size=size, dates={dates[start_idx], dates[start_idx + 1]}, unit=unit}
datesInPeriods[dates[start_idx]] = true
datesInPeriods[dates[start_idx + 1]] = true
end
end
end

if dates[3] ~= nil and annual == false then
if dates[start_idx + 2] ~= nil and start_idx + 2 <= end_idx and annual == false then
-- Use the given size and interval of the period if they are present.
-- Otherwise figure out the size and interval of the period based on first 3 values.
local diff1 = math.abs(dateToEpoch(dates[1]) - dateToEpoch(dates[2]))
local diff2 = math.abs(dateToEpoch(dates[2]) - dateToEpoch(dates[3]))
local diff1 = math.abs(dateToEpoch(dates[start_idx]) - dateToEpoch(dates[start_idx + 1]))
local diff2 = math.abs(dateToEpoch(dates[start_idx + 1]) - dateToEpoch(dates[start_idx + 2]))
local size, unit
if (diff1 == diff2) or (force_period ~= 'DETECT') then
if (force_period ~= 'DETECT') then
Expand All @@ -520,11 +536,11 @@ local function calculatePeriods(dates, config)
else
size, unit = calcIntervalFromSeconds(diff1)
end
findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods)
findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods, start_idx, end_idx)
else -- More complicated scenarios: when the first and second intervals are different
local minInterval = diff1
local minIntervalStartDate, minIntervalEndEpoch
for i = 2, #dates - 1 do
for i = start_idx + 1, end_idx - 1 do
local currentInterval = math.abs(dateToEpoch(dates[i]) - dateToEpoch(dates[i + 1]))
if currentInterval < minInterval then
minInterval = currentInterval
Expand All @@ -547,21 +563,21 @@ local function calculatePeriods(dates, config)
else
size, unit = calcIntervalFromSeconds(minInterval)
end
findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods)
findPeriodsAndBreaks(dates, size, unit, datesInPeriods, periods, start_idx, end_idx)
end
else
-- Leftover times are likely loners
-- Determine if subdaily or not (assume daily if single)
local unit = "day"
if dates[2] ~= nil then
local diff1 = math.abs(dateToEpoch(dates[1]) - dateToEpoch(dates[2]))
if dates[start_idx + 1] ~= nil and start_idx + 1 <= end_idx then
local diff1 = math.abs(dateToEpoch(dates[start_idx]) - dateToEpoch(dates[start_idx + 1]))
if (diff1<86400) then
unit = "second"
end
end
for _, date in ipairs(dates) do
if not datesInPeriods[date] then
periods[#periods + 1] = {size=1, dates={date}, unit=unit}
for i = start_idx, end_idx do
if not datesInPeriods[dates[i]] then
periods[#periods + 1] = {size=1, dates={dates[i]}, unit=unit}
end
end
end
Expand Down
142 changes: 140 additions & 2 deletions src/test/test_time_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ def test_periods_monthly_3dates_DETECT(self):
self.assertEqual(
layer[2], layer_res['periods'][0],
'Layer {0} has incorrect "period" value -- got {1}, expected {2}'
.format(layer[0], layer[2], layer_res['periods'][0]))
.format(layer[0], layer_res['periods'][0], layer[2]))
if not DEBUG:
remove_redis_layer(layer, db_keys)

Expand Down Expand Up @@ -896,7 +896,7 @@ def test_periods_config_latest_no_dates(self):
.format(layer_name, layer_res['periods'], periods))
if not DEBUG:
remove_redis_layer([layer_name], db_keys)

def test_periods_config_multiple_force_latest_subdaily(self):
# Test multiple configs on subdaily times with forced periods
num_dates = 127400
Expand Down Expand Up @@ -1202,6 +1202,144 @@ def test_periods_lone_start_date_detect_all_minutes(self):
.format(layer[0], layer_res['periods'], periods))
if not DEBUG:
remove_redis_layer(layer, db_keys)

def test_periods_force_detect_8_days(self):
    """Force many 8-day periods, then DETECT the end of the last one.

    Seeds one layer with dates spaced 8 days apart starting on Jan 1 of
    each year (45 dates per year for 2000-2022, 32 dates for 2023),
    registers one forced config per year plus a final config whose end
    date is DETECT, and checks the periods reported by the date service.
    """
    layer_name = 'test_periods_force_detect_8_days'
    db_keys = ['epsg4326']

    # (year, number of dates) pairs, in the order the dates are seeded.
    year_counts = [(2000 + offset, 45) for offset in range(23)]
    year_counts.append((2023, 32))

    test_layers = []
    for year, count in year_counts:
        jan_first = datetime.datetime(year, 1, 1)
        for step in range(count):
            day = jan_first + datetime.timedelta(days=step * 8)
            test_layers.append((layer_name, str(day.date())))

    # Fully forced periods; each is expected back from the service verbatim.
    forced_periods = [
        '2000-02-26/2000-12-26/P8D',
        '2001-01-01/2001-12-27/P8D',
        '2002-01-01/2002-12-27/P8D',
        '2003-01-01/2003-12-27/P8D',
        '2004-01-01/2004-12-26/P8D',
        '2005-01-01/2005-12-27/P8D',
        '2006-01-01/2006-12-27/P8D',
        '2007-01-01/2007-12-27/P8D',
        '2008-01-01/2008-12-26/P8D',
        '2009-01-01/2009-12-27/P8D',
        '2010-01-01/2010-12-27/P8D',
        '2011-01-01/2011-12-27/P8D',
        '2012-01-01/2012-12-26/P8D',
        '2013-01-01/2013-12-27/P8D',
        '2014-01-01/2014-12-27/P8D',
        '2015-01-01/2015-12-27/P8D',
        '2016-01-01/2016-12-26/P8D',
        '2017-01-01/2017-12-27/P8D',
        '2018-01-01/2018-12-27/P8D',
        '2019-01-01/2019-12-27/P8D',
        '2020-01-01/2020-12-26/P8D',
        '2021-01-01/2021-12-27/P8D',
        '2022-01-01/2022-12-27/P8D',
    ]

    # Configs are registered in chronological order; the final one leaves
    # the end date to be detected from the seeded 2023 dates.
    for config in forced_periods + ['2023-01-01/DETECT/P8D']:
        add_redis_config(test_layers, db_keys, config)

    # 2023-09-06 is the 32nd seeded 2023 date (Jan 1 + 31 * 8 days).
    periods = forced_periods + ['2023-01-01/2023-09-06/P8D']

    seed_redis_data(test_layers, db_keys=db_keys)
    response = requests.get(self.date_service_url + 'key1=epsg4326')
    res = response.json()
    for layer in test_layers:
        name = layer[0]
        layer_res = res.get(name)
        self.assertIsNotNone(
            layer_res,
            'Layer {0} not found in list of all layers'.format(name))
        self.assertEqual(
            periods, layer_res['periods'],
            'Layer {0} has incorrect "periods" -- got {1}, expected {2}'
            .format(name, layer_res['periods'], periods))
        if not DEBUG:
            remove_redis_layer(layer, db_keys)

def test_periods_force_start_skip_gaps(self):
    """Forced start with a DETECT end must ignore earlier, gapped dates.

    Seeds 23 groups of 25 consecutive days (group i starts on
    Jan 1 + i days of year 2000 + i), followed by 33 consecutive days
    starting 2023-01-01. With the start forced to 2023-01-01 and the end
    set to DETECT, a single daily period over the 2023 dates is expected.
    """
    layer_name = 'test_periods_force_start_skip_gaps'
    db_keys = ['epsg4326']

    test_layers = []
    for group in range(23):
        group_start = datetime.datetime(2000 + group, 1, 1)
        for idx in range(25):
            day = group_start + datetime.timedelta(days=idx + group)
            test_layers.append((layer_name, str(day.date())))

    final_start = datetime.datetime(2023, 1, 1)
    for idx in range(33):
        day = final_start + datetime.timedelta(days=idx)
        test_layers.append((layer_name, str(day.date())))

    add_redis_config(test_layers, db_keys, '2023-01-01/DETECT/P1D')

    # 2023-02-02 is the 33rd seeded 2023 date (Jan 1 + 32 days).
    periods = ['2023-01-01/2023-02-02/P1D']

    seed_redis_data(test_layers, db_keys=db_keys)
    response = requests.get(self.date_service_url + 'key1=epsg4326')
    res = response.json()
    for layer in test_layers:
        name = layer[0]
        layer_res = res.get(name)
        self.assertIsNotNone(
            layer_res,
            'Layer {0} not found in list of all layers'.format(name))
        self.assertEqual(
            periods, layer_res['periods'],
            'Layer {0} has incorrect "periods" -- got {1}, expected {2}'
            .format(name, layer_res['periods'], periods))
        if not DEBUG:
            remove_redis_layer(layer, db_keys)

def test_periods_truncate(self):
# Test that the list of periods is truncated to the most recent 100 if there are more than 100 periods
Expand Down

0 comments on commit 72a8b09

Please sign in to comment.