diff --git a/src/data_cleaning/bgnoise.jl b/src/data_cleaning/bgnoise.jl index 4026b5e..a659092 100644 --- a/src/data_cleaning/bgnoise.jl +++ b/src/data_cleaning/bgnoise.jl @@ -1,21 +1,22 @@ +function noise_threshold(x,th = 0.4) + if x<=th + return missing + else + return 1 + end +end """ Pixels with no economic activity may show some light due to background noise. These pixels could be in forests, oceans, deserts etc. The ```bgnoise_PSTT2021``` function generates a background moise mask such that those pixels which are considered dark are marked as 0 and those considered lit are marked as 1. The function uses the datacubes of radiance and clouds to generate annual image of the last year the data. The function considers all the pixels below a provided threshold as dark and remaining to be lit. ```julia bgnoise_PSTT2021(radiance_datacube, clouds_datacube) ``` """ -function bgnoise_PSTT2021(radiance_datacube, clouds_datacube; threshold = 0.4) - function noise_threshold(x,threshold = 0.4) - if x<=threshold - return missing - else - return 1 - end - end +function bgnoise_PSTT2021(radiance_datacube, clouds_datacube, th = 0.4) + # This function may be obsolete because Payne Institute is providing annual images for each year. 
r_dc = convert(Array{Union{Missing, Float16}}, view(radiance_datacube, Band(1))) cf_dc = convert(Array{UInt8, 3}, view(clouds_datacube, Band(1))) last_year_rad = r_dc[:, :, (size(r_dc)[3]-11):size(r_dc)[3]] - + last_year_cloud = cf_dc[:, :, (size(r_dc)[3]-11):size(r_dc)[3]] average_lastyear = copy(r_dc[:, :, 1]) for i in 1:size(last_year_rad)[1] @@ -23,6 +24,6 @@ function bgnoise_PSTT2021(radiance_datacube, clouds_datacube; threshold = 0.4) average_lastyear[i,j] = weighted_mean(last_year_rad[i, j, :], last_year_cloud[i, j, :]) end end - mask = noise_threshold.(average_lastyear, threshold) + mask = noise_threshold.(average_lastyear, th) return Raster(mask, dims(radiance_datacube)[1:2]) end diff --git a/src/data_cleaning/outlier_removal.jl b/src/data_cleaning/outlier_removal.jl index d900dbe..2863a77 100644 --- a/src/data_cleaning/outlier_removal.jl +++ b/src/data_cleaning/outlier_removal.jl @@ -1,15 +1,13 @@ """ -There are extremely high values in the data due to fires, gas flare etc. You may find some values even greater than the aggregate radiance of large cities. Such pixels also have high standard deviation. These pixels may not be of importantance from the point of view of measureming prosperity. The ```outlier_variance``` function generates a mask of pixels with standard deviation less than a certain threshold, that defaults to the 0.999 quantile. Essentially, this function can be used to removed top pixels by standard deviation. A mask can be provided to the function, so that it calculates the percentile based on the lit pixel of the mask. - -The `threshold` keyword argument should be a number between 0 and 1. +There are extremely high values in the data due to fires, gas flares etc. You may find some values even greater than the aggregate radiance of large cities. Such pixels also have high standard deviation. These pixels may not be of importance from the point of view of measuring prosperity. 
The ```outlier_variance``` function generates a mask of pixels with standard deviation less than the 99.9th percentile. Essentially, this function can be used to remove the top 0.1 percent of pixels by standard deviation. A mask can be provided to the function, so that it calculates the percentile based on the lit pixels of the mask. For example, if the datacube is a box around India and the mask is the polygon mask of India, the outlier_variance function will calculate the 99th percentile of the standard deviation of the pixels inside India's boundary. ```julia -outlier_variance(datacube, mask; threshold=0.99) +outlier_variance(datacube, mask) ``` """ -function outlier_variance(dc, mask=ones(Int8, (size(dc)[1],size(dc)[2])); threshold = 0.999) - function std_mask(std, th) - if std < th +function outlier_variance(dc, mask=ones(Int8, (size(dc)[1],size(dc)[2]))) + function std_mask(std, threshold) + if std < threshold return 1 else return missing @@ -28,8 +26,8 @@ function outlier_variance(dc, mask=ones(Int8, (size(dc)[1],size(dc)[2])); thresh stds[i, j] = std(detrend_ts(filter(x -> !ismissing(x), datacube[i, j, :]))) end end - th = quantile(skipmissing(vec(stds .* mask)), threshold) - outlierMask = std_mask.(stds, th) + threshold = quantile(skipmissing(vec(stds .* mask)), 0.999) + outlierMask = std_mask.(stds, threshold) outlierMask = outlierMask.*mask return outlierMask end @@ -42,7 +40,7 @@ sample_timeseries = datacube[1, 2, :] # The time series of pixel [1, 2] outlier_hampel(sample_timeseries) ``` """ -function outlier_hampel(timeseries; window_size = 5, n_sigmas = 3) +function outlier_hampel(timeseries, window_size = 5, n_sigmas = 3) timeseries = Array(timeseries) # Credit: https://gist.github.com/erykml/d15525855f2ef455bd7969240f6f4073#file-hampel_filter_forloop-py missings = findall(ismissing, timeseries)