Skip to content

Commit

Permalink
IMGW telemetry dataset (#86)
Browse files Browse the repository at this point in the history
* issue #80

* bug fixes for precip

* fix ogimet break between years

* update ogimet_daily

* fix ogimet_daily

* test pkgdown

* new pkgdown

* pkgdown pipeline

* turn on tests for metadata

* missing ogimet tests

* ogimet fix potential warning

* updates roxygen

* adding HALA GASIENICOWA IMGW metadata

* meteo_imgw_datastore

* draft finished

* meteo imgw telemetry

* fix: imgw telemetry

* imgw telemetry fix

* telemetry fix for empty files

* string as factors for old R version

* assertions for telemetry

* add globals to avoid NOTEs

* updates docs (#87)

* IMGW column encoding and telemetry support

* fix: HTML5

* fix: HTML5

* fix: HTML5

* climate 1.2.0

* fix: avoid warnings

* fix: run test only if network available

* replace R 3.6 with 4.0

* fix: #88 #85

* fix: rename telemetry stations

* fix: #93

* imgw station names extended

* add github actions for rhub

* doc fixes

* add stations_meteo_imgw_telemetry gently break

* add telemetry tests

* imgw telemetry tests

---------

Co-authored-by: Jakub Nowosad <[email protected]>
Co-authored-by: Jakub Nowosad <[email protected]>
  • Loading branch information
3 people authored Jun 18, 2024
1 parent a62822d commit 83507e3
Show file tree
Hide file tree
Showing 40 changed files with 654 additions and 258 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,5 @@
vignettes/articles/usecase.Rmd
^pkgdown$
^.codecov.yml$
^tests$

2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ jobs:
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-20.04, r: 'release'}
- {os: ubuntu-20.04, r: '3.6'}
- {os: ubuntu-20.04, r: '4.0'}
env:
R_REMOTES_NO_ERRORS_FROM_WARNINGS: true
RSPM: ${{ matrix.config.rspm }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ covr_report.html
lib
docs
pkgdown
.Renviron
19 changes: 10 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.1.2
Version: 1.2.0
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand All @@ -20,28 +20,29 @@ Authors@R: c(person(given = "Bartosz",
Description: Automatize downloading of meteorological and hydrological data from publicly available repositories:
OGIMET (<http://ogimet.com/index.phtml.en>),
University of Wyoming - atmospheric vertical profiling data (<http://weather.uwyo.edu/upperair/>),
Polish Institute of Meterology and Water Management - National Research Institute (<https://danepubliczne.imgw.pl>),
Polish Institute of Meteorology and Water Management - National Research Institute (<https://danepubliczne.imgw.pl>),
and National Oceanic & Atmospheric Administration (NOAA).
This package also allows for searching geographical coordinates for each observation and calculating distances to the nearest stations.
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.0
Depends:
R (>= 3.5.0)
R (>= 4.0.0)
Imports:
XML,
httr,
curl,
data.table
data.table,
stringi
Suggests:
testthat,
knitr,
rmarkdown,
dplyr,
knitr,
maps,
testthat,
tidyr,
maps
rmarkdown
URL: https://github.com/bczernecki/climate
BugReports: https://github.com/bczernecki/climate/issues
VignetteBuilder: knitr
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(hydro_imgw_daily)
export(hydro_imgw_monthly)
export(meteo_imgw)
export(meteo_imgw_daily)
export(meteo_imgw_datastore)
export(meteo_imgw_hourly)
export(meteo_imgw_monthly)
export(meteo_noaa_co2)
Expand All @@ -19,17 +20,22 @@ export(ogimet_daily)
export(ogimet_hourly)
export(sounding_wyoming)
export(spheroid_dist)
export(stations_meteo_imgw_telemetry)
export(stations_ogimet)
export(test_url)
import(data.table)
import(httr)
importFrom(XML,readHTMLTable)
importFrom(curl,curl_download)
importFrom(curl,has_internet)
importFrom(data.table,as.data.table)
importFrom(data.table,fread)
importFrom(stats,na.omit)
importFrom(stats,runif)
importFrom(stringi,stri_trans_general)
importFrom(utils,data)
importFrom(utils,download.file)
importFrom(utils,globalVariables)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
importFrom(utils,read.fwf)
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# climate 1.2.0

* Corrected encoding problems on some platforms where IMGW-PIB metadata were not parsed correctly
* Added location of IMGW-PIB telemetry stations within `stations_meteo_imgw_telemetry()`
* Minor bug fixes


# climate 1.1.1

* Fix problems with downloading `precip` dataset from IMGW-PIB repository after recent changes in metadata
Expand Down
16 changes: 10 additions & 6 deletions R/clean_metadata_meteo.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,21 +6,27 @@
#' @param interval temporal interval
#' @importFrom utils read.fwf
#' @importFrom stats na.omit
#' @keywords internal
#' @importFrom stringi stri_trans_general
#' @keywords internal
#'

clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {

temp = tempfile()
test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)
a = iconv(a, from = "CP1250", to = "ASCII//TRANSLIT")

# a = readLines(temp, warn = FALSE, encoding = "CP1250") # doesn't work on mac,
# thus:
# a = iconv(a, from = "CP1250", to = "ASCII//TRANSLIT")
a = read.csv(temp, header = FALSE, stringsAsFactors = FALSE,
fileEncoding = "CP1250")$V1
a = gsub(a, pattern = "\\?", replacement = "")
a = stringi::stri_trans_general(a, 'LATIN-ASCII')

# additional workarounds for mac os but not only...
a = gsub(x = a, pattern = "'", replacement = "")
a = gsub(x = a, pattern = "\\^0", replacement = "")
a = data.frame(V1 = a[nchar(a) > 0], stringsAsFactors = FALSE)
a = data.frame(V1 = a[nchar(a) > 3], stringsAsFactors = FALSE)
# this one does not work on windows
# a = suppressWarnings(na.omit(read.fwf(address, widths = c(1000),
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))
Expand All @@ -44,8 +50,6 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))
a$V1 = gsub(x = a$V1, pattern = "* ", "")

#strsplit(x = a$V1, split = "/")
#a = a[nchar(a$V1)>2,] # remove empty or almost empty rows
a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status
colnames(a)[1] = "parameters"
return(a)
Expand Down
2 changes: 2 additions & 0 deletions R/globals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Declare names that R CMD check would otherwise report as undefined global
# variables, so that the check produces no NOTE for them.
# NOTE(review): presumably these are column names referenced through
# non-standard evaluation elsewhere in the package -- confirm against callers.
#' @importFrom utils globalVariables
utils::globalVariables(c("i.parameter", "param"))
9 changes: 4 additions & 5 deletions R/imgw_hydro_abbrev.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@
#' fullname, abbr_eng, and fullname_eng
#'
#' @format The data contains a data.frame with ca. 20 elements described in three ways:
#' \itemize{
#' \item{fullname} {original column names as downloaded from the repository}
#' \item{abbr_eng} {shorten column names with abbreviations derived from the most
#' popular scheme used for meteorological parameters}
#' \item{fullname_eng} {detailed description of downloaded meteorological variables}
#' \describe{
#' \item{fullname}{original column names as downloaded from the repository}
#' \item{abbr_eng}{shortened column names with abbreviations derived from the most popular scheme used for meteorological parameters}
#' \item{fullname_eng}{detailed description of downloaded meteorological variables}
#' }
#' The object is created mostly to be used altogether with the hydro_shortening_imgw() function
#'
Expand Down
8 changes: 4 additions & 4 deletions R/imgw_hydro_stations.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
#' coordinates, ID numbers, and elevations
#'
#' @format The data contains a data.frame with 1304 obs. of 3 variables:
#' \itemize{
#' \item{id} {Station ID}
#' \item{X} {Longitude}
#' \item{Y} {Latitude}
#' \describe{
#' \item{id}{Station ID}
#' \item{X}{Longitude}
#' \item{Y}{Latitude}
#' }
#' The object is in the geographic coordinates using WGS84 (EPSG:4326).
#'
Expand Down
8 changes: 4 additions & 4 deletions R/imgw_meteo_abbrev.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
#' fullname, abbr_eng, and fullname_eng
#'
#' @format The data contains a data.frame with ca. 250 elements described in three ways:
#' \itemize{
#' \item{fullname} {original column names as downloaded from the repository}
#' \item{abbr_eng} {shorten column names with abbreviations derived from the most
#' \describe{
#' \item{fullname}{original column names as downloaded from the repository}
#' \item{abbr_eng}{shortened column names with abbreviations derived from the most
#' popular scheme used for meteorological parameters}
#' \item{fullname_eng} {detailed description of downloaded meteorological variables}
#' \item{fullname_eng}{detailed description of downloaded meteorological variables}
#' }
#' The object is created mostly to be used altogether with the meteo_shortening_imgw
#' function
Expand Down
10 changes: 6 additions & 4 deletions R/imgw_meteo_stations.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
#' coordinates, ID numbers, and elevations
#'
#' @format The data contains a data.frame with 1998 obs. of 5 variables:
#' \itemize{
#' \item{id} {Station ID}
#' \item{X} {Longitude}
#' \item{Y} {Latitude}
#' \describe{
#' \item{id}{Station ID}
#' \item{X}{Longitude}
#' \item{Y}{Latitude}
#' \item{station}{Station name}
#' \item{id2}{IMGW-PIB ID for station rank}
#' }
#' The object is in the geographic coordinates using WGS84 (EPSG:4326).
#'
Expand Down
34 changes: 14 additions & 20 deletions R/meteo_imgw_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
#' (default status = FALSE - i.e. the status columns are deleted)
#' @param coords add coordinates of the station (logical value TRUE or FALSE)
#' @param station name of meteorological station(s).
#' It accepts names (characters in CAPITAL LETTERS); stations' IDs (numeric) are
#' no longer valid
#' It accepts names (characters in CAPITAL LETTERS); stations' IDs (numeric) are no longer valid
#' @param col_names three types of column names possible:
#' "short" - default, values with shorten names,
#' "full" - full English description,
Expand Down Expand Up @@ -44,11 +43,6 @@ meteo_imgw_daily = function(rank = "synop",
coords,
station,
col_names),
warning = function(w) {
message(paste("Potential problem(s) found. Problems with downloading data.\n",
"\rRun function with argument allow_failure = FALSE",
"to see more details"))
},
error = function(e){
message(paste("Potential error(s) found. Problems with downloading data.\n",
"\rRun function with argument allow_failure = FALSE",
Expand Down Expand Up @@ -116,36 +110,35 @@ meteo_imgw_daily_bp = function(rank,
temp = tempfile()
temp2 = tempfile()
test_url(addresses_to_download[j], temp)
#download.file(addresses_to_download[j], temp)
unzip(zipfile = temp, exdir = temp2)
file1 = paste(temp2, dir(temp2), sep = "/")[1]
if (translit) {
data1 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file1)))
} else {
data1 = read.csv(file1, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
}

colnames(data1) = meta[[1]]$parameters

file2 = paste(temp2, dir(temp2), sep = "/")[2]
if (translit) {
data2 = as.data.frame(data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2)))
data2 = data.table::fread(cmd = paste("iconv -f CP1250 -t ASCII//TRANSLIT", file2))
} else {
data2 = read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250")
data2 = suppressWarnings(read.csv(file2, header = FALSE, stringsAsFactors = FALSE, fileEncoding = "CP1250"))
}
colnames(data2) = meta[[2]]$parameters
unlink(c(temp, temp2))

# usuwa statusy
# remove statuses if not needed:
if (status == FALSE) {
data1[grep("^Status", colnames(data1))] = NULL
data2[grep("^Status", colnames(data2))] = NULL
}

unlink(c(temp, temp2))

# moja proba z obejsciem dla wyboru kodu
ttt = merge(data1, data2, by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
all.x = TRUE)
ttt = base::merge(data1,
data2,
by = c("Kod stacji", "Rok", "Miesiac", "Dzien"),
all.x = TRUE)

ttt = ttt[order(ttt$`Nazwa stacji.x`, ttt$Rok, ttt$Miesiac, ttt$Dzien), ]
### this part of the code is repeated after the big loop over rank
if (!is.null(station)) {
Expand Down Expand Up @@ -251,7 +244,7 @@ meteo_imgw_daily_bp = function(rank,
all_data = do.call(rbind, all_data)

if (coords) {
all_data = merge(climate::imgw_meteo_stations,
all_data = merge(climate::imgw_meteo_stations[, 1:3],
all_data,
by.x = "id",
by.y = "Kod stacji",
Expand Down Expand Up @@ -291,15 +284,16 @@ meteo_imgw_daily_bp = function(rank,
}
}

# sortowanie w zaleznosci od nazw kolumn - raz jest "kod stacji", raz "id"
# sort output
if (sum(grepl(x = colnames(all_data), pattern = "Kod stacji"))) {
all_data = all_data[order(all_data$`Kod stacji`, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
} else {
all_data = all_data[order(all_data$id, all_data$Rok, all_data$Miesiac, all_data$Dzien), ]
}

# # dodanie opcji dla skracania kolumn i usuwania duplikatow:
# remove duplicates and shorten colnames
all_data = meteo_shortening_imgw(all_data, col_names = col_names, ...)
rownames(all_data) = NULL

return(all_data)
}
Loading

0 comments on commit 83507e3

Please sign in to comment.