Skip to content

Commit

Permalink
Dev (#83)
Browse files Browse the repository at this point in the history
climate ver. 1.1.1 rc_1
  • Loading branch information
bczernecki authored Apr 1, 2023
1 parent 600a3bc commit 9c168a6
Show file tree
Hide file tree
Showing 14 changed files with 135 additions and 254 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
Expand All @@ -52,7 +52,7 @@ jobs:

- name: Query dependencies
run: |
install.packages('remotes')
install.packages(c('remotes', 'covr'))
saveRDS(remotes::dev_package_deps(dependencies = TRUE), ".github/depends.Rds", version = 2)
writeLines(sprintf("R-%i.%i", getRversion()$major, getRversion()$minor), ".github/R-version")
shell: Rscript {0}
Expand Down
47 changes: 27 additions & 20 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -1,39 +1,46 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
name: pkgdown
on:
push:
branches: [main, master, dev, devel]
branches: [main, master, dev]
pull_request:
branches: [main, master, dev, devel]
branches: [main, master, dev]
release:
types: [published]
workflow_dispatch:

name: pkgdown

jobs:
build:
pkgdown:
runs-on: ubuntu-latest
container: bczernecki/meteo:latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

steps:
- name: Checkout Project
uses: actions/checkout@v1

- name: Check for installed packages
run: |
sudo apt-get install -y libfontconfig1-dev libharfbuzz-dev libfribidi-dev rsync
R -e 'install.packages(c("pkgdown", "openair", "rnaturalearthdata"))'
R -e 'installed.packages()[, 1:3]'
- name: Build book
run: |
Rscript -e 'pkgdown::build_site()'
- uses: actions/checkout@v3

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, any::openair, local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
uses: JamesIves/[email protected]
if: github.event_name != 'pull_request'
uses: JamesIves/[email protected]
with:
clean: false
branch: gh-pages
folder: docs
folder: docs
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: climate
Title: Interface to Download Meteorological (and Hydrological) Datasets
Version: 1.1.0
Version: 1.1.1
Authors@R: c(person(given = "Bartosz",
family = "Czernecki",
role = c("aut", "cre"),
Expand All @@ -27,7 +27,7 @@ License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.1
RoxygenNote: 7.2.3
Depends:
R (>= 3.5.0)
Imports:
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# climate 1.1.1

* Fix problems with downloading `precip` dataset from IMGW-PIB repository after recent changes in metadata
* Bug fix for `ogimet_daily` if data contains more than one year


# climate 1.1.0

* A new approach for handling CRAN policy for resolving problems if network issues are detected or some of the external services are temporarily down.
Expand Down
14 changes: 7 additions & 7 deletions R/clean_metadata_meteo.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,7 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
temp = tempfile()
test_url(link = address, output = temp)
a = readLines(temp, warn = FALSE)

a = iconv(a, from = "cp1250", to = "ASCII//TRANSLIT")
a = iconv(a, from = "CP1250", to = "ASCII//TRANSLIT")
a = gsub(a, pattern = "\\?", replacement = "")

# additional workarounds for mac os but not only...
Expand All @@ -27,10 +26,10 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
# fileEncoding = "CP1250", stringsAsFactors = FALSE)))
length_char = max(nchar(a$V1), na.rm = TRUE)

if (rank == "precip" && interval == "hourly") length_char = 40 # wyjatek dla precipow
if (rank == "precip" && interval == "daily") length_char = 40 # wyjatek dla precipow dobowych
if (rank == "synop" && interval == "hourly") length_char = 60 # wyjatek dla synopow terminowych
if (rank == "climate" && interval == "monthly") length_char = 52 # wyjatek dla synopow terminowych
if (rank == "precip" && interval == "hourly") length_char = 40 # exception for precip / hourly
if (rank == "precip" && interval == "daily") length_char = 38 # exception for precip / daily
if (rank == "synop" && interval == "hourly") length_char = 60 # exception for synop / hourly
if (rank == "climate" && interval == "monthly") length_char = 52 # exception for climate / monthly

field = substr(a$V1, length_char - 3, length_char)

Expand All @@ -43,8 +42,9 @@ clean_metadata_meteo = function(address, rank = "synop", interval = "hourly") {
a$field2 = suppressWarnings(as.numeric(unlist(lapply(strsplit(field, "/"), function(x) x[2]))))

a$V1 = trimws(substr(a$V1, 1, nchar(a$V1) - 3))
a$V1 = gsub(x = a$V1, pattern = "* ", "")

strsplit(x = a$V1, split = "/")
#strsplit(x = a$V1, split = "/")
#a = a[nchar(a$V1)>2,] # remove empty or almost empty rows
a = a[!(is.na(a$field1) & is.na(a$field2)), ] # remove info about status
colnames(a)[1] = "parameters"
Expand Down
8 changes: 4 additions & 4 deletions R/meteo_metadata_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,16 @@
#' #meta = climate:::meteo_metadata_imgw(interval = "monthly", rank = "precip")
#' }

meteo_metadata_imgw = function(interval, rank) { # interval moze byc: monthly, hourly, hourly
meteo_metadata_imgw = function(interval, rank) { # interval can be: monthly, daily, hourly

b = NULL
base_url = "https://danepubliczne.imgw.pl/data/dane_pomiarowo_obserwacyjne/"

# METADANE daily:
if (interval == "daily") { # uwaga! daily maja dla climateow i synopow po 2 pliki z metadanymi!!!
# METADATA daily:
if (interval == "daily") { # warning! daily data for climate and synop ranks have 2 metadata files each!

if (rank == "synop") {
b[[1]] = clean_metadata_meteo(address = paste0(base_url, "dane_meteorologiczne/dobowe/synop/s_d_format.txt"),
b[[1]] = clean_metadata_meteo(address = paste0(base_url,"dane_meteorologiczne/dobowe/synop/s_d_format.txt"),
rank = "synop", interval = "daily")
b[[2]] = clean_metadata_meteo(address = paste0(base_url, "dane_meteorologiczne/dobowe/synop/s_d_t_format.txt"),
rank = "synop", interval = "daily")
Expand Down
16 changes: 11 additions & 5 deletions R/meteo_shortening_imgw.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@

meteo_shortening_imgw = function(data, col_names = "short", remove_duplicates = TRUE) {

# removing duplicated column names: (e.g. station's name)
if (remove_duplicates == TRUE) {
data = data[, !duplicated(colnames(data))]

# fix for merged station names with suffixes
if (any(colnames(data) %in% c("Nazwa stacji.x", "Nazwa stacji.y"))) {
data$`Nazwa stacji.y` = NULL
colnames(data)[colnames(data) == "Nazwa stacji.x"] = "Nazwa stacji"
}
}

if (col_names != "polish") {
abbrev = climate::imgw_meteo_abbrev
orig_columns = trimws(gsub("\\s+", " ", colnames(data))) # remove double spaces
Expand All @@ -38,11 +49,6 @@ meteo_shortening_imgw = function(data, col_names = "short", remove_duplicates =
}
}

# removing duplicated column names: (e.g. station's name)
if (remove_duplicates == TRUE) {
data = data[, !duplicated(colnames(data))]
}

return(data)

} # end of function
42 changes: 25 additions & 17 deletions R/ogimet_daily.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@
#' }
#'



ogimet_daily = function(date = c(Sys.Date() - 30, Sys.Date()),
coords = FALSE,
station = NA,
Expand Down Expand Up @@ -95,14 +93,17 @@ ogimet_daily_bp = function(date = date,
month = format(dates[i], "%m")
day = format(dates[i], "%d")
ndays = day

linkpl2 = paste("https://www.ogimet.com/cgi-bin/gsynres?lang=en&ind=", station_nr, "&ndays=32&ano=", year, "&mes=", month, "&day=", day, "&hora=", hour,"&ord=REV&Send=Send", sep = "")
if (month == 1) linkpl2 = paste("https://www.ogimet.com/cgi-bin/gsynres?lang=en&ind=", station_nr, "&ndays=32&ano=", year, "&mes=", month, "&day=", day, "&hora=", hour, "&ord=REV&Send=Send", sep = "")

temp = tempfile()
test_url(linkpl2, temp)

# run only if downloaded file is valid
if (!is.na(file.size(temp)) & (file.size(temp) > 500)) {
if (is.na(file.size(temp)) | (file.size(temp) < 500)) {
message("Problem with downloading data from:", linkpl2, "\n")
if (exists("data_station")) {
message("Returning results downloaded up to this point:\n")
return(data_station)
}
} else { # run only if downloaded file is valid

a = readHTMLTable(temp, stringsAsFactors = FALSE)
unlink(temp)
Expand All @@ -124,7 +125,8 @@ ogimet_daily_bp = function(date = date,
test = b[1:2, ]

if (is.null(test) ) {
warning(paste0("Wrong station ID: ", station_nr, " You can check available stations ID at https://ogimet.com/display_stations.php?lang=en&tipo=AND&isyn=&oaci=&nombre=&estado=&Send=Send"))
warning(paste0("Wrong station ID: ", station_nr,
" You can check available stations ID at https://ogimet.com/display_stations.php?lang=en&tipo=AND&isyn=&oaci=&nombre=&estado=&Send=Send"))
return(data_station)
}

Expand Down Expand Up @@ -168,7 +170,7 @@ ogimet_daily_bp = function(date = date,
names_col = "Error_column"
}

names_col <-
names_col =
gsub("[^A-Za-z0-9]",
"",
as.character(lapply(names_col, as.character), stringsAsFactors = FALSE))
Expand All @@ -177,10 +179,17 @@ ogimet_daily_bp = function(date = date,
b = b[-c(1:2), ]
b["station_ID"] = station_nr

# adding year to date
b$Date = as.character(paste0(b$Date, "/", year))


# extra check if date covers December and January simultaneously
# e.g. "01/02" "01/01" "12/31" "12/30"
uniq_mths = sort(unique(unlist(lapply(strsplit(b$Date, "/"), "[[", 1))))
if (sum(uniq_mths %in% c("01", "12")) == 2) {
mth = unlist(lapply(strsplit(b$Date, "/"), "[[", 1))
yr = ifelse(mth == "01", as.numeric(year), as.numeric(year) - 1)
b$Date = as.character(paste0(b$Date, "/", yr))
} else {
b$Date = as.character(paste0(b$Date, "/", year))
}

# to avoid gtools::smartbind function or similar from another package..
if (ncol(data_station) >= ncol(b)) {
b[setdiff(names(data_station), names(b))] = NA # adding missing columns
Expand All @@ -196,9 +205,6 @@ ogimet_daily_bp = function(date = date,

}

# cat(paste(year,month,"\n"))
# coords można lepiej na samym koncu dodać kolumne
# wtedy jak zmienia się lokalizacja na dacie to tutaj tez
if (coords) {
coord = a[[1]][2, 1]
data_station["Lon"] = get_coord_from_string(coord, "Longitude")
Expand Down Expand Up @@ -247,8 +253,10 @@ ogimet_daily_bp = function(date = date,
data_station$Date = as.Date(as.character(data_station$Date), format = "%m/%d/%Y")
# clipping to interesting period as we're downloading slightly more than needed:
data_station = data_station[which(data_station$Date >= as.Date(min(date)) & as.Date(data_station$Date) <= as.Date(max(date))), ]

} # end of checking whether no. of rows > 0

# removing duplicates:
data_station = data_station[row.names(unique(data_station[, c("station_ID", "Date")])), ]
return(data_station)
}
2 changes: 1 addition & 1 deletion R/onAttach.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
ver = as.character(packageVersion("climate"))
packageStartupMessage(paste0(c("\n____________________________________________________________________\n",
" Welcome to climate ", ver, "!",
"\n- More about the package and datasets: github.com/bczernecki/climate",
"\n- More about the package and datasets: bczernecki.github.io/climate",
"\n- Using 'climate' for publication? See: citation('climate')\n",
"____________________________________________________________________\n")))
}
Expand Down
11 changes: 6 additions & 5 deletions R/sounding_wyoming.R
Original file line number Diff line number Diff line change
Expand Up @@ -100,13 +100,15 @@ sounding_wyoming_bp = function(wmo_id,
dd = formatC(dd, width = 2, format = "d", flag = "0")
hh = formatC(hh, width = 2, format = "d", flag = "0")
min = formatC(min, width = 2, format = "d", flag = "0")

if (bufr) {
url = paste0("http://weather.uwyo.edu/cgi-bin/bufrraob.py?datetime=",
yy, "-", mm, "-", dd, "+", hh, ":", min, ":00&id=", wmo_id, "&type=TEXT:LIST")
url = paste0("http://weather.uwyo.edu/cgi-bin/bufrraob.py?src=bufr&datetime=",
yy, "-", mm, "-", dd, "+", hh, ":", min, ":00&id=",
sprintf("%05d", wmo_id), "&type=TEXT:LIST")
} else {
url = paste0("http://weather.uwyo.edu/cgi-bin/sounding?TYPE=TEXT%3ALIST&YEAR=",
yy, "&MONTH=", mm, "&FROM=", dd, hh, "&TO=", dd, hh, "&STNM=", wmo_id)
yy, "&MONTH=", mm, "&FROM=", dd, hh, "&TO=", dd, hh, "&STNM=",
sprintf("%05d", wmo_id))
}

temp = tempfile()
Expand Down Expand Up @@ -154,4 +156,3 @@ sounding_wyoming_bp = function(wmo_id,
unlink(temp)
return(df)
}

26 changes: 14 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,27 +248,29 @@ ggplot(co2, aes(date, co2_avg)) +
```python
# load required packages
from rpy2.robjects.packages import importr
from rpy2.robjects import r
import rpy2.robjects as robjects
import pandas as pd
import datetime as dt

# load climate package (make sure that it was installed in R before)
importr('climate')
# test functionality e.g. with meteo_ogimet function for New York - La Guardia:
df = robjects.r['meteo_ogimet'](interval = "daily", station = 72503)
# optionally - transform object to pandas data frame and rename columns:
df = robjects.r['meteo_ogimet'](interval = "daily", station = 72503,
date = robjects.StrVector(['2022-05-01', '2022-06-15']))
# optionally - transform object to pandas data frame and rename columns + fix datetime:
res = pd.DataFrame(df).transpose()
res.columns = df.colnames
res['Date'] = pd.TimedeltaIndex(res['Date'], unit='d') + dt.datetime(1970,1,1)
res.head

>>> res[res.columns[0:7]].head()
# station_ID Date TemperatureCAvg ... TemperatureCMin TdAvgC HrAvg
#0 72503.0 2022-06-15 23.5 ... 19.4 10.9 45.2
#1 72503.0 2022-06-14 25.0 ... 20.6 16.1 59.0
#2 72503.0 2022-06-13 20.4 ... 17.8 16.0 74.8
#3 72503.0 2022-06-12 21.3 ... 18.3 12.0 57.1
#4 72503.0 2022-06-11 22.6 ... 17.8 8.1 40.1

>>> res
# station_ID Date TemperatureCAvg
#0 72503.0 19227.0 24.7
#1 72503.0 19226.0 25.1
#2 72503.0 19225.0 27.5
#3 72503.0 19224.0 26.8
#4 72503.0 19223.0 24.7
#5 72503.0 19222.0 23.3
#[178 rows x 23 columns]
```

## Acknowledgment
Expand Down
Loading

0 comments on commit 9c168a6

Please sign in to comment.