# Links:
# https://www.fema.gov/about/openfema/api
# https://www.fema.gov/openfema-data-page/fima-nfip-redacted-claims-v1
# Paging example in R: receive data as JSON and save it as RDS - a single R object.
#install.packages("tidyjson")
require("httr") # wrapper for the curl package - may require installation
# jsonlite is a simple JSON parser (may require installation), but since we are not
# doing any real JSON manipulation to get the data, it is not needed here.
#require("jsonlite")
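# A hedged alternative sketch, in case jsonlite is preferred: OpenFEMA supports
# $format=jsona (a plain JSON array), which jsonlite::fromJSON() parses directly
# into a data frame, one page per call. This is an untested variant, not the
# approach this script takes.
# page <- jsonlite::fromJSON(paste0(baseUrl, "&$format=jsona&$top=1000&$skip=0"))
# str(page) # each page would then be row-bound, as done below for the parsed lists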
datalist <- list() # a list that will hold the results of each call
# Filter for NC claims, 2015 to present.
baseUrl <- "https://www.fema.gov/api/open/v1/FimaNfipClaims?$filter=(yearOfLoss%20ge%202015%20and%20state%20eq%20'NC')"
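# A hedged convenience sketch: the %20 escapes above can also be produced with
# utils::URLencode(), so the filter can be written with plain spaces. This is an
# equivalent construction of the same URL, not a change to the request.
# filterExpr <- URLencode("(yearOfLoss ge 2015 and state eq 'NC')")
# baseUrl <- paste0("https://www.fema.gov/api/open/v1/FimaNfipClaims?$filter=", filterExpr)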
# Determine the record count. Only one column is requested here to reduce the amount
# of data returned. Remember to include your criteria/filter here (if you have any)
# so the count is accurate.
result <- GET(paste0(baseUrl, "&$inlinecount=allpages&$top=1&$select=id"))
jsonData <- content(result) # parsed as JSON automatically, based on the response MIME type
recCount <- jsonData$metadata$count
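# A small defensive check (a hedged addition, not part of the original flow):
# stop_for_status() is httr's built-in error check for 4xx/5xx responses, and the
# count should exist whenever $inlinecount=allpages was honored.
# stop_for_status(result)
# if (is.null(recCount)) stop("No count returned - check the URL/filter.")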
# Calculate the number of calls needed to get all the data (the API returns at most 1000 records per call).
top <- 1000
loopNum <- ceiling(recCount / top)
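# For example: if recCount were 24,310 (a made-up figure), ceiling(24310 / 1000)
# gives 25 iterations, with the final call returning a partial page of 310 records.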
# send some logging info to the console so we know what is happening
print(paste0("START ",Sys.time(),", ", recCount, " records, ", top, " returned per call, ", loopNum," iterations needed."),quote=FALSE)
# Loop over the API endpoint, changing the record offset on each iteration. Each call
# returns results as JSON; the metadata is suppressed since we no longer need it.
# (If calls fail intermittently, see the retry sketch after the loop.)
for (i in seq(from = 0, to = loopNum - 1, by = 1)) {
  # As above, if you have filters, specific fields, or are sorting, add that to the
  # base URL or make sure it gets concatenated here.
  result <- GET(paste0(baseUrl, "&$metadata=off&$top=", top, "&$skip=", i * top))
  jsonData <- content(result) # parsed as JSON automatically, based on the response MIME type
  # With metadata off, the records sit under the dataset name, so keep just that
  # element. Each page is added to a list that can be combined into one data frame
  # later or saved. You may hit memory limits with very large datasets; for those,
  # inserting into a database or saving chunks of data may be preferable.
  datalist[[i + 1]] <- jsonData$FimaNfipClaims
  print(paste0("Iteration ", i, " done"), quote = FALSE)
}
# Bind the pages in our list into one data frame.
fullData <- dplyr::bind_rows(datalist)
# Save as one R object - probably more useful (and storage efficient) than CSV or JSON
# if the analysis will happen in R.
saveRDS(fullData, file = "./Final Project/NFIP.NC.2015.2021.rds")
# write.csv(fullData, file = "./Final Project/NFIP.test.csv")
# Reopen the file just to verify that we got what we expected.
my_data <- readRDS(file = "./Final Project/NFIP.NC.2015.2021.rds")
print(paste0("END ", Sys.time(), ", ", nrow(my_data), " records in file"))
#summary(unlist(my_data))
#class(my_data)
#####