Skip to content

Commit

Permalink
Merge branch 'master' into CRAN
Browse files Browse the repository at this point in the history
  • Loading branch information
MarkEdmondson1234 committed Feb 15, 2018
2 parents 6e99ef6 + d781f7e commit b09b13c
Show file tree
Hide file tree
Showing 225 changed files with 4,117 additions and 26,481 deletions.
3 changes: 3 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@
^CODE_OF_CONDUCT\.md$
^data-raw$
^\.httr-oauth$
^ga\.oauth$
^CONTRIBUTING$
^issue_template\.md$
^/Users/mark/dev/auth/googleAnalyticsR\.httr-oauth$
2 changes: 0 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,5 @@ vignettes/*.html
vignettes/*.pdf
.Rproj.user
inst/doc
.httr-oauth
tests/testthat/httr-oauth.rds
#tests/testthat/mock/
/Users/mark/dev/auth/googleAnalyticsR.httr-oauth
25 changes: 25 additions & 0 deletions CONTRIBUTING
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# How to contribute to googleAnalyticsR

Contributions of any sort are encouraged and very welcome.

## Bugs

If you find anything that looks like a bug, please raise a GitHub issue. One issue per thread please.

Bugs are much more likely to be fixed if you have a reproducible example, so please include one in the issue.

Please also report your `sessionInfo()` so we can check which versions of `googleAnalyticsR` and `googleAuthR` you are running, and run your example with `options(googleAuthR.verbose = 1)` set, which will output more console feedback to help with debugging.

## Pull requests

Any pull requests are welcome, however small — typos, documentation improvements, etc.

If you are contributing code, then please also include:

* An entry in the `NEWS.md` that details what has changed
* Add yourself as a contributor to the `DESCRIPTION`
* If you can, create a test using `testthat` which will run through your code. The tests assume you have an authentication file saved at the location indicated by the return of `Sys.getenv("GA_AUTH_FILE")`. If you don't know how to use `testthat`, an example file in the tests directory will do, and it will be converted.
* Include an example on how to use the function in the examples section
* Try to stick to the same coding style as the rest of the package
* Add some documentation on how to use the functions.

17 changes: 11 additions & 6 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,32 +1,37 @@
Package: googleAnalyticsR
Type: Package
Version: 0.4.2
Version: 0.5.0
Title: Google Analytics API into R
Description: R library for interacting with the Google Analytics
Reporting API v3 and v4.
Description: Interact with the Google Analytics
APIs <https://developers.google.com/analytics/>, including
the Core Reporting API (v3 and v4), Management API, and Multi-Channel
Funnel API.
Authors@R: c(person("Mark", "Edmondson", email = "[email protected]",
role = c("aut", "cre")),
person(given = "Artem", family = "Klevtsov",
email = "[email protected]", role = "ctb"),
person("Johann", "deBoer", email = "[email protected]", role = "ctb"),
person("David", "Watkins", email = "[email protected]", role = "ctb"),
person("Olivia", "Brode-Roger", email = "[email protected]", role = "ctb"),
person("Jas", "Sohi", email = "[email protected]", role = "ctb"))
person("Jas", "Sohi", email = "[email protected]", role = "ctb"),
person("Zoran", "Selinger", email = "[email protected]", role = "ctb"))
URL: http://code.markedmondson.me/googleAnalyticsR/
BugReports: https://github.com/MarkEdmondson1234/googleAnalyticsR/issues
Depends:
R (>= 3.2.0)
Imports:
assertthat (>= 0.2.0),
dplyr (>= 0.7.0),
googleAuthR (>= 0.5.1),
googleAuthR (>= 0.6.2),
httr (>= 1.3.1),
magrittr (>= 1.5),
memoise,
progress,
purrr (>= 0.2.2),
rlang (>= 0.1.0),
tidyr (>= 0.6.3),
utils
utils,
methods
Suggests:
bigQueryR (>= 0.3.1),
covr,
Expand Down
25 changes: 25 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,12 @@ export(ga_custom_vars_list)
export(ga_experiment)
export(ga_experiment_list)
export(ga_filter)
export(ga_filter_add)
export(ga_filter_apply_to_view)
export(ga_filter_delete)
export(ga_filter_list)
export(ga_filter_update)
export(ga_filter_update_filter_link)
export(ga_filter_view)
export(ga_filter_view_list)
export(ga_goal)
Expand All @@ -35,13 +40,15 @@ export(ga_remarketing_get)
export(ga_remarketing_list)
export(ga_segment_list)
export(ga_unsampled)
export(ga_unsampled_download)
export(ga_unsampled_list)
export(ga_users_list)
export(ga_view)
export(ga_view_list)
export(ga_webproperty)
export(ga_webproperty_list)
export(google_analytics)
export(google_analytics_3)
export(google_analytics_4)
export(google_analytics_account_list)
export(google_analytics_bq)
Expand All @@ -60,15 +67,33 @@ export(segment_element)
export(segment_ga4)
export(segment_vector_sequence)
export(segment_vector_simple)
exportClasses(dim_fil_ga4)
exportClasses(met_fil_ga4)
exportClasses(orFiltersForSegment_ga4)
exportClasses(segmentDef_ga4)
exportClasses(segmentFilterClause_ga4)
exportClasses(segmentFilter_ga4)
exportClasses(segmentSequenceStep_ga4)
exportClasses(sequenceSegment_ga4)
exportClasses(simpleSegment_ga4)
import(assertthat)
import(googleAuthR)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,select)
importFrom(dplyr,transmute)
importFrom(googleAuthR,gar_api_generator)
importFrom(googleAuthR,gar_cache_setup)
importFrom(httr,GET)
importFrom(httr,add_headers)
importFrom(httr,content)
importFrom(httr,progress)
importFrom(httr,stop_for_status)
importFrom(httr,write_disk)
importFrom(magrittr,"%>%")
importFrom(memoise,cache_filesystem)
importFrom(methods,setClass)
importFrom(purrr,map)
importFrom(purrr,map_if)
importFrom(rlang,"!!!")
importFrom(stats,setNames)
Expand Down
23 changes: 23 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,26 @@
# googleAnalyticsR 0.5.0

## Breaking changes!

If you were using `google_analytics()` before to fetch the v3 API, this is now available via `google_analytics_3()` - replace all instances and it should work as before. However, you are encouraged to migrate to v4, which now runs when you use `google_analytics()` (and for a while still at `google_analytics_4()` too)

## Major changes

* Add support for [resource based quotas](https://developers.google.com/analytics/devguides/reporting/core/v4/resource-based-quota) (#127)
* Improve support for using different auth cache files with `ga_auth()`
* Changed `google_analytics` to be the v4 API, `google_analytics_3` now supports v3
* Change default cache token name from `.httr-oauth` to `ga.oauth` to avoid clashes
* You can now change the rows fetched per API page in v4 up to 100k
* Add unsampled report downloads (#44 - many thanks to @j450h1 for the work on this)
* Add management of View and account filters (#108 - many thanks to @zselinger for the work on this)
* If a `google_analytics` batch API call fails, it will automatically retry with a slower request rate
* v4 API requests will now report how long they took, to help with configuration

## Bug fixes

* Fix bug where anti-sampling with no dimensions broke (#149 - thanks @smach)
* Let v3 API calls use batching when also using other googleAuthR batching functions

# googleAnalyticsR 0.4.2

## Bug fixes
Expand Down
142 changes: 80 additions & 62 deletions R/anti_sample.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#' Calculates multiple API calls to avoid sampling
#'
#' @inheritParams make_ga_4_req
#' @inheritParams google_analytics_4
#' @inheritParams google_analytics
#' @keywords internal
anti_sample <- function(anti_sample_batches,
viewId,
Expand All @@ -19,7 +19,8 @@ anti_sample <- function(anti_sample_batches,
cohorts,
metricFormat,
histogramBuckets,
slow_fetch){
slow_fetch,
rows_per_call){

if(length(date_range) > 2) stop("Anti-sampling not available for comparison date ranges.")

Expand All @@ -35,7 +36,36 @@ anti_sample <- function(anti_sample_batches,
}

myMessage("Finding how much sampling in data request...", level = 3)
test_call <- google_analytics_4(viewId = viewId,
test_call <- google_analytics(viewId = viewId,
date_range = date_range,
metrics = metrics,
dimensions = dimensions,
dim_filters = dim_filters,
met_filters = met_filters,
filtersExpression = filtersExpression,
order = order,
segments = segments,
pivots = pivots,
cohorts = cohorts,
max = 10,
metricFormat = metricFormat,
samplingLevel = "LARGE",
histogramBuckets = histogramBuckets,
slow_fetch = FALSE,
rows_per_call = rows_per_call)


## reduce read counts by 10% to get more calls as returned figure is flakey
read_counts <- round(as.integer(attr(test_call,"samplesReadCounts")[[1]]) * 0.9)
space_size <- as.integer(attr(test_call, "samplingSpaceSizes")[[1]])
samplingPer <- get_samplePercent(read_counts, space_size)

## add 20% to rowCount as its flakey (sampled rows of 0 not included?)
rowCount <- round(as.integer(attr(test_call, "rowCount")[[1]]) * 1.2)

if(identical(samplingPer, numeric(0))){
myMessage("No sampling found, returning call", level = 3)
unsampled <- google_analytics(viewId = viewId,
date_range = date_range,
metrics = metrics,
dimensions = dimensions,
Expand All @@ -46,51 +76,24 @@ anti_sample <- function(anti_sample_batches,
segments = segments,
pivots = pivots,
cohorts = cohorts,
max = 10,
max = -1,
metricFormat = metricFormat,
samplingLevel = "LARGE",
histogramBuckets = histogramBuckets,
slow_fetch = FALSE)


## reduce read counts by 10% to get more calls as returned figure is flakey
read_counts <- round(as.integer(attr(test_call,"samplesReadCounts")[[1]]) * 0.9)
space_size <- as.integer(attr(test_call, "samplingSpaceSizes")[[1]])
samplingPer <- get_samplePercent(read_counts, space_size)

## add 20% to rowCount as its flakey (sampled rows of 0 not included?)
rowCount <- round(as.integer(attr(test_call, "rowCount")[[1]]) * 1.2)

if(identical(samplingPer, numeric(0))){
myMessage("No sampling found, returning call", level = 3)
unsampled <- google_analytics_4(viewId = viewId,
date_range = date_range,
metrics = metrics,
dimensions = dimensions,
dim_filters = dim_filters,
met_filters = met_filters,
filtersExpression = filtersExpression,
order = order,
segments = segments,
pivots = pivots,
cohorts = cohorts,
max = -1,
metricFormat = metricFormat,
samplingLevel = "LARGE",
histogramBuckets = histogramBuckets,
anti_sample = FALSE,
slow_fetch = slow_fetch)
anti_sample = FALSE,
slow_fetch = slow_fetch,
rows_per_call = rows_per_call)
return(unsampled)
}

if(anti_sample_batches == "auto"){
## sampling
myMessage("Finding number of sessions for anti-sample calculations...", level = 3)
explore_sessions <- google_analytics_4(viewId = viewId,
date_range = date_range,
metrics = "sessions",
dimensions = "date",
max = -1) ## download all days! #66
explore_sessions <- google_analytics(viewId = viewId,
date_range = date_range,
metrics = "sessions",
dimensions = "date",
max = -1) ## download all days! #66
explore_sessions$cumulative <- cumsum(explore_sessions$sessions)
explore_sessions$sample_bucket <- chunkify(explore_sessions$sessions, limit = 250e3)

Expand All @@ -108,32 +111,41 @@ anti_sample <- function(anti_sample_batches,
new_date_ranges <- lapply(splits, function(x) {list(start_date = min(x$date),
end_date = max(x$date),
range_date = nrow(x))})
myMessage("Calculated [", length(new_date_ranges), "] batches are needed to download approx. [", rowCount,"] rows unsampled.",
myMessage("Calculated [",
length(new_date_ranges),
"] batches are needed to download approx. [",
rowCount,"] rows unsampled.",
level = 3)
myMessage("Found [", read_counts, "] sampleReadCounts from a [", space_size, "] samplingSpaceSize.",

myMessage("Found [",
read_counts,
"] sampleReadCounts from a [",
space_size, "] samplingSpaceSize.",
level = 2)

## send to fetch
did_it_work <- TRUE
unsampled_list <- lapply(new_date_ranges, function(x){

myMessage("Anti-sample call covering ", x$range_date, " days: ", x$start_date, ", ", x$end_date, level = 3)
out <- google_analytics_4(viewId = viewId,
date_range = c(x$start_date,x$end_date),
metrics = metrics,
dimensions = dimensions,
dim_filters = dim_filters,
met_filters = met_filters,
filtersExpression = filtersExpression,
order = order,
segments = segments,
pivots = pivots,
cohorts = cohorts,
max = rowCount,
metricFormat = metricFormat,
samplingLevel = "LARGE",
histogramBuckets = histogramBuckets,
slow_fetch = slow_fetch)
myMessage("Anti-sample call covering ", x$range_date, " days: ",
x$start_date, ", ", x$end_date, level = 3)
out <- google_analytics(viewId = viewId,
date_range = c(x$start_date,x$end_date),
metrics = metrics,
dimensions = dimensions,
dim_filters = dim_filters,
met_filters = met_filters,
filtersExpression = filtersExpression,
order = order,
segments = segments,
pivots = pivots,
cohorts = cohorts,
max = -1,
metricFormat = metricFormat,
samplingLevel = "LARGE",
histogramBuckets = histogramBuckets,
slow_fetch = slow_fetch,
rows_per_call = rows_per_call)

read_counts2 <- as.integer(attr(out,"samplesReadCounts")[[1]])
space_size2 <- as.integer(attr(out, "samplingSpaceSizes")[[1]])
Expand All @@ -150,13 +162,19 @@ anti_sample <- function(anti_sample_batches,
out <- Reduce(rbind, unsampled_list)

## get rid of duplicate rows per sample call
out <- aggregateGAData(out, agg_names = gsub("ga:","",dimensions))
agg_cols <- gsub("ga:","",dimensions)

## take care of segment column (#149)
if(!is.null(segments)){
agg_cols <- c("segment", agg_cols)
}
out <- aggregateGAData(out, agg_names = agg_cols)

## fill these in later
if(!is.null(out)){
attr(out, "totals") <- NULL
attr(out, "minimums") <- NULL
attr(out, "maximums") <- NULL
attr(out, "totals") <- attr(test_call, "totals")
attr(out, "minimums") <- attr(test_call, "minimums")
attr(out, "maximums") <- attr(test_call, "maximums")
attr(out, "rowCount") <- as.character(nrow(out))
attr(out, "nextPageToken") <- NULL
attr(out, "antiSampleWorked") <- did_it_work
Expand Down
Loading

0 comments on commit b09b13c

Please sign in to comment.