Skip to content

Commit

Permalink
clarifications
Browse files Browse the repository at this point in the history
  • Loading branch information
aaxelb committed Oct 18, 2024
1 parent f742f70 commit 81e1fc3
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 9 deletions.
8 changes: 5 additions & 3 deletions osf/metrics/reporters/public_item_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

_CHUNK_SIZE = 500

_MAX_CARDINALITY_PRECISION = 40000 # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control


class _SkipItem(Exception):
pass
Expand Down Expand Up @@ -142,7 +144,7 @@ def _exact_item_search(self, yearmonth) -> edsl.Search:
'agg_session_count',
'cardinality',
field='session_id',
precision_threshold=40000, # maximum precision
precision_threshold=_MAX_CARDINALITY_PRECISION,
)
return _search

Expand Down Expand Up @@ -185,7 +187,7 @@ def _get_view_session_count(self, yearmonth, osfid: str):
'agg_session_count',
'cardinality',
field='session_id',
precision_threshold=40000, # maximum precision
precision_threshold=_MAX_CARDINALITY_PRECISION,
)
_response = _search.execute()
return _response.aggregations.agg_session_count.value
Expand Down Expand Up @@ -243,7 +245,7 @@ def _iter_composite_buckets(search: edsl.Search, composite_agg_name: str):
updates the search in-place for subsequent pages
'''
while True:
_page_response = search.execute(ignore_cache=True)
_page_response = search.execute(ignore_cache=True) # reused search object has the previous page cached
try:
_agg_result = _page_response.aggregations[composite_agg_name]
except KeyError:
Expand Down
13 changes: 7 additions & 6 deletions osf/metrics/reports.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,17 +290,18 @@ class InstitutionMonthlySummaryReport(MonthlyReport):
class PublicItemUsageReport(MonthlyReport):
UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid')

# where noted, fields correspond to defined terms from COUNTER
# where noted, fields are meant to correspond to defined terms from COUNTER
# https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html
item_osfid = metrics.Keyword() # counter:Item
# https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html
item_osfid = metrics.Keyword() # counter:Item (or Dataset)
item_type = metrics.Keyword(multi=True) # counter:Data-Type
provider_id = metrics.Keyword(multi=True) # counter:Database(?)
platform_iri = metrics.Keyword(multi=True) # counter:Platform

# view counts include views on components or files contained by this item
view_count = metrics.Long() # counter:Total_Item_Investigations
view_session_count = metrics.Long() # counter:Unique_Item_Investigations
view_count = metrics.Long() # counter:Total Investigations
view_session_count = metrics.Long() # counter:Unique Investigations

# download counts of this item only (not including contained components or files)
download_count = metrics.Long() # counter:Total_Item_Requests
download_session_count = metrics.Long() # counter:Unique_Item_Requests
download_count = metrics.Long() # counter:Total Requests
download_session_count = metrics.Long() # counter:Unique Requests

0 comments on commit 81e1fc3

Please sign in to comment.