From 81e1fc33b1b9b824d4f35e79dd5216d8d8bb5cf5 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Thu, 3 Oct 2024 16:37:36 -0400 Subject: [PATCH] clarifications --- osf/metrics/reporters/public_item_usage.py | 8 +++++--- osf/metrics/reports.py | 13 +++++++------ 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/osf/metrics/reporters/public_item_usage.py b/osf/metrics/reporters/public_item_usage.py index 740d7308f88..e61ae1933de 100644 --- a/osf/metrics/reporters/public_item_usage.py +++ b/osf/metrics/reporters/public_item_usage.py @@ -18,6 +18,8 @@ _CHUNK_SIZE = 500 +_MAX_CARDINALITY_PRECISION = 40000 # https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-cardinality-aggregation.html#_precision_control + class _SkipItem(Exception): pass @@ -142,7 +144,7 @@ def _exact_item_search(self, yearmonth) -> edsl.Search: 'agg_session_count', 'cardinality', field='session_id', - precision_threshold=40000, # maximum precision + precision_threshold=_MAX_CARDINALITY_PRECISION, ) return _search @@ -185,7 +187,7 @@ def _get_view_session_count(self, yearmonth, osfid: str): 'agg_session_count', 'cardinality', field='session_id', - precision_threshold=40000, # maximum precision + precision_threshold=_MAX_CARDINALITY_PRECISION, ) _response = _search.execute() return _response.aggregations.agg_session_count.value @@ -243,7 +245,7 @@ def _iter_composite_buckets(search: edsl.Search, composite_agg_name: str): updates the search in-place for subsequent pages ''' while True: - _page_response = search.execute(ignore_cache=True) + _page_response = search.execute(ignore_cache=True) # reused search object has the previous page cached try: _agg_result = _page_response.aggregations[composite_agg_name] except KeyError: diff --git a/osf/metrics/reports.py b/osf/metrics/reports.py index bb0553f7f3b..c9c2846adfc 100644 --- a/osf/metrics/reports.py +++ b/osf/metrics/reports.py @@ -290,17 +290,18 @@ class InstitutionMonthlySummaryReport(MonthlyReport): class PublicItemUsageReport(MonthlyReport): UNIQUE_TOGETHER_FIELDS = ('report_yearmonth', 'item_osfid') - # where noted, fields correspond to defined terms from COUNTER + # where noted, fields are meant to correspond to defined terms from COUNTER # https://cop5.projectcounter.org/en/5.1/appendices/a-glossary-of-terms.html - item_osfid = metrics.Keyword() # counter:Item + # https://coprd.countermetrics.org/en/1.0.1/appendices/a-glossary.html + item_osfid = metrics.Keyword() # counter:Item (or Dataset) item_type = metrics.Keyword(multi=True) # counter:Data-Type provider_id = metrics.Keyword(multi=True) # counter:Database(?) platform_iri = metrics.Keyword(multi=True) # counter:Platform # view counts include views on components or files contained by this item - view_count = metrics.Long() # counter:Total_Item_Investigations - view_session_count = metrics.Long() # counter:Unique_Item_Investigations + view_count = metrics.Long() # counter:Total Investigations + view_session_count = metrics.Long() # counter:Unique Investigations # download counts of this item only (not including contained components or files) - download_count = metrics.Long() # counter:Total_Item_Requests - download_session_count = metrics.Long() # counter:Unique_Item_Requests + download_count = metrics.Long() # counter:Total Requests + download_session_count = metrics.Long() # counter:Unique Requests