Skip to content

Commit

Permalink
Implement pagination in get_theme_datasets to efficiently retrieve all datasets
Browse files Browse the repository at this point in the history

- Modified the get_theme_datasets function to paginate its search, so it retrieves every matching dataset (however many there are) without the caller having to specify a count.
- Introduced a while loop to fetch datasets in batches of 100 until all results are retrieved.
- Updated the search_dict with start and rows parameters to manage pagination.
  • Loading branch information
mjanez committed Oct 11, 2024
1 parent d05b4dd commit 2579f47
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 9 deletions.
30 changes: 21 additions & 9 deletions ckanext/schemingdcat/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1507,28 +1507,34 @@ def get_spatial_datasets(count=10, return_count=False):
return result['results']

@helper
def get_theme_datasets(field='theme', count=10):
def get_theme_datasets(field='theme'):
"""
This helper function retrieves a specified number of featured datasets from the CKAN instance.
It uses the 'package_search' action of the CKAN logic layer to perform a search with specific parameters.
Retrieves all datasets with the specified field efficiently using pagination.
Parameters:
field (str): The field to search for in the dataset extras. Default is 'theme'.
count (int): The number of featured datasets to retrieve. Default is 10.
Returns:
list: A list of unique values from the specified field in the featured datasets.
"""
search_dict = {
'fl': 'extras_' + field,
'rows': count
'rows': 100, # Number of datasets per batch
'start': 0
}
context = {'model': model, 'session': model.Session}
result = logic.get_action('package_search')(context, search_dict)

return result['results']
results = []

while True:
result = logic.get_action('package_search')(context, search_dict)
results.extend(result['results'])
if len(result['results']) < search_dict['rows']:
break
search_dict['start'] += search_dict['rows']

log.debug('Total results retrieved: %d', len(results))
return results

@lru_cache(maxsize=16)
@helper
def get_unique_themes():
"""
Expand Down Expand Up @@ -1760,6 +1766,9 @@ def schemingdcat_get_theme_statistics(theme_field=None, icons_dir=None) -> List[
for val in parsed_values:
theme_counts[val] += 1

# Debugging: Print the theme counts
log.debug("Theme counts:%s", theme_counts)

# Generate the final list of dictionaries
stats = [
{
Expand All @@ -1772,6 +1781,9 @@ def schemingdcat_get_theme_statistics(theme_field=None, icons_dir=None) -> List[
for theme, count in theme_counts.items() # Process items directly without separate for loop
]

# Debugging: Print the stats
log.debug("Stats:%s", stats)

return stats

@helper
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
<div class="sct-about-themes-card-label">{{ _(label) }}</div>
</a>
{% endmacro %}

{% set theme_field = h.schemingdcat_get_default_package_item_icon() %}
{% set themes_stats = h.schemingdcat_get_open_data_statistics(theme_field) %}
{% if themes_stats %}
Expand Down

0 comments on commit 2579f47

Please sign in to comment.