Skip to content

Commit

Permalink
feat: don't drop custom datasets in regular indexing cycle
Browse files: browse the repository at this point in the history
  • Loading branch information
sylvanr committed Feb 12, 2024
1 parent 817579a commit ed52192
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 3 deletions.
Binary file modified .coverage
Binary file not shown.
4 changes: 2 additions & 2 deletions direct_indexing/direct_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def drop_removed_data():
existing = []

# Get the datasets that have been indexed
url = f'{settings.SOLR_DATASET}/select?fl=name%2Cid%2Ciati_cloud_indexed&indent=true&q.op=OR&q=*%3A*&rows=10000000'
url = f'{settings.SOLR_DATASET}/select?fl=name%2Cid%2Ciati_cloud_indexed%2Ciati_cloud_custom&indent=true&q.op=OR&q=*%3A*&rows=10000000' # NOQA: E501
data = requests.get(url)
data = data.json()['response']['docs']
if len(data) == 0:
Expand All @@ -91,7 +91,7 @@ def drop_removed_data():
existing.append(dataset['id'])

for d in data:
if d['id'] not in existing:
if 'iati_cloud_custom' not in d and d['id'] not in existing:
dropped_list.append(d['id'])

# For every core with dataset data, delete the data for the dropped datasets identified with the dataset.id field
Expand Down
1 change: 1 addition & 0 deletions direct_indexing/solr/cores/dataset/managed-schema
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,7 @@
<field name="iati_cloud_removed_date" type="pdate"/>
<field name="iati_cloud_removed_reason" type="text_general_single"/>
<field name="iati_cloud_should_be_indexed" type="boolean"/>
<field name="iati_cloud_custom" type="boolean"/>
<field name="id" type="string" multiValued="false" indexed="true" required="true" stored="true"/>
<field name="isopen" type="boolean"/>
<field name="license_id" type="text_general_single"/>
Expand Down
1 change: 1 addition & 0 deletions direct_indexing/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ def create_dataset_metadata(url, title, name, org):
meta["organization"]["created"] = now
meta["organization"]["id"] = f"zimmerman-custom-{org}"
meta["organization"]["name"] = org
meta["iati_cloud_custom"] = True

# Json dump meta to meta_dir
try:
Expand Down
2 changes: 1 addition & 1 deletion tests/direct_indexing/test_direct_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def test_run_dataset_metadata(mocker):
def test_drop_removed_data(mocker, tmp_path, requests_mock, fixture_solr_response, fixture_dataset_metadata):
# Mock settings.SOLR_DATASET to https://test.com
mocker.patch('direct_indexing.direct_indexing.settings.SOLR_DATASET', 'https://test.com')
test_url = 'https://test.com/select?fl=name%2Cid%2Ciati_cloud_indexed&indent=true&q.op=OR&q=*%3A*&rows=10000000'
test_url = 'https://test.com/select?fl=name%2Cid%2Ciati_cloud_indexed%2Ciati_cloud_custom&indent=true&q.op=OR&q=*%3A*&rows=10000000' # NOQA: E501

# mock settings.BASE_DIR to be tmp_path
mocker.patch('direct_indexing.direct_indexing.settings.BASE_DIR', tmp_path)
Expand Down

0 comments on commit ed52192

Please sign in to comment.