diff --git a/hack/docker-compose.opensearch.yml b/hack/docker-compose.opensearch.yml new file mode 100644 index 0000000000..09bcdc26d9 --- /dev/null +++ b/hack/docker-compose.opensearch.yml @@ -0,0 +1,38 @@ +--- +version: "2.1" + +volumes: + opensearch_data: + +services: + opensearch: + image: opensearchproject/opensearch:1.3.0 + environment: + - discovery.type=single-node + - cluster.name=am-cluster + - cluster.routing.allocation.disk.threshold_enabled=false + - node.name=am-node + - bootstrap.memory_lock=true + - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m" + - "DISABLE_INSTALL_DEMO_CONFIG=true" + - "DISABLE_SECURITY_PLUGIN=true" + ulimits: + memlock: + soft: -1 + hard: -1 + nofile: + soft: 65536 + hard: 65536 + volumes: + - opensearch_data:/usr/share/opensearch/data + ports: + - 127.0.0.1:62092:9200 + - 127.0.0.1:62096:9600 + + archivematica-mcp-client: + environment: + ARCHIVEMATICA_MCPCLIENT_MCPCLIENT_ELASTICSEARCHSERVER: "opensearch:9200" + + archivematica-dashboard: + environment: + ARCHIVEMATICA_DASHBOARD_DASHBOARD_ELASTICSEARCH_SERVER: "opensearch:9200" diff --git a/hack/docker-compose.yml b/hack/docker-compose.yml index 9d31cb3f86..7ad88a944a 100644 --- a/hack/docker-compose.yml +++ b/hack/docker-compose.yml @@ -40,9 +40,11 @@ services: - "127.0.0.1:62001:3306" elasticsearch: - image: "docker.elastic.co/elasticsearch/elasticsearch:6.5.4" + image: "docker.elastic.co/elasticsearch/elasticsearch-oss:7.10.2" environment: + - "discovery.type=single-node" - "cluster.name=am-cluster" + - "cluster.routing.allocation.disk.threshold_enabled=false" - "node.name=am-node" - "network.host=0.0.0.0" - "bootstrap.memory_lock=true" diff --git a/requirements-dev.txt b/requirements-dev.txt index 5cd17ffbf7..6f015f1069 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -23,6 +23,7 @@ brotli==0.5.2 certifi==2021.5.30 # via # -r requirements.txt + # elasticsearch # requests cffi==1.14.6 # via @@ -77,7 +78,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d # via -r requirements.txt django-tastypie==0.13.2 # via -r requirements.txt -elasticsearch==6.8.2 +elasticsearch==7.13.0 # via -r requirements.txt filelock==3.3.1 # via diff --git a/requirements.in b/requirements.in index 935a98ae63..9623078a2d 100644 --- a/requirements.in +++ b/requirements.in @@ -13,7 +13,7 @@ django-extensions==1.7.9 django-forms-bootstrap>=3.0.0,<4.0.0 django-prometheus==1.0.15 django-tastypie==0.13.2 -elasticsearch>=6.0.0,<7.0.0 +elasticsearch==7.13.0 gearman3==0.2.1 gevent==1.3.6 # used by gunicorn's async workers gunicorn==19.9.0 diff --git a/requirements.txt b/requirements.txt index ea0916521e..c9b2fe5716 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,7 +15,9 @@ bagit==1.7.0 brotli==0.5.2 # via -r requirements.in certifi==2021.5.30 - # via requests + # via + # elasticsearch + # requests cffi==1.14.6 # via cryptography charset-normalizer==2.0.1 @@ -56,7 +58,7 @@ django-shibboleth-remoteuser @ git+https://github.com/Brown-University-Library/d # via -r requirements.in django-tastypie==0.13.2 # via -r requirements.in -elasticsearch==6.8.2 +elasticsearch==7.13.0 # via -r requirements.in future==0.18.2 # via metsrw diff --git a/src/archivematicaCommon/lib/elasticSearchFunctions.py b/src/archivematicaCommon/lib/elasticSearchFunctions.py index 125df49dc6..cbfe3e3ad0 100644 --- a/src/archivematicaCommon/lib/elasticSearchFunctions.py +++ b/src/archivematicaCommon/lib/elasticSearchFunctions.py @@ -221,7 +221,7 @@ def create_indexes_if_needed(client, indexes): # Call get index body functions below for each index body = getattr(sys.modules[__name__], "_get_%s_index_body" % index)() logger.info('Creating "%s" index ...', index) - client.indices.create(index, body=body, ignore=400) + client.indices.create(index, body=body, ignore=400, include_type_name=True) logger.info("Index created.") @@ -1212,7 +1212,13 @@ def search_all_results(client, body, index): if isinstance(index, list): index = ",".join(index) - results = client.search(body=body, index=index, size=MAX_QUERY_SIZE) + results = client.search( + body=body, + index=index, + size=MAX_QUERY_SIZE, + rest_total_hits_as_int=True, + track_total_hits=True, + ) if results["hits"]["total"] > MAX_QUERY_SIZE: logger.warning( @@ -1284,7 +1290,13 @@ def get_file_tags(client, uuid): """ query = {"query": {"term": {"fileuuid": uuid}}} - results = client.search(body=query, index=TRANSFER_FILES_INDEX, _source="tags") + results = client.search( + body=query, + index=TRANSFER_FILES_INDEX, + _source="tags", + rest_total_hits_as_int=True, + track_total_hits=True, + ) count = results["hits"]["total"] if count == 0: diff --git a/src/archivematicaCommon/tests/fixtures/test_delete_aip.yaml b/src/archivematicaCommon/tests/fixtures/test_delete_aip.yaml index 39fdb2bc0b..d5c6cfa503 100644 --- a/src/archivematicaCommon/tests/fixtures/test_delete_aip.yaml +++ b/src/archivematicaCommon/tests/fixtures/test_delete_aip.yaml @@ -4,7 +4,7 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET + method: POST uri: http://elasticsearch:9200/aips/_search?_source=uuid response: body: {string: !!python/unicode '{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.2876821,"hits":[{"_index":"aips","_type":"_doc","_id":"lBsZBWgBn49OAVhMXeO8","_score":0.2876821,"_source":{"uuid":"b34521a3-1c63-43dd-b901-584416f36c91"}}]}}'} @@ -30,7 +30,7 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET + method: POST uri: http://elasticsearch:9200/aips/_search?_source=uuid response: body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'} diff --git a/src/archivematicaCommon/tests/fixtures/test_delete_aip_files.yaml b/src/archivematicaCommon/tests/fixtures/test_delete_aip_files.yaml index 38f27fe248..d60b012699 100644 --- a/src/archivematicaCommon/tests/fixtures/test_delete_aip_files.yaml +++ b/src/archivematicaCommon/tests/fixtures/test_delete_aip_files.yaml @@ -4,7 +4,7 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET + method: POST uri: http://elasticsearch:9200/aipfiles/_search response: body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":2,"max_score":0.2876821,"hits":[{"_index":"aipfiles","_type":"_doc","_id":"lRsZBWgBn49OAVhMXuMC","_score":0.2876821,"_source":{"origin":"1a14043f-68ef-4bfe-a129-e2e4cdbe391b","METS":{"dmdSec":{"ns0:xmlData_dict_list":[{"@xmlns:ns1":"http://www.loc.gov/premis/v3","@xmlns:ns0":"http://www.loc.gov/METS/","@xmlns:xsi":"http://www.w3.org/2001/XMLSchema-instance","ns1:object_dict_list":[{"ns1:originalName":"20181231153024-b34521a3-1c63-43dd-b901-584416f36c91","@version":"3.0","@xsi:type":"premis:intellectualEntity","ns1:objectIdentifier_dict_list":[{"ns1:objectIdentifierType":"UUID","ns1:objectIdentifierValue":"b34521a3-1c63-43dd-b901-584416f36c91"}],"@xsi:schemaLocation":"http://www.loc.gov/premis/v3 @@ -98,7 +98,7 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET + method: POST uri: http://elasticsearch:9200/aipfiles/_search response: body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'} diff --git a/src/archivematicaCommon/tests/fixtures/test_get_tags_no_matches.yaml b/src/archivematicaCommon/tests/fixtures/test_get_tags_no_matches.yaml index 75b2cbceae..5d36e88398 100644 --- a/src/archivematicaCommon/tests/fixtures/test_get_tags_no_matches.yaml +++ b/src/archivematicaCommon/tests/fixtures/test_get_tags_no_matches.yaml @@ -4,8 +4,8 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET - uri: http://elasticsearch:9200/transferfiles/_search?_source=tags + method: POST + uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true response: body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'} headers: diff --git a/src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml b/src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml index bb84c1fdfb..c9c80136c5 100644 --- a/src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml +++ b/src/archivematicaCommon/tests/fixtures/test_set_get_tags.yaml @@ -4,8 +4,8 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET - uri: http://elasticsearch:9200/transferfiles/_search?size=10000 + method: POST + uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true response: body: {string: !!python/unicode '{"took":1,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.6931472,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.6931472,"_source":{"accessionid":"","status":"backlog","sipuuid":"17b168b6-cbba-4f43-8838-a53360238acb","tags":[],"file_extension":"jpg","relative_path":"test-17b168b6-cbba-4f43-8838-a53360238acb/objects/Landing_zone.jpg","bulk_extractor_reports":[],"origin":"1a14043f-68ef-4bfe-a129-e2e4cdbe391b","size":1.2982568740844727,"modification_date":"2018-12-11","created":1546273029.7313669,"format":[],"ingestdate":"2018-12-31","filename":"Landing_zone.jpg","fileuuid":"268421a7-a986-4fa0-95c1-54176e508210"}}]}}'} headers: @@ -30,8 +30,8 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET - uri: http://elasticsearch:9200/transferfiles/_search?_source=tags + method: POST + uri: http://elasticsearch:9200/transferfiles/_search?_source=tags&rest_total_hits_as_int=true&track_total_hits=true response: body: {string: !!python/unicode '{"took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":1,"max_score":0.47000363,"hits":[{"_index":"transferfiles","_type":"_doc","_id":"mBsZBWgBn49OAVhMh-OV","_score":0.47000363,"_source":{"tags":["test"]}}]}}'} headers: diff --git a/src/archivematicaCommon/tests/fixtures/test_set_tags_no_matches.yaml b/src/archivematicaCommon/tests/fixtures/test_set_tags_no_matches.yaml index 95a225a1fd..da827d2738 100644 --- a/src/archivematicaCommon/tests/fixtures/test_set_tags_no_matches.yaml +++ b/src/archivematicaCommon/tests/fixtures/test_set_tags_no_matches.yaml @@ -4,8 +4,8 @@ interactions: headers: connection: [keep-alive] content-type: [application/json] - method: GET - uri: http://elasticsearch:9200/transferfiles/_search?size=10000 + method: POST + uri: http://elasticsearch:9200/transferfiles/_search?rest_total_hits_as_int=true&size=10000&track_total_hits=true response: body: {string: !!python/unicode '{"took":0,"timed_out":false,"_shards":{"total":5,"successful":5,"skipped":0,"failed":0},"hits":{"total":0,"max_score":null,"hits":[]}}'} headers: diff --git a/src/dashboard/src/components/archival_storage/views.py b/src/dashboard/src/components/archival_storage/views.py index 946983cca4..f09e143e64 100644 --- a/src/dashboard/src/components/archival_storage/views.py +++ b/src/dashboard/src/components/archival_storage/views.py @@ -360,6 +360,8 @@ def search(request): size=page_size, sort=order_by + ":" + sort_direction if order_by else "", _source=source, + rest_total_hits_as_int=True, + track_total_hits=True, ) if file_mode: diff --git a/src/dashboard/src/components/backlog/views.py b/src/dashboard/src/components/backlog/views.py index e082e68288..87c1d6d731 100644 --- a/src/dashboard/src/components/backlog/views.py +++ b/src/dashboard/src/components/backlog/views.py @@ -202,6 +202,8 @@ def search(request): size=page_size, sort=order_by + ":" + sort_direction if order_by else "", _source=source, + rest_total_hits_as_int=True, + track_total_hits=True, ) hit_count = hits["hits"]["total"]