diff --git a/neo4j-app/neo4j_app/app/tasks.py b/neo4j-app/neo4j_app/app/tasks.py index 169d7c6e..3058cbb2 100644 --- a/neo4j-app/neo4j_app/app/tasks.py +++ b/neo4j-app/neo4j_app/app/tasks.py @@ -31,14 +31,14 @@ def tasks_router() -> APIRouter: @router.post("/tasks", response_model=Task) async def _create_task(project: str, job: TaskJob) -> Response: - task_task_manager = lifespan_task_manager() + task_manager = lifespan_task_manager() event_publisher = lifespan_event_publisher() task_id = job.task_id if task_id is None: task_id = job.generate_task_id() task = job.to_task(task_id=task_id) try: - await task_task_manager.enqueue(task, project) + await task_manager.enqueue(task, project) except TaskAlreadyExists: return Response(task.id, status_code=200) except TaskQueueIsFull as e: @@ -55,9 +55,9 @@ async def _create_task(project: str, job: TaskJob) -> Response: @router.post("/tasks/{task_id}/cancel", response_model=Task) async def _cancel_task(project: str, task_id: str) -> Task: - task_task_manager = lifespan_task_manager() + task_manager = lifespan_task_manager() try: - cancelled = await task_task_manager.cancel(task_id=task_id, project=project) + cancelled = await task_manager.cancel(task_id=task_id, project=project) except UnknownTask as e: raise HTTPException(status_code=404, detail=e.args[0]) from e return cancelled diff --git a/neo4j-app/neo4j_app/constants.py b/neo4j-app/neo4j_app/constants.py index 085b0de8..4a04b13f 100644 --- a/neo4j-app/neo4j_app/constants.py +++ b/neo4j-app/neo4j_app/constants.py @@ -6,10 +6,13 @@ DOC_NODE = "Document" DOC_CONTENT_LENGTH = "contentLength" DOC_CONTENT_TYPE = "contentType" +DOC_CREATED_AT = "createdAt" DOC_DIRNAME = "dirname" DOC_ID = "id" DOC_ID_CSV = f"ID({DOC_NODE})" DOC_EXTRACTION_DATE = "extractionDate" +DOC_METADATA = "metadata" +DOC_MODIFIED_AT = "modifiedAt" DOC_PATH = "path" DOC_URL_SUFFIX = "urlSuffix" DOC_ROOT_ID = "rootDocument" @@ -20,12 +23,30 @@ DOC_CONTENT_TYPE: {}, DOC_CONTENT_LENGTH: {NEO4J_CSV_COL: 
"LONG"}, DOC_EXTRACTION_DATE: {NEO4J_CSV_COL: "DATETIME"}, + DOC_METADATA: {}, DOC_PATH: {}, DOC_URL_SUFFIX: {}, } DOC_ES_SOURCES = list(DOC_COLUMNS) + ["join", DOC_ROOT_ID] +# Order matters here, we're taking the cdterms create in priority to be consistent +# with datashare-api which sets the creationDate as from tika_metadata_dcterms_created +# we fall back to other metadata if this one is missing +DOC_CREATED_AT_META = [ + "tika_metadata_dcterms_created_iso8601", + "tika_metadata_creation_date_iso8601", + "tika_metadata_date_iso8601", +] +DOC_MODIFIED_AT_META = [ + "tika_metadata_dcterms_modified_iso8601", + "tika_metadata_last_modified_iso8601", + "tika_metadata_modified_iso8601", + "tika_metadata_last_save_date_iso8601", + "tika_metadata_pdf_docinfo_modified_iso8601", + "tika_metadata_date_iso8601", +] + PROJECT_RUNS_MIGRATION = "_RUNS" PROJECT_NAME = "name" PROJECT_NODE = "_Project" diff --git a/neo4j-app/neo4j_app/core/elasticsearch/to_neo4j.py b/neo4j-app/neo4j_app/core/elasticsearch/to_neo4j.py index 62c04ecf..bd32f49e 100644 --- a/neo4j-app/neo4j_app/core/elasticsearch/to_neo4j.py +++ b/neo4j-app/neo4j_app/core/elasticsearch/to_neo4j.py @@ -1,9 +1,14 @@ import hashlib -from typing import Dict, List, Optional, TextIO +from typing import Any, Dict, List, Optional, TextIO from neo4j_app.constants import ( DOC_COLUMNS, + DOC_CREATED_AT, + DOC_CREATED_AT_META, DOC_ID, + DOC_METADATA, + DOC_MODIFIED_AT, + DOC_MODIFIED_AT_META, DOC_NODE, DOC_ROOT_ID, DOC_URL_SUFFIX, @@ -40,7 +45,7 @@ _DS_DOC_URL = "ds/" -def es_to_neo4j_doc_row(document_hit: Dict) -> List[Dict[str, str]]: +def es_to_neo4j_doc_row(document_hit: Dict) -> List[Dict[str, Any]]: doc_id = document_hit["_id"] doc = {DOC_ID: doc_id} hit_source = document_hit[SOURCE] @@ -55,11 +60,23 @@ def es_to_neo4j_doc_row(document_hit: Dict) -> List[Dict[str, str]]: return [doc] +def _coalesce(item: Dict[str, Any], columns: List[str]) -> Optional[Any]: + for c in columns: + value = item.get(c) + if value is not 
None: + return value + return None + + + def es_to_neo4j_doc_csv( document_hit: Dict, *, prop_to_col_header: Dict[str, str] ) -> List[Dict[str, str]]: doc = es_to_neo4j_doc_row(document_hit)[0] doc.pop(DOC_ROOT_ID, None) + metadata = doc.pop(DOC_METADATA, None) + if metadata is not None: + doc[DOC_CREATED_AT] = _coalesce(metadata, DOC_CREATED_AT_META) + doc[DOC_MODIFIED_AT] = _coalesce(metadata, DOC_MODIFIED_AT_META) doc = {prop_to_col_header[prop]: value for prop, value in doc.items()} doc[NEO4J_CSV_LABEL] = DOC_NODE return [doc] diff --git a/neo4j-app/neo4j_app/core/imports.py b/neo4j-app/neo4j_app/core/imports.py index 6123b517..f1931f64 100644 --- a/neo4j-app/neo4j_app/core/imports.py +++ b/neo4j-app/neo4j_app/core/imports.py @@ -31,9 +31,12 @@ from neo4j_app import ROOT_DIR from neo4j_app.constants import ( DOC_COLUMNS, + DOC_CREATED_AT, DOC_ES_SOURCES, DOC_ID, DOC_ID_CSV, + DOC_METADATA, + DOC_MODIFIED_AT, DOC_NODE, DOC_ROOT_TYPE, EMAIL_HEADER, @@ -375,6 +378,18 @@ async def to_neo4j_csvs( _DOC_ROOT_REL_HEADER = [f"{NEO4J_CSV_START_ID}({DOC_NODE})", _DOC_REL_END_CSV_COL] +def _doc_nodes_header_and_mapping() -> Tuple[List[str], Dict[str, str]]: + doc_nodes_header, doc_nodes_mapping = _make_header_and_mapping(DOC_COLUMNS) + doc_nodes_header = [h for h in doc_nodes_header if h != DOC_METADATA] + doc_nodes_mapping.pop(DOC_METADATA) + doc_created_at_h = f"{DOC_CREATED_AT}:DATETIME" + doc_modified_at_h = f"{DOC_MODIFIED_AT}:DATETIME" + doc_nodes_header.extend([doc_created_at_h, doc_modified_at_h]) + doc_nodes_mapping[DOC_CREATED_AT] = doc_created_at_h + doc_nodes_mapping[DOC_MODIFIED_AT] = doc_modified_at_h + return doc_nodes_header, doc_nodes_mapping + + async def _to_neo4j_doc_csvs( *, export_dir: Path, @@ -388,7 +403,7 @@ async def _to_neo4j_doc_csvs( ) -> Tuple[NodeCSVs, RelationshipCSVs]: doc_nodes_path = export_dir.joinpath("docs.csv") doc_nodes_header_path = export_dir.joinpath("docs-header.csv") - doc_nodes_header, doc_nodes_mapping = 
_make_header_and_mapping(DOC_COLUMNS) + doc_nodes_header, doc_nodes_mapping = _doc_nodes_header_and_mapping() doc_nodes_header.append(NEO4J_CSV_LABEL) with doc_nodes_header_path.open("w") as f: get_neo4j_csv_writer(f, doc_nodes_header).writeheader() diff --git a/neo4j-app/neo4j_app/core/neo4j/__init__.py b/neo4j-app/neo4j_app/core/neo4j/__init__.py index 0e5cff09..cc2757f7 100644 --- a/neo4j-app/neo4j_app/core/neo4j/__init__.py +++ b/neo4j-app/neo4j_app/core/neo4j/__init__.py @@ -15,6 +15,7 @@ migration_v_0_4_0_tx, migration_v_0_5_0_tx, migration_v_0_6_0_tx, + migration_v_0_7_0_tx, ) V_0_1_0 = Migration( @@ -47,7 +48,12 @@ label="Add mention counts to named entity document relationships", migration_fn=migration_v_0_6_0_tx, ) -MIGRATIONS = [V_0_1_0, V_0_2_0, V_0_3_0, V_0_4_0, V_0_5_0, V_0_6_0] +V_0_7_0 = Migration( + version="0.7.0", + label="Create document modified and created at indexes", + migration_fn=migration_v_0_7_0_tx, +) +MIGRATIONS = [V_0_1_0, V_0_2_0, V_0_3_0, V_0_4_0, V_0_5_0, V_0_6_0, V_0_7_0] def get_neo4j_csv_reader( diff --git a/neo4j-app/neo4j_app/core/neo4j/documents.py b/neo4j-app/neo4j_app/core/neo4j/documents.py index a66c36ca..49eb633a 100644 --- a/neo4j-app/neo4j_app/core/neo4j/documents.py +++ b/neo4j-app/neo4j_app/core/neo4j/documents.py @@ -6,9 +6,13 @@ from neo4j_app.constants import ( DOC_CONTENT_LENGTH, DOC_CONTENT_TYPE, + DOC_CREATED_AT, + DOC_CREATED_AT_META, DOC_DIRNAME, DOC_EXTRACTION_DATE, DOC_ID, + DOC_MODIFIED_AT, + DOC_MODIFIED_AT_META, DOC_NODE, DOC_PATH, DOC_ROOT_ID, @@ -20,6 +24,15 @@ logger = logging.getLogger(__name__) +_DOC_CREATED_AT_META = ["metadata." + c for c in DOC_CREATED_AT_META] +_DOC_MODIFIED_AT_META = ["metadata." 
+ c for c in DOC_MODIFIED_AT_META] + + +def _coalesce(*, variable: str, attributes: List[str]) -> str: + values = ", ".join(f"{variable}.{a}" for a in attributes) + return f"coalesce({values})" + + async def import_document_rows( neo4j_session: neo4j.AsyncSession, records: List[Dict], @@ -37,7 +50,11 @@ async def import_document_rows( doc.{DOC_EXTRACTION_DATE} = datetime(row.{DOC_EXTRACTION_DATE}), doc.{DOC_DIRNAME} = row.{DOC_DIRNAME}, doc.{DOC_PATH} = row.{DOC_PATH}, - doc.{DOC_URL_SUFFIX} = row.{DOC_URL_SUFFIX} + doc.{DOC_URL_SUFFIX} = row.{DOC_URL_SUFFIX}, + doc.{DOC_CREATED_AT} = datetime({ + _coalesce(variable="row", attributes=_DOC_CREATED_AT_META)}), + doc.{DOC_MODIFIED_AT} = datetime({ + _coalesce(variable="row", attributes=_DOC_MODIFIED_AT_META)}) WITH doc, row WHERE doc.{DOC_ID} = row.{DOC_ID} and row.{DOC_ROOT_ID} IS NOT NULL MERGE (root:{DOC_NODE} {{{DOC_ID}: row.{DOC_ROOT_ID}}}) diff --git a/neo4j-app/neo4j_app/core/neo4j/migrations/migrations.py b/neo4j-app/neo4j_app/core/neo4j/migrations/migrations.py index 201d2c03..29726db7 100644 --- a/neo4j-app/neo4j_app/core/neo4j/migrations/migrations.py +++ b/neo4j-app/neo4j_app/core/neo4j/migrations/migrations.py @@ -2,7 +2,9 @@ from neo4j_app.constants import ( DOC_CONTENT_TYPE, + DOC_CREATED_AT, DOC_ID, + DOC_MODIFIED_AT, DOC_NODE, DOC_PATH, EMAIL_DOMAIN, @@ -57,6 +59,10 @@ async def migration_v_0_6_0_tx(tx: neo4j.AsyncTransaction): await _add_mention_count_to_named_entity_relationship(tx) +async def migration_v_0_7_0_tx(tx: neo4j.AsyncTransaction): + await _create_document_created_and_modified_at_indexes(tx) + + async def _create_document_and_ne_id_unique_constraint_tx(tx: neo4j.AsyncTransaction): doc_query = f"""CREATE CONSTRAINT constraint_document_unique_id IF NOT EXISTS @@ -164,3 +170,14 @@ async def _add_mention_count_to_named_entity_relationship(tx: neo4j.AsyncTransac query = f"""MATCH (:{NE_NODE})-[rel:{NE_APPEARS_IN_DOC}]->(:{DOC_NODE}) SET rel.{NE_MENTION_COUNT} = size(rel.{NE_OFFSETS})""" await 
tx.run(query) + + +async def _create_document_created_and_modified_at_indexes(tx: neo4j.AsyncTransaction): + created_at_index = f"""CREATE INDEX index_document_created_at IF NOT EXISTS +FOR (doc:{DOC_NODE}) +ON (doc.{DOC_CREATED_AT})""" + await tx.run(created_at_index) + modified_at_index = f"""CREATE INDEX index_document_modified_at IF NOT EXISTS +FOR (doc:{DOC_NODE}) +ON (doc.{DOC_MODIFIED_AT})""" + await tx.run(modified_at_index) diff --git a/neo4j-app/neo4j_app/tests/app/test_tasks.py b/neo4j-app/neo4j_app/tests/app/test_tasks.py index 52abb54b..93e8de52 100644 --- a/neo4j-app/neo4j_app/tests/app/test_tasks.py +++ b/neo4j-app/neo4j_app/tests/app/test_tasks.py @@ -176,7 +176,7 @@ def test_create_task_should_return_429_when_too_many_tasks( # Then assert res_0.status_code == 201, res_0.json() - # This one is queued or rejected depending if the first one is processed or still + # This one is queued or rejected depending on if the first one is processed or still # in the queue assert res_1.status_code in [201, 429], res_1.json() assert res_2.status_code == 429, res_1.json() diff --git a/neo4j-app/neo4j_app/tests/conftest.py b/neo4j-app/neo4j_app/tests/conftest.py index 81ed7588..b27b5b0f 100644 --- a/neo4j-app/neo4j_app/tests/conftest.py +++ b/neo4j-app/neo4j_app/tests/conftest.py @@ -351,10 +351,10 @@ async def neo4j_test_session( return session -def make_docs(n: int) -> Generator[Dict, None, None]: +def make_docs(n: int, add_dates: bool = False) -> Generator[Dict, None, None]: random.seed(a=777) for i in random.sample(list(range(n)), k=n): - yield { + doc = { "_index": TEST_PROJECT, "_id": f"doc-{i}", "_source": { @@ -368,6 +368,12 @@ def make_docs(n: int) -> Generator[Dict, None, None]: "join": {"name": "Document"}, }, } + if add_dates: + doc["_source"]["metadata"] = { + "tika_metadata_dcterms_created_iso8601": "2022-04-08T11:41:34Z", + "tika_metadata_modified_iso8601": "2022-04-08T11:41:34Z", + } + yield doc def make_named_entities(n: int) -> Generator[Dict, 
None, None]: @@ -393,8 +399,10 @@ def make_named_entities(n: int) -> Generator[Dict, None, None]: } -def index_docs_ops(*, index_name: str, n: int) -> Generator[Dict, None, None]: - for doc in make_docs(n): +def index_docs_ops( + *, index_name: str, n: int, add_dates: bool = False +) -> Generator[Dict, None, None]: + for doc in make_docs(n, add_dates): op = { "_op_type": "index", "_index": index_name, @@ -426,9 +434,9 @@ def index_named_entities_ops(*, index_name: str, n: int) -> Generator[Dict, None async def index_docs( - client: ESClient, *, n: int, index_name: str = TEST_PROJECT + client: ESClient, *, n: int, index_name: str = TEST_PROJECT, add_dates: bool = False ) -> AsyncGenerator[Dict, None]: - ops = index_docs_ops(index_name=index_name, n=n) + ops = index_docs_ops(index_name=index_name, n=n, add_dates=add_dates) # Let's wait to make this operation visible to the search refresh = "wait_for" async for res in async_streaming_bulk(client, actions=ops, refresh=refresh): diff --git a/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrate.py b/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrate.py index 83a9940f..115166d3 100644 --- a/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrate.py +++ b/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrate.py @@ -376,7 +376,7 @@ async def test_init_project_should_raise_for_reserved_name( ) -@pytest.mark.regression("131") +@pytest.mark.pull("131") async def test_migrate_project_db_schema_should_read_migrations_from_registry( neo4j_test_driver_session: neo4j.AsyncDriver, monkeypatch, diff --git a/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrations.py b/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrations.py index cc22d836..fb3d77a5 100644 --- a/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrations.py +++ b/neo4j-app/neo4j_app/tests/core/neo4j/migrations/test_migrations.py @@ -7,6 +7,7 @@ migration_v_0_4_0_tx, migration_v_0_5_0_tx, migration_v_0_6_0_tx, + 
migration_v_0_7_0_tx, ) @@ -116,3 +117,20 @@ async def test_migration_v_0_6_0_tx(neo4j_test_session: neo4j.AsyncSession): rel = res["rel"] mention_counts = rel.get("mentionCount") assert mention_counts == 2 + + +async def test_migration_v_0_7_0_tx(neo4j_test_session: neo4j.AsyncSession): + # When + await neo4j_test_session.execute_write(migration_v_0_7_0_tx) + + # Then + indexes_res = await neo4j_test_session.run("SHOW INDEXES") + existing_indexes = set() + async for rec in indexes_res: + existing_indexes.add(rec["name"]) + expected_indexes = [ + "index_document_created_at", + "index_document_modified_at", + ] + for index in expected_indexes: + assert index in existing_indexes diff --git a/neo4j-app/neo4j_app/tests/core/neo4j/test_documents.py b/neo4j-app/neo4j_app/tests/core/neo4j/test_documents.py index c7cc1af0..84c96bb8 100644 --- a/neo4j-app/neo4j_app/tests/core/neo4j/test_documents.py +++ b/neo4j-app/neo4j_app/tests/core/neo4j/test_documents.py @@ -1,4 +1,6 @@ import io +from datetime import datetime, timedelta, timezone +from typing import Dict, Optional import neo4j import pytest @@ -131,3 +133,132 @@ async def test_import_documents_should_update_document( assert isinstance(doc_property, DateTime) else: assert doc_property == v + + +_datetime_0 = datetime.utcnow().replace(microsecond=0) +_datetime_1 = _datetime_0 + timedelta(0, 1) +_datetime_2 = _datetime_1 + timedelta(0, 1) +_datetime_3 = _datetime_2 + timedelta(0, 1) +_datetime_4 = _datetime_3 + timedelta(0, 1) + + +def _tika_iso(dt: datetime) -> str: + return dt.replace(microsecond=0).isoformat() + "Z" + + +@pytest.mark.parametrize( + "metadata,expected_created_at", + [ + (None, None), + (dict(), None), + ( + { + "tika_metadata_date_iso8601": _tika_iso(_datetime_0), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ( + { + "tika_metadata_creation_date_iso8601": _tika_iso(_datetime_0), + "tika_metadata_date_iso8601": _tika_iso(_datetime_1), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ( 
{ + "tika_metadata_dcterms_created_iso8601": _tika_iso(_datetime_0), + "tika_metadata_creation_date_iso8601": _tika_iso(_datetime_1), + "tika_metadata_date_iso8601": _tika_iso(_datetime_2), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ], +) +async def test_import_documents_should_add_created_at( + neo4j_test_session: neo4j.AsyncSession, + metadata: Optional[Dict], + expected_created_at: Optional[datetime], +): + # Given + transaction_batch_size = 10 + docs = list(make_docs(n=1)) + if metadata is not None: + for d in docs: + d["_source"].update({"metadata": metadata}) + + # When + records = [row for doc in docs for row in es_to_neo4j_doc_row(doc)] + await import_document_rows( + neo4j_session=neo4j_test_session, + records=records, + transaction_batch_size=transaction_batch_size, + ) + + # Then + query = "MATCH (doc:Document) RETURN doc" + res = await neo4j_test_session.run(query) + doc = await res.single(strict=True) + doc = doc["doc"] + created_at = doc.get("createdAt") + if created_at is not None: + created_at = created_at.to_native() + assert created_at == expected_created_at + + +@pytest.mark.parametrize( + "metadata,expected_modified_at", + [ + (None, None), + (dict(), None), + ( + { + "tika_metadata_date_iso8601": _tika_iso(_datetime_0), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ( + { + "tika_metadata_modified_iso8601": _tika_iso(_datetime_0), + "tika_metadata_date_iso8601": _tika_iso(_datetime_1), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ( + { + "tika_metadata_dcterms_modified_iso8601": _tika_iso(_datetime_0), + "tika_metadata_modified_iso8601": _tika_iso(_datetime_1), + "tika_metadata_date_iso8601": _tika_iso(_datetime_2), + }, + _datetime_0.replace(tzinfo=timezone.utc), + ), + ], +) +async def test_import_documents_should_add_modified_at( + neo4j_test_session: neo4j.AsyncSession, + metadata: Optional[Dict], + expected_modified_at: Optional[datetime], +): + # Given + transaction_batch_size = 10 + docs = 
list(make_docs(n=1)) + if metadata is not None: + for d in docs: + d["_source"].update({"metadata": metadata}) + + # When + records = [row for doc in docs for row in es_to_neo4j_doc_row(doc)] + await import_document_rows( + neo4j_session=neo4j_test_session, + records=records, + transaction_batch_size=transaction_batch_size, + ) + + # Then + query = "MATCH (doc:Document) RETURN doc" + res = await neo4j_test_session.run(query) + doc = await res.single(strict=True) + doc = doc["doc"] + modified_at = doc.get("modifiedAt") + if modified_at is not None: + modified_at = modified_at.to_native() + assert modified_at == expected_modified_at diff --git a/neo4j-app/neo4j_app/tests/core/test_imports.py b/neo4j-app/neo4j_app/tests/core/test_imports.py index cdcbb781..ba6ad567 100644 --- a/neo4j-app/neo4j_app/tests/core/test_imports.py +++ b/neo4j-app/neo4j_app/tests/core/test_imports.py @@ -91,7 +91,7 @@ async def _populate_es( index_name = TEST_PROJECT n = 20 # Index some Documents - async for _ in index_docs(es_client, n=n): + async for _ in index_docs(es_client, n=n, add_dates=True): pass # Index entities async for _ in index_named_entities(es_client, n=n): @@ -515,19 +515,19 @@ async def test_to_neo4j_csvs( expected_doc_header = """\ id:ID(Document),dirname,contentType,contentLength:LONG,extractionDate:DATETIME,path,\ -urlSuffix,:LABEL +urlSuffix,createdAt:DATETIME,modifiedAt:DATETIME,:LABEL """ doc_nodes_header_path = archive_dir / doc_nodes_export.header_path assert_content(doc_nodes_header_path, expected_doc_header) expected_doc_nodes = """doc-0,dirname-0,content-type-0,0,2023-02-06T13:48:22.3866,\ -dirname-0,ds/test_project/doc-0/doc-0,Document +dirname-0,ds/test_project/doc-0/doc-0,2022-04-08T11:41:34Z,2022-04-08T11:41:34Z,Document doc-1,dirname-1,content-type-1,1,2023-02-06T13:48:22.3866,dirname-1,\ -ds/test_project/doc-1/doc-0,Document +ds/test_project/doc-1/doc-0,2022-04-08T11:41:34Z,2022-04-08T11:41:34Z,Document 
doc-3,dirname-3,content-type-3,9,2023-02-06T13:48:22.3866,dirname-3,\ -ds/test_project/doc-3/doc-2,Document +ds/test_project/doc-3/doc-2,2022-04-08T11:41:34Z,2022-04-08T11:41:34Z,Document doc-6,dirname-6,content-type-6,36,2023-02-06T13:48:22.3866,dirname-6,\ -ds/test_project/doc-6/doc-5,Document +ds/test_project/doc-6/doc-5,2022-04-08T11:41:34Z,2022-04-08T11:41:34Z,Document """ doc_root_rels_path = archive_dir / doc_nodes_export.node_paths[0] assert_content(doc_root_rels_path, expected_doc_nodes, sort_lines=True) diff --git a/neo4j-app/pyproject.toml b/neo4j-app/pyproject.toml index 5adef6d4..dc8e1d1a 100644 --- a/neo4j-app/pyproject.toml +++ b/neo4j-app/pyproject.toml @@ -15,6 +15,9 @@ target = "py39" [tool.pytest.ini_options] asyncio_mode = "auto" +markers = [ + "pull", +] [tool.poetry.dependencies] python = "^3.9"