Skip to content

Commit

Permalink
fix: entity count
Browse files Browse the repository at this point in the history
  • Loading branch information
ClemDoum committed Jan 3, 2024
1 parent 440b272 commit 45a5a38
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 12 deletions.
4 changes: 2 additions & 2 deletions neo4j-app/neo4j_app/core/neo4j/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
migration_v_0_3_0_tx,
migration_v_0_4_0_tx,
migration_v_0_5_0_tx,
migration_v_0_6_0_tx,
migration_v_0_6_0,
)

V_0_1_0 = Migration(
Expand Down Expand Up @@ -45,7 +45,7 @@
V_0_6_0 = Migration(
version="0.6.0",
label="Add mention counts to named entity document relationships",
migration_fn=migration_v_0_6_0_tx,
migration_fn=migration_v_0_6_0,
)
MIGRATIONS = [V_0_1_0, V_0_2_0, V_0_3_0, V_0_4_0, V_0_5_0, V_0_6_0]

Expand Down
16 changes: 12 additions & 4 deletions neo4j-app/neo4j_app/core/neo4j/migrations/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
MIGRATION_VERSION,
NE_APPEARS_IN_DOC,
NE_ID,
NE_IDS,
NE_MENTION_COUNT,
NE_MENTION_NORM,
NE_NODE,
NE_OFFSETS,
PROJECT_NAME,
PROJECT_NODE,
TASK_CREATED_AT,
Expand Down Expand Up @@ -53,8 +53,13 @@ async def migration_v_0_5_0_tx(tx: neo4j.AsyncTransaction):
await _create_email_user_and_domain_indexes(tx)


# Transaction-function form of the 0.6.0 migration: receives an explicit
# transaction and delegates to the helper that sets the mention count on
# named entity -> document relationships.
async def migration_v_0_6_0_tx(tx: neo4j.AsyncTransaction):
await _add_mention_count_to_named_entity_relationship(tx)
async def migration_v_0_6_0(sess: neo4j.AsyncSession):
    """Set the mention count on named entity -> document relationships.

    For every ``NE_APPEARS_IN_DOC`` relationship, ``NE_MENTION_COUNT`` is set
    to the size of the relationship's ``NE_IDS`` list. The update is batched
    with ``CALL { ... } IN TRANSACTIONS OF 10000 ROWS``; Neo4j only accepts
    that clause in an implicit (auto-commit) transaction, which is why this
    migration takes a session instead of a managed transaction.

    :param sess: open async session on which the auto-commit query is run
    """
    query = f"""MATCH (:{NE_NODE})-[rel:{NE_APPEARS_IN_DOC}]->(:{DOC_NODE})
CALL {{
WITH rel
SET rel.{NE_MENTION_COUNT} = size(rel.{NE_IDS})
}} IN TRANSACTIONS OF 10000 ROWS"""
    res = await sess.run(query)
    # An auto-commit transaction is only guaranteed to be committed (and any
    # error from one of its batches raised) once its result has been consumed,
    # so make sure the migration does not return before that happens.
    await res.consume()


async def _create_document_and_ne_id_unique_constraint_tx(tx: neo4j.AsyncTransaction):
Expand Down Expand Up @@ -162,5 +167,8 @@ async def _create_email_user_and_domain_indexes(tx: neo4j.AsyncTransaction):

async def _add_mention_count_to_named_entity_relationship(tx: neo4j.AsyncTransaction):
    """Set the mention count on named entity -> document relationships.

    For every ``NE_APPEARS_IN_DOC`` relationship, ``NE_MENTION_COUNT`` is set
    to the size of the relationship's ``NE_IDS`` list, in a single statement.

    ``CALL { ... } IN TRANSACTIONS`` is deliberately not used here: this
    helper runs inside an explicit transaction, where Neo4j rejects that
    clause. Batched updates have to go through an auto-commit query run
    directly on a session instead.

    :param tx: explicit transaction in which the update is executed
    """
    query = f"""MATCH (:{NE_NODE})-[rel:{NE_APPEARS_IN_DOC}]->(:{DOC_NODE})
SET rel.{NE_MENTION_COUNT} = size(rel.{NE_IDS})"""
    await tx.run(query)
2 changes: 1 addition & 1 deletion neo4j-app/neo4j_app/core/neo4j/named_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ async def import_named_entity_rows(
rel.{NE_EXTRACTORS} = apoc.coll.toSet(\
rel.{NE_EXTRACTORS} + row.{NE_EXTRACTOR}),
rel.{NE_OFFSETS} = apoc.coll.toSet(rel.{NE_OFFSETS} + row.{NE_OFFSETS})
SET rel.{NE_MENTION_COUNT} = size(rel.{NE_OFFSETS})
SET rel.{NE_MENTION_COUNT} = size(rel.{NE_IDS})
WITH mention, doc, row
CALL apoc.do.case(
[
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
migration_v_0_3_0_tx,
migration_v_0_4_0_tx,
migration_v_0_5_0_tx,
migration_v_0_6_0_tx,
migration_v_0_6_0,
)


Expand Down Expand Up @@ -104,11 +104,11 @@ async def test_migration_v_0_5_0_tx(neo4j_test_session: neo4j.AsyncSession):

async def test_migration_v_0_6_0_tx(neo4j_test_session: neo4j.AsyncSession):
# Given
create_path = """CREATE (:NamedEntity)-[:APPEARS_IN {offsets: [0, 1]}
create_path = """CREATE (:NamedEntity)-[:APPEARS_IN {mentionIds: ['id-0', 'id-1']}
]->(:Document)"""
await neo4j_test_session.run(create_path)
# When
await neo4j_test_session.execute_write(migration_v_0_6_0_tx)
await migration_v_0_6_0(neo4j_test_session)
# Then
match_path = "MATCH (:NamedEntity)-[rel:APPEARS_IN]->(:Document) RETURN rel"
res = await neo4j_test_session.run(match_path)
Expand Down
10 changes: 8 additions & 2 deletions neo4j-app/neo4j_app/tests/core/neo4j/test_name_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,9 @@ async def test_import_named_entities_should_update_named_entity(
transaction_batch_size = 3
ents = list(make_named_entities(n=num_ents))
query = """
CREATE (n:NamedEntity {id: 'named-entity-0', offsets: [1, 2], documentId: 'doc-0'})
CREATE (n:NamedEntity {
id: 'named-entity-0', mentionIds: ['id-0', 'id-1'], documentId: 'doc-0'
})
"""
await neo4j_test_session.run(query)

Expand All @@ -104,7 +106,11 @@ async def test_import_named_entities_should_update_named_entity(
res = await neo4j_test_session.run(query)
ent = await res.single()
ent = dict(ent["ent"])
expected_ent = {"id": "named-entity-0", "offsets": [1, 2], "documentId": "doc-0"}
expected_ent = {
"id": "named-entity-0",
"mentionIds": ["id-0", "id-1"],
"documentId": "doc-0",
}
assert ent == expected_ent


Expand Down

0 comments on commit 45a5a38

Please sign in to comment.