Skip to content

Commit

Permalink
Fix Cypher query returning duplicate authors
Browse files: browse the repository at this point in the history
  • Loading branch information
Brad-Edwards committed May 24, 2024
1 parent ad56f44 commit dbe99a9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 7 deletions.
6 changes: 3 additions & 3 deletions infra/core/dev.tfvars
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# **********************************************************

app_name = "atomiklabs"
app_version = "0.3.6-alpha"
app_version = "0.3.7-alpha"
availability_zones = ["us-east-1a", "us-east-1b", "us-east-1c"]
aws_region = "us-east-1"
backend_dynamodb_table = "terraform-state-locks"
Expand Down Expand Up @@ -63,9 +63,9 @@ arxiv_sets = ["cs"]
default_lambda_runtime = "python3.10"
pods_prefix = "pods"

create_pod_task_version = "0.1.0"
create_pod_task_version = "0.1.1"
fetch_from_arxiv_task_version = "0.1.0"
most_recent_research_records_version = "0.0.2"
parse_summaries_task_version = "0.1.0"
persist_summaries_task_version = "0.1.0"
persist_summaries_task_version = "0.1.1"
save_summaries_to_datalake_task_version = "0.0.1"
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
PUBLISHES,
RECORDS_PREFIX,
RETRIEVAL_ERRORS,
SUMMARIZES,
)
from shared.utils.utils import get_config

Expand Down Expand Up @@ -73,10 +74,31 @@ def run(
logger.info("No summaries for date", set=arxiv_set, category=category, date=pod_date)
continue
pod_summaries = get_pod_summaries(context, config, summaries)
scripts = write_pod_script(config, pod_summaries, arxiv_set, category, pod_date)
scripts = write_pod_script(
config=config,
pod_summaries=pod_summaries,
arxiv_set=arxiv_set,
category=category,
episode_date=pod_date,
)
for key, script, part_record_ids in scripts:
audio_key = create_audio(config, arxiv_set, category, pod_date, script, key)
create_pod_node(config, arxiv_set, category, pod_date, key, audio_key, part_record_ids)
audio_key = create_audio(
config=config,
arxiv_set=arxiv_set,
category=category,
episode_date=pod_date,
script_text=script,
key=key,
)
create_pod_node(
config=config,
arxiv_set=arxiv_set,
category=category,
episode_date=pod_date,
key=key,
audio_key=audio_key,
part_record_ids=part_record_ids,
)
except Exception as e:
logger.error("Error creating pod", set=arxiv_set, category=category, date=pod_date, error=e)
continue
Expand Down Expand Up @@ -123,7 +145,7 @@ def get_summaries(config: dict, arxiv_set: str, category: str, episode_date: dat
query = (
f"MATCH (s:ArxivSet {{code: $arxiv_set}}) "
f"-[:{CATEGORIZED_BY}]->(c:ArxivCategory {{code: $category}}) "
f"<-[:{PRIMARILY_CATEGORIZED_BY}]-(a:ArxivRecord)--(b:Abstract) "
f"<-[:{PRIMARILY_CATEGORIZED_BY}]-(a:ArxivRecord)<-[:{SUMMARIZES}]-(b:Abstract) "
"MATCH (a)-[:AUTHORED_BY]->(author:Author)"
"WHERE a.date = $date "
"RETURN {record: a, abstract: b, authors: collect({first_name: author.first_name, last_name: author.last_name})} AS result"
Expand Down

0 comments on commit dbe99a9

Please sign in to comment.