Fix param reversal when creating Authors #413

Merged · 7 commits · May 25, 2024
Changes from all commits
7 changes: 7 additions & 0 deletions CHANGELOG.md
@@ -5,6 +5,13 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

+## [0.3.7-alpha] - 2024-05-24
+
+### Fixed
+
+- Author first and last names reversed
+- Podcast scripts double author names
+
## [0.3.6-alpha] - 2024-05-24

### Added
6 changes: 3 additions & 3 deletions infra/core/dev.tfvars
@@ -3,7 +3,7 @@
# **********************************************************

app_name = "atomiklabs"
app_version = "0.3.6-alpha"
app_version = "0.3.7-alpha"
availability_zones = ["us-east-1a", "us-east-1b", "us-east-1c"]
aws_region = "us-east-1"
backend_dynamodb_table = "terraform-state-locks"
@@ -63,9 +63,9 @@ arxiv_sets = ["cs"]
default_lambda_runtime = "python3.10"
pods_prefix = "pods"

-create_pod_task_version = "0.1.0"
+create_pod_task_version = "0.1.1"
fetch_from_arxiv_task_version = "0.1.0"
most_recent_research_records_version = "0.0.2"
parse_summaries_task_version = "0.1.0"
persist_summaries_task_version = "0.1.0"
persist_summaries_task_version = "0.1.1"
save_summaries_to_datalake_task_version = "0.0.1"

@@ -252,7 +252,7 @@ def store_records(records: List[Dict], bucket_name: str, key: str, config: dict,
malformed_records.append(record)
try:
for author in record.get("authors", []):
-author_node = Author(driver, author.get(FIRST_NAME), author.get(LAST_NAME))
+author_node = Author(driver, last_name=author.get(LAST_NAME), first_name=author.get(FIRST_NAME))
author_node.create()
arxiv_record.relate(
driver,
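
This is the heart of the fix: each arXiv record stores its authors as first_name/last_name mappings, while the Author model's API (see shared/models/author.py below) puts last_name ahead of first_name, so the old positional call dropped each value into the wrong slot. A minimal sketch of the failure mode and of why keyword arguments close it off, using a stand-in class rather than the project's real model:

```python
# Minimal sketch of the bug with a stand-in class, not the project's Author model.
# Assumption: the real constructor orders last_name before first_name, matching the
# create()/find() signatures visible in shared/models/author.py further down.
class StubAuthor:
    def __init__(self, driver, last_name: str = "", first_name: str = ""):
        self.driver = driver
        self.last_name = last_name
        self.first_name = first_name

author = {"first_name": "Ada", "last_name": "Lovelace"}

# Old call: positional arguments land in the wrong parameters.
swapped = StubAuthor(None, author["first_name"], author["last_name"])
assert swapped.last_name == "Ada"  # reversed, as the changelog describes

# Fixed call: keyword arguments bind by name, so parameter order no longer matters.
fixed = StubAuthor(None, last_name=author["last_name"], first_name=author["first_name"])
assert fixed.first_name == "Ada" and fixed.last_name == "Lovelace"
```

Binding by name also keeps the call site correct if the model's parameter order ever changes again.
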
2 changes: 1 addition & 1 deletion orchestration/airflow/dags/publishing/create_pods_dag.py
@@ -1,7 +1,7 @@
import os
from logging.config import dictConfig

-import publishing.tasks.create_pod as cpt
+import publishing.tasks.create_pod_task as cpt
import structlog
from airflow import DAG
from airflow.operators.python import PythonOperator

@@ -30,6 +30,7 @@
PUBLISHES,
RECORDS_PREFIX,
RETRIEVAL_ERRORS,
+SUMMARIZES,
)
from shared.utils.utils import get_config

@@ -73,10 +74,31 @@ def run(
logger.info("No summaries for date", set=arxiv_set, category=category, date=pod_date)
continue
pod_summaries = get_pod_summaries(context, config, summaries)
-scripts = write_pod_script(config, pod_summaries, arxiv_set, category, pod_date)
+scripts = write_pod_script(
+    config=config,
+    pod_summaries=pod_summaries,
+    arxiv_set=arxiv_set,
+    category=category,
+    episode_date=pod_date,
+)
for key, script, part_record_ids in scripts:
-audio_key = create_audio(config, arxiv_set, category, pod_date, script, key)
-create_pod_node(config, arxiv_set, category, pod_date, key, audio_key, part_record_ids)
+audio_key = create_audio(
+    config=config,
+    arxiv_set=arxiv_set,
+    category=category,
+    episode_date=pod_date,
+    script_text=script,
+    key=key,
+)
+create_pod_node(
+    config=config,
+    arxiv_set=arxiv_set,
+    category=category,
+    pod_date=pod_date,
+    script_key=key,
+    audio_key=audio_key,
+    part_record_ids=part_record_ids,
+)
except Exception as e:
logger.error("Error creating pod", set=arxiv_set, category=category, date=pod_date, error=e)
continue
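
The run() changes above switch write_pod_script, create_audio, and create_pod_node over to explicit keyword arguments. One way to make that convention hard to break (a sketch only, not something this PR adds) is to declare the parameters keyword-only, so a positional call fails immediately:

```python
# Sketch only: everything after the bare * must be passed by keyword. The parameter
# names mirror the call sites above; the real signatures in the publishing task
# modules are not shown in this diff and may differ.
from datetime import datetime

def create_audio(*, config: dict, arxiv_set: str, category: str,
                 episode_date: datetime, script_text: str, key: str) -> str:
    ...  # real implementation lives in the task module
    return "placeholder-audio-key"

# create_audio(cfg, "cs", "cs.AI", date, text, key)  -> TypeError at the call site
# create_audio(config=cfg, arxiv_set="cs", ...)      -> the only accepted form
```
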
@@ -95,7 +117,7 @@ def next_pod_dates(config: dict, arxiv_set: str, category: str) -> List[datetime
f"MATCH (s:ArxivSet {{code: $arxiv_set}}) "
f"-[:{CATEGORIZED_BY}]->(c:ArxivCategory {{code: $category}}) "
f"-[:{CATEGORIZES}]->(p:Podcast) "
"RETURN p.date ORDER BY p.date DESC LIMIT 1"
"RETURN p.episode_date ORDER BY p.episode_date DESC LIMIT 1"
)
result = session.run(query, {"arxiv_set": arxiv_set, "category": category})
data = result.data()
@@ -105,7 +127,7 @@
if len(data) == 0:
start_date = end_date - timedelta(days=5)
else:
start_date = datetime.combine(data[0]["p.date"].to_native(), datetime.min.time(), tzinfo)
start_date = datetime.combine(data[0]["p.episode_date"].to_native(), datetime.min.time(), tzinfo)
date_list = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]
return date_list
except Exception as e:
Expand All @@ -123,10 +145,10 @@ def get_summaries(config: dict, arxiv_set: str, category: str, episode_date: dat
query = (
f"MATCH (s:ArxivSet {{code: $arxiv_set}}) "
f"-[:{CATEGORIZED_BY}]->(c:ArxivCategory {{code: $category}}) "
f"<-[:{PRIMARILY_CATEGORIZED_BY}]-(a:ArxivRecord)--(b:Abstract) "
f"<-[:{PRIMARILY_CATEGORIZED_BY}]-(a:ArxivRecord)<-[:{SUMMARIZES}]-(b:Abstract) "
"MATCH (a)-[:AUTHORED_BY]->(author:Author)"
"WHERE a.date = $date "
"RETURN {record: a, abstract: b, authors: collect(author)} AS result"
"RETURN {record: a, abstract: b, authors: collect({first_name: author.first_name, last_name: author.last_name})} AS result"
)
result = session.run(query, {"arxiv_set": arxiv_set, "category": category, "date": episode_date.date()})
data = result.data()
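
Two things change in the get_summaries query: the abstract is now matched through an explicit SUMMARIZES relationship rather than an untyped --, and authors come back as plain {first_name, last_name} maps instead of whole nodes, which makes formatting a byline trivial. A rough sketch of consuming rows shaped like that RETURN clause (the sample row stands in for result.data(); values are illustrative):

```python
# Sample row shaped like the RETURN clause above; values are made up for illustration.
rows = [{
    "result": {
        "record": {"arxiv_id": "2405.00001", "date": "2024-05-24"},
        "abstract": {"text": "..."},
        "authors": [{"first_name": "Ada", "last_name": "Lovelace"},
                    {"first_name": "Alan", "last_name": "Turing"}],
    }
}]

for row in rows:
    r = row["result"]
    # Authors arrive as plain name maps, already in first/last order.
    byline = ", ".join(f'{a["first_name"]} {a["last_name"]}' for a in r["authors"])
    print(byline)  # Ada Lovelace, Alan Turing
```
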
@@ -224,7 +246,7 @@ def write_pod_script(
script_content += no_latex_paragraph + "\n\n"
part_record_ids.append(r["record"]["arxiv_id"])
except Exception as e:
logger.error("Error writing pod script", error=e, method=write_pod_script.__name__, record=r)
logger.error("Error writing pod script", error=e, method=write_pod_script.__name__)
continue

script_content += outro
7 changes: 3 additions & 4 deletions orchestration/airflow/dags/shared/models/author.py
@@ -62,17 +62,16 @@ def create(self, last_name: str = "", first_name: str = ""):
)
now = get_storage_key_datetime()
properties = {
"first_name": self.first_name,
"uuid": str(uuid.uuid4()),
"last_name": self.last_name,
"created": now,
"last_modified": now,
}
records, summary, _ = self.driver.execute_query(
"""
-MERGE (a:Author {last_name: $last_name})
+MERGE (a:Author {first_name: $first_name, last_name: $last_name})
ON CREATE SET a += $props
RETURN a""",
first_name=self.first_name,
last_name=self.last_name,
props=properties,
database_=self.db,
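
Merging on both name parts is what keeps distinct authors who share a surname from being folded into a single node. A sketch of the behaviour under the new composite key, using placeholder connection details rather than the project's real configuration:

```python
# Dedup behaviour of the new MERGE key; the bolt URL, credentials, database name,
# and props dict are placeholders, not the project's real configuration.
from neo4j import GraphDatabase

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
query = """
MERGE (a:Author {first_name: $first_name, last_name: $last_name})
ON CREATE SET a += $props
RETURN a
"""
for first, last in [("Marie", "Curie"), ("Pierre", "Curie")]:
    driver.execute_query(query, first_name=first, last_name=last,
                         props={"created": "2024-05-24"}, database_="neo4j")
# Merging on last_name alone would have collapsed these into one Author node;
# with the composite key each (first_name, last_name) pair gets its own node.
driver.close()
```
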
@@ -118,7 +117,7 @@ def create(self, last_name: str = "", first_name: str = ""):
raise e

@classmethod
def find(cls, driver: Driver, last_name: str, first_name: str = ""):
def find(cls, driver: Driver, last_name: str, first_name: str):
if not driver or not isinstance(driver, Driver):
raise ValueError("Invalid driver")
if not validate_strings(last_name, first_name):
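
The find() signature tightens in the same direction: first_name is now required rather than defaulting to an empty string, matching the composite MERGE key. A small usage sketch (the import path mirrors the dag imports above, the connection details are placeholders, and find() returning None for "no match" is an assumption this diff does not confirm):

```python
from neo4j import GraphDatabase
from shared.models.author import Author  # assumed import path

driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
author = Author.find(driver, last_name="Curie", first_name="Marie")
if author is None:  # assumption: find() returns None when nothing matches
    author = Author(driver, last_name="Curie", first_name="Marie")
    author.create()
driver.close()
```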