Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ena-deposition): Add metadata revision option #3180

Draft
wants to merge 7 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions ena-submission/scripts/deposition_dry_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@
type=click.Choice(["project", "sample", "assembly"]),
)
@click.option("--center-name", required=False, type=str, default="CENTER_NAME")
@click.option(
"--revision",
required=False,
type=bool,
default=False,
)
@click.option(
"--log-level",
default="INFO",
Expand All @@ -52,6 +58,7 @@ def local_ena_submission_generator(
data_to_submit,
center_name,
mode,
revision,
log_level,
):
"""
Expand Down Expand Up @@ -98,15 +105,15 @@ def local_ena_submission_generator(
logger.info(
"You can submit the project to ENA using the command: \n"
"curl -u {params.ena_submission_username}:{params.ena_submission_password}"
"-F 'SUBMISSION=@{project/submission.xml}' -F 'PROJECT=@{project/project.xml}'"
" -F 'SUBMISSION=@project/submission.xml' -F 'PROJECT=@project/project.xml'"
" {params.ena_submission_url} > {output}"
"\n Remember to submit to wwwdev. if you do not want to submit to production"
)

if mode == "sample":
entry["center_name"] = center_name
sample_set = construct_sample_set_object(config, entry, entry)
sample_xml = get_sample_xml(sample_set)
sample_xml = get_sample_xml(sample_set, revision=revision)

directory = "sample"
os.makedirs(directory, exist_ok=True)
Expand All @@ -120,7 +127,7 @@ def local_ena_submission_generator(
logger.info(
"You can submit the sample to ENA using the command: \n"
"curl -u {params.ena_submission_username}:{params.ena_submission_password}"
"-F 'SUBMISSION=@{sample/submission.xml}' -F 'SAMPLE=@{sample/project.xml}'"
" -F 'SUBMISSION=@sample/submission.xml' -F 'SAMPLE=@sample/sample.xml'"
" {params.ena_submission_url} > {output}"
"\n Remember to submit to wwwdev. if you do not want to submit to production"
)
Expand Down
16 changes: 16 additions & 0 deletions ena-submission/scripts/test_ena_submission.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
dataclass_to_xml,
get_chromsome_accessions,
get_ena_analysis_process,
get_sample_xml,
reformat_authors_from_loculus_to_embl_style,
)
from ena_deposition.ena_types import default_project_type, default_sample_type
Expand Down Expand Up @@ -68,6 +69,7 @@ def mock_config():

test_sample_xml_request = Path("test/test_sample_request.xml").read_text(encoding="utf-8")
test_sample_xml_response = Path("test/test_sample_response.xml").read_text(encoding="utf-8")
revision_submission_xml_request = Path("test/test_revision_submission_request.xml").read_text(encoding="utf-8")
process_response_text = Path("test/get_ena_analysis_process_response.json").read_text(
encoding="utf-8"
)
Expand Down Expand Up @@ -178,6 +180,20 @@ def test_sample_set_construction(self):
xmltodict.parse(test_sample_xml_request),
)

def test_sample_revision(self):
config = mock_config()
sample_set = construct_sample_set_object(
config,
sample_data_in_submission_table,
sample_table_entry,
)
files = get_sample_xml(sample_set, revision=True)
revision = files["SUBMISSION"]
self.assertEqual(
xmltodict.parse(revision),
xmltodict.parse(revision_submission_xml_request),
)


class AssemblyCreationTests(unittest.TestCase):
def setUp(self):
Expand Down
2 changes: 1 addition & 1 deletion ena-submission/src/ena_deposition/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def secure_ena_connection(config: Config):
config.test = True
logging.info("Submitting to ENA dev environment")
config.ena_submission_url = "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/loculus_test/test/approved_ena_submission_list.json"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Revert before merge

Suggested change
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/loculus_test/test/approved_ena_submission_list.json"
config.github_url = "https://raw.githubusercontent.com/pathoplexus/ena-submission/main/test/approved_ena_submission_list.json"

config.ena_reports_service_url = "https://wwwdev.ebi.ac.uk/ena/submit/report"

if submit_to_ena_prod:
Expand Down
128 changes: 123 additions & 5 deletions ena-submission/src/ena_deposition/create_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
XmlAttribute,
XrefType,
)
from .notifications import SlackConfig, send_slack_notification, slack_conn_init
from .notifications import SlackConfig, notify, send_slack_notification, slack_conn_init
from .submission_db_helper import (
SampleTableEntry,
Status,
Expand Down Expand Up @@ -234,8 +234,102 @@ def submission_table_update(db_config: SimpleConnectionPool):
raise RuntimeError(error_msg)


def handle_revision(
db_config: SimpleConnectionPool,
slack_config: SlackConfig,
row: dict[str, str],
retry_number: int = 3,
) -> bool:
"""
Check if revision can be submitted:
- If revision is not the latest version, do not submit
- If revision has different authors, do not submit -> requires manual change
In these cases send notification and set DB to state HAS_ERRORS
"""
seq_key = {"accession": row["accession"], "version": row["version"]}
sample_data_in_submission_table = find_conditions_in_db(
db_config, table_name="submission_table", conditions=seq_key
)
logging.info(f"Trying to submit revision for {row["accession"]}")
versions = [
submission["version"]
for submission in find_conditions_in_db(
db_config,
table_name="submission_table",
conditions={"accession": row["accession"]},
)
].sort()
if row["version"] != versions[-1]:
error_msg = f"Revision for {row["accession"]} is the not the latest version {versions[-1]} - will not submit"
logging.warning(error_msg)
notify(slack_config, error_msg)
# TODO: Send notification, update sample_table to HAS_ERRORS
update_values = {
"status": Status.HAS_ERRORS,
"errors": error_msg,
"started_at": datetime.now(tz=pytz.utc),
}
number_rows_updated = 0
tries = 0
while number_rows_updated != 1 and tries < retry_number:
if tries > 0:
# If state not correctly added retry
logging.warning(
f"sample creation failed and DB update failed - reentry DB update #{tries}."
)
number_rows_updated = update_db_where_conditions(
db_config,
table_name="sample_table",
conditions=seq_key,
update_values=update_values,
)
tries += 1
return False
latest_version = versions[-2]
last_submission = find_conditions_in_db(
db_config,
table_name="submission_table",
conditions={"accession": row["accession"], "version": latest_version},
)
if last_submission[0]["metadata"].get("authors") != sample_data_in_submission_table[0][
"metadata"
].get("authors"):
# TODO: Send notification, update sample_table to HAS_ERRORS
error_msg = (
f"Revision for {row["accession"]} has different authors - needs to be updated manually"
)
logging.warning(error_msg)
notify(slack_config, error_msg)
update_values = {
"status": Status.HAS_ERRORS,
"errors": error_msg,
"started_at": datetime.now(tz=pytz.utc),
}
number_rows_updated = 0
tries = 0
while number_rows_updated != 1 and tries < retry_number:
if tries > 0:
# If state not correctly added retry
logging.warning(
f"sample creation failed and DB update failed - reentry DB update #{tries}."
)
number_rows_updated = update_db_where_conditions(
db_config,
table_name="sample_table",
conditions=seq_key,
update_values=update_values,
)
tries += 1
return False
return True


def sample_table_create(
db_config: SimpleConnectionPool, config: Config, retry_number: int = 3, test: bool = False
db_config: SimpleConnectionPool,
config: Config,
slack_config: SlackConfig,
retry_number: int = 3,
test: bool = False,
):
"""
1. Find all entries in sample_table in state READY
Expand Down Expand Up @@ -265,6 +359,29 @@ def sample_table_create(
db_config, table_name="submission_table", conditions=seq_key
)

revision = False
# If revision, check if can be revised
if (
row["version"] != "1"
and len(
find_conditions_in_db(
db_config,
table_name="submission_table",
conditions={"accession": row["accession"]},
)
)
> 1
):
can_revise = handle_revision(
db_config,
slack_config,
row,
retry_number,
)
if not can_revise:
continue
revision = True

sample_set = construct_sample_set_object(
config, sample_data_in_submission_table[0], row, test
)
Expand All @@ -286,7 +403,9 @@ def sample_table_create(
)
continue
logging.info(f"Starting sample creation for accession {row["accession"]}")
sample_creation_results: CreationResult = create_ena_sample(ena_config, sample_set)
sample_creation_results: CreationResult = create_ena_sample(
ena_config, sample_set, revision=revision
)
if sample_creation_results.result:
update_values = {
"status": Status.SUBMITTED,
Expand Down Expand Up @@ -366,7 +485,6 @@ def sample_table_handle_errors(


def create_sample(config: Config, stop_event: threading.Event):

db_config = db_init(config.db_password, config.db_username, config.db_url)
slack_config = slack_conn_init(
slack_hook_default=config.slack_hook,
Expand All @@ -382,7 +500,7 @@ def create_sample(config: Config, stop_event: threading.Event):
submission_table_start(db_config)
submission_table_update(db_config)

sample_table_create(db_config, config, test=config.test)
sample_table_create(db_config, config, slack_config, test=config.test)
sample_table_handle_errors(db_config, config, slack_config)
time.sleep(config.time_between_iterations)

Expand Down
12 changes: 8 additions & 4 deletions ena-submission/src/ena_deposition/ena_submission_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,10 @@ def get_submission_dict(hold_until_date: str | None = None):
)


def get_revision_dict():
return Submission(actions=Actions(action=[Action(modify="")]))


def get_project_xml(project_set):
submission_set = get_submission_dict()
return {
Expand Down Expand Up @@ -197,16 +201,16 @@ def create_ena_project(config: ENAConfig, project_set: ProjectSet) -> CreationRe
return CreationResult(result=project_results, errors=errors, warnings=warnings)


def get_sample_xml(sample_set):
submission_set = get_submission_dict()
def get_sample_xml(sample_set, revision: bool = False):
submission_set = get_revision_dict() if revision else get_submission_dict()
files = {
"SUBMISSION": dataclass_to_xml(submission_set, root_name="SUBMISSION"),
"SAMPLE": dataclass_to_xml(sample_set, root_name="SAMPLE_SET"),
}
return files


def create_ena_sample(config: ENAConfig, sample_set: SampleSetType) -> CreationResult:
def create_ena_sample(config: ENAConfig, sample_set: SampleSetType, revision: bool = False) -> CreationResult:
"""
The sample creation request should be equivalent to
curl -u {params.ena_submission_username}:{params.ena_submission_password} \
Expand All @@ -219,7 +223,7 @@ def create_ena_sample(config: ENAConfig, sample_set: SampleSetType) -> CreationR
warnings = []

try:
xml = get_sample_xml(sample_set)
xml = get_sample_xml(sample_set, revision=revision)
response = post_webin(config, xml)
except requests.exceptions.RequestException as e:
error_message = f"Request failed with exception: {e}."
Expand Down
1 change: 1 addition & 0 deletions ena-submission/src/ena_deposition/ena_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ class Hold:
class Action:
add: str | None = None
hold: Hold | None = None
modify: str | None = None


@dataclass
Expand Down
8 changes: 4 additions & 4 deletions ena-submission/src/ena_deposition/submission_db_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ class AssemblyTableEntry:

def find_conditions_in_db(
db_conn_pool: SimpleConnectionPool, table_name: TableName, conditions: dict[str, str]
) -> dict[str, str]:
) -> list[dict[str, str]]:
con = db_conn_pool.getconn()
try:
with con, con.cursor(cursor_factory=RealDictCursor) as cur:
Expand Down Expand Up @@ -199,7 +199,7 @@ def find_conditions_in_db(

def find_errors_in_db(
db_conn_pool: SimpleConnectionPool, table_name: TableName, time_threshold: int = 15
) -> dict[str, str]:
) -> list[dict[str, str]]:
con = db_conn_pool.getconn()
try:
with con, con.cursor(cursor_factory=RealDictCursor) as cur:
Expand All @@ -224,7 +224,7 @@ def find_errors_in_db(

def find_stuck_in_submission_db(
db_conn_pool: SimpleConnectionPool, time_threshold: int = 48
) -> dict[str, str]:
) -> list[dict[str, str]]:
con = db_conn_pool.getconn()
try:
with con, con.cursor(cursor_factory=RealDictCursor) as cur:
Expand All @@ -247,7 +247,7 @@ def find_stuck_in_submission_db(

def find_waiting_in_db(
db_conn_pool: SimpleConnectionPool, table_name: TableName, time_threshold: int = 48
) -> dict[str, str]:
) -> list[dict[str, str]]:
con = db_conn_pool.getconn()
try:
with con, con.cursor(cursor_factory=RealDictCursor) as cur:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,6 @@ def upload_sequences(db_config: SimpleConnectionPool, sequences_to_upload: dict[
accession, version = full_accession.split(".")
if in_submission_table(db_config, {"accession": accession, "version": version}):
continue
if in_submission_table(db_config, {"accession": accession}):
# TODO: Correctly handle revisions
msg = f"Trying to submit revision for {accession}, this is not currently enabled"
logging.error(msg)
continue
entry = {
"accession": accession,
"version": version,
Expand Down
7 changes: 7 additions & 0 deletions ena-submission/test/test_revision_submission_request.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<SUBMISSION>
<ACTIONS>
<ACTION>
<MODIFY/>
</ACTION>
</ACTIONS>
</SUBMISSION>
Loading