From 26ccc63abebb09e1592192209e9e2d7028b62e38 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Sat, 4 Nov 2023 11:59:58 -0400 Subject: [PATCH 1/4] Fix bug - no more encoding of quality stats --- config/config_sars2_alpha.yaml | 5 ----- config/config_sars2_genbank.yaml | 5 ----- config/config_sars2_genbank_dev.yaml | 5 ----- config/config_sars2_gisaid.yaml | 5 ----- config/config_sars2_gisaid_private.yaml | 5 ----- workflow_main/scripts/build_full_dataframe.py | 1 - workflow_main/scripts/collapse_to_isolate.py | 6 ++++++ workflow_main/scripts/combine_all_data.py | 6 ++++++ 8 files changed, 12 insertions(+), 26 deletions(-) diff --git a/config/config_sars2_alpha.yaml b/config/config_sars2_alpha.yaml index 78f606ad..d68c0697 100644 --- a/config/config_sars2_alpha.yaml +++ b/config/config_sars2_alpha.yaml @@ -72,11 +72,6 @@ metadata_cols: title: "Originating lab" submitting_lab: title: "Submitting lab" - # Sequence quality - length: - title: "Sequence length" - percent_ambiguous: - title: "Percentage of ambiguous (N) bases" # PANGO metadata conflict: title: "PANGO conflict" diff --git a/config/config_sars2_genbank.yaml b/config/config_sars2_genbank.yaml index e3515f07..d693194c 100644 --- a/config/config_sars2_genbank.yaml +++ b/config/config_sars2_genbank.yaml @@ -63,11 +63,6 @@ metadata_cols: title: "Authors" publications: title: "Publications" - # Sequence quality - length: - title: "Sequence length" - percent_ambiguous: - title: "Percentage of ambiguous (N) bases" group_cols: lineage: diff --git a/config/config_sars2_genbank_dev.yaml b/config/config_sars2_genbank_dev.yaml index 65b4d3fb..fb1a02eb 100644 --- a/config/config_sars2_genbank_dev.yaml +++ b/config/config_sars2_genbank_dev.yaml @@ -64,11 +64,6 @@ metadata_cols: title: "Authors" publications: title: "Publications" - # Sequence quality - length: - title: "Sequence length" - percent_ambiguous: - title: "Percentage of ambiguous (N) bases" group_cols: lineage: diff --git a/config/config_sars2_gisaid.yaml 
b/config/config_sars2_gisaid.yaml index afc24d21..345c2efa 100644 --- a/config/config_sars2_gisaid.yaml +++ b/config/config_sars2_gisaid.yaml @@ -72,11 +72,6 @@ metadata_cols: title: "Originating lab" submitting_lab: title: "Submitting lab" - # Sequence quality - length: - title: "Sequence length" - percent_ambiguous: - title: "Percentage of ambiguous (N) bases" # PANGO metadata conflict: title: "PANGO conflict" diff --git a/config/config_sars2_gisaid_private.yaml b/config/config_sars2_gisaid_private.yaml index 0b770f23..0f7b2c8c 100644 --- a/config/config_sars2_gisaid_private.yaml +++ b/config/config_sars2_gisaid_private.yaml @@ -72,11 +72,6 @@ metadata_cols: title: "Originating lab" submitting_lab: title: "Submitting lab" - # Sequence quality - length: - title: "Sequence length" - percent_ambiguous: - title: "Percentage of ambiguous (N) bases" # PANGO metadata conflict: title: "PANGO conflict" diff --git a/workflow_main/scripts/build_full_dataframe.py b/workflow_main/scripts/build_full_dataframe.py index 7e1917fd..4d5274fb 100644 --- a/workflow_main/scripts/build_full_dataframe.py +++ b/workflow_main/scripts/build_full_dataframe.py @@ -7,7 +7,6 @@ def main(): - parser = argparse.ArgumentParser() parser.add_argument( diff --git a/workflow_main/scripts/collapse_to_isolate.py b/workflow_main/scripts/collapse_to_isolate.py index 81f4ae95..e1d2686c 100755 --- a/workflow_main/scripts/collapse_to_isolate.py +++ b/workflow_main/scripts/collapse_to_isolate.py @@ -77,6 +77,12 @@ def main(): for col in args.group_cols + args.metadata_cols: column_aggs[col] = (col, "first") + # Sequence quality data + if "length" in sequence_df.columns: + column_aggs["length"] = ("length", "first") + if "percent_ambiguous" in sequence_df.columns: + column_aggs["percent_ambiguous"] = ("percent_ambiguous", "first") + isolate_df = sequence_df.groupby(["isolate_id", "reference"], as_index=False).agg( **column_aggs ) diff --git a/workflow_main/scripts/combine_all_data.py 
b/workflow_main/scripts/combine_all_data.py index a2aeb5e1..46015ef2 100644 --- a/workflow_main/scripts/combine_all_data.py +++ b/workflow_main/scripts/combine_all_data.py @@ -254,6 +254,12 @@ def main(): df.loc[:, col] = factor metadata_maps[col] = pd.Series(labels).to_dict() + # Special processing for sequence quality columns + if "length" in df.columns: + df.loc[:, "length"] = df["length"].fillna(0).astype(int) + if "percent_ambiguous" in df.columns: + df.loc[:, "percent_ambiguous"] = df["percent_ambiguous"].fillna(0).astype(float) + # Special processing for locations - leave missing data as -1 for col in ["region", "country", "division", "location"]: missing_inds = df[col] == "-1" From 2ca5aff41053e33228201a0a9584e092dc3601d8 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Sat, 4 Nov 2023 12:47:42 -0400 Subject: [PATCH 2/4] UI and server accommodation for new quality filtering --- services/server/cg_server/db_seed/seed.py | 38 ++--- services/server/cg_server/download/genomes.py | 2 + .../server/cg_server/download/metadata.py | 8 +- .../query/group_mutation_frequencies.py | 15 +- services/server/cg_server/query/metadata.py | 3 +- services/server/cg_server/query/selection.py | 59 +++++++- .../server/cg_server/query/variant_table.py | 2 + src/components/Modals/SelectSequencesModal.js | 25 ++++ src/components/Selection/QualitySelect.js | 132 ++++++++++++++++++ .../Selection/QualitySelect.styles.js | 43 ++++++ src/components/Sidebar/StatusBox.js | 112 ++++++++++++--- src/constants/initialValues.js | 3 + src/stores/configStore.js | 17 +++ src/stores/dataStore.js | 4 + src/stores/urlMonitor.js | 12 ++ 15 files changed, 425 insertions(+), 50 deletions(-) create mode 100644 src/components/Selection/QualitySelect.js create mode 100644 src/components/Selection/QualitySelect.styles.js diff --git a/services/server/cg_server/db_seed/seed.py b/services/server/cg_server/db_seed/seed.py index 2767a62b..c0036769 100644 --- a/services/server/cg_server/db_seed/seed.py +++ 
b/services/server/cg_server/db_seed/seed.py @@ -66,7 +66,6 @@ def df_to_sql(cur, df, table, index_label=None): def seed_database(conn, schema="public"): with conn.cursor() as cur: - cur.execute(sql.SQL("SET search_path TO {};").format(sql.Identifier(schema))) cur.execute("DROP EXTENSION IF EXISTS intarray;") @@ -195,7 +194,6 @@ def seed_database(conn, schema="public"): mutation_fields = ["dna", "gene_aa", "protein_aa"] for grouping in group_mutation_frequencies.keys(): - # Get references reference_names = sorted(group_mutation_frequencies[grouping].keys()) @@ -256,7 +254,6 @@ def seed_database(conn, schema="public"): # Build colormaps for grouping in config["group_cols"].keys(): - # Collect unique group names group_names = [] for reference in global_group_counts.keys(): @@ -296,12 +293,27 @@ def seed_database(conn, schema="public"): print("done") print("Writing sequence metadata...", end="", flush=True) + + isolate_df = pd.read_json(data_path / "isolate_data.json") + isolate_df["collection_date"] = pd.to_datetime(isolate_df["collection_date"]) + isolate_df["submission_date"] = pd.to_datetime(isolate_df["submission_date"]) + # print(isolate_df.columns) + # Make a column for each metadata field metadata_cols = [] metadata_col_defs = [] for field in list(config["metadata_cols"].keys()) + loc_levels: metadata_col_defs.append(sql.SQL(f"{field} INTEGER NOT NULL")) metadata_cols.append(field) + + # Make columns for sequence metadata, if they exist + if "length" in isolate_df.columns: + metadata_cols.append("length") + metadata_col_defs.append(sql.SQL("length INTEGER NOT NULL")) + if "percent_ambiguous" in isolate_df.columns: + metadata_cols.append("percent_ambiguous") + metadata_col_defs.append(sql.SQL("percent_ambiguous REAL NOT NULL")) + metadata_col_defs = sql.SQL(",\n").join(metadata_col_defs) # Make a column for each grouping @@ -338,11 +350,6 @@ def seed_database(conn, schema="public"): ) ) - isolate_df = pd.read_json(data_path / "isolate_data.json") - 
isolate_df["collection_date"] = pd.to_datetime(isolate_df["collection_date"]) - isolate_df["submission_date"] = pd.to_datetime(isolate_df["submission_date"]) - # print(isolate_df.columns) - # Partition settings min_date = isolate_df["collection_date"].min() # Round latest sequence to the nearest partition break @@ -423,8 +430,7 @@ def seed_database(conn, schema="public"): "segments", "accession_ids", ] - + list(config["metadata_cols"].keys()) - + loc_levels + + metadata_cols + list( filter(lambda x: x != "subtype", config["group_cols"].keys()) ) # Avoid duplicate subtype index @@ -480,7 +486,9 @@ def seed_database(conn, schema="public"): # Clean up the reference name as a SQL ident - no dots reference_name_sql = reference_name.replace(".", "_") - reference_partition_name = f"seqmut_{mutation_field}_{reference_name_sql}" + reference_partition_name = ( + f"seqmut_{mutation_field}_{reference_name_sql}" + ) # Create reference partition cur.execute( @@ -555,13 +563,11 @@ def seed_database(conn, schema="public"): "subtype", "reference", ] - + list(config["metadata_cols"].keys()) - + loc_levels + + metadata_cols + list( filter(lambda x: x != "subtype", config["group_cols"].keys()) ) # Avoid duplicate subtype index ): - cur.execute( sql.SQL( "CREATE INDEX {index_name} ON {table_name}({field});" @@ -756,13 +762,11 @@ def seed_database(conn, schema="public"): "subtype", "reference", ] - + list(config["metadata_cols"].keys()) - + loc_levels + + metadata_cols + list( filter(lambda x: x != "subtype", config["group_cols"].keys()) ) # Avoid duplicate subtype index ): - cur.execute( sql.SQL( "CREATE INDEX {index_name} ON {table_name}({field});" diff --git a/services/server/cg_server/download/genomes.py b/services/server/cg_server/download/genomes.py index 75fa74fc..c5891c5e 100644 --- a/services/server/cg_server/download/genomes.py +++ b/services/server/cg_server/download/genomes.py @@ -33,6 +33,8 @@ def download_genomes(conn, req): req.get("selected_metadata_fields", None), 
req.get("selected_group_fields", None), req.get("selected_reference", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), ) cur.execute( diff --git a/services/server/cg_server/download/metadata.py b/services/server/cg_server/download/metadata.py index 1d4fffbc..241a8016 100644 --- a/services/server/cg_server/download/metadata.py +++ b/services/server/cg_server/download/metadata.py @@ -16,7 +16,6 @@ def download_metadata(conn, req): - selected_reference = req.get("selected_reference", None) if not selected_reference: raise Exception("No reference specified") @@ -34,6 +33,8 @@ def download_metadata(conn, req): req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), selected_reference, ) @@ -179,7 +180,10 @@ def download_metadata(conn, req): cur.execute(query) - res_df = pd.DataFrame.from_records(cur.fetchall(), columns=sequence_cols,) + res_df = pd.DataFrame.from_records( + cur.fetchall(), + columns=sequence_cols, + ) # Replace mutation IDs with names for mutation_field in ["dna", "gene_aa", "protein_aa"]: diff --git a/services/server/cg_server/query/group_mutation_frequencies.py b/services/server/cg_server/query/group_mutation_frequencies.py index d16ad6c9..fa199f66 100644 --- a/services/server/cg_server/query/group_mutation_frequencies.py +++ b/services/server/cg_server/query/group_mutation_frequencies.py @@ -36,7 +36,9 @@ def query_group_mutation_frequencies(conn, req): "mutation_str", ] if mutation_type == "gene_aa" or mutation_type == "protein_aa": - mutation_cols = ["feature",] + mutation_cols + mutation_cols = [ + "feature", + ] + mutation_cols mutation_cols_expr = sql.SQL(",\n").join( [ @@ -94,10 +96,14 @@ def query_group_mutation_frequencies_dynamic(conn, req): mutation_table = "dna_mutation" elif mutation_type == "gene_aa": mutation_table = "gene_aa_mutation" - mutation_cols = ["feature",] + 
mutation_cols + mutation_cols = [ + "feature", + ] + mutation_cols elif mutation_type == "protein_aa": mutation_table = "protein_aa_mutation" - mutation_cols = ["feature",] + mutation_cols + mutation_cols = [ + "feature", + ] + mutation_cols sequence_where_filter = build_sequence_location_where_filter( group_key, @@ -108,6 +114,8 @@ def query_group_mutation_frequencies_dynamic(conn, req): req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), selected_reference, ) sequence_mutation_table = "sequence_" + mutation_table @@ -167,4 +175,3 @@ def query_group_mutation_frequencies_dynamic(conn, req): ) return res.to_json(orient="records") - diff --git a/services/server/cg_server/query/metadata.py b/services/server/cg_server/query/metadata.py index e10f21ff..6bb00127 100644 --- a/services/server/cg_server/query/metadata.py +++ b/services/server/cg_server/query/metadata.py @@ -14,7 +14,6 @@ def query_metadata(conn, req): with conn.cursor() as cur: - sequence_where_filter = build_sequence_location_where_filter( req.get("group_key", None), get_loc_level_ids(req), @@ -25,6 +24,8 @@ def query_metadata(conn, req): req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), req.get("selected_reference", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), ) # Iterate over each metadata column, and aggregate counts diff --git a/services/server/cg_server/query/selection.py b/services/server/cg_server/query/selection.py index ab393af5..60387f8c 100644 --- a/services/server/cg_server/query/selection.py +++ b/services/server/cg_server/query/selection.py @@ -113,6 +113,8 @@ def build_sequence_where_filter( selected_metadata_fields=None, selected_group_fields=None, selected_reference=None, + sequence_length=None, + percent_ambiguous=None, ): """Build query for filtering sequences based on user's 
location/date selection and selected metadata fields @@ -138,6 +140,10 @@ def build_sequence_where_filter( - Values are a list of group values, i.e., ["B.1.617.2", "BA.1"] selected_reference: str - Reference name (e.g., "NC_012920.1") + sequence_length: pair of integers + - (min_length, max_length) + percent_ambiguous: pair of floats + - (min_percent, max_percent) Returns ------- @@ -230,12 +236,50 @@ def build_sequence_where_filter( else: group_filters = sql.SQL("") + if sequence_length: + sequence_length_filter = [] + if sequence_length[0] is not None: + sequence_length_filter.append( + sql.SQL('"length" >= {}').format(sql.Literal(sequence_length[0])) + ) + if sequence_length[1] is not None: + sequence_length_filter.append( + sql.SQL('"length" <= {}').format(sql.Literal(sequence_length[1])) + ) + sequence_length_filter = sql.Composed( + [sql.SQL(" AND "), sql.SQL(" AND ").join(sequence_length_filter)] + ) + else: + sequence_length_filter = sql.SQL("") + + if percent_ambiguous: + percent_ambiguous_filter = [] + if percent_ambiguous[0] is not None: + percent_ambiguous_filter.append( + sql.SQL('"percent_ambiguous" >= {}').format( + sql.Literal(percent_ambiguous[0]) + ) + ) + if percent_ambiguous[1] is not None: + percent_ambiguous_filter.append( + sql.SQL('"percent_ambiguous" <= {}').format( + sql.Literal(percent_ambiguous[1]) + ) + ) + percent_ambiguous_filter = sql.Composed( + [sql.SQL(" AND "), sql.SQL(" AND ").join(percent_ambiguous_filter)] + ) + else: + percent_ambiguous_filter = sql.SQL("") + sequence_where_filter = sql.SQL( """ {metadata_filters} {group_filters} "collection_date" >= {start_date} AND "collection_date" <= {end_date} {submission_date_filter} + {sequence_length_filter} + {percent_ambiguous_filter} """ ).format( metadata_filters=metadata_filters, @@ -243,6 +287,8 @@ def build_sequence_where_filter( start_date=sql.Literal(pd.to_datetime(start_date)), end_date=sql.Literal(pd.to_datetime(end_date)), submission_date_filter=submission_date_filter, + 
sequence_length_filter=sequence_length_filter, + percent_ambiguous_filter=percent_ambiguous_filter, ) return sequence_where_filter @@ -283,7 +329,8 @@ def build_sequence_location_where_filter(group_key, loc_level_ids, *args, **kwar continue loc_where.append( sql.SQL("({loc_level_col} = ANY({loc_ids}))").format( - loc_level_col=sql.Identifier(loc_level), loc_ids=sql.Literal(loc_ids), + loc_level_col=sql.Identifier(loc_level), + loc_ids=sql.Literal(loc_ids), ) ) @@ -370,7 +417,10 @@ def count_coverage( cur.execute(coverage_query) - coverage_df = pd.DataFrame.from_records(cur.fetchall(), columns=["ind", "count"],) + coverage_df = pd.DataFrame.from_records( + cur.fetchall(), + columns=["ind", "count"], + ) if dna_or_aa != constants["DNA_OR_AA"]["DNA"]: if coordinate_mode == constants["COORDINATE_MODES"]["COORD_GENE"]: @@ -417,7 +467,6 @@ def query_and_aggregate(conn, req): selected_protein = req.get("selected_protein", None) with conn.cursor() as cur: - main_query = [] for loc_level in constants["GEO_LEVELS"].values(): loc_ids = req.get(loc_level, None) @@ -433,6 +482,8 @@ def query_and_aggregate(conn, req): req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), req.get("selected_reference", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), ) sequence_where_filter = sql.SQL( "{prior} AND {loc_level_col} = ANY({loc_ids})" @@ -536,6 +587,8 @@ def query_and_aggregate(conn, req): req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), req.get("selected_reference", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), ) coverage_df = count_coverage( cur, diff --git a/services/server/cg_server/query/variant_table.py b/services/server/cg_server/query/variant_table.py index 379a5cdd..0f97a602 100644 --- a/services/server/cg_server/query/variant_table.py +++ b/services/server/cg_server/query/variant_table.py @@ -50,6 +50,8 @@ def build_variant_table(conn, req): 
req.get("subm_end_date", None), req.get("selected_metadata_fields", None), req.get("selected_group_fields", None), + req.get("sequence_length", None), + req.get("percent_ambiguous", None), selected_reference, ) diff --git a/src/components/Modals/SelectSequencesModal.js b/src/components/Modals/SelectSequencesModal.js index 3055523b..6dfcd3e7 100644 --- a/src/components/Modals/SelectSequencesModal.js +++ b/src/components/Modals/SelectSequencesModal.js @@ -26,6 +26,7 @@ import CoordinateSelect from '../Selection/CoordinateSelect'; import DateSelect from '../Selection/DateSelect'; import GroupSelect from '../Selection/GroupSelect'; import MetaFieldSelect from '../Selection/MetaFieldSelect'; +import QualitySelect from '../Selection/QualitySelect'; import LoadingSpinner from '../Common/LoadingSpinner'; import { @@ -81,6 +82,8 @@ const SelectSequencesContent = observer(({ onRequestClose }) => { submEndDate: configStore.submEndDate, selectedGroupFields: configStore.selectedGroupFields, selectedMetadataFields: configStore.selectedMetadataFields, + sequenceLengthRange: configStore.sequenceLengthRange, + percentAmbiguousRange: configStore.percentAmbiguousRange, ageRange: configStore.ageRange, }); @@ -443,6 +446,23 @@ const SelectSequencesContent = observer(({ onRequestClose }) => { selectedMetadataFields, }); }; + + const updateQualityFilters = (field, range) => { + const { sequenceLengthRange, percentAmbiguousRange } = metaPending; + if (field === 'sequenceLengthRange') { + sequenceLengthRange[0] = range[0]; + sequenceLengthRange[1] = range[1]; + } else if (field === 'percentAmbiguousRange') { + percentAmbiguousRange[0] = range[0]; + percentAmbiguousRange[1] = range[1]; + } + setMetaPending({ + ...metaPending, + sequenceLengthRange, + percentAmbiguousRange, + }); + }; + // const updateAgeRange = (ageRange) => { // setPending({ // ...pending, @@ -637,6 +657,11 @@ const SelectSequencesContent = observer(({ onRequestClose }) => { 
updateSelectedGroupFields={updateSelectedGroupFields} /> + + { + // Only render this if we have the quality filters available + if (config['virus'] !== 'sars2') { + return ''; + } + + const handleChange = (field, position, event) => { + //console.log(field, position, event.target.value); + let rng; + if (field === 'sequenceLengthRange') { + rng = sequenceLengthRange; + } else if (field === 'percentAmbiguousRange') { + rng = percentAmbiguousRange; + } + rng[position] = + event.target.value === '' ? null : parseFloat(event.target.value); + + updateQualityFilters(field, rng); + }; + + const qualityFilterItems = []; + + qualityFilterItems.push( + + Sequence Length (bases) + + + + + + + + + + ); + + qualityFilterItems.push( + + % Ambiguous (% N) + + + + + + + + + + ); + + return ( + + {' '} + + Sequence Quality + + + {qualityFilterItems} + + ); + } +); + +QualitySelect.propTypes = { + sequenceLengthRange: PropTypes.arrayOf(PropTypes.number), + percentAmbiguousRange: PropTypes.arrayOf(PropTypes.number), + updateQualityFilters: PropTypes.func, +}; + +export default QualitySelect; diff --git a/src/components/Selection/QualitySelect.styles.js b/src/components/Selection/QualitySelect.styles.js new file mode 100644 index 00000000..76d515ab --- /dev/null +++ b/src/components/Selection/QualitySelect.styles.js @@ -0,0 +1,43 @@ +import styled from 'styled-components'; + +export const QualitySelectContainer = styled.div` + display: flex; + flex-direction: column; + + padding-left: 15px; + margin-bottom: 10px; + + span.title { + font-weight: 500; + font-size: 1rem; + margin-bottom: 5px; + } +`; + +export const FormRow = styled.div` + display: flex; + flex-direction: row; + align-items: flex-end; +`; + +export const TitleColumn = styled.div` + display: flex; + flex-direction: column; + align-items: stretch; + + margin-right: 10px; + font-weight: 500; +`; + +export const FormColumn = styled.div` + display: flex; + flex-direction: column; + align-items: stretch; + max-width: 6em; + + 
margin-right: 10px; + + input { + font-family: inherit; + } +`; diff --git a/src/components/Sidebar/StatusBox.js b/src/components/Sidebar/StatusBox.js index 57c709e8..fa9f53e7 100644 --- a/src/components/Sidebar/StatusBox.js +++ b/src/components/Sidebar/StatusBox.js @@ -1,6 +1,7 @@ import React from 'react'; import { observer } from 'mobx-react'; import { useStores } from '../../stores/connect'; +import { config } from '../../config'; import { DNA_OR_AA, @@ -31,6 +32,93 @@ const serializeAACoordinates = (coordinateRanges) => { const StatusBox = observer(() => { const { configStore, dataStore, UIStore } = useStores(); + let qualityFilters = ''; + const qualityFields = ['length', 'percent_ambiguous']; + const qualityFieldKeys = { + length: 'sequenceLengthRange', + percent_ambiguous: 'percentAmbiguousRange', + }; + const qualityFieldNames = { + length: 'Sequence Length', + percent_ambiguous: '% Ambiguous (N) Bases', + }; + // Only render this if we have the quality filters available + if (config['virus'] === 'sars2') { + const qualityFilterItems = []; + qualityFields.forEach((field) => { + const rng = configStore[qualityFieldKeys[field]]; + let suffix = ' bases'; + if (field === 'percent_ambiguous') { + suffix = '%'; + } + // If first value of the range undefined, then only maximum is set + if (rng[0] === null) { + qualityFilterItems.push( + + {qualityFieldNames[field]} ≤{' '} + + {rng[1]} + {suffix} + + + ); + } + // If second value of the range undefined, then only minimum is set + else if (rng[1] === null) { + qualityFilterItems.push( + + {qualityFieldNames[field]} ≥{' '} + + {rng[0]} + {suffix} + + + ); + } + // If both values of the range are defined, then both minimum and maximum are set + else { + qualityFilterItems.push( + + {qualityFieldNames[field]}:{' '} + + {rng[0]} + {suffix} + {' '} + –{' '} + + {rng[1]} + {suffix} + + + ); + } + }); + qualityFilters = <>{qualityFilterItems}; + } + + const selectedGroupFields = []; + 
Object.keys(configStore.selectedGroupFields).forEach((groupKey) => { + if (configStore.selectedGroupFields[groupKey].length === 0) { + return; + } + + const selectedGroupFieldItems = []; + configStore.selectedGroupFields[groupKey].forEach((group, i) => { + selectedGroupFieldItems.push( + {group} + ); + if (i < configStore.selectedGroupFields[groupKey].length - 1) { + selectedGroupFieldItems.push(','); + } + }); + + selectedGroupFields.push( + + Selected {groupKey}s: {selectedGroupFieldItems} + + ); + }); + let genomeSelection = ''; const residuesOrBases = configStore.dnaOrAa === DNA_OR_AA.DNA ? 'Bases' : 'Residues'; @@ -92,29 +180,6 @@ const StatusBox = observer(() => { ); } - const selectedGroupFields = []; - Object.keys(configStore.selectedGroupFields).forEach((groupKey) => { - if (configStore.selectedGroupFields[groupKey].length === 0) { - return; - } - - const selectedGroupFieldItems = []; - configStore.selectedGroupFields[groupKey].forEach((group, i) => { - selectedGroupFieldItems.push( - {group} - ); - if (i < configStore.selectedGroupFields[groupKey].length - 1) { - selectedGroupFieldItems.push(','); - } - }); - - selectedGroupFields.push( - - Selected {groupKey}s: {selectedGroupFieldItems} - - ); - }); - let selectedGroups = None; if (configStore.selectedGroups.length > 0) { if (configStore.groupKey === GROUP_MUTATION) { @@ -166,6 +231,7 @@ const StatusBox = observer(() => { Reference genome: {configStore.selectedReference} ( {getReferences()[configStore.selectedReference]['description']}). 
+ {qualityFilters} {configStore.selectedLocationNodes.length} selected locations:{' '} diff --git a/src/constants/initialValues.js b/src/constants/initialValues.js index 6ced0cce..decfea91 100644 --- a/src/constants/initialValues.js +++ b/src/constants/initialValues.js @@ -66,6 +66,9 @@ if (config['virus'] === 'sars2') { selectedMetadataFields: {}, ageRange: [null, null], + sequenceLengthRange: [29000, null], + percentAmbiguousRange: [null, 5], + // Location tab hoverLocation: null, focusedLocations: [], diff --git a/src/stores/configStore.js b/src/stores/configStore.js index d80e9afb..34f8fb99 100644 --- a/src/stores/configStore.js +++ b/src/stores/configStore.js @@ -56,15 +56,23 @@ export class ConfigStore { @observable selectedMetadataFields = {}; @observable ageRange = []; + @observable sequenceLengthRange = [null, null]; + @observable percentAmbiguousRange = [null, null]; + @observable hoverLocation = null; @observable focusedLocations = []; constructor() {} init() { + // Set initial values this.initialValues = initialConfigStore; Object.keys(this.initialValues).forEach((key) => { + // Ignore fields that aren't defined in the initial values + if (!Object.prototype.hasOwnProperty.call(this.initialValues, key)) { + return; + } this[key] = this.initialValues[key]; }); } @@ -178,6 +186,15 @@ export class ConfigStore { urlParams.set(field, coordsToText(pending[field])); } else if (field === 'residueCoordinates') { urlParams.set(field, residueCoordsToText(pending[field])); + } else if ( + field === 'sequenceLengthRange' || + field === 'percentAmbiguousRange' + ) { + // Store ranged values, like sequence length and percent ambiguous + urlParams.set( + field, + pending[field].map((x) => (x === null ? 
'' : x.toString())).join(',') + ); } else { urlParams.set(field, String(pending[field])); } diff --git a/src/stores/dataStore.js b/src/stores/dataStore.js index 8f8f221c..c1fa2d2d 100644 --- a/src/stores/dataStore.js +++ b/src/stores/dataStore.js @@ -87,6 +87,10 @@ export class DataStore { end_date: toJS(rootStoreInstance.configStore.endDate), subm_start_date: toJS(rootStoreInstance.configStore.submStartDate), subm_end_date: toJS(rootStoreInstance.configStore.submEndDate), + sequence_length: toJS(rootStoreInstance.configStore.sequenceLengthRange), + percent_ambiguous: toJS( + rootStoreInstance.configStore.percentAmbiguousRange + ), }; fetch(hostname + '/data', { diff --git a/src/stores/urlMonitor.js b/src/stores/urlMonitor.js index 4766ec09..826787ca 100644 --- a/src/stores/urlMonitor.js +++ b/src/stores/urlMonitor.js @@ -118,6 +118,18 @@ export class URLMonitor { if (primer !== undefined && primer !== null) this.pendingChanges.configStore[key].push(primer); }); + } else if ( + key === 'sequenceLengthRange' || + key === 'percentAmbiguousRange' + ) { + // Parse ranges + value = value + .split(',') + .map((x) => (x === '' ? null : parseFloat(x))); + if (key === 'sequenceLengthRange') { + value = value.map((x) => (x === null ? 
null : Math.round(x))); + } + this.pendingChanges.configStore[key] = value; } else { this.pendingChanges.configStore[key] = value; } From 130571433cd6dcd54ac210791902f4526ea86b5b Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Sat, 4 Nov 2023 12:48:03 -0400 Subject: [PATCH 3/4] Ignore flu genbank data --- .dockerignore | 1 + .gcloudignore | 1 + .gitignore | 1 + 3 files changed, 3 insertions(+) diff --git a/.dockerignore b/.dockerignore index 9138dfd7..dbeecbc4 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,6 +4,7 @@ data data_genbank data_flu data_flu_small +data_flu_genbank data_gisaid_flu data_gisaid_rsv data_genbank_rsv diff --git a/.gcloudignore b/.gcloudignore index 439c793b..0aa052d5 100644 --- a/.gcloudignore +++ b/.gcloudignore @@ -23,6 +23,7 @@ data_az data_ma data_flu data_flu_small +data_flu_genbank data_gisaid_flu data_gisaid_rsv dist diff --git a/.gitignore b/.gitignore index 57e76e5c..ad73646b 100644 --- a/.gitignore +++ b/.gitignore @@ -133,6 +133,7 @@ example_data_genbank/*/lineage_treetime/*.pdf data data_genbank +data_flu_genbank example_data_genbank/rsv/** example_data_genbank/flu/** example_data_genbank/sars2/** From c5ae3763f91a2f1e50730f012e6ad3eb2926f2d7 Mon Sep 17 00:00:00 2001 From: Albert Tian Chen Date: Sat, 4 Nov 2023 12:49:06 -0400 Subject: [PATCH 4/4] Bump version to v2.7.6-qual-rc1 --- package-lock.json | 2 +- package.json | 2 +- src/utils/version.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index e1862237..bad27744 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,6 +1,6 @@ { "name": "covidcg", - "version": "2.7.5-pango", + "version": "2.7.6-qual-rc1", "lockfileVersion": 1, "requires": true, "dependencies": { diff --git a/package.json b/package.json index d31b17c5..dc850ca9 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "covidcg", - "version": "2.7.5-pango", + "version": "2.7.6-qual-rc1", "description": "", "engines": { 
"node": ">=8", diff --git a/src/utils/version.js b/src/utils/version.js index 70e4870c..1df3c44b 100644 --- a/src/utils/version.js +++ b/src/utils/version.js @@ -1 +1 @@ -export const version = '2.7.5-pango'; +export const version = '2.7.6-qual-rc1';