diff --git a/.github/workflows/flake8.yml b/.github/workflows/flake8.yml index 6999f5d..e11b44b 100644 --- a/.github/workflows/flake8.yml +++ b/.github/workflows/flake8.yml @@ -21,7 +21,7 @@ jobs: uses: TrueBrain/actions-flake8@v2 with: ignore: E203,E701,W503,W504 - max_line_length: 88 + max_line_length: 118 path: src plugins: flake8-black flake8-isort flake8-quotes error_classes: E,H,I00,Q00 diff --git a/.vscode/settings.json b/.vscode/settings.json index 4706399..fb1df91 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,7 +2,7 @@ "git.ignoreLimitWarning": true, "editor.formatOnSave": true, "flake8.args": [ - "--max-line-length=88" + "--max-line-length=118" ], "[python]": { "editor.codeActionsOnSave": { diff --git a/pyproject.toml b/pyproject.toml index 26acf2e..6153753 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [tool.black] -line-length = 88 +line-length = 118 target-version = ['py39', 'py310', 'py311', 'py312'] include = '\.pyi?$' required-version = '24.4.2' diff --git a/setup.cfg b/setup.cfg index 66427f0..89fcc31 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,4 +1,4 @@ [flake8] -max-line-length = 88 +max-line-length = 118 extend-ignore = E203,E701,W503,W504 inline-quotes = double \ No newline at end of file diff --git a/src/agent/component_helpers.py b/src/agent/component_helpers.py index fec2f99..f5b1fe1 100644 --- a/src/agent/component_helpers.py +++ b/src/agent/component_helpers.py @@ -70,30 +70,24 @@ def identify_intent(input: str, state: Dict[str, Any]): def identify_attributes(input: str, state: Dict[str, Any]): response = requests.get( - "https://goat.genomehubs.org/api/v2/resultFields" - + f'?result={state["index"]["classification"]}' - + "&taxonomy=ncbi" + f'https://goat.genomehubs.org/api/v2/resultFields?result={state["index"]["classification"]}&taxonomy=ncbi' ) response_parsed = response.json() - cleaned_attributes = [] - - for name, attribute in response_parsed["fields"].items(): - cleaned_attributes.append( - { - "name": name, - "description": ( - attribute["description"] if "description" in attribute else None - ), - "constraint": ( - attribute["constraint"] if "constraint" in attribute else None - ), - "value_metadata": ( - attribute["value_metadata"] - if "value_metadata" in attribute - else None - ), - } - ) + cleaned_attributes = [ + { + "name": name, + "description": ( + attribute["description"] if "description" in attribute else None + ), + "constraint": ( + attribute["constraint"] if "constraint" in attribute else None + ), + "value_metadata": ( + attribute["value_metadata"] if "value_metadata" in attribute else None + ), + } + for name, attribute in response_parsed["fields"].items() + ] attribute_response = Settings.llm.complete( ATTRIBUTE_PROMPT.format( @@ -133,10 +127,10 @@ def construct_query(input: str, state: Dict[str, Any]): if state["timeframe"]["from_date"] != "" or state["timeframe"]["to_date"] != "": state["index"]["classification"] = "assembly" - if state["timeframe"]["from_date"] != "": - query += f"last_updated>={state['timeframe']['from_date']} AND " - if state["timeframe"]["to_date"] != "": - query += f"last_updated<={state['timeframe']['to_date']} AND " + if state["timeframe"]["from_date"] != "": + query += f"last_updated>={state['timeframe']['from_date']} AND " + if state["timeframe"]["to_date"] != "": + query += f"last_updated<={state['timeframe']['to_date']} AND " if state["attributes"]["attributes"] != []: for attribute in state["attributes"]["attributes"]: @@ -160,13 +154,11 @@ def construct_query(input: str, state: Dict[str, Any]): def construct_url(input: str, state: Dict[str, Any]): base_url = "https://goat.genomehubs.org/" endpoint = state["intent"]["intent"] + "?" - suffix = ( - f'&result={state["index"]["classification"]}' - + "&summaryValues=count&taxonomy=ncbi&offset=0" - + "&fields=assembly_level%2Cassembly_span%2Cgenome_size%2C" - + "chromosome_number%2Chaploid_number&names=common_name&ranks=" - + "&includeEstimates=false&size=100" + suffix = f'&result={state["index"]["classification"]}&summaryValues=count&taxonomy=ncbi&offset=0' + suffix += ( + "&fields=assembly_level%2Cassembly_span%2Cgenome_size%2Cchromosome_number%2C" ) + suffix += "haploid_number&names=common_name&ranks=&includeEstimates=false&size=100" state["final_url"] = ( base_url + endpoint + "query=" + urllib.parse.quote(state["query"]) + suffix @@ -182,25 +174,20 @@ def identify_record(input: str, state: Dict[str, Any]): entities += f"* {entity['singular_form']}," entities += f"* {entity['plural_form']}," - query_url = ( - "https://goat.genomehubs.org/api/v2/search?query=" - + urllib.parse.quote(f"tax_name({entities})") - + "&result=taxon" - ) + query_url = f'https://goat.genomehubs.org/api/v2/search?query={urllib.parse.quote(f"tax_name({entities})")}' + query_url += "&result=taxon" response = requests.get(query_url) response_parsed = response.json() - cleaned_taxons = [] - - for res in response_parsed["results"]: - cleaned_taxons.append( - { - "taxon_id": res["result"]["taxon_id"], - "taxon_rank": res["result"]["taxon_rank"], - "scientific_name": res["result"]["scientific_name"], - "taxon_names": res["result"]["taxon_names"], - } - ) + cleaned_taxons = [ + { + "taxon_id": res["result"]["taxon_id"], + "taxon_rank": res["result"]["taxon_rank"], + "scientific_name": res["result"]["scientific_name"], + "taxon_names": res["result"]["taxon_names"], + } + for res in response_parsed["results"] + ] taxon_response = Settings.llm.complete( RECORD_PROMPT.format(query=input, results=json.dumps(cleaned_taxons, indent=4))