Skip to content

Commit

Permalink
Added code for JSON-oriented model approach (#4)
Browse files Browse the repository at this point in the history
* Added code for JSON-oriented model approach

* chore: Update code formatting and editor settings
Refined logic for time-based queries

* refactor: Reorganize imports and update code formatting

* chore: Refactor build_index function to simplify code

* Minor changes

* Minor changes

* Removed conflicting flake8 error

* Updated model in INSTALL.md

* Resolved PR Comments
  • Loading branch information
deepnayak authored Jun 24, 2024
1 parent 99c3669 commit 7f2131f
Show file tree
Hide file tree
Showing 16 changed files with 11,152 additions and 295 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/flake8.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
- name: flake8 Lint
uses: TrueBrain/actions-flake8@v2
with:
ignore: E203,E701
ignore: E203,E701,W503,W504
max_line_length: 88
path: src
plugins: flake8-black flake8-isort flake8-quotes
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
prompts/__pycache__
rich_query_index
.DS_Store
.env
__pycache__
__pycache__
src/query_index
9 changes: 6 additions & 3 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
{
"git.ignoreLimitWarning": true,
"editor.formatOnSave": true,
"flake8.args": ["--max-line-length=88"],
"flake8.args": [
"--max-line-length=88"
],
"[python]": {
"editor.codeActionsOnSave": {
"source.organizeImports.python": "explicit"
},
"editor.defaultFormatter": "ms-python.black-formatter"
"editor.defaultFormatter": "ms-python.black-formatter",
"editor.formatOnSave": true,
}
}
}
2 changes: 1 addition & 1 deletion INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ curl -fsSL https://ollama.com/install.sh | sh
Run the Ollama application:

```bash
ollama run llama3
ollama run codellama
```

## Step 7: Start the Flask Application
Expand Down
49 changes: 0 additions & 49 deletions app.py

This file was deleted.

49 changes: 0 additions & 49 deletions prompt.py

This file was deleted.

75 changes: 0 additions & 75 deletions query_reformulation.py

This file was deleted.

70 changes: 0 additions & 70 deletions rich_queries/queryV1.json

This file was deleted.

2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[flake8]
max-line-length = 88
extend-ignore = E203,E701
extend-ignore = E203,E701,W503,W504
inline-quotes = double
93 changes: 93 additions & 0 deletions src/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json
import logging
import os
import sys
import urllib

from flask import Flask, render_template, request

from index import load_index, query_engine

app = Flask("goat_nlp")

# Mirror the application's log records to stdout, one timestamped line each,
# and let INFO-level messages through.
_log_format = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
handler = logging.StreamHandler(sys.stdout)
handler.setFormatter(logging.Formatter(_log_format))
app.logger.setLevel(logging.INFO)
app.logger.addHandler(handler)


def construct_url(json_output):
    """Build a goat.genomehubs.org URL from the model's parsed JSON output.

    Args:
        json_output: Mapping describing the request. ``"intent"`` is
            required: ``"count"`` selects the ``api/v2/count?`` endpoint,
            ``"record"`` selects ``record?`` and any other value keeps the
            default ``search?``. The optional keys ``"taxon"``, ``"rank"``,
            ``"field"`` and ``"time_frame_query"`` each add one query term,
            in that order.

    Returns:
        The URL as a string: base URL, endpoint, the percent-encoded query
        (terms joined with ``" AND "``) and a fixed parameter suffix. The
        suffix asks for ``result=assembly`` when ``"time_frame_query"`` is
        present and ``result=taxon`` otherwise.

    Raises:
        KeyError: If ``json_output`` has no ``"intent"`` key.
    """
    # Import the submodule explicitly: a bare ``import urllib`` does not
    # guarantee that ``urllib.parse`` has been loaded.
    from urllib.parse import quote

    base_url = "https://goat.genomehubs.org/"

    intent = json_output["intent"]
    if intent == "count":
        endpoint = "api/v2/count?"
    elif intent == "record":
        endpoint = "record?"
    else:
        endpoint = "search?"

    params = []
    if "taxon" in json_output:
        params.append(f"tax_tree(* {json_output['taxon']})")
    if "rank" in json_output:
        params.append(f"tax_rank({json_output['rank']})")
    if "field" in json_output:
        params.append(f"{json_output['field']}")
    if "time_frame_query" in json_output:
        params.append(f"{json_output['time_frame_query']}")

    # The two suffix variants differ only in the result type.
    result_type = "assembly" if "time_frame_query" in json_output else "taxon"
    suffix = (
        f"&result={result_type}&summaryValues=count&taxonomy=ncbi&offset=0"
        "&fields=assembly_level%2Cassembly_span%2Cgenome_size%2C"
        "chromosome_number%2Chaploid_number&names=common_name&ranks="
        "&includeEstimates=false&size=100"
    )

    query_string = " AND ".join(params)
    return base_url + endpoint + "query=" + quote(query_string) + suffix


def chat_bot_rag(query):
    """Turn a user query into a debug JSON dump plus a URL from construct_url.

    The query is sent through ``query_engine.custom_query`` and the reply is
    parsed as JSON. Up to ``RETRY_COUNT`` attempts (environment variable,
    default 3) are made with any exception logged and swallowed; if all of
    them fail, one last attempt is made whose exception propagates.

    Args:
        query: The user's query, passed unchanged to the query engine.

    Returns:
        A dict with ``"json_debug"`` (the parsed model response re-serialised
        with two-space indentation) and ``"url"`` (the result of
        ``construct_url`` on that response).
    """

    def _answer():
        # One full round trip: ask the model, parse its JSON, build the URL.
        parsed = json.loads(query_engine.custom_query(query))
        return {
            "json_debug": json.dumps(parsed, indent=2),
            "url": construct_url(parsed),
        }

    guarded_attempts = int(os.getenv("RETRY_COUNT", 3))
    for _attempt in range(guarded_attempts):
        try:
            return _answer()
        except Exception as e:
            app.logger.error(f"Error: {e}")
            app.logger.error("Retrying...")

    # Every guarded attempt failed: try once more and let any error surface.
    return _answer()


@app.route("/")
def home():
    """Serve the page rendered from the ``chat.html`` template."""
    chat_page = render_template("chat.html")
    return chat_page


@app.route("/rebuildIndex")
def index():
    """Reload the index with ``force_reload=True`` and confirm completion.

    Returns:
        A small dict, which Flask serialises to a JSON response, confirming
        that the reload call finished.
    """
    load_index(force_reload=True)
    # A Flask view must return a response. Falling off the end returns None,
    # which makes Flask raise TypeError (HTTP 500) even though the reload
    # above has already run.
    return {"status": "ok", "message": "Index rebuilt"}


@app.route("/chat", methods=["POST"])
def chat():
    """Answer a POSTed chat message.

    Reads the ``user_input`` form field and returns whatever
    ``chat_bot_rag`` produces for it.
    """
    user_message = request.form["user_input"]
    return chat_bot_rag(user_message)


if __name__ == "__main__":
    # Only when executed directly as a script: start the app's built-in
    # server with debug mode switched on.
    app.run(debug=True)
Loading

0 comments on commit 7f2131f

Please sign in to comment.