diff --git a/.gitignore b/.gitignore index b1672e1..515c116 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.idea/ node_modules coverage .DS_Store diff --git a/use_cases/kg_chat/README.md b/use_cases/kg_chat/README.md index 37f1980..c94d52b 100644 --- a/use_cases/kg_chat/README.md +++ b/use_cases/kg_chat/README.md @@ -8,8 +8,9 @@ If you are not using a Sandbox instance, make sure you have APOC and GDS librari - Create `.env` file - Populate Neo4j and OpenAI credentials in the `.env` as shown in the `.env.example` + - If you do not have an OpenAI API key, you can get one through an [OpenAI Platform Account](https://platform.openai.com/). - Start the project by running `docker-compose up` -- Open your favorite internet browser at TBD +- Open your favorite internet browser at [localhost:4173/use-cases/chat-with-kg/index.html](http://localhost:4173/use-cases/chat-with-kg/index.html) ## API diff --git a/use_cases/kg_chat/backend/src/main.py b/use_cases/kg_chat/backend/src/main.py index cdf3fe4..25a83d6 100644 --- a/use_cases/kg_chat/backend/src/main.py +++ b/use_cases/kg_chat/backend/src/main.py @@ -6,7 +6,7 @@ project_root = current_file.parents[4] sys.path.append(str(project_root)) -from fastapi import FastAPI, WebSocket, WebSocketDisconnect +from fastapi import FastAPI, HTTPException, WebSocket, WebSocketDisconnect from fastapi.middleware.cors import CORSMiddleware from use_cases.shared.components.text2cypher import Text2Cypher @@ -20,23 +20,42 @@ class Payload(BaseModel): question: str +cypher = {"arxiv": """ + CREATE CONSTRAINT IF NOT EXISTS FOR (p:Paper) REQUIRE p.id IS UNIQUE; + LOAD CSV WITH HEADERS FROM "https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/arxiv/arxiv.csv" AS row + MERGE (p:Paper {id: row.paper_id}) + SET p += apoc.map.clean(row, ["paper_id", "authors"], []) + WITH p, row.authors AS authors + UNWIND apoc.convert.fromJsonList(authors) as author + MERGE (a:Author {name:author}) + MERGE (p)-[:HAS_AUTHOR]->(a); + LOAD CSV WITH
HEADERS FROM "https://raw.githubusercontent.com/tomasonjo/blog-datasets/main/arxiv/arxiv_embedding.csv" AS row + MATCH (p:Paper {id: row.paper_id}) + SET p.embedding = apoc.convert.fromJsonList(row.embedding);"""} + # Maximum number of records used in the context HARD_LIMIT_CONTEXT_RECORDS = 10 -neo4j_connection = Neo4jDatabase( +neo4j_read_connection = Neo4jDatabase( host=os.environ.get("NEO4J_URL", "bolt://neo4j:7687"), user=os.environ.get("NEO4J_USER", "neo4j"), password=os.environ.get("NEO4J_PASS", "pleaseletmein"), ) +neo4j_write_connection = Neo4jDatabase( + host=os.environ.get("NEO4J_URL", "bolt://neo4j:7687"), + user=os.environ.get("NEO4J_USER", "neo4j"), + password=os.environ.get("NEO4J_PASS", "pleaseletmein"), + read_only=False +) # Initialize LLM modules openai_api_key = os.environ.get("OPENAI_API_KEY", "") text2cypher = Text2Cypher( - database=neo4j_connection, llm=OpenAIChat( + database=neo4j_read_connection, llm=OpenAIChat( openai_api_key=openai_api_key, model_name="gpt-3.5-turbo-0613"), - cypher_examples="" + cypher_examples="" ) summarize_results = SummarizeCypherResult(llm=OpenAIChat( @@ -128,6 +147,35 @@ async def onToken(token): except WebSocketDisconnect: print("disconnected") + +@app.get("/load") +async def root(dataset: str): + """ + Constructs appropriate indexes and import relevant dataset into Neo4j + """ + try: + queries = cypher[dataset].split(";") + for q in queries: + if q: + res = neo4j_write_connection.query(q) + print(res) + neo4j_read_connection.refresh_schema() + return {"message": "import successful"} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +@app.get("/init") +async def root(): + """ + Checks if the database is empty + """ + try: + return {"message": neo4j_read_connection.check_if_empty()} + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + if __name__ == "__main__": import uvicorn diff --git a/use_cases/kg_chat/docker-compose.yml 
b/use_cases/kg_chat/docker-compose.yml index 17e0769..de87efc 100644 --- a/use_cases/kg_chat/docker-compose.yml +++ b/use_cases/kg_chat/docker-compose.yml @@ -1,7 +1,7 @@ version: "3.7" services: neo4j: - image: neo4j:5.7 + image: neo4j:5.9 restart: always hostname: kgchat-neo4j container_name: kgchat-neo4j diff --git a/use_cases/shared/driver/neo4j.py b/use_cases/shared/driver/neo4j.py index 61715d5..ed8991e 100644 --- a/use_cases/shared/driver/neo4j.py +++ b/use_cases/shared/driver/neo4j.py @@ -4,7 +4,6 @@ from logger import logger - node_properties_query = """ CALL apoc.meta.data() YIELD label, other, elementType, type, property @@ -78,9 +77,9 @@ def _execute_read_only_query(tx, cypher_query: str, params: Optional[Dict] = {}) return [r.data() for r in result] def query( - self, - cypher_query: str, - params: Optional[Dict] = {} + self, + cypher_query: str, + params: Optional[Dict] = {} ) -> List[Dict[str, Any]]: with self._driver.session() as session: try: @@ -100,7 +99,8 @@ def query( except exceptions.ClientError as e: # Catch access mode errors if e.code == "Neo.ClientError.Statement.AccessMode": - return [{"code": "error", "message": "Couldn't execute the query due to the read only access to Neo4j"}] + return [ + {"code": "error", "message": "Couldn't execute the query due to the read only access to Neo4j"}] else: return [{"code": "error", "message": e}]