Skip to content

Commit

Permalink
added pqa literature search tool
Browse files Browse the repository at this point in the history
  • Loading branch information
SamCox822 committed Feb 21, 2024
1 parent 8a19fed commit f73a662
Show file tree
Hide file tree
Showing 12 changed files with 98 additions and 56 deletions.
3 changes: 0 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,5 @@
# OpenAI API Key
OPENAI_API_KEY=YOUR_OPENAI_API_KEY_GOES_HERE # pragma: allowlist secret

# PQA API Key
PQA_API_KEY=YOUR_PQA_API_KEY_GOES_HERE # pragma: allowlist secret

# Serp API key
SERP_API_KEY=YOUR_SERP_API_KEY_GOES_HERE # pragma: allowlist secret
4 changes: 2 additions & 2 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@ jobs:

steps:
- uses: actions/checkout@v2
- name: Set up Python "3.9"
- name: Set up Python "3.11"
uses: actions/setup-python@v2
with:
python-version: "3.9"
python-version: "3.11"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
1 change: 0 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,5 @@ jobs:
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
SEMANTIC_SCHOLAR_API_KEY: ${{ secrets.SEMANTIC_SCHOLAR_API_KEY }}
PQA_API_KEY : ${{ secrets.PQA_API_TOKEN }}
run: |
pytest -m "not skip" tests
3 changes: 0 additions & 3 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,5 @@
# Rule for detecting OpenAI API keys
OpenAI API Key: \b[secrets]{3}_[a-zA-Z0-9]{32}\b

# Rule for detecting pqa API keys
PQA API Key: "pqa[a-zA-Z0-9-._]+"

# Rule for detecting serp API keys
# Serp API Key: "[a-zA-Z0-9]{64}"
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ To use the OpenMM features in the agent, please set up a conda environment, foll
- Create conda environment: `conda env create -n mdagent -f environment.yaml`
- Activate your environment: `conda activate mdagent`

If you already have a conda environment, you can install the necessary dependencies with the following steps.
- Install the necessary conda dependencies: `conda install -c conda-forge openmm pdbfixer mdanalysis`
If you already have a conda environment, you can install pdbfixer, a necessary dependency, with the following step.
- Install the necessary conda dependency: `conda install -c conda-forge pdbfixer`


## Installation
Expand Down
1 change: 1 addition & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pre-commit
pytest
pytest-mock
83 changes: 63 additions & 20 deletions mdagent/tools/base_tools/util_tools/search_tools.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,69 @@
import pqapi
from langchain.tools import BaseTool
import os
import re
from langchain.base_language import BaseLanguageModel
import langchain
import paperqa
import paperscraper
from pypdf.errors import PdfReadError


class Scholar2ResultLLM(BaseTool):
name = "LiteratureSearch"
description = """Input a specific question,
returns an answer from literature search."""
def paper_scraper(search: str, pdir: str = "query") -> dict:
    """Search for papers matching ``search`` and return the scraper's result.

    Args:
        search: Query string passed to ``paperscraper.search_papers``.
        pdir: Directory handed to the scraper through its ``pdir`` argument.

    Returns:
        Whatever ``paperscraper.search_papers`` returns, or an empty dict
        when the scraper raises ``KeyError``.
    """
    results = {}
    try:
        results = paperscraper.search_papers(search, pdir=pdir)
    except KeyError:
        # Deliberate best effort: fall through and report "no papers".
        pass
    return results

def paper_search(llm, query):
    """Turn a question into a search query with the LLM and scrape papers.

    Args:
        llm: Language model used to build the ``LLMChain`` that rewrites the
            question into a literature search query.
        query: The question to find scholarly papers for.

    Returns:
        The result of ``paper_scraper`` for the generated search string
        (an empty dict when the scraper hits a ``KeyError``).
    """
    prompt = langchain.prompts.PromptTemplate(
        input_variables=["question"],
        template="""
I would like to find scholarly papers to answer
this question: {question}.
'A search query that would bring up papers that can answer
this question would be: '""",
    )

    query_chain = langchain.chains.llm.LLMChain(llm=llm, prompt=prompt)
    # todo: move to ckpt
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs("query", exist_ok=True)

    search = query_chain.run(query)
    print("\nSearch:", search)
    # The search text comes straight from the LLM and may contain quotes,
    # newlines, path separators or dots; keep only word characters and
    # dashes so it is always a single, safe directory name under query/.
    slug = re.sub(r"[^\w-]+", "", search) or "search"
    return paper_scraper(search, pdir=f"query/{slug}")

def __init__(self, pqa_key: str):
super().__init__()
self.pqa_key = pqa_key

def _run(self, question: str) -> str:
"""Use the tool"""
def scholar2result_llm(llm, query):
    """Answer a technical question from scraped literature.

    Useful to answer questions that require technical knowledge.
    Ask a specific question.

    Args:
        llm: Language model passed to ``paper_search`` and ``paperqa.Docs``.
        query: The question to answer.

    Returns:
        ``"Not enough papers found"`` when the search yields nothing,
        otherwise the ``formatted_answer`` of the ``paperqa`` query.
    """
    papers = paper_search(llm, query)
    if not papers:
        return "Not enough papers found"

    docs = paperqa.Docs(llm=llm)
    not_loaded = 0
    for path, data in papers.items():
        try:
            docs.add(path, data["citation"])
        except (ValueError, FileNotFoundError, PdfReadError):
            # A paper that cannot be read is counted and skipped.
            not_loaded += 1

    print(f"\nFound {len(papers)} papers but couldn't load {not_loaded}")
    return docs.query(query).formatted_answer


class Scholar2ResultLLM:
    """Literature-search tool that delegates to ``scholar2result_llm``."""

    name = "Literature Search"
    # Adjacent string literals concatenate into a single str. Separating
    # them with commas (as before) silently produced a 2-tuple instead.
    description = (
        "Useful to answer questions that require technical "
        "knowledge. Ask a specific question."
    )
    # Language model forwarded to scholar2result_llm on every run.
    llm: BaseLanguageModel

    def __init__(self, llm):
        """Store the language model used for searching and answering."""
        self.llm = llm

    def _run(self, query) -> str:
        """Run the literature search for ``query`` and return the answer."""
        return scholar2result_llm(self.llm, query)

    async def _arun(self, query) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("this tool does not support async")
10 changes: 2 additions & 8 deletions mdagent/tools/maketools.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
SimulationOutputFigures,
SmallMolPDB,
VisualizeProtein,
Scholar2ResultLLM,
)
from .subagent_tools import RetryExecuteSkill, SkillRetrieval, WorkflowPlan

Expand Down Expand Up @@ -78,6 +79,7 @@ def make_all_tools(

# add base tools
base_tools = [
Scholar2ResultLLM(llm=llm),
CleaningToolFunction(path_registry=path_instance),
CheckDirectoryFiles(),
ListRegistryPaths(path_registry=path_instance),
Expand Down Expand Up @@ -113,14 +115,6 @@ def make_all_tools(
learned_tools = get_learned_tools(subagent_settings.ckpt_dir)

all_tools += base_tools + subagents_tools + learned_tools

# add other tools depending on api keys
os.getenv("SERP_API_KEY")
pqa_key = os.getenv("PQA_API_KEY")
# if serp_key:
# all_tools.append(SerpGitTool(serp_key)) # github issues search
if pqa_key:
all_tools.append(Scholar2ResultLLM(pqa_key)) # literature search
return all_tools


Expand Down
9 changes: 4 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,22 +17,21 @@
license="MIT",
packages=find_packages(),
install_requires=[
"paper-scraper @ git+https://github.com/blackadad/paper-scraper.git",
"chromadb==0.3.29",
"google-search-results",
"langchain==0.0.336",
"langchain_experimental",
"matplotlib",
"nbformat",
"openai",
"paper-qa",
"python-dotenv",
"pqapi",
"requests",
"rmrkl",
"tiktoken",
"rdkit",
"streamlit",
"paper-qa",
"openmm",
"MDAnalysis",
"paper-scraper @ git+https://github.com/blackadad/paper-scraper.git",
],
test_suite="tests",
long_description=long_description,
Expand Down
7 changes: 1 addition & 6 deletions tests/test_sims_and_clean.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@

import pytest

from mdagent.tools.base_tools import (
CleaningTools,
SimulationFunctions,
)
from mdagent.tools.base_tools import CleaningTools, SimulationFunctions
from mdagent.tools.base_tools.preprocess_tools.pdb_tools import MolPDB, PackMolTool
from mdagent.utils import PathRegistry

Expand Down Expand Up @@ -58,7 +55,6 @@ def packmol(get_registry):
return PackMolTool(get_registry)



# Smoke test: run the cleaning step on the `path_to_cif` fixture and check the result text.
def test_add_hydrogens_and_remove_water(path_to_cif, cleaning_fxns, get_registry):
result = cleaning_fxns._add_hydrogens_and_remove_water(path_to_cif, get_registry)
assert "Cleaned File" in result # just want to make sure the function ran
Expand Down Expand Up @@ -183,4 +179,3 @@ def test_packmol_download_only_once(packmol):
assert water_time == water_time_after
# Clean up
os.remove("files/pdb/water.pdb")

28 changes: 22 additions & 6 deletions tests/test_tools.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import os
import warnings
from unittest.mock import MagicMock, mock_open, patch

from langchain.chat_models import ChatOpenAI
import pytest

from mdagent.tools.base_tools import (
VisFunctions,
get_pdb,
)
from mdagent.tools.base_tools import Scholar2ResultLLM
from mdagent.tools.base_tools import VisFunctions, get_pdb
from mdagent.tools.base_tools.analysis_tools.plot_tools import plot_data, process_csv
from mdagent.utils import PathRegistry

Expand Down Expand Up @@ -116,3 +113,22 @@ def test_getpdb(fibronectin, get_registry):
name, _ = get_pdb(fibronectin, get_registry)
assert name.endswith(".pdb")

@pytest.fixture
def questions():
    """Provide the literature-search question as a single string."""
    return "What are the effects of norhalichondrin B in mammals?"

@pytest.mark.skip(reason="This requires an API call")
def test_litsearch(questions):
    """Check that the literature-search tool returns a non-empty string."""
    llm = ChatOpenAI()
    searchtool = Scholar2ResultLLM(llm=llm)

    # The fixture may provide one question as a plain string; looping over a
    # str directly would send it to the tool one character at a time.
    if isinstance(questions, str):
        question_list = [questions]
    else:
        question_list = list(questions)

    for q in question_list:
        ans = searchtool._run(q)
        assert isinstance(ans, str)
        assert len(ans) > 0
    # then if query folder exists one step back, delete it
    # NOTE(review): os.rmdir only removes an empty directory, and
    # paper_search creates "query/" relative to the working directory -
    # confirm "../query" is the intended cleanup target.
    if os.path.exists("../query"):
        os.rmdir("../query")
1 change: 1 addition & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ def test_map_path():
# Check the result message
assert result == "Path successfully mapped to name: new_name"


mocked_files = {"files/solvents": ["water.pdb"]}


Expand Down

0 comments on commit f73a662

Please sign in to comment.