Remove hardcoded values and references to gpt-3.5-turbo models (#458)
This includes updates to the docs, the tests, the CLI, and two engines.
caufieldjh authored Sep 23, 2024

Commit 4d4d71e (2 parents: 8a66db6 + 16fd9dc)
Showing 10 changed files with 25 additions and 34 deletions.
2 changes: 1 addition & 1 deletion docs/custom.md
@@ -365,7 +365,7 @@ The output of this is then passed through further SPIRES iterations.

#### Text length limit

- LLMs have context sizes limiting the combined length of their inputs and outputs. The `gpt-3.5-turbo` model, for example, has a 4,096 token limit (prompt + completion), while the `gpt-3.5-turbo-16k` model has a larger context of 16,384 tokens.
+ LLMs have context sizes limiting the combined length of their inputs and outputs.

To see the token limit for each model, use `ontogpt list-models`. The Max Tokens value will be in the fourth column.
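
For a single model, the listing can be filtered on the command line. A minimal sketch, assuming the model name appears verbatim in its row of the table:

```bash
# Print the full table, then narrow it to the row for one model;
# the Max Tokens value is the fourth column of that row.
ontogpt list-models
ontogpt list-models | grep "gpt-4o"
```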

8 changes: 4 additions & 4 deletions docs/functions.md
@@ -596,7 +596,7 @@ Options:
Examples:
```bash
- ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review" --get-pmc --model gpt-3.5-turbo-16k --limit 3
+ ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review" --get-pmc --model gpt-4o --limit 3
```
```bash
@@ -770,7 +770,7 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt web-extract -t reaction.Reaction -m gpt-3.5-turbo-16k https://www.scienceofcooking.com/maillard_reaction.htm
+ ontogpt web-extract -t reaction.Reaction -m gpt-4o https://www.scienceofcooking.com/maillard_reaction.htm
```
### wikipedia-extract
@@ -784,7 +784,7 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt wikipedia-extract -t mendelian_disease.MendelianDisease -m gpt-3.5-turbo-16k "Cartilage–hair hypoplasia"
+ ontogpt wikipedia-extract -t mendelian_disease.MendelianDisease -m gpt-4o "Cartilage–hair hypoplasia"
```
### wikipedia-search
@@ -798,5 +798,5 @@ Even relatively short pages may exceed a model's context size, so larger context
Example:
```bash
- ontogpt wikipedia-search -t biological_process -m gpt-3.5-turbo-16k "digestion"
+ ontogpt wikipedia-search -t biological_process -m gpt-4o "digestion"
```
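
Any model reported by `ontogpt list-models` can be substituted for `gpt-4o` in the examples above, including a locally served one. A minimal sketch, assuming an Ollama server is running and the `llama3` model has already been pulled:

```bash
# Same search as above, but routed to a local Ollama model instead of gpt-4o.
# Assumes Ollama is installed and running, with llama3 available locally.
ontogpt wikipedia-search -t biological_process -m ollama/llama3 "digestion"
```
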
3 changes: 1 addition & 2 deletions src/ontogpt/cli.py
@@ -714,7 +714,7 @@ def pubmed_annotate(
Example:
ontogpt pubmed-annotate -t phenotype "Takotsubo Cardiomyopathy: A Brief Review"
- --get-pmc --model gpt-3.5-turbo-16k --limit 3
+ --get-pmc --model gpt-4o --limit 3
"""
if not model:
model = DEFAULT_MODEL
@@ -1174,7 +1174,6 @@ def convert(
@model_option
@output_option_txt
@temperature_option
@cut_input_text_option
@api_base_option
@api_version_option
@model_provider_option
1 change: 0 additions & 1 deletion src/ontogpt/engines/halo_engine.py
@@ -57,7 +57,6 @@ def text(self) -> str:
class HALOEngine(KnowledgeEngine):
"""Engine for Hallucinating Latent Ontologies."""

- engine: str = "gpt-3.5-turbo"
ontology: Ontology = None
traverse_slots: List[FIELD] = field(
default_factory=lambda: ["subtypes", "parts", "subclass_of", "part_of"]
4 changes: 0 additions & 4 deletions src/ontogpt/engines/spires_engine.py
@@ -286,10 +286,6 @@ def map_terms(
"""
Map the given terms to the given ontology.
EXPERIMENTAL
currently GPT-3 does not do so well with this task.
:param terms:
:param ontology:
:return:
2 changes: 0 additions & 2 deletions src/ontogpt/engines/synonym_engine.py
@@ -9,8 +9,6 @@
class SynonymEngine(KnowledgeEngine):
"""Engine for generating synonyms."""

- engine: str = "gpt-3.5-turbo-instruct"

def synonyms(self, named_entity: str, domain: str) -> List[str]:
"""Get synonyms for a given text."""
prompt = f"List the example formal scientific\
1 change: 0 additions & 1 deletion src/ontogpt/webapp/main.py
@@ -31,7 +31,6 @@
"gpt-4o",
"gpt-4",
"gpt-4-turbo",
"gpt-3.5-turbo",
"ollama/llama2",
"ollama/llama3",
"ollama/orca-mini",
30 changes: 15 additions & 15 deletions tests/input/prompts/prompts.yaml
@@ -1,6 +1,6 @@
- default_engine: gpt-3.5-turbo
+ default_engine: gpt-4o
prompts:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -15,7 +15,7 @@ prompts:
expected:
- "predicate: INDUCES"
- "object: .*nystagmus"
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
pre_prompt: |
## Instructions:
## Add an additional element to the YAML below, which is for elements
@@ -99,31 +99,31 @@ prompts:
equivalent_to: Transport and utilizes some Train
- name: CarEngine
context: IndustrialOntology
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; part of; equivalent to.
Question: The relationship between the concepts Car and Vehicle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; supertype of; part of; has part; equivalent to.
Question: The relationship between the concepts Car and Vehicle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Answer the following question where the answer is one of: subtype of; supertype of; part of; has part; equivalent to.
Question: The relationship between the concepts Tire and Bicycle is:
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all the parts of a nucleus as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all the things a nucleus is a part of, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all parent concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
In the context of cell biology, list all child concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
triples: <A semi-colon separated list of chemical to disease relationships, where the relationship is either INDUCES or TREATS.
@@ -134,7 +134,7 @@ prompts:
In vivo evidences suggesting the role of oxidative stress in pathogenesis of vancomycin-induced nephrotoxicity:
protection by erdosteine
In the context of cell biology, list all child concepts for the concept of nucleus, as a semi-colon separated list.
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -145,7 +145,7 @@ prompts:
===
#expected:
# - "Veralipride INDUCES Parkinsonism"
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -154,7 +154,7 @@ prompts:
Text:
Kaliuretic effect of L-dopa treatment in parkinsonian patients.
===
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
From the text below, extract the following entities in the following format:
@@ -163,7 +163,7 @@ prompts:
Text:
Electrocardiographic evidence of myocardial injury in psychiatrically hospitalized cocaine abusers.
===
- - engine: gpt-3.5-turbo
+ - engine: gpt-4o
prompt: |
Generate a list of exact synonyms for the chemical concept "heavy metal", as a semi-colon separated list.
Only include concepts with the identical meaning. Do not include more specific concepts or broader concepts.
6 changes: 3 additions & 3 deletions tests/integration/test_clients/test_llmclient.py
@@ -153,7 +153,7 @@ def setUp(self) -> None:
def test_all_prompts(self):
"""Test all prompts."""
prompt_doc = yaml.safe_load(open(PROMPTS_FILE))
- default_engine = prompt_doc.get("default_engine", "gpt-3.5-turbo-instruct")
+ default_engine = prompt_doc.get("default_engine", "gpt-4o")
for prompt in prompt_doc["prompts"]:
prompt_text = prompt["prompt"]
if not isinstance(prompt_text, str):
@@ -199,15 +199,15 @@ def test_drug_mech_db(self):

def test_code_completion_generalization(self):
"""Tests structured responses."""
engine = "gpt-3.5-turbo"
engine = "gpt-4o"
client = LLMClient(model=engine)
print(len(CODE_PROMPT_GENERALIZATION))
ann = client.complete(CODE_PROMPT_GENERALIZATION)
print(ann)

def test_extract_via_code_completion(self):
"""Tests structured responses."""
engine = "gpt-3.5-turbo"
engine = "gpt-4o"
client = LLMClient(model=engine)
ann = client.complete(CODE_PROMPT_EXTRACT)
print(ann)
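
To exercise the updated tests locally, something like the following should work. This is a sketch that assumes `pytest` is available in the environment and that an OpenAI API key is configured, since these integration tests call the live `gpt-4o` API:

```bash
# Run only the updated LLM client integration tests.
# Assumes pytest is installed and an OpenAI API key is configured.
python -m pytest tests/integration/test_clients/test_llmclient.py
```
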
2 changes: 1 addition & 1 deletion tests/integration/test_evaluation/test_eval_drugmechdb.py
@@ -145,7 +145,7 @@ def test_training_set(self):
evaluator = self.engine
ke = evaluator.extractor
training_set = list(evaluator.create_training_set(100))
- t = dict(base_model="gpt-3.5-turbo-instruct", template=ke.template, examples=training_set)
+ t = dict(base_model="gpt-4o", template=ke.template, examples=training_set)
with open(TRAINING_OUT, "w") as f:
yaml.dump(t, f)
# print(yaml.dump(training_set))
