diff --git a/tutorials/llm_clinical_trials/configs/ner_dolly.cfg b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg new file mode 100644 index 00000000..c8c53325 --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_dolly.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Dolly.v1" +name = "dolly-v2-3b" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = "Drug,Dose" diff --git a/tutorials/llm_clinical_trials/configs/ner_falcon.cfg b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg new file mode 100644 index 00000000..89bb970b --- /dev/null +++ b/tutorials/llm_clinical_trials/configs/ner_falcon.cfg @@ -0,0 +1,17 @@ +[nlp] +lang = "en" +pipeline = ["llm"] +batch_size = 128 + +[components] + +[components.llm] +factory = "llm" + +[components.llm.model] +@llm_models = "spacy.Falcon.v1" +name = "falcon-7b-instruct" + +[components.llm.task] +@llm_tasks = "spacy.NER.v2" +labels = "Drug,Dose" diff --git a/tutorials/llm_clinical_trials/falcon_requirements.txt b/tutorials/llm_clinical_trials/falcon_requirements.txt new file mode 100644 index 00000000..6bbb8493 --- /dev/null +++ b/tutorials/llm_clinical_trials/falcon_requirements.txt @@ -0,0 +1,4 @@ +cupy-cuda117 +torch==1.13.1+cu117 -f https://download.pytorch.org/whl/torch_stable.html +transformers +einops \ No newline at end of file diff --git a/tutorials/llm_clinical_trials/project.yml b/tutorials/llm_clinical_trials/project.yml index 1bc66751..0f33a797 100644 --- a/tutorials/llm_clinical_trials/project.yml +++ b/tutorials/llm_clinical_trials/project.yml @@ -2,7 +2,7 @@ title: 'Clinical trial results extraction with LLMs' description: "Using an LLM in a spaCy pipeline to extract patient groups, treatments and outcomes in clinical trials." vars: - ner_config: "ner_openai.cfg" + ner_config: "ner_dolly.cfg" # "ner_falcon.cfg" # "ner_openai.cfg" trial_config: "trial_openai.cfg" pmid: 27144689 diff --git a/tutorials/llm_clinical_trials/scripts/visualise_entities.py b/tutorials/llm_clinical_trials/scripts/visualise_entities.py index 3c504156..da8c0653 100644 --- a/tutorials/llm_clinical_trials/scripts/visualise_entities.py +++ b/tutorials/llm_clinical_trials/scripts/visualise_entities.py @@ -13,8 +13,12 @@ def visualise_entities(pmid: int, config_path: Path, verbose: bool = False): text = read_trial(pmid, verbose=verbose) nlp = assemble(config_path) doc = nlp(text) - options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}} - displacy.serve(doc, style="ent", options=options) + # options = {"ents": ["Drug", "Dose"], "colors": {"Drug": "pink", "Dose": "orange"}} + ents = list(doc.ents) + print("ents", len(ents)) + for ent in ents: + print(ent.text, ent.label_) + #displacy.serve(doc, style="ent", options=options) if __name__ == "__main__":