This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

Commit 68d3579: Feature/simplify sciphi (#107)

* Commit simplifying changes

* Commit simplifying changes

* Commit simplifying changes

* Commit simplifying changes

* Add back openai interface

* revive select providers

* cleanup sciphi-vllm-openai interlacing

* prep

* tweaks

* nit

* cleanup further

* check in progress

* revive data augmenter

* more fixes
emrgnt-cmplxty authored Oct 30, 2023
1 parent 5362d3c commit 68d3579
Showing 34 changed files with 613 additions and 889 deletions.
10 changes: 6 additions & 4 deletions .env.example

@@ -1,8 +1,10 @@
 # LLM Providers
-OPENAI_API_KEY=your_openai_key
-ANTHROPIC_API_KEY=your_anthropic_key
-VLLM_API_KEY=your_vllm_token
+## Fill where necessary.
+OPENAI_API_KEY=your_openai_api_key
+ANTHROPIC_API_KEY=your_anthropic_api_key
 HF_TOKEN=your_huggingface_token
+SCIPHI_API_KEY=your_sciphi_api_key
+VLLM_API_KEY=your_vllm_api_key
 # RAG Setttings
-RAG_API_BASE=your_rag_server_base_url
+RAG_API_BASE=your_rag_api_base_url
 RAG_API_KEY=your_rag_server_key
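For orientation, here is a minimal sketch of how a consumer might read the keys above, assuming `python-dotenv`; the repository's own loading code may differ.

```python
# Hedged sketch: assumes python-dotenv; SciPhi may wire up settings differently.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory

openai_key = os.environ["OPENAI_API_KEY"]   # required if using OpenAI
sciphi_key = os.getenv("SCIPHI_API_KEY")    # optional: "fill where necessary"
rag_api_base = os.getenv("RAG_API_BASE")    # base URL of your RAG server
```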
2 changes: 1 addition & 1 deletion .flake8

@@ -1,3 +1,3 @@
 [flake8]
-exclude = playground/*,sciphi/deprecated/**
+exclude = playground/*,sciphi/deprecated/**,dump/*
 ignore = E501, W503, E203, F541, W293, W291, E266
24 changes: 11 additions & 13 deletions README.md

@@ -15,8 +15,6 @@ With SciPhi, users can:
 
 ## Fast Setup
 
-### Bare Minimum
-
 ```bash
 pip install sciphi
 ```
@@ -31,8 +29,6 @@ pip install 'sciphi[all_with_extras]'
 - **All (no vLLM)**: `'sciphi[all]'`
 - **Anthropic**: `'sciphi[anthropic_support]'`
 - **HF (includes Torch)**: `'sciphi[hf_support]'`
-- **Llama-CPP**: `'sciphi[llama_cpp_support]'`
-- **Llama-Index**: `'sciphi[llama_index_support]'`
 - **VLLM (includes Torch)**: `'sciphi[vllm_support]'`
 
 ### **Setup Your Environment**
@@ -41,15 +37,17 @@ Navigate to your working directory and use a text editor to adjust the `.env` file
 
 ```bash
 # Proprietary Providers
-OPENAI_API_KEY=your_openai_key
-ANTHROPIC_API_KEY=your_anthropic_key
+OPENAI_API_KEY=your_openai_api_key
+ANTHROPIC_API_KEY=your_anthropic_api_key
 # Open Source Providers
 HF_TOKEN=your_huggingface_token
 # vLLM
-VLLM_API_KEY=your_vllm_token
+VLLM_API_KEY=your_vllm_api_key # for remote vLLM use.
+# SciPhi
+SCIPHI_API_KEY=your_sciphi_api_key # for remote vLLM use.
 # RAG Provider Settings
-RAG_API_BASE=your_rag_server_base_url
-RAG_API_KEY=your_rag_server_key
+RAG_API_KEY=your_rag_server_api_key
+RAG_API_BASE=your_rag_api_base_url
 ```
 
 After entering your settings, ensure you save and exit the file.
@@ -104,7 +102,7 @@ This is an effort to democratize access to top-tier textbooks. This can readily
 2. **Textbook Generation**:
 
    ```bash
-   python -m sciphi.scripts.textbook_generator run --toc_dir=sciphi/data/sample/table_of_contents --rag-enabled=False
+   python -m sciphi.scripts.textbook_generator run --toc_dir=sciphi/data/sample/table_of_contents --rag-enabled=False --filter_existing_books=False
    ```
 
 Replace `dry_run` in step 1 with `run` to generate one textbook for each table of contents in the target directory. See a [sample textbook here.](sciphi/data/sample/textbooks/Aerodynamics_of_Viscous_Fluids.md)
@@ -114,7 +112,7 @@ This is an effort to democratize access to top-tier textbooks. This can readily
 Prepare your table of contents and save it into `$PWD/toc/test.yaml`. Then, run the following command:
 
 ```bash
-python -m sciphi.scripts.generate_textbook run --toc_dir=toc --output_dir=books --data_dir=$PWD
+python -m sciphi.scripts.textbook_generator run --toc_dir=toc --output_dir=books --data_dir=$PWD
 ```
 
 For help with formatting your table of contents, [see here](https://github.com/SciPhi-AI/sciphi/blob/main/sciphi/data/library_of_phi/table_of_contents/Aerodynamics_of_Viscous_Fluids.yaml).
@@ -162,7 +160,7 @@ if __name__ == "__main__":
         temperature=llm_temperature,
         top_k=llm_top_k,
         # Used for re-routing requests to a remote vLLM server
-        server_base=kwargs.get("llm_server_base", None),
+        api_base=kwargs.get("llm_api_base", None),
     )
     rag_interface = RAGInterfaceManager.get_interface_from_args(
@@ -178,7 +176,7 @@ if __name__ == "__main__":
     # ... Continue ...
 ```
-Supported LLM providers include OpenAI, Anthropic, HuggingFace, and vLLM. For RAG database access, configure your own or use the SciPhi **gigaRAG API**.
+Supported LLM providers include OpenAI, Anthropic, HuggingFace, and vLLM. For RAG database access, configure your own or use the SciPhi **World Database API**.
 
 ### Setting Up Locally
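The `server_base` to `api_base` rename in the snippet above is the main user-facing API change in this file. A self-contained sketch of the call-site migration; apart from the two key names, every value below is illustrative.

```python
# Only the server_base -> api_base rename is confirmed by the diff above;
# the dict plumbing and the URL are illustrative stand-ins.
kwargs = {"llm_api_base": "http://localhost:8000/v1"}  # hypothetical server

# Before this commit:
# interface_args = {"server_base": kwargs.get("llm_server_base", None)}

# After this commit:
interface_args = {"api_base": kwargs.get("llm_api_base", None)}
print(interface_args)  # {'api_base': 'http://localhost:8000/v1'}
```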
11 changes: 1 addition & 10 deletions pyproject.toml

@@ -7,7 +7,7 @@ write_to = "sciphi/_version.py"
 
 [tool.poetry]
 name = "sciphi"
-version = "0.1.5"
+version = "0.1.6"
 description = "SciPhi: A Framework for LLM Powered Data."
 authors = ["Owen Colegrove <[email protected]>"]
 license = "Apache-2.0"
@@ -37,20 +37,13 @@ anthropic = { version = "^0.3.10", optional = true }
 accelerate = { version = "^0.23.0", optional = true }
 datasets = { version = "^2.14.5", optional = true }
 transformers = { version = "^4.33.1", optional = true }
-# llama-cpp
-llama-cpp-python = { version = "^0.2.11", optional = true }
-# llama-index
-llama-index = { version = "^0.8.29.post1", optional = true }
 # vllm
-# accelerate = { version = "^0.23.0", optional = true } ## Defined above in 'hf'
 vllm = { version = "0.2.0", optional = true }
 blingfire = "^0.1.8"
 
 [tool.poetry.extras]
 anthropic_support = ["anthropic"]
 hf_support = ["accelerate", "datasets", "torch", "transformers"]
-llama_cpp_support = ["llama-cpp-python"]
-llama_index_support = ["llama-index"]
 vllm_support = ["accelerate", "torch", "vllm"]
 
 all = [
@@ -72,8 +65,6 @@ all_with_extras = [
     "torch",
     "transformers",
     # More Extras
-    "llama-index",
-    "llama-cpp-python",
     "vllm",
 ]
 # To export dependencies to pip, use:
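To confirm the trimmed extras on an installed copy, one option is to query the package metadata; this sketch assumes `sciphi` 0.1.6 is installed under that distribution name.

```python
# Hedged sketch: lists the extras the installed sciphi distribution declares.
from importlib.metadata import metadata

md = metadata("sciphi")
# Expected to include anthropic_support, hf_support, and vllm_support, but
# no longer llama_cpp_support or llama_index_support.
print(md.get_all("Provides-Extra"))
```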

Some generated files are not rendered by default.

@@ -75,7 +75,11 @@ def __init__(
     def initialize_prompts(self):
         contexts = (
             self.rag_interface.get_contexts(
-                self.evals[ScienceMultipleChoiceEvaluator.PROMPT_FIELD]
+                list(
+                    self.evals[
+                        ScienceMultipleChoiceEvaluator.PROMPT_FIELD
+                    ].values
+                )
             )
             if self.rag_interface
             else [ScienceMultipleChoiceEvaluator.RAG_DISABLED_RESPONSE]
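The added wrapper most plausibly converts a column of evaluation prompts into a plain list before it reaches `get_contexts`. A runnable sketch of that conversion; treating `evals` as a pandas DataFrame and the column name `prompt` are assumptions of this example.

```python
# Assumes the evaluator's `evals` behaves like a pandas DataFrame; "prompt"
# stands in for ScienceMultipleChoiceEvaluator.PROMPT_FIELD.
import pandas as pd

evals = pd.DataFrame({"prompt": ["What is 2 + 2?", "Name a noble gas."]})

# `.values` yields a numpy ndarray; list(...) turns it into a plain
# list[str], which a JSON-serializing RAG client can consume directly.
prompts = list(evals["prompt"].values)
print(prompts)  # ['What is 2 + 2?', 'Name a noble gas.']
```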
22 changes: 12 additions & 10 deletions sciphi/interface/__init__.py

@@ -5,13 +5,15 @@
     RAGProviderConfig,
 )
 from sciphi.interface.llm.anthropic_interface import AnthropicLLMInterface
-from sciphi.interface.llm.hugging_face_interface import HuggingFaceLLMInterface
-from sciphi.interface.llm.litellm_interface import LiteLLMInterface
-from sciphi.interface.llm.llama_index_interface import LlamaIndexInterface
-from sciphi.interface.llm.llamacpp_interface import LlamaCPPInterface
+
+# from sciphi.interface.llm.hugging_face_interface import HuggingFaceLLMInterface
+# from sciphi.interface.llm.litellm_interface import LiteLLMInterface
+# from sciphi.interface.llm.llama_index_interface import LlamaIndexInterface
+# from sciphi.interface.llm.llamacpp_interface import LlamaCPPInterface
 from sciphi.interface.llm.openai_interface import OpenAILLMInterface
-from sciphi.interface.llm.vllm_interface import vLLMInterface
+from sciphi.interface.llm.sciphi_interface import SciPhiInterface
+
+# from sciphi.interface.llm.vllm_interface import vLLMInterface
 from sciphi.interface.llm_interface_manager import LLMInterfaceManager
 from sciphi.interface.rag.sciphi_wiki import (
     SciPhiWikiRAGConfig,
@@ -26,12 +28,12 @@
     "LLMInterface",
     # Concrete LLM Interfaces
     "AnthropicLLMInterface",
-    "HuggingFaceLLMInterface",
-    "LlamaIndexInterface",
+    # "HuggingFaceLLMInterface",
+    # "LlamaIndexInterface",
     "OpenAILLMInterface",
-    "vLLMInterface",
-    "LiteLLMInterface",
-    "LlamaCPPInterface",
+    # "vLLMInterface",
+    # "LiteLLMInterface",
+    # "LlamaCPPInterface",
     "SciPhiInterface",
     # RAG
     "RAGInterfaceManager",
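After this simplification only the Anthropic, OpenAI, and SciPhi interfaces (plus the managers) are re-exported. A quick sketch of what still imports cleanly, assuming the package is installed:

```python
# Exports that survive this commit, per the __all__ list above.
from sciphi.interface import (
    AnthropicLLMInterface,
    LLMInterfaceManager,
    OpenAILLMInterface,
    SciPhiInterface,
)

# The commented-out providers no longer resolve:
# from sciphi.interface import vLLMInterface  # ImportError after this commit
```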
25 changes: 16 additions & 9 deletions sciphi/interface/base.py

@@ -4,22 +4,22 @@
 from typing import Any, List, Type
 
 from sciphi.core import LLMProviderName, RAGProviderName
-from sciphi.llm import LLM, LLMConfig, ModelName
+from sciphi.llm import LLM, GenerationConfig, LLMConfig, ModelName
 
 
 @dataclass
 class LLMProviderConfig:
     """A dataclass to hold the configuration for a provider."""
 
-    llm_provider_name: LLMProviderName
+    provider_name: LLMProviderName
     models: List[ModelName]
     llm_class: Type["LLMInterface"]
 
 
 class LLMInterface(ABC):
     """An abstract class to provide a common interface for LLM providers."""
 
-    llm_provider_name: LLMProviderName
+    provider_name: LLMProviderName
     supported_models: list[ModelName] = []
 
     def __init__(
@@ -35,29 +35,36 @@ def model(self) -> LLM:
         pass
 
     @abstractmethod
-    def get_completion(self, prompt: str) -> str:
+    def get_completion(
+        self, prompt: str, generation_config: GenerationConfig
+    ) -> str:
         """Abstract method to get a completion from the provider."""
         pass
 
-    def get_batch_completion(self, prompts: List[str]) -> List[str]:
+    def get_batch_completion(
+        self, prompts: List[str], generation_config: GenerationConfig
+    ) -> List[str]:
         """Get a batch of completions from the provider."""
-        return [self.get_completion(prompt) for prompt in prompts]
+        return [
+            self.get_completion(prompt, generation_config)
+            for prompt in prompts
+        ]
 
 
 @dataclass
 class RAGProviderConfig(ABC):
     """An abstract class to hold the configuration for a RAG provider."""
 
-    rag_provider_name: RAGProviderName
-    base: str
+    provider_name: RAGProviderName
+    api_base: str
     api_key: str
     max_context: int = 2_048
 
 
 class RAGInterface(ABC):
     """An abstract class to provide a common interface for RAG providers."""
 
-    rag_provider_name: RAGProviderName
+    provider_name: RAGProviderName
     RAG_DISABLED_MESSAGE: str = "Not Available."
 
     def __init__(
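To make the new contract concrete, here is a toy provider against the updated `LLMInterface`; `EchoInterface` and its behavior are inventions of this sketch, and the config plumbing is elided.

```python
# Hypothetical subclass of the updated LLMInterface; not part of the commit.
from sciphi.interface.base import LLMInterface
from sciphi.llm import LLM, GenerationConfig


class EchoInterface(LLMInterface):
    """Toy provider that returns prompts unchanged."""

    @property
    def model(self) -> LLM:
        raise NotImplementedError("No backing model in this sketch.")

    def get_completion(
        self, prompt: str, generation_config: GenerationConfig
    ) -> str:
        # The inherited get_batch_completion threads generation_config
        # through to this method for every prompt in the batch.
        return prompt
```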
19 changes: 14 additions & 5 deletions sciphi/interface/llm/anthropic_interface.py

@@ -1,31 +1,40 @@
 """A module for interfacing with the Anthropic API"""
 from sciphi.interface.base import LLMInterface, LLMProviderName
 from sciphi.interface.llm_interface_manager import llm_interface
-from sciphi.llm import AnthropicConfig, AnthropicLLM, ModelName
+from sciphi.llm import (
+    AnthropicConfig,
+    AnthropicLLM,
+    GenerationConfig,
+    ModelName,
+)
 
 
 @llm_interface
 class AnthropicLLMInterface(LLMInterface):
     """A class to interface with the Anthropic API."""
 
-    llm_provider_name = LLMProviderName.ANTHROPIC
+    provider_name = LLMProviderName.ANTHROPIC
     supported_models = [
         ModelName.CLAUDE_INSTANT_1,
         ModelName.CLAUDE_2,
     ]
 
     def __init__(
         self,
-        config: AnthropicConfig = AnthropicConfig(),
+        config: AnthropicConfig,
         *args,
         **kwargs,
     ) -> None:
         super()
         self._model = AnthropicLLM(
             config,
         )
 
-    def get_completion(self, prompt: str) -> str:
+    def get_completion(
+        self, prompt: str, generation_config: GenerationConfig
+    ) -> str:
         """Get a completion from the remote Anthropic provider."""
-        return self.model.get_instruct_completion(prompt)
+        return self.model.get_instruct_completion(prompt, generation_config)
 
     @property
     def model(self) -> AnthropicLLM:
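A hedged usage sketch of the updated interface: the now-required `config` argument and the `generation_config` parameter come straight from the diff, while the `GenerationConfig` field shown is an assumption.

```python
# Usage sketch; assumes AnthropicConfig() still has workable defaults, that
# GenerationConfig accepts a `temperature` field, and that a valid
# ANTHROPIC_API_KEY is available in the environment.
from sciphi.interface import AnthropicLLMInterface
from sciphi.llm import AnthropicConfig, GenerationConfig

interface = AnthropicLLMInterface(AnthropicConfig())  # config is now required
completion = interface.get_completion(
    "Summarize the second law of thermodynamics in one sentence.",
    GenerationConfig(temperature=0.1),  # hypothetical field name
)
print(completion)
```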
4 changes: 2 additions & 2 deletions sciphi/interface/llm/hugging_face_interface.py

@@ -12,11 +12,11 @@
 class HuggingFaceLLMInterface(LLMInterface):
     """A class to interface with local HuggingFace models."""
 
-    llm_provider_name = LLMProviderName.HUGGING_FACE
+    provider_name = LLMProviderName.HUGGING_FACE
 
     def __init__(
         self,
-        config: HuggingFaceConfig = HuggingFaceConfig(),
+        config: HuggingFaceConfig,
     ) -> None:
         self._model = HuggingFaceLLM(config)
 
51 changes: 0 additions & 51 deletions sciphi/interface/llm/litellm_interface.py

This file was deleted.

(The remaining changed files of the 34 are not shown here.)
