Integrate llama embeddings #116

Merged: 17 commits, Feb 4, 2025
9 changes: 5 additions & 4 deletions .env.dev
@@ -2,11 +2,7 @@
ENVIRONMENT=development

-PERSISTENT_Q_TABLE_PATH=persistent_q_table.json

-PLANNING_ALPHA=0.1
-PLANNING_GAMMA=0.95
-PLANNING_EPSILON=0.1

# === Memory module settings ===

@@ -34,6 +30,11 @@ DEEPSEEK_API_KEY=
DEEPSEEK_MODEL=deepseek-chat
DEEPSEEK_API_BASE_URL=https://api.deepseek.com

+EMBEDDING_PROVIDER=
+LLAMA_MODEL_PATH=

# === Third-party services settings ===

# Perplexity
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/bug_report.yaml
@@ -2,6 +2,7 @@ name: 🐞 Bug Report
description: Create a report to help us improve
title: "[BUG] "
labels: ["bug", "triage"]
type: bug
assignees: []
body:
- type: checkboxes
1 change: 1 addition & 0 deletions .github/ISSUE_TEMPLATE/feature_request.yaml
@@ -2,6 +2,7 @@ name: 💡 Feature Request
description: Suggest an idea for this project
title: "[FEATURE] "
labels: ["feature", "enhancement"]
type: feature
assignees: []
body:
- type: checkboxes
2 changes: 1 addition & 1 deletion .vscode/launch.json
@@ -2,7 +2,7 @@
"version": "0.2.0",
"configurations": [
{
"name": "Python: AA Core",
"name": "Python: Nevron",
"type": "python",
"request": "launch",
"module": "src.main",
6 changes: 5 additions & 1 deletion Dockerfile
@@ -16,6 +16,9 @@ ENV PYTHONFAULTHANDLER=1

RUN apt-get update && apt-get install -y \
curl \
git \
gcc \
g++ \
build-essential \
python3-dev \
&& rm -rf /var/lib/apt/lists/*
@@ -41,6 +44,7 @@ RUN mkdir logs

COPY pyproject.toml poetry.lock ./

# Install other dependencies with Poetry
RUN poetry install --no-interaction --no-ansi --no-root

# --------------
@@ -53,4 +57,4 @@ COPY entrypoint.sh /nevron/entrypoint.sh
RUN chmod +x /nevron/entrypoint.sh

# Command to run the application
-ENTRYPOINT ["/nevron/entrypoint.sh"]
+ENTRYPOINT ["/nevron/entrypoint.sh"]
2 changes: 1 addition & 1 deletion README.md
@@ -1,5 +1,5 @@
# Nevron - Autonomous Agent
-[![CI](https://github.com/axioma-ai-labs/aa-core/actions/workflows/main.yml/badge.svg)](https://github.com/axioma-ai-labs/aa-core/actions/workflows/main.yml)
+[![CI](https://github.com/axioma-ai-labs/nevron/actions/workflows/main.yml/badge.svg)](https://github.com/axioma-ai-labs/nevron/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/axioma-ai-labs/nevron/branch/main/graph/badge.svg?token=krO46pgB7P)](https://codecov.io/gh/axioma-ai-labs/nevron)
[![Build Docker image](https://github.com/axioma-ai-labs/nevron/actions/workflows/docker.yml/badge.svg)](https://github.com/axioma-ai-labs/nevron/actions/workflows/docker.yml)
[![Docs](https://img.shields.io/badge/Nevron-Docs-blue)](https://axioma-ai-labs.github.io/nevron/)
2 changes: 1 addition & 1 deletion docs/index.md
@@ -1,6 +1,6 @@
# **Developer Documentation**

-[![CI](https://github.com/axioma-ai-labs/aa-core/actions/workflows/main.yml/badge.svg)](https://github.com/axioma-ai-labs/aa-core/actions/workflows/main.yml)
+[![CI](https://github.com/axioma-ai-labs/nevron/actions/workflows/main.yml/badge.svg)](https://github.com/axioma-ai-labs/nevron/actions/workflows/main.yml)
[![codecov](https://codecov.io/gh/axioma-ai-labs/nevron/branch/main/graph/badge.svg?token=krO46pgB7P)](https://codecov.io/gh/axioma-ai-labs/nevron)
[![Build Docker image](https://github.com/axioma-ai-labs/nevron/actions/workflows/docker.yml/badge.svg)](https://github.com/axioma-ai-labs/nevron/actions/workflows/docker.yml)
[![Docs](https://img.shields.io/badge/Nevron-Docs-blue)](https://axioma-ai-labs.github.io/nevron/)
432 changes: 249 additions & 183 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 7 additions & 1 deletion pyproject.toml
@@ -9,7 +9,7 @@ authors = []
description = "Nevron AI Agent"
readme = "README.md"
keywords = ["agent", "ai"]
requires-python = ">=3.12"
requires-python = ">=3.8"
classifiers = [
"Programming Language :: Python :: 3",
"License :: GNU General Public License v3.0",
@@ -75,6 +75,12 @@ pytest-asyncio = "^0.25.2"
pytest-cov = "^6.0.0"
types-requests = "^2.32.0.20241016"
freezegun = "^1.5.1"
llama-cpp-python = "^0.3.7"

[[tool.poetry.source]]
name = "llama-cpp-python-cpu"
url = "https://abetlen.github.io/llama-cpp-python/whl/cpu/"
priority = "supplemental"

[tool.mypy]
python_version = "3.13"
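The block above adds llama-cpp-python and a supplemental CPU wheel index to pull prebuilt wheels from. A minimal sanity check (not part of this PR; the GGUF path is illustrative) that the installed package can load a local model and produce embeddings:

from llama_cpp import Llama

# Assumed local GGUF model path; replace with any embedding-capable model.
llm = Llama(
    model_path="/models/llama-3.1-8b.gguf",
    embedding=True,   # expose the embedding API
    verbose=False,
)

result = llm.create_embedding("hello world")
print(len(result["data"][0]["embedding"]))  # embedding dimensionality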
85 changes: 83 additions & 2 deletions src/core/config.py
@@ -3,7 +3,14 @@
from pydantic import field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

-from src.core.defs import Environment, LlamaProviderType, LLMProviderType, MemoryBackendType
+from src.core.defs import (
+    EmbeddingProviderType,
+    Environment,
+    LlamaPoolingType,
+    LlamaProviderType,
+    LLMProviderType,
+    MemoryBackendType,
+)


class Settings(BaseSettings):
@@ -45,6 +52,12 @@ class Settings(BaseSettings):
#: Memory persist directory. Used only for ChromaDB.
MEMORY_PERSIST_DIRECTORY: str = ".chromadb"

EMBEDDING_PROVIDER: EmbeddingProviderType = EmbeddingProviderType.OPENAI
LLAMA_MODEL_PATH: str = "/path/to/your/local/llama/model"
LLAMA_EMBEDDING_MODEL: str = "llama3.1-8b" # llama2-7b
# Embedding pooling type for local Llama models (NONE, MEAN, CLS, LAST, RANK), defaults to MEAN pooling
EMBEDDING_POOLING_TYPE: LlamaPoolingType = LlamaPoolingType.MEAN

# --- LLMs settings ---

LLM_PROVIDER: LLMProviderType = LLMProviderType.OPENAI
@@ -73,7 +86,7 @@
QWEN_API_BASE_URL: str = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"

#: Llama
-LLAMA_PROVIDER: LlamaProviderType = LlamaProviderType.OLLAMA
+LLAMA_PROVIDER: LlamaProviderType = LlamaProviderType.LLAMA_API
LLAMA_MODEL_NAME: str = "llama3-8b-8192" # Model name is usually unique for each provider
LLAMA_API_KEY: str = "" # API key for your provider

@@ -229,5 +242,73 @@ def validate_memory_settings(self, params, required_params):
if not isinstance(params[param], param_type):
raise ValueError(f"{param} must be of type {param_type.__name__}.")

@field_validator("EMBEDDING_PROVIDER", mode="before")
def validate_embedding_provider(
cls, value: str | EmbeddingProviderType
) -> EmbeddingProviderType:
"""Convert string to EmbeddingProviderType enum."""
if isinstance(value, EmbeddingProviderType):
return value
try:
# Map string values to enum
if value.lower() == "openai":
return EmbeddingProviderType.OPENAI
elif value.lower() == "llama_local":
return EmbeddingProviderType.LLAMA_LOCAL
elif value.lower() == "llama_api":
return EmbeddingProviderType.LLAMA_API
else:
raise ValueError(f"Invalid embedding provider: {value}")
except Exception as e:
raise ValueError(f"Invalid embedding provider: {value}") from e

@field_validator("EMBEDDING_POOLING_TYPE", mode="before")
def validate_embedding_pooling_type(cls, value: str | LlamaPoolingType) -> LlamaPoolingType:
"""Convert string to LlamaPoolingType enum."""
if isinstance(value, LlamaPoolingType):
return value
try:
return LlamaPoolingType[value.upper()]
except KeyError:
raise ValueError(
f"Invalid pooling type: {value}. Must be one of {list(LlamaPoolingType)}"
)

@field_validator("LLAMA_PROVIDER", mode="before")
def validate_llama_provider(cls, value: str | LlamaProviderType) -> LlamaProviderType:
"""Convert string to LlamaProviderType enum."""
if isinstance(value, LlamaProviderType):
return value
try:
return LlamaProviderType[value.upper()]
except KeyError:
raise ValueError(
f"Invalid Llama provider: {value}. Must be one of {list(LlamaProviderType)}"
)

@field_validator("LLM_PROVIDER", mode="before")
def validate_llm_provider(cls, value: str | LLMProviderType) -> LLMProviderType:
"""Convert string to LLMProviderType enum."""
if isinstance(value, LLMProviderType):
return value
try:
return LLMProviderType[value.upper()]
except KeyError:
raise ValueError(
f"Invalid LLM provider: {value}. Must be one of {list(LLMProviderType)}"
)

@field_validator("MEMORY_BACKEND_TYPE", mode="before")
def validate_memory_backend_type(cls, value: str | MemoryBackendType) -> MemoryBackendType:
"""Convert string to MemoryBackendType enum."""
if isinstance(value, MemoryBackendType):
return value
try:
return MemoryBackendType[value.upper()]
except KeyError:
raise ValueError(
f"Invalid memory backend type: {value}. Must be one of {list(MemoryBackendType)}"
)


settings = Settings(_env_file=".env", _env_file_encoding="utf-8") # type: ignore[call-arg]
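A short sketch (not from the diff) of how the mode="before" validators behave: string values coming from .env are coerced into the corresponding enums, and unknown values are rejected. The model path below is illustrative.

from src.core.config import Settings
from src.core.defs import EmbeddingProviderType, LlamaPoolingType

settings = Settings(
    EMBEDDING_PROVIDER="llama_local",              # coerced to EmbeddingProviderType.LLAMA_LOCAL
    EMBEDDING_POOLING_TYPE="mean",                 # coerced to LlamaPoolingType.MEAN
    LLAMA_MODEL_PATH="/models/llama-3.1-8b.gguf",  # illustrative path
)

assert settings.EMBEDDING_PROVIDER == EmbeddingProviderType.LLAMA_LOCAL
assert settings.EMBEDDING_POOLING_TYPE == LlamaPoolingType.MEAN

# Settings(EMBEDDING_PROVIDER="unknown")  # rejected by the validator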
23 changes: 22 additions & 1 deletion src/core/defs.py
@@ -2,6 +2,8 @@

from enum import Enum

import llama_cpp


class Environment(str, Enum):
"""Environment type."""
@@ -76,5 +78,24 @@ class LlamaProviderType(str, Enum):

OLLAMA = "ollama"
FIREWORKS = "fireworks"
LLAMA_API = "llama_api"
LLAMA_API = "llama-api"
LLAMA_LOCAL = "llama_local"
OPENROUTER = "openrouter"


class LlamaPoolingType(int, Enum):
"""local Llama model pooling type."""

NONE = llama_cpp.LLAMA_POOLING_TYPE_NONE
MEAN = llama_cpp.LLAMA_POOLING_TYPE_MEAN
CLS = llama_cpp.LLAMA_POOLING_TYPE_CLS
LAST = llama_cpp.LLAMA_POOLING_TYPE_LAST
RANK = llama_cpp.LLAMA_POOLING_TYPE_RANK


class EmbeddingProviderType(str, Enum):
"""Embedding provider type."""

OPENAI = LLMProviderType.OPENAI
LLAMA_LOCAL = LlamaProviderType.LLAMA_LOCAL
LLAMA_API = LlamaProviderType.LLAMA_API
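Illustrative only (not part of the diff): LlamaPoolingType mirrors llama.cpp's pooling constants as plain integers, while EmbeddingProviderType reuses the existing provider members so the configuration strings have a single source of truth.

import llama_cpp

from src.core.defs import EmbeddingProviderType, LlamaPoolingType, LlamaProviderType

assert LlamaPoolingType.MEAN == llama_cpp.LLAMA_POOLING_TYPE_MEAN  # int-valued enum
assert EmbeddingProviderType.LLAMA_LOCAL.value == LlamaProviderType.LLAMA_LOCAL
print([member.name for member in EmbeddingProviderType])  # ['OPENAI', 'LLAMA_LOCAL', 'LLAMA_API']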
63 changes: 43 additions & 20 deletions src/llm/embeddings.py
@@ -1,30 +1,57 @@
-from typing import List, Union
+from typing import List, Optional, Union

import numpy as np
from llama_cpp import Llama
from loguru import logger
from openai import AsyncOpenAI

from src.core.config import settings
-from src.llm.llm import get_oai_client
+from src.core.defs import EmbeddingProviderType
+from src.llm.llm import get_embedding_client, get_llama_model
+from src.llm.providers.llama_embeddings import (
+    generate_embedding_api,
+    generate_llama_embedding_local,
+)


class EmbeddingGenerator:
"""A class to generate embeddings using OpenAI's text embedding models."""
"""A class to generate embeddings using multiple provider options."""

def __init__(
self,
-client: AsyncOpenAI = get_oai_client(),
-model: str = settings.OPENAI_EMBEDDING_MODEL,
+provider: EmbeddingProviderType = settings.EMBEDDING_PROVIDER,
+embedding_client: Optional[AsyncOpenAI] = None,
+llama_model: Optional[Union[str, Llama]] = None,
):
"""
Initialize the embedding generator.

Args:
-client: AsyncOpenAI client instance
-model: The OpenAI model to use for embeddings
+provider: The embedding provider to use ('openai', 'llama_local', 'llama_api', ...)
+embedding_client: Optional pre-configured OpenAI-compatible client (default: None)
+llama_model: Optional pre-configured Llama model or model path (default: None)
"""
-self.client = client
-self.model = model
+self.provider = provider
logger.debug(f"Using Embedding provider: {self.provider}")

if self.provider == EmbeddingProviderType.OPENAI:
self.client = embedding_client or get_embedding_client(self.provider)
self.model_name: str = settings.OPENAI_EMBEDDING_MODEL
elif self.provider == EmbeddingProviderType.LLAMA_LOCAL:
if isinstance(llama_model, Llama):
self.llama_model: Llama = llama_model
else:
model_path = (
llama_model
if (isinstance(llama_model, str) and llama_model != "")
else settings.LLAMA_MODEL_PATH
)
self.llama_model = get_llama_model(model_path)
elif self.provider == EmbeddingProviderType.LLAMA_API:
self.client = embedding_client or get_embedding_client(self.provider)
self.model_name = settings.LLAMA_EMBEDDING_MODEL
else:
raise ValueError(f"Unsupported embedding provider: {self.provider}")

async def get_embedding(self, text: Union[str, List[str]]) -> np.ndarray:
"""
@@ -43,17 +70,13 @@ async def get_embedding(self, text: Union[str, List[str]]) -> np.ndarray:
if not text:
raise ValueError("Input text cannot be empty")

-# Convert single string to list for consistent handling
-texts = [text] if isinstance(text, str) else text

try:
-    logger.debug(f"Getting embeddings for {len(texts)} texts")
-    response = await self.client.embeddings.create(model=self.model, input=texts)
-
-    # Extract embeddings from response
-    embeddings = [data.embedding for data in response.data]
-    return np.array(embeddings)
+    if self.provider in (EmbeddingProviderType.OPENAI, EmbeddingProviderType.LLAMA_API):
+        return await generate_embedding_api(self.client, text, self.model_name)
+    elif self.provider == EmbeddingProviderType.LLAMA_LOCAL:
+        return await generate_llama_embedding_local(self.llama_model, text)
+    else:
+        raise ValueError(f"Unsupported provider: {self.provider}")
except Exception as e:
-    logger.error(f"Error getting embeddings: {str(e)}")
+    logger.error(f"Error getting embeddings from {self.provider}: {str(e)}")
raise
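A usage sketch (not part of the diff), assuming provider credentials or a local model path are configured via settings; the output shape depends on the chosen provider and model.

import asyncio

from src.llm.embeddings import EmbeddingGenerator


async def main() -> None:
    # Provider defaults to settings.EMBEDDING_PROVIDER; pass provider=... to override.
    generator = EmbeddingGenerator()
    vectors = await generator.get_embedding(["hello world", "nevron agent"])
    print(vectors.shape)  # e.g. (2, 1536) for OpenAI text-embedding models


asyncio.run(main())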