
Commit

Merge branch 'main' into main
matchcase authored Feb 13, 2025
2 parents bb192b1 + 3da6f22 commit f6641de
Showing 10 changed files with 196 additions and 80 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "langcheck"
version = "0.9.0"
version = "0.9.0.dev1"
description = "Simple, Pythonic building blocks to evaluate LLM-based applications"
readme = "README.md"
authors = [{ name = "Citadel AI", email = "[email protected]" }]
2 changes: 1 addition & 1 deletion src/langcheck/__init__.py
@@ -1,4 +1,4 @@
from langcheck import augment, metrics, plot, utils

__all__ = ["augment", "metrics", "plot", "utils"]
__version__ = "0.9.0"
__version__ = "0.9.0.dev1"
25 changes: 23 additions & 2 deletions src/langcheck/metrics/eval_clients/_anthropic.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import asyncio
import warnings
from collections.abc import Iterable
from typing import Any

@@ -21,6 +22,7 @@ def __init__(
anthropic_args: dict[str, Any] | None = None,
*,
use_async: bool = False,
system_prompt: str | None = None,
):
"""
Initialize the Anthropic evaluation client. The authentication
@@ -32,6 +34,8 @@ def __init__(
anthropic_args: (Optional) dict of additional args to pass in to
the ``client.messages.create`` function
use_async: (Optional) If True, the async client will be used.
system_prompt: (Optional) The system prompt to use. If not provided,
no system prompt will be used.
"""
if anthropic_client:
self._client = anthropic_client
@@ -42,12 +46,20 @@ def __init__(

self._anthropic_args = anthropic_args or {}
self._use_async = use_async
self._system_prompt = system_prompt

if system_prompt and "system" in self._anthropic_args:
warnings.warn(
'"system" of anthropic_args will be ignored because '
"system_prompt is provided."
)

def _call_api(
self,
prompts: Iterable[str | None],
config: dict[str, Any],
*,
system_prompt: str | None = None,
tqdm_description: str | None = None,
) -> list[Any]:
# A helper function to call the API with exception filter for alignment
@@ -60,8 +72,14 @@ def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any:
except Exception as e:
return e

if system_prompt:
config["system"] = system_prompt

model_inputs = [
{"messages": [{"role": "user", "content": prompt}], **config}
{
"messages": [{"role": "user", "content": prompt}],
**config,
}
for prompt in prompts
]

@@ -121,7 +139,10 @@ def get_text_responses(
config.update(self._anthropic_args or {})
tqdm_description = tqdm_description or "Intermediate assessments (1/2)"
responses = self._call_api(
prompts=prompts, config=config, tqdm_description=tqdm_description
prompts=prompts,
config=config,
tqdm_description=tqdm_description,
system_prompt=self._system_prompt,
)
response_texts = [
response.content[0].text if response else None
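
Note: a minimal usage sketch of the new system_prompt option for the Anthropic client follows. The exported class name AnthropicEvalClient, the no-argument constructor that reads ANTHROPIC_API_KEY from the environment, and the single-argument get_text_responses call are assumptions about the surrounding library, not part of this diff.

# Hypothetical sketch: pass a system prompt once at construction time and it
# is forwarded as the "system" field of every client.messages.create call.
from langcheck.metrics.eval_clients import AnthropicEvalClient

client = AnthropicEvalClient(
    system_prompt="You are a strict, concise evaluator.",
)

responses = client.get_text_responses(
    ["Summarize the main risk of prompt injection in one sentence."]
)
print(responses)

If anthropic_args already contains a "system" entry, the constructor now warns that it will be ignored in favor of system_prompt.
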
28 changes: 24 additions & 4 deletions src/langcheck/metrics/eval_clients/_gemini.py
@@ -1,6 +1,7 @@
from __future__ import annotations

import os
import warnings
from collections.abc import Iterable
from typing import Any

@@ -26,6 +27,8 @@ def __init__(
model_args: dict[str, Any] | None = None,
generate_content_args: dict[str, Any] | None = None,
embed_model_name: str | None = None,
*,
system_prompt: str | None = None,
):
"""
Initialize the Gemini evaluation client. The authentication
@@ -47,19 +50,32 @@ def __init__(
``generate_content`` function.
embed_model_name: (Optional) The name of the embedding model to use.
If not provided, the models/embedding-001 model will be used.
system_prompt: (Optional) The system prompt to use. If not provided,
no system prompt will be used.
"""
if model:
self._model = model
self._text_response_model = model
self._structured_assessment_model = model
else:
configure(api_key=os.getenv("GOOGLE_API_KEY"))
model_args = model_args or {}
self._model = GenerativeModel(**model_args)
self._structured_assessment_model = GenerativeModel(**model_args)
# Only add system prompt to the text response model if it is provided
if system_prompt:
if "system_instruction" in model_args:
warnings.warn(
'"system_instruction" of model_args will be ignored because '
"system_prompt is provided."
)
model_args["system_instruction"] = system_prompt
self._text_response_model = GenerativeModel(**model_args)

self._generate_content_args = generate_content_args or {}
self._embed_model_name = embed_model_name

def _call_api(
self,
model: GenerativeModel,
prompts: Iterable[str | None],
config: dict[str, Any],
*,
@@ -69,7 +85,7 @@ def _call_api(
# of exception handling with the async version.
def _call_api_with_exception_filter(prompt: str) -> Any:
try:
return self._model.generate_content(prompt, **config)
return model.generate_content(prompt, **config)
except Exception as e:
return e

@@ -109,7 +125,10 @@ def get_text_responses(
config.update(self._generate_content_args or {})
tqdm_description = tqdm_description or "Intermediate assessments (1/2)"
responses = self._call_api(
prompts=prompts, config=config, tqdm_description=tqdm_description
model=self._text_response_model,
prompts=prompts,
config=config,
tqdm_description=tqdm_description,
)
response_texts = [
response.text if response else None for response in responses
@@ -189,6 +208,7 @@ def get_float_score(

tqdm_description = tqdm_description or "Scores (2/2)"
responses = self._call_api(
model=self._structured_assessment_model,
prompts=fn_call_messages,
config=config_structured_assessments,
tqdm_description=tqdm_description,
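
Note: a sketch of how the two Gemini models are meant to be used follows. The class name GeminiEvalClient and the GOOGLE_API_KEY environment variable are assumptions; what is grounded in the diff is that only the text-response model is built with system_instruction, while the structured-assessment model used for scoring is not.

# Hypothetical sketch, assuming GeminiEvalClient is the exported wrapper and
# GOOGLE_API_KEY is set in the environment.
from langcheck.metrics.eval_clients import GeminiEvalClient

client = GeminiEvalClient(
    system_prompt="Answer as a terse evaluation assistant.",
)

# Served by the model created with system_instruction=system_prompt.
responses = client.get_text_responses(["Is the sky blue? Answer yes or no."])
print(responses)

# get_float_score keeps using the plain structured-assessment model, so the
# system prompt does not influence the structured scoring step.
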
16 changes: 13 additions & 3 deletions src/langcheck/metrics/eval_clients/_llama.py
@@ -32,6 +32,8 @@ def __init__(
torch_dtype: str = "bfloat16",
tensor_parallel_size: int = 1,
device: str = "cuda",
*,
system_prompt: str | None = None,
):
"""
Initialize the Llama evaluation client.
@@ -42,6 +44,8 @@ def __init__(
tensor_parallel_size: The number of GPUs to use for distributed
execution with tensor parallelism.
device: The device to load the model on.
system_prompt: The system prompt to use. If not provided, default
system prompts based on the language will be used.
"""
self._model = LLM(
model=model_name,
@@ -58,7 +62,8 @@ def __init__(
stop="<|eot_id|>",
skip_special_tokens=True,
)
self._system_prompts = {
self._system_prompt = system_prompt
self._default_system_prompts = {
"en": "You are a helpful and competent assistant.",
"ja": "あなたは誠実で優秀な日本人のアシスタントです。以下は、タスクを説明する指示です。要求を適切に満たす応答を日本語で書きなさい。",
}
@@ -80,11 +85,16 @@ def get_text_responses(
if language not in ["en", "ja"]:
raise ValueError(f"Unsupported language: {language}")

if self._system_prompt is None:
system_prompt = self._default_system_prompts[language]
else:
system_prompt = self._system_prompt

messages = [
[
{
"role": "system",
"content": self._system_prompts[language],
"content": system_prompt,
},
{
"role": "user",
@@ -157,7 +167,7 @@ def get_float_score(
[
{
"role": "system",
"content": self._system_prompts[language],
"content": self._default_system_prompts[language],
},
{
"role": "user",
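
Note: the Llama client keeps per-language defaults and only overrides them in get_text_responses when a system_prompt is supplied; get_float_score still uses the defaults. The snippet below reproduces that fallback logic outside the client so it can run without vLLM or a GPU; the standalone function name is illustrative only.

# Standalone sketch of the fallback introduced above; the dictionary mirrors
# the _default_system_prompts mapping from the diff.
from __future__ import annotations

DEFAULT_SYSTEM_PROMPTS = {
    "en": "You are a helpful and competent assistant.",
    "ja": "あなたは誠実で優秀な日本人のアシスタントです。以下は、タスクを説明する指示です。要求を適切に満たす応答を日本語で書きなさい。",
}

def resolve_system_prompt(system_prompt: str | None, language: str) -> str:
    """Return the user-supplied prompt if given, else the per-language default."""
    if language not in DEFAULT_SYSTEM_PROMPTS:
        raise ValueError(f"Unsupported language: {language}")
    if system_prompt is not None:
        return system_prompt
    return DEFAULT_SYSTEM_PROMPTS[language]

print(resolve_system_prompt(None, "en"))         # default English prompt
print(resolve_system_prompt("Be terse.", "ja"))  # explicit prompt wins
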
60 changes: 39 additions & 21 deletions src/langcheck/metrics/eval_clients/_openai.py
@@ -26,6 +26,7 @@ def __init__(
openai_args: dict[str, str] | None = None,
*,
use_async: bool = False,
system_prompt: str | None = None,
):
"""
Initialize the OpenAI evaluation client.
@@ -35,6 +36,8 @@ def __init__(
openai_args: (Optional) dict of additional args to pass in to the
``client.chat.completions.create`` function.
use_async: (Optional) If True, the async client will be used.
system_prompt: (Optional) The system prompt to use. If not provided,
no system prompt will be used.
"""
if openai_client:
self._client = openai_client
@@ -45,13 +48,15 @@ def __init__(

self._openai_args = openai_args
self._use_async = use_async
self._system_prompt = system_prompt

def _call_api(
self,
prompts: Iterable[str | None],
config: dict[str, str],
*,
tqdm_description: str | None = None,
system_prompt: str | None = None,
) -> list[Any]:
# A helper function to call the API with exception filter for alignment
# of exception handling with the async version.
@@ -63,10 +68,15 @@ def _call_api_with_exception_filter(model_input: dict[str, Any]) -> Any:
except Exception as e:
return e

system_message = []
if system_prompt:
system_message.append({"role": "system", "content": system_prompt})

# Call API with different seed values for each prompt.
model_inputs = [
{
"messages": [{"role": "user", "content": prompt}],
"messages": system_message
+ [{"role": "user", "content": prompt}],
"seed": i,
**config,
}
@@ -131,6 +141,7 @@ def get_text_responses(
prompts=prompts,
config=config,
tqdm_description=tqdm_description,
system_prompt=self._system_prompt,
)
response_texts = [
response.choices[0].message.content if response else None
@@ -169,7 +180,10 @@ def get_text_responses_with_log_likelihood(
config.update(self._openai_args or {})
tqdm_description = tqdm_description or "Getting log likelihoods"
responses = self._call_api(
prompts=prompts, config=config, tqdm_description=tqdm_description
prompts=prompts,
config=config,
tqdm_description=tqdm_description,
system_prompt=self._system_prompt,
)
response_texts_with_log_likelihood = []
for response in responses:
@@ -328,6 +342,7 @@ def __init__(
openai_args: dict[str, str] | None = None,
*,
use_async: bool = False,
system_prompt: str | None = None,
):
"""
Initialize the Azure OpenAI evaluation client.
@@ -345,6 +360,8 @@ def __init__(
openai_args: (Optional) dict of additional args to pass in to the
``client.chat.completions.create`` function
use_async: (Optional) If True, the async client will be used.
system_prompt: (Optional) The system prompt to use. If not provided,
no system prompt will be used.
"""
assert (
text_model_name is not None or embedding_model_name is not None
@@ -368,6 +385,7 @@ def __init__(
self._text_model_name = text_model_name
self._embedding_model_name = embedding_model_name
self._openai_args = openai_args or {}
self._system_prompt = system_prompt

if self._text_model_name is not None:
self._openai_args["model"] = self._text_model_name
@@ -411,31 +429,31 @@ def __init__(
self._use_async = use_async

async def _async_embed(self, inputs: list[str]) -> CreateEmbeddingResponse:
"""Embed the inputs using the OpenAI API in async mode."""
assert isinstance(self.openai_client, AsyncOpenAI)
if self.openai_args:
responses = await self.openai_client.embeddings.create(
input=inputs, **self.openai_args
)
else:
responses = await self.openai_client.embeddings.create(
input=inputs, model="text-embedding-3-small"
)
return responses
"""Embed the inputs using the OpenAI API in async mode."""
assert isinstance(self.openai_client, AsyncOpenAI)
if self.openai_args:
responses = await self.openai_client.embeddings.create(
input=inputs, **self.openai_args
)
else:
responses = await self.openai_client.embeddings.create(
input=inputs, model="text-embedding-3-small"
)
return responses

def _embed(self, inputs: list[str]) -> torch.Tensor:
"""Embed the inputs using the OpenAI API."""

# TODO: Fix that this async call could be much slower than the sync
# version. https://github.com/citadel-ai/langcheck/issues/160
if self._use_async:
try:
loop = asyncio.get_event_loop()
except RuntimeError: # pragma: py-lt-310
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
embed_response = loop.run_until_complete(self._async_embed(inputs))
embeddings = [item.embedding for item in embed_response.data]
try:
loop = asyncio.get_event_loop()
except RuntimeError: # pragma: py-lt-310
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
embed_response = loop.run_until_complete(self._async_embed(inputs))
embeddings = [item.embedding for item in embed_response.data]
else:
assert isinstance(self.openai_client, OpenAI)

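
Note: a usage sketch for the OpenAI-backed clients follows. The exported names OpenAIEvalClient and AzureOpenAIEvalClient, the OPENAI_API_KEY environment variable, and the "gpt-4o-mini" model string are assumptions; the grounded behaviour from the diff is that the system prompt is prepended as a {"role": "system", ...} message ahead of each user prompt, for both get_text_responses and get_text_responses_with_log_likelihood.

# Hypothetical sketch, assuming OpenAIEvalClient reads OPENAI_API_KEY from
# the environment when no client instance is passed in.
from langcheck.metrics.eval_clients import OpenAIEvalClient

client = OpenAIEvalClient(
    openai_args={"model": "gpt-4o-mini"},
    system_prompt="You are an impartial grader. Keep answers short.",
)

responses = client.get_text_responses(
    ["Does the answer '42' correctly answer the question 'What is 6 x 7?'"]
)
print(responses)
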