Skip to content

Commit

Permalink
style:fmt
Browse files Browse the repository at this point in the history
  • Loading branch information
Aries-ckt committed Sep 18, 2024
1 parent 6062675 commit 6251878
Showing 1 changed file with 10 additions and 4 deletions.
14 changes: 10 additions & 4 deletions dbgpt/rag/embedding/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -888,12 +888,17 @@ def __init__(self, **kwargs):
super().__init__(**kwargs)
self._api_key = kwargs.get("api_key")


def embed_documents(self, texts: List[str], max_batch_chunks_size=25) -> List[List[float]]:
def embed_documents(
self, texts: List[str], max_batch_chunks_size=25
) -> List[List[float]]:
"""Get the embeddings for a list of texts.
refer:https://help.aliyun.com/zh/model-studio/getting-started/models?
spm=a2c4g.11186623.0.0.62524a77NlILDI#c05fe72732770
Args:
texts (Documents): A list of texts to get embeddings for.
max_batch_chunks_size: The max batch size for embedding.
Returns:
Embedded texts as List[List[float]], where each inner List[float]
Expand All @@ -902,13 +907,14 @@ def embed_documents(self, texts: List[str], max_batch_chunks_size=25) -> List[Li
from dashscope import TextEmbedding

embeddings = []
# batch size too longer may cause embedding error,eg: qwen online embedding models must not be larger than 25
# Too large a batch size may cause embedding errors, e.g. qwen online embedding
# models must not use batches larger than 25
# text-embedding-v3 embedding batch size should not be larger than 6
if str(self.model_name) == "text-embedding-v3":
max_batch_chunks_size = 6

for i in range(0, len(texts), max_batch_chunks_size):
batch_texts = texts[i:i + max_batch_chunks_size]
batch_texts = texts[i : i + max_batch_chunks_size]
resp = TextEmbedding.call(
model=self.model_name, input=batch_texts, api_key=self._api_key
)
Expand Down

0 comments on commit 6251878

Please sign in to comment.