From 656fca621b658a1d58b291cbe0bee77d52df69da Mon Sep 17 00:00:00 2001 From: juanjoman Date: Wed, 30 Aug 2023 11:48:36 +0100 Subject: [PATCH 1/2] add embeddings space component --- .../docs/03_components/02_embeddings_space.md | 9 +-- .../openai/OpenAIEmbeddingsModelTest.java | 2 +- modules/embeddingsspace/build.gradle | 24 +++++++ .../EmbeddingsSpaceComponent.java | 69 +++++++++++++++++++ .../EmbeddingsSpaceComponentTest.java | 0 settings.gradle | 2 +- 6 files changed, 96 insertions(+), 10 deletions(-) create mode 100644 modules/embeddingsspace/build.gradle create mode 100644 modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java create mode 100644 modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java diff --git a/docs-site/docs/03_components/02_embeddings_space.md b/docs-site/docs/03_components/02_embeddings_space.md index cc267f4..bc01f10 100644 --- a/docs-site/docs/03_components/02_embeddings_space.md +++ b/docs-site/docs/03_components/02_embeddings_space.md @@ -66,18 +66,11 @@ Generates and persists an embedding for a given text. - **Parameters**: - `text`: Text to be embedded. - - `additionalMetadata`: (Optional) Additional metadata. - **Returns**: The generated embedding. ```java -Map additionalMetadata = new HashMap<>(); -additionalMetadata.put("key", "value"); - String sampleText = "Hello, eLLMental!"; -Embedding embedding = embeddingsSpace.save(sampleText, additionalMetadata); - -// Or just -Embedding embedding = embeddingSpace.save(sampleText); +Embedding embedding = embeddingsSpace.save(sampleText); ``` ## `mostSimilarEmbeddings` diff --git a/modules/embeddingsgeneration/src/test/java/com/theagilemonkeys/ellmental/embeddingsgeneration/openai/OpenAIEmbeddingsModelTest.java b/modules/embeddingsgeneration/src/test/java/com/theagilemonkeys/ellmental/embeddingsgeneration/openai/OpenAIEmbeddingsModelTest.java index eb0be69..fb6a05f 100644 --- a/modules/embeddingsgeneration/src/test/java/com/theagilemonkeys/ellmental/embeddingsgeneration/openai/OpenAIEmbeddingsModelTest.java +++ b/modules/embeddingsgeneration/src/test/java/com/theagilemonkeys/ellmental/embeddingsgeneration/openai/OpenAIEmbeddingsModelTest.java @@ -9,7 +9,7 @@ import java.util.List; public class OpenAIEmbeddingsModelTest { - @Test + @Test public void testGenerateEmbedding(){ OpenAIEmbeddingsModel openAI = new OpenAIEmbeddingsModel(); Embedding embedding = openAI.generateEmbedding("The Agile Monkeys rule!"); diff --git a/modules/embeddingsspace/build.gradle b/modules/embeddingsspace/build.gradle new file mode 100644 index 0000000..55f66d3 --- /dev/null +++ b/modules/embeddingsspace/build.gradle @@ -0,0 +1,24 @@ +plugins { + id 'java' +} + +group = "com.theagilemonkeys.ellmental" +version = '1.0-SNAPSHOT' + + +repositories { + mavenCentral() +} + +dependencies { + implementation project(':core') + implementation project(':modules:embeddingsgeneration') + implementation project(':modules:embeddingsstore') + + testImplementation platform('org.junit:junit-bom:5.9.1') + testImplementation 'org.junit.jupiter:junit-jupiter' +} + +test { + useJUnitPlatform() +} diff --git a/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java b/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java new file mode 100644 index 0000000..d39cd50 --- /dev/null +++ b/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java @@ -0,0 +1,69 @@ +package com.theagilemonkeys.ellmental.embeddingsspace; + +import com.theagilemonkeys.ellmental.core.schema.Embedding; +import com.theagilemonkeys.ellmental.embeddingsgeneration.EmbeddingsGenerationModel; +import com.theagilemonkeys.ellmental.embeddingsstore.EmbeddingsStore; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +public class EmbeddingsSpaceComponent { + + private final EmbeddingsGenerationModel model; + private final EmbeddingsStore store; + + public EmbeddingsSpaceComponent(EmbeddingsGenerationModel model, EmbeddingsStore store) { + this.model = model; + this.store = store; + } + + public Embedding generate(String text) { + Embedding embedding = null; + try { + embedding = model.generateEmbedding(text); + } catch (RuntimeException e) { + // TODO: Create and catch EmbeddingGenerationException? + System.out.printf("Error while generating embedding tor text %s", text); + } + return embedding; + } + + public Embedding generate(String text, Map additionalMetadata) { + // TODO: What about metadata? + return model.generateEmbedding(text); + } + + public Embedding save(String text) { + Embedding embedding = generate(text); + + try { + store.store(embedding); + return embedding; + } catch (RuntimeException e) { + // TODO: Create and catch EmbeddingStoreException? + System.out.printf("Error while storing embedding %s into the embedding space%n", embedding.toString()); + return null; + } + } + + public List mostSimilarEmbeddings(Embedding referenceEmbedding, int limit) { + List embeddings = new ArrayList<>(); + + try { + embeddings = store.similaritySearch(referenceEmbedding, limit); + } catch (RuntimeException e) { + // TODO: Create and catch SimilaritySearchException? + System.out.printf("Error while looking for most similar embeddings for referenceText %s", referenceText); + } + + return embeddings; + } + + public List mostSimilarEmbeddings(String referenceText, int limit) { + Embedding embedding = generate(referenceText); + + return mostSimilarEmbeddings(embedding, limit); + } + +} \ No newline at end of file diff --git a/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java b/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java new file mode 100644 index 0000000..e69de29 diff --git a/settings.gradle b/settings.gradle index fe94a3f..0b8a450 100644 --- a/settings.gradle +++ b/settings.gradle @@ -4,6 +4,6 @@ include( "core", "modules:embeddingsgeneration", "modules:embeddingsstore", + "modules:embeddingsspace", "examples:simplejava" ) - From e3fd340c4dd34e58f8520919d8eaeafa674132cc Mon Sep 17 00:00:00 2001 From: juanjoman Date: Wed, 30 Aug 2023 12:29:23 +0100 Subject: [PATCH 2/2] make embedding generation private, remove it from the docs and remove error handling --- .../docs/03_components/02_embeddings_space.md | 17 ------- .../EmbeddingsSpaceComponent.java | 50 ++++--------------- .../EmbeddingsSpaceComponentTest.java | 21 ++++++++ 3 files changed, 31 insertions(+), 57 deletions(-) diff --git a/docs-site/docs/03_components/02_embeddings_space.md b/docs-site/docs/03_components/02_embeddings_space.md index bc01f10..4193bd9 100644 --- a/docs-site/docs/03_components/02_embeddings_space.md +++ b/docs-site/docs/03_components/02_embeddings_space.md @@ -43,23 +43,6 @@ EmbeddingsStore pineconeStore = new PineconeEmbeddingsStore("YOUR_PINECONE_URL", EmbeddingsSpaceComponent embeddingsSpace = new EmbeddingsSpaceComponent(openAIModel, pineconeStore); ``` -## `generate` - -Generates an embedding from a text without persisting it. - -- **Parameters**: - - `text`: The textual input for embedding. - - `additionalMetadata`: Supplementary metadata associated with the text. -- **Returns**: The generated embedding. - -```java -String sampleText = "Hello, eLLMental!"; -Map additionalMetadata = new HashMap<>(); -additionalMetadata.put("key", "value"); - -Embedding embedding = embeddingsSpace.generate(sampleText, additionalMetadata); -``` - ## `save` Generates and persists an embedding for a given text. diff --git a/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java b/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java index d39cd50..a18ec40 100644 --- a/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java +++ b/modules/embeddingsspace/src/main/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponent.java @@ -4,60 +4,30 @@ import com.theagilemonkeys.ellmental.embeddingsgeneration.EmbeddingsGenerationModel; import com.theagilemonkeys.ellmental.embeddingsstore.EmbeddingsStore; -import java.util.ArrayList; import java.util.List; -import java.util.Map; public class EmbeddingsSpaceComponent { - private final EmbeddingsGenerationModel model; - private final EmbeddingsStore store; + private final EmbeddingsGenerationModel embeddingsGenerationModel; + private final EmbeddingsStore embeddingsStore; - public EmbeddingsSpaceComponent(EmbeddingsGenerationModel model, EmbeddingsStore store) { - this.model = model; - this.store = store; + public EmbeddingsSpaceComponent(EmbeddingsGenerationModel embeddingsGenerationModel, EmbeddingsStore embeddingsStore) { + this.embeddingsGenerationModel = embeddingsGenerationModel; + this.embeddingsStore = embeddingsStore; } - public Embedding generate(String text) { - Embedding embedding = null; - try { - embedding = model.generateEmbedding(text); - } catch (RuntimeException e) { - // TODO: Create and catch EmbeddingGenerationException? - System.out.printf("Error while generating embedding tor text %s", text); - } - return embedding; - } - - public Embedding generate(String text, Map additionalMetadata) { - // TODO: What about metadata? - return model.generateEmbedding(text); + private Embedding generate(String text) { + return embeddingsGenerationModel.generateEmbedding(text); } public Embedding save(String text) { Embedding embedding = generate(text); - - try { - store.store(embedding); - return embedding; - } catch (RuntimeException e) { - // TODO: Create and catch EmbeddingStoreException? - System.out.printf("Error while storing embedding %s into the embedding space%n", embedding.toString()); - return null; - } + embeddingsStore.store(embedding); + return embedding; } public List mostSimilarEmbeddings(Embedding referenceEmbedding, int limit) { - List embeddings = new ArrayList<>(); - - try { - embeddings = store.similaritySearch(referenceEmbedding, limit); - } catch (RuntimeException e) { - // TODO: Create and catch SimilaritySearchException? - System.out.printf("Error while looking for most similar embeddings for referenceText %s", referenceText); - } - - return embeddings; + return embeddingsStore.similaritySearch(referenceEmbedding, limit); } public List mostSimilarEmbeddings(String referenceText, int limit) { diff --git a/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java b/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java index e69de29..6d91d4a 100644 --- a/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java +++ b/modules/embeddingsspace/src/test/java/com/theagilemonkeys/ellmental/embeddingsspace/EmbeddingsSpaceComponentTest.java @@ -0,0 +1,21 @@ +package com.theagilemonkeys.ellmental.embeddingsspace; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertTrue; + +public class EmbeddingsSpaceComponentTest { + + // TODO: Use mockito to Mock these + // @Spy + // private final EmbeddingsGenerationModel model; + // @Spy + // private final EmbeddingsStore store; + // @Inject + // private final EmbeddingsSpaceComponent embeddingsSpaceComponent; + + @Test + public void todo() { + assertTrue(true); + } +} \ No newline at end of file