From 2d3aa4f512dd8d1c2b3ec0954f460555062ae3b0 Mon Sep 17 00:00:00 2001 From: Phodal Huang Date: Wed, 29 Nov 2023 22:47:34 +0800 Subject: [PATCH] docs: add doc for enfer_core --- enfer_core/src/document.rs | 12 +++++++++++- enfer_core/src/embedding/semantic.rs | 10 ++++++++++ enfer_core/src/similarity/cosine_similarity.rs | 3 +++ enfer_core/src/store/embedding_store.rs | 14 +++++++------- enfer_core/src/store/memory_store.rs | 14 ++++++++++++++ 5 files changed, 45 insertions(+), 8 deletions(-) diff --git a/enfer_core/src/document.rs b/enfer_core/src/document.rs index 7b3b8b7..1788645 100644 --- a/enfer_core/src/document.rs +++ b/enfer_core/src/document.rs @@ -23,8 +23,8 @@ impl Metadata { } } - impl Document { + /// Creates a new document from a string value. pub fn from(string_value: String) -> Self { Self { id: "".to_string(), @@ -34,6 +34,16 @@ impl Document { } } + /// Creates a new document from a string value and metadata. + /// Example: + /// ```rust + /// use inference_core::{Document, Metadata}; + /// + /// let mut metadata = Metadata::new(); + /// metadata.metadata.insert("title".to_string(), "Hello world!".to_string()); + /// let document = Document::from_with_metadata("Hello world!".to_string(), metadata); + /// assert_eq!(document.metadata.metadata.get("title").unwrap(), "Hello world!"); + /// ``` pub fn from_with_metadata(string_value: String, metadata: Metadata) -> Self { Self { id: "".to_string(), diff --git a/enfer_core/src/embedding/semantic.rs b/enfer_core/src/embedding/semantic.rs index 7a88e34..a65cd38 100644 --- a/enfer_core/src/embedding/semantic.rs +++ b/enfer_core/src/embedding/semantic.rs @@ -63,6 +63,16 @@ impl Semantic { Ok(semantic) } + /// Embeds a sequence of text into a vector of floats. The length of the vector is the embedding dimension of the model. 
+ /// + /// Example: + /// ```rust + /// use inference_core::Semantic; + /// + /// let semantic = Semantic::new(); + /// let embedding = semantic.embed("Hello world!"); + /// assert_eq!(embedding.len(), 128); // if the model dimension is 128 + /// ``` pub fn embed(&self, sequence: &str) -> Result { let encoding = self.tokenizer.encode(sequence, true) .map_err(|_| SemanticError::TokenizeEncodeError)?; diff --git a/enfer_core/src/similarity/cosine_similarity.rs b/enfer_core/src/similarity/cosine_similarity.rs index c33dea1..889131b 100644 --- a/enfer_core/src/similarity/cosine_similarity.rs +++ b/enfer_core/src/similarity/cosine_similarity.rs @@ -3,7 +3,10 @@ use crate::similarity::Similarity; pub struct CosineSimilarity; +/// Cosine similarity is a measure of similarity between two non-zero vectors of an inner product space that measures the cosine of the angle between them. +/// The cosine of 0° is 1, and it is less than 1 for any other angle. impl Similarity for CosineSimilarity { + /// Calculates the cosine similarity between two vectors. fn similarity_score(&self, vector_a: &Embedding, vector_b: &Embedding) -> f32 { if vector_a.len() != vector_b.len() { panic!( diff --git a/enfer_core/src/store/embedding_store.rs b/enfer_core/src/store/embedding_store.rs index 5d1e0ce..c405ca8 100644 --- a/enfer_core/src/store/embedding_store.rs +++ b/enfer_core/src/store/embedding_store.rs @@ -2,23 +2,23 @@ use crate::embedding::Embedding; use crate::similarity::EmbeddingMatch; pub trait EmbeddingStore { - // Adds an embedding to the store and returns its unique identifier. + /// Adds an embedding to the store and returns its unique identifier. fn add(&mut self, embedding: Embedding) -> String; - // Adds an embedding to the store with a specified identifier. + /// Adds an embedding to the store with a specified identifier. fn add_with_id(&mut self, id: String, embedding: Embedding); - // Adds an embedding to the store and associates it with the provided embedded data. 
+ /// Adds an embedding to the store and associates it with the provided embedded data. fn add_with_embedded(&mut self, embedding: Embedding, embedded: Embedded) -> String; - // Adds a list of embeddings to the store and returns a list of unique identifiers. + /// Adds a list of embeddings to the store and returns a list of unique identifiers. fn add_all(&mut self, embeddings: Vec) -> Vec; - // Adds a list of embeddings to the store and associates them with a list of embedded data. + /// Adds a list of embeddings to the store and associates them with a list of embedded data. fn add_all_with_embedded(&mut self, embeddings: Vec, embedded: Vec) -> Vec; - // Find relevant embeddings in the store based on a reference embedding, with a maximum number of results. - // An optional minimum score can be specified to filter results. + /// Find relevant embeddings in the store based on a reference embedding, with a maximum number of results. + /// An optional minimum score can be specified to filter results. fn find_relevant( &self, reference_embedding: Embedding, diff --git a/enfer_core/src/store/memory_store.rs b/enfer_core/src/store/memory_store.rs index 3ac206d..477e711 100644 --- a/enfer_core/src/store/memory_store.rs +++ b/enfer_core/src/store/memory_store.rs @@ -23,6 +23,20 @@ pub struct InMemoryEmbeddingStore { entries: Arc>>, } +/// An in-memory implementation of the EmbeddingStore trait. +/// +/// Example: +/// ```rust +/// use inference_core::{Document, InMemoryEmbeddingStore, Semantic}; +/// +/// let semantic = Semantic::new(); +/// let store = InMemoryEmbeddingStore::new(); +/// +/// let embedding = semantic.embed("Hello world!"); +/// let id = store.add("".to_string(), embedding, Document::from("Hello world!".to_string())); +/// let matches = store.find_relevant(embedding, 10, 0.0); +/// assert_eq!(matches.len(), 1); +/// ``` impl InMemoryEmbeddingStore { pub fn new() -> Self { InMemoryEmbeddingStore { entries: Arc::new(Mutex::new(vec![])) }