Skip to content

Commit

Permalink
docs: add doc for enfer_core
Browse files Browse the repository at this point in the history
  • Loading branch information
phodal committed Nov 29, 2023
1 parent 394f70c commit 2d3aa4f
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 8 deletions.
12 changes: 11 additions & 1 deletion enfer_core/src/document.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ impl Metadata {
}
}


impl Document {
/// Creates a new document from a string value.
pub fn from(string_value: String) -> Self {
Self {
id: "".to_string(),
Expand All @@ -34,6 +34,16 @@ impl Document {
}
}

/// Creates a new document from a string value and metadata.
///
/// Example:
/// ```rust
/// use inference_core::{Document, Metadata};
///
/// let mut metadata = Metadata::new();
/// metadata.metadata.insert("title".to_string(), "Hello world!".to_string());
/// let document = Document::from_with_metadata("Hello world!".to_string(), metadata);
/// assert_eq!(document.metadata.metadata.get("title").unwrap(), "Hello world!");
/// ```
pub fn from_with_metadata(string_value: String, metadata: Metadata) -> Self {
Self {
id: "".to_string(),
Expand Down
10 changes: 10 additions & 0 deletions enfer_core/src/embedding/semantic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,16 @@ impl Semantic {
Ok(semantic)
}

/// Embeds a sequence of text into a vector of floats. The length of the vector is the embedding dimension of the model.
///
/// Example:
/// ```rust
/// use inference_core::Semantic;
///
/// let semantic = Semantic::new();
/// let embedding = semantic.embed("Hello world!").unwrap();
/// assert_eq!(embedding.len(), 128); // if the model dimension is 128
/// ```
pub fn embed(&self, sequence: &str) -> Result<Embedding, SemanticError> {
let encoding = self.tokenizer.encode(sequence, true)
.map_err(|_| SemanticError::TokenizeEncodeError)?;
Expand Down
3 changes: 3 additions & 0 deletions enfer_core/src/similarity/cosine_similarity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ use crate::similarity::Similarity;

/// Unit struct that provides the cosine-similarity implementation of the `Similarity` trait.
pub struct CosineSimilarity;

/// Cosine similarity measures how similar two non-zero vectors of an inner product space are
/// by taking the cosine of the angle between them.
/// The cosine of 0° is 1, and it is less than 1 for any other angle.
impl Similarity for CosineSimilarity {
/// Calculates the cosine similarity between two vectors.
fn similarity_score(&self, vector_a: &Embedding, vector_b: &Embedding) -> f32 {
if vector_a.len() != vector_b.len() {
panic!(
Expand Down
14 changes: 7 additions & 7 deletions enfer_core/src/store/embedding_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,23 @@ use crate::embedding::Embedding;
use crate::similarity::EmbeddingMatch;

pub trait EmbeddingStore<Embedded: Clone + Ord> {
// Adds an embedding to the store and returns its unique identifier.
/// Adds an embedding to the store and returns its unique identifier.
fn add(&mut self, embedding: Embedding) -> String;

// Adds an embedding to the store with a specified identifier.
/// Adds an embedding to the store with a specified identifier.
fn add_with_id(&mut self, id: String, embedding: Embedding);

// Adds an embedding to the store and associates it with the provided embedded data.
/// Adds an embedding to the store and associates it with the provided embedded data.
fn add_with_embedded(&mut self, embedding: Embedding, embedded: Embedded) -> String;

// Adds a list of embeddings to the store and returns a list of unique identifiers.
/// Adds a list of embeddings to the store and returns a list of unique identifiers.
fn add_all(&mut self, embeddings: Vec<Embedding>) -> Vec<String>;

// Adds a list of embeddings to the store and associates them with a list of embedded data.
/// Adds a list of embeddings to the store and associates them with a list of embedded data.
fn add_all_with_embedded(&mut self, embeddings: Vec<Embedding>, embedded: Vec<Embedded>) -> Vec<String>;

// Find relevant embeddings in the store based on a reference embedding, with a maximum number of results.
// An optional minimum score can be specified to filter results.
/// Finds relevant embeddings in the store based on a reference embedding, with a maximum number of results.
/// An optional minimum score can be specified to filter results.
fn find_relevant(
&self,
reference_embedding: Embedding,
Expand Down
14 changes: 14 additions & 0 deletions enfer_core/src/store/memory_store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ pub struct InMemoryEmbeddingStore {
entries: Arc<Mutex<Vec<Entry>>>,
}

/// An in-memory implementation of the EmbeddingStore trait.
///
/// Example:
/// ```rust
/// use inference_core::{Document, InMemoryEmbeddingStore, Semantic};
///
/// let semantic = Semantic::new();
/// let store = InMemoryEmbeddingStore::new();
///
/// let embedding = semantic.embed("Hello world!").unwrap();
/// let id = store.add("".to_string(), embedding, Document::from("Hello world!".to_string()));
/// let matches = store.find_relevant(embedding, 10, 0.0);
/// assert_eq!(matches.len(), 1);
/// ```
impl InMemoryEmbeddingStore {
pub fn new() -> Self {
InMemoryEmbeddingStore { entries: Arc::new(Mutex::new(vec![])) }
Expand Down

0 comments on commit 2d3aa4f

Please sign in to comment.