diff --git a/tests/omnoms/vlite-unit.omom b/tests/omnoms/vlite-unit.omom
new file mode 100644
index 0000000..e86a98c
Binary files /dev/null and b/tests/omnoms/vlite-unit.omom differ
diff --git a/tests/unit.py b/tests/unit.py
index 9a28175..d2ac261 100644
--- a/tests/unit.py
+++ b/tests/unit.py
@@ -105,6 +105,9 @@ def tearDownClass(cls):
         if os.path.exists('vlite-unit.npz'):
             print("[+] Removing vlite")
             os.remove('vlite-unit.npz')
+        if os.path.exists('vlite-unit.omom'):
+            print("[+] Removing vlite")
+            os.remove('vlite-unit.omom')
 
 if __name__ == '__main__':
     unittest.main(verbosity=2)
\ No newline at end of file
diff --git a/vlite/main.py b/vlite/main.py
index ece5788..2a898d3 100644
--- a/vlite/main.py
+++ b/vlite/main.py
@@ -3,60 +3,35 @@
 from .model import EmbeddingModel
 from .utils import chop_and_chunk
 import datetime
+from .omom import Omom
 
 class VLite:
-    """
-    A simple vector database for text embedding and retrieval.
-
-    Attributes:
-        collection (str): Path to the collection file.
-        device (str): Device to use for embedding ('cpu' or 'cuda').
-        model (EmbeddingModel): The embedding model used for text representation.
-
-    Methods:
-        add(text, id=None, metadata=None): Adds a text to the collection with optional ID and metadata.
-        retrieve(text=None, id=None, top_k=5): Retrieves similar texts from the collection.
-        save(): Saves the collection to a file.
-    """
     def __init__(self, collection=None, device='cpu', model_name='mixedbread-ai/mxbai-embed-large-v1'):
+        """Open the named collection (a timestamped name is generated when omitted) and
+        rebuild the in-memory index from its omom file, if one has been saved."""
         if collection is None:
             current_datetime = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
             collection = f"vlite_{current_datetime}"
-        self.collection = f"{collection}.npz"
+        self.collection = f"{collection}"
         self.device = device
         self.model = EmbeddingModel(model_name) if model_name else EmbeddingModel()
+        self.omom = Omom()
+        self.index = {}
 
-        try:
-            with np.load(self.collection, allow_pickle=True) as data:
-                index_data = data['index'].item()
-                self.index = {
-                    chunk_id: {
-                        'text': chunk_data['text'],
-                        'metadata': chunk_data['metadata'],
-                        'vector': np.array(chunk_data['vector']),  # Convert back to numpy array
-                        'binary_vector': np.array(chunk_data['binary_vector']),  # Convert back to numpy array
-                        'int8_vector': np.array(chunk_data['int8_vector'])  # Convert back to numpy array
-                    }
-                    for chunk_id, chunk_data in index_data.items()
-                }
-        except FileNotFoundError:
+        # load() is called directly: it tolerates a missing file, and avoiding `with` keeps a plain read from re-saving the file.
+        omom_file = self.omom.read(self.collection)
+        omom_file.load()
+        chunk_count = len(omom_file.metadata)
+        if not chunk_count:
             print(f"Collection file {self.collection} not found. Initializing empty attributes.")
-            self.index = {}
+            return
+        # save() writes one embedding and one context per metadata key, in the same order, so rows are matched up by position.
+        vectors = np.array(omom_file.embeddings).astype(np.int64).reshape(chunk_count, -1)
+        for (chunk_id, chunk_metadata), text, vector in zip(omom_file.metadata.items(), omom_file.contexts, vectors):
+            self.index[chunk_id] = {'text': text, 'metadata': chunk_metadata, 'binary_vector': vector}
-
-    def add(self, data, metadata=None, need_chunks=True, newEmbedding=False, fast=True):
-        """
-        Adds text or a list of texts to the collection with optional ID within metadata.
-
-        Args:
-            data (str, dict, or list): Text data to be added. Can be a string, a dictionary containing text, id, and/or metadata, or a list of strings or dictionaries.
-            metadata (dict, optional): Additional metadata to be appended to each text entry.
-            need_chunks (bool, optional): Whether to split the text into chunks before embedding. Defaults to True.
-            fast (bool, optional): Whether to use fast mode for chunking. Defaults to True.
-
-        Returns:
-            list: A list of tuples, each containing the ID of the added text and the updated vectors array.
-        """
-        print("Adding text to the collection...")
+
+    def add(self, data, metadata=None, need_chunks=True, fast=True):
+        print("Adding text to the collection...", self.collection)
         data = [data] if not isinstance(data, list) else data
         results = []
         all_chunks = []
@@ -88,48 +63,33 @@ def add(self, data, metadata=None, need_chunks=True, newEmbedding=False, fast=Tr
 
         encoded_data = self.model.embed(all_chunks, device=self.device)
         binary_encoded_data = self.model.quantize(encoded_data, precision="binary")
-        int8_encoded_data = self.model.quantize(encoded_data, precision="int8")
-
-        for idx, (chunk, vector, binary_vector, int8_vector, metadata, item_id) in enumerate(zip(all_chunks, encoded_data, binary_encoded_data, int8_encoded_data, all_metadata, all_ids)):
+        
+        for idx, (chunk, binary_vector, metadata, item_id) in enumerate(zip(all_chunks, binary_encoded_data, all_metadata, all_ids)):
             chunk_id = f"{item_id}_{idx}"
             self.index[chunk_id] = {
                 'text': chunk,
                 'metadata': metadata,
-                'vector': vector,
-                'binary_vector': binary_vector.tolist(),
-                'int8_vector': int8_vector.tolist()
+                'binary_vector': binary_vector.tolist()
             }
 
             if item_id not in [result[0] for result in results]:
-                results.append((item_id, encoded_data, metadata))
+                results.append((item_id, binary_encoded_data, metadata))
 
         self.save()
         print("Text added successfully.")
         return results
 
-    def retrieve(self, text=None, top_k=5, metadata=None, newEmbedding=False):
-        """
-        Retrieves similar texts from the collection based on text content, ID, or metadata.
-
-        Args:
-            text (str, optional): Query text for finding similar texts.
-            top_k (int, optional): Number of top similar texts to retrieve. Defaults to 5.
-            metadata (dict, optional): Metadata to filter the retrieved texts.
-
-        Returns:
-            tuple: A tuple containing a list of similar texts, their similarity scores, and metadata (if applicable).
-        """
+    def retrieve(self, text=None, top_k=5, metadata=None):
         print("Retrieving similar texts...")
         if text:
             print(f"Retrieving top {top_k} similar texts for query: {text}")
             query_chunks = chop_and_chunk(text, fast=True)
             query_vectors = self.model.embed(query_chunks, device=self.device)
             query_binary_vectors = self.model.quantize(query_vectors, precision="binary")
-            query_int8_vectors = self.model.quantize(query_vectors, precision="int8")
 
             results = []
-            for query_binary_vector, query_int8_vector in zip(query_binary_vectors, query_int8_vectors):
-                chunk_results = self.rescore(query_binary_vector, query_int8_vector, top_k, metadata)
+            for query_binary_vector in query_binary_vectors:
+                chunk_results = self.search(query_binary_vector, top_k, metadata)
                 results.extend(chunk_results)
 
             results.sort(key=lambda x: x[1], reverse=True)
@@ -138,59 +98,31 @@ def retrieve(self, text=None, top_k=5, metadata=None, newEmbedding=False):
             print("Retrieval completed.")
             return [(self.index[idx]['text'], score, self.index[idx]['metadata']) for idx, score in results]
         
-    def rescore(self, query_binary_vector, query_int8_vector, top_k, metadata=None):
-        """
-        Performs retrieval using binary search and rescoring using int8 embeddings.
-
-        Args:
-            query_binary_vector (numpy.ndarray): Binary vector of the query.
-            query_int8_vector (numpy.ndarray): Int8 vector of the query.
-            top_k (int): Number of top similar texts to retrieve.
-            metadata (dict, optional): Metadata to filter the retrieved texts.
-
-        Returns:
-            list: A list of tuples containing the chunk IDs and their similarity scores.
-        """
-        # Reshape query_binary_vector and query_int8_vector to 1D arrays
+    def search(self, query_binary_vector, top_k, metadata=None):
+        """Rank stored chunks against a packed binary query vector by dot product.
+
+        Returns at most top_k (chunk_id, score) pairs; when metadata is given, only chunks whose metadata equals it on every key are kept.
+        """
         query_binary_vector = query_binary_vector.reshape(-1)
-        query_int8_vector = query_int8_vector.reshape(-1)
+        chunk_ids = list(self.index)
+        # Clamp top_k: argpartition raises when asked for more items than exist, and an empty index has nothing to score.
+        top_k = min(top_k, len(chunk_ids))
+        if top_k <= 0:
+            return []
 
         # Perform binary search
         binary_vectors = np.array([item['binary_vector'] for item in self.index.values()])
         binary_similarities = np.einsum('i,ji->j', query_binary_vector, binary_vectors)
-        top_k_indices = np.argpartition(binary_similarities, -top_k*4)[-top_k*4:]
+        top_k_indices = np.argpartition(binary_similarities, -top_k)[-top_k:]
-        top_k_ids = [list(self.index.keys())[idx] for idx in top_k_indices]
-
-        # Apply metadata filter on the retrieved top_k*4 items
+        # Filter the top_k candidates by metadata while keeping every id paired with its own score.
-        if metadata:
-            filtered_ids = []
-            for item_id in top_k_ids:
-                item_metadata = self.index[item_id]['metadata']
-                if all(item_metadata.get(key) == value for key, value in metadata.items()):
-                    filtered_ids.append(item_id)
-            top_k_ids = filtered_ids[:top_k*4]
-
-        # Perform rescoring using int8 embeddings
-        int8_vectors = np.array([self.index[idx]['int8_vector'] for idx in top_k_ids])
-        int8_similarities = np.einsum('i,ji->j', query_int8_vector, int8_vectors)
-
-        # Sort the results based on the int8 similarities
-        sorted_indices = np.argpartition(int8_similarities, -top_k)[-top_k:]
-        sorted_ids = np.take(top_k_ids, sorted_indices)
-        sorted_scores = int8_similarities[sorted_indices]
-
-        return list(zip(sorted_ids, sorted_scores))
+        wanted = (metadata or {}).items()
+        return [
+            (chunk_ids[idx], binary_similarities[idx])
+            for idx in top_k_indices
+            if all(self.index[chunk_ids[idx]]['metadata'].get(key) == value for key, value in wanted)
+        ]
 
     def delete(self, ids):
-        """
-        Deletes items from the collection by their IDs.
-
-        Args:
-            ids (list or str): A single ID or a list of IDs of the items to delete.
-
-        Returns:
-            int: The number of items deleted from the collection.
-        """
         if isinstance(ids, str):
             ids = [ids]
 
@@ -209,18 +141,6 @@ def delete(self, ids):
         return deleted_count
     
     def update(self, id, text=None, metadata=None, vector=None):
-        """
-        Updates an item in the collection by its ID.
-
-        Args:
-            id (str): The ID of the item to update.
-            text (str, optional): The updated text content of the item.
-            metadata (dict, optional): The updated metadata of the item.
-            vector (numpy.ndarray, optional): The updated embedding vector of the item.
-
-        Returns:
-            bool: True if the item was successfully updated, False otherwise.
-        """
         if id in self.index:
             if text is not None:
                 self.index[id]['text'] = text
@@ -239,40 +159,18 @@ def update(self, id, text=None, metadata=None, vector=None):
             return False
     
     def get(self, ids=None, where=None):
-        """
-        Retrieves items from the collection based on IDs and/or metadata.
-
-        Args:
-            ids (list, optional): List of IDs to retrieve. If provided, only items with the specified IDs will be returned.
-            where (dict, optional): Metadata filter to apply. Items matching the filter will be returned.
-
-        Returns:
-            list: A list of retrieved items, each item being a tuple of (text, metadata).
-        """
         if ids is not None:
-            # Convert ids to a set for faster membership testing
             id_set = set(ids)
             items = [(self.index[id]['text'], self.index[id]['metadata']) for id in self.index if id in id_set]
         else:
             items = [(self.index[id]['text'], self.index[id]['metadata']) for id in self.index]
 
         if where is not None:
-            # Filter items based on metadata
             items = [item for item in items if all(item[1].get(key) == value for key, value in where.items())]
 
         return items
 
-
     def set(self, id, text=None, metadata=None, vector=None):
-        """
-        Updates the attributes of an item in the collection by ID.
-
-        Args:
-            id (str): ID of the item to update.
-            text (str, optional): Updated text content of the item.
-            metadata (dict, optional): Updated metadata of the item.
-            vector (numpy.ndarray, optional): Updated embedding vector of the item.
-        """
         print(f"Setting attributes for item with ID: {id}")
         if id in self.index:
             if text is not None:
@@ -286,48 +184,31 @@ def set(self, id, text=None, metadata=None, vector=None):
             print(f"Item with ID {id} not found.")
 
     def count(self):
-        """
-        Returns the number of items in the collection.
-
-        Returns:
-            int: The count of items in the collection.
-        """
         return len(self.index)
+    
 
     def save(self):
-        """
-        Saves the current state of the collection to a file.
-        """
+        """Rewrite the collection's omom file from the in-memory index, replacing what was stored before."""
         print(f"Saving collection to {self.collection}")
-        index_data = {
-            chunk_id: {
-                'text': chunk_data['text'],
-                'metadata': chunk_data['metadata'],
-                'vector': chunk_data['vector'],
-                'binary_vector': chunk_data['binary_vector'],
-                'int8_vector': chunk_data['int8_vector']
-            }
-            for chunk_id, chunk_data in self.index.items()
-        }
-        with open(self.collection, 'wb') as f:
-            np.savez(f, index=index_data)
+        # Not opened via `with`: __enter__ would load the existing file and every chunk below would be appended to it again.
+        omom_file = self.omom.create(self.collection)
+        model_metadata = self.model.model_metadata
+        omom_file.set_header(embedding_model=model_metadata['general.name'], embedding_size=model_metadata.get('bert.embedding_length', 1024),
+                             embedding_dtype=self.model.embedding_dtype, context_length=model_metadata.get('bert.context_length', 512))
+        for chunk_id, chunk_data in self.index.items():
+            omom_file.add_embedding(chunk_data['binary_vector'])
+            omom_file.add_context(chunk_data['text'])
+            omom_file.add_metadata(chunk_id, chunk_data['metadata'])
+        omom_file.save()
         print("Collection saved successfully.")
-        
 
     def clear(self):
-        """
-        Clears the entire collection, removing all items and resetting the attributes.
-        """
         print("Clearing the collection...")
         self.index = {}
-        self.save()
+        self.omom.delete(self.collection)
         print("Collection cleared.")
     
     def info(self):
-        """
-        Prints information about the collection, including the number of items, collection file path,
-        and the embedding model used.
-        """
         print("Collection Information:")
         print(f"  Items: {self.count()}")
         print(f"  Collection file: {self.collection}")
@@ -337,10 +218,4 @@ def __repr__(self):
         return f"VLite(collection={self.collection}, device={self.device}, model={self.model})"
 
     def dump(self):
-        """
-        Dumps the collection data to a dictionary for serialization.
-
-        Returns:
-            dict: A dictionary containing the collection data.
-        """
         return self.index
\ No newline at end of file
diff --git a/vlite/model.py b/vlite/model.py
index 2ab11e1..2de451d 100644
--- a/vlite/model.py
+++ b/vlite/model.py
@@ -9,38 +9,28 @@ class EmbeddingModel:
     def __init__(self, model_name='mixedbread-ai/mxbai-embed-large-v1'):
         hf_path = hf_hub_download(repo_id="mixedbread-ai/mxbai-embed-large-v1", filename="gguf/mxbai-embed-large-v1-f16.gguf")
         print(f"Downloaded model to {hf_path}")
-        
         self.model = llama_cpp.Llama(model_path=hf_path, embedding=True)
-        self.dimension = 1024 # hardcoded
-        self.max_seq_length = 512 # hardcoded
+        self.model_metadata = self.model.metadata
+        self.embedding_size = self.model_metadata.get("bert.embedding_length", 1024)
+        self.context_length = self.model_metadata.get("bert.context_length", 512)
+        self.embedding_dtype = "float32"
 
     def embed(self, texts, max_seq_length=512, device="cpu"):
         if isinstance(texts, str):
             texts = [texts]
         embeddings_dict = self.model.create_embedding(texts)
         return [item["embedding"] for item in embeddings_dict["data"]]
-    
+
     def token_count(self, texts):
-        enc = tiktoken.get_encoding("cl100k_base")        
+        enc = tiktoken.get_encoding("cl100k_base")
         tokens = 0
         for text in texts:
             token_ids = enc.encode(text, disallowed_special=())
             tokens += len(token_ids)
         return tokens
-    
-    def quantize(self, embeddings, precision="binary"):
-        """
-        Quantizes the embeddings to the specified precision.
-
-        Args:
-            embeddings (list or numpy.ndarray): Input embeddings to quantize.
-            precision (str, optional): Precision to quantize the embeddings. Can be "binary" or "int8". Defaults to "binary".
-
-        Returns:
-            numpy.ndarray: Quantized embeddings.
-        """
-        embeddings = np.array(embeddings)  # Convert embeddings to a numpy array
 
+    def quantize(self, embeddings, precision="binary"):
+        embeddings = np.array(embeddings)
         if precision == "binary":
             return np.packbits(embeddings > 0).reshape(embeddings.shape[0], -1)
         elif precision == "int8":
@@ -49,16 +39,4 @@ def quantize(self, embeddings, precision="binary"):
             raise ValueError(f"Unsupported precision: {precision}")
 
     def rescore(self, query_vector, vectors):
-        """
-        Rescores the retrieved vectors using the query vector.
-
-        Args:
-            query_vector (numpy.ndarray): Query vector for rescoring.
-            vectors (numpy.ndarray): Retrieved vectors to rescore.
-
-        Returns:
-            numpy.ndarray: Rescored similarities.
-        """
-        return np.dot(query_vector, vectors.T).flatten()
-
-    
\ No newline at end of file
+        return np.dot(query_vector, vectors.T).flatten()
\ No newline at end of file
diff --git a/vlite/omom.py b/vlite/omom.py
new file mode 100644
index 0000000..1f3dc23
--- /dev/null
+++ b/vlite/omom.py
@@ -0,0 +1,185 @@
+import os
+import struct
+import json
+from enum import Enum
+from typing import List, Dict, Union
+import numpy as np
+
+class OmomSectionType(Enum):
+    """Numeric tags written ahead of each section of an omom file."""
+    HEADER = 0
+    EMBEDDINGS = 1
+    CONTEXTS = 2
+    METADATA = 3
+
+class OmomFile:
+    """One `.omom` file held in memory: a JSON header, embeddings, context strings and JSON metadata.
+
+    On disk the layout is magic, version, then sections tagged with (type, byte length). Used as a
+    context manager, the file is loaded on entry and written back on exit unless the body raised.
+    """
+    MAGIC_NUMBER = b"OMOM"
+    VERSION = 1
+
+    def __init__(self, file_path):
+        self.file_path = file_path
+        self.header = {
+            "embedding_model": "default",
+            "embedding_size": 0,
+            "embedding_dtype": "float32",
+            "context_length": 0,
+        }
+        self.embeddings = []
+        self.contexts = []
+        self.metadata = {}
+
+    def set_header(self, embedding_model: str, embedding_size: int, embedding_dtype: str, context_length: int):
+        """Overwrite the four header fields describing the embedding model."""
+        self.header["embedding_model"] = embedding_model
+        self.header["embedding_size"] = embedding_size
+        self.header["embedding_dtype"] = embedding_dtype
+        self.header["context_length"] = context_length
+
+    def add_embedding(self, embedding: List[float]):
+        """Append one embedding vector (any sequence of numbers)."""
+        self.embeddings.append(embedding)
+
+    def add_context(self, context: str):
+        """Append one context string."""
+        self.contexts.append(context)
+
+    def add_metadata(self, key: str, value: Union[int, float, str]):
+        """Store value under key; save() serialises the whole mapping with json.dumps."""
+        self.metadata[key] = value
+
+    def save(self):
+        """Write header, embeddings (omitted when there are none), contexts and metadata to file_path."""
+        with open(self.file_path, "wb") as file:
+            file.write(self.MAGIC_NUMBER)
+            file.write(struct.pack("<I", self.VERSION))
+
+            header_json = json.dumps(self.header).encode("utf-8")
+            file.write(struct.pack("<II", OmomSectionType.HEADER.value, len(header_json)))
+            file.write(header_json)
+
+            if self.embeddings:
+                # Every value is stored as a little-endian float32; NaN is written as 0.0.
+                embeddings_data = b"".join(
+                    struct.pack(f"<{len(emb)}f", *[float(x) if not np.isnan(x) else 0.0 for x in emb])
+                    for emb in self.embeddings
+                )
+                file.write(struct.pack("<II", OmomSectionType.EMBEDDINGS.value, len(embeddings_data)))
+                file.write(embeddings_data)
+
+            # Each context is prefixed with its byte length so load() can split the section again.
+            encoded_contexts = [context.encode("utf-8") for context in self.contexts]
+            contexts_data = b"".join(struct.pack("<I", len(raw)) + raw for raw in encoded_contexts)
+            file.write(struct.pack("<II", OmomSectionType.CONTEXTS.value, len(contexts_data)))
+            file.write(contexts_data)
+
+            metadata_json = json.dumps(self.metadata).encode("utf-8")
+            file.write(struct.pack("<II", OmomSectionType.METADATA.value, len(metadata_json)))
+            file.write(metadata_json)
+
+    def load(self):
+        """Read file_path into this object; a missing file is ignored and leaves the object untouched.
+
+        The embeddings section stores neither a vector count nor a vector length, so the flat float
+        list is split into one equal-sized vector per context when that divides evenly (the layout
+        VLite.save produces); otherwise it is kept as a single vector.
+
+        Raises:
+            ValueError: bad magic number, unsupported version, truncated section header or unknown section type.
+        """
+        flat_embeddings = []
+        try:
+            with open(self.file_path, "rb") as file:
+                magic_number = file.read(len(self.MAGIC_NUMBER))
+                if magic_number != self.MAGIC_NUMBER:
+                    raise ValueError(f"Invalid magic number: {magic_number}")
+
+                version = struct.unpack("<I", file.read(4))[0]
+                if version != self.VERSION:
+                    raise ValueError(f"Unsupported version: {version}")
+
+                while True:
+                    section_header = file.read(8)
+                    if not section_header:
+                        break
+                    if len(section_header) < 8:
+                        raise ValueError("Truncated section header")
+                    section_type, section_length = struct.unpack("<II", section_header)
+
+                    if section_type == OmomSectionType.HEADER.value:
+                        self.header = json.loads(file.read(section_length).decode("utf-8"))
+                    elif section_type == OmomSectionType.EMBEDDINGS.value:
+                        embeddings_data = file.read(section_length)
+                        flat_embeddings = list(struct.unpack_from(f"<{len(embeddings_data) // 4}f", embeddings_data))
+                    elif section_type == OmomSectionType.CONTEXTS.value:
+                        self.contexts = self._decode_contexts(file.read(section_length))
+                    elif section_type == OmomSectionType.METADATA.value:
+                        self.metadata = json.loads(file.read(section_length).decode("utf-8"))
+                    else:
+                        raise ValueError(f"Unknown section type: {section_type}")
+        except FileNotFoundError:
+            # Deliberate: entering a `with` block for a file that does not exist yet must work so it can be created on exit.
+            return
+
+        if flat_embeddings:
+            vector_count = len(self.contexts)
+            if vector_count > 1 and len(flat_embeddings) % vector_count == 0:
+                width = len(flat_embeddings) // vector_count
+                self.embeddings = [flat_embeddings[start:start + width] for start in range(0, len(flat_embeddings), width)]
+            else:
+                self.embeddings = [flat_embeddings]
+
+    @staticmethod
+    def _decode_contexts(contexts_data):
+        """Split a contexts section into strings; each entry is a uint32 byte length followed by UTF-8 text."""
+        contexts = []
+        offset = 0
+        while offset < len(contexts_data):
+            context_length = struct.unpack_from("<I", contexts_data, offset)[0]
+            offset += 4
+            try:
+                contexts.append(contexts_data[offset : offset + context_length].decode("utf-8"))
+            except UnicodeDecodeError as e:
+                # Best effort: report and skip the entry rather than abort the whole load.
+                print(f"Error decoding context: {e}")
+            offset += context_length
+        return contexts
+
+    def __enter__(self):
+        """Load the file (if it exists) and return this object."""
+        self.load()
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        """Save on a clean exit only, so a failure inside the block cannot overwrite the file with partial data."""
+        if exc_type is None:
+            self.save()
+
+class Omom:
+    """Resolves names to `<directory>/<name>.omom` paths and wraps them in OmomFile objects."""
+    def __init__(self, directory="omnoms"):
+        self.directory = directory
+        if not os.path.exists(directory):
+            os.makedirs(directory)
+
+    def get(self, user):
+        """Return the path of the omom file for the given name."""
+        return os.path.join(self.directory, f"{user}.omom")
+
+    def create(self, user: str) -> OmomFile:
+        """Return an OmomFile bound to the name's path; nothing is read or written here."""
+        return OmomFile(self.get(user))
+
+    def read(self, user_id: str) -> OmomFile:
+        """Return an OmomFile bound to the name's path; the caller triggers the actual load."""
+        return OmomFile(self.get(user_id))
+
+    def delete(self, user_id: str):
+        """Remove the name's omom file if it exists."""
+        target = self.get(user_id)
+        if os.path.exists(target):
+            os.remove(target)
\ No newline at end of file