From fcd056ba26ffbd258043df093a13652de6c33aa2 Mon Sep 17 00:00:00 2001 From: Michele Riva Date: Thu, 17 Oct 2024 09:59:40 +0200 Subject: [PATCH] docs: updates docs --- README.md | 1 + .../open-source/plugins/plugin-embeddings.mdx | 5 + .../plugins/plugin-secure-proxy.mdx | 12 +- packages/orama/README.md | 134 +++++++++++++++++- 4 files changed, 147 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 831fafc6..33321040 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ If you need more info, help, or want to provide general feedback on Orama, join - [Full-Text search](https://docs.orama.com/open-source/usage/search/introduction) - [Vector Search](https://docs.orama.com/open-source/usage/search/vector-search) - [Hybrid Search](https://docs.orama.com/open-source/usage/search/hybrid-search) +- [GenAI Chat Sessions](https://docs.orama.com/open-source/usage/answer-engine/introduction) - [Search Filters](https://docs.orama.com/open-source/usage/search/filters) - [Geosearch](https://docs.orama.com/open-source/usage/search/geosearch) - [Facets](https://docs.orama.com/open-source/usage/search/facets) diff --git a/packages/docs/src/content/docs/open-source/plugins/plugin-embeddings.mdx b/packages/docs/src/content/docs/open-source/plugins/plugin-embeddings.mdx index b9dee0b4..8e2c0fbe 100644 --- a/packages/docs/src/content/docs/open-source/plugins/plugin-embeddings.mdx +++ b/packages/docs/src/content/docs/open-source/plugins/plugin-embeddings.mdx @@ -2,6 +2,7 @@ title: Plugin Embeddings description: Generate embeddings for your documents offline and use them for vector search. --- +import { Aside } from '@astrojs/starlight/components'; import Install from '../../../../components/Install.astro' To perform vector and hybrid search, you need to convert your text data into embeddings. 
@@ -35,6 +36,10 @@ If you're using Orama in Node.js, we recommend using `@tensorflow/tfjs-node`: ## Usage + + This plugin will generate text embeddings for you at insert and search time, allowing you to perform vector and hybrid searches on your documents. ```js diff --git a/packages/docs/src/content/docs/open-source/plugins/plugin-secure-proxy.mdx b/packages/docs/src/content/docs/open-source/plugins/plugin-secure-proxy.mdx index 6f975b40..2e8e5f0c 100644 --- a/packages/docs/src/content/docs/open-source/plugins/plugin-secure-proxy.mdx +++ b/packages/docs/src/content/docs/open-source/plugins/plugin-secure-proxy.mdx @@ -58,6 +58,10 @@ Right now, the Orama Secure Proxy Plugin supports two different models for gener ## Running queries + + By telling on which property to perform search by default (in the example above, `'embeddings'`), the plugin will automatically translate your search term into a vector by calling the OpenAI API for you and setting the result into the `vector.value` property. This will finally allow you to perform hybrid and vector search with the exact same APIs used for full-text search. 
@@ -65,12 +69,12 @@ This will finally allow you to perform hybrid and vector search with the exact s ```js import { search } from '@orama/orama' -const resultsHybrid = search(db, { +const resultsHybrid = await search(db, { mode: 'hybrid', term: 'Videogame for little kids with a passion about ice cream' }) -const resultsVector = search(db, { +const resultsVector = await search(db, { mode: 'vector', term: 'Videogame for little kids with a passion about ice cream' }) @@ -81,9 +85,9 @@ const resultsVector = search(db, { If you have a more complex schema with multiple vector properties, you can always override the vector property to perform search on by using the default `vector` property: ```js -const resultsVector = search(db, { +const resultsVector = await search(db, { mode: 'vector', - term: 'Videogame for little kids with a passion about ice cream' + term: 'Videogame for little kids with a passion about ice cream', vector: { property: 'myAlternativeProperty' } diff --git a/packages/orama/README.md b/packages/orama/README.md index 04f97c00..33321040 100644 --- a/packages/orama/README.md +++ b/packages/orama/README.md @@ -9,8 +9,10 @@ If you need more info, help, or want to provide general feedback on Orama, join # Highlighted features +- [Full-Text search](https://docs.orama.com/open-source/usage/search/introduction) - [Vector Search](https://docs.orama.com/open-source/usage/search/vector-search) - [Hybrid Search](https://docs.orama.com/open-source/usage/search/hybrid-search) +- [GenAI Chat Sessions](https://docs.orama.com/open-source/usage/answer-engine/introduction) - [Search Filters](https://docs.orama.com/open-source/usage/search/filters) - [Geosearch](https://docs.orama.com/open-source/usage/search/geosearch) - [Facets](https://docs.orama.com/open-source/usage/search/facets) @@ -129,6 +131,136 @@ Orama currently supports 10 different data types: | `enum[]` | An array of enums. 
| `['comedy', 'action', 'romance']` | | `vector[]` | A vector of numbers to perform vector search on. | `[0.403, 0.192, 0.830]` | +# Vector and Hybrid Search Support + +Orama supports both vector and hybrid search by just setting `mode: 'vector'` or `mode: 'hybrid'` when performing search. + +To perform this kind of search, you'll need to provide [text embeddings](https://en.wikipedia.org/wiki/Word_embedding) at search time: + +```js +import { create, insertMultiple, search } from '@orama/orama' + +const db = create({ + schema: { + title: 'string', + embedding: 'vector[5]', // we are using a 5-dimensional vector. + }, +}); + +insertMultiple(db, [ + { title: 'The Prestige', embedding: [0.938293, 0.284951, 0.348264, 0.948276, 0.56472] }, + { title: 'Barbie', embedding: [0.192839, 0.028471, 0.284738, 0.937463, 0.092827] }, + { title: 'Oppenheimer', embedding: [0.827391, 0.927381, 0.001982, 0.983821, 0.294841] }, +]) + +const results = search(db, { + // Search mode. Can be 'vector', 'hybrid', or 'fulltext' + mode: 'vector', + vector: { + // The vector (text embedding) to use for search + value: [0.938292, 0.284961, 0.248264, 0.748276, 0.26472], + // The schema property where Orama should compare embeddings + property: 'embedding', + }, + // Minimum similarity to determine a match. Defaults to `0.8` + similarity: 0.85, + // Defaults to `false`. Setting to 'true' will return the embeddings in the response (which can be very large). + includeVectors: true, +}) +``` + +Have trouble generating embeddings for vector and hybrid search? Try our `@orama/plugin-embeddings` plugin! 
+ +```js +import { create, insert, search } from '@orama/orama' +import { pluginEmbeddings } from '@orama/plugin-embeddings' +import '@tensorflow/tfjs-node' // Or any other appropriate TensorflowJS backend, like @tensorflow/tfjs-backend-webgl + +const plugin = await pluginEmbeddings({ + embeddings: { + // Schema property used to store generated embeddings + defaultProperty: 'embeddings', + onInsert: { + // Generate embeddings at insert-time + generate: true, + // properties to use for generating embeddings at insert time. + // Will be concatenated to generate a unique embedding. + properties: ['description'], + verbose: true, + } + } +}) + +const db = create({ + schema: { + description: 'string', + // Orama generates 512-dimensions vectors. + // When using @orama/plugin-embeddings, set the property where you want to store embeddings as `vector[512]`. + embeddings: 'vector[512]' + }, + plugins: [plugin] +}) + +// Orama will generate and store embeddings at insert-time! +await insert(db, { description: 'Classroom Headphones Bulk 5 Pack, Student On Ear Color Varieties' }) +await insert(db, { description: 'Kids Wired Headphones for School Students K-12' }) +await insert(db, { description: 'Kids Headphones Bulk 5-Pack for K-12 School' }) +await insert(db, { description: 'Bose QuietComfort Bluetooth Headphones' }) + +// Orama will also generate and use embeddings at search time when search mode is set to "vector" or "hybrid"! +const searchResults = await search(db, { + term: 'Headphones for 12th grade students', + mode: 'vector' +}) +``` + +Want to use OpenAI embedding models? Use our [Secure Proxy](https://docs.orama.com/open-source/plugins/plugin-secure-proxy) plugin to call OpenAI from the client-side securely. + +# RAG and Chat Experiences with Orama + +Since `v3.0.0`, Orama allows you to create your own ChatGPT/Perplexity/SearchGPT-like experience. 
You will need to call the OpenAI APIs, so we strongly recommend using the [Secure Proxy Plugin](https://docs.orama.com/open-source/plugins/plugin-secure-proxy) to do that securely from your client side. It's free! + +```js +import { create, insert, AnswerSession } from '@orama/orama' +import { pluginSecureProxy } from '@orama/plugin-secure-proxy' + +const secureProxy = await pluginSecureProxy({ + apiKey: 'my-api-key', + defaultProperty: 'embeddings', + models: { + // The chat model to use to generate the chat answer + chat: 'openai/gpt-4o-mini' + } +}) + +const db = create({ + schema: { + name: 'string' + }, + plugins: [secureProxy] +}) + +insert(db, { name: 'John Doe' }) +insert(db, { name: 'Jane Doe' }) + +const session = new AnswerSession(db, { + // Customize the prompt for the system + systemPrompt: 'You will get a name as context, please provide a greeting message', + events: { + // Log all state changes. Useful to reactively update a UI on a new message chunk, sources, etc. + onStateChange: console.log, + } +}) + +const response = await session.ask({ + term: 'john' +}) + +console.log(response) // Hello, John Doe! How are you doing? +``` + +Read the complete documentation [here](https://docs.orama.com/open-source/usage/answer-engine/introduction). + # Official Docs Read the complete documentation at [https://docs.orama.com/open-source](https://docs.orama.com/open-source). 
@@ -136,13 +268,13 @@ Read the complete documentation at [https://docs.orama.com/open-source](https:// # Official Orama Plugins - [Plugin Embeddings](https://docs.orama.com/open-source/plugins/plugin-embeddings) +- [Plugin Secure Proxy](https://docs.orama.com/open-source/plugins/plugin-secure-proxy) - [Plugin Analytics](https://docs.orama.com/open-source/plugins/plugin-analytics) - [Plugin Data Persistence](https://docs.orama.com/open-source/plugins/plugin-data-persistence) - [Plugin QPS](https://docs.orama.com/open-source/plugins/plugin-qps) - [Plugin PT15](https://docs.orama.com/open-source/plugins/plugin-pt15) - [Plugin Vitepress](https://docs.orama.com/open-source/plugins/plugin-vitepress) - [Plugin Docusaurus](https://docs.orama.com/open-source/plugins/plugin-docusaurus) -- [Plugin Secure Proxy](https://docs.orama.com/open-source/plugins/plugin-secure-proxy) - [Plugin Astro](https://docs.orama.com/open-source/plugins/plugin-astro) - [Plugin Nextra](https://docs.orama.com/open-source/plugins/plugin-nextra)