-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ddfe537
Showing
21 changed files
with
2,195 additions
and
0 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// NOTE(review): this module looks like build output from an MDX-style docs
// compiler (jsx-runtime calls, `frontmatter` export, `wrapper` lookup). If so,
// prefer fixing the source document and regenerating over hand-editing here.
//
// `jsxRuntime` is used only through `.jsx`, `.jsxs` and `.Fragment`.
// `useComponents()` returns an object that is spread into the tag -> component
// map; presumably the MDX components-context hook — confirm against the chunk.
import { u as useComponents, j as jsxRuntime } from "./index-BmZJsBfh.js";

// NOTE(review): `description` is the literal string "undefined", which looks
// like a stringified missing value from the generator. Kept byte-for-byte so
// the exported value does not change; set a real description in the source doc.
const frontmatter = {
  title: "RAG API Pipeline Architecture Overview",
  description: "undefined",
};

// Text node placed between sibling elements. The previous text of this file
// carried diff-table residue (" | ||") inside these separators, which would
// have rendered as visible pipes between every element.
const NEWLINE = "\n";

const DIAGRAM_URL =
  "https://mermaid.live/view#pako:eNplVF1vozAQ_CuWXy8l4SPQoNNJiZqmVZM2Ou7pnD44eAOogJEx7eWa_PdbQ0lzqSUke3Z2Zz22eaexFEBDmihepeTXbFMSHHWz7YBpprZ7DWxDP2bkG1lznb7x_YY-d2Qz1iu2zirIsxLIipfZDmr9TK6ufhxsyyYKuDiQ6ZJN1_dLyQWos9SniD1VUGKIRBXEfZZzyvqkTpdt0MGSCZSguIYDiVYsko2KgRS98GdGtGozXMyoNVf6QH5OFww_0rd7RjawYXstWyos_jBjD6V8y0EkQGa8Pqc_zD7YTs_GAl0YSnFhJIYiUK-g0Eqj0263Bf7zcblcMfxIgaeSn5s0ve9cujeZZ4H5is2LLQiRlcmXrItmZgs2k1wJJWVBFhK1S16ib6ai2Up_2h27d2NsjcgOuG4UEOilDp8b6tgfZvhIjkpe1anUXzgXy8UjW_CMP4ImayW1jGVOcPEm1cvzRQL5btpbPHbw3S27a5LE7PiWx4C9WyhtLtYXyZP_S0hOPrRG2wxFY6jrc_cddsM1J5E5znPcZfM_2tiVt4eWxX0Ua9IBLUAVPBP4jN4NvKE6hQI2NMSp4OplQzflEXm80TLalzENtWpgQJVskpSGO57XuGoqgRf6JuPYcXGBzkWGPZ3AvH1ENHynel-1zzerNQrEstxlicEblSOcal3V4XBowlaS6bTZWrEshnUmUnwN6evEH_qOf80dF_zA5WPXFfHWnlzvHM_eiWBkO5wejwMKrf6q-1e0v4wBrXj5G2_SqSlcG-U_NPS8seU7I8_3XMcbjQN_PKB7GrquhdBoPPa8IJg49gQL_20rjKxrN_B8Z-Lb2Ebg-sHxHxjOZcQ";

/**
 * Builds a heading element whose children are its title text followed by a
 * self-referencing anchor (`href="#<id>"`) wrapping an autolink-icon div.
 *
 * @param {object} components - Tag name -> component (or tag string) map.
 * @param {string} tag - Key into `components`: "h1", "h2" or "h3".
 * @param {string} id - Element id; also used as the anchor fragment.
 * @param {string} title - Visible heading text.
 * @returns {*} The element returned by `jsxRuntime.jsxs`.
 */
function heading(components, tag, id, title) {
  const anchor = jsxRuntime.jsx(components.a, {
    "aria-hidden": "true",
    tabIndex: "-1",
    href: `#${id}`,
    children: jsxRuntime.jsx(components.div, { "data-autolink-icon": true }),
  });
  return jsxRuntime.jsxs(components[tag], { id, children: [title, anchor] });
}

/**
 * Builds a list element with a newline text node before the first item and
 * after every item.
 *
 * @param {object} components - Tag name -> component (or tag string) map.
 * @param {string} tag - Key into `components`: "ul" or "ol".
 * @param {Array<string|Array>} entries - One entry per `li`. A string becomes
 *   the single child; an array is passed through as the item's children.
 * @returns {*} The element returned by `jsxRuntime.jsxs`.
 */
function list(components, tag, entries) {
  const children = [NEWLINE];
  for (const entry of entries) {
    // Array children go through `jsxs`, single children through `jsx`,
    // matching the calls the compiler originally emitted.
    const create = Array.isArray(entry) ? jsxRuntime.jsxs : jsxRuntime.jsx;
    children.push(create(components.li, { children: entry }), NEWLINE);
  }
  return jsxRuntime.jsxs(components[tag], { children });
}

/**
 * Renders the document body as a fragment of headings, paragraphs, lists and
 * the diagram iframe, separated by newline text nodes.
 *
 * @param {object} props - Component props; `props.components` (optional)
 *   overrides entries of the default tag map.
 * @returns {*} The fragment element returned by `jsxRuntime.jsxs`.
 */
function createContent(props) {
  // Later spreads win: built-in tag names < useComponents() < props.components.
  const c = {
    a: "a",
    div: "div",
    h1: "h1",
    h2: "h2",
    h3: "h3",
    header: "header",
    li: "li",
    ol: "ol",
    p: "p",
    ul: "ul",
    ...useComponents(),
    ...props.components,
  };

  const blocks = [
    jsxRuntime.jsx(c.header, {
      children: heading(
        c,
        "h1",
        "rag-api-pipeline-architecture-overview",
        "RAG API Pipeline Architecture Overview",
      ),
    }),

    heading(c, "h2", "introduction", "Introduction"),
    jsxRuntime.jsx(c.p, {
      children:
        "This document provides an overview of the RAG (Retrieval-Augmented Generation) API Pipeline architecture. The system is designed to extract, process, and store data from the Boardroom Governance API, creating a knowledge base that can be queried using natural language processing techniques.",
    }),

    heading(c, "h2", "diagram", "Diagram"),
    // Plain "iframe" tag: not routed through the component map.
    jsxRuntime.jsx("iframe", {
      src: DIAGRAM_URL,
      width: "100%",
      height: "900px",
    }),
    jsxRuntime.jsx(c.p, {
      children: jsxRuntime.jsx(c.a, {
        href: "https://res.cloudinary.com/dwx9alovg/image/upload/v1725856208/rag-pipeline/toms0dzpmfbrmuw87xmd.png",
        children: "Full Image",
      }),
    }),

    heading(c, "h2", "components", "Components"),

    heading(c, "h3", "1-pipeline-manifest", "1. Pipeline Manifest"),
    list(c, "ul", [
      "A YAML file that defines the configuration settings and API endpoints for extraction.",
      "Read at the start of the pipeline process (step 1.1).",
    ]),

    heading(c, "h3", "2-openapi-spec", "2. OpenAPI Spec"),
    list(c, "ul", [
      "A YAML file containing the OpenAPI specification for the Boardroom Governance API.",
      "Read by the APILoader component (step 1.2).",
    ]),

    heading(c, "h3", "3-apiloader", "3. APILoader"),
    list(c, "ul", [
      "Reads the Pipeline Manifest and OpenAPI Spec.",
      "Generates a Source Manifest (step 2) based on the input configurations.",
    ]),

    heading(c, "h3", "4-source-manifest", "4. Source Manifest"),
    list(c, "ul", [
      "A YAML file generated by the APILoader.",
      "Contains detailed information about the data sources and extraction parameters.",
    ]),

    heading(c, "h3", "5-boardroom-governance-api", "5. Boardroom Governance API"),
    list(c, "ul", [
      "The primary data source for the pipeline.",
      "Data is extracted from this API (step 4).",
    ]),

    heading(c, "h3", "6-airbyte--pathway", "6. Airbyte + Pathway"),
    list(c, "ul", [
      "Airbyte is used for data extraction and initial processing.",
      "Pathway is used for data transformation and pipelining.",
      "These components work together to process the extracted data (step 5).",
    ]),

    heading(c, "h3", "7-rag-pipeline", "7. RAG Pipeline"),
    list(c, "ul", [
      [
        "Consists of several sub-steps:",
        NEWLINE,
        list(c, "ul", [
          "Preprocessing",
          "Normalization",
          "Semantic chunking",
          "Feature embeddings",
        ]),
        NEWLINE,
      ],
      "Processes the data extracted by Airbyte (step 5).",
    ]),

    heading(c, "h3", "8-qdrant-vector-store", "8. Qdrant Vector Store"),
    list(c, "ul", [
      "A vector database used to store the processed and embedded data.",
      "Data is stored here after processing (step 5.5).",
    ]),

    heading(c, "h3", "9-rag-api-server", "9. RAG API Server"),
    list(c, "ul", [
      [
        "Hosts the following components:",
        NEWLINE,
        list(c, "ul", [
          "LLM (Language Model)",
          "Embedding model",
          "OpenAI API integration",
        ]),
        NEWLINE,
      ],
      "Interfaces with the Qdrant Vector Store to retrieve relevant information.",
      "Connects to the GaiaNet Protocol Network.",
    ]),

    heading(c, "h3", "10-openai-api", "10. OpenAI API"),
    list(c, "ul", [
      "Used by the RAG API Server for advanced natural language processing tasks.",
    ]),

    heading(c, "h3", "11-gaianet-protocol-network", "11. GaiaNet Protocol Network"),
    list(c, "ul", [
      "The broader network that the RAG API Server interfaces with.",
      "Consists of multiple Gaia Nodes.",
    ]),

    heading(c, "h2", "process-flow", "Process Flow"),
    list(c, "ol", [
      "The pipeline starts by reading the Pipeline Manifest (1.1) and OpenAPI Spec (1.2).",
      "The APILoader generates a Source Manifest based on these inputs.",
      "The pipeline begins data extraction from the Boardroom Governance API.",
      "Extracted data is processed through the Airbyte + Pathway components.",
      "The RAG Pipeline performs preprocessing, normalization, semantic chunking, and feature embedding.",
      "Processed data is stored in the Qdrant Vector Store.",
      "The RAG API Server can now access this data to respond to queries.",
      "The RAG API Server may use the OpenAI API for additional processing or generation tasks.",
      "The RAG API Server interfaces with the GaiaNet Protocol Network to provide its services.",
    ]),

    heading(
      c,
      "h2",
      "using-pre-generated-snapshots-and-models",
      "Using Pre-generated snapshots and models",
    ),
    list(c, "ul", [
      "You can also use models and snapshots supported by Gaianet node by defining them in the Gaianet node config file.",
      [
        "Check out the ",
        // NOTE(review): plain-http link kept as-is; consider https in the source doc.
        jsxRuntime.jsx(c.a, {
          href: "http://docs.gaianet.ai",
          children: "Gaianet docs",
        }),
        " for more information.",
      ],
    ]),
  ];

  // One newline text node between consecutive blocks; none before the first
  // block or after the last.
  const children = blocks.flatMap((block, index) =>
    index === 0 ? [block] : [NEWLINE, block],
  );
  return jsxRuntime.jsxs(jsxRuntime.Fragment, { children });
}

/**
 * Default export: renders the document, wrapped in a `wrapper` component when
 * one is supplied by `useComponents()` or `props.components`.
 *
 * @param {object} [props={}] - Props forwarded to the wrapper and the content.
 * @returns {*} The wrapper element containing the content, or the bare content.
 */
function MDXContent(props = {}) {
  const { wrapper: Wrapper } = { ...useComponents(), ...props.components };
  if (!Wrapper) {
    return createContent(props);
  }
  return jsxRuntime.jsx(Wrapper, {
    ...props,
    children: jsxRuntime.jsx(createContent, { ...props }),
  });
}

export { MDXContent as default, frontmatter };
Oops, something went wrong.