Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
  • Loading branch information
santteegt committed Sep 18, 2024
0 parents commit ddfe537
Show file tree
Hide file tree
Showing 21 changed files with 2,195 additions and 0 deletions.
113 changes: 113 additions & 0 deletions architecture/index.html

Large diffs are not rendered by default.

93 changes: 93 additions & 0 deletions assets/architecture-BXTo28X5.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
/*
 * Minified page module for "RAG API Pipeline Architecture Overview".
 * NOTE(review): this looks like bundler output for a compiled MDX page; if so,
 * content fixes belong in the MDX source, not in this file - confirm.
 *
 * Imports (from ./index-BmZJsBfh.js):
 *   a - called with no arguments; its result is spread into the component map.
 *       Presumably an MDX components-provider hook - TODO confirm.
 *   e - object used via e.jsx, e.jsxs and e.Fragment.
 *       Presumably the React JSX runtime - TODO confirm.
 *
 * Definitions:
 *   s     - page metadata object { title, description }, exported as `frontmatter`.
 *   r(i)  - builds the page content. It creates a tag map `n` whose defaults are
 *           the plain tag names (a, div, h1, h2, h3, header, li, ol, p, ul), then
 *           lets a() and i.components override them, and returns a Fragment holding
 *           the headings, paragraphs, lists, the diagram iframe and the links.
 *   d(i)  - default export; `i` defaults to {}. It reads `wrapper` from
 *           {...a(), ...i.components}. When `wrapper` is truthy it renders
 *           wrapper with props `i` and r's output as its child; otherwise it
 *           returns r(i) directly.
 *
 * Formatting hazard: every physical line break below sits INSIDE a backtick
 * template literal, so each newline is a "\n" string child passed to jsxs.
 * Re-wrapping or re-indenting these lines changes the strings that are rendered.
 *
 * NOTE(review): s.description is the string literal "undefined", not a missing
 * value - presumably the source page had no description set; verify upstream.
 * NOTE(review): the "iframe" element is given only src, width and height; it has
 * no title attribute, which accessibility guidance asks for on frames.
 * NOTE(review): the "Gaianet docs" link uses http://docs.gaianet.ai rather than
 * https - confirm whether the https URL should be used.
 */
import{u as a,j as e}from"./index-BmZJsBfh.js";const s={title:"RAG API Pipeline Architecture Overview",description:"undefined"};function r(i){const n={a:"a",div:"div",h1:"h1",h2:"h2",h3:"h3",header:"header",li:"li",ol:"ol",p:"p",ul:"ul",...a(),...i.components};return e.jsxs(e.Fragment,{children:[e.jsx(n.header,{children:e.jsxs(n.h1,{id:"rag-api-pipeline-architecture-overview",children:["RAG API Pipeline Architecture Overview",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#rag-api-pipeline-architecture-overview",children:e.jsx(n.div,{"data-autolink-icon":!0})})]})}),`
`,e.jsxs(n.h2,{id:"introduction",children:["Introduction",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#introduction",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsx(n.p,{children:"This document provides an overview of the RAG (Retrieval-Augmented Generation) API Pipeline architecture. The system is designed to extract, process, and store data from the Boardroom Governance API, creating a knowledge base that can be queried using natural language processing techniques."}),`
`,e.jsxs(n.h2,{id:"diagram",children:["Diagram",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#diagram",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsx("iframe",{src:"https://mermaid.live/view#pako:eNplVF1vozAQ_CuWXy8l4SPQoNNJiZqmVZM2Ou7pnD44eAOogJEx7eWa_PdbQ0lzqSUke3Z2Zz22eaexFEBDmihepeTXbFMSHHWz7YBpprZ7DWxDP2bkG1lznb7x_YY-d2Qz1iu2zirIsxLIipfZDmr9TK6ufhxsyyYKuDiQ6ZJN1_dLyQWos9SniD1VUGKIRBXEfZZzyvqkTpdt0MGSCZSguIYDiVYsko2KgRS98GdGtGozXMyoNVf6QH5OFww_0rd7RjawYXstWyos_jBjD6V8y0EkQGa8Pqc_zD7YTs_GAl0YSnFhJIYiUK-g0Eqj0263Bf7zcblcMfxIgaeSn5s0ve9cujeZZ4H5is2LLQiRlcmXrItmZgs2k1wJJWVBFhK1S16ib6ai2Up_2h27d2NsjcgOuG4UEOilDp8b6tgfZvhIjkpe1anUXzgXy8UjW_CMP4ImayW1jGVOcPEm1cvzRQL5btpbPHbw3S27a5LE7PiWx4C9WyhtLtYXyZP_S0hOPrRG2wxFY6jrc_cddsM1J5E5znPcZfM_2tiVt4eWxX0Ua9IBLUAVPBP4jN4NvKE6hQI2NMSp4OplQzflEXm80TLalzENtWpgQJVskpSGO57XuGoqgRf6JuPYcXGBzkWGPZ3AvH1ENHynel-1zzerNQrEstxlicEblSOcal3V4XBowlaS6bTZWrEshnUmUnwN6evEH_qOf80dF_zA5WPXFfHWnlzvHM_eiWBkO5wejwMKrf6q-1e0v4wBrXj5G2_SqSlcG-U_NPS8seU7I8_3XMcbjQN_PKB7GrquhdBoPPa8IJg49gQL_20rjKxrN_B8Z-Lb2Ebg-sHxHxjOZcQ",width:"100%",height:"900px"}),`
`,e.jsx(n.p,{children:e.jsx(n.a,{href:"https://res.cloudinary.com/dwx9alovg/image/upload/v1725856208/rag-pipeline/toms0dzpmfbrmuw87xmd.png",children:"Full Image"})}),`
`,e.jsxs(n.h2,{id:"components",children:["Components",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#components",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.h3,{id:"1-pipeline-manifest",children:["1. Pipeline Manifest",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#1-pipeline-manifest",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"A YAML file that defines the configuration settings and API endpoints for extraction."}),`
`,e.jsx(n.li,{children:"Read at the start of the pipeline process (step 1.1)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"2-openapi-spec",children:["2. OpenAPI Spec",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#2-openapi-spec",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"A YAML file containing the OpenAPI specification for the Boardroom Governance API."}),`
`,e.jsx(n.li,{children:"Read by the APILoader component (step 1.2)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"3-apiloader",children:["3. APILoader",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#3-apiloader",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"Reads the Pipeline Manifest and OpenAPI Spec."}),`
`,e.jsx(n.li,{children:"Generates a Source Manifest (step 2) based on the input configurations."}),`
`]}),`
`,e.jsxs(n.h3,{id:"4-source-manifest",children:["4. Source Manifest",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#4-source-manifest",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"A YAML file generated by the APILoader."}),`
`,e.jsx(n.li,{children:"Contains detailed information about the data sources and extraction parameters."}),`
`]}),`
`,e.jsxs(n.h3,{id:"5-boardroom-governance-api",children:["5. Boardroom Governance API",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#5-boardroom-governance-api",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"The primary data source for the pipeline."}),`
`,e.jsx(n.li,{children:"Data is extracted from this API (step 4)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"6-airbyte--pathway",children:["6. Airbyte + Pathway",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#6-airbyte--pathway",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"Airbyte is used for data extraction and initial processing."}),`
`,e.jsx(n.li,{children:"Pathway is used for data transformation and pipelining."}),`
`,e.jsx(n.li,{children:"These components work together to process the extracted data (step 5)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"7-rag-pipeline",children:["7. RAG Pipeline",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#7-rag-pipeline",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsxs(n.li,{children:["Consists of several sub-steps:",`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"Preprocessing"}),`
`,e.jsx(n.li,{children:"Normalization"}),`
`,e.jsx(n.li,{children:"Semantic chunking"}),`
`,e.jsx(n.li,{children:"Feature embeddings"}),`
`]}),`
`]}),`
`,e.jsx(n.li,{children:"Processes the data extracted by Airbyte (step 5)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"8-qdrant-vector-store",children:["8. Qdrant Vector Store",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#8-qdrant-vector-store",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"A vector database used to store the processed and embedded data."}),`
`,e.jsx(n.li,{children:"Data is stored here after processing (step 5.5)."}),`
`]}),`
`,e.jsxs(n.h3,{id:"9-rag-api-server",children:["9. RAG API Server",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#9-rag-api-server",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsxs(n.li,{children:["Hosts the following components:",`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"LLM (Language Model)"}),`
`,e.jsx(n.li,{children:"Embedding model"}),`
`,e.jsx(n.li,{children:"OpenAI API integration"}),`
`]}),`
`]}),`
`,e.jsx(n.li,{children:"Interfaces with the Qdrant Vector Store to retrieve relevant information."}),`
`,e.jsx(n.li,{children:"Connects to the GaiaNet Protocol Network."}),`
`]}),`
`,e.jsxs(n.h3,{id:"10-openai-api",children:["10. OpenAI API",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#10-openai-api",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"Used by the RAG API Server for advanced natural language processing tasks."}),`
`]}),`
`,e.jsxs(n.h3,{id:"11-gaianet-protocol-network",children:["11. GaiaNet Protocol Network",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#11-gaianet-protocol-network",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"The broader network that the RAG API Server interfaces with."}),`
`,e.jsx(n.li,{children:"Consists of multiple Gaia Nodes."}),`
`]}),`
`,e.jsxs(n.h2,{id:"process-flow",children:["Process Flow",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#process-flow",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ol,{children:[`
`,e.jsx(n.li,{children:"The pipeline starts by reading the Pipeline Manifest (1.1) and OpenAPI Spec (1.2)."}),`
`,e.jsx(n.li,{children:"The APILoader generates a Source Manifest based on these inputs."}),`
`,e.jsx(n.li,{children:"The pipeline begins data extraction from the Boardroom Governance API."}),`
`,e.jsx(n.li,{children:"Extracted data is processed through the Airbyte + Pathway components."}),`
`,e.jsx(n.li,{children:"The RAG Pipeline performs preprocessing, normalization, semantic chunking, and feature embedding."}),`
`,e.jsx(n.li,{children:"Processed data is stored in the Qdrant Vector Store."}),`
`,e.jsx(n.li,{children:"The RAG API Server can now access this data to respond to queries."}),`
`,e.jsx(n.li,{children:"The RAG API Server may use the OpenAI API for additional processing or generation tasks."}),`
`,e.jsx(n.li,{children:"The RAG API Server interfaces with the GaiaNet Protocol Network to provide its services."}),`
`]}),`
`,e.jsxs(n.h2,{id:"using-pre-generated-snapshots-and-models",children:["Using Pre-generated snapshots and models",e.jsx(n.a,{"aria-hidden":"true",tabIndex:"-1",href:"#using-pre-generated-snapshots-and-models",children:e.jsx(n.div,{"data-autolink-icon":!0})})]}),`
`,e.jsxs(n.ul,{children:[`
`,e.jsx(n.li,{children:"You can also use models and snapshots supported by Gaianet node by defining them in the Gaianet node config file."}),`
`,e.jsxs(n.li,{children:["Check out the ",e.jsx(n.a,{href:"http://docs.gaianet.ai",children:"Gaianet docs"})," for more information."]}),`
`]})]})}function d(i={}){const{wrapper:n}={...a(),...i.components};return n?e.jsx(n,{...i,children:e.jsx(r,{...i})}):r(i)}export{d as default,s as frontmatter};
Loading

0 comments on commit ddfe537

Please sign in to comment.