diff --git a/website/src/app/api/downloadTutorialDataset/route.ts b/website/src/app/api/downloadTutorialDataset/route.ts new file mode 100644 index 00000000..25703ec8 --- /dev/null +++ b/website/src/app/api/downloadTutorialDataset/route.ts @@ -0,0 +1,27 @@ +import { NextResponse } from "next/server"; + +export async function GET(request: Request) { + const { searchParams } = new URL(request.url); + const fileId = searchParams.get("fileId"); + + if (!fileId) { + return new NextResponse("File ID is required", { status: 400 }); + } + + try { + const driveUrl = `https://drive.google.com/uc?export=download&id=${fileId}`; + const response = await fetch(driveUrl); + + if (!response.ok) { + throw new Error("Failed to download file from Google Drive"); + } + + const data = await response.blob(); + return new NextResponse(data); + } catch (error) { + console.error("Error downloading tutorial dataset:", error); + return new NextResponse("Failed to download tutorial dataset", { + status: 500, + }); + } +} diff --git a/website/src/app/api/utils.ts b/website/src/app/api/utils.ts index 291f013c..261dd4ee 100644 --- a/website/src/app/api/utils.ts +++ b/website/src/app/api/utils.ts @@ -193,27 +193,14 @@ export function generatePipelineConfig( ); // Fix type errors by asserting the pipeline config type - const pipelineConfig: { - datasets: any; - default_model: string; - optimizer_config: any; - operations: any[]; - pipeline: { - steps: any[]; - output: { - type: string; - path: string; - intermediate_dir: string; - }; - }; - system_prompt: Record; - llm_api_keys?: Record; - } = { + const pipelineConfig: any = { datasets, default_model, - optimizer_config: { - force_decompose: true, - }, + ...(enable_observability && { + optimizer_config: { + force_decompose: true, + }, + }), operations: updatedOperations, pipeline: { steps: [ diff --git a/website/src/app/playground/page.tsx b/website/src/app/playground/page.tsx index 7e4c2983..22e08b39 100644 --- a/website/src/app/playground/page.tsx +++ b/website/src/app/playground/page.tsx @@ -104,6 +104,7 @@ const NamespaceDialog = dynamic( ); import { ThemeProvider, useTheme, Theme } from "@/contexts/ThemeContext"; import { APIKeysDialog } from "@/components/APIKeysDialog"; +import { TutorialsDialog, TUTORIALS } from "@/components/TutorialsDialog"; const LeftPanelIcon: React.FC<{ isActive: boolean }> = ({ isActive }) => ( { const [showChat, setShowChat] = useState(false); const [showNamespaceDialog, setShowNamespaceDialog] = useState(false); const [showAPIKeysDialog, setShowAPIKeysDialog] = useState(false); + const [showTutorialsDialog, setShowTutorialsDialog] = useState(false); + const [selectedTutorial, setSelectedTutorial] = + useState<(typeof TUTORIALS)[0]>(); const { theme, setTheme } = useTheme(); const { @@ -271,6 +275,10 @@ const CodeEditorPipelineApp: React.FC = () => { unsavedChanges, namespace, setNamespace, + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, } = usePipelineContext(); useEffect(() => { @@ -490,6 +498,22 @@ const CodeEditorPipelineApp: React.FC = () => { > Show Documentation + + Tutorials + + {TUTORIALS.map((tutorial) => ( + { + setSelectedTutorial(tutorial); + setShowTutorialsDialog(true); + }} + > + {tutorial.title} + + ))} + + setShowChat(!showChat)}> Show Chat @@ -740,6 +764,23 @@ const CodeEditorPipelineApp: React.FC = () => { open={showAPIKeysDialog} onOpenChange={setShowAPIKeysDialog} /> + + setFiles((prevFiles) => [...prevFiles, file]) + } + setCurrentFile={setCurrentFile} + setOperations={setOperations} + 
setPipelineName={setPipelineName} + setSampleSize={setSampleSize} + setDefaultModel={setDefaultModel} + setFiles={setFiles} + currentFile={currentFile} + files={files} + /> ); diff --git a/website/src/app/types.ts b/website/src/app/types.ts index b7f85f2c..fd6768d7 100644 --- a/website/src/app/types.ts +++ b/website/src/app/types.ts @@ -1,8 +1,9 @@ export type File = { name: string; path: string; - type: "json" | "document"; + type: "json" | "document" | "pipeline-yaml"; parentFolder?: string; + blob?: Blob; }; export type Operation = { diff --git a/website/src/components/FileExplorer.tsx b/website/src/components/FileExplorer.tsx index 90516791..6026a910 100644 --- a/website/src/components/FileExplorer.tsx +++ b/website/src/components/FileExplorer.tsx @@ -54,6 +54,7 @@ import { TooltipTrigger, } from "./ui/tooltip"; import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group"; +import { useDatasetUpload } from "@/hooks/useDatasetUpload"; interface FileExplorerProps { files: File[]; @@ -211,12 +212,17 @@ export const FileExplorer: React.FC = ({ const [draggedFiles, setDraggedFiles] = useState(0); const [viewingDocument, setViewingDocument] = useState(null); const [folderToDelete, setFolderToDelete] = useState(null); - const [uploadingFiles, setUploadingFiles] = useState>(new Set()); const [conversionMethod, setConversionMethod] = useState("local"); const [azureEndpoint, setAzureEndpoint] = useState(""); const [azureKey, setAzureKey] = useState(""); + const { uploadingFiles, uploadDataset } = useDatasetUpload({ + namespace, + onFileUpload, + setCurrentFile, + }); + // Group files by folder const groupedFiles = files.reduce((acc: { [key: string]: File[] }, file) => { const folder = file.parentFolder || "root"; @@ -243,73 +249,13 @@ export const FileExplorer: React.FC = ({ console.log("No file selected"); return; } - - if (!uploadedFile.name.toLowerCase().endsWith(".json")) { - toast({ - variant: "destructive", - title: "Error", - description: "Please upload a JSON file", - }); - return; - } - - // Add loading indicator immediately - toast({ - title: "Uploading dataset...", - description: "This may take a few seconds", - }); - - // Add to uploading files set to show spinner in file list - setUploadingFiles((prev) => new Set(prev).add(uploadedFile.name)); - - try { - // Validate JSON structure before uploading - await validateJsonDataset(uploadedFile); - - const formData = new FormData(); - formData.append("file", uploadedFile); - formData.append("namespace", namespace); - - const response = await fetch("/api/uploadFile", { - method: "POST", - body: formData, - }); - - if (!response.ok) { - throw new Error("Upload failed"); - } - - const data = await response.json(); - - const newFile = { - name: uploadedFile.name, - path: data.path, - type: "json" as const, - parentFolder: "root", - }; - - onFileUpload(newFile); - setCurrentFile(newFile); - - toast({ - title: "Success", - description: "Dataset uploaded successfully", - }); - } catch (error) { - console.error(error); - toast({ - variant: "destructive", - title: "Error", - description: - error instanceof Error ? 
error.message : "Failed to upload file", - }); - } finally { - setUploadingFiles((prev) => { - const next = new Set(prev); - next.delete(uploadedFile.name); - return next; - }); - } + const fileToUpload: File = { + name: uploadedFile.name, + path: uploadedFile.name, + type: "json", + blob: uploadedFile, + }; + await uploadDataset(fileToUpload); }; const handleFileSelection = (file: File) => { diff --git a/website/src/components/PipelineGui.tsx b/website/src/components/PipelineGui.tsx index bea2f497..5c7fdaa2 100644 --- a/website/src/components/PipelineGui.tsx +++ b/website/src/components/PipelineGui.tsx @@ -77,6 +77,7 @@ import { HoverCardContent, HoverCardTrigger, } from "@/components/ui/hover-card"; +import { useRestorePipeline } from "@/hooks/useRestorePipeline"; interface OperationMenuItemProps { name: string; @@ -365,6 +366,17 @@ const PipelineGUI: React.FC = () => { }, }); + const { restoreFromYAML } = useRestorePipeline({ + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, + setFiles, + setCurrentFile, + currentFile, + files, + }); + useEffect(() => { if (lastMessage) { if (lastMessage.type === "output") { @@ -531,122 +543,17 @@ const PipelineGUI: React.FC = () => { ) => { const file = event.target.files?.[0]; if (file) { - const reader = new FileReader(); - reader.onload = async (e) => { - const content = e.target?.result; - if (typeof content === "string") { - try { - const yamlFileName = file.name.split("/").pop()?.split(".")[0]; - const yamlContent = yaml.load(content) as YAMLContent; - setOperations([]); - - // Update PipelineContext with the loaded YAML data - setOperations( - (yamlContent.operations || []).map((op) => { - const { - id, - type, - name, - prompt, - output, - validate, - sample, - ...otherKwargs - } = op; - - // If the operation type is 'reduce', ensure reduce_key is a list - if (type === "reduce" && otherKwargs.reduce_key) { - otherKwargs.reduce_key = Array.isArray(otherKwargs.reduce_key) - ? otherKwargs.reduce_key - : [otherKwargs.reduce_key]; - } - - return { - id: id || uuidv4(), - llmType: - type === "map" || - type === "reduce" || - type === "resolve" || - type === "filter" || - type === "parallel_map" - ? "LLM" - : "non-LLM", - type: type as Operation["type"], - name: name || "Untitled Operation", - prompt, - output: output - ? 
{ - schema: schemaDictToItemSet( - output.schema as Record - ), - } - : undefined, - validate, - sample, - otherKwargs, - visibility: true, - } as Operation; - }) - ); - setPipelineName(yamlFileName || "Untitled Pipeline"); - setSampleSize( - (yamlContent.operations?.[0]?.sample as number) || null - ); - setDefaultModel(yamlContent.default_model || "gpt-4o-mini"); - - // Set current file if it exists in the YAML - // Look for paths in all datasets - const datasetPaths = Object.values(yamlContent.datasets || {}) - .filter( - (dataset: Dataset) => dataset.type === "file" && dataset.path - ) - .map((dataset: Dataset) => dataset.path); - - if (datasetPaths.length > 0) { - const newFiles = datasetPaths.map((filePath) => ({ - name: path.basename(filePath), - path: filePath, - type: "json", - })); - - setFiles((prevFiles: File[]) => { - const uniqueNewFiles = newFiles - .filter( - (newFile) => - !prevFiles.some( - (prevFile) => prevFile.path === newFile.path - ) - ) - .map((file) => ({ - ...file, - type: "json" as const, // Explicitly type as literal "json" - })); - return [...prevFiles, ...uniqueNewFiles]; - }); - - // Set the first file as current if no current file exists - if (!currentFile) { - setCurrentFile({ ...newFiles[0], type: "json" }); - } - } - - toast({ - title: "Pipeline Loaded", - description: - "Your pipeline configuration has been loaded successfully.", - duration: 3000, - }); - } catch (error) { - console.error("Error parsing YAML:", error); - toast({ - title: "Error", - description: "Failed to parse the uploaded YAML file.", - variant: "destructive", - }); - } - } - }; - reader.readAsText(file); + try { + const fileToUpload: File = { + name: file.name, + path: file.name, + type: "pipeline-yaml", + blob: file, + }; + await restoreFromYAML(fileToUpload); + } catch (error) { + console.error("Error handling file upload:", error); + } } }; diff --git a/website/src/components/TutorialsDialog.tsx b/website/src/components/TutorialsDialog.tsx new file mode 100644 index 00000000..93499701 --- /dev/null +++ b/website/src/components/TutorialsDialog.tsx @@ -0,0 +1,393 @@ +import React, { useState, useEffect } from "react"; +import { + AlertDialog, + AlertDialogCancel, + AlertDialogContent, + AlertDialogDescription, + AlertDialogFooter, + AlertDialogHeader, + AlertDialogTitle, +} from "@/components/ui/alert-dialog"; +import { useToast } from "@/hooks/use-toast"; +import { useDatasetUpload } from "@/hooks/useDatasetUpload"; +import { useRestorePipeline } from "@/hooks/useRestorePipeline"; +import { Loader2 } from "lucide-react"; +import { Button } from "@/components/ui/button"; +import type { File as FileType, Operation } from "@/app/types"; + +interface Tutorial { + id: string; + title: string; + description: string; + datasetUrl: string; + datasetDescription: string; + operations: string[]; + pipelineTemplate: string; +} + +const TUTORIALS: Tutorial[] = [ + { + id: "supreme-court", + title: "Supreme Court Transcript Analysis", + description: + "Analyze Supreme Court oral arguments to understand how different Justices approach legal problems and how lawyers adapt their arguments.", + datasetUrl: + "https://drive.google.com/file/d/1n-muIvBYb3VGfZOYOBqJUYcO1swsBELt/view?usp=share_link", + datasetDescription: + "Collection of Supreme Court oral argument transcripts from 2024, covering various legal domains", + operations: [ + "Map: Analyze reasoning patterns, justice opinions, and notable exchanges in individual transcripts", + "Reduce: Synthesize patterns across multiple cases into a 
magazine-style article", + ], + pipelineTemplate: `datasets: + input: + type: file + path: DATASET_PATH_PLACEHOLDER + source: local +default_model: gpt-4o-mini +operations: + - type: map + name: find_patterns_in_reasoning + prompt: >- + You are analyzing a Supreme Court oral argument transcript to identify + interesting patterns in legal reasoning. Your analysis will be read by + people without legal training, so explain everything in clear, everyday + language. + + Here is the transcript: + + {{ input.content }} + + Please analyze this transcript in everyday language: + + 1. First, in 1-2 sentences, what's the key question this case is trying to + answer? What area of law is this (e.g. tech privacy, free speech)? + + 2. Find 4-5 notable exchanges between Justices and attorneys that show + different ways of reasoning. For each exchange: + - Quote the actual back-and-forth + - Explain what's interesting about how they're thinking through the problem + - Which Justice is asking the questions and what's distinctive about their approach? + - What everyday situation would help understand this exchange? + + 3. Justice Focus: Look at each Justice who spoke substantially: + - What kinds of questions do they tend to ask? + - What seems to concern them most? + - How do lawyers adapt to their particular style? + + 4. Looking at all these exchanges, what seems to be the main way the + Justices are approaching this problem? Are they focused on real-world + impacts, strict interpretation of laws, historical examples, etc? + + Avoid legal jargon - if you need to use a legal term, explain it like you + would to a friend. Use concrete examples that anyone could understand. + output: + schema: + summary: string + legal_domain: string + validate: [] + - type: reduce + name: analyze_common_patterns + prompt: >- + You're writing an engaging magazine article about how the Supreme Court + works in 2024. Below are detailed analyses of several oral arguments: + + {% for input in inputs %} + + Legal Domain: {{ input.legal_domain }} + + Case Analysis: + + {{ input.summary }} + + {% endfor %} + + Write an analysis that reveals fascinating patterns in how Supreme Court + arguments work. Structure your analysis like a magazine feature article + that explores: + + 1. Different Justices, Different Styles + - What's distinctive about how each Justice approaches problems? + - Do certain Justices focus more on specific aspects (practical effects, text, precedent)? + - How do their questioning styles differ? + - Use specific exchanges to show these personal styles + + 2. How Fields Shape the Conversation + - How do tech cases differ from free speech cases? From business regulation? + - Do certain Justices become more active in particular types of cases? + - What unique challenges come up in each field? + + 3. The Art of Persuasion + - How do lawyers adapt their arguments for different Justices? + - What strategies work across all cases vs. specific domains? + - Show examples of lawyers skillfully handling different Justice's styles + + 4. Bigger Patterns + - What does this tell us about how the Court approaches problems? + - Where do they prioritize practical effects vs. theoretical concerns? + - Are there surprising similarities across very different cases? + + Write this for an intelligent but non-legal audience - help them + understand the fascinating human dynamics of how America's highest court + works. 
Use specific quotes and moments from the arguments, but explain + everything like you're telling a story to an interested friend. + output: + schema: + analysis: string + reduce_key: + - _all +pipeline: + steps: + - name: data_processing + input: input + operations: + - find_patterns_in_reasoning + - analyze_common_patterns + output: + type: file + path: DATASET_PATH_PLACEHOLDER_OUTPUT + intermediate_dir: DATASET_PATH_PLACEHOLDER_INTERMEDIATES +system_prompt: {}`, + }, +]; + +interface TutorialsDialogProps { + open: boolean; + onOpenChange: (open: boolean) => void; + selectedTutorial?: Tutorial; + namespace: string; + onFileUpload: (file: FileType) => void; + setCurrentFile: (file: FileType | null) => void; + setOperations: (operations: Operation[]) => void; + setPipelineName: (name: string) => void; + setSampleSize: (size: number | null) => void; + setDefaultModel: (model: string) => void; + setFiles: (files: FileType[]) => void; + currentFile: FileType | null; + files: FileType[]; +} + +export function TutorialsDialog({ + open, + onOpenChange, + selectedTutorial, + namespace, + onFileUpload, + setCurrentFile, + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, + setFiles, + currentFile, + files, +}: TutorialsDialogProps) { + const { toast } = useToast(); + const { uploadDataset } = useDatasetUpload({ + namespace, + onFileUpload, + setCurrentFile, + }); + const { restoreFromYAML } = useRestorePipeline({ + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, + setFiles, + setCurrentFile, + currentFile, + files, + }); + + // Add state to track the uploaded dataset path + const [uploadedDatasetPath, setUploadedDatasetPath] = useState( + null + ); + const [isLoading, setIsLoading] = useState(false); + + // Use effect to watch for currentFile changes + useEffect(() => { + if (uploadedDatasetPath && currentFile?.path) { + const finishTutorialLoad = async () => { + try { + // Create pipeline YAML with the correct dataset path + const pipelineYaml = selectedTutorial?.pipelineTemplate + .replace(/DATASET_PATH_PLACEHOLDER/g, currentFile.path) + .replace( + /DATASET_PATH_PLACEHOLDER_OUTPUT/g, + `${currentFile.path.replace(".json", "")}_output.json` + ) + .replace( + /DATASET_PATH_PLACEHOLDER_INTERMEDIATES/g, + `${currentFile.path.replace(".json", "")}_intermediates` + ); + + if (!pipelineYaml) { + throw new Error("Pipeline template not found"); + } + + // Create pipeline file object + const pipelineBlob = new Blob([pipelineYaml], { + type: "application/x-yaml", + }); + const pipelineFile: FileType = { + name: `${selectedTutorial?.id}-pipeline.yaml`, + path: `${selectedTutorial?.id}-pipeline.yaml`, + type: "pipeline-yaml", + blob: pipelineBlob, + }; + + // Restore pipeline from the YAML + await restoreFromYAML(pipelineFile); + + toast({ + title: "Tutorial Loaded", + description: + "The tutorial pipeline and dataset have been loaded successfully.", + }); + + // Reset states + setUploadedDatasetPath(null); + setIsLoading(false); + onOpenChange(false); + } catch (error) { + console.error("Error loading tutorial:", error); + toast({ + title: "Error", + description: "Failed to load the tutorial. 
Please try again.", + variant: "destructive", + }); + setIsLoading(false); + } + }; + + finishTutorialLoad(); + } + }, [currentFile, uploadedDatasetPath, selectedTutorial]); + + if (!selectedTutorial) return null; + + const loadTutorial = async () => { + try { + setIsLoading(true); + + // Get file ID from Google Drive URL + const fileId = selectedTutorial.datasetUrl.split("/")[5]; + + // Download dataset through our API route + const datasetResponse = await fetch( + `/api/downloadTutorialDataset?fileId=${fileId}` + ); + if (!datasetResponse.ok) { + throw new Error("Failed to download dataset"); + } + + const datasetFileName = `${selectedTutorial.id}-dataset.json`; + const datasetBlob = new File( + [await datasetResponse.blob()], + datasetFileName, + { + type: "application/json", + } + ); + + // Create file object + const datasetFile: FileType = { + name: datasetFileName, + path: datasetFileName, + type: "json", + blob: datasetBlob, + }; + + // Set the path we're expecting + setUploadedDatasetPath(datasetFileName); + + // Upload dataset and wait for currentFile to update + await uploadDataset(datasetFile); + } catch (error) { + console.error("Error loading tutorial:", error); + toast({ + title: "Error", + description: "Failed to load the tutorial. Please try again.", + variant: "destructive", + }); + setIsLoading(false); + } + }; + + return ( + + + + + Load Example Pipeline + + +
+        <AlertDialogDescription asChild>
+          <div>
+            <div>
+              <h3>{selectedTutorial.title}</h3>
+              <p>{selectedTutorial.description}</p>
+            </div>
+            <div>
+              <h3>Dataset</h3>
+              <p>{selectedTutorial.datasetDescription}</p>
+            </div>
+            <div>
+              <h3>Pipeline Operations</h3>
+              <ul>
+                {selectedTutorial.operations.map((op, index) => (
+                  <li key={index}>• {op}</li>
+                ))}
+              </ul>
+            </div>
+            <p>
+              Loading this example will replace your current pipeline
+              configuration.
+            </p>
+          </div>
+        </AlertDialogDescription>
+ + + + + Cancel + + + + + + ); +} + +export { TUTORIALS }; diff --git a/website/src/hooks/useDatasetUpload.ts b/website/src/hooks/useDatasetUpload.ts new file mode 100644 index 00000000..533533c3 --- /dev/null +++ b/website/src/hooks/useDatasetUpload.ts @@ -0,0 +1,137 @@ +import { useState } from "react"; +import { useToast } from "@/hooks/use-toast"; +import type { File } from "@/app/types"; + +interface UseDatasetUploadOptions { + namespace: string; + onFileUpload: (file: File) => void; + setCurrentFile: (file: File | null) => void; +} + +export function useDatasetUpload({ + namespace, + onFileUpload, + setCurrentFile, +}: UseDatasetUploadOptions) { + const { toast } = useToast(); + const [uploadingFiles, setUploadingFiles] = useState>(new Set()); + + async function validateJsonDataset(file: Blob): Promise { + const text = await file.text(); + let data: unknown; + + try { + data = JSON.parse(text); + } catch { + throw new Error("Invalid JSON format"); + } + + // Check if it's an array + if (!Array.isArray(data)) { + throw new Error( + "Dataset must be an array of objects, like this: [{key: value}, {key: value}]" + ); + } + + // Check if array is not empty + if (data.length === 0) { + throw new Error("Dataset cannot be empty"); + } + + // Check if first item is an object + if (typeof data[0] !== "object" || data[0] === null) { + throw new Error("Dataset must contain objects"); + } + + // Get keys of first object + const firstObjectKeys = Object.keys(data[0]).sort(); + + // Check if all objects have the same keys + const hasConsistentKeys = data.every((item) => { + if (typeof item !== "object" || item === null) return false; + const currentKeys = Object.keys(item).sort(); + return ( + currentKeys.length === firstObjectKeys.length && + currentKeys.every((key, index) => key === firstObjectKeys[index]) + ); + }); + + if (!hasConsistentKeys) { + throw new Error("All objects in dataset must have the same keys"); + } + } + + const uploadDataset = async (file: File) => { + if (!file.name.toLowerCase().endsWith(".json")) { + toast({ + variant: "destructive", + title: "Error", + description: "Please upload a JSON file", + }); + return; + } + + // Add loading indicator immediately + toast({ + title: "Uploading dataset...", + description: "This may take a few seconds", + }); + + // Add to uploading files set to show spinner in file list + setUploadingFiles((prev) => new Set(prev).add(file.name)); + + try { + // Validate JSON structure before uploading + await validateJsonDataset(file.blob); + + const formData = new FormData(); + formData.append("file", file.blob); + formData.append("namespace", namespace); + + const response = await fetch("/api/uploadFile", { + method: "POST", + body: formData, + }); + + if (!response.ok) { + throw new Error("Upload failed"); + } + + const data = await response.json(); + + const newFile = { + name: file.name, + path: data.path, + type: "json" as const, + parentFolder: "root", + }; + + onFileUpload(newFile); + setCurrentFile(newFile); + + toast({ + title: "Success", + description: "Dataset uploaded successfully", + }); + } catch (error) { + console.error(error); + toast({ + variant: "destructive", + title: "Error", + description: + error instanceof Error ? 
error.message : "Failed to upload file", + }); + } finally { + setUploadingFiles((prev) => { + const next = new Set(prev); + next.delete(file.name); + return next; + }); + } + }; + + return { + uploadingFiles, + uploadDataset, + }; +} diff --git a/website/src/hooks/useRestorePipeline.ts b/website/src/hooks/useRestorePipeline.ts new file mode 100644 index 00000000..e852c9a5 --- /dev/null +++ b/website/src/hooks/useRestorePipeline.ts @@ -0,0 +1,189 @@ +import { useCallback } from "react"; +import yaml from "js-yaml"; +import { v4 as uuidv4 } from "uuid"; +import path from "path"; +import { Operation, File } from "@/app/types"; +import { schemaDictToItemSet } from "@/components/utils"; +import { useToast } from "@/hooks/use-toast"; + +interface Dataset { + type: string; + path: string; +} + +interface YAMLOperation { + id?: string; + type: string; + name?: string; + prompt?: string; + output?: { + schema: Record; + }; + validate?: unknown; + sample?: unknown; + [key: string]: unknown; +} + +interface YAMLContent { + operations?: YAMLOperation[]; + datasets?: Record; + default_model?: string; +} + +interface RestorePipelineConfig { + setOperations: (operations: Operation[]) => void; + setPipelineName: (name: string) => void; + setSampleSize: (size: number | null) => void; + setDefaultModel: (model: string) => void; + setFiles: (files: File[]) => void; + setCurrentFile: (file: File | null) => void; + currentFile: File | null; + files: File[]; +} + +export const useRestorePipeline = ({ + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, + setFiles, + setCurrentFile, + currentFile, + files, +}: RestorePipelineConfig) => { + const { toast } = useToast(); + + const restoreFromYAML = useCallback( + async (file: File) => { + const reader = new FileReader(); + + return new Promise((resolve, reject) => { + reader.onload = async (e) => { + const content = e.target?.result; + if (typeof content === "string") { + try { + const yamlFileName = file.name.split("/").pop()?.split(".")[0]; + const yamlContent = yaml.load(content) as YAMLContent; + setOperations([]); + + // Update operations from YAML + setOperations( + (yamlContent.operations || []).map((op) => { + const { + id, + type, + name, + prompt, + output, + validate, + sample, + ...otherKwargs + } = op; + + // If the operation type is 'reduce', ensure reduce_key is a list + if (type === "reduce" && otherKwargs.reduce_key) { + otherKwargs.reduce_key = Array.isArray( + otherKwargs.reduce_key + ) + ? otherKwargs.reduce_key + : [otherKwargs.reduce_key]; + } + + return { + id: id || uuidv4(), + llmType: + type === "map" || + type === "reduce" || + type === "resolve" || + type === "filter" || + type === "parallel_map" + ? "LLM" + : "non-LLM", + type: type as Operation["type"], + name: name || "Untitled Operation", + prompt, + output: output + ? 
{ + schema: schemaDictToItemSet( + output.schema as Record + ), + } + : undefined, + validate, + sample, + otherKwargs, + visibility: true, + } as Operation; + }) + ); + + setPipelineName(yamlFileName || "Untitled Pipeline"); + setSampleSize( + (yamlContent.operations?.[0]?.sample as number) || null + ); + setDefaultModel(yamlContent.default_model || "gpt-4o-mini"); + + // Look for paths in all datasets + const datasetPaths = Object.values(yamlContent.datasets || {}) + .filter( + (dataset: Dataset) => dataset.type === "file" && dataset.path + ) + .map((dataset: Dataset) => dataset.path); + + if (datasetPaths.length > 0) { + const requiredPath = datasetPaths[0]; // Take the first dataset path + const existingFile = files.find( + (file) => file.path === requiredPath + ); + + if (existingFile) { + // If the file exists, set it as current + setCurrentFile(existingFile); + } else { + // If the file doesn't exist, show a toast message + toast({ + title: "Dataset Required", + description: `This pipeline requires a dataset at path: ${requiredPath}. Please upload the dataset using the file explorer.`, + variant: "destructive", + }); + } + } + + toast({ + title: "Pipeline Loaded", + description: + "Your pipeline configuration has been loaded successfully.", + duration: 3000, + }); + + resolve(); + } catch (error) { + console.error("Error parsing YAML:", error); + toast({ + title: "Error", + description: "Failed to parse the uploaded YAML file.", + variant: "destructive", + }); + reject(error); + } + } + }; + + reader.onerror = (error) => reject(error); + reader.readAsText(file.blob); + }); + }, + [ + setOperations, + setPipelineName, + setSampleSize, + setDefaultModel, + setFiles, + setCurrentFile, + files, + toast, + ] + ); + + return { restoreFromYAML }; +};