From ed56e30a1f61bc2a12b9ff8ff0e39400a6ce87ff Mon Sep 17 00:00:00 2001 From: Birdup <34012548+birdup000@users.noreply.github.com> Date: Fri, 27 Dec 2024 19:33:29 -0600 Subject: [PATCH] checkpoint add gemini live stuff --- app/components/GeminiConfig.tsx | 66 ++++++ app/components/ModernTaskPanel.tsx | 30 +++ app/components/VoiceTaskAssistant.tsx | 254 ++++++++++++++++++++++ app/components/audio-pulse/AudioPulse.tsx | 27 +++ app/globals.d.ts | 9 + app/lib/multimodal-live-client.ts | 44 ++++ app/tasks/page.tsx | 2 +- app/types/recognition.ts | 19 ++ 8 files changed, 450 insertions(+), 1 deletion(-) create mode 100644 app/components/GeminiConfig.tsx create mode 100644 app/components/VoiceTaskAssistant.tsx create mode 100644 app/components/audio-pulse/AudioPulse.tsx create mode 100644 app/globals.d.ts create mode 100644 app/lib/multimodal-live-client.ts create mode 100644 app/types/recognition.ts diff --git a/app/components/GeminiConfig.tsx b/app/components/GeminiConfig.tsx new file mode 100644 index 0000000..bdbff37 --- /dev/null +++ b/app/components/GeminiConfig.tsx @@ -0,0 +1,66 @@ +import React, { useState } from 'react'; + +interface GeminiConfigProps { + onClose: () => void; + onSave: (config: { apiKey: string; model: string }) => void; +} + +const GeminiConfig: React.FC = ({ onClose, onSave }) => { + const [apiKey, setApiKey] = useState(''); + const [model, setModel] = useState('gemini-pro'); + + const handleSubmit = (e: React.FormEvent) => { + e.preventDefault(); + onSave({ apiKey, model }); + }; + + return ( +
+

Gemini Configuration

+
+
+ + setApiKey(e.target.value)} + className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500" + placeholder="Enter your Gemini API key" + /> +
+
+ + +
+
+ + +
+
+
+ ); +}; + +export default GeminiConfig; \ No newline at end of file diff --git a/app/components/ModernTaskPanel.tsx b/app/components/ModernTaskPanel.tsx index bf7464a..7c62720 100644 --- a/app/components/ModernTaskPanel.tsx +++ b/app/components/ModernTaskPanel.tsx @@ -11,6 +11,7 @@ import TaskDetailsPanel from './TaskDetailsPanel'; import TaskForm from './TaskForm'; import AGiXTConfig from './AGiXTConfig'; import AIAssistantPanel from './AIAssistantPanel'; +import VoiceTaskAssistant from './VoiceTaskAssistant'; import { useSearch } from '../hooks/useSearch'; import { useKeyboardShortcuts } from '../hooks/useKeyboardShortcuts'; import LayoutSettingsPanel from './LayoutSettingsPanel'; @@ -56,6 +57,8 @@ const ModernTaskPanel: React.FC = ({ const [isEditorOpen, setIsEditorOpen] = useState(false); const [isAGiXTConfigOpen, setIsAGiXTConfigOpen] = useState(false); const [agixtConfig, setAgixtConfig] = useState({ backendUrl: '', authToken: '' }); + const [geminiConfig, setGeminiConfig] = useState({ apiKey: '', model: 'gemini-pro' }); + const [isGeminiConfigOpen, setIsGeminiConfigOpen] = useState(false); const [currentView, setCurrentView] = useState<'board' | 'matrix'>('board'); const [layoutSettings, setLayoutSettings] = useState({ selectedLayout: 'board', @@ -207,6 +210,12 @@ const ModernTaskPanel: React.FC = ({ > ⚙ī¸ AGiXT Config + + + + + + + + {transcription && ( +
+ {transcription} +
+ )} + + ); +}; + +export default VoiceTaskAssistant; \ No newline at end of file diff --git a/app/components/audio-pulse/AudioPulse.tsx b/app/components/audio-pulse/AudioPulse.tsx new file mode 100644 index 0000000..845c28c --- /dev/null +++ b/app/components/audio-pulse/AudioPulse.tsx @@ -0,0 +1,27 @@ +import React from 'react'; + +interface AudioPulseProps { + active: boolean; + volume: number; +} + +const AudioPulse: React.FC = ({ active, volume }) => { + if (!active) return null; + + const pulseSize = Math.max(20, Math.min(60, volume * 100)); + + return ( +
+
+
+
+ ); +}; + +export default AudioPulse; \ No newline at end of file diff --git a/app/globals.d.ts b/app/globals.d.ts new file mode 100644 index 0000000..ce2ee3f --- /dev/null +++ b/app/globals.d.ts @@ -0,0 +1,9 @@ +interface Window { + SpeechRecognition: typeof SpeechRecognition; + webkitSpeechRecognition: typeof SpeechRecognition; +} + +interface ImageCapture { + constructor(videoTrack: MediaStreamTrack): ImageCapture; + grabFrame(): Promise; +} \ No newline at end of file diff --git a/app/lib/multimodal-live-client.ts b/app/lib/multimodal-live-client.ts new file mode 100644 index 0000000..90c779f --- /dev/null +++ b/app/lib/multimodal-live-client.ts @@ -0,0 +1,44 @@ +interface MultimodalLiveClientConfig { + url: string; + apiKey: string; +} + +export class MultimodalLiveClient { + private config: MultimodalLiveClientConfig; + private connection: WebSocket | null = null; + + constructor(config: MultimodalLiveClientConfig) { + this.config = config; + } + + public async connect(): Promise { + // Implementation can be expanded based on needs + return Promise.resolve(); + } + + public disconnect(): void { + if (this.connection) { + this.connection.close(); + this.connection = null; + } + } + + public async analyzeInput(input: any): Promise { + const response = await fetch(`${this.config.url}/v1beta/models/gemini-pro:streamGenerateContent`, { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Authorization': `Bearer ${this.config.apiKey}` + }, + body: JSON.stringify({ + contents: [{ + parts: [{ + text: JSON.stringify(input) + }] + }] + }) + }); + + return response.json(); + } +} \ No newline at end of file diff --git a/app/tasks/page.tsx b/app/tasks/page.tsx index 2e0dab0..7f8b190 100644 --- a/app/tasks/page.tsx +++ b/app/tasks/page.tsx @@ -2,7 +2,7 @@ import React, { useEffect } from 'react'; import { useRouter } from 'next/navigation'; -import TaskPanel from '../components/TaskPanel'; +import TaskPanel from '../components/ModernTaskPanel'; export default function TasksPage() { const router = useRouter(); diff --git a/app/types/recognition.ts b/app/types/recognition.ts new file mode 100644 index 0000000..230f856 --- /dev/null +++ b/app/types/recognition.ts @@ -0,0 +1,19 @@ +export interface TranscriptionResult { + text: string; + confidence: number; + isFinal: boolean; +} + +export interface VoiceCommand { + command: string; + args?: { + [key: string]: any; + }; + confidence: number; +} + +export interface ScreenCaptureData { + timestamp: number; + imageData: string; + contextDescription?: string; +} \ No newline at end of file