Commit

checkpoint add gemini live stuff
birdup000 committed Dec 28, 2024
1 parent 412b507 commit ed56e30
Showing 8 changed files with 450 additions and 1 deletion.
66 changes: 66 additions & 0 deletions app/components/GeminiConfig.tsx
@@ -0,0 +1,66 @@
import React, { useState } from 'react';

interface GeminiConfigProps {
  onClose: () => void;
  onSave: (config: { apiKey: string; model: string }) => void;
}

const GeminiConfig: React.FC<GeminiConfigProps> = ({ onClose, onSave }) => {
  const [apiKey, setApiKey] = useState('');
  const [model, setModel] = useState('gemini-pro');

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    onSave({ apiKey, model });
  };

  return (
    <div className="bg-[#212121] p-6 rounded-lg w-full max-w-md">
      <h2 className="text-xl font-semibold mb-4">Gemini Configuration</h2>
      <form onSubmit={handleSubmit} className="space-y-4">
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            API Key
          </label>
          <input
            type="password"
            value={apiKey}
            onChange={(e) => setApiKey(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
            placeholder="Enter your Gemini API key"
          />
        </div>
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            Model
          </label>
          <select
            value={model}
            onChange={(e) => setModel(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
          >
            <option value="gemini-pro">Gemini Pro</option>
            <option value="gemini-pro-vision">Gemini Pro Vision</option>
          </select>
        </div>
        <div className="flex justify-end gap-2 pt-4">
          <button
            type="button"
            onClick={onClose}
            className="px-4 py-2 rounded-md bg-gray-700 hover:bg-gray-600 transition-colors"
          >
            Cancel
          </button>
          <button
            type="submit"
            className="px-4 py-2 rounded-md bg-indigo-600 hover:bg-indigo-700 transition-colors"
          >
            Save
          </button>
        </div>
      </form>
    </div>
  );
};

export default GeminiConfig;
30 changes: 30 additions & 0 deletions app/components/ModernTaskPanel.tsx
@@ -11,6 +11,7 @@ import TaskDetailsPanel from './TaskDetailsPanel';
import TaskForm from './TaskForm';
import AGiXTConfig from './AGiXTConfig';
import AIAssistantPanel from './AIAssistantPanel';
import VoiceTaskAssistant from './VoiceTaskAssistant';
import { useSearch } from '../hooks/useSearch';
import { useKeyboardShortcuts } from '../hooks/useKeyboardShortcuts';
import LayoutSettingsPanel from './LayoutSettingsPanel';
@@ -56,6 +57,8 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
  const [isEditorOpen, setIsEditorOpen] = useState(false);
  const [isAGiXTConfigOpen, setIsAGiXTConfigOpen] = useState(false);
  const [agixtConfig, setAgixtConfig] = useState({ backendUrl: '', authToken: '' });
  const [geminiConfig, setGeminiConfig] = useState({ apiKey: '', model: 'gemini-pro' });
  const [isGeminiConfigOpen, setIsGeminiConfigOpen] = useState(false);
  const [currentView, setCurrentView] = useState<'board' | 'matrix'>('board');
  const [layoutSettings, setLayoutSettings] = useState<LayoutSettings>({
    selectedLayout: 'board',
@@ -207,6 +210,12 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
          >
            <span>⚙️ AGiXT Config</span>
          </button>
          <button
            onClick={() => setIsGeminiConfigOpen(true)}
            className="px-4 py-2 bg-gray-700 hover:bg-gray-600 rounded-lg transition-colors flex items-center space-x-2"
          >
            <span>🤖 Gemini Config</span>
          </button>
          <button
            onClick={() => setIsEditorOpen(true)}
            className="px-4 py-2 bg-gradient-to-r from-indigo-600 to-purple-600 hover:from-indigo-700 hover:to-purple-700 rounded-lg transition-colors flex items-center space-x-2 shadow-lg"
@@ -326,6 +335,15 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({

        {/* Right Sidebar */}
        <div className="w-80 space-y-6">
          {geminiConfig.apiKey && agixtConfig.backendUrl && agixtConfig.authToken && (
            <VoiceTaskAssistant
              onTaskUpdate={onUpdateTask}
              onNewTask={onAddTask}
              geminiApiKey={geminiConfig.apiKey}
              selectedTask={selectedTask}
              agixtConfig={agixtConfig}
            />
          )}
          {agixtConfig.backendUrl && agixtConfig.authToken && (
            <AIAssistantPanel
              backendUrl={agixtConfig.backendUrl}
@@ -402,6 +420,18 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
        </div>
      )}

      {isGeminiConfigOpen && (
        <div className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center">
          <GeminiConfig
            onClose={() => setIsGeminiConfigOpen(false)}
            onSave={(config) => {
              setGeminiConfig(config);
              setIsGeminiConfigOpen(false);
            }}
          />
        </div>
      )}

      {isEditorOpen && (
        <div className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center">
          <TaskForm
254 changes: 254 additions & 0 deletions app/components/VoiceTaskAssistant.tsx
@@ -0,0 +1,254 @@
import React, { useState, useEffect, useRef } from 'react';
import { Task } from '../types/task';
import { TranscriptionResult, VoiceCommand, ScreenCaptureData } from '../types/recognition';
import AudioPulse from './audio-pulse/AudioPulse';
import { MultimodalLiveClient } from '../lib/multimodal-live-client';
import AGiXT from 'agixt';

interface VoiceTaskAssistantProps {
  onTaskUpdate: (task: Task) => void;
  onNewTask: (task: Task) => void;
  geminiApiKey: string;
  selectedTask?: Task;
  agixtConfig: {
    backendUrl: string;
    authToken: string;
  };
}

const VoiceTaskAssistant: React.FC<VoiceTaskAssistantProps> = ({
  onTaskUpdate,
  onNewTask,
  geminiApiKey,
  selectedTask,
  agixtConfig
}) => {
  // Note: the Web Speech API types (SpeechRecognition) are not part of TypeScript's
  // standard DOM lib; a declaration such as @types/dom-speech-recognition may be needed.
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  const agixtClientRef = useRef<AGiXT | null>(null);
  const [isListening, setIsListening] = useState(false);
  const [volume, setVolume] = useState(0);
  const [transcription, setTranscription] = useState('');
  const [isScreenSharing, setIsScreenSharing] = useState(false);
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const clientRef = useRef<MultimodalLiveClient | null>(null);
  // Mirror the listening/sharing flags in refs so the long-running callbacks below
  // (requestAnimationFrame loop, setInterval capture loop) read current values
  // instead of the stale state captured in their closures.
  const isListeningRef = useRef(false);
  const isScreenSharingRef = useRef(false);

  useEffect(() => {
    // Initialize the multimodal client and AGiXT client
    if (geminiApiKey && agixtConfig.backendUrl && agixtConfig.authToken) {
      clientRef.current = new MultimodalLiveClient({
        url: 'https://generativelanguage.googleapis.com',
        apiKey: geminiApiKey
      });

      agixtClientRef.current = new AGiXT({
        baseUrl: agixtConfig.backendUrl,
        authToken: agixtConfig.authToken
      });

      // Initialize Web Speech API recognition
      if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        recognitionRef.current = new SpeechRecognition();
        recognitionRef.current.continuous = true;
        recognitionRef.current.interimResults = true;

        recognitionRef.current.onresult = (event) => {
          const result = event.results[event.results.length - 1];
          const transcriptionResult: TranscriptionResult = {
            text: result[0].transcript,
            confidence: result[0].confidence,
            isFinal: result.isFinal
          };

          setTranscription(transcriptionResult.text);
          if (result.isFinal) {
            processVoiceCommand(transcriptionResult);
          }
        };
      }
    }
    return () => {
      if (clientRef.current) {
        clientRef.current.disconnect();
      }
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    };
  }, [geminiApiKey, agixtConfig]);

  const startVoiceRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      setIsListening(true);
      isListeningRef.current = true;

      // Initialize audio processing for visualization
      const audioContext = new AudioContext();
      const source = audioContext.createMediaStreamSource(stream);
      const analyzer = audioContext.createAnalyser();
      const dataArray = new Uint8Array(analyzer.frequencyBinCount);

      source.connect(analyzer);

      // Update volume visualization; check the ref rather than isListening state,
      // which stays stale inside this closure and would stop the loop immediately.
      const updateVolume = () => {
        if (isListeningRef.current) {
          analyzer.getByteFrequencyData(dataArray);
          const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
          setVolume(average / 255);
          requestAnimationFrame(updateVolume);
        }
      };
      updateVolume();

      // Start speech recognition
      if (recognitionRef.current) {
        recognitionRef.current.start();
      }
    } catch (error) {
      console.error('Error accessing microphone:', error);
    }
  };

  const processVoiceCommand = async (transcription: TranscriptionResult) => {
    try {
      if (!agixtClientRef.current) return;

      // Use AGiXT to process the voice command
      const response = await agixtClientRef.current.command({
        command: transcription.text,
        context: {
          currentTask: selectedTask,
          isScreenSharing,
          confidence: transcription.confidence
        }
      });

      // Handle the AGiXT response
      if (response.action === 'CREATE_TASK') {
        onNewTask({
          id: Date.now().toString(),
          title: response.data.title,
          description: response.data.description || '',
          priority: response.data.priority || 'medium',
          status: 'todo',
          createdAt: new Date(),
          updatedAt: new Date(),
          listId: 'default'
        });
      } else if (response.action === 'UPDATE_TASK' && selectedTask) {
        onTaskUpdate({
          ...selectedTask,
          ...response.data,
          updatedAt: new Date()
        });
      }
    } catch (error) {
      console.error('Error processing voice command:', error);
    }
  };

  const stopVoiceRecording = () => {
    setIsListening(false);
    isListeningRef.current = false;
    if (recognitionRef.current) {
      recognitionRef.current.stop();
    }
  };

  const startScreenSharing = async () => {
    try {
      const stream = await navigator.mediaDevices.getDisplayMedia({ video: true });
      setMediaStream(stream);
      setIsScreenSharing(true);
      isScreenSharingRef.current = true;

      // Process screen capture
      const videoTrack = stream.getVideoTracks()[0];
      const imageCapture = new ImageCapture(videoTrack);

      // Periodically capture and process screens; check the ref, not isScreenSharing
      // state, which would be stale inside this interval callback.
      const captureInterval = setInterval(async () => {
        if (!isScreenSharingRef.current) {
          clearInterval(captureInterval);
          return;
        }

        try {
          const bitmap = await imageCapture.grabFrame();
          const canvas = document.createElement('canvas');
          canvas.width = bitmap.width;
          canvas.height = bitmap.height;
          const context = canvas.getContext('2d');
          context?.drawImage(bitmap, 0, 0);

          const imageData = canvas.toDataURL('image/jpeg', 0.8);
          const screenData: ScreenCaptureData = {
            timestamp: Date.now(),
            imageData
          };

          // Send to AGiXT for processing if needed
          if (agixtClientRef.current && selectedTask) {
            const contextResponse = await agixtClientRef.current.analyzeScreen({
              image: screenData.imageData,
              taskContext: selectedTask
            });

            if (contextResponse.needsUpdate) {
              onTaskUpdate({
                ...selectedTask,
                ...contextResponse.updates,
                updatedAt: new Date()
              });
            }
          }
        } catch (error) {
          console.error('Error processing screen capture:', error);
        }
      }, 5000); // Capture every 5 seconds
    } catch (error) {
      console.error('Error sharing screen:', error);
    }
  };

  const stopScreenSharing = () => {
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop());
      setMediaStream(null);
    }
    setIsScreenSharing(false);
    isScreenSharingRef.current = false;
  };

  return (
    <div className="fixed bottom-4 right-4 bg-gray-800 p-4 rounded-lg shadow-lg">
      <div className="flex items-center space-x-4">
        <button
          onClick={isListening ? stopVoiceRecording : startVoiceRecording}
          className={`p-3 rounded-full ${
            isListening ? 'bg-red-600' : 'bg-blue-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isListening ? '⏹️' : '🎤'}
        </button>

        <AudioPulse active={isListening} volume={volume} />

        <button
          onClick={isScreenSharing ? stopScreenSharing : startScreenSharing}
          className={`p-3 rounded-full ${
            isScreenSharing ? 'bg-red-600' : 'bg-green-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isScreenSharing ? '⏹️' : '🖥️'}
        </button>
      </div>

      {transcription && (
        <div className="mt-2 p-2 bg-gray-700 rounded text-sm">
          {transcription}
        </div>
      )}
    </div>
  );
};

export default VoiceTaskAssistant;
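
Note: VoiceTaskAssistant imports TranscriptionResult, VoiceCommand, and ScreenCaptureData from app/types/recognition, which is presumably added elsewhere in this commit but not expanded in this view. A minimal, hypothetical sketch of those interfaces, inferred only from how the fields are used above (not the committed file):

// Hypothetical sketch of app/types/recognition.ts, inferred from usage in VoiceTaskAssistant;
// the actual committed file is not shown in this view.
export interface TranscriptionResult {
  text: string;        // recognized speech text from SpeechRecognition
  confidence: number;  // recognizer confidence, 0 to 1
  isFinal: boolean;    // true once the recognizer finalizes the phrase
}

export interface VoiceCommand {
  // Imported but not referenced in this diff; a plausible minimal shape.
  command: string;
  confidence?: number;
}

export interface ScreenCaptureData {
  timestamp: number;   // Date.now() at capture time
  imageData: string;   // JPEG data URL from canvas.toDataURL
}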
