Commit

checkpoint add gemini live stuff
birdup000 committed Dec 28, 2024
1 parent 412b507 commit ed56e30
Showing 8 changed files with 450 additions and 1 deletion.
66 changes: 66 additions & 0 deletions app/components/GeminiConfig.tsx
@@ -0,0 +1,66 @@
import React, { useState } from 'react';

interface GeminiConfigProps {
  onClose: () => void;
  onSave: (config: { apiKey: string; model: string }) => void;
}

const GeminiConfig: React.FC<GeminiConfigProps> = ({ onClose, onSave }) => {
  const [apiKey, setApiKey] = useState('');
  const [model, setModel] = useState('gemini-pro');

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    onSave({ apiKey, model });
  };

  return (
    <div className="bg-[#212121] p-6 rounded-lg w-full max-w-md">
      <h2 className="text-xl font-semibold mb-4">Gemini Configuration</h2>
      <form onSubmit={handleSubmit} className="space-y-4">
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            API Key
          </label>
          <input
            type="password"
            value={apiKey}
            onChange={(e) => setApiKey(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
            placeholder="Enter your Gemini API key"
          />
        </div>
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            Model
          </label>
          <select
            value={model}
            onChange={(e) => setModel(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
          >
            <option value="gemini-pro">Gemini Pro</option>
            <option value="gemini-pro-vision">Gemini Pro Vision</option>
          </select>
        </div>
        <div className="flex justify-end gap-2 pt-4">
          <button
            type="button"
            onClick={onClose}
            className="px-4 py-2 rounded-md bg-gray-700 hover:bg-gray-600 transition-colors"
          >
            Cancel
          </button>
          <button
            type="submit"
            className="px-4 py-2 rounded-md bg-indigo-600 hover:bg-indigo-700 transition-colors"
          >
            Save
          </button>
        </div>
      </form>
    </div>
  );
};

export default GeminiConfig;
30 changes: 30 additions & 0 deletions app/components/ModernTaskPanel.tsx
@@ -11,6 +11,7 @@ import TaskDetailsPanel from './TaskDetailsPanel';
import TaskForm from './TaskForm';
import AGiXTConfig from './AGiXTConfig';
import AIAssistantPanel from './AIAssistantPanel';
import VoiceTaskAssistant from './VoiceTaskAssistant';
import { useSearch } from '../hooks/useSearch';
import { useKeyboardShortcuts } from '../hooks/useKeyboardShortcuts';
import LayoutSettingsPanel from './LayoutSettingsPanel';
@@ -56,6 +57,8 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
  const [isEditorOpen, setIsEditorOpen] = useState(false);
  const [isAGiXTConfigOpen, setIsAGiXTConfigOpen] = useState(false);
  const [agixtConfig, setAgixtConfig] = useState({ backendUrl: '', authToken: '' });
  const [geminiConfig, setGeminiConfig] = useState({ apiKey: '', model: 'gemini-pro' });
  const [isGeminiConfigOpen, setIsGeminiConfigOpen] = useState(false);
  const [currentView, setCurrentView] = useState<'board' | 'matrix'>('board');
  const [layoutSettings, setLayoutSettings] = useState<LayoutSettings>({
    selectedLayout: 'board',
@@ -207,6 +210,12 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
          >
            <span>⚙️ AGiXT Config</span>
          </button>
          <button
            onClick={() => setIsGeminiConfigOpen(true)}
            className="px-4 py-2 bg-gray-700 hover:bg-gray-600 rounded-lg transition-colors flex items-center space-x-2"
          >
            <span>🤖 Gemini Config</span>
          </button>
          <button
            onClick={() => setIsEditorOpen(true)}
            className="px-4 py-2 bg-gradient-to-r from-indigo-600 to-purple-600 hover:from-indigo-700 hover:to-purple-700 rounded-lg transition-colors flex items-center space-x-2 shadow-lg"
@@ -326,6 +335,15 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({

        {/* Right Sidebar */}
        <div className="w-80 space-y-6">
          {geminiConfig.apiKey && agixtConfig.backendUrl && agixtConfig.authToken && (
            <VoiceTaskAssistant
              onTaskUpdate={onUpdateTask}
              onNewTask={onAddTask}
              geminiApiKey={geminiConfig.apiKey}
              selectedTask={selectedTask}
              agixtConfig={agixtConfig}
            />
          )}
          {agixtConfig.backendUrl && agixtConfig.authToken && (
            <AIAssistantPanel
              backendUrl={agixtConfig.backendUrl}
@@ -402,6 +420,18 @@ const ModernTaskPanel: React.FC<ModernTaskPanelProps> = ({
        </div>
      )}

      {isGeminiConfigOpen && (
        <div className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center">
          <GeminiConfig
            onClose={() => setIsGeminiConfigOpen(false)}
            onSave={(config) => {
              setGeminiConfig(config);
              setIsGeminiConfigOpen(false);
            }}
          />
        </div>
      )}

      {isEditorOpen && (
        <div className="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center">
          <TaskForm
254 changes: 254 additions & 0 deletions app/components/VoiceTaskAssistant.tsx
@@ -0,0 +1,254 @@
import React, { useState, useEffect, useRef } from 'react';
import { Task } from '../types/task';
import { TranscriptionResult, VoiceCommand, ScreenCaptureData } from '../types/recognition';
import AudioPulse from './audio-pulse/AudioPulse';
import { MultimodalLiveClient } from '../lib/multimodal-live-client';
import AGiXT from 'agixt';

interface VoiceTaskAssistantProps {
  onTaskUpdate: (task: Task) => void;
  onNewTask: (task: Task) => void;
  geminiApiKey: string;
  selectedTask?: Task;
  agixtConfig: {
    backendUrl: string;
    authToken: string;
  };
}

const VoiceTaskAssistant: React.FC<VoiceTaskAssistantProps> = ({
  onTaskUpdate,
  onNewTask,
  geminiApiKey,
  selectedTask,
  agixtConfig
}) => {
  // Note: the Web Speech API types (SpeechRecognition) are not part of TypeScript's
  // standard DOM lib; a declaration such as @types/dom-speech-recognition may be needed.
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  const agixtClientRef = useRef<AGiXT | null>(null);
  const [isListening, setIsListening] = useState(false);
  const [volume, setVolume] = useState(0);
  const [transcription, setTranscription] = useState('');
  const [isScreenSharing, setIsScreenSharing] = useState(false);
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const clientRef = useRef<MultimodalLiveClient | null>(null);
  // Mirror the listening/sharing flags in refs so the long-running callbacks below
  // (requestAnimationFrame loop, setInterval capture loop) read current values
  // instead of the stale state captured in their closures.
  const isListeningRef = useRef(false);
  const isScreenSharingRef = useRef(false);

  useEffect(() => {
    // Initialize the multimodal client and AGiXT client
    if (geminiApiKey && agixtConfig.backendUrl && agixtConfig.authToken) {
      clientRef.current = new MultimodalLiveClient({
        url: 'https://generativelanguage.googleapis.com',
        apiKey: geminiApiKey
      });

      agixtClientRef.current = new AGiXT({
        baseUrl: agixtConfig.backendUrl,
        authToken: agixtConfig.authToken
      });

      // Initialize Web Speech API recognition
      if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        recognitionRef.current = new SpeechRecognition();
        recognitionRef.current.continuous = true;
        recognitionRef.current.interimResults = true;

        recognitionRef.current.onresult = (event) => {
          const result = event.results[event.results.length - 1];
          const transcriptionResult: TranscriptionResult = {
            text: result[0].transcript,
            confidence: result[0].confidence,
            isFinal: result.isFinal
          };

          setTranscription(transcriptionResult.text);
          if (result.isFinal) {
            processVoiceCommand(transcriptionResult);
          }
        };
      }
    }
    return () => {
      if (clientRef.current) {
        clientRef.current.disconnect();
      }
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    };
  }, [geminiApiKey, agixtConfig]);

  const startVoiceRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      setIsListening(true);
      isListeningRef.current = true;

      // Initialize audio processing for visualization
      const audioContext = new AudioContext();
      const source = audioContext.createMediaStreamSource(stream);
      const analyzer = audioContext.createAnalyser();
      const dataArray = new Uint8Array(analyzer.frequencyBinCount);

      source.connect(analyzer);

      // Update volume visualization; check the ref rather than isListening state,
      // which stays stale inside this closure and would stop the loop immediately.
      const updateVolume = () => {
        if (isListeningRef.current) {
          analyzer.getByteFrequencyData(dataArray);
          const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
          setVolume(average / 255);
          requestAnimationFrame(updateVolume);
        }
      };
      updateVolume();

      // Start speech recognition
      if (recognitionRef.current) {
        recognitionRef.current.start();
      }
    } catch (error) {
      console.error('Error accessing microphone:', error);
    }
  };

  const processVoiceCommand = async (transcription: TranscriptionResult) => {
    try {
      if (!agixtClientRef.current) return;

      // Use AGiXT to process the voice command
      const response = await agixtClientRef.current.command({
        command: transcription.text,
        context: {
          currentTask: selectedTask,
          isScreenSharing,
          confidence: transcription.confidence
        }
      });

      // Handle the AGiXT response
      if (response.action === 'CREATE_TASK') {
        onNewTask({
          id: Date.now().toString(),
          title: response.data.title,
          description: response.data.description || '',
          priority: response.data.priority || 'medium',
          status: 'todo',
          createdAt: new Date(),
          updatedAt: new Date(),
          listId: 'default'
        });
      } else if (response.action === 'UPDATE_TASK' && selectedTask) {
        onTaskUpdate({
          ...selectedTask,
          ...response.data,
          updatedAt: new Date()
        });
      }
    } catch (error) {
      console.error('Error processing voice command:', error);
    }
  };

  const stopVoiceRecording = () => {
    setIsListening(false);
    isListeningRef.current = false;
    if (recognitionRef.current) {
      recognitionRef.current.stop();
    }
  };

  const startScreenSharing = async () => {
    try {
      const stream = await navigator.mediaDevices.getDisplayMedia({ video: true });
      setMediaStream(stream);
      setIsScreenSharing(true);
      isScreenSharingRef.current = true;

      // Process screen capture
      const videoTrack = stream.getVideoTracks()[0];
      const imageCapture = new ImageCapture(videoTrack);

      // Periodically capture and process screens; check the ref, not isScreenSharing
      // state, which would be stale inside this interval callback.
      const captureInterval = setInterval(async () => {
        if (!isScreenSharingRef.current) {
          clearInterval(captureInterval);
          return;
        }

        try {
          const bitmap = await imageCapture.grabFrame();
          const canvas = document.createElement('canvas');
          canvas.width = bitmap.width;
          canvas.height = bitmap.height;
          const context = canvas.getContext('2d');
          context?.drawImage(bitmap, 0, 0);

          const imageData = canvas.toDataURL('image/jpeg', 0.8);
          const screenData: ScreenCaptureData = {
            timestamp: Date.now(),
            imageData
          };

          // Send to AGiXT for processing if needed
          if (agixtClientRef.current && selectedTask) {
            const contextResponse = await agixtClientRef.current.analyzeScreen({
              image: screenData.imageData,
              taskContext: selectedTask
            });

            if (contextResponse.needsUpdate) {
              onTaskUpdate({
                ...selectedTask,
                ...contextResponse.updates,
                updatedAt: new Date()
              });
            }
          }
        } catch (error) {
          console.error('Error processing screen capture:', error);
        }
      }, 5000); // Capture every 5 seconds
    } catch (error) {
      console.error('Error sharing screen:', error);
    }
  };

  const stopScreenSharing = () => {
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop());
      setMediaStream(null);
    }
    setIsScreenSharing(false);
    isScreenSharingRef.current = false;
  };

  return (
    <div className="fixed bottom-4 right-4 bg-gray-800 p-4 rounded-lg shadow-lg">
      <div className="flex items-center space-x-4">
        <button
          onClick={isListening ? stopVoiceRecording : startVoiceRecording}
          className={`p-3 rounded-full ${
            isListening ? 'bg-red-600' : 'bg-blue-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isListening ? '⏹️' : '🎤'}
        </button>

        <AudioPulse active={isListening} volume={volume} />

        <button
          onClick={isScreenSharing ? stopScreenSharing : startScreenSharing}
          className={`p-3 rounded-full ${
            isScreenSharing ? 'bg-red-600' : 'bg-green-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isScreenSharing ? '⏹️' : '🖥️'}
        </button>
      </div>

      {transcription && (
        <div className="mt-2 p-2 bg-gray-700 rounded text-sm">
          {transcription}
        </div>
      )}
    </div>
  );
};

export default VoiceTaskAssistant;
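
Note: VoiceTaskAssistant imports TranscriptionResult, VoiceCommand, and ScreenCaptureData from app/types/recognition, which is presumably added elsewhere in this commit but not expanded in this view. A minimal, hypothetical sketch of those interfaces, inferred only from how the fields are used above (not the committed file):

// Hypothetical sketch of app/types/recognition.ts, inferred from usage in VoiceTaskAssistant;
// the actual committed file is not shown in this view.
export interface TranscriptionResult {
  text: string;        // recognized speech text from SpeechRecognition
  confidence: number;  // recognizer confidence, 0 to 1
  isFinal: boolean;    // true once the recognizer finalizes the phrase
}

export interface VoiceCommand {
  // Imported but not referenced in this diff; a plausible minimal shape.
  command: string;
  confidence?: number;
}

export interface ScreenCaptureData {
  timestamp: number;   // Date.now() at capture time
  imageData: string;   // JPEG data URL from canvas.toDataURL
}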
