Showing 8 changed files with 450 additions and 1 deletion. Only two of the changed files rendered below; the rest failed to load.
New file (66 lines): GeminiConfig, a settings form for entering a Gemini API key and selecting a model.

@@ -0,0 +1,66 @@
import React, { useState } from 'react';

interface GeminiConfigProps {
  onClose: () => void;
  onSave: (config: { apiKey: string; model: string }) => void;
}

const GeminiConfig: React.FC<GeminiConfigProps> = ({ onClose, onSave }) => {
  const [apiKey, setApiKey] = useState('');
  const [model, setModel] = useState('gemini-pro');

  const handleSubmit = (e: React.FormEvent) => {
    e.preventDefault();
    onSave({ apiKey, model });
  };

  return (
    <div className="bg-[#212121] p-6 rounded-lg w-full max-w-md">
      <h2 className="text-xl font-semibold mb-4">Gemini Configuration</h2>
      <form onSubmit={handleSubmit} className="space-y-4">
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            API Key
          </label>
          <input
            type="password"
            value={apiKey}
            onChange={(e) => setApiKey(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
            placeholder="Enter your Gemini API key"
          />
        </div>
        <div>
          <label className="block text-sm font-medium text-gray-200 mb-1">
            Model
          </label>
          <select
            value={model}
            onChange={(e) => setModel(e.target.value)}
            className="w-full px-3 py-2 bg-[#333333] rounded-md text-white focus:outline-none focus:ring-2 focus:ring-indigo-500"
          >
            <option value="gemini-pro">Gemini Pro</option>
            <option value="gemini-pro-vision">Gemini Pro Vision</option>
          </select>
        </div>
        <div className="flex justify-end gap-2 pt-4">
          <button
            type="button"
            onClick={onClose}
            className="px-4 py-2 rounded-md bg-gray-700 hover:bg-gray-600 transition-colors"
          >
            Cancel
          </button>
          <button
            type="submit"
            className="px-4 py-2 rounded-md bg-indigo-600 hover:bg-indigo-700 transition-colors"
          >
            Save
          </button>
        </div>
      </form>
    </div>
  );
};

export default GeminiConfig;
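
For reference, a minimal sketch of how a parent screen might mount this form. The SettingsExample wrapper, the import path, the overlay styling, and the localStorage persistence are illustrative assumptions, not part of this commit.

import React, { useState } from 'react';
import GeminiConfig from './GeminiConfig'; // assumed path

const SettingsExample: React.FC = () => {
  const [open, setOpen] = useState(false);

  // Hypothetical persistence; swap in whatever store the app actually uses.
  const handleSave = (config: { apiKey: string; model: string }) => {
    localStorage.setItem('gemini-config', JSON.stringify(config));
    setOpen(false);
  };

  return (
    <>
      <button onClick={() => setOpen(true)}>Configure Gemini</button>
      {open && (
        <div className="fixed inset-0 flex items-center justify-center bg-black/50">
          <GeminiConfig onClose={() => setOpen(false)} onSave={handleSave} />
        </div>
      )}
    </>
  );
};

export default SettingsExample;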
New file (254 lines): VoiceTaskAssistant, a floating control that creates and updates tasks from voice commands (Web Speech API + AGiXT) and periodic screen captures.

@@ -0,0 +1,254 @@
import React, { useState, useEffect, useRef } from 'react';
import { Task } from '../types/task';
import { TranscriptionResult, VoiceCommand, ScreenCaptureData } from '../types/recognition';
import AudioPulse from './audio-pulse/AudioPulse';
import { MultimodalLiveClient } from '../lib/multimodal-live-client';
import AGiXT from 'agixt';

interface VoiceTaskAssistantProps {
  onTaskUpdate: (task: Task) => void;
  onNewTask: (task: Task) => void;
  geminiApiKey: string;
  selectedTask?: Task;
  agixtConfig: {
    backendUrl: string;
    authToken: string;
  };
}

const VoiceTaskAssistant: React.FC<VoiceTaskAssistantProps> = ({
  onTaskUpdate,
  onNewTask,
  geminiApiKey,
  selectedTask,
  agixtConfig
}) => {
  // SpeechRecognition is not part of the default TS DOM lib; this assumes
  // ambient Web Speech typings (e.g. @types/dom-speech-recognition).
  const recognitionRef = useRef<SpeechRecognition | null>(null);
  const agixtClientRef = useRef<AGiXT | null>(null);
  const [isListening, setIsListening] = useState(false);
  const [volume, setVolume] = useState(0);
  const [transcription, setTranscription] = useState('');
  const [isScreenSharing, setIsScreenSharing] = useState(false);
  const [mediaStream, setMediaStream] = useState<MediaStream | null>(null);
  const clientRef = useRef<MultimodalLiveClient | null>(null);
  // Mirror the listening/sharing flags in refs so long-lived callbacks
  // (requestAnimationFrame, setInterval) read the current value rather than
  // the stale state captured when they were created.
  const isListeningRef = useRef(false);
  const isScreenSharingRef = useRef(false);
  const micStreamRef = useRef<MediaStream | null>(null);

  useEffect(() => {
    // Initialize the multimodal client and AGiXT client
    if (geminiApiKey && agixtConfig.backendUrl && agixtConfig.authToken) {
      clientRef.current = new MultimodalLiveClient({
        url: 'https://generativelanguage.googleapis.com',
        apiKey: geminiApiKey
      });

      agixtClientRef.current = new AGiXT({
        baseUrl: agixtConfig.backendUrl,
        authToken: agixtConfig.authToken
      });

      // Initialize Web Speech API recognition
      if ('SpeechRecognition' in window || 'webkitSpeechRecognition' in window) {
        const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
        recognitionRef.current = new SpeechRecognition();
        recognitionRef.current.continuous = true;
        recognitionRef.current.interimResults = true;

        recognitionRef.current.onresult = (event) => {
          const result = event.results[event.results.length - 1];
          const transcriptionResult: TranscriptionResult = {
            text: result[0].transcript,
            confidence: result[0].confidence,
            isFinal: result.isFinal
          };

          setTranscription(transcriptionResult.text);
          if (result.isFinal) {
            processVoiceCommand(transcriptionResult);
          }
        };
      }
    }
    return () => {
      if (clientRef.current) {
        clientRef.current.disconnect();
      }
      if (recognitionRef.current) {
        recognitionRef.current.stop();
      }
    };
  }, [geminiApiKey, agixtConfig]);

  const startVoiceRecording = async () => {
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      micStreamRef.current = stream;
      setIsListening(true);
      isListeningRef.current = true;

      // Initialize audio processing for visualization
      const audioContext = new AudioContext();
      const source = audioContext.createMediaStreamSource(stream);
      const analyzer = audioContext.createAnalyser();
      const dataArray = new Uint8Array(analyzer.frequencyBinCount);

      source.connect(analyzer);

      // Update volume visualization. Reading the ref (rather than the
      // `isListening` state, which is stale inside this closure) keeps the
      // animation loop from stopping on its very first frame.
      const updateVolume = () => {
        if (isListeningRef.current) {
          analyzer.getByteFrequencyData(dataArray);
          const average = dataArray.reduce((a, b) => a + b, 0) / dataArray.length;
          setVolume(average / 255);
          requestAnimationFrame(updateVolume);
        }
      };
      updateVolume();

      // Start speech recognition
      if (recognitionRef.current) {
        recognitionRef.current.start();
      }
    } catch (error) {
      console.error('Error accessing microphone:', error);
    }
  };

  const processVoiceCommand = async (transcription: TranscriptionResult) => {
    try {
      if (!agixtClientRef.current) return;

      // Use AGiXT to process the voice command
      const response = await agixtClientRef.current.command({
        command: transcription.text,
        context: {
          currentTask: selectedTask,
          isScreenSharing,
          confidence: transcription.confidence
        }
      });

      // Handle the AGiXT response
      if (response.action === 'CREATE_TASK') {
        onNewTask({
          id: Date.now().toString(),
          title: response.data.title,
          description: response.data.description || '',
          priority: response.data.priority || 'medium',
          status: 'todo',
          createdAt: new Date(),
          updatedAt: new Date(),
          listId: 'default'
        });
      } else if (response.action === 'UPDATE_TASK' && selectedTask) {
        onTaskUpdate({
          ...selectedTask,
          ...response.data,
          updatedAt: new Date()
        });
      }
    } catch (error) {
      console.error('Error processing voice command:', error);
    }
  };

  const stopVoiceRecording = () => {
    setIsListening(false);
    isListeningRef.current = false;
    if (recognitionRef.current) {
      recognitionRef.current.stop();
    }
    // Release the microphone; otherwise the browser keeps the capture
    // indicator on after recording stops.
    if (micStreamRef.current) {
      micStreamRef.current.getTracks().forEach(track => track.stop());
      micStreamRef.current = null;
    }
  };

  const startScreenSharing = async () => {
    try {
      const stream = await navigator.mediaDevices.getDisplayMedia({ video: true });
      setMediaStream(stream);
      setIsScreenSharing(true);
      isScreenSharingRef.current = true;

      // Process screen capture. ImageCapture is Chromium-only and is not in
      // the default TS DOM lib (@types/w3c-image-capture supplies typings).
      const videoTrack = stream.getVideoTracks()[0];
      const imageCapture = new ImageCapture(videoTrack);

      // Periodically capture and process screens. Checking the ref (rather
      // than the `isScreenSharing` state, which is stale inside this closure
      // and would cancel the interval on its first tick) keeps the loop
      // alive until stopScreenSharing runs.
      const captureInterval = setInterval(async () => {
        if (!isScreenSharingRef.current) {
          clearInterval(captureInterval);
          return;
        }

        try {
          const bitmap = await imageCapture.grabFrame();
          const canvas = document.createElement('canvas');
          canvas.width = bitmap.width;
          canvas.height = bitmap.height;
          const context = canvas.getContext('2d');
          context?.drawImage(bitmap, 0, 0);

          const imageData = canvas.toDataURL('image/jpeg', 0.8);
          // Note: screenData is assembled here but only imageData is
          // forwarded to AGiXT below.
          const screenData: ScreenCaptureData = {
            timestamp: Date.now(),
            imageData
          };

          // Send to AGiXT for processing if needed
          if (agixtClientRef.current && selectedTask) {
            const contextResponse = await agixtClientRef.current.analyzeScreen({
              image: imageData,
              taskContext: selectedTask
            });

            if (contextResponse.needsUpdate) {
              onTaskUpdate({
                ...selectedTask,
                ...contextResponse.updates,
                updatedAt: new Date()
              });
            }
          }
        } catch (error) {
          console.error('Error processing screen capture:', error);
        }
      }, 5000); // Capture every 5 seconds
    } catch (error) {
      console.error('Error sharing screen:', error);
    }
  };

  const stopScreenSharing = () => {
    if (mediaStream) {
      mediaStream.getTracks().forEach(track => track.stop());
      setMediaStream(null);
    }
    setIsScreenSharing(false);
    isScreenSharingRef.current = false;
  };

  return (
    <div className="fixed bottom-4 right-4 bg-gray-800 p-4 rounded-lg shadow-lg">
      <div className="flex items-center space-x-4">
        <button
          onClick={isListening ? stopVoiceRecording : startVoiceRecording}
          className={`p-3 rounded-full ${
            isListening ? 'bg-red-600' : 'bg-blue-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isListening ? '⏹️' : '🎤'}
        </button>

        <AudioPulse active={isListening} volume={volume} />

        <button
          onClick={isScreenSharing ? stopScreenSharing : startScreenSharing}
          className={`p-3 rounded-full ${
            isScreenSharing ? 'bg-red-600' : 'bg-green-600'
          } hover:opacity-80 transition-opacity`}
        >
          {isScreenSharing ? '⏹️' : '🖥️'}
        </button>
      </div>

      {transcription && (
        <div className="mt-2 p-2 bg-gray-700 rounded text-sm">
          {transcription}
        </div>
      )}
    </div>
  );
};

export default VoiceTaskAssistant;
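
The type modules imported above (../types/recognition, ../types/task) are among the 8 changed files but are not rendered in this view. Inferred purely from how this component uses them, they would look roughly as follows; union members beyond the observed 'medium' and 'todo' are guesses, and VoiceCommand is imported but never referenced here, so its shape cannot be inferred.

// types/recognition.ts (inferred sketch — verify against the actual commit)
export interface TranscriptionResult {
  text: string;        // result[0].transcript
  confidence: number;  // result[0].confidence
  isFinal: boolean;    // result.isFinal
}

export interface ScreenCaptureData {
  timestamp: number;   // Date.now()
  imageData: string;   // JPEG data URL from canvas.toDataURL
}

// types/task.ts (inferred sketch)
export interface Task {
  id: string;
  title: string;
  description: string;
  priority: 'low' | 'medium' | 'high';      // 'medium' is the observed default
  status: 'todo' | 'in-progress' | 'done';  // only 'todo' appears in this diff
  createdAt: Date;
  updatedAt: Date;
  listId: string;
}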
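Finally, a similarly hedged sketch of wiring VoiceTaskAssistant into an app shell. The App component, import paths, env-variable plumbing, and the AGiXT URL/token are placeholders, not part of this commit (7437 is AGiXT's usual default port, but verify for your deployment).

import React, { useState } from 'react';
import VoiceTaskAssistant from './components/VoiceTaskAssistant'; // assumed path
import { Task } from './types/task';

const App: React.FC = () => {
  const [tasks, setTasks] = useState<Task[]>([]);
  const [selectedTask, setSelectedTask] = useState<Task | undefined>(undefined);

  // Assumes a Vite-style build; substitute whatever config source the app uses.
  const geminiApiKey = import.meta.env.VITE_GEMINI_API_KEY ?? '';

  return (
    <>
      <ul>
        {tasks.map((t) => (
          <li key={t.id}>{t.title}</li>
        ))}
      </ul>
      <VoiceTaskAssistant
        geminiApiKey={geminiApiKey}
        selectedTask={selectedTask}
        agixtConfig={{
          backendUrl: 'http://localhost:7437', // placeholder AGiXT endpoint
          authToken: 'YOUR_AGIXT_TOKEN'        // placeholder credential
        }}
        onNewTask={(task) => {
          setTasks((prev) => [...prev, task]);
          setSelectedTask(task); // focus the task just created by voice
        }}
        onTaskUpdate={(task) =>
          setTasks((prev) => prev.map((t) => (t.id === task.id ? task : t)))
        }
      />
    </>
  );
};

export default App;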