From 9d6632d2d024ed2d8cf612345dbc9592b4be1b17 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Fri, 21 Feb 2025 11:24:07 -0700 Subject: [PATCH 1/5] Use server instead of client for open ai request + large refactors and bug fixes --- src/app/api/tts/route.ts | 46 +++ src/app/api/tts/voices/route.ts | 34 ++ src/components/EPUBViewer.tsx | 48 ++- src/contexts/EPUBContext.tsx | 7 +- src/contexts/PDFContext.tsx | 11 +- src/contexts/TTSContext.tsx | 447 +++++++++++++++----------- src/hooks/audio/useAudioCache.ts | 4 +- src/hooks/audio/useVoiceManagement.ts | 8 +- src/hooks/epub/useEPUBResize.ts | 35 +- src/utils/audio.ts | 80 +---- 10 files changed, 417 insertions(+), 303 deletions(-) create mode 100644 src/app/api/tts/route.ts create mode 100644 src/app/api/tts/voices/route.ts diff --git a/src/app/api/tts/route.ts b/src/app/api/tts/route.ts new file mode 100644 index 0000000..40b6fbe --- /dev/null +++ b/src/app/api/tts/route.ts @@ -0,0 +1,46 @@ +import { NextRequest, NextResponse } from 'next/server'; +import OpenAI from 'openai'; + +export async function POST(req: NextRequest) { + try { + // Get API credentials from headers + const openApiKey = req.headers.get('x-openai-key'); + const openApiBaseUrl = req.headers.get('x-openai-base-url'); + const { text, voice, speed } = await req.json(); + console.log('Received TTS request:', text, voice, speed); + + if (!openApiKey || !openApiBaseUrl) { + return NextResponse.json({ error: 'Missing API credentials' }, { status: 401 }); + } + + if (!text || !voice || !speed) { + return NextResponse.json({ error: 'Missing required parameters' }, { status: 400 }); + } + + // Initialize OpenAI client + const openai = new OpenAI({ + apiKey: openApiKey, + baseURL: openApiBaseUrl, + }); + + // Request audio from OpenAI + const response = await openai.audio.speech.create({ + model: 'tts-1', + voice: voice as "alloy", + input: text, + speed: speed, + }); + + // Get the audio data as array buffer + const arrayBuffer = await response.arrayBuffer(); + + // Return audio data with appropriate headers + return new NextResponse(arrayBuffer); + } catch (error) { + console.error('Error generating TTS:', error); + return NextResponse.json( + { error: 'Failed to generate audio' }, + { status: 500 } + ); + } +} \ No newline at end of file diff --git a/src/app/api/tts/voices/route.ts b/src/app/api/tts/voices/route.ts new file mode 100644 index 0000000..461a98b --- /dev/null +++ b/src/app/api/tts/voices/route.ts @@ -0,0 +1,34 @@ +import { NextRequest, NextResponse } from 'next/server'; + +const DEFAULT_VOICES = ['alloy', 'ash', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer']; + +export async function GET(req: NextRequest) { + try { + // Get API credentials from headers + const openApiKey = req.headers.get('x-openai-key'); + const openApiBaseUrl = req.headers.get('x-openai-base-url'); + + if (!openApiKey || !openApiBaseUrl) { + return NextResponse.json({ error: 'Missing API credentials' }, { status: 401 }); + } + + // Request voices from OpenAI + const response = await fetch(`${openApiBaseUrl}/audio/voices`, { + headers: { + 'Authorization': `Bearer ${openApiKey}`, + 'Content-Type': 'application/json', + }, + }); + + if (!response.ok) { + throw new Error('Failed to fetch voices'); + } + + const data = await response.json(); + return NextResponse.json({ voices: data.voices || DEFAULT_VOICES }); + } catch (error) { + console.error('Error fetching voices:', error); + // Return default voices on error + return NextResponse.json({ voices: DEFAULT_VOICES }); + } +} \ No newline at end of file diff --git a/src/components/EPUBViewer.tsx b/src/components/EPUBViewer.tsx index fc30dff..a9389a2 100644 --- a/src/components/EPUBViewer.tsx +++ b/src/components/EPUBViewer.tsx @@ -25,7 +25,7 @@ interface EPUBViewerProps { export function EPUBViewer({ className = '' }: EPUBViewerProps) { const { id } = useParams(); const { currDocData, currDocName, currDocPage, extractPageText } = useEPUB(); - const { skipToLocation, registerLocationChangeHandler, setIsEPUB } = useTTS(); + const { skipToLocation, registerLocationChangeHandler, setIsEPUB, pause } = useTTS(); const { epubTheme } = useConfig(); const bookRef = useRef(null); const rendition = useRef(undefined); @@ -35,9 +35,7 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) { const containerRef = useRef(null); const isEPUBSetOnce = useRef(false); - const isResizing = useRef(false); - - useEPUBResize(containerRef, isResizing); + const { isResizing, setIsResizing, dimensions } = useEPUBResize(containerRef); const handleLocationChanged = useCallback((location: string | number) => { // Set the EPUB flag once the location changes @@ -68,23 +66,45 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) { setLastDocumentLocation(id as string, location.toString()); } - if (isResizing.current) { - skipToLocation(location, false); - isResizing.current = false; - } else { - skipToLocation(location, true); - } + skipToLocation(location); locationRef.current = location; extractPageText(bookRef.current, rendition.current); + }, [id, skipToLocation, extractPageText, setIsEPUB]); - // Load the initial location + const initialExtract = useCallback(() => { + if (!bookRef.current || !rendition.current?.location || isEPUBSetOnce.current) return; + extractPageText(bookRef.current, rendition.current, false); + }, [extractPageText]); + + const checkResize = useCallback(() => { + if (isResizing && bookRef.current?.isOpen && rendition.current && isEPUBSetOnce.current) { + pause(); + // Only extract text when we have dimensions, ensuring the resize is complete + if (dimensions) extractPageText(bookRef.current, rendition.current, true); + setIsResizing(false); + + return true; + } else { + return false; + } + }, [isResizing, dimensions, pause, extractPageText]); + + // Check for isResizing to pause TTS and re-extract text useEffect(() => { - if (!bookRef.current || !rendition.current || isEPUBSetOnce.current) return; + if (checkResize()) return; - extractPageText(bookRef.current, rendition.current); - }, [extractPageText]); + // Load initial location when not resizing + initialExtract(); + }, [checkResize, initialExtract]); + + // Load the initial location + // useEffect(() => { + // if (!bookRef.current || !rendition.current || isEPUBSetOnce.current) return; + + // extractPageText(bookRef.current, rendition.current, false); + // }, [extractPageText]); // Register the location change handler useEffect(() => { diff --git a/src/contexts/EPUBContext.tsx b/src/contexts/EPUBContext.tsx index 7dae7e2..c765f41 100644 --- a/src/contexts/EPUBContext.tsx +++ b/src/contexts/EPUBContext.tsx @@ -21,7 +21,7 @@ interface EPUBContextType { currDocText: string | undefined; setCurrentDocument: (id: string) => Promise; clearCurrDoc: () => void; - extractPageText: (book: Book, rendition: Rendition) => Promise; + extractPageText: (book: Book, rendition: Rendition, shouldPause?: boolean) => Promise; } const EPUBContext = createContext(undefined); @@ -83,9 +83,10 @@ export function EPUBProvider({ children }: { children: ReactNode }) { * Extracts text content from the current EPUB page/location * @param {Book} book - The EPUB.js Book instance * @param {Rendition} rendition - The EPUB.js Rendition instance + * @param {boolean} shouldPause - Whether to pause TTS * @returns {Promise} The extracted text content */ - const extractPageText = useCallback(async (book: Book, rendition: Rendition): Promise => { + const extractPageText = useCallback(async (book: Book, rendition: Rendition, shouldPause = false): Promise => { try { const { start, end } = rendition?.location; if (!start?.cfi || !end?.cfi || !book || !book.isOpen || !rendition) return ''; @@ -95,7 +96,7 @@ export function EPUBProvider({ children }: { children: ReactNode }) { const range = await book.getRange(rangeCfi); const textContent = range.toString().trim(); - setTTSText(textContent); + setTTSText(textContent, shouldPause); setCurrDocText(textContent); return textContent; diff --git a/src/contexts/PDFContext.tsx b/src/contexts/PDFContext.tsx index bcaef07..e6ae4b0 100644 --- a/src/contexts/PDFContext.tsx +++ b/src/contexts/PDFContext.tsx @@ -105,13 +105,16 @@ export function PDFProvider({ children }: { children: ReactNode }) { try { if (!pdfDocument) return; const text = await extractTextFromPDF(pdfDocument, currDocPage); - setCurrDocText(text); - setTTSText(text); - + // Only update TTS text if the content has actually changed + // This prevents unnecessary resets of the sentence index + if (text !== currDocText || text === '') { + setCurrDocText(text); + setTTSText(text); + } } catch (error) { console.error('Error loading PDF text:', error); } - }, [pdfDocument, currDocPage, setTTSText]); + }, [pdfDocument, currDocPage, setTTSText, currDocText]); /** * Effect hook to update document text when the page changes diff --git a/src/contexts/TTSContext.tsx b/src/contexts/TTSContext.tsx index 16d58cb..b2c3cc6 100644 --- a/src/contexts/TTSContext.tsx +++ b/src/contexts/TTSContext.tsx @@ -24,7 +24,6 @@ import { useMemo, ReactNode, } from 'react'; -import OpenAI from 'openai'; import { Howl } from 'howler'; import toast from 'react-hot-toast'; import { useParams } from 'next/navigation'; @@ -65,13 +64,14 @@ interface TTSContextType { togglePlay: () => void; skipForward: () => void; skipBackward: () => void; + pause: () => void; stop: () => void; stopAndPlayFromIndex: (index: number) => void; - setText: (text: string) => void; + setText: (text: string, shouldPause?: boolean) => void; setCurrDocPages: (num: number | undefined) => void; setSpeedAndRestart: (speed: number) => void; setVoiceAndRestart: (voice: string) => void; - skipToLocation: (location: string | number, keepPlaying?: boolean) => void; + skipToLocation: (location: string | number) => void; registerLocationChangeHandler: (handler: (location: string | number) => void) => void; // EPUB-only: Handles chapter navigation setIsEPUB: (isEPUB: boolean) => void; } @@ -99,10 +99,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { skipBlank, } = useConfig(); - // OpenAI client reference - const openaiRef = useRef(null); - - // Use custom hooks + // Remove OpenAI client reference as it's no longer needed const audioContext = useAudioContext(); const audioCache = useAudioCache(25); const { availableVoices, fetchVoices } = useVoiceManagement(openApiKey, openApiBaseUrl); @@ -139,7 +136,9 @@ export function TTSProvider({ children }: { children: ReactNode }) { const [activeHowl, setActiveHowl] = useState(null); const [speed, setSpeed] = useState(voiceSpeed); const [voice, setVoice] = useState(configVoice); - const [nextPageLoading, setNextPageLoading] = useState(false); + + // Track pending preload requests + const preloadRequests = useRef>>(new Map()); //console.log('page:', currDocPage, 'pages:', currDocPages); @@ -170,12 +169,16 @@ export function TTSProvider({ children }: { children: ReactNode }) { /** * Stops the current audio playback and clears the active Howl instance + * @param {boolean} [clearPending=false] - Whether to clear pending requests */ - const abortAudio = useCallback(() => { + const abortAudio = useCallback((clearPending = false) => { if (activeHowl) { activeHowl.stop(); setActiveHowl(null); } + if (clearPending) { + preloadRequests.current.clear(); + } }, [activeHowl]); /** @@ -183,21 +186,53 @@ export function TTSProvider({ children }: { children: ReactNode }) { * Works for both PDF pages and EPUB locations * * @param {string | number} location - The target location to navigate to + * @param {boolean} keepPlaying - Whether to maintain playback state */ - const skipToLocation = useCallback((location: string | number, keepPlaying = false) => { - setNextPageLoading(true); - - // Reset state for new content + const skipToLocation = useCallback((location: string | number) => { + // Reset state for new content in correct order abortAudio(); - if (!keepPlaying) { - setIsPlaying(false); - } setCurrentIndex(0); setSentences([]); - - // Update current page/location setCurrDocPage(location); - }, [abortAudio]); + + }, [abortAudio, isPlaying]); + + /** + * Moves to the next or previous sentence + * + * @param {boolean} [backwards=false] - Whether to move backwards + */ + const advance = useCallback(async (backwards = false) => { + const nextIndex = currentIndex + (backwards ? -1 : 1); + + // Handle within current page bounds + if (nextIndex < sentences.length && nextIndex >= 0) { + setCurrentIndex(nextIndex); + return; + } + + // For EPUB documents, always try to advance to next/prev section + if (isEPUB && locationChangeHandlerRef.current) { + locationChangeHandlerRef.current(nextIndex >= sentences.length ? 'next' : 'prev'); + return; + } + + // For PDFs and other documents, check page bounds + if (!isEPUB) { + // Handle next/previous page transitions + if ((nextIndex >= sentences.length && currDocPageNumber < currDocPages!) || + (nextIndex < 0 && currDocPageNumber > 1)) { + // Pass wasPlaying to maintain playback state during page turn + skipToLocation(currDocPageNumber + (nextIndex >= sentences.length ? 1 : -1)); + return; + } + + // Handle end of document (PDF only) + if (nextIndex >= sentences.length && currDocPageNumber >= currDocPages!) { + setIsPlaying(false); + } + } + }, [currentIndex, sentences, currDocPageNumber, currDocPages, isEPUB, skipToLocation]); /** * Handles blank text sections based on document type @@ -210,58 +245,39 @@ export function TTSProvider({ children }: { children: ReactNode }) { return false; } - if (isEPUB && locationChangeHandlerRef.current) { - locationChangeHandlerRef.current('next'); - - toast.success('Skipping blank section', { - id: `epub-section-skip`, - iconTheme: { - primary: 'var(--accent)', - secondary: 'var(--background)', - }, - style: { - background: 'var(--background)', - color: 'var(--accent)', - }, - duration: 1000, - position: 'top-center', - }); - return true; - } + // Use advance to handle navigation for both EPUB and PDF + advance(); - if (currDocPageNumber < currDocPages!) { - // Pass true to keep playing when skipping blank pages - skipToLocation(currDocPageNumber + 1, true); - - toast.success(`Skipping blank page ${currDocPageNumber}`, { - id: `page-${currDocPageNumber}`, - iconTheme: { - primary: 'var(--accent)', - secondary: 'var(--background)', - }, - style: { - background: 'var(--background)', - color: 'var(--accent)', - }, - duration: 1000, - position: 'top-center', - }); - return true; - } + toast.success(isEPUB ? 'Skipping blank section' : `Skipping blank page ${currDocPageNumber}`, { + id: isEPUB ? `epub-section-skip` : `page-${currDocPageNumber}`, + iconTheme: { + primary: 'var(--accent)', + secondary: 'var(--background)', + }, + style: { + background: 'var(--background)', + color: 'var(--accent)', + }, + duration: 1000, + position: 'top-center', + }); - return false; - }, [isPlaying, skipBlank, isEPUB, currDocPageNumber, currDocPages, skipToLocation]); + return true; + }, [isPlaying, skipBlank, advance, isEPUB, currDocPageNumber]); /** * Sets the current text and splits it into sentences * * @param {string} text - The text to be processed */ - const setText = useCallback((text: string) => { - console.log('Setting page text:', text); - + const setText = useCallback((text: string, shouldPause = false) => { + // Check for blank section first if (handleBlankSection(text)) return; + // Keep track of previous state + const wasPlaying = isPlaying; + + console.log('Setting text:', text); processTextToSentences(text) .then(newSentences => { if (newSentences.length === 0) { @@ -270,7 +286,12 @@ export function TTSProvider({ children }: { children: ReactNode }) { } setSentences(newSentences); - setNextPageLoading(false); + setCurrentIndex(0); + + // Only restore previous playback state if we shouldn't pause + if (shouldPause) setIsPlaying(false); + else if (wasPlaying) setIsPlaying(true); + }) .catch(error => { console.warn('Error processing text:', error); @@ -282,7 +303,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { duration: 3000, }); }); - }, [processTextToSentences, handleBlankSection]); + }, [processTextToSentences, handleBlankSection, isPlaying]); /** * Toggles the playback state between playing and paused @@ -299,71 +320,38 @@ export function TTSProvider({ children }: { children: ReactNode }) { }, [abortAudio]); /** - * Moves to the next or previous sentence - * - * @param {boolean} [backwards=false] - Whether to move backwards + * Pauses the current audio playback + * Used for external control of playback state */ - const advance = useCallback(async (backwards = false) => { - const nextIndex = currentIndex + (backwards ? -1 : 1); - - // Handle within current page bounds - if (nextIndex < sentences.length && nextIndex >= 0) { - console.log('Advancing to next sentence:', sentences[nextIndex]); - setCurrentIndex(nextIndex); - return; - } - - // For EPUB documents, always try to advance to next/prev section - if (isEPUB && locationChangeHandlerRef.current) { - console.log('EPUB: Advancing to next/prev section'); - setCurrentIndex(0); - setSentences([]); - locationChangeHandlerRef.current(nextIndex >= sentences.length ? 'next' : 'prev'); - return; - } - - // For PDFs and other documents, check page bounds - if (!isEPUB) { - // Handle next/previous page transitions - if ((nextIndex >= sentences.length && currDocPageNumber < currDocPages!) || - (nextIndex < 0 && currDocPageNumber > 1)) { - console.log('PDF: Advancing to next/prev page'); - setCurrentIndex(0); - setSentences([]); - skipToLocation(currDocPageNumber + (nextIndex >= sentences.length ? 1 : -1), true); - return; - } - - // Handle end of document (PDF only) - if (nextIndex >= sentences.length && currDocPageNumber >= currDocPages!) { - console.log('PDF: Reached end of document'); - setIsPlaying(false); - } - } - }, [currentIndex, sentences, currDocPageNumber, currDocPages, isEPUB, skipToLocation]); + const pause = useCallback(() => { + abortAudio(); + setIsPlaying(false); + }, [abortAudio]); + /** * Moves forward one sentence in the text */ - const skipForward = useCallback(() => { - setIsProcessing(true); - abortAudio(); - advance(); - setIsProcessing(false); - }, [abortAudio, advance]); + const skipForward = useCallback(async () => { + // Only show processing state if we're currently playing + if (isPlaying) { + setIsProcessing(true); + } + abortAudio(false); // Don't clear pending requests + await advance(); + }, [isPlaying, abortAudio, advance]); /** * Moves backward one sentence in the text */ - const skipBackward = useCallback(() => { - setIsProcessing(true); - - abortAudio(); - - advance(true); // Pass true to go backwards - - setIsProcessing(false); - }, [abortAudio, advance]); + const skipBackward = useCallback(async () => { + // Only show processing state if we're currently playing + if (isPlaying) { + setIsProcessing(true); + } + abortAudio(false); // Don't clear pending requests + await advance(true); + }, [isPlaying, abortAudio, advance]); /** * Updates the voice and speed settings from the configuration @@ -374,15 +362,10 @@ export function TTSProvider({ children }: { children: ReactNode }) { }, [configVoice, voiceSpeed]); /** - * Initializes OpenAI configuration and fetches available voices + * Initializes configuration and fetches available voices */ useEffect(() => { if (!configIsLoading && openApiKey && openApiBaseUrl) { - openaiRef.current = new OpenAI({ - apiKey: openApiKey, - baseURL: openApiBaseUrl, - dangerouslyAllowBrowser: true, - }); fetchVoices(); updateVoiceAndSpeed(); } @@ -394,7 +377,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { * @param {string} sentence - The sentence to generate audio for * @returns {Promise} The generated audio buffer */ - const getAudio = useCallback(async (sentence: string): Promise => { + const getAudio = useCallback(async (sentence: string): Promise => { // Check if the audio is already cached const cachedAudio = audioCache.get(sentence); if (cachedAudio) { @@ -402,39 +385,47 @@ export function TTSProvider({ children }: { children: ReactNode }) { return cachedAudio; } - // If not cached, fetch the audio from OpenAI API - if (openaiRef.current) { - try { - console.log('Requesting audio for sentence:', sentence); - - const response = await openaiRef.current.audio.speech.create({ - model: 'tts-1', - voice: voice as "alloy", - input: sentence, + try { + console.log('Requesting audio for sentence:', sentence); + + const response = await fetch('/api/tts', { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-openai-key': openApiKey || '', + 'x-openai-base-url': openApiBaseUrl || '', + }, + body: JSON.stringify({ + text: sentence, + voice: voice, speed: speed, - }); + }), + }); - const arrayBuffer = await response.arrayBuffer(); - const audioBuffer = await audioContext!.decodeAudioData(arrayBuffer); + if (!response.ok) { + throw new Error('Failed to generate audio'); + } - // Cache the audio buffer - audioCache.set(sentence, audioBuffer); + // Get the raw array buffer - no need to decode since it's already MP3 + const arrayBuffer = await response.arrayBuffer(); - return audioBuffer; - } catch (error) { - setIsPlaying(false); - toast.error('Failed to generate audio. API not responding.', { - id: 'tts-api-error', - style: { - background: 'var(--background)', - color: 'var(--accent)', - }, - duration: 7000, - }); - throw error; - } + // Cache the array buffer + audioCache.set(sentence, arrayBuffer); + + return arrayBuffer; + } catch (error) { + setIsPlaying(false); + toast.error('Failed to generate audio. Server not responding.', { + id: 'tts-api-error', + style: { + background: 'var(--background)', + color: 'var(--accent)', + }, + duration: 7000, + }); + throw error; } - }, [audioContext, voice, speed, audioCache]); + }, [voice, speed, audioCache, openApiKey, openApiBaseUrl]); /** * Processes and plays the current sentence @@ -444,16 +435,45 @@ export function TTSProvider({ children }: { children: ReactNode }) { * @returns {Promise} The URL of the processed audio */ const processSentence = useCallback(async (sentence: string, preload = false): Promise => { - if (isProcessing && !preload) throw new Error('Audio is already being processed'); - if (!audioContext || !openaiRef.current) throw new Error('Audio context not initialized'); + if (!audioContext) throw new Error('Audio context not initialized'); + // Check if there's a pending preload request for this sentence + const pendingRequest = preloadRequests.current.get(sentence); + if (pendingRequest) { + console.log('Using pending preload request for:', sentence.substring(0, 20)); + setIsProcessing(true); // Show processing state when using pending request + // If this is not a preload request, remove it from the pending map + if (!preload) { + preloadRequests.current.delete(sentence); + } + return pendingRequest; + } + // Only set processing state if not preloading if (!preload) setIsProcessing(true); - // No need to preprocess again since setText already did it - const audioBuffer = await getAudio(sentence); - return audioBufferToURL(audioBuffer!); - }, [isProcessing, audioContext, getAudio]); + // Create the audio processing promise + const processPromise = (async () => { + try { + const audioBuffer = await getAudio(sentence); + return audioBufferToURL(audioBuffer!); + } catch (error) { + setIsProcessing(false); + throw error; + } + })(); + + // If this is a preload request, store it in the map + if (preload) { + preloadRequests.current.set(sentence, processPromise); + // Clean up the map entry once the promise resolves or rejects + processPromise.finally(() => { + preloadRequests.current.delete(sentence); + }); + } + + return processPromise; + }, [audioContext, getAudio]); /** * Plays the current sentence with Howl @@ -461,6 +481,12 @@ export function TTSProvider({ children }: { children: ReactNode }) { * @param {string} sentence - The sentence to play */ const playSentenceWithHowl = useCallback(async (sentence: string) => { + if (!sentence) { + console.log('No sentence to play'); + setIsProcessing(false); + return; + } + try { const audioUrl = await processSentence(sentence); if (!audioUrl) { @@ -469,9 +495,12 @@ export function TTSProvider({ children }: { children: ReactNode }) { const howl = new Howl({ src: [audioUrl], - format: ['wav'], + format: ['mp3'], html5: true, + preload: true, + pool: 1, onplay: () => { + setIsProcessing(false); if ('mediaSession' in navigator) { navigator.mediaSession.playbackState = 'playing'; } @@ -496,17 +525,19 @@ export function TTSProvider({ children }: { children: ReactNode }) { // Don't auto-advance on load error setIsPlaying(false); }, + onstop: () => { + setIsProcessing(false); + URL.revokeObjectURL(audioUrl); + } }); setActiveHowl(howl); howl.play(); - setIsProcessing(false); } catch (error) { console.error('Error playing TTS:', error); setActiveHowl(null); setIsProcessing(false); - //setIsPlaying(false); toast.error('Failed to process audio. Skipping problematic sentence.', { id: 'tts-processing-error', @@ -524,13 +555,17 @@ export function TTSProvider({ children }: { children: ReactNode }) { /** * Preloads the next sentence's audio */ - const preloadNextAudio = useCallback(() => { + const preloadNextAudio = useCallback(async () => { try { - if (sentences[currentIndex + 1] && !audioCache.has(sentences[currentIndex + 1])) { - processSentence(sentences[currentIndex + 1], true); // True indicates preloading + const nextSentence = sentences[currentIndex + 1]; + if (nextSentence && !audioCache.has(nextSentence) && !preloadRequests.current.has(nextSentence)) { + // Start preloading but don't wait for it to complete + processSentence(nextSentence, true).catch(error => { + console.error('Error preloading next sentence:', error); + }); } } catch (error) { - console.error('Error preloading next sentence:', error); + console.error('Error initiating preload:', error); } }, [currentIndex, sentences, audioCache, processSentence]); @@ -549,17 +584,19 @@ export function TTSProvider({ children }: { children: ReactNode }) { if (!isPlaying) return; // Don't proceed if stopped if (isProcessing) return; // Don't proceed if processing audio if (!sentences[currentIndex]) return; // Don't proceed if no sentence to play - if (nextPageLoading) return; // Don't proceed if loading next page if (activeHowl) return; // Don't proceed if audio is already playing - // Play the current sentence and preload the next one if available + // Start playing current sentence playAudio(); - if (sentences[currentIndex + 1]) { - preloadNextAudio(); - } + + // Start preloading next sentence in parallel + preloadNextAudio(); return () => { - abortAudio(); + // Only abort if we're actually stopping playback + if (!isPlaying) { + abortAudio(); + } }; }, [ isPlaying, @@ -567,7 +604,6 @@ export function TTSProvider({ children }: { children: ReactNode }) { currentIndex, sentences, activeHowl, - nextPageLoading, playAudio, preloadNextAudio, abortAudio @@ -585,7 +621,6 @@ export function TTSProvider({ children }: { children: ReactNode }) { setSentences([]); setCurrDocPage(1); setCurrDocPages(undefined); - setNextPageLoading(false); setIsProcessing(false); setIsEPUB(false); }, [abortAudio]); @@ -608,17 +643,29 @@ export function TTSProvider({ children }: { children: ReactNode }) { * @param {number} newSpeed - The new speed to set */ const setSpeedAndRestart = useCallback((newSpeed: number) => { + const wasPlaying = isPlaying; + + // Set a flag to prevent double audio requests during config update + setIsProcessing(true); + + // First stop any current playback + setIsPlaying(false); + abortAudio(true); // Clear pending requests since speed changed + setActiveHowl(null); + + // Update speed, clear cache, and config setSpeed(newSpeed); - updateConfigKey('voiceSpeed', newSpeed); - // Clear the audio cache since it contains audio at the old speed audioCache.clear(); - - if (isPlaying) { - setIsPlaying(false); - abortAudio(); - setIsPlaying(true); - } - }, [isPlaying, abortAudio, updateConfigKey, audioCache]); + + // Update config after state changes + updateConfigKey('voiceSpeed', newSpeed).then(() => { + setIsProcessing(false); + // Resume playback if it was playing before + if (wasPlaying) { + setIsPlaying(true); + } + }); + }, [abortAudio, updateConfigKey, audioCache, isPlaying]); /** * Sets the voice and restarts the playback @@ -626,17 +673,29 @@ export function TTSProvider({ children }: { children: ReactNode }) { * @param {string} newVoice - The new voice to set */ const setVoiceAndRestart = useCallback((newVoice: string) => { + const wasPlaying = isPlaying; + + // Set a flag to prevent double audio requests during config update + setIsProcessing(true); + + // First stop any current playback + setIsPlaying(false); + abortAudio(true); // Clear pending requests since voice changed + setActiveHowl(null); + + // Update voice, clear cache, and config setVoice(newVoice); - updateConfigKey('voice', newVoice); - // Clear the audio cache since it contains audio with the old voice audioCache.clear(); - - if (isPlaying) { - setIsPlaying(false); - abortAudio(); - setIsPlaying(true); - } - }, [isPlaying, abortAudio, updateConfigKey, audioCache]); + + // Update config after state changes + updateConfigKey('voice', newVoice).then(() => { + setIsProcessing(false); + // Resume playback if it was playing before + if (wasPlaying) { + setIsPlaying(true); + } + }); + }, [abortAudio, updateConfigKey, audioCache, isPlaying]); /** * Provides the TTS context value to child components @@ -653,6 +712,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { skipForward, skipBackward, stop, + pause, stopAndPlayFromIndex, setText, setCurrDocPages, @@ -674,6 +734,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { skipForward, skipBackward, stop, + pause, stopAndPlayFromIndex, setText, setCurrDocPages, diff --git a/src/hooks/audio/useAudioCache.ts b/src/hooks/audio/useAudioCache.ts index ef5c8b9..9901845 100644 --- a/src/hooks/audio/useAudioCache.ts +++ b/src/hooks/audio/useAudioCache.ts @@ -9,11 +9,11 @@ import { LRUCache } from 'lru-cache'; * @returns Object containing cache methods */ export function useAudioCache(maxSize = 50) { - const cacheRef = useRef(new LRUCache({ max: maxSize })); + const cacheRef = useRef(new LRUCache({ max: maxSize })); return { get: (key: string) => cacheRef.current.get(key), - set: (key: string, value: AudioBuffer) => cacheRef.current.set(key, value), + set: (key: string, value: ArrayBuffer) => cacheRef.current.set(key, value), has: (key: string) => cacheRef.current.has(key), clear: () => cacheRef.current.clear(), }; diff --git a/src/hooks/audio/useVoiceManagement.ts b/src/hooks/audio/useVoiceManagement.ts index 0f4ba2e..6474ea0 100644 --- a/src/hooks/audio/useVoiceManagement.ts +++ b/src/hooks/audio/useVoiceManagement.ts @@ -17,15 +17,17 @@ export function useVoiceManagement(apiKey: string | undefined, baseUrl: string | if (!apiKey || !baseUrl) return; try { - const response = await fetch(`${baseUrl}/audio/voices`, { + const response = await fetch('/api/tts/voices', { headers: { - 'Authorization': `Bearer ${apiKey}`, + 'x-openai-key': apiKey, + 'x-openai-base-url': baseUrl, 'Content-Type': 'application/json', }, }); + if (!response.ok) throw new Error('Failed to fetch voices'); const data = await response.json(); - setAvailableVoices(data.voices || []); + setAvailableVoices(data.voices || DEFAULT_VOICES); } catch (error) { console.error('Error fetching voices:', error); // Set available voices to default openai voices diff --git a/src/hooks/epub/useEPUBResize.ts b/src/hooks/epub/useEPUBResize.ts index 59b1e0e..015ddfc 100644 --- a/src/hooks/epub/useEPUBResize.ts +++ b/src/hooks/epub/useEPUBResize.ts @@ -1,18 +1,22 @@ -import { useEffect, RefObject } from 'react'; +import { useEffect, RefObject, useState } from 'react'; +import { debounce } from '@/utils/pdf'; + +export function useEPUBResize(containerRef: RefObject) { + const [isResizing, setIsResizing] = useState(false); + const [dimensions, setDimensions] = useState(null); + + useEffect(() => { + const debouncedResize = debounce((...args: unknown[]) => { + const entries = args[0] as ResizeObserverEntry[]; + console.log('Debounced resize', entries[0].contentRect); + setDimensions(entries[0].contentRect); + }, 150); -export function useEPUBResize( - containerRef: RefObject, - isResizing: RefObject -) { - useEffect(() => { - let resizeTimeout: NodeJS.Timeout; - const resizeObserver = new ResizeObserver((entries) => { - clearTimeout(resizeTimeout); - resizeTimeout = setTimeout(() => { - console.log('Resizing detected (debounced)', entries[0].contentRect); - isResizing.current = true; - }, 250); + if (!isResizing) { + setIsResizing(true); + } + debouncedResize(entries); }); const mutationObserver = new MutationObserver((mutations) => { @@ -44,9 +48,10 @@ export function useEPUBResize( } return () => { - clearTimeout(resizeTimeout); mutationObserver.disconnect(); resizeObserver.disconnect(); }; - }, [containerRef, isResizing]); + }, [containerRef]); + + return { isResizing, setIsResizing, dimensions }; } \ No newline at end of file diff --git a/src/utils/audio.ts b/src/utils/audio.ts index ba4d097..07dabeb 100644 --- a/src/utils/audio.ts +++ b/src/utils/audio.ts @@ -1,71 +1,13 @@ -// Add utility function to convert AudioBuffer to URL -export const audioBufferToURL = (audioBuffer: AudioBuffer): string => { - // Get WAV file bytes - const wavBytes = getWavBytes(audioBuffer.getChannelData(0), { - isFloat: true, // floating point or 16-bit integer - numChannels: 1, // number of channels - sampleRate: audioBuffer.sampleRate, // audio sample rate - }); - - // Create blob and URL - const blob = new Blob([wavBytes], { type: 'audio/wav' }); +/** + * Utility functions for audio processing + */ + +/** + * Creates a URL from an ArrayBuffer containing MP3 audio data + * @param buffer The ArrayBuffer containing MP3 audio data + * @returns A blob URL that can be used for audio playback + */ +export const audioBufferToURL = (buffer: ArrayBuffer): string => { + const blob = new Blob([buffer], { type: 'audio/mp3' }); return URL.createObjectURL(blob); -}; - -// Add helper function for WAV conversion -export const getWavBytes = (samples: Float32Array, opts: { - isFloat?: boolean, - numChannels?: number, - sampleRate?: number, -}) => { - const { - isFloat = true, - numChannels = 1, - sampleRate = 44100, - } = opts; - - const bytesPerSample = isFloat ? 4 : 2; - const numSamples = samples.length; - - // WAV header size is 44 bytes - const buffer = new ArrayBuffer(44 + numSamples * bytesPerSample); - const dv = new DataView(buffer); - - let pos = 0; - - // Write WAV header - writeString(dv, pos, 'RIFF'); pos += 4; - dv.setUint32(pos, 36 + numSamples * bytesPerSample, true); pos += 4; - writeString(dv, pos, 'WAVE'); pos += 4; - writeString(dv, pos, 'fmt '); pos += 4; - dv.setUint32(pos, 16, true); pos += 4; - dv.setUint16(pos, isFloat ? 3 : 1, true); pos += 2; - dv.setUint16(pos, numChannels, true); pos += 2; - dv.setUint32(pos, sampleRate, true); pos += 4; - dv.setUint32(pos, sampleRate * numChannels * bytesPerSample, true); pos += 4; - dv.setUint16(pos, numChannels * bytesPerSample, true); pos += 2; - dv.setUint16(pos, bytesPerSample * 8, true); pos += 2; - writeString(dv, pos, 'data'); pos += 4; - dv.setUint32(pos, numSamples * bytesPerSample, true); pos += 4; - - if (isFloat) { - for (let i = 0; i < numSamples; i++) { - dv.setFloat32(pos, samples[i], true); - pos += bytesPerSample; - } - } else { - for (let i = 0; i < numSamples; i++) { - const s = Math.max(-1, Math.min(1, samples[i])); - dv.setInt16(pos, s < 0 ? s * 0x8000 : s * 0x7FFF, true); - pos += bytesPerSample; - } - } - - return buffer; -}; - -export const writeString = (view: DataView, offset: number, string: string): void => { - for (let i = 0; i < string.length; i++) { - view.setUint8(offset + i, string.charCodeAt(i)); - } }; \ No newline at end of file From 868bb8898886c6b252948bd1474c1b293b0b7f30 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Fri, 21 Feb 2025 11:30:06 -0700 Subject: [PATCH 2/5] Update docker publish flow --- .github/workflows/docker-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 0f7959e..2d15f92 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -38,7 +38,7 @@ jobs: with: images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} tags: | - type=raw,value=latest + type=raw,value=latest,enable=${{ !contains(github.ref, '-pre') }} type=semver,pattern={{version}} - name: Build and push Docker image From 99868f29b2c2405cf0bdfb0ca0e483ad1c853df1 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Fri, 21 Feb 2025 12:01:52 -0700 Subject: [PATCH 3/5] EPUB resize debounce working perfectly --- src/components/EPUBViewer.tsx | 6 +++--- src/contexts/TTSContext.tsx | 2 +- src/hooks/epub/useEPUBResize.ts | 12 ++++++++---- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/components/EPUBViewer.tsx b/src/components/EPUBViewer.tsx index a9389a2..ec4a3f9 100644 --- a/src/components/EPUBViewer.tsx +++ b/src/components/EPUBViewer.tsx @@ -79,17 +79,17 @@ export function EPUBViewer({ className = '' }: EPUBViewerProps) { }, [extractPageText]); const checkResize = useCallback(() => { - if (isResizing && bookRef.current?.isOpen && rendition.current && isEPUBSetOnce.current) { + if (isResizing && dimensions && bookRef.current?.isOpen && rendition.current && isEPUBSetOnce.current) { pause(); // Only extract text when we have dimensions, ensuring the resize is complete - if (dimensions) extractPageText(bookRef.current, rendition.current, true); + extractPageText(bookRef.current, rendition.current, true); setIsResizing(false); return true; } else { return false; } - }, [isResizing, dimensions, pause, extractPageText]); + }, [isResizing, setIsResizing, dimensions, pause, extractPageText]); // Check for isResizing to pause TTS and re-extract text useEffect(() => { diff --git a/src/contexts/TTSContext.tsx b/src/contexts/TTSContext.tsx index b2c3cc6..0bb18a8 100644 --- a/src/contexts/TTSContext.tsx +++ b/src/contexts/TTSContext.tsx @@ -195,7 +195,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { setSentences([]); setCurrDocPage(location); - }, [abortAudio, isPlaying]); + }, [abortAudio]); /** * Moves to the next or previous sentence diff --git a/src/hooks/epub/useEPUBResize.ts b/src/hooks/epub/useEPUBResize.ts index 015ddfc..f51ae37 100644 --- a/src/hooks/epub/useEPUBResize.ts +++ b/src/hooks/epub/useEPUBResize.ts @@ -10,12 +10,16 @@ export function useEPUBResize(containerRef: RefObject) { const entries = args[0] as ResizeObserverEntry[]; console.log('Debounced resize', entries[0].contentRect); setDimensions(entries[0].contentRect); + setIsResizing((prev) => { + if (!prev) return true; + return prev; + }); }, 150); const resizeObserver = new ResizeObserver((entries) => { - if (!isResizing) { - setIsResizing(true); - } + // if (!isResizing) { + // setIsResizing(true); + // } debouncedResize(entries); }); @@ -51,7 +55,7 @@ export function useEPUBResize(containerRef: RefObject) { mutationObserver.disconnect(); resizeObserver.disconnect(); }; - }, [containerRef]); + }, [containerRef]); return { isResizing, setIsResizing, dimensions }; } \ No newline at end of file From 0251aae04b3bd3d90c6391263a219580c09f7456 Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Sat, 22 Feb 2025 13:36:45 -0700 Subject: [PATCH 4/5] Huge iOS playback fix --- src/contexts/TTSContext.tsx | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/src/contexts/TTSContext.tsx b/src/contexts/TTSContext.tsx index 0bb18a8..189108b 100644 --- a/src/contexts/TTSContext.tsx +++ b/src/contexts/TTSContext.tsx @@ -174,6 +174,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { const abortAudio = useCallback((clearPending = false) => { if (activeHowl) { activeHowl.stop(); + activeHowl.unload(); // Ensure Howl instance is fully cleaned up setActiveHowl(null); } if (clearPending) { @@ -274,27 +275,34 @@ export function TTSProvider({ children }: { children: ReactNode }) { // Check for blank section first if (handleBlankSection(text)) return; - // Keep track of previous state + // Keep track of previous state and pause playback const wasPlaying = isPlaying; + setIsPlaying(false); + abortAudio(true); // Clear pending requests since text is changing + setIsProcessing(true); // Set processing state before text processing starts console.log('Setting text:', text); processTextToSentences(text) .then(newSentences => { if (newSentences.length === 0) { console.warn('No sentences found in text'); + setIsProcessing(false); return; } + // Set all state updates in a predictable order setSentences(newSentences); setCurrentIndex(0); + setIsProcessing(false); - // Only restore previous playback state if we shouldn't pause - if (shouldPause) setIsPlaying(false); - else if (wasPlaying) setIsPlaying(true); - + // Restore playback state if needed + if (!shouldPause && wasPlaying) { + setIsPlaying(true); + } }) .catch(error => { console.warn('Error processing text:', error); + setIsProcessing(false); toast.error('Failed to process text', { style: { background: 'var(--background)', @@ -303,7 +311,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { duration: 3000, }); }); - }, [processTextToSentences, handleBlankSection, isPlaying]); + }, [isPlaying, handleBlankSection, abortAudio, processTextToSentences]); /** * Toggles the playback state between playing and paused @@ -493,12 +501,17 @@ export function TTSProvider({ children }: { children: ReactNode }) { throw new Error('No audio URL generated'); } + // Force unload any previous Howl instance to free up resources + if (activeHowl) { + activeHowl.unload(); + } + const howl = new Howl({ src: [audioUrl], format: ['mp3'], html5: true, preload: true, - pool: 1, + pool: 5, // Reduced pool size for iOS compatibility onplay: () => { setIsProcessing(false); if ('mediaSession' in navigator) { @@ -512,6 +525,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { }, onend: () => { URL.revokeObjectURL(audioUrl); + howl.unload(); // Explicitly unload when done setActiveHowl(null); if (isPlaying) { advance(); @@ -522,12 +536,14 @@ export function TTSProvider({ children }: { children: ReactNode }) { setIsProcessing(false); setActiveHowl(null); URL.revokeObjectURL(audioUrl); + howl.unload(); // Ensure cleanup on error // Don't auto-advance on load error setIsPlaying(false); }, onstop: () => { setIsProcessing(false); URL.revokeObjectURL(audioUrl); + howl.unload(); // Ensure cleanup on stop } }); @@ -550,7 +566,7 @@ export function TTSProvider({ children }: { children: ReactNode }) { advance(); // Skip problematic sentence } - }, [isPlaying, processSentence, advance]); + }, [isPlaying, processSentence, advance, activeHowl]); /** * Preloads the next sentence's audio From 67f3452f3184c466972600a440bd8560ca4e4d5d Mon Sep 17 00:00:00 2001 From: Richard Roberson Date: Sat, 22 Feb 2025 13:41:25 -0700 Subject: [PATCH 5/5] Bump version --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 8445720..4f1e204 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "openreader-webui", - "version": "0.1.6", + "version": "0.2.0", "private": true, "scripts": { "dev": "next dev --turbopack",