diff --git a/enjoy/src/i18n/en.json b/enjoy/src/i18n/en.json index a7851e2bc..5124e9067 100644 --- a/enjoy/src/i18n/en.json +++ b/enjoy/src/i18n/en.json @@ -927,5 +927,7 @@ "openaiTtsServiceDescription": "Use OpenAI TTS service from your own key.", "enjoyTtsServiceDescription": "Use TTS service provided by Enjoy. OpenAI or Azure is supported.", "compressMediaBeforeAdding": "Compress media before adding", - "keepOriginalMedia": "Keep original media" + "keepOriginalMedia": "Keep original media", + "myPronunciation": "My pronunciation", + "originalPronunciation": "Original pronunciation" } diff --git a/enjoy/src/i18n/zh-CN.json b/enjoy/src/i18n/zh-CN.json index d83ef469d..4fbb8f7e6 100644 --- a/enjoy/src/i18n/zh-CN.json +++ b/enjoy/src/i18n/zh-CN.json @@ -927,5 +927,7 @@ "openaiTtsServiceDescription": "使用您自己的 API key 来使用 OpenAI TTS 服务。", "enjoyTtsServiceDescription": "使用 Enjoy 提供的 TTS 服务,支持 OpenAI 或 Azure。", "compressMediaBeforeAdding": "添加前压缩媒体", - "keepOriginalMedia": "保存原始媒体" + "keepOriginalMedia": "保存原始媒体", + "myPronunciation": "我的发音", + "originalPronunciation": "原始发音" } diff --git a/enjoy/src/renderer/components/medias/media-bottom-panel/media-current-recording.tsx b/enjoy/src/renderer/components/medias/media-bottom-panel/media-current-recording.tsx index 89b437bb5..e4595ff51 100644 --- a/enjoy/src/renderer/components/medias/media-bottom-panel/media-current-recording.tsx +++ b/enjoy/src/renderer/components/medias/media-bottom-panel/media-current-recording.tsx @@ -1,4 +1,4 @@ -import { useEffect, useContext, useRef, useState } from "react"; +import { useEffect, useContext, useRef, useState, useMemo } from "react"; import { AppSettingsProviderContext, HotKeysSettingsProviderContext, @@ -50,17 +50,13 @@ import { formatDuration } from "@renderer/lib/utils"; import { useHotkeys } from "react-hotkeys-hook"; import { LiveAudioVisualizer } from "react-audio-visualize"; import debounce from "lodash/debounce"; +import { TimelineEntry } from
"echogarden/dist/utilities/Timeline.d.js"; const ACTION_BUTTON_HEIGHT = 35; export const MediaCurrentRecording = () => { const { isRecording, isPaused, - cancelRecording, - togglePauseResume, - stopRecording, - recordingTime, - mediaRecorder, currentRecording, renderPitchContour: renderMediaPitchContour, regions: mediaRegions, @@ -71,6 +67,8 @@ export const MediaCurrentRecording = () => { currentSegment, createSegment, currentTime: mediaCurrentTime, + caption, + toggleRegion, } = useContext(MediaShadowProviderContext); const { webApi, EnjoyApp } = useContext(AppSettingsProviderContext); const { currentHotkeys } = useContext(HotKeysSettingsProviderContext); @@ -263,6 +261,23 @@ export const MediaCurrentRecording = () => { }); }; + const playWord = (word: string, index: number) => { /* select the index-th (0-based, case-insensitive) occurrence of `word` in the current caption, then toggle playback */ + const candidates = (caption?.timeline ?? []).filter( /* caption or its timeline can be undefined; treat that as "no match" instead of throwing */ + (w: TimelineEntry) => w.text.toLowerCase() === word.toLowerCase() + ); + const target = candidates[index]; + if (!target) return; + + const wordIndex = caption.timeline.findIndex( + (w) => w === target /* identity match: filter() returned the same entry objects, so this cannot hit a different entry that shares a startTime */ + ); + + toggleRegion([wordIndex]); + setTimeout(() => { + wavesurfer?.playPause(); + }, 250); + }; + const calContainerSize = () => { const size = ref?.current ?.closest(".media-recording-wrapper") @@ -685,7 +700,12 @@ export const MediaCurrentRecording = () => { - + + playWord(word, index) + } + /> @@ -745,7 +765,6 @@ const MediaRecorder = () => { const { mediaRecorder, recordingTime, - isRecording, isPaused, cancelRecording, togglePauseResume, diff --git a/enjoy/src/renderer/components/medias/media-bottom-panel/media-player-controls.tsx b/enjoy/src/renderer/components/medias/media-bottom-panel/media-player-controls.tsx index db388fd98..f83f0da4d 100644 --- a/enjoy/src/renderer/components/medias/media-bottom-panel/media-player-controls.tsx +++ b/enjoy/src/renderer/components/medias/media-bottom-panel/media-player-controls.tsx @@ -318,7 +318,7 @@ export const MediaPlayerControls = () => { wavesurfer.pause(); setTimeout(()
=> { activeRegionDebouncePlay(); - }, 500); + }, 250); } else if (playMode === "single") { wavesurfer.pause(); } diff --git a/enjoy/src/renderer/components/medias/media-right-panel/media-caption.tsx b/enjoy/src/renderer/components/medias/media-right-panel/media-caption.tsx index 558468259..731c28ee3 100644 --- a/enjoy/src/renderer/components/medias/media-right-panel/media-caption.tsx +++ b/enjoy/src/renderer/components/medias/media-right-panel/media-caption.tsx @@ -34,7 +34,13 @@ export const MediaCaption = (props: { const [notedquoteIndices, setNotedquoteIndices] = useState([]); - let words = caption.text.split(" "); + let words = caption.text + .replace(/ ([.,!?:;])/g, "$1") + .replace(/ (['"")])/g, "$1") + .replace(/ \.\.\./g, "...") + .split(/([—]|\s+)/g) + .filter((word) => word.trim() !== "" && word !== "—"); + const ipas = caption.timeline.map((w) => w.timeline?.map((t) => t.timeline && language.startsWith("en") diff --git a/enjoy/src/renderer/components/medias/media-right-panel/media-right-panel.tsx b/enjoy/src/renderer/components/medias/media-right-panel/media-right-panel.tsx index 04035f48d..d5a1547be 100644 --- a/enjoy/src/renderer/components/medias/media-right-panel/media-right-panel.tsx +++ b/enjoy/src/renderer/components/medias/media-right-panel/media-right-panel.tsx @@ -1,4 +1,4 @@ -import { useEffect, useState, useContext, useRef } from "react"; +import { useEffect, useState, useContext, useRef, useMemo } from "react"; import { MediaShadowProviderContext } from "@renderer/context"; import cloneDeep from "lodash/cloneDeep"; import { @@ -11,10 +11,6 @@ import { } from "@renderer/components/ui"; import { MediaCaption, MediaCaptionActions } from "@renderer/components"; import { t } from "i18next"; -import { - Timeline, - TimelineEntry, -} from "echogarden/dist/utilities/Timeline.d.js"; import { MediaCaptionAnalysis, MediaCaptionNote, @@ -29,12 +25,14 @@ export const MediaRightPanel = (props: { }) => { const { className, setDisplayPanel } = props; const 
{ + caption, currentSegmentIndex, currentTime, transcription, regions, activeRegion, setActiveRegion, + toggleRegion, editingRegion, setEditingRegion, setTranscriptionDraft, @@ -47,7 +45,6 @@ export const MediaRightPanel = (props: { const [displayIpa, setDisplayIpa] = useState(true); const [displayNotes, setDisplayNotes] = useState(true); - const [caption, setCaption] = useState(null); const [tab, setTab] = useState("translation"); const toggleMultiSelect = (event: KeyboardEvent) => { @@ -79,67 +76,6 @@ export const MediaRightPanel = (props: { } }; - const toggleRegion = (params: number[]) => { - if (!activeRegion) return; - if (editingRegion) { - toast.warning(t("currentRegionIsBeingEdited")); - return; - } - if (params.length === 0) { - if (activeRegion.id.startsWith("word-region")) { - activeRegion.remove(); - setActiveRegion( - regions.getRegions().find((r) => r.id.startsWith("segment-region")) - ); - } - return; - } - - const startIndex = Math.min(...params); - const endIndex = Math.max(...params); - - const startWord = caption.timeline[startIndex]; - if (!startWord) return; - - const endWord = caption.timeline[endIndex] || startWord; - - const start = startWord.startTime; - const end = endWord.endTime; - - // If the active region is a word region, then merge the selected words into a single region. - if (activeRegion.id.startsWith("word-region")) { - activeRegion.remove(); - - const region = regions.addRegion({ - id: `word-region-${startIndex}`, - start, - end, - color: "#fb6f9233", - drag: false, - resize: editingRegion, - }); - - setActiveRegion(region); - // If the active region is a meaning group region, then active the segment region. - } else if (activeRegion.id.startsWith("meaning-group-region")) { - setActiveRegion( - regions.getRegions().find((r) => r.id.startsWith("segment-region")) - ); - // If the active region is a segment region, then create a new word region. 
- } else { - const region = regions.addRegion({ - id: `word-region-${startIndex}`, - start, - end, - color: "#fb6f9233", - drag: false, - resize: false, - }); - - setActiveRegion(region); - } - }; - useEffect(() => { if (!caption) return; @@ -160,6 +96,7 @@ export const MediaRightPanel = (props: { toggleRegion(selectedIndices); }, [caption, selectedIndices]); + // Edit region to update transcription draft useEffect(() => { if (!activeRegion) return; if (!activeRegion.id.startsWith("word-region")) return; @@ -234,12 +171,6 @@ export const MediaRightPanel = (props: { }; }, [editingRegion]); - useEffect(() => { - setCaption( - (transcription?.result?.timeline as Timeline)?.[currentSegmentIndex] - ); - }, [currentSegmentIndex, transcription]); - useEffect(() => { return () => setSelectedIndices([]); }, [caption]); diff --git a/enjoy/src/renderer/components/medias/media-shadow-player.tsx b/enjoy/src/renderer/components/medias/media-shadow-player.tsx index 4ddb6235c..937786321 100644 --- a/enjoy/src/renderer/components/medias/media-shadow-player.tsx +++ b/enjoy/src/renderer/components/medias/media-shadow-player.tsx @@ -4,16 +4,13 @@ import { MediaRightPanel, MediaLeftPanel, MediaBottomPanel, - MediaProvider, } from "@renderer/components"; import { - Button, ResizableHandle, ResizablePanel, ResizablePanelGroup, } from "@renderer/components/ui"; import { useContext, useState } from "react"; -import { RefreshCcwDotIcon } from "lucide-react"; export const MediaShadowPlayer = () => { return ( diff --git a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx index b6c5451c9..857963b1b 100644 --- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx +++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-fulltext-result.tsx @@ -8,8 +8,9 @@ export 
const PronunciationAssessmentFulltextResult = (props: { words: PronunciationAssessmentWordResultType[]; currentTime?: number; src?: string; + onPlayOrigin?: (word: string, index: number) => void; }) => { - const { words, currentTime, src } = props; + const { words, currentTime, src, onPlayOrigin } = props; const [errorStats, setErrorStats] = useState({ mispronunciation: 0, omission: 0, @@ -65,6 +66,16 @@ export const PronunciationAssessmentFulltextResult = (props: { errorDisplay={errorDisplay} currentTime={currentTime} src={src} + onPlayOrigin={() => { + if (!onPlayOrigin) return; + + const word = words[index]; + const candidates = words.filter((w) => w.word.toLowerCase() === word.word.toLowerCase()); /* count occurrences case-insensitively so "The" and "the" are the same word */ + const wordIndex = candidates.findIndex( + (w) => w === word /* identity match: candidates holds the same objects as words, so repeated or missing offsets cannot pick the wrong occurrence */ + ); + onPlayOrigin(word.word, wordIndex); + }} /> ))} diff --git a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx index 025b59283..fa58c92ef 100644 --- a/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx +++ b/enjoy/src/renderer/components/pronunciation-assessments/pronunciation-assessment-word-result.tsx @@ -20,6 +20,7 @@ export const PronunciationAssessmentWordResult = (props: { monotone: boolean; }; currentTime?: number; + onPlayOrigin?: () => void; }) => { const { result, @@ -32,6 +33,7 @@ export const PronunciationAssessmentWordResult = (props: { monotone: true, }, currentTime = 0, + onPlayOrigin, } = props; const audio = useRef(null); @@ -71,25 +73,41 @@ export const PronunciationAssessmentWordResult = (props: { }[result.pronunciationAssessment.errorType]; const play = () => { + if (!audio.current || !props.src) return; + const { offset, duration } = result; + if (offset == null || !duration) return; /* offset 0 (a word at the very start of the recording) is valid and must stay playable */ + + const startTime = (offset * 1.0) / 1e7; + const endTime = ((offset + duration) * 1.0) / 1e7; - // create a new audio element
and play the segment - audio.current.src = `${props.src}#t=${(offset * 1.0) / 1e7},${ - ((offset + duration) * 1.0) / 1e7 - }`; + audio.current.currentTime = startTime; + + // Install a single timeupdate handler that stops playback at the end of the segment + const handleTimeUpdate = () => { + if (audio.current.currentTime >= endTime) { + audio.current.pause(); + audio.current.ontimeupdate = null; + } + }; + + audio.current.ontimeupdate = handleTimeUpdate; /* property assignment replaces any handler left over from a previous play() instead of stacking listeners */ audio.current.play(); }; useEffect(() => { if (!audio.current) { - audio.current = new Audio(); + audio.current = new Audio(props.src); } return () => { - audio.current?.pause(); - delete audio.current; + if (audio.current) { + audio.current.pause(); + audio.current.ontimeupdate = null; /* actually detaches the stop handler; removeEventListener with a fresh arrow function removes nothing */ + audio.current = null; + } }; - }, []); + }, [props.src]); return ( @@ -152,11 +170,20 @@ export const PronunciationAssessmentWordResult = (props: { )} -
+
+ {t("myPronunciation")}:
+ {onPlayOrigin && ( +
+ {t("originalPronunciation")}: + +
+ )} ); diff --git a/enjoy/src/renderer/components/recordings/recording-detail.tsx b/enjoy/src/renderer/components/recordings/recording-detail.tsx index 741e9d2fc..84bf54cfb 100644 --- a/enjoy/src/renderer/components/recordings/recording-detail.tsx +++ b/enjoy/src/renderer/components/recordings/recording-detail.tsx @@ -14,8 +14,9 @@ export const RecordingDetail = (props: { recording: RecordingType; pronunciationAssessment?: PronunciationAssessmentType; onAssess?: (assessment: PronunciationAssessmentType) => void; + onPlayOrigin?: (word: string, index: number) => void; /* index: 0-based occurrence of `word`; same signature as the prop this is forwarded to below */ }) => { - const { recording, onAssess } = props; + const { recording, onAssess, onPlayOrigin } = props; if (!recording) return; const [pronunciationAssessment, setPronunciationAssessment] = @@ -40,7 +41,7 @@ export const RecordingDetail = (props: { setAssessing(true); createAssessment({ recording, - reference: recording.referenceText || "", + reference: recording.referenceText?.replace(/[—]/g, ", ") || "", language: recording.language || learningLanguage, }) .then((assessment) => { @@ -76,6 +77,7 @@ export const RecordingDetail = (props: { words={result.words} currentTime={currentTime} src={recording.src} + onPlayOrigin={onPlayOrigin} /> ) : ( diff --git a/enjoy/src/renderer/context/media-shadow-provider.tsx b/enjoy/src/renderer/context/media-shadow-provider.tsx index ac9bc2c2f..6e6494ca5 100644 --- a/enjoy/src/renderer/context/media-shadow-provider.tsx +++ b/enjoy/src/renderer/context/media-shadow-provider.tsx @@ -1,4 +1,4 @@ -import { createContext, useEffect, useState, useContext } from "react"; +import { createContext, useEffect, useState, useContext, useMemo } from "react"; import { convertIpaToNormal, extractFrequencies } from "@/utils"; import { AppSettingsProviderContext } from "@renderer/context"; import { @@ -12,7 +12,10 @@ import Regions, { type Region as RegionType, } from "wavesurfer.js/dist/plugins/regions"; import Chart from "chart.js/auto"; -import { TimelineEntry } from
"echogarden/dist/utilities/Timeline.d.js"; +import { + Timeline, + TimelineEntry, +} from "echogarden/dist/utilities/Timeline.d.js"; import { toast } from "@renderer/components/ui"; import { Tooltip } from "react-tooltip"; import { useAudioRecorder } from "react-audio-voice-recorder"; @@ -48,6 +51,7 @@ type MediaShadowContextType = { regions: Regions | null; activeRegion: RegionType; setActiveRegion: (region: RegionType) => void; + toggleRegion: (params: number[]) => void; renderPitchContour: ( region: RegionType, options?: { @@ -74,6 +78,7 @@ type MediaShadowContextType = { transcribingOutput: string; transcriptionDraft: TranscriptionType["result"]; setTranscriptionDraft: (result: TranscriptionType["result"]) => void; + caption: TimelineEntry; // Recordings startRecording: () => void; stopRecording: () => void; @@ -180,6 +185,10 @@ export const MediaShadowProvider = ({ toast.error(exception.message); }); + const caption = useMemo(() => { + return (transcription?.result?.timeline as Timeline)?.[currentSegmentIndex]; + }, [currentSegmentIndex, transcription]); + const { segment, createSegment } = useSegments({ targetId: media?.id, targetType: media?.mediaType, @@ -466,6 +475,67 @@ export const MediaShadowProvider = ({ ); }; + const toggleRegion = (params: number[]) => { /* params: word indices into caption.timeline; an empty list clears the current word region */ + if (!activeRegion) return; + if (editingRegion) { + toast.warning(t("currentRegionIsBeingEdited")); + return; + } + if (params.length === 0) { + if (activeRegion.id.startsWith("word-region")) { + activeRegion.remove(); + setActiveRegion( + regions.getRegions().find((r) => r.id.startsWith("segment-region")) + ); + } + return; + } + + const startIndex = Math.min(...params); + const endIndex = Math.max(...params); + + const startWord = caption?.timeline?.[startIndex]; /* caption is undefined when the memo above finds no segment; any context consumer can now call this, so guard here */ + if (!startWord) return; + + const endWord = caption.timeline[endIndex] || startWord; + + const start = startWord.startTime; + const end = endWord.endTime; + + // If the active region is a word region, then merge the selected words into a
single region. + if (activeRegion.id.startsWith("word-region")) { + activeRegion.remove(); + + const region = regions.addRegion({ + id: `word-region-${startIndex}`, + start, + end, + color: "#fb6f9233", + drag: false, + resize: editingRegion, + }); + + setActiveRegion(region); + // If the active region is a meaning group region, then activate the segment region. + } else if (activeRegion.id.startsWith("meaning-group-region")) { + setActiveRegion( + regions.getRegions().find((r) => r.id.startsWith("segment-region")) + ); + // If the active region is a segment region, then create a new word region. + } else { + const region = regions.addRegion({ + id: `word-region-${startIndex}`, + start, + end, + color: "#fb6f9233", + drag: false, + resize: false, + }); + + setActiveRegion(region); + } + }; + /* * When wavesurfer is decoded, * set up event listeners for wavesurfer @@ -667,6 +737,7 @@ export const MediaShadowProvider = ({ pitchChart, activeRegion, setActiveRegion, + toggleRegion, renderPitchContour, editingRegion, setEditingRegion, @@ -676,6 +747,7 @@ export const MediaShadowProvider = ({ transcribingOutput, transcriptionDraft, setTranscriptionDraft, + caption, startRecording, stopRecording, cancelRecording,