Skip to content

Commit

Permalink
Improve transcription (#1182)
Browse files Browse the repository at this point in the history
* handle special characters in transcription when assessing

* allow playing the original pronunciation in the assessment result
  • Loading branch information
an-lee authored Nov 15, 2024
1 parent 1f531b0 commit b8167a9
Show file tree
Hide file tree
Showing 11 changed files with 171 additions and 102 deletions.
4 changes: 3 additions & 1 deletion enjoy/src/i18n/en.json
Original file line number Diff line number Diff line change
Expand Up @@ -927,5 +927,7 @@
"openaiTtsServiceDescription": "Use OpenAI TTS service from your own key.",
"enjoyTtsServiceDescription": "Use TTS service provided by Enjoy. OpenAI or Azure is supported.",
"compressMediaBeforeAdding": "Compress media before adding",
"keepOriginalMedia": "Keep original media"
"keepOriginalMedia": "Keep original media",
"myPronunciation": "My pronunciation",
"originalPronunciation": "Original pronunciation"
}
4 changes: 3 additions & 1 deletion enjoy/src/i18n/zh-CN.json
Original file line number Diff line number Diff line change
Expand Up @@ -927,5 +927,7 @@
"openaiTtsServiceDescription": "使用您自己的 API key 来使用 OpenAI TTS 服务。",
"enjoyTtsServiceDescription": "使用 Enjoy 提供的 TTS 服务,支持 OpenAI 或 Azure。",
"compressMediaBeforeAdding": "添加前压缩媒体",
"keepOriginalMedia": "保存原始媒体"
"keepOriginalMedia": "保存原始媒体",
"myPronunciation": "我的发音",
"originalPronunciation": "原始发音"
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useEffect, useContext, useRef, useState } from "react";
import { useEffect, useContext, useRef, useState, useMemo } from "react";
import {
AppSettingsProviderContext,
HotKeysSettingsProviderContext,
Expand Down Expand Up @@ -50,17 +50,13 @@ import { formatDuration } from "@renderer/lib/utils";
import { useHotkeys } from "react-hotkeys-hook";
import { LiveAudioVisualizer } from "react-audio-visualize";
import debounce from "lodash/debounce";
import { TimelineEntry } from "echogarden/dist/utilities/Timeline.d.js";

const ACTION_BUTTON_HEIGHT = 35;
export const MediaCurrentRecording = () => {
const {
isRecording,
isPaused,
cancelRecording,
togglePauseResume,
stopRecording,
recordingTime,
mediaRecorder,
currentRecording,
renderPitchContour: renderMediaPitchContour,
regions: mediaRegions,
Expand All @@ -71,6 +67,8 @@ export const MediaCurrentRecording = () => {
currentSegment,
createSegment,
currentTime: mediaCurrentTime,
caption,
toggleRegion,
} = useContext(MediaShadowProviderContext);
const { webApi, EnjoyApp } = useContext(AppSettingsProviderContext);
const { currentHotkeys } = useContext(HotKeysSettingsProviderContext);
Expand Down Expand Up @@ -263,6 +261,23 @@ export const MediaCurrentRecording = () => {
});
};

/**
 * Plays the original audio of a word from the current caption.
 *
 * The word is matched case-insensitively against the text of the caption's
 * timeline entries. The matching entry is selected as a region via
 * `toggleRegion`, and playback is toggled shortly afterwards.
 *
 * @param word - Text of the word to look up in the caption timeline.
 * @param index - Zero-based occurrence to use when the word appears more
 *   than once in the caption.
 */
const playWord = (word: string, index: number) => {
  // No caption (or no word-level timeline) means there is nothing to look up.
  const timeline = caption?.timeline;
  if (!timeline) return;

  const needle = word.toLowerCase();
  const candidates = timeline.filter(
    (w: TimelineEntry) => w.text.toLowerCase() === needle
  );
  const target = candidates[index];
  if (!target) return;

  // Resolve the position by identity: matching on startTime alone could pick
  // a different entry that happens to share the same start time.
  const wordIndex = timeline.indexOf(target);

  toggleRegion([wordIndex]);
  // Playback is toggled after a 250 ms delay; presumably this gives the
  // region selection time to take effect — TODO confirm.
  setTimeout(() => {
    wavesurfer?.playPause();
  }, 250);
};

const calContainerSize = () => {
const size = ref?.current
?.closest(".media-recording-wrapper")
Expand Down Expand Up @@ -685,7 +700,12 @@ export const MediaCurrentRecording = () => {
</SheetClose>
</SheetHeader>

<RecordingDetail recording={currentRecording} />
<RecordingDetail
recording={currentRecording}
onPlayOrigin={(word: string, index: number = 0) =>
playWord(word, index)
}
/>
</SheetContent>
</Sheet>
</div>
Expand Down Expand Up @@ -745,7 +765,6 @@ const MediaRecorder = () => {
const {
mediaRecorder,
recordingTime,
isRecording,
isPaused,
cancelRecording,
togglePauseResume,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ export const MediaPlayerControls = () => {
wavesurfer.pause();
setTimeout(() => {
activeRegionDebouncePlay();
}, 500);
}, 250);
} else if (playMode === "single") {
wavesurfer.pause();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,13 @@ export const MediaCaption = (props: {

const [notedquoteIndices, setNotedquoteIndices] = useState<number[]>([]);

let words = caption.text.split(" ");
let words = caption.text
.replace(/ ([.,!?:;])/g, "$1")
.replace(/ (['"")])/g, "$1")
.replace(/ \.\.\./g, "...")
.split(/([—]|\s+)/g)
.filter((word) => word.trim() !== "" && word !== "—");

const ipas = caption.timeline.map((w) =>
w.timeline?.map((t) =>
t.timeline && language.startsWith("en")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useEffect, useState, useContext, useRef } from "react";
import { useEffect, useState, useContext, useRef, useMemo } from "react";
import { MediaShadowProviderContext } from "@renderer/context";
import cloneDeep from "lodash/cloneDeep";
import {
Expand All @@ -11,10 +11,6 @@ import {
} from "@renderer/components/ui";
import { MediaCaption, MediaCaptionActions } from "@renderer/components";
import { t } from "i18next";
import {
Timeline,
TimelineEntry,
} from "echogarden/dist/utilities/Timeline.d.js";
import {
MediaCaptionAnalysis,
MediaCaptionNote,
Expand All @@ -29,12 +25,14 @@ export const MediaRightPanel = (props: {
}) => {
const { className, setDisplayPanel } = props;
const {
caption,
currentSegmentIndex,
currentTime,
transcription,
regions,
activeRegion,
setActiveRegion,
toggleRegion,
editingRegion,
setEditingRegion,
setTranscriptionDraft,
Expand All @@ -47,7 +45,6 @@ export const MediaRightPanel = (props: {
const [displayIpa, setDisplayIpa] = useState<boolean>(true);
const [displayNotes, setDisplayNotes] = useState<boolean>(true);

const [caption, setCaption] = useState<TimelineEntry | null>(null);
const [tab, setTab] = useState<string>("translation");

const toggleMultiSelect = (event: KeyboardEvent) => {
Expand Down Expand Up @@ -79,67 +76,6 @@ export const MediaRightPanel = (props: {
}
};

/**
 * Synchronises the active region with the selected word indices of the
 * current caption.
 *
 * - Does nothing when there is no active region.
 * - Shows a warning and does nothing while a region is being edited.
 * - With an empty selection, removes an active word region and activates the
 *   segment region instead.
 * - Otherwise a word region spanning the first to the last selected word is
 *   created, unless a meaning-group region is active, in which case the
 *   segment region is activated.
 *
 * @param params - Indices into `caption.timeline` of the selected words.
 */
const toggleRegion = (params: number[]) => {
  if (!activeRegion) return;

  if (editingRegion) {
    toast.warning(t("currentRegionIsBeingEdited"));
    return;
  }

  // Small local helpers keep the branching below readable.
  const activeIdHasPrefix = (prefix: string) =>
    activeRegion.id.startsWith(prefix);

  const activateSegmentRegion = () => {
    setActiveRegion(
      regions.getRegions().find((r) => r.id.startsWith("segment-region"))
    );
  };

  // Empty selection: drop the word region (if any) and fall back to the segment.
  if (params.length === 0) {
    if (activeIdHasPrefix("word-region")) {
      activeRegion.remove();
      activateSegmentRegion();
    }
    return;
  }

  const startIndex = Math.min(...params);
  const endIndex = Math.max(...params);

  const firstWord = caption.timeline[startIndex];
  if (!firstWord) return;

  // A missing end entry collapses the range onto the first word.
  const lastWord = caption.timeline[endIndex] || firstWord;

  const regionStart = firstWord.startTime;
  const regionEnd = lastWord.endTime;

  const activateWordRegion = (resize: typeof editingRegion) => {
    const region = regions.addRegion({
      id: `word-region-${startIndex}`,
      start: regionStart,
      end: regionEnd,
      color: "#fb6f9233",
      drag: false,
      resize,
    });

    setActiveRegion(region);
  };

  // Active word region: replace it with one covering the whole selection.
  if (activeIdHasPrefix("word-region")) {
    activeRegion.remove();
    activateWordRegion(editingRegion);
    return;
  }

  // Active meaning-group region: switch back to the segment region.
  if (activeIdHasPrefix("meaning-group-region")) {
    activateSegmentRegion();
    return;
  }

  // Otherwise (segment region): create a fresh, non-resizable word region.
  activateWordRegion(false);
};

useEffect(() => {
if (!caption) return;

Expand All @@ -160,6 +96,7 @@ export const MediaRightPanel = (props: {
toggleRegion(selectedIndices);
}, [caption, selectedIndices]);

// Edit region to update transcription draft
useEffect(() => {
if (!activeRegion) return;
if (!activeRegion.id.startsWith("word-region")) return;
Expand Down Expand Up @@ -234,12 +171,6 @@ export const MediaRightPanel = (props: {
};
}, [editingRegion]);

useEffect(() => {
setCaption(
(transcription?.result?.timeline as Timeline)?.[currentSegmentIndex]
);
}, [currentSegmentIndex, transcription]);

useEffect(() => {
return () => setSelectedIndices([]);
}, [caption]);
Expand Down
3 changes: 0 additions & 3 deletions enjoy/src/renderer/components/medias/media-shadow-player.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,13 @@ import {
MediaRightPanel,
MediaLeftPanel,
MediaBottomPanel,
MediaProvider,
} from "@renderer/components";
import {
Button,
ResizableHandle,
ResizablePanel,
ResizablePanelGroup,
} from "@renderer/components/ui";
import { useContext, useState } from "react";
import { RefreshCcwDotIcon } from "lucide-react";

export const MediaShadowPlayer = () => {
return (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ export const PronunciationAssessmentFulltextResult = (props: {
words: PronunciationAssessmentWordResultType[];
currentTime?: number;
src?: string;
onPlayOrigin?: (word: string, index: number) => void;
}) => {
const { words, currentTime, src } = props;
const { words, currentTime, src, onPlayOrigin } = props;
const [errorStats, setErrorStats] = useState({
mispronunciation: 0,
omission: 0,
Expand Down Expand Up @@ -65,6 +66,16 @@ export const PronunciationAssessmentFulltextResult = (props: {
errorDisplay={errorDisplay}
currentTime={currentTime}
src={src}
onPlayOrigin={() => {
if (!onPlayOrigin) return;

const word = words[index];
const candidates = words.filter((w) => w.word === word.word);
const wordIndex = candidates.findIndex(
(w) => w.offset === word.offset
);
onPlayOrigin(word.word, wordIndex);
}}
/>
))}
</div>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export const PronunciationAssessmentWordResult = (props: {
monotone: boolean;
};
currentTime?: number;
onPlayOrigin?: () => void;
}) => {
const {
result,
Expand All @@ -32,6 +33,7 @@ export const PronunciationAssessmentWordResult = (props: {
monotone: true,
},
currentTime = 0,
onPlayOrigin,
} = props;

const audio = useRef<HTMLAudioElement>(null);
Expand Down Expand Up @@ -71,25 +73,41 @@ export const PronunciationAssessmentWordResult = (props: {
}[result.pronunciationAssessment.errorType];

/**
 * Plays the part of the recording (`props.src`) that corresponds to this word.
 *
 * `result.offset` and `result.duration` are divided by 1e7 to obtain the
 * start and end positions in seconds (presumably the service reports them in
 * 100-nanosecond ticks — TODO confirm). Playback is paused once the end of
 * the word is reached.
 */
const play = () => {
  // Capture the element once so the handler below never dereferences a ref
  // that the effect cleanup has already reset to null.
  const player = audio.current;
  if (!player || !props.src) return;

  const { offset, duration } = result;
  // An offset of 0 is a valid position (a word at the very start of the
  // recording), so only a missing offset is rejected. A missing or zero
  // duration leaves nothing to play.
  if (offset == null || !duration) return;

  const startTime = (offset * 1.0) / 1e7;
  const endTime = ((offset + duration) * 1.0) / 1e7;

  // Point the audio element at the word's segment using a media fragment.
  player.src = `${props.src}#t=${startTime},${endTime}`;
  player.currentTime = startTime;

  // Stop at the end of the segment. Assigning `ontimeupdate` replaces the
  // handler installed by a previous call, so repeated clicks do not pile up
  // listeners with stale end times.
  player.ontimeupdate = () => {
    if (player.currentTime >= endTime) {
      player.pause();
      player.ontimeupdate = null;
    }
  };

  // play() returns a promise that rejects when playback is interrupted or
  // blocked; report it instead of leaving an unhandled rejection.
  void player.play().catch((error: unknown) => {
    console.error(error);
  });
};

useEffect(() => {
if (!audio.current) {
audio.current = new Audio();
audio.current = new Audio(props.src);
}

return () => {
audio.current?.pause();
delete audio.current;
if (audio.current) {
audio.current.pause();
audio.current.removeEventListener("timeupdate", () => {});
audio.current = null;
}
};
}, []);
}, [props.src]);

return (
<Popover>
Expand Down Expand Up @@ -152,11 +170,20 @@ export const PronunciationAssessmentWordResult = (props: {
</div>
)}

<div className="">
<div className="flex items-center space-x-2">
<span className="text-sm">{t("myPronunciation")}:</span>
<Button onClick={play} variant="ghost" size="icon">
<Volume2Icon className="w-5 h-5" />
</Button>
</div>
{onPlayOrigin && (
<div className="flex items-center space-x-2">
<span className="text-sm">{t("originalPronunciation")}:</span>
<Button onClick={onPlayOrigin} variant="ghost" size="icon">
<Volume2Icon className="w-5 h-5" />
</Button>
</div>
)}
</PopoverContent>
</Popover>
);
Expand Down
Loading

0 comments on commit b8167a9

Please sign in to comment.