From f08ac9d99717dedab7d35c8071946d462b4fa4b0 Mon Sep 17 00:00:00 2001 From: Tyrrrz <1935960+Tyrrrz@users.noreply.github.com> Date: Wed, 27 Dec 2023 18:43:17 +0200 Subject: [PATCH] Escape arrow sequences when writing closed captions to an SRT file Closes #755 --- YoutubeExplode.Converter.Tests/SubtitleSpecs.cs | 8 ++++---- .../Videos/ClosedCaptions/ClosedCaptionClient.cs | 15 ++++++++++++++- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/YoutubeExplode.Converter.Tests/SubtitleSpecs.cs b/YoutubeExplode.Converter.Tests/SubtitleSpecs.cs index 845dd628..1682de97 100644 --- a/YoutubeExplode.Converter.Tests/SubtitleSpecs.cs +++ b/YoutubeExplode.Converter.Tests/SubtitleSpecs.cs @@ -24,7 +24,7 @@ public async Task I_can_download_a_video_as_a_single_mp4_file_with_subtitles() using var dir = TempDir.Create(); var filePath = Path.Combine(dir.Path, "video.mp4"); - var streamManifest = await youtube.Videos.Streams.GetManifestAsync("YltHGKX80Y8"); + var streamManifest = await youtube.Videos.Streams.GetManifestAsync("NtQkz0aRDe8"); var streamInfos = streamManifest .GetVideoStreams() .Where(s => s.Container == Container.Mp4) @@ -32,7 +32,7 @@ public async Task I_can_download_a_video_as_a_single_mp4_file_with_subtitles() .Take(1) .ToArray(); - var trackManifest = await youtube.Videos.ClosedCaptions.GetManifestAsync("YltHGKX80Y8"); + var trackManifest = await youtube.Videos.ClosedCaptions.GetManifestAsync("NtQkz0aRDe8"); var trackInfos = trackManifest.Tracks; // Act @@ -61,7 +61,7 @@ public async Task I_can_download_a_video_as_a_single_webm_file_with_subtitles() using var dir = TempDir.Create(); var filePath = Path.Combine(dir.Path, "video.webm"); - var streamManifest = await youtube.Videos.Streams.GetManifestAsync("YltHGKX80Y8"); + var streamManifest = await youtube.Videos.Streams.GetManifestAsync("NtQkz0aRDe8"); var streamInfos = streamManifest .GetVideoStreams() .Where(s => s.Container == Container.WebM) @@ -69,7 +69,7 @@ public async Task I_can_download_a_video_as_a_single_webm_file_with_subtitles() .Take(1) .ToArray(); - var trackManifest = await youtube.Videos.ClosedCaptions.GetManifestAsync("YltHGKX80Y8"); + var trackManifest = await youtube.Videos.ClosedCaptions.GetManifestAsync("NtQkz0aRDe8"); var trackInfos = trackManifest.Tracks; // Act diff --git a/YoutubeExplode/Videos/ClosedCaptions/ClosedCaptionClient.cs b/YoutubeExplode/Videos/ClosedCaptions/ClosedCaptionClient.cs index deed0e11..f6c0dde6 100644 --- a/YoutubeExplode/Videos/ClosedCaptions/ClosedCaptionClient.cs +++ b/YoutubeExplode/Videos/ClosedCaptions/ClosedCaptionClient.cs @@ -165,7 +165,20 @@ static string FormatTimestamp(TimeSpan value) => .Append(FormatTimestamp(caption.Offset + caption.Duration)) .AppendLine() // Content - .AppendLine(caption.Text); + .AppendLine( + caption + .Text + // Caption text may contain valid SRT-formatted data in itself. + // This can happen, for example, if the subtitles for a YouTube video + // were imported from an SRT file, but something went wrong in the + // process, resulting in parts of the file being read as captions + // rather than control sequences. + // SRT file format does not provide any means of escaping special + // characters, so as a workaround we just replace the dashes in the + // arrow sequence with en-dashes, which look similar enough. + // https://github.com/Tyrrrz/YoutubeExplode/issues/755 + .Replace("-->", "––>") + ); await writer.WriteLineAsync(buffer.ToString()); buffer.Clear();