diff --git a/src/wiktextract/extractor/share.py b/src/wiktextract/extractor/share.py index 31cb001f..1c09bdbf 100644 --- a/src/wiktextract/extractor/share.py +++ b/src/wiktextract/extractor/share.py @@ -55,8 +55,8 @@ def split_chinese_variants(text: str) -> Iterable[tuple[Optional[str], str]]: def create_audio_url_dict(filename: str) -> dict[str, str]: # remove white space and left-to-right mark filename = filename.strip(" \u200e") - file_extension = filename[filename.rfind(".") + 1 :] - if file_extension.lower() == "ogv": + file_extension = filename[filename.rfind(".") + 1 :].lower() + if file_extension == "ogv": # ".ogv" pages are redirected to ".oga" pages in Wikipedia Commons filename = filename[:filename.rfind(".")] + ".oga" file_extension = "oga" diff --git a/tests/test_utils.py b/tests/test_utils.py index eb4e9035..012437e0 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -75,6 +75,7 @@ def test_audio_transcode_url(self): "ogg_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/e/e1/LL-Q150_(fra)-DenisdeShawi-bonjour.wav/LL-Q150_(fra)-DenisdeShawi-bonjour.wav.ogg", }, ) + sound_data = create_audio_url_dict("File:Fr-BonjourF.oga") self.assertEqual( sound_data, @@ -84,5 +85,16 @@ def test_audio_transcode_url(self): "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/b/b9/Fr-BonjourF.oga/Fr-BonjourF.oga.mp3", }, ) + sound_data = create_audio_url_dict("Qc-Vancouver.ogv") self.assertEqual(sound_data["audio"], "Qc-Vancouver.oga") + + sound_data = create_audio_url_dict("De-Fisch.OGG") + self.assertEqual( + sound_data, + { + "audio": "De-Fisch.OGG", + "ogg_url": "https://commons.wikimedia.org/wiki/Special:FilePath/De-Fisch.OGG", + "mp3_url": "https://upload.wikimedia.org/wikipedia/commons/transcoded/0/0f/De-Fisch.OGG/De-Fisch.OGG.mp3", + }, + )