Skip to content

Commit

Permalink
Merge pull request #78 from k-aito/support-ass
Browse files Browse the repository at this point in the history
Support ass and few fix for pydeeplx
  • Loading branch information
sinedie authored Dec 20, 2023
2 parents 24d8889 + b1284ae commit e14fde3
Show file tree
Hide file tree
Showing 7 changed files with 277 additions and 29 deletions.
32 changes: 24 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,16 @@ Import stuff

```python
import os

# SRT File
from srtranslator import SrtFile
# ASS File
from srtranslator import AssFile

from srtranslator.translators.deepl_api import DeeplApi
from srtranslator.translators.deepl_scrap import DeeplTranslator
from srtranslator.translators.translatepy import TranslatePy
from srtranslator.translators.pydeeplx import DeepLX
from srtranslator.translators.pydeeplx import PyDeepLX
```

Initialize translator. It can be any translator, even your own, check the docs, there are instructions per translator and how to create your own.
Expand All @@ -35,13 +40,19 @@ Load, translate and save. For multiple recursive files in folder, check `example

```python
filepath = "./filepath/to/srt"
srt = SrtFile(filepath)
srt.translate(translator, "en", "es")

# SRT File
sub = SrtFile(filepath)
# ASS File
sub = AssFile(filepath)

# Translate
sub.translate(translator, "en", "es")

# Making the result subtitles prettier
srt.wrap_lines()
sub.wrap_lines()

srt.save(f"{os.path.splitext(filepath)[0]}_translated.srt")
sub.save(f"{os.path.splitext(filepath)[0]}_translated.srt")
```

Quit translator
Expand All @@ -57,15 +68,19 @@ translator.quit()
## Usage command line

```bash
# SRT file
python -m srtranslator ./filepath/to/srt -i SRC_LANG -o DEST_LANG

# ASS file
python -m srtranslator ./filepath/to/ass -i SRC_LANG -o DEST_LANG
```

## Advanced usage

```
usage: __main__.py [-h] [-i SRC_LANG] [-o DEST_LANG] [-v] [-vv] [-s] [-w WRAP_LIMIT] [-t {deepl-scrap,translatepy,deepl-api}] [--auth AUTH] path
usage: __main__.py [-h] [-i SRC_LANG] [-o DEST_LANG] [-v] [-vv] [-s] [-w WRAP_LIMIT] [-t {deepl-scrap,translatepy,deepl-api,pydeeplx}] [--auth AUTH] path
Translate an .STR file
Translate an .SRT or .ASS file
positional arguments:
path File to translate
Expand All @@ -81,7 +96,8 @@ options:
-s, --show-browser Show browser window
-w WRAP_LIMIT, --wrap-limit WRAP_LIMIT
Number of characters -including spaces- to wrap a line of text. Default: 50
-t {deepl-scrap,translatepy,deepl-api}, --translator {deepl-scrap,translatepy,deepl-api}
-t {deepl-scrap,translatepy,deepl-api,pydeeplx}, --translator {deepl-scrap,translatepy,deepl-api,pydeeplx}
Built-in translator to use
--auth AUTH Api key if needed on translator
--proxies Use proxy by default for pydeeplx
```
2 changes: 2 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ mypy-extensions==0.4.3
outcome==1.2.0
pathspec==0.10.3
platformdirs==2.6.2
pyass==0.1.4
pycparser==2.21
PyDeepLX==1.0.4
PySocks==1.7.1
python-editor==1.0.4
pyuseragents==1.0.5
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ inquirer==3.1.1
jinxed==1.2.0
lxml==4.9.3
outcome==1.2.0
pyass==0.1.4
pycparser==2.21
PyDeepLX==1.0.4
PySocks==1.7.1
Expand Down
31 changes: 22 additions & 9 deletions srtranslator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
import traceback

from .srt_file import SrtFile
from .ass_file import AssFile
from .translators.deepl_api import DeeplApi
from .translators.deepl_scrap import DeeplTranslator
from .translators.translatepy import TranslatePy
from .translators.pydeeplx import DeepLX
from .translators.pydeeplx import PyDeepLX

parser = argparse.ArgumentParser(description="Translate an .STR file")
parser = argparse.ArgumentParser(description="Translate an .STR and .ASS file")

parser.add_argument(
"filepath",
Expand Down Expand Up @@ -72,7 +73,7 @@
"-t",
"--translator",
type=str,
choices=["deepl-scrap", "translatepy", "deepl-api", "deeplx"],
choices=["deepl-scrap", "translatepy", "deepl-api", "pydeeplx"],
help="Built-in translator to use",
default="deepl-scrap",
)
Expand All @@ -83,11 +84,17 @@
help="Api key if needed on translator",
)

parser.add_argument(
"--proxies",
action="store_true",
help="Use proxy by default for pydeeplx",
)

builtin_translators = {
"deepl-scrap": DeeplTranslator,
"deepl-api": DeeplApi,
"translatepy": TranslatePy,
"deeplx": DeepLX,
"pydeeplx": PyDeepLX,
}

args = parser.parse_args()
Expand All @@ -104,17 +111,23 @@
translator_args = {}
if args.auth:
translator_args["api_key"] = args.auth
if args.proxies:
translator_args["proxies"] = args.proxies

translator = builtin_translators[args.translator](**translator_args)

srt = SrtFile(args.filepath)
try:
sub = AssFile(args.filepath)
except AttributeError:
print("... Exception while loading as ASS try as SRT")
sub = SrtFile(args.filepath)

try:
srt.translate(translator, args.src_lang, args.dest_lang)
srt.wrap_lines(args.wrap_limit)
srt.save(f"{os.path.splitext(args.filepath)[0]}_{args.dest_lang}.srt")
sub.translate(translator, args.src_lang, args.dest_lang)
sub.wrap_lines(args.wrap_limit)
sub.save(f"{os.path.splitext(args.filepath)[0]}_{args.dest_lang}{os.path.splitext(args.filepath)[1]}")
except:
srt.save_backup()
sub.save_backup()
traceback.print_exc()

translator.quit()
207 changes: 207 additions & 0 deletions srtranslator/ass_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
import os
import re
import pyass

from typing import List, Generator

from .translators.base import Translator


class AssFile:
    """ASS file class abstraction

    Loads the events of an ASS subtitle file through ``pyass``, translates
    their text chunk by chunk and writes the result back with ``pyass``.
    Progress of an interrupted translation is kept in ``<filepath>.tmp`` (see
    ``save_backup``) and picked up again the next time the same file is loaded.

    Args:
        filepath (str): file path of ass
    """

    # ASS hard line break tag: a backslash followed by "N".
    _LINE_BREAK = r"\N"
    # Stand-in (four backslashes) for the hard line break while the text goes
    # through the translator, which tends to drop the real \N tag.
    _LINE_BREAK_PLACEHOLDER = "\\" * 4
    # Stand-in for real newlines inside "dash dialogue" lines.
    _DASH_BREAK_PLACEHOLDER = "////"
    # Marker left in the text where an ASS override block ({...}) was removed.
    _STYLE_MARKER = "|"

    _HTML_TAG = re.compile("<.*?>")
    _STYLE_BLOCK = re.compile(r"\{.*?\}")
    # A \N tag together with at most one space on each side.
    _LINE_BREAK_TAG = re.compile(r" ?\\N ?")
    # The placeholder together with at most one space on each side; tolerant on
    # purpose so a translator that trims the padding does not prevent restoring.
    _PLACEHOLDER = re.compile(" ?" + re.escape(_LINE_BREAK_PLACEHOLDER) + " ?")

    def __init__(self, filepath: str) -> None:
        self.filepath = filepath
        self.backup_file = f"{self.filepath}.tmp"
        self.subtitles = []
        self.start_from = 0
        self.current_subtitle = 0
        # Override blocks removed from the chunk most recently produced by
        # _get_next_chunk: one list of "{...}" strings per subtitle, in order.
        self.text_styles = []

        print(f"Loading {filepath} as ASS")
        with open(filepath, "r", encoding="utf-8", errors="ignore") as input_file:
            self.subtitles = self.load_from_file(input_file)

        self._load_backup()

    def _load_backup(self):
        """Resume from the backup file, if one exists.

        The events stored in the backup replace the first events of the
        loaded file, and translation restarts right after them.
        """
        if not os.path.exists(self.backup_file):
            return

        print(f"Backup file found = {self.backup_file}")
        with open(
            self.backup_file, "r", encoding="utf-8", errors="ignore"
        ) as input_file:
            subtitles = self.load_from_file(input_file)

        self.start_from = len(subtitles.events)
        self.current_subtitle = self.start_from
        print(f"Starting from subtitle {self.start_from}")
        self.subtitles.events = [
            *subtitles.events,
            *self.subtitles.events[self.start_from :],
        ]

    def load_from_file(self, input_file):
        """Parse an open ASS file, sort its events by start time and clean them

        Args:
            input_file: Open text file handed to ``pyass.load``
        Returns:
            The object returned by ``pyass.load`` with sorted, cleaned events
        """
        ass_file = pyass.load(input_file)
        ass_file.events = sorted(ass_file.events, key=lambda e: (e.start))
        return self._clean_subs_content(ass_file)

    @classmethod
    def _strip_styles(cls, text: str):
        """Replace every override block of ``text`` by the style marker

        Returns:
            Tuple ``(stripped_text, styles)`` where ``styles`` lists the
            removed "{...}" blocks in order of appearance
        """
        styles = cls._STYLE_BLOCK.findall(text)
        return cls._STYLE_BLOCK.sub(cls._STYLE_MARKER, text), styles

    @classmethod
    def _restore_styles(cls, text: str, styles) -> str:
        """Put ``styles`` back in place of the style markers found in ``text``

        Markers are consumed left to right. A marker without a matching style
        is simply removed; styles left over because the translator dropped a
        marker are discarded since their position is unknown.
        """
        pieces = text.split(cls._STYLE_MARKER)
        restored = []
        for index, piece in enumerate(pieces[:-1]):
            restored.append(piece)
            if index < len(styles):
                restored.append(styles[index])
        restored.append(pieces[-1])
        return "".join(restored)

    def _get_next_chunk(self, chunk_size: int = 4500) -> Generator:
        """Get a portion of the subtitles at the time based on the chunk size

        Override blocks are stripped from each subtitle before it is measured.
        Whenever a chunk is yielded, ``self.text_styles`` holds the stripped
        blocks of exactly that chunk (one list per subtitle), so the subtitle
        that overflowed into the next chunk never leaks its styles into the
        current one. Empty chunks are never yielded.

        Args:
            chunk_size (int, optional): Maximum number of letter in text chunk. Defaults to 4500.
        Yields:
            Generator: Each chunk at the time
        """
        portion = []
        portion_styles = []
        portion_chars = 0  # Sum of the text lengths currently in the chunk

        for subtitle in self.subtitles.events[self.start_from :]:
            subtitle.text, styles = self._strip_styles(subtitle.text)

            # Chunk size if this subtitle were added to the current chunk
            n_char = (
                portion_chars  # All subtitles in chunk
                + len(subtitle.text)  # New subtitle
                + len(portion)  # Break lines in chunk
                + 1  # New breakline
            )

            # If chunk goes beyond the limit, yield it
            if n_char >= chunk_size and portion:
                self.text_styles = portion_styles
                yield portion
                portion, portion_styles, portion_chars = [], [], 0

            # Put subtitle content in chunk
            portion.append(subtitle)
            portion_styles.append(styles)
            portion_chars += len(subtitle.text)

        # Yield last chunk, unless there was nothing (left) to translate
        if portion:
            self.text_styles = portion_styles
            yield portion

    def _clean_subs_content(self, subtitles):
        """Cleans subtitles content and delete line breaks

        Removes HTML-like tags, replaces empty texts by "..." and swaps every
        ASS hard line break for a space-padded placeholder that survives the
        translation (``wrap_lines`` reverts it).

        Args:
            subtitles: Object exposing the subtitles as ``events``
        Returns:
            Same object, with cleaned event texts
        """
        padded_placeholder = f" {self._LINE_BREAK_PLACEHOLDER} "

        for sub in subtitles.events:
            text = self._HTML_TAG.sub("", sub.text).strip()

            if text == "":
                text = "..."

            # Dash dialogue keeps its own marker and is otherwise left as is
            if all(sentence.startswith("-") for sentence in text.split("\n")):
                sub.text = text.replace("\n", self._DASH_BREAK_PLACEHOLDER)
                continue

            # Swap \N for the placeholder, separated from the neighbouring
            # words by exactly one space and without consuming any character.
            # A function is used as replacement so the backslashes are not
            # reinterpreted as a regex template.
            text = self._LINE_BREAK_TAG.sub(lambda _: padded_placeholder, text)

            sub.text = text.replace("\n", " ")

        return subtitles

    def wrap_lines(self, line_wrap_limit: int = 50) -> None:
        """Turn the line break placeholders back into ASS hard line breaks

        Args:
            line_wrap_limit (int): Number of maximum characters in a line before wrap. Defaults to 50. (not used)
        """
        for sub in self.subtitles.events:
            # An ASS event is a single line of the file, so a break has to be
            # written as the \N tag and never as a real newline character.
            text = sub.text.replace(self._DASH_BREAK_PLACEHOLDER, self._LINE_BREAK)
            sub.text = self._PLACEHOLDER.sub(lambda _: self._LINE_BREAK, text)

    def translate(
        self,
        translator: "Translator",
        source_language: str,
        destination_language: str,
    ) -> None:
        """Translate ASS file using a translator of your choose

        Args:
            translator (Translator): Translator object of choose
            destination_language (str): Destination language (must be coherent with your translator)
            source_language (str): Source language (must be coherent with your translator)
        Raises:
            IndexError: If the translator returns fewer lines than it was given
        """
        total = len(self.subtitles.events)

        # For each chunk of the file (based on the translator capabilities)
        for subs_slice in self._get_next_chunk(translator.max_char):
            # Styles stripped from this very chunk, aligned with subs_slice
            styles_slice = self.text_styles

            progress = int(100 * self.current_subtitle / total)
            print(f"... Translating {progress} %")

            # Put chunk in a single text with break lines
            text = "\n".join(sub.text for sub in subs_slice)

            # Translate
            translation = translator.translate(
                text, source_language, destination_language
            )

            # Break each line back into subtitle content, giving every
            # subtitle its own override blocks back instead of the markers
            lines = translation.splitlines()
            for index, subtitle in enumerate(subs_slice):
                subtitle.text = self._restore_styles(lines[index], styles_slice[index])
                self.current_subtitle += 1

        print("... Translation done")

    def save_backup(self):
        """Write the subtitles translated so far to the backup file

        The in-memory event list is truncated to the translated part.
        """
        self.subtitles.events = self.subtitles.events[: self.current_subtitle]
        self.save(self.backup_file)

    def _delete_backup(self):
        """Remove the backup file if it exists"""
        if os.path.exists(self.backup_file):
            os.remove(self.backup_file)

    def save(self, filepath: str) -> None:
        """Saves ASS to file

        Any existing backup file is removed before writing.

        Args:
            filepath (str): Path of the new file
        """
        self._delete_backup()

        print(f"Saving {filepath}")
        with open(filepath, "w", encoding="utf-8") as file_out:
            pyass.dump(self.subtitles, file_out)
2 changes: 1 addition & 1 deletion srtranslator/srt_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, filepath: str) -> None:
self.start_from = 0
self.current_subtitle = 0

print(f"Loading {filepath}")
print(f"Loading {filepath} as SRT")
with open(filepath, "r", encoding="utf-8", errors="ignore") as input_file:
self.subtitles = self.load_from_file(input_file)

Expand Down
Loading

0 comments on commit e14fde3

Please sign in to comment.