Skip to content

Commit

Permalink
Optionally preprocess subtitles on load (#164)
Browse files Browse the repository at this point in the history
* Preliminary subtitle pre-processor with tests
* Convert whitespace to newlines in preprocess step
* Helpers normalisation
* Cache duration for performance
* Docstring for settings dialog
  • Loading branch information
machinewrapped authored Apr 21, 2024
1 parent a4dedd9 commit d2fc486
Show file tree
Hide file tree
Showing 31 changed files with 943 additions and 473 deletions.
3 changes: 1 addition & 2 deletions GUI/AboutDialog.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import os
import pkg_resources
from PySide6.QtWidgets import (QDialog, QVBoxLayout, QDialogButtonBox, QLabel, QHBoxLayout)
from PySide6.QtGui import QPixmap
from PySide6.QtCore import Qt

from PySubtitle.Helpers.resources import GetResourcePath
from PySubtitle.Helpers.Resources import GetResourcePath
from PySubtitle.version import __version__

class AboutDialog(QDialog):
Expand Down
6 changes: 6 additions & 0 deletions GUI/Commands/BatchSubtitlesCommand.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from GUI.ProjectDataModel import ProjectDataModel
from PySubtitle.Options import Options
from PySubtitle.SubtitleBatcher import CreateSubtitleBatcher, SubtitleBatcher
from PySubtitle.SubtitleProcessor import SubtitleProcessor
from PySubtitle.SubtitleProject import SubtitleProject

import logging
Expand All @@ -14,6 +15,7 @@ def __init__(self, project : SubtitleProject, options : Options):
super().__init__()
self.project : SubtitleProject = project
self.options : Options = options
self.preprocess_subtitles = options.get('preprocess_subtitles', False)

def execute(self):
logging.info("Executing BatchSubtitlesCommand")
Expand All @@ -23,6 +25,10 @@ def execute(self):
if not project or not project.subtitles:
logging.error("No subtitles to batch")

if self.preprocess_subtitles:
preprocessor = SubtitleProcessor(self.options)
project.subtitles.PreProcess(preprocessor)

batcher : SubtitleBatcher = CreateSubtitleBatcher(self.options)
project.subtitles.AutoBatch(batcher)

Expand Down
13 changes: 4 additions & 9 deletions GUI/GuiHelpers.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
from datetime import timedelta
import logging
import os
import darkdetect

from srt import timedelta_to_srt_timestamp
from PySide6.QtWidgets import (QApplication, QFormLayout)

from PySubtitle.Helpers.resources import GetResourcePath
from PySubtitle.Helpers.Resources import GetResourcePath

def GetThemeNames():
themes = []
Expand All @@ -17,7 +15,7 @@ def GetThemeNames():
themes.append(theme_name)

themes.sort()
return themes
return themes

def LoadStylesheet(name):
if not name or name == "default":
Expand All @@ -44,14 +42,11 @@ def GetLineHeight(text: str, wrap_length: int = 60) -> int:
wraps = -(-len(text) // wrap_length) if wrap_length else None # Ceiling division
return text.count('\n') + wraps

def TimeDeltaToText(time: timedelta) -> str:
return timedelta_to_srt_timestamp(time).replace('00:', '') if time is not None else None

def DescribeLineCount(line_count, translated_count):
    """
    Summarise translation progress as a short human-readable string.

    Returns just the line count when nothing has been translated, a
    "lines translated" message when every line has been translated, and
    an "X of Y" message otherwise.
    """
    # Nothing translated yet: only report the total
    if translated_count == 0:
        return f"{line_count} lines"

    # Everything translated: no need to show both numbers
    if line_count == translated_count:
        return f"{translated_count} lines translated"

    return f"{translated_count} of {line_count} lines translated"

Expand Down
4 changes: 2 additions & 2 deletions GUI/MainWindow.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from GUI.Widgets.LogWindow import LogWindow
from GUI.Widgets.ModelView import ModelView
from GUI.NewProjectSettings import NewProjectSettings
from PySubtitle.Helpers.resources import GetResourcePath
from PySubtitle.Helpers.Resources import GetResourcePath
from PySubtitle.Options import Options
from PySubtitle.SubtitleError import ProviderConfigurationError
from PySubtitle.TranslationProvider import TranslationProvider
Expand Down Expand Up @@ -203,7 +203,7 @@ def _update_settings(self, settings):

if 'theme' in updated_settings:
LoadStylesheet(self.global_options.theme)

def _load_subtitle_file(self, filepath):
self.QueueCommand(LoadSubtitleFile(filepath, self.global_options))

Expand Down
11 changes: 9 additions & 2 deletions GUI/NewProjectSettings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from PySubtitle.Instructions import GetInstructionFiles, LoadInstructionsResource
from PySubtitle.SubtitleBatcher import CreateSubtitleBatcher
from PySubtitle.SubtitleProcessor import SubtitleProcessor
from PySubtitle.SubtitleProject import SubtitleProject
from PySubtitle.SubtitleScene import SubtitleScene

Expand All @@ -17,6 +18,7 @@ class NewProjectSettings(QDialog):
'scene_threshold': (float, "Number of seconds gap to consider it a new scene"),
'min_batch_size': (int, "Fewest lines to send in separate batch"),
'max_batch_size': (int, "Most lines to send in each batch"),
'preprocess_subtitles': (bool, "Preprocess subtitles before batching"),
'use_simple_batcher': (bool, "Use old batcher instead of batching dynamically based on gap size"),
'batch_threshold': (float, "Number of seconds gap to consider starting a new batch (simple batcher)"),
'instruction_file': (str, "Detailed instructions for the translator"),
Expand All @@ -33,7 +35,7 @@ def __init__(self, datamodel : ProjectDataModel, parent=None):
self.datamodel = datamodel
self.project : SubtitleProject = datamodel.project
self.settings = datamodel.project_options.GetSettings()

self.providers = datamodel.available_providers
self.OPTIONS['provider'] = (self.providers, self.OPTIONS['provider'][1])
self.settings['provider'] = datamodel.provider
Expand Down Expand Up @@ -125,9 +127,14 @@ def _preview_batches(self):
self._update_settings()
self._update_inputs()

lines = self.project.subtitles.originals
if self.settings.get('preprocess_subtitles'):
preprocessor = SubtitleProcessor(self.settings)
lines = preprocessor.PreprocessSubtitles(lines)

batcher = CreateSubtitleBatcher(self.settings)
if batcher.min_batch_size < batcher.max_batch_size:
scenes : list[SubtitleScene] = batcher.BatchSubtitles(self.project.subtitles.originals)
scenes : list[SubtitleScene] = batcher.BatchSubtitles(lines)
batch_count = sum(scene.size for scene in scenes)
line_count = sum(scene.linecount for scene in scenes)
self.preview_widget.setText(f"{line_count} lines in {len(scenes)} scenes and {batch_count} batches")
Expand Down
93 changes: 73 additions & 20 deletions GUI/SettingsDialog.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,19 @@
from PySubtitle.TranslationProvider import TranslationProvider

class SettingsDialog(QDialog):
"""
Dialog for editing user settings in various categories
The settings are stored in a dictionary with a section for each tab and the settings it contains as key-value pairs.
Each value is either a type indicating the type of the setting, or a tuple containing the type and a tooltip string.
The PROVIDER_SECTION is special and contains the settings for the translation provider, which are loaded dynamically based on the selected provider.
The VISIBILITY_DEPENDENCIES dictionary contains the conditions for showing or hiding each section based on the settings.
Some dropdowns are populated dynamically when the dialog is created, based on the available themes and instruction files.
"""
PROVIDER_SECTION = 'Provider Settings'
SECTIONS = {
'General': {
Expand All @@ -17,6 +30,7 @@ class SettingsDialog(QDialog):
'instruction_file': (str, "Instructions for the translation provider to follow"),
'prompt': (str, "The (brief) instruction for each batch of subtitles. Some [tags] are automatically filled in"),
'theme': [],
'preprocess_subtitles': (bool, "Preprocess subtitles before translation"),
'autosave': (bool, "Automatically save the project after each translation batch"),
'write_backup': (bool, "Save a backup copy of the project when opening it"),
# 'autosplit_incomplete': (bool, "If true, incomplete translations will be split into smaller batches and retried"),
Expand All @@ -27,6 +41,14 @@ class SettingsDialog(QDialog):
'provider': ([], "The AI translation service to use"),
'provider_settings': TranslationProvider,
},
'Processing': {
'max_line_duration': (float, "Maximum duration of a single line of subtitles"),
'min_line_duration': (float, "Minimum duration of a single line of subtitles"),
'min_split_chars': (int, "Minimum number of characters to split a line at"),
'break_dialog_on_one_line': (bool, "Add line breaks to text with dialog markers"),
'normalise_dialog_tags': (bool, "Ensure dialog markers match in multi-line subtitles"),
'whitespaces_to_newline': (bool, "Convert blocks of whitespace and Chinese Commas to newlines"),
},
'Advanced': {
'max_threads': (int, "Maximum number of simultaneous translation threads for fast translation"),
'min_batch_size': (int, "Avoid creating a new batch smaller than this"),
Expand All @@ -35,7 +57,6 @@ class SettingsDialog(QDialog):
'batch_threshold': (float, "Consider starting a new batch after a gap of this many seconds (simple batcher only)"),
'use_simple_batcher': (bool, "Use old batcher instead of batching dynamically based on gap size"),
'match_partial_words': (bool, "Used with substitutions, required for some languages where word boundaries aren't detected"),
'whitespaces_to_newline': (bool, "Convert blocks of whitespace and Chinese Commas to newlines"),
'max_context_summaries': (int, "Limits the number of scene/batch summaries to include as context with each translation batch"),
'max_summary_length': (int, "Maximum length of the context summary to include with each translation batch"),
'max_characters': (int, "Validator: Maximum number of characters to allow in a single translated line"),
Expand All @@ -45,6 +66,12 @@ class SettingsDialog(QDialog):
}
}

VISIBILITY_DEPENDENCIES = {
'Processing' : {
'preprocess_subtitles': True
}
}

def __init__(self, options : Options, provider_cache = None, parent=None, focus_provider_settings : bool = False):
super(SettingsDialog, self).__init__(parent)
self.setWindowTitle("GUI-Subtrans Settings")
Expand Down Expand Up @@ -89,6 +116,9 @@ def __init__(self, options : Options, provider_cache = None, parent=None, focus_
if focus_provider_settings:
self.tabs.setCurrentWidget(self.sections[self.PROVIDER_SECTION])

# Conditionally hide or show tabs
self._update_section_visibility()

# Add Ok and Cancel buttons
self.buttonBox = QDialogButtonBox(QDialogButtonBox.Ok | QDialogButtonBox.Cancel, self)
self.buttonBox.accepted.connect(self.accept)
Expand Down Expand Up @@ -117,7 +147,7 @@ def accept(self):
self.provider_settings[provider][field.key] = field.GetValue()
else:
self.settings[field.key] = field.GetValue()

except Exception as e:
logging.error(f"Unable to update settings: {e}")

Expand All @@ -129,12 +159,15 @@ def accept(self):
self.reject()

def _create_section_widget(self, section_name):
"""
Create the form for a settings tab
"""
section_widget = QFrame(self)
section_widget.setObjectName(section_name)

layout = QFormLayout(section_widget)
layout.setFieldGrowthPolicy(QFormLayout.FieldGrowthPolicy.ExpandingFieldsGrow)

self._populate_form(section_name, layout)

self.sections[section_name] = section_widget
Expand All @@ -143,7 +176,7 @@ def _create_section_widget(self, section_name):

def _populate_form(self, section_name : str, layout : QFormLayout):
"""
Create the form fields for the options
Create the form fields for the options
"""
ClearForm(layout)

Expand All @@ -159,6 +192,16 @@ def _populate_form(self, section_name : str, layout : QFormLayout):
layout.addRow(field.name, field)
self.widgets[key] = field

def _update_section_visibility(self):
    """
    Show or hide settings tabs according to VISIBILITY_DEPENDENCIES.

    A listed section's tab is visible only while every setting it depends on
    currently holds the required value. Sections whose tab cannot be found
    are left untouched.
    """
    for section_name, dependencies in self.VISIBILITY_DEPENDENCIES.items():
        # Look the tab page up by its object name
        section_tab = self.tabs.findChild(QWidget, section_name)
        if not section_tab:
            continue

        should_show = True
        for key, required_value in dependencies.items():
            if not (self.settings.get(key) == required_value):
                should_show = False
                break

        tab_index = self.tabs.indexOf(section_tab)
        self.tabs.setTabVisible(tab_index, should_show)

def _initialise_translation_provider(self):
"""
Initialise translation provider
Expand Down Expand Up @@ -191,22 +234,28 @@ def _add_provider_options(self, section_name : str, layout : QFormLayout):
field.contentChanged.connect(lambda setting=field: self._on_setting_changed(section_name, setting.key, setting.GetValue()))
layout.addRow(field.name, field)
self.widgets[key] = field

provider_info = self.translation_provider.GetInformation()
if provider_info:
provider_layout = QVBoxLayout()
infoLabel = QLabel(provider_info)
infoLabel.setWordWrap(True)
infoLabel.setTextFormat(Qt.TextFormat.RichText)
infoLabel.setOpenExternalLinks(True)
provider_layout.addWidget(infoLabel)
provider_layout.addStretch(1)

scrollArea = QScrollArea()
scrollArea.setWidgetResizable(True)
scrollArea.setSizeAdjustPolicy(QScrollArea.SizeAdjustPolicy.AdjustToContents)
scrollArea.setLayout(provider_layout)
layout.addRow(scrollArea)
self._add_provider_info_widget(layout, provider_info)

def _add_provider_info_widget(self, layout, provider_info):
    """
    Add a rich text panel describing the provider as a new row of the layout.

    The text is shown in a word-wrapped label that allows external links to be
    opened, placed in a vertical layout that is set on a scroll area.
    """
    # Label that renders the provider information as rich text
    info_label = QLabel(provider_info)
    info_label.setWordWrap(True)
    info_label.setTextFormat(Qt.TextFormat.RichText)
    info_label.setOpenExternalLinks(True)

    # Stretch after the label so any spare vertical space sits below the text
    info_layout = QVBoxLayout()
    info_layout.addWidget(info_label)
    info_layout.addStretch(1)

    scroll_area = QScrollArea()
    scroll_area.setWidgetResizable(True)
    scroll_area.setSizeAdjustPolicy(QScrollArea.SizeAdjustPolicy.AdjustToContents)
    scroll_area.setLayout(info_layout)

    layout.addRow(scroll_area)

def _refresh_provider_options(self):
"""
Expand All @@ -215,7 +264,7 @@ def _refresh_provider_options(self):
if not self.translation_provider:
logging.warning("Translation provider is not configured")
return

provider_settings = self.provider_settings.get(self.translation_provider.name, {})
self.translation_provider.settings.update(provider_settings)

Expand All @@ -238,6 +287,10 @@ def _on_setting_changed(self, section_name, key, value):
self.settings[key] = value
self._update_instruction_file()

elif key == 'preprocess_subtitles':
self.settings[key] = value
self._update_section_visibility()

elif section_name == self.PROVIDER_SECTION:
provider = self.settings.get('provider')
self.provider_settings[provider][key] = value
Expand All @@ -258,5 +311,5 @@ def _update_instruction_file(self):
self.widgets['prompt'].SetValue(instructions.prompt)
except Exception as e:
logging.error(f"Unable to load instructions from {instruction_file}: {e}")


4 changes: 3 additions & 1 deletion GUI/ViewModel/LineItem.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from GUI.GuiHelpers import GetLineHeight
from PySubtitle.Helpers import Linearise, UpdateFields
from PySubtitle.Helpers import UpdateFields
from PySubtitle.Helpers.Text import Linearise

from GUI.ViewModel.ViewModelError import ViewModelError

from PySide6.QtCore import Qt
Expand Down
Loading

0 comments on commit d2fc486

Please sign in to comment.