[scene_manager] Add crop functionality (#449)

* [scene_manager] Add ability to crop input
* [scene_manager] Validate crop config params and improve error messaging. Make sure exceptions are always thrown in debug mode from the source location.
Breakthrough · Nov 25, 2024 · 18c7ab8 · 18c7ab8
1 parent 95091f8
commit 18c7ab8
Show file tree

Hide file tree

Showing 11 changed files with 245 additions and 39 deletions.
diff --git a/docs/cli.rst b/docs/cli.rst
@@ -57,6 +57,10 @@ Options
 
   Path to config file. See :ref:`config file reference <scenedetect_cli-config_file>` for details.
 
+.. option:: --crop X0 Y0 X1 Y1
+
+  Crop input video. Specified as two points representing top left and bottom right corner of crop region. 0 0 is top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).
+
 .. option:: -s CSV, --stats CSV
 
   Stats file (.csv) to write frame metrics. Existing files will be overwritten. Used for tuning detection parameters and data analysis.

diff --git a/scenedetect.cfg b/scenedetect.cfg
@@ -27,15 +27,19 @@
 # Must be one of: detect-adaptive, detect-content, detect-threshold, detect-hist
 #default-detector = detect-adaptive
 
-# Video backend interface, must be one of: opencv, pyav, moviepy.
-#backend = opencv
+# Output directory for written files. Defaults to working directory.
+#output = /usr/tmp/scenedetect/
 
 # Verbosity of console output (debug, info, warning, error, or none).
 # Set to none for the same behavior as specifying -q/--quiet.
 #verbosity = debug
 
-# Output directory for written files. Defaults to working directory.
-#output = /usr/tmp/scenedetect/
+# Crop input video to area. Specified as two points in the form X0 Y0 X1 Y1 or
+# as (X0 Y0), (X1 Y1). Coordinate (0, 0) is the top-left corner.
+#crop = 100 100 200 250
+
+# Video backend interface, must be one of: opencv, pyav, moviepy.
+#backend = opencv
 
 # Minimum length of a given scene.
 #min-scene-len = 0.6s

diff --git a/scenedetect/_cli/__init__.py b/scenedetect/_cli/__init__.py
@@ -256,6 +256,14 @@ def print_command_help(ctx: click.Context, command: click.Command):
     help="Backend to use for video input. Backend options can be set using a config file (-c/--config). [available: %s]%s"
     % (", ".join(AVAILABLE_BACKENDS.keys()), USER_CONFIG.get_help_string("global", "backend")),
 )
+@click.option(
+    "--crop",
+    metavar="X0 Y0 X1 Y1",
+    type=(int, int, int, int),
+    default=None,
+    help="Crop input video. Specified as two points representing top left and bottom right corner of crop region. 0 0 is top-left of the video frame. Bounds are inclusive (e.g. for a 100x100 video, the region covering the whole frame is 0 0 99 99).%s"
+    % (USER_CONFIG.get_help_string("global", "crop", show_default=False)),
+)
 @click.option(
     "--downscale",
     "-d",
@@ -312,6 +320,7 @@ def scenedetect(
     drop_short_scenes: ty.Optional[bool],
     merge_last_scene: ty.Optional[bool],
     backend: ty.Optional[str],
+    crop: ty.Optional[ty.Tuple[int, int, int, int]],
     downscale: ty.Optional[int],
     frame_skip: ty.Optional[int],
     verbosity: ty.Optional[str],
@@ -326,12 +335,13 @@ def scenedetect(
         output=output,
         framerate=framerate,
         stats_file=stats,
-        downscale=downscale,
         frame_skip=frame_skip,
         min_scene_len=min_scene_len,
         drop_short_scenes=drop_short_scenes,
         merge_last_scene=merge_last_scene,
         backend=backend,
+        crop=crop,
+        downscale=downscale,
         quiet=quiet,
         logfile=logfile,
         config=config,

diff --git a/scenedetect/_cli/config.py b/scenedetect/_cli/config.py
@@ -135,6 +135,47 @@ def from_config(config_value: str, default: "RangeValue") -> "RangeValue":
             ) from ex
 
 
+class CropValue(ValidatedValue):
+    """Validator for a crop region X0 Y0 X1 Y1, kept as (min_x, min_y, max_x, max_y) or None."""
+
+    _IGNORE_CHARS = [",", "/", "(", ")"]
+    """Separator characters treated as whitespace when parsing a crop string."""
+
+    def __init__(self, value: Optional[Union[str, Tuple[int, int, int, int]]] = None):
+        """Accepts None (no crop), a string, a tuple of four ints, or another CropValue.
+        Raises ValueError unless the result is None or four non-negative integers."""
+        # Unwrap an existing CropValue so that `self.value` is always a plain tuple or None.
+        crop = value.value if isinstance(value, CropValue) else value
+        if isinstance(crop, str):
+            table = str.maketrans({char: " " for char in self._IGNORE_CHARS})
+            crop = tuple(int(val) for val in crop.translate(table).split())
+        if crop is not None:
+            if not isinstance(crop, tuple) or len(crop) != 4:
+                raise ValueError("Crop region must be four numbers of the form X0 Y0 X1 Y1!")
+            if any(coordinate < 0 for coordinate in crop):
+                raise ValueError("Crop coordinates must be >= 0")
+            (x0, y0, x1, y1) = crop
+            crop = (min(x0, x1), min(y0, y1), max(x0, x1), max(y0, y1))
+        self._crop = crop
+
+    @property
+    def value(self) -> Optional[Tuple[int, int, int, int]]:
+        """The normalized crop region, or None when no crop region is set."""
+        return self._crop
+
+    def __str__(self) -> str:
+        # Formatting None with four %d placeholders would raise TypeError.
+        return "None" if self._crop is None else "[%d, %d], [%d, %d]" % self._crop
+
+    @staticmethod
+    def from_config(config_value: str, default: "CropValue") -> "CropValue":
+        """Parse `config_value`, raising OptionParseFailure if it is not a valid crop region."""
+        try:
+            return CropValue(config_value)
+        except ValueError as ex:
+            raise OptionParseFailure(f"{ex}") from ex
+
+
 class ScoreWeightsValue(ValidatedValue):
     """Validator for score weight values (currently a tuple of four numbers)."""
 
@@ -154,7 +195,7 @@ def __init__(self, value: Union[str, ContentDetector.Components]):
             self._value = ContentDetector.Components(*(float(val) for val in values))
 
     @property
-    def value(self) -> Tuple[float, float, float, float]:
+    def value(self) -> ContentDetector.Components:
         return self._value
 
     def __str__(self) -> str:
@@ -340,6 +381,7 @@ def format(self, timecode: FrameTimecode) -> str:
     },
     "global": {
         "backend": "opencv",
+        "crop": CropValue(),
         "default-detector": "detect-adaptive",
         "downscale": 0,
         "downscale-method": Interpolation.LINEAR,
@@ -484,7 +526,7 @@ def _parse_config(config: ConfigParser) -> Tuple[ConfigDict, List[str]]:
                             out_map[command][option] = parsed
                         except TypeError:
                             errors.append(
-                                "Invalid [%s] value for %s: %s. Must be one of: %s."
+                                "Invalid value for [%s] option %s': %s. Must be one of: %s."
                                 % (
                                     command,
                                     option,
@@ -498,7 +540,7 @@ def _parse_config(config: ConfigParser) -> Tuple[ConfigDict, List[str]]:
 
                 except ValueError as _:
                     errors.append(
-                        "Invalid [%s] value for %s: %s is not a valid %s."
+                        "Invalid value for [%s] option '%s': %s is not a valid %s."
                         % (command, option, config.get(command, option), value_type)
                     )
                     continue
@@ -514,7 +556,7 @@ def _parse_config(config: ConfigParser) -> Tuple[ConfigDict, List[str]]:
                         )
                     except OptionParseFailure as ex:
                         errors.append(
-                            "Invalid [%s] value for %s:\n  %s\n%s"
+                            "Invalid value for [%s] option '%s':  %s\nError: %s"
                             % (command, option, config_value, ex.error)
                         )
                     continue
@@ -526,7 +568,7 @@ def _parse_config(config: ConfigParser) -> Tuple[ConfigDict, List[str]]:
                     if command in CHOICE_MAP and option in CHOICE_MAP[command]:
                         if config_value.lower() not in CHOICE_MAP[command][option]:
                             errors.append(
-                                "Invalid [%s] value for %s: %s. Must be one of: %s."
+                                "Invalid value for [%s] option '%s': %s. Must be one of: %s."
                                 % (
                                     command,
                                     option,
@@ -612,8 +654,12 @@ def _load_from_disk(self, path=None):
                 config_file_contents = config_file.read()
             config.read_string(config_file_contents, source=path)
         except ParsingError as ex:
+            if __debug__:
+                raise
             raise ConfigLoadFailure(self._init_log, reason=ex) from None
         except OSError as ex:
+            if __debug__:
+                raise
             raise ConfigLoadFailure(self._init_log, reason=ex) from None
         # At this point the config file syntax is correct, but we need to still validate
         # the parsed options (i.e. that the options have valid values).
@@ -638,8 +684,8 @@ def get_value(
         """Get the current setting or default value of the specified command option."""
         assert command in CONFIG_MAP and option in CONFIG_MAP[command]
         if override is not None:
-            return override
-        if command in self._config and option in self._config[command]:
+            value = override
+        elif command in self._config and option in self._config[command]:
             value = self._config[command][option]
         else:
             value = CONFIG_MAP[command][option]

diff --git a/scenedetect/_cli/context.py b/scenedetect/_cli/context.py
@@ -22,6 +22,7 @@
     CHOICE_MAP,
     ConfigLoadFailure,
     ConfigRegistry,
+    CropValue,
 )
 from scenedetect.detectors import (
     AdaptiveDetector,
@@ -157,12 +158,13 @@ def handle_options(
         output: ty.Optional[ty.AnyStr],
         framerate: float,
         stats_file: ty.Optional[ty.AnyStr],
-        downscale: ty.Optional[int],
         frame_skip: int,
         min_scene_len: str,
         drop_short_scenes: ty.Optional[bool],
         merge_last_scene: ty.Optional[bool],
         backend: ty.Optional[str],
+        crop: ty.Optional[ty.Tuple[int, int, int, int]],
+        downscale: ty.Optional[int],
         quiet: bool,
         logfile: ty.Optional[ty.AnyStr],
         config: ty.Optional[ty.AnyStr],
@@ -212,7 +214,7 @@ def handle_options(
                 logger.log(log_level, log_str)
             if init_failure:
                 logger.critical("Error processing configuration file.")
-                raise click.Abort()
+                raise SystemExit(1)
 
         if self.config.config_dict:
             logger.debug("Current configuration:\n%s", str(self.config.config_dict).encode("utf-8"))
@@ -285,9 +287,23 @@ def handle_options(
                 scene_manager.downscale = downscale
             except ValueError as ex:
                 logger.debug(str(ex))
-                raise click.BadParameter(str(ex), param_hint="downscale factor") from None
+                raise click.BadParameter(str(ex), param_hint="downscale factor") from ex
         scene_manager.interpolation = self.config.get_value("global", "downscale-method")
 
+        # If crop was set, make sure it's valid (e.g. it should cover at least a single pixel).
+        try:
+            crop = self.config.get_value("global", "crop", CropValue(crop))
+            if crop is not None:
+                (min_x, min_y) = crop[0:2]
+                frame_size = self.video_stream.frame_size
+                if min_x >= frame_size[0] or min_y >= frame_size[1]:
+                    region = CropValue(crop)
+                    raise ValueError(f"{region} is outside of video boundary of {frame_size}")
+                scene_manager.crop = crop
+        except ValueError as ex:
+            logger.debug(str(ex))
+            raise click.BadParameter(str(ex), param_hint="--crop") from ex
+
         self.scene_manager = scene_manager
 
     #
@@ -318,6 +334,8 @@ def get_detect_content_params(
             try:
                 weights = ContentDetector.Components(*weights)
             except ValueError as ex:
+                if __debug__:
+                    raise
                 logger.debug(str(ex))
                 raise click.BadParameter(str(ex), param_hint="weights") from None
 
@@ -373,6 +391,8 @@ def get_detect_adaptive_params(
             try:
                 weights = ContentDetector.Components(*weights)
             except ValueError as ex:
+                if __debug__:
+                    raise
                 logger.debug(str(ex))
                 raise click.BadParameter(str(ex), param_hint="weights") from None
         return {
@@ -545,20 +565,31 @@ def _open_video_stream(
                     framerate=framerate,
                     backend=backend,
                 )
-            logger.debug("Video opened using backend %s", type(self.video_stream).__name__)
+            logger.debug(f"""Video information:
+  Backend:      {type(self.video_stream).__name__}
+  Resolution:   {self.video_stream.frame_size}
+  Framerate:    {self.video_stream.frame_rate}
+  Duration:     {self.video_stream.duration} ({self.video_stream.duration.frame_num} frames)""")
+
         except FrameRateUnavailable as ex:
+            if __debug__:
+                raise
             raise click.BadParameter(
                 "Failed to obtain framerate for input video. Manually specify framerate with the"
                 " -f/--framerate option, or try re-encoding the file.",
                 param_hint="-i/--input",
             ) from ex
         except VideoOpenFailure as ex:
+            if __debug__:
+                raise
             raise click.BadParameter(
                 "Failed to open input video%s: %s"
                 % (" using %s backend" % backend if backend else "", str(ex)),
                 param_hint="-i/--input",
             ) from ex
         except OSError as ex:
+            if __debug__:
+                raise
             raise click.BadParameter(
                 "Input error:\n\n\t%s\n" % str(ex), param_hint="-i/--input"
             ) from None
diff --git a/scenedetect/detectors/content_detector.py b/scenedetect/detectors/content_detector.py
@@ -133,7 +133,6 @@ def __init__(
             self._weights = ContentDetector.LUMA_ONLY_WEIGHTS
         self._kernel: Optional[numpy.ndarray] = None
         if kernel_size is not None:
-            print(kernel_size)
             if kernel_size < 3 or kernel_size % 2 == 0:
                 raise ValueError("kernel_size must be odd integer >= 3")
             self._kernel = numpy.ones((kernel_size, kernel_size), numpy.uint8)

diff --git a/scenedetect/platform.py b/scenedetect/platform.py
@@ -330,7 +330,10 @@ def get_system_version_info() -> str:
     for module_name in third_party_packages:
         try:
             module = importlib.import_module(module_name)
-            out_lines.append(output_template.format(module_name, module.__version__))
+            if hasattr(module, "__version__"):
+                out_lines.append(output_template.format(module_name, module.__version__))
+            else:
+                out_lines.append(output_template.format(module_name, not_found_str))
         except ModuleNotFoundError:
             out_lines.append(output_template.format(module_name, not_found_str))