Skip to content

Commit

Permalink
comtune.py: quality and performance improvements
Browse files Browse the repository at this point in the history
  • Loading branch information
double16 committed May 7, 2024
1 parent 903e0e3 commit 06510e4
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 55 deletions.
8 changes: 4 additions & 4 deletions dvrprocess/common/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
def _ffmpeg_version_parser(path):
    """
    Run ``path -version`` and return the reported ffmpeg version as a float.

    :param path: path of the ffmpeg executable to probe
    :return: the "major.minor" version parsed from the banner, e.g. 6.1
    :raises FileNotFoundError: if the banner has no numeric "version X.Y" or the major version
        is not one of 4, 5, 6, 7
    :raises subprocess.CalledProcessError: if the executable exits with a non-zero status
    """
    output = subprocess.check_output([path, '-version'], text=True)
    match = re.search(r"version (\d+[.]\d+)", output)
    # A banner without a numeric X.Y version previously failed with TypeError from subscripting
    # None; report it the same way as any other unusable binary.
    if match is None:
        raise FileNotFoundError('ffmpeg version [4,5,6,7] not found')
    _maybe_version = float(match[1])
    if int(_maybe_version) not in [4, 5, 6, 7]:
        raise FileNotFoundError('ffmpeg version [4,5,6,7] not found')
    return _maybe_version


Expand Down Expand Up @@ -83,8 +83,8 @@ def _run(self, arguments: list[str], kwargs) -> int:
def _ffprobe_version_parser(path):
    """
    Run ``path -version`` and return the reported ffprobe version as a float.

    :param path: path of the ffprobe executable to probe
    :return: the "major.minor" version parsed from the banner, e.g. 6.1
    :raises FileNotFoundError: if the banner has no numeric "version X.Y" or the major version
        is not one of 4, 5, 6, 7
    :raises subprocess.CalledProcessError: if the executable exits with a non-zero status
    """
    output = subprocess.check_output([path, '-version'], text=True)
    match = re.search(r"version (\d+[.]\d+)", output)
    # A banner without a numeric X.Y version previously failed with TypeError from subscripting
    # None; report it the same way as any other unusable binary.
    if match is None:
        raise FileNotFoundError('ffprobe version [4,5,6,7] not found')
    _maybe_version = float(match[1])
    if int(_maybe_version) not in [4, 5, 6, 7]:
        raise FileNotFoundError('ffprobe version [4,5,6,7] not found')
    return _maybe_version


Expand Down
127 changes: 78 additions & 49 deletions dvrprocess/comtune.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,11 +125,11 @@ def space_has_elements(self):
# Requires user entry: ComskipGene((INI_GROUP_LOGO_INTERPRETATION, 'shrink_logo'), True, "", True, [0, 1, 3, 5], int, 5),
# Requires user entry: ComskipGene((INI_GROUP_LOGO_INTERPRETATION, 'shrink_logo_tail'), True, "", True, [0, 1, 2, 3], int, 0),
ComskipGene((INI_GROUP_LOGO_INTERPRETATION, 'before_logo'), True,
"Cutpoints can be inserted just before the logo appears. Set value set is the amount of seconds to start a search for a silence before the logo appears.",
False, [0, 6, 999], int, 0),
"Cutpoints can be inserted just before the logo appears. This is the amount of seconds to start a search for silence before the logo appears.",
False, [0, 2, 999], int, 0),
ComskipGene((INI_GROUP_LOGO_INTERPRETATION, 'after_logo'), True,
"Cutpoints can be inserted just after the logo disappears. Set value set is the amount of seconds to start a search for a silence after the logo disappears.",
False, [0, 6, 999], int, 0),
"Cutpoints can be inserted just after the logo disappears. This is the amount of seconds to start a search for silence after the logo disappears.",
False, [0, 2, 999], int, 0),
# Calculated: ComskipGene((INI_GROUP_MAIN_SETTINGS, 'max_brightness'), False, "", True, range(15, 60, 5), int, 60),
# Calculated: ComskipGene((INI_GROUP_MAIN_SETTINGS, 'test_brightness'), False, "", True, range(15, 60, 5), int, 40),
# Calculated: ComskipGene((INI_GROUP_MAIN_SETTINGS, 'max_avg_brightness'), False, "", True, range(15, 60, 5), int, 25),
Expand Down Expand Up @@ -224,6 +224,20 @@ def space_has_elements(self):
False, [0, 1, 2, 4, 8, 16, 20, 32, 64, 255], int, 0),
]

# Additional genes used only for shows in a 30 minute slot: setup_gad() appends them to a
# copy of GENES when episode_common_duration == 30*60.
GENES_30: list[ComskipGene] = [
    ComskipGene(
        (INI_GROUP_GLOBAL_REMOVES, 'added_recording'),
        True,
        "Number of minutes added to show content to fill up the 60 or 30 minute slot",
        False,
        # presumably the candidate values (minutes), the value type and the default value,
        # in that order — confirm against the ComskipGene constructor
        [7, 4],
        int,
        7,
    ),
]

# Additional genes used only for shows in a 60 minute slot: setup_gad() appends them to a
# copy of GENES when episode_common_duration == 60*60.
GENES_60: list[ComskipGene] = [
    ComskipGene(
        (INI_GROUP_GLOBAL_REMOVES, 'added_recording'),
        True,
        "Number of minutes added to show content to fill up the 60 or 30 minute slot",
        False,
        # presumably the candidate values (minutes), the value type and the default value,
        # in that order — confirm against the ComskipGene constructor
        [14, 9],
        int,
        14,
    ),
]


@lru_cache(maxsize=None)
def find_gene(section: str, name: str) -> ComskipGene:
Expand Down Expand Up @@ -488,7 +502,8 @@ def edl_tempfile(infile, workdir):

def setup_gad(process_pool: Pool, thread_pool: ThreadPoolExecutor, files, workdir, dry_run=False, force=0,
expensive_genes=False, check_compute=True,
num_generations=0, comskip_defaults: configparser.ConfigParser = None, experimental=False) -> \
num_generations=0, comskip_defaults: configparser.ConfigParser = None, experimental=False,
file_sample_size=None) -> \
(object, list, list, list, progress.progress, str):
"""
Creates and returns a fitness function for comskip parameters for the given video files.
Expand All @@ -500,20 +515,11 @@ def setup_gad(process_pool: Pool, thread_pool: ThreadPoolExecutor, files, workdi
:param force:
:param expensive_genes: True to use genes that require generating the CSV from video for each solution
:param check_compute: True to stop processing if compute is too high
:param comskip_defaults:
:param comskip_defaults:
:param file_sample_size: if > 0, each fitness evaluation uses a fresh random sample of at most this many files, to decrease total time
:return: fitness_func, genes, gene_space, gene_type, fitness_json_path
"""

# TODO: support locking genes, i.e. detect_method if we need to exclude methods we know are broken for the recording

genes = list(
filter(lambda g: (experimental or not g.experimental) and g.space_has_elements() and (
g.use_csv or expensive_genes), GENES))
permutations = math.prod(map(lambda g: len(g.space), genes))
logger.debug("fitting for genes: %s, permutations %d", list(map(lambda e: e.config, genes)), permutations)
gene_space = list(map(lambda g: g.space, genes))
gene_type = list(map(lambda g: g.data_type, genes))

season_dir = os.path.dirname(files[0])
comskip_ini_path = os.path.join(season_dir, 'comskip.ini')
framearray_results: list[Future] = []
Expand Down Expand Up @@ -552,9 +558,8 @@ def setup_gad(process_pool: Pool, thread_pool: ThreadPoolExecutor, files, workdi
if len(dvr_infos) == 0:
raise UserWarning("No files look like they have commercials")

expected_adjusted_duration = get_expected_adjusted_duration(dvr_infos[0])
logger.info(f"Expected adjusted duration: {common.seconds_to_timespec(expected_adjusted_duration)}, "
f"mean duration {common.seconds_to_timespec(mean(dvr_durations))}")
expected_adjusted_duration_default = get_expected_adjusted_duration(dvr_infos[0])
logger.info(f"Mean duration {common.seconds_to_timespec(mean(dvr_durations))}")

black_frame_tuning_done = False
if os.path.isfile(comskip_ini_path):
Expand Down Expand Up @@ -588,7 +593,7 @@ def setup_gad(process_pool: Pool, thread_pool: ThreadPoolExecutor, files, workdi
try:
result.result()
video_stats_progress.progress(result_idx)
except subprocess.CalledProcessError as e:
except subprocess.CalledProcessError:
# generate with the files we have
pass
except KeyboardInterrupt as e:
Expand Down Expand Up @@ -617,6 +622,26 @@ def setup_gad(process_pool: Pool, thread_pool: ThreadPoolExecutor, files, workdi
max_volume=int(median(max_volume_list)) if non_uniformity_list else None,
)

# construct list of genes
# TODO: support locking genes, i.e. detect_method if we need to exclude methods we know are broken for the recording
genes_all = GENES.copy()
if episode_common_duration == 30*60:
genes_all.extend(GENES_30)
elif episode_common_duration == 60*60:
genes_all.extend(GENES_60)
genes = list(
filter(lambda g: (experimental or not g.experimental) and g.space_has_elements() and (
g.use_csv or expensive_genes), genes_all))
permutations = math.prod(map(lambda g: len(g.space), genes))
logger.info("fitting for genes: %s, permutations %d", list(map(lambda e: e.config, genes)), permutations)
gene_space = list(map(lambda g: g.space, genes))
gene_type = list(map(lambda g: g.data_type, genes))
added_recording_gene_idx = -1
for idx, g in enumerate(genes):
if g.config[1] == 'added_recording':
added_recording_gene_idx = idx
break

# create fitness function
filename_hash = hashlib.sha512(",".join(filter(lambda e: os.path.basename(e), files)).encode("utf-8")).hexdigest()
comskip_fitness_ini_path = os.path.join(workdir, 'comskip-fitness-' + filename_hash + '.ini')
Expand Down Expand Up @@ -646,8 +671,13 @@ def f(gad: pygad.GA, solution, solution_idx):
else:
csv_suffix = "-fitness"

dvr_infos_sample = dvr_infos.copy()
if file_sample_size and file_sample_size > 0:
random.shuffle(dvr_infos_sample)
dvr_infos_sample = dvr_infos_sample[:file_sample_size]

results: list[Future] = []
for video_info in dvr_infos:
for video_info in dvr_infos_sample:
file_path = video_info[constants.K_FORMAT]['filename']
csvfile = common.replace_extension(
os.path.join(workdir, common.remove_extension(os.path.basename(file_path)) + csv_suffix),
Expand All @@ -667,34 +697,31 @@ def f(gad: pygad.GA, solution, solution_idx):
force_csv_regen=(force > 1 or not black_frame_tuning_done)
))

csv_config_d = dict(zip(csv_configs, csv_values))
video_stats_progress = progress.progress(
'video stats ' + ','.join(map(lambda k: f"{k[1]}={csv_config_d[k]}", csv_config_d.keys())),
0, len(results) - 1)
try:
for result_idx, result in enumerate(results):
if check_compute and common.should_stop_processing():
thread_pool.shutdown(cancel_futures=True)
raise StopIteration('over loaded')
try:
result.result()
video_stats_progress.progress(result_idx)
except subprocess.CalledProcessError:
# generate fitness with the files we have
pass
except KeyboardInterrupt as e:
thread_pool.shutdown(cancel_futures=True)
os.remove(comskip_fitness_ini_path)
raise e
finally:
video_stats_progress.stop()
for result_idx, result in enumerate(results):
if check_compute and common.should_stop_processing():
thread_pool.shutdown(cancel_futures=True)
raise StopIteration('over loaded')
try:
result.result()
except subprocess.CalledProcessError:
# generate fitness with the files we have
pass
except KeyboardInterrupt as e:
thread_pool.shutdown(cancel_futures=True)
os.remove(comskip_fitness_ini_path)
raise e

# added_recording may be a gene, so we need to calculate it for each run
if added_recording_gene_idx >= 0:
expected_adjusted_duration = common.round_episode_duration(dvr_infos_sample[0]) - ((int(solution[added_recording_gene_idx])+1) * 60.0)
else:
expected_adjusted_duration = expected_adjusted_duration_default

os.remove(comskip_fitness_ini_path)

adjusted_durations = []
commercial_breaks: list[list[edl_util.EdlEvent]] = []
# if we want to ignore already cut files, iterate over dvr_infos instead of video_infos
for video_info in video_infos:
for video_info in dvr_infos_sample:
file_path = video_info[constants.K_FORMAT]['filename']
episode_count, episode_duration, video_duration = common.episode_info(video_info)
adjusted_duration = video_duration
Expand All @@ -714,10 +741,11 @@ def f(gad: pygad.GA, solution, solution_idx):
sigma = stdev(adjusted_durations)
avg = mean(adjusted_durations)
expected_adjusted_duration_diff = abs(expected_adjusted_duration - avg)
logger.info(
logger.debug(
f"Fitness for {solution_repl(genes, solution)}\nis "
f"σ{common.seconds_to_timespec(sigma)}, "
f"duration {common.seconds_to_timespec(avg)}, "
f"expected_adjusted_duration {common.seconds_to_timespec(expected_adjusted_duration)}, "
f"expected_adjusted_duration_diff = {common.seconds_to_timespec(expected_adjusted_duration_diff)}, "
f"count_of_non_defaults = {count_of_non_defaults}"
)
Expand Down Expand Up @@ -773,7 +801,7 @@ def fitness_value(sigma: float, expected_adjusted_duration_diff: float, count_of
logger.warning("Commercial break score < 1: %f", commercial_break_score)
else:
# we care about removing commercials, so do not consider solutions that found no commercials
logger.info("No commercial breaks available for scoring")
logger.debug("No commercial breaks available for scoring")
return -9999

result = 0
Expand Down Expand Up @@ -939,7 +967,7 @@ def tune_show(season_dir, process_pool: Pool, files, workdir, dry_run, force, ex

# https://pygad.readthedocs.io/en/latest/README_pygad_ReadTheDocs.html#pygad-ga-class
num_generations = 50
sol_per_pop = 200
sol_per_pop = 500
num_parents_mating = ceil(sol_per_pop / 2)
keep_elitism = 5

Expand All @@ -949,7 +977,8 @@ def tune_show(season_dir, process_pool: Pool, files, workdir, dry_run, force, ex
process_pool=process_pool, thread_pool=thread_pool, files=files, workdir=workdir, dry_run=dry_run,
force=force, comskip_defaults=comskip_defaults,
expensive_genes=expensive_genes, check_compute=check_compute, num_generations=num_generations,
experimental=experimental)
experimental=experimental,
file_sample_size=10)
except UserWarning as e:
logger.warning(e.args[0])
thread_pool.shutdown(cancel_futures=True)
Expand Down Expand Up @@ -1047,7 +1076,7 @@ def gen_callback(ga_instance: pygad.GA):
if config_default in [None, val, str(genes[idx].default_value)] and genes[idx].exclude_if_default:
logger.info(f"{config} removing because the default value is part of the solution space")
solution[idx] = None
if type(genes[idx].space) is list and len(genes[idx].space) == len(val):
if isinstance(genes[idx].space, list) and len(genes[idx].space) == len(val):
logger.info(f"{config} removing because all gene space is part of the solution space")
solution[idx] = None
else:
Expand Down
4 changes: 2 additions & 2 deletions dvrprocess/smart-comcut.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,8 +269,8 @@ def smart_comcut_cli_run(args: list, dry_run, keep, workdir, preset, force_encod
commercial_break_histo[key] = 1

logger.info(f"{show_label}: commercial_break_histo = {commercial_break_histo}")
logger.info(
f"{show_label}: commercial break score = {commercial_break_score}\n"
logger.info(f"{show_label}: commercial break score = {commercial_break_score}")
logger.debug(
f"{show_label}: commercial breaks combined = ({len(combined_commercial_breaks)}) {edl_util.pretty_print_commercial_breaks([combined_commercial_breaks])}\n"
f"{edl_util.pretty_print_commercial_breaks(aligned_commercial_breaks)}"
)
Expand Down

0 comments on commit 06510e4

Please sign in to comment.