#!/usr/bin/env python3
'''
Script which will produce stats about the video files on the platform

For every video of the catalog, the height of its tallest resource is taken
as the "source resolution"; durations, video counts and storage sizes are then
summed per source resolution and printed as three rankings.

Usage: report_video_resolutions.py [local_conf]
'''
# Provenance: added as examples/report_video_resolutions.py by commit 8b27681a
# ("add script which produces stats about source file resolutions (#40)").
import os
import sys
from collections import defaultdict


def format_seconds(seconds):
    '''
    Return a duration given in seconds as an "H:MM:SS" timecode.

    Hours are not wrapped at 24, so 360000 seconds gives "100:00:00".
    '''
    m, s = divmod(seconds, 60)
    h, m = divmod(m, 60)
    return '%d:%02d:%02d' % (h, m, s)


def format_bytes(size):
    '''
    Return a byte count as a human readable string using decimal (1000-based)
    units, rounded to one decimal, e.g. 1500 -> "1.5 kilobytes".

    Values too large for the biggest known unit stay expressed in that unit
    (e.g. "5000.0 terabytes") instead of failing on an unknown unit.
    '''
    power = 1000
    power_labels = ('', 'kilo', 'mega', 'giga', 'tera')
    n = 0
    # ">=" so that exactly 1000 is reported as 1.0 kilobytes; the bound on n
    # prevents running past the last label for sizes of 10**15 bytes and more.
    while size >= power and n < len(power_labels) - 1:
        size /= power
        n += 1
    return f'{round(size, 1)} {power_labels[n]}bytes'


def _sorted_by_value(stats):
    '''Return the (key, value) pairs of a dict, largest value first.'''
    return sorted(stats.items(), key=lambda item: item[1], reverse=True)


def _collect_source_stats(videos, list_resources):
    '''
    Aggregate duration, count and storage per source resolution.

    videos: list of catalog entries providing 'oid', 'duration_s' and 'storage_used'.
    list_resources: callable returning the list of resources (dicts with a
        'height' key) for a given oid.

    Returns three dicts keyed by source resolution (height):
    total duration in seconds, number of videos, total storage in bytes.
    Videos without resources or with a zero duration are ignored.
    '''
    durations = defaultdict(int)
    counts = defaultdict(int)
    sizes = defaultdict(int)

    total = len(videos)
    for index, video in enumerate(videos, start=1):
        # "\r" keeps the progress counter on a single terminal line
        print(f'{index}/{total}', end='\r')
        oid = video['oid']
        duration = int(video['duration_s'])  # in seconds
        storage = int(video['storage_used'])  # in bytes
        resources = list_resources(oid)
        if not resources or not duration:
            continue
        # the tallest resource is assumed to be the source file
        source_resolution = max(resource['height'] for resource in resources)
        durations[source_resolution] += duration
        counts[source_resolution] += 1
        sizes[source_resolution] += storage

    return durations, counts, sizes


def _print_report(durations, counts, sizes):
    '''Print the three rankings (by duration, by count, by size).'''
    print()
    print('Source resolutions by duration:')
    for mode, duration in _sorted_by_value(durations):
        # durations only accumulate non-zero values, see _collect_source_stats
        size_per_hour = int(sizes[mode] / (duration / 3600))
        print(f'{mode}: {format_seconds(duration)}, average size: {format_bytes(size_per_hour)} per hour')

    print()
    print('Source resolutions by count:')
    for mode, count in _sorted_by_value(counts):
        print(f'{mode}: {count}')

    print()
    print('Source resolutions by size:')
    for mode, size in _sorted_by_value(sizes):
        print(f'{mode}: {format_bytes(size)}')


def main():
    '''Dump the catalog, query the resources of every video and print the stats.'''
    # Make the parent of this script's directory importable, so that ms_client
    # can be found when the script is run from a source checkout.
    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    from ms_client.client import MediaServerClient

    local_conf = sys.argv[1] if len(sys.argv) > 1 else None
    msc = MediaServerClient(local_conf)

    print('Dumping catalog')
    videos = msc.get_catalog(fmt='json')['videos']

    def list_resources(oid):
        return msc.api('/medias/resources-list/', params={'oid': oid})["resources"]

    durations, counts, sizes = _collect_source_stats(videos, list_resources)
    _print_report(durations, counts, sizes)


if __name__ == '__main__':
    main()