Skip to content

Commit

Permalink
add script which produces stats about source file resolutions (#40)
Browse files Browse the repository at this point in the history
  • Loading branch information
fthiery authored Dec 13, 2024
1 parent dbae8fa commit 8b27681
Showing 1 changed file with 80 additions and 0 deletions.
80 changes: 80 additions & 0 deletions examples/report_video_resolutions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#!/usr/bin/env python3
'''
Script which will produce stats about the video files on the platform
'''
import os
import sys


def format_seconds(seconds):
    """Return *seconds* rendered as an ``H:MM:SS`` timecode string.

    Hours are not capped, so long durations simply yield a larger hour
    field. Each field is formatted with ``%d``.
    """
    total_minutes, secs = divmod(seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return '%d:%02d:%02d' % (hours, minutes, secs)


def format_bytes(size):
    """Return *size* (a byte count) as a human-readable string.

    The value is scaled by powers of 1000 and labelled with the matching
    decimal prefix (kilo, mega, giga, tera), rounded to one decimal place.
    Values below 1000 are reported in plain bytes. Values too large for the
    biggest known prefix are still expressed in terabytes instead of
    failing.
    """
    power = 1000
    power_labels = ('', 'kilo', 'mega', 'giga', 'tera')
    largest_unit = len(power_labels) - 1
    n = 0
    # ">=" so that exactly 1000 becomes "1.0 kilobytes"; the bound on n keeps
    # very large sizes from indexing past the last available label.
    while size >= power and n < largest_unit:
        size /= power
        n += 1
    return f'{round(size, 1)} {power_labels[n]}bytes'


if __name__ == '__main__':
    # Put the parent of this script's directory on the import path so that
    # the ms_client package can be imported from there.
    script_path = os.path.abspath(__file__)
    sys.path.append(os.path.dirname(os.path.dirname(script_path)))
    from ms_client.client import MediaServerClient

    # The optional first command-line argument is handed to the client as-is.
    conf_arg = None
    if len(sys.argv) > 1:
        conf_arg = sys.argv[1]
    client = MediaServerClient(conf_arg)

    print('Dumping catalog')
    catalog = client.get_catalog(fmt='json')
    videos = catalog['videos']

    # Running totals, each keyed by the height of a video's tallest resource.
    duration_by_height = {}
    count_by_height = {}
    size_by_height = {}

    for position, video in enumerate(videos, start=1):
        # Progress counter rewritten in place thanks to the carriage return.
        print(f'{position}/{len(videos)}', end='\r')
        media_oid = video['oid']
        video_duration = int(video['duration_s'])  # in seconds
        video_storage = int(video['storage_used'])  # in bytes
        response = client.api('/medias/resources-list/', params={'oid': media_oid})
        tallest_first = sorted(
            response["resources"],
            key=lambda resource: resource["height"],
            reverse=True,
        )
        # Videos without resources or with a zero duration are not counted.
        if not tallest_first or not video_duration:
            continue
        # The tallest resource is taken as the source resolution.
        source_height = tallest_first[0]['height']
        duration_by_height[source_height] = (
            duration_by_height.get(source_height, 0) + video_duration
        )
        count_by_height[source_height] = count_by_height.get(source_height, 0) + 1
        size_by_height[source_height] = (
            size_by_height.get(source_height, 0) + video_storage
        )

    print()
    print('Source resolutions by duration:')
    ranked_by_duration = sorted(
        duration_by_height.items(), key=lambda pair: pair[1], reverse=True
    )
    for height, total_seconds in ranked_by_duration:
        total_size = size_by_height[height]
        # Storage normalised to one hour of content for this resolution.
        hourly_size = int(total_size / (total_seconds / 3600))
        print(f'{height}: {format_seconds(total_seconds)}, average size: {format_bytes(hourly_size)} per hour')

    print()
    print('Source resolutions by count:')
    ranked_by_count = sorted(
        count_by_height.items(), key=lambda pair: pair[1], reverse=True
    )
    for height, video_count in ranked_by_count:
        print(f'{height}: {video_count}')

    print()
    print('Source resolutions by size:')
    ranked_by_size = sorted(
        size_by_height.items(), key=lambda pair: pair[1], reverse=True
    )
    for height, total_size in ranked_by_size:
        print(f'{height}: {format_bytes(total_size)}')

0 comments on commit 8b27681

Please sign in to comment.