Skip to content

Commit fa6efbc

Browse files
committed
Add script to reindex video
1 parent 957229e commit fa6efbc

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed

reindex-video.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
This script will reindex a video.
3+
4+
Usage:
5+
python reindex-video.py [<video_id> | <input_file>]
6+
Specify either:
7+
<video_id> - A single video ID or the youtube URL
8+
<input_file> - File containing list of video IDs
9+
"""
10+
11+
import os
12+
import sys
13+
import re
14+
import requests
15+
from urllib.parse import urlparse, parse_qs
16+
from elasticsearch import Elasticsearch
17+
from dotenv import load_dotenv
18+
19+
20+
# Load settings via python-dotenv before any os.getenv() lookup below.
load_dotenv()

# Elasticsearch client configured from ES_HOST / ES_USER / ES_PASSWORD.
es = Elasticsearch(
    [os.getenv("ES_HOST")],
    basic_auth=(os.getenv("ES_USER"), os.getenv("ES_PASSWORD")),
)

# Directory containing this script.
script_dir = os.path.dirname(__file__)

# Base URL of the target service and of its API, taken from TA_URL.
TA_URL = os.getenv("TA_URL")
API_URL = f"{TA_URL}/api"

# Shared HTTP session: every request carries the API token header.
session = requests.Session()
# NOTE(review): TLS certificate verification is disabled for all requests
# made through this session.
session.verify = False
session.headers["Authorization"] = f"Token {os.getenv('TA_API_TOKEN')}"
39+
40+
41+
def extract_video_id(video_id=None):
    """Extract a YouTube video ID from a bare ID or a YouTube URL.

    Args:
        video_id: A video ID, or a URL containing ``youtube.com`` or
            ``youtu.be``. Surrounding whitespace (for example the trailing
            newline of a line read from a file) is ignored.

    Returns:
        The extracted video ID. Input that does not look like a YouTube URL
        is returned with surrounding whitespace removed. A URL from which no
        ID can be extracted is returned stripped but otherwise unchanged.
        ``None`` or an empty string is returned as-is.
    """
    if not video_id:
        # Nothing to parse; also avoids a TypeError on the default ``None``.
        return video_id

    video_id = video_id.strip()
    if "youtube.com" not in video_id and "youtu.be" not in video_id:
        # Not a URL: treat the (stripped) input as the ID itself.
        return video_id

    url_data = urlparse(video_id)
    if url_data.netloc == "youtu.be":
        # Short links carry the ID as the URL path; drop the leading "/".
        return url_data.path[1:]

    query = parse_qs(url_data.query)
    if "v" in query:
        return query["v"][0]

    # No "v" parameter: fall back to the first run of 11 ID-legal characters
    # anywhere in the string.
    id_search = re.search(r"([a-zA-Z0-9_-]{11})", video_id)
    if id_search is not None:
        return id_search.group(1)

    return video_id
57+
58+
59+
def main():
    """Request a reindex for one video or for every video listed in a file.

    Reads the first command-line argument. If it names an existing file, each
    non-blank line of that file is treated as a video ID or YouTube URL;
    otherwise the argument itself is treated as a single video ID or URL.
    For every resulting ID a ``{"video": [<id>]}`` payload is POSTed to
    ``{API_URL}/refresh`` through the shared ``session``.

    Prints a usage message and returns if no argument is given. A failed
    request is reported and does not stop the remaining IDs from being
    processed.
    """
    # Seconds to wait for the server before giving up on a single request;
    # without a timeout a stalled connection would hang the script forever.
    request_timeout = 30

    print("Starting script")

    filename = os.path.basename(__file__)
    if len(sys.argv) < 2:
        print(f"Usage: python {filename} [<video_id>|<input_file>]")
        return

    user_input = sys.argv[1]

    # isfile() rather than exists(): a directory matching the argument cannot
    # be opened for reading and should be treated as a plain ID instead.
    if os.path.isfile(user_input):
        with open(user_input, "r", encoding="utf-8") as f:
            raw_entries = [line.strip() for line in f]
    else:
        raw_entries = [user_input.strip()]

    # Strip before extraction and drop blanks so that trailing newlines and
    # empty lines never reach the API as bogus IDs.
    video_ids = []
    for entry in raw_entries:
        if not entry:
            continue
        video_id = extract_video_id(entry)
        if video_id:
            video_ids.append(video_id)

    if not video_ids:
        print("No video IDs found")
        return

    failures = 0
    for video_id in video_ids:
        data = {"video": [video_id]}
        try:
            r = session.post(
                f"{API_URL}/refresh", json=data, timeout=request_timeout
            )
        except requests.RequestException as err:
            # Connection errors and timeouts: report and keep going so one
            # bad request does not abort the rest of the batch.
            failures += 1
            print(f"Failed to reindex video {video_id}: {err}")
            continue

        if r.status_code == 200:
            print(f"Reindex video {video_id}")
        else:
            failures += 1
            print(f"Failed to reindex video {video_id}: {r.status_code}")

    if failures:
        print(f"Script completed with {failures} failed request(s)")
    else:
        print("Script completed successfully!")
92+
93+
94+
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)