Skip to content

Commit

Permalink
Merge pull request #9 from pycontw/fix-#7-conflicts
Browse files Browse the repository at this point in the history
Add new feature: generate pyvideo data
  • Loading branch information
josix authored Nov 9, 2021
2 parents 4b6fae0 + 250b6ed commit eb4572e
Show file tree
Hide file tree
Showing 6 changed files with 560 additions and 282 deletions.
4 changes: 3 additions & 1 deletion Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pytz = '*'
requests = '*'
requests-oauthlib = '*'
tqdm = '*'
slugify = "*"

[dev-packages]
mypy = '*'
Expand All @@ -16,4 +17,5 @@ mypy = '*'
python_version = '3.7'

[scripts]
upload = 'python -m vidpub'
upload = 'python -m vidpub --upload'
playlist = 'python -m vidpub --playlist'
313 changes: 208 additions & 105 deletions Pipfile.lock

Large diffs are not rendered by default.

35 changes: 28 additions & 7 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@
Session Video Uploader
======================

Upload session videos to YouTube.
Supported features:
- Upload session videos to YouTube.
- Generate PyCon YouTube channel video data.
- Purpose: generate PyCon YouTube channel video data for use by `PyVideo <https://github.com/pyvideo/data>`_.

To use:

* Clone the project.

* Add ``.env`` in project containing::

# ===== The following settings are for uploading videos =====
# Point to the directory containing video files.
# Video files should be named by the session title. They don't need to be
# exactly identical, the script will use fuzzy match to find them.
Expand All @@ -18,17 +22,34 @@ To use:
# YouTube OAuth2 secret files, downloaded from Google Console.
OAUTH2_CLIENT_SECRET='path/to/oauth-client-secret.json'

# First day of the conference.
YEAR='2018'
MONTH='6'
DAY='1'

# Get talks list API.
URL='https://tw.pycon.org/2018/ccip/'


# ===== The following settings are for playlist generation =====

# YouTube Data API v3 key
YOUTUBE_API_KEY='YOUR_YOUTUBE_API_KEY'

# YouTube channel information
# Provide either CHANNEL_ID and PLAYLIST_TITLE together,
# or PLAYLIST_ID alone (PLAYLIST_ID by itself is enough).
CHANNEL_ID='YOUR_YOUTUBE_CHANNEL_ID'
PLAYLIST_TITLE='YOUR_YOUTUBE_PLAYLIST_TITLE'


# ===== The following settings are for both actions =====

# First day of the conference.
YEAR='2020'
MONTH='9'
DAY='5'

* ``pipenv sync``

* ``pipenv run upload``
* ``pipenv run upload`` for uploading session videos

* ``pipenv run playlist`` for generating video playlist data


Troubleshooting
Expand Down
181 changes: 12 additions & 169 deletions vidpub/__main__.py
Original file line number Diff line number Diff line change
@@ -1,188 +1,31 @@
import argparse
import datetime
import functools
import io
import itertools
import json
import os
import pathlib
import string

import apiclient.http
import fuzzywuzzy.fuzz
import pytz
import requests
import tqdm

from .info import Conference, ConferenceInfoSource, Session

# OAuth scope URLs; build_youtube() requests the upload scope.
YOUTUBE_SCOPE = "https://www.googleapis.com/auth/youtube"
YOUTUBE_UPLOAD_SCOPE = "https://www.googleapis.com/auth/youtube.upload"

# Directory holding the recorded session videos, taken from the environment.
VIDEO_ROOT = pathlib.Path(os.environ["VIDEO_ROOT"]).resolve()
print(f"Reading video files from {VIDEO_ROOT}")

# Candidate files directly under VIDEO_ROOT: every .avi first, then every .mp4.
VIDEO_PATHS = [
    video_path
    for extension in (".avi", ".mp4")
    for video_path in VIDEO_ROOT.glob(f"*{extension}")
]
assert VIDEO_PATHS
print(f" {len(VIDEO_PATHS)} files loaded")

# Uploaded files are moved here so they are easy to tell apart.
DONE_DIR_PATH = VIDEO_ROOT / "done"
DONE_DIR_PATH.mkdir(parents=True, exist_ok=True)

# First day of the conference, read from YEAR / MONTH / DAY.
FIRST_DATE = datetime.date(
    year=int(os.environ["YEAR"]),
    month=int(os.environ["MONTH"]),
    day=int(os.environ["DAY"]),
)

CONFERENCE_NAME = f"PyCon Taiwan {FIRST_DATE.year}"

TIMEZONE_TAIPEI = pytz.timezone("Asia/Taipei")


def guess_language(s: str) -> str:
    """Guess the language of a string.

    The only two possible return values are ``zh-hant`` and ``en``.

    Nothing scientific, just a vaguely educated guess: if more than half of
    the *letters* in the string are ASCII letters, it is probably English;
    otherwise we assume it is Chinese. Whitespace, digits and punctuation are
    ignored, so a title such as ``"Python 3.10 & 3.11"`` is not classified as
    Chinese merely because it contains few letters.

    A string without any letters (including the empty string) yields
    ``zh-hant``.
    """
    # Only alphabetic characters carry language information; CJK ideographs
    # count as alphabetic for ``str.isalpha``.
    letters = [c for c in s if c.isalpha()]
    ascii_letter_count = sum(c in string.ascii_letters for c in letters)
    if ascii_letter_count > len(letters) / 2:
        return "en"
    return "zh-hant"


def format_datetime_for_google(dt: datetime.datetime) -> str:
    """Format a datetime as a UTC ISO-8601 string for the Google API.

    The Google API is weirdly strict about the format here. It REQUIRES
    exactly three digits of milliseconds and only accepts the ``Z`` suffix
    (not ``+00:00``), so we roll our own formatting instead of relying on
    ``isoformat()``.

    ``dt`` should be timezone-aware. A naive datetime is interpreted by
    ``astimezone`` as being in the system's local timezone.
    """
    # The standard library's UTC object is all that is needed here.
    utc_dt = dt.astimezone(datetime.timezone.utc)
    # %f yields six digits of microseconds; drop the last three to keep
    # milliseconds (truncating, not rounding).
    return utc_dt.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"


def build_body(session: Session) -> dict:
    """Assemble the metadata dict sent along with a session's video.

    The result has three top-level sections, in this order: ``snippet``
    (title, description, tags, languages, category), ``status`` (license and
    privacy) and ``recordingDetails`` (when the session started).
    """
    video_title = session.render_video_title()

    snippet = {
        "title": video_title,
        "description": session.render_video_description(),
        "tags": [session.conference.name, "PyCon Taiwan", "PyCon", "Python"],
        "defaultAudioLanguage": session.lang,
        # The title's language is guessed from its characters.
        "defaultLanguage": guess_language(video_title),
        # NOTE(review): presumably a YouTube category id; confirm its meaning.
        "categoryId": "28",
    }
    status = {
        "license": "creativeCommon",
        "privacyStatus": "unlisted",
        "publishAt": None,
    }
    recording_details = {
        "recordingDate": format_datetime_for_google(session.start),
    }

    return {
        "snippet": snippet,
        "status": status,
        "recordingDetails": recording_details,
    }


def get_match_ratio(session: Session, path: pathlib.Path) -> float:
    """Score how closely a video file name resembles a session title.

    The session title is compared against the file name without its
    extension (``path.stem``) using ``fuzzywuzzy.fuzz.ratio``.
    """
    title, file_stem = session.title, path.stem
    return fuzzywuzzy.fuzz.ratio(title, file_stem)


def choose_video(session: Session, min_score: int = 70) -> pathlib.Path:
    """Pick the video file whose name "looks most like" the session title.

    Every path in ``VIDEO_PATHS`` is scored with ``get_match_ratio`` and the
    best-scoring one is returned. Ties are broken by path ordering, since
    ``(score, path)`` tuples are compared.

    :param session: Session to find a video for.
    :param min_score: Lowest score accepted as a match. Defaults to 70.
    :raises ValueError: If there are no candidate files, or the best
        candidate scores below ``min_score``.
    """
    scored = ((get_match_ratio(session, p), p) for p in VIDEO_PATHS)
    # ``default`` keeps an empty candidate list on the same "no match" path
    # instead of surfacing max()'s unrelated "empty sequence" error.
    best = max(scored, default=None)
    if best is None or best[0] < min_score:
        raise ValueError("no match")
    return best[1]


def build_youtube():
    """Authorize through the console OAuth flow and return a YouTube client.

    The client-secrets file path is read from the ``OAUTH2_CLIENT_SECRET``
    environment variable, and only the upload scope is requested.
    """
    from apiclient.discovery import build
    from google_auth_oauthlib.flow import InstalledAppFlow

    secrets_file = os.environ["OAUTH2_CLIENT_SECRET"]
    oauth_flow = InstalledAppFlow.from_client_secrets_file(
        secrets_file, scopes=[YOUTUBE_UPLOAD_SCOPE]
    )
    # NOTE(review): run_console() appears to have been removed from newer
    # google-auth-oauthlib releases; confirm against the pinned version.
    return build("youtube", "v3", credentials=oauth_flow.run_console())
from .upload_video import upload_video
from .generate_playlist import generate_playlist


def parse_args(argv):
    """Parse command-line options.

    :param argv: Argument list to parse; ``None`` makes argparse read
        ``sys.argv[1:]``.
    :returns: Namespace with ``upload`` (bool), ``playlist`` (bool) and
        ``output_dir`` (str, defaults to ``"./videos"``).
    """
    parser = argparse.ArgumentParser()
    # A single definition of the upload flag: the older long-only form is
    # superseded by this one, which also accepts ``-u``.
    parser.add_argument(
        "-u",
        "--upload",
        action="store_true",
        help="Upload videos to YouTube channel",
    )
    parser.add_argument(
        "-p",
        "--playlist",
        action="store_true",
        help="Generate playlist information in json files",
    )
    parser.add_argument(
        "-o",
        "--output_dir",
        default="./videos",
        help="Output path of video information",
    )
    return parser.parse_args(argv)


def media_batch_reader(file_path, chuncksize=64 * (1 << 20)):
    """Read a whole file into memory chunk by chunk, showing a progress bar.

    :param file_path: ``pathlib.Path`` of the file to read.
    :param chuncksize: Bytes to read per step. Defaults to 64 MiB.
    :returns: The complete file content as ``bytes``.
    """
    print(f"Reading Vedio from:\n\t{file_path}")
    out = io.BytesIO()
    # Ceiling division, so a trailing partial chunk is counted as a step.
    total = -(-file_path.stat().st_size // chuncksize)
    with open(file_path, "rb") as f:
        # A bare functools.partial is not iterable. iter(callable, sentinel)
        # calls f.read(chuncksize) repeatedly until it returns b"" at EOF.
        chunks = iter(functools.partial(f.read, chuncksize), b"")
        for block in tqdm.tqdm(chunks, total=total):
            out.write(block)
    return out.getvalue()


# Entry point: parse the command-line options, then act on them.
# NOTE(review): this span comes from a rendered diff. Lines from the
# pre-change and post-change versions of main() appear interleaved and the
# original indentation is not preserved; confirm against the real file.
def main(argv=None):
options = parse_args(argv)

# The YouTube client is only built when --upload is given.
if options.upload:
youtube = build_youtube()

# Fetch the talk list from the URL environment variable and pair it with
# the conference name, first date and timezone.
source = ConferenceInfoSource(
requests.get(os.environ["URL"]).json(),
Conference(CONFERENCE_NAME, FIRST_DATE, TIMEZONE_TAIPEI),
)

# One pass per session: build the metadata, then look for a matching file.
for session in source.iter_sessions():
body = build_body(session)
try:
vid_path = choose_video(session)
# choose_video raises ValueError when no file matches well enough.
except ValueError:
print(f"No match, ignoring {session.title}")
continue

print(f"Uploading {session.title}")
print(f" {vid_path}")
# Without --upload this is a dry run: print the body and move on.
if not options.upload:
print(f"Would post: {json.dumps(body, indent=4)}\n")
continue

# The file content is read fully into memory before being handed to the
# upload request.
media = apiclient.http.MediaInMemoryUpload(
media_batch_reader(vid_path), resumable=True
)
# "part" lists the top-level keys of the body, comma-separated.
request = youtube.videos().insert(
part=",".join(body.keys()), body=body, media_body=media
)

# Send the upload chunk by chunk until a response arrives, advancing the
# bar by the change in whole percent since the previous chunk.
with tqdm.tqdm(total=100, ascii=True) as progressbar:
prev = 0
while True:
status, response = request.next_chunk()
if status:
curr = int(status.progress() * 100)
progressbar.update(curr - prev)
prev = curr
if response:
break
print(f" Done, as: https://youtu.be/{response['id']}")
# NOTE(review): upload_video() looks like the post-change replacement for
# the inline upload logic above; confirm.
upload_video()

# Move the processed file into the "done" directory.
# NOTE(review): VIDEO_PATHS is not updated here, so a moved file could still
# be picked by choose_video for a later session; confirm.
new_name = DONE_DIR_PATH.joinpath(vid_path.name)
print(f" {vid_path} -> {new_name}")
vid_path.rename(new_name)
# --playlist writes playlist data under the chosen output directory.
if options.playlist:
generate_playlist(options.output_dir)


if __name__ == "__main__":
Expand Down
Loading

0 comments on commit eb4572e

Please sign in to comment.