Skip to content

Commit

Permalink
feat: Add job to download captions
Browse files Browse the repository at this point in the history
  • Loading branch information
Betree committed Jun 17, 2024
1 parent af7d1df commit 3c2ec3c
Show file tree
Hide file tree
Showing 10 changed files with 135 additions and 54 deletions.
19 changes: 0 additions & 19 deletions apps/cf/config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,6 @@ config :cf,
soft_limitations_period: 15 * 60,
hard_limitations_period: 3 * 60 * 60

# Configure scheduler
config :cf, CF.Scheduler,
# Run only one instance across cluster
global: true,
debug_logging: false,
jobs: [
# credo:disable-for-lines:10
# Actions analysers
# Every minute
{"*/1 * * * *", {CF.Jobs.Reputation, :update, []}},
# Every day
{"@daily", {CF.Jobs.Reputation, :reset_daily_limits, []}},
# Every minute
{"*/1 * * * *", {CF.Jobs.Flags, :update, []}},
# Various updaters
# Every 5 minutes
{"*/5 * * * *", {CF.Jobs.Moderation, :update, []}}
]

# Configure mailer
config :cf, CF.Mailer, adapter: Bamboo.MailgunAdapter

Expand Down
3 changes: 0 additions & 3 deletions apps/cf/config/test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,6 @@ config :cf,
# Print only warnings and errors during test
config :logger, level: :warn

# Disable CRON tasks on test
config :cf, CF.Scheduler, jobs: []

# Mails
config :cf, CF.Mailer, adapter: Bamboo.TestAdapter

Expand Down
3 changes: 2 additions & 1 deletion apps/cf/lib/videos/captions_fetcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ defmodule CF.Videos.CaptionsFetcher do
Fetch captions for videos.
"""

@callback fetch(DB.Schema.Video.t()) :: {:ok, DB.Schema.VideoCaption.t()} | {:error, binary()}
@callback fetch(DB.Schema.Video.t()) ::
{:ok, %{content: String.t(), format: String.t()}} | {:error, term()}
end
2 changes: 1 addition & 1 deletion apps/cf/lib/videos/captions_fetcher_test.ex
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ defmodule CF.Videos.CaptionsFetcherTest do

@impl true
def fetch(_video) do
captions = %DB.Schema.VideoCaption{
captions = %{
content: "__TEST-CONTENT__",
format: "xml"
}
Expand Down
45 changes: 22 additions & 23 deletions apps/cf/lib/videos/captions_fetcher_youtube.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,34 +5,33 @@ defmodule CF.Videos.CaptionsFetcherYoutube do

@behaviour CF.Videos.CaptionsFetcher

require Logger

alias GoogleApi.YouTube.V3.Connection, as: YouTubeConnection
alias GoogleApi.YouTube.V3.Api.Captions, as: YouTubeCaptions

@impl true
def fetch(%{youtube_id: youtube_id, language: language}) do
  # Captions are only requested when a value is configured under
  # `:cf, :youtube_api_key`; otherwise the problem is logged and reported
  # to the caller as an error tuple.
  if Application.get_env(:cf, :youtube_api_key) do
    # Any result of the helper that is not `{:ok, content}` is returned unchanged.
    with {:ok, content} <- fetch_captions_content(youtube_id, language) do
      # NOTE(review): the format is declared as "xml" while
      # `fetch_captions_content/2` requests `tfmt: "srt"` — confirm which
      # format is actually stored.
      {:ok, %{content: content, format: "xml"}}
    end
  else
    Logger.error("No YouTube API key provided")
    {:error, "No YouTube API key provided"}
  end
end

# Requests the captions of `video_id` from the YouTube API in "srt" format and
# returns the result of `YouTubeCaptions.youtube_captions_download/3` unchanged.
#
# The locale is currently not sent to the API, hence the underscore prefix.
defp fetch_captions_content(video_id, _locale) do
  # TODO: YouTube returns a 401: Request is missing required authentication credential. Expected OAuth 2 access token, login cookie or other valid authentication credential.
  # Maybe the API key is invalid, or we need to use OAuth 2.0
  YouTubeConnection.new()
  |> YouTubeCaptions.youtube_captions_download(video_id, tfmt: "srt")
end
end
35 changes: 29 additions & 6 deletions apps/cf/lib/videos/videos.ex
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ defmodule CF.Videos do
The boundary for the Videos system.
"""

require Logger

import Ecto.Query, warn: false
import CF.Videos.MetadataFetcher
import CF.Videos.CaptionsFetcher
Expand Down Expand Up @@ -167,12 +169,33 @@ defmodule CF.Videos do
iex> download_captions(video)
"""
def download_captions(video = %Video{}) do
  # Most recently inserted captions for this video, or `nil` when there are none
  existing_captions =
    VideoCaption
    |> where([vc], vc.video_id == ^video.id)
    |> order_by(desc: :inserted_at)
    |> limit(1)
    |> Repo.one()

  # Try to fetch new captions
  case @captions_fetcher.fetch(video) do
    {:ok, captions} ->
      # `||` is required here: the strict boolean `or` raises BadBooleanError
      # when its left operand is `nil` or a struct.
      #
      # The result of `Repo.insert_or_update/1` is returned as-is, so a
      # failed write surfaces as `{:error, changeset}` instead of being
      # wrapped inside an `{:ok, _}` tuple.
      (existing_captions || %VideoCaption{})
      |> VideoCaption.changeset(Map.merge(captions, %{video_id: video.id}))
      |> Repo.insert_or_update()

    # If no YouTube captions are found, insert a dummy entry in DB to prevent retrying for 30 days
    {:error, :not_found} ->
      # TODO
      # unless existing_captions do
      #   Repo.insert(%DB.Schema.VideoCaption{video_id: video.id, content: "", format: "xml"})
      # end

      {:error, :not_found}

    # Any other fetcher result is handed back to the caller untouched
    result ->
      result
  end
end

Expand Down
12 changes: 12 additions & 0 deletions apps/cf_jobs/config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ config :cf_jobs, CF.Jobs.Scheduler,
jobs: [
# Reputation
update_reputations: [
# every 20 seconds (extended cron expressions start with a seconds field)
schedule: {:extended, "*/20"},
task: {CF.Jobs.Reputation, :update, []},
overlap: false
Expand All @@ -19,21 +20,32 @@ config :cf_jobs, CF.Jobs.Scheduler,
],
# Moderation
update_moderation: [
# every 5 minutes
schedule: "*/5 * * * *",
task: {CF.Jobs.Moderation, :update, []},
overlap: false
],
# Flags
update_flags: [
# every minute
schedule: "*/1 * * * *",
task: {CF.Jobs.Flags, :update, []},
overlap: false
],
# Notifications
create_notifications: [
# every 5 seconds
schedule: {:extended, "*/5"},
task: {CF.Jobs.CreateNotifications, :update, []},
overlap: false
],
# Captions
download_captions: [
schedule: {:extended, "*/10"},
# NOTE: the extended schedule above fires every 10 seconds; the intended schedule is every 10 minutes
# TODO: schedule: "*/10 * * * *",
task: {CF.Jobs.DownloadCaptions, :update, []},
overlap: false
]
]

Expand Down
3 changes: 2 additions & 1 deletion apps/cf_jobs/lib/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ defmodule CF.Jobs.Application do
worker(CF.Jobs.Reputation, []),
worker(CF.Jobs.Flags, []),
worker(CF.Jobs.Moderation, []),
worker(CF.Jobs.CreateNotifications, [])
worker(CF.Jobs.CreateNotifications, []),
worker(CF.Jobs.DownloadCaptions, [])
]

# Do not start scheduler in tests
Expand Down
66 changes: 66 additions & 0 deletions apps/cf_jobs/lib/jobs/download_captions.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
defmodule CF.Jobs.DownloadCaptions do
  @moduledoc """
  Job that downloads captions for videos.

  Each run selects a small batch of videos that either have no stored captions
  or have captions inserted more than 30 days ago, then calls
  `CF.Videos.download_captions/1` for each of them. Runs are executed inside
  the GenServer process registered under this module's name.
  """

  @behaviour CF.Jobs.Job

  require Logger
  import Ecto.Query
  import ScoutApm.Tracing

  alias DB.Repo
  alias DB.Schema.UserAction
  alias DB.Schema.Video
  alias DB.Schema.VideoCaption
  alias DB.Schema.UsersActionsReport

  alias CF.Jobs.ReportManager

  @name :download_captions
  @analyser_id UsersActionsReport.analyser_id(@name)

  # Maximum number of videos handled by a single run
  @batch_size 15

  # Captions inserted longer ago than this are downloaded again (30 days, in seconds)
  @captions_max_age_seconds 30 * 24 * 60 * 60

  # How long a caller of `update/0` waits for a run to finish: 2 minutes
  @timeout 120_000

  # --- Client API ---

  @doc """
  Returns the name of this job (`:download_captions`).
  """
  def name, do: @name

  @doc """
  Starts the job process and registers it under the module name.
  """
  def start_link() do
    GenServer.start_link(__MODULE__, :ok, name: __MODULE__)
  end

  @doc false
  def init(args) do
    {:ok, args}
  end

  @doc """
  Runs the job and waits for it to complete.

  Returns `:ok`. The call exits if the job process does not reply within
  2 minutes.
  """
  def update() do
    GenServer.call(__MODULE__, :download_captions, @timeout)
  end

  # --- Server callbacks ---

  # NOTE(review): this attribute is never read in this module; presumably it is
  # meant for ScoutApm transaction instrumentation — confirm.
  @transaction_opts [type: "background", name: "download_captions"]
  def handle_call(:download_captions, _from, state) do
    # Only the side effects matter here, so iterate with `each` rather than `map`
    Enum.each(get_videos(), &download_for_video/1)

    {:reply, :ok, state}
  end

  # Downloads the captions of a single video and logs the reason when the
  # download reports an error, so failures are visible in the logs.
  defp download_for_video(video) do
    Logger.info("Downloading captions for video #{video.id}")

    case CF.Videos.download_captions(video) do
      {:error, reason} ->
        Logger.info("Captions not downloaded for video #{video.id}: #{inspect(reason)}")

      _ ->
        :ok
    end
  end

  # Get the videos that need new captions, most recently inserted videos first,
  # limited to one batch. We fetch new captions:
  # - For any video that doesn't have any captions yet
  # - For videos that have captions inserted more than 30 days ago
  #
  # NOTE(review): the filter is based on `inserted_at`. If existing captions are
  # updated in place rather than re-inserted, their `inserted_at` never moves
  # and the video would keep being selected once it is 30 days old — confirm.
  defp get_videos() do
    oldest_accepted = DateTime.add(DateTime.utc_now(), -@captions_max_age_seconds, :second)

    Repo.all(
      from(v in Video,
        limit: ^@batch_size,
        left_join: captions in VideoCaption,
        on: captions.video_id == v.id,
        where: is_nil(captions.id) or captions.inserted_at < ^oldest_accepted,
        group_by: v.id,
        order_by: [desc: v.inserted_at]
      )
    )
  end
end
1 change: 1 addition & 0 deletions apps/db/lib/db_schema/users_actions_report.ex
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ defmodule DB.Schema.UsersActionsReport do
def analyser_id(:achievements), do: 3
def analyser_id(:votes), do: 4
def analyser_id(:create_notifications), do: 5
def analyser_id(:download_captions), do: 6

def status(:pending), do: 1
def status(:running), do: 2
Expand Down

0 comments on commit 3c2ec3c

Please sign in to comment.