diff --git a/apps/cf/config/config.exs b/apps/cf/config/config.exs index a6a864dd..05b44a01 100644 --- a/apps/cf/config/config.exs +++ b/apps/cf/config/config.exs @@ -14,25 +14,6 @@ config :cf, soft_limitations_period: 15 * 60, hard_limitations_period: 3 * 60 * 60 -# Configure scheduler -config :cf, CF.Scheduler, - # Run only one instance across cluster - global: true, - debug_logging: false, - jobs: [ - # credo:disable-for-lines:10 - # Actions analysers - # Every minute - {"*/1 * * * *", {CF.Jobs.Reputation, :update, []}}, - # Every day - {"@daily", {CF.Jobs.Reputation, :reset_daily_limits, []}}, - # Every minute - {"*/1 * * * *", {CF.Jobs.Flags, :update, []}}, - # Various updaters - # Every 5 minutes - {"*/5 * * * *", {CF.Jobs.Moderation, :update, []}} - ] - # Configure mailer config :cf, CF.Mailer, adapter: Bamboo.MailgunAdapter diff --git a/apps/cf/config/test.exs b/apps/cf/config/test.exs index 306ecf23..1a6cb8b6 100644 --- a/apps/cf/config/test.exs +++ b/apps/cf/config/test.exs @@ -14,9 +14,6 @@ config :cf, # Print only warnings and errors during test config :logger, level: :warn -# Disable CRON tasks on test -config :cf, CF.Scheduler, jobs: [] - # Mails config :cf, CF.Mailer, adapter: Bamboo.TestAdapter diff --git a/apps/cf/lib/videos/captions_fetcher.ex b/apps/cf/lib/videos/captions_fetcher.ex index 649d294f..5e184f99 100644 --- a/apps/cf/lib/videos/captions_fetcher.ex +++ b/apps/cf/lib/videos/captions_fetcher.ex @@ -3,5 +3,6 @@ defmodule CF.Videos.CaptionsFetcher do Fetch captions for videos. 
""" - @callback fetch(DB.Schema.Video.t()) :: {:ok, DB.Schema.VideoCaption.t()} | {:error, binary()} + @callback fetch(DB.Schema.Video.t()) :: + {:ok, %{content: String.t(), format: String.t()}} | {:error, term()} end diff --git a/apps/cf/lib/videos/captions_fetcher_test.ex b/apps/cf/lib/videos/captions_fetcher_test.ex index 178e08ca..7b581593 100644 --- a/apps/cf/lib/videos/captions_fetcher_test.ex +++ b/apps/cf/lib/videos/captions_fetcher_test.ex @@ -7,7 +7,7 @@ defmodule CF.Videos.CaptionsFetcherTest do @impl true def fetch(_video) do - captions = %DB.Schema.VideoCaption{ + captions = %{ content: "__TEST-CONTENT__", format: "xml" } diff --git a/apps/cf/lib/videos/captions_fetcher_youtube.ex b/apps/cf/lib/videos/captions_fetcher_youtube.ex index 69666aa6..ffe421bf 100644 --- a/apps/cf/lib/videos/captions_fetcher_youtube.ex +++ b/apps/cf/lib/videos/captions_fetcher_youtube.ex @@ -5,34 +5,33 @@ defmodule CF.Videos.CaptionsFetcherYoutube do @behaviour CF.Videos.CaptionsFetcher + require Logger + + alias GoogleApi.YouTube.V3.Connection, as: YouTubeConnection + alias GoogleApi.YouTube.V3.Api.Captions, as: YouTubeCaptions + @impl true def fetch(%{youtube_id: youtube_id, language: language}) do - with {:ok, content} <- fetch_captions_content(youtube_id, language) do - captions = %DB.Schema.VideoCaption{ - content: content, - format: "xml" - } - - {:ok, captions} + if Application.get_env(:cf, :youtube_api_key) do + with {:ok, content} <- fetch_captions_content(youtube_id, language) do + captions = %{ + content: content, + format: "xml" + } + + {:ok, captions} + end + else + Logger.error("No YouTube API key provided") + {:error, "No YouTube API key provided"} end end defp fetch_captions_content(video_id, locale) do - case HTTPoison.get("http://video.google.com/timedtext?lang=#{locale}&v=#{video_id}") do - {:ok, %HTTPoison.Response{status_code: 200, body: ""}} -> - {:error, :not_found} - - {:ok, %HTTPoison.Response{status_code: 200, body: body}} -> - {:ok, body} - - {:ok, 
%HTTPoison.Response{status_code: 404}} -> - {:error, :not_found} - - {:ok, %HTTPoison.Response{status_code: _}} -> - {:error, :unknown} - - {:error, %HTTPoison.Error{reason: reason}} -> - {:error, reason} - end + # TODO: YouTube returns a 401: Request is missing required authentication credential. Expected OAuth 2 access token, login cookie or other valid authentication credential. + # Maybe the API key is invalid, or we need to use OAuth 2.0 + YouTubeConnection.new() + |> YouTubeCaptions.youtube_captions_download(video_id, tfmt: "srt") + |> IO.inspect() end end diff --git a/apps/cf/lib/videos/videos.ex b/apps/cf/lib/videos/videos.ex index 81ec17e2..3eae8b8b 100644 --- a/apps/cf/lib/videos/videos.ex +++ b/apps/cf/lib/videos/videos.ex @@ -3,6 +3,8 @@ defmodule CF.Videos do The boundary for the Videos system. """ + require Logger + import Ecto.Query, warn: false import CF.Videos.MetadataFetcher import CF.Videos.CaptionsFetcher @@ -167,12 +169,33 @@ defmodule CF.Videos do iex> download_captions(video) """ def download_captions(video = %Video{}) do - with {:ok, captions} <- @captions_fetcher.fetch(video) do - captions - |> VideoCaption.changeset(%{video_id: video.id}) - |> Repo.insert() - - {:ok, captions} + # Get latest captions + existing_captions = + VideoCaption + |> where([vc], vc.video_id == ^video.id) + |> order_by(desc: :inserted_at) + |> limit(1) + |> Repo.one() + + # Try to fetch new captions + case @captions_fetcher.fetch(video) do + {:ok, captions} -> + (existing_captions || %VideoCaption{}) + |> VideoCaption.changeset(Map.merge(captions, %{video_id: video.id})) + |> Repo.insert_or_update() + |> Kaur.Result.ok() + + # If no Youtube caption found, insert a dummy entry in DB to prevent retrying for 30 days + {:error, :not_found} -> + # TODO + # unless existing_captions do + # Repo.insert(%DB.Schema.VideoCaption{video_id: video.id, content: "", format: "xml"}) + # end + + {:error, :not_found} + + result -> + result end end diff --git 
a/apps/cf_jobs/config/config.exs b/apps/cf_jobs/config/config.exs index 310ab01c..10f187d9 100644 --- a/apps/cf_jobs/config/config.exs +++ b/apps/cf_jobs/config/config.exs @@ -8,6 +8,7 @@ config :cf_jobs, CF.Jobs.Scheduler, jobs: [ # Reputation update_reputations: [ + # every 20 seconds schedule: {:extended, "*/20"}, task: {CF.Jobs.Reputation, :update, []}, overlap: false @@ -19,21 +20,32 @@ config :cf_jobs, CF.Jobs.Scheduler, ], # Moderation update_moderation: [ + # every 5 minutes schedule: "*/5 * * * *", task: {CF.Jobs.Moderation, :update, []}, overlap: false ], # Flags update_flags: [ + # every minute schedule: "*/1 * * * *", task: {CF.Jobs.Flags, :update, []}, overlap: false ], # Notifications create_notifications: [ + # every 5 seconds schedule: {:extended, "*/5"}, task: {CF.Jobs.CreateNotifications, :update, []}, overlap: false + ], + # Captions + download_captions: [ + schedule: {:extended, "*/10"}, + # currently every 10 seconds (extended syntax); should run every 10 minutes + # TODO: schedule: "*/10 * * * *", + task: {CF.Jobs.DownloadCaptions, :update, []}, + overlap: false ] ] diff --git a/apps/cf_jobs/lib/application.ex b/apps/cf_jobs/lib/application.ex index 277aae33..b1e2853d 100644 --- a/apps/cf_jobs/lib/application.ex +++ b/apps/cf_jobs/lib/application.ex @@ -16,7 +16,8 @@ defmodule CF.Jobs.Application do worker(CF.Jobs.Reputation, []), worker(CF.Jobs.Flags, []), worker(CF.Jobs.Moderation, []), - worker(CF.Jobs.CreateNotifications, []) + worker(CF.Jobs.CreateNotifications, []), + worker(CF.Jobs.DownloadCaptions, []) ] # Do not start scheduler in tests diff --git a/apps/cf_jobs/lib/jobs/download_captions.ex b/apps/cf_jobs/lib/jobs/download_captions.ex new file mode 100644 index 00000000..feeace97 --- /dev/null +++ b/apps/cf_jobs/lib/jobs/download_captions.ex @@ -0,0 +1,66 @@ +defmodule CF.Jobs.DownloadCaptions do + @behaviour CF.Jobs.Job + + require Logger + import Ecto.Query + import ScoutApm.Tracing + + alias DB.Repo + alias DB.Schema.UserAction + alias DB.Schema.Video + alias DB.Schema.VideoCaption + 
alias DB.Schema.UsersActionsReport + + alias CF.Jobs.ReportManager + + @name :download_captions + @analyser_id UsersActionsReport.analyser_id(@name) + + # --- Client API --- + + def name, do: @name + + def start_link() do + GenServer.start_link(__MODULE__, :ok, name: __MODULE__) + end + + def init(args) do + {:ok, args} + end + + # 2 minutes + @timeout 120_000 + def update() do + GenServer.call(__MODULE__, :download_captions, @timeout) + end + + # --- Server callbacks --- + @transaction_opts [type: "background", name: "download_captions"] + def handle_call(:download_captions, _from, _state) do + get_videos() + |> Enum.map(fn video -> + Logger.info("Downloading captions for video #{video.id}") + CF.Videos.download_captions(video) + end) + + {:reply, :ok, :ok} + end + + # Get all videos that need new captions. We fetch new captions: + # - For any video that doesn't have captions yet + # - For videos whose captions haven't been updated in the last 30 days + defp get_videos() do + Repo.all( + from(v in Video, + limit: 15, + left_join: captions in VideoCaption, + on: captions.video_id == v.id, + where: + is_nil(captions.id) or + captions.inserted_at < ^DateTime.add(DateTime.utc_now(), -30 * 24 * 60 * 60, :second), + group_by: v.id, + order_by: [desc: v.inserted_at] + ) + ) + end +end diff --git a/apps/db/lib/db_schema/users_actions_report.ex b/apps/db/lib/db_schema/users_actions_report.ex index 2abef0d0..6cd958b1 100644 --- a/apps/db/lib/db_schema/users_actions_report.ex +++ b/apps/db/lib/db_schema/users_actions_report.ex @@ -34,6 +34,7 @@ defmodule DB.Schema.UsersActionsReport do def analyser_id(:achievements), do: 3 def analyser_id(:votes), do: 4 def analyser_id(:create_notifications), do: 5 + def analyser_id(:download_captions), do: 6 def status(:pending), do: 1 def status(:running), do: 2