diff --git a/core/config/config.exs b/core/config/config.exs
index df9b6aa4..08755ad2 100644
--- a/core/config/config.exs
+++ b/core/config/config.exs
@@ -60,6 +60,7 @@ config :canary, Oban,
     default: 10,
     github_processor: 2,
     webpage_processor: 10,
+    openapi_processor: 10,
     email: 10
   ],
   repo: Canary.Repo,
diff --git a/core/lib/canary/application.ex b/core/lib/canary/application.ex
index a8c5705c..54a9a878 100644
--- a/core/lib/canary/application.ex
+++ b/core/lib/canary/application.ex
@@ -10,6 +10,7 @@ defmodule Canary.Application do
     attach_oban_telemetry()
 
     :ok = Canary.Index.Collection.ensure(:webpage)
+    :ok = Canary.Index.Collection.ensure(:openapi)
     :ok = Canary.Index.Collection.ensure(:github_issue)
     :ok = Canary.Index.Collection.ensure(:github_discussion)
     :ok = Canary.Index.Stopword.ensure()
diff --git a/core/lib/canary/index/collection.ex b/core/lib/canary/index/collection.ex
index 7b6b42b4..fc39c710 100644
--- a/core/lib/canary/index/collection.ex
+++ b/core/lib/canary/index/collection.ex
@@ -1,5 +1,5 @@
 defmodule Canary.Index.Collection do
-  def ensure(name) when name in [:webpage, :github_issue, :github_discussion] do
+  def ensure(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
     with {:error, _} <- Canary.Index.Client.get_collection(name),
          {:error, _} <- Canary.Index.Client.create_collection(name, fields(name)) do
       :error
@@ -8,7 +8,7 @@ defmodule Canary.Index.Collection do
     end
   end
 
-  defp fields(name) when name in [:webpage, :github_issue, :github_discussion] do
+  defp fields(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
     # https://typesense.org/docs/27.0/api/collections.html#indexing-all-but-some-fields
     shared = [
       %{name: "source_id", type: "string"},
@@ -26,6 +26,15 @@ defmodule Canary.Index.Collection do
           %{name: "content", type: "string", stem: true}
         ]
 
+      :openapi ->
+        [
+          %{name: "path", type: "string", stem: true},
+          %{name: "get", type: "string", stem: true, optional: true},
+          %{name: "post", type: "string", stem: true, optional: true},
+          %{name: "put", type: "string", stem: true, optional: true},
+          %{name: "delete", type: "string", stem: true, optional: true}
+        ]
+
       :github_issue ->
         [
           %{name: "title", type: "string", stem: true},
diff --git a/core/lib/canary/index/document.ex b/core/lib/canary/index/document.ex
index 8988bac0..e50ee87b 100644
--- a/core/lib/canary/index/document.ex
+++ b/core/lib/canary/index/document.ex
@@ -33,6 +33,20 @@ defmodule Canary.Index.Document.Webpage.Meta do
   defstruct Shared.meta_fields()
 end
 
+defmodule Canary.Index.Document.OpenAPI do
+  alias Canary.Index.Document.Shared
+
+  @derive Jason.Encoder
+  defstruct Shared.top_level_fields() ++ [:path, :get, :post, :put, :delete]
+end
+
+defmodule Canary.Index.Document.OpenAPI.Meta do
+  alias Canary.Index.Document.Shared
+
+  @derive Jason.Encoder
+  defstruct Shared.meta_fields()
+end
+
 defmodule Canary.Index.Document.GithubIssue do
   alias Canary.Index.Document.Shared
 
diff --git a/core/lib/canary/index/index.ex b/core/lib/canary/index/index.ex
index 6effeb4e..557138a0 100644
--- a/core/lib/canary/index/index.ex
+++ b/core/lib/canary/index/index.ex
@@ -1,6 +1,7 @@
 defmodule Canary.Index do
   alias Canary.Sources.Source
   alias Canary.Sources.Webpage
+  alias Canary.Sources.OpenAPI
   alias Canary.Sources.GithubIssue
   alias Canary.Sources.GithubDiscussion
 
@@ -29,6 +30,29 @@ defmodule Canary.Index do
     Client.index_document(:webpage, doc)
   end
 
+  def insert_document(%OpenAPI.Chunk{} = chunk) do
+    meta = %Document.OpenAPI.Meta{
+      url: chunk.url,
+      document_id: chunk.document_id,
+      is_parent: false
+    }
+
+    doc = %Document.OpenAPI{
+      id: chunk.index_id,
+      source_id: chunk.source_id,
+      path: chunk.path,
+      get: chunk.get,
+      post: chunk.post,
+      put: chunk.put,
+      delete: chunk.delete,
+      tags: [],
+      is_empty_tags: true,
+      meta: meta
+    }
+
+    Client.index_document(:openapi, doc)
+  end
+
   def insert_document(%GithubIssue.Chunk{} = chunk) do
     meta = %Document.GithubIssue.Meta{
       url: chunk.url,
@@ -72,6 +96,7 @@ defmodule Canary.Index do
   def delete_document(source_type, id)
       when source_type in [
              :webpage,
+             :openapi,
              :github_issue,
              :github_discussion
            ] do
@@ -126,29 +151,41 @@ defmodule Canary.Index do
         |> Enum.reject(&is_nil/1)
         |> Enum.join(" && ")
 
-      query_by = ["title", "content"] |> Enum.join(",")
-      query_by_weights = [3, 1] |> Enum.join(",")
-
       %{
         collection: to_string(type),
         q: query,
         prefix: true,
-        query_by: query_by,
-        query_by_weights: query_by_weights,
         filter_by: filter_by,
         sort_by: "_text_match:desc",
-        highlight_fields: "content",
         stopwords: Canary.Index.Stopword.id(),
         prioritize_exact_match: true,
         prioritize_token_position: false,
         prioritize_num_matching_fields: false,
         max_candidates: 4 * 4
       }
-      |> add_embedding_args(opts)
+      |> handle_source_type(type)
+      |> handle_embedding(opts)
     end)
   end
 
-  defp add_embedding_args(args, opts) do
+  defp handle_source_type(args, type)
+       when type in [:webpage, :github_issue, :github_discussion] do
+    args
+    |> Map.put(:highlight_fields, "content")
+    |> Map.put(:query_by, Enum.join(["title", "content"], ","))
+    |> Map.put(:query_by_weights, Enum.join([3, 1], ","))
+  end
+
+  defp handle_source_type(args, type) when type in [:openapi] do
+    ops = ["get", "post", "put", "delete"]
+
+    args
+    |> Map.put(:highlight_fields, Enum.join(ops, ","))
+    |> Map.put(:query_by, Enum.join(["path"] ++ ops, ","))
+    |> Map.put(:query_by_weights, Enum.join([1, 2, 2, 2, 2], ","))
+  end
+
+  defp handle_embedding(args, opts) do
     embedding = opts[:embedding]
     embedding_alpha = opts[:embedding_alpha] || 0.3
 
diff --git a/core/lib/canary/searcher.ex b/core/lib/canary/searcher.ex
index 2ee955e2..96f0c34c 100644
--- a/core/lib/canary/searcher.ex
+++ b/core/lib/canary/searcher.ex
@@ -108,6 +108,9 @@ defmodule Canary.Searcher.Default do
             :webpage ->
               %{}
 
+            :openapi ->
+              %{}
+
             :github_issue ->
               %{closed: doc.meta.value.closed}
 
diff --git a/core/lib/canary/sources/document/create_openapi.ex b/core/lib/canary/sources/document/create_openapi.ex
new file mode 100644
index 00000000..3aab12d3
--- /dev/null
+++ b/core/lib/canary/sources/document/create_openapi.ex
@@ -0,0 +1,101 @@
+defmodule Canary.Sources.Document.CreateOpenAPI do
+  @moduledoc """
+  Ash change that stores a fetched OpenAPI schema as one chunk per path.
+
+  Required options: `:source_id_argument`, `:fetcher_result_argument`,
+  `:chunks_attribute` and `:meta_attribute`.
+  """
+
+  use Ash.Resource.Change
+
+  alias Canary.Sources.Document
+  alias Canary.Sources.OpenAPI
+
+  @required_opts [
+    :source_id_argument,
+    :fetcher_result_argument,
+    :chunks_attribute,
+    :meta_attribute
+  ]
+
+  @impl true
+  def init(opts) do
+    # Ash expects `{:ok, opts}` or `{:error, term}` here, never a bare `:error`.
+    case Enum.filter(@required_opts, &is_nil(opts[&1])) do
+      [] -> {:ok, opts}
+      missing -> {:error, "missing required options: #{inspect(missing)}"}
+    end
+  end
+
+  @impl true
+  def change(changeset, opts, _context) do
+    source_id = Ash.Changeset.get_argument(changeset, opts[:source_id_argument])
+
+    %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema, served_url: served_url} =
+      Ash.Changeset.get_argument(changeset, opts[:fetcher_result_argument])
+
+    changeset
+    |> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%OpenAPI.DocumentMeta{}))
+    |> Ash.Changeset.change_attribute(opts[:chunks_attribute], [])
+    # Chunks reference the document id, so they are built after the record exists.
+    |> Ash.Changeset.after_action(fn _, record ->
+      hash =
+        schema
+        |> Jason.encode!()
+        |> then(&:crypto.hash(:sha256, &1))
+        |> Base.encode16(case: :lower)
+
+      chunks_create_result =
+        schema.paths
+        |> Enum.map(fn
+          {path, %OpenApiSpex.PathItem{get: get, post: post, put: put, delete: delete}} ->
+            %{
+              source_id: source_id,
+              document_id: record.id,
+              url: render_url(served_url, path),
+              path: path,
+              get: render_operation(get),
+              post: render_operation(post),
+              put: render_operation(put),
+              delete: render_operation(delete)
+            }
+        end)
+        |> Ash.bulk_create(OpenAPI.Chunk, :create,
+          return_errors?: true,
+          return_records?: true
+        )
+
+      meta = %OpenAPI.DocumentMeta{hash: hash}
+
+      case chunks_create_result do
+        %Ash.BulkResult{status: :success, records: records} ->
+          case Document.update(record, wrap_union(meta), Enum.map(records, &wrap_union/1)) do
+            {:ok, updated_record} -> {:ok, updated_record}
+            error -> error
+          end
+
+        %Ash.BulkResult{errors: errors} ->
+          {:error, errors}
+      end
+    end)
+  end
+
+  # Sets the fragment of `base_url` to a `:~:text=` directive for `path`.
+  defp render_url(base_url, path) do
+    URI.parse(base_url)
+    |> Map.put(:fragment, ":~:text=#{path}")
+    |> URI.to_string()
+  end
+
+  defp render_operation(nil), do: nil
+
+  # Joins whichever of summary, description and operationId are present.
+  defp render_operation(%OpenApiSpex.Operation{} = op) do
+    [op.summary, op.description, op.operationId]
+    |> Enum.reject(&is_nil/1)
+    |> Enum.join(" / ")
+  end
+
+  defp wrap_union(%Ash.Union{} = v), do: v
+  defp wrap_union(v), do: %Ash.Union{type: :openapi, value: v}
+end
diff --git a/core/lib/canary/sources/document/document.ex b/core/lib/canary/sources/document/document.ex
index 2011b880..2f381d02 100644
--- a/core/lib/canary/sources/document/document.ex
+++ b/core/lib/canary/sources/document/document.ex
@@ -46,6 +46,21 @@ defmodule Canary.Sources.Document do
       }
     end
 
+    create :create_openapi do
+      argument :source_id, :uuid, allow_nil?: false
+      argument :fetcher_result, :map, allow_nil?: false
+
+      change manage_relationship(:source_id, :source, type: :append)
+
+      change {
+        Canary.Sources.Document.CreateOpenAPI,
+        source_id_argument: :source_id,
+        fetcher_result_argument: :fetcher_result,
+        chunks_attribute: :chunks,
+        meta_attribute: :meta
+      }
+    end
+
     create :create_github_issue do
       argument :source_id, :uuid, allow_nil?: false
       argument :fetcher_results, {:array, :map}, allow_nil?: false
diff --git a/core/lib/canary/sources/openapi_chunk.ex b/core/lib/canary/sources/openapi_chunk.ex
new file mode 100644
index 00000000..091ae9bf
--- /dev/null
+++ b/core/lib/canary/sources/openapi_chunk.ex
@@ -0,0 +1,41 @@
+defmodule Canary.Sources.OpenAPI.Chunk do
+  @moduledoc """
+  Embedded resource for one OpenAPI path and the text of its operations.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  @ops [:get, :post, :put, :delete]
+
+  attributes do
+    attribute :index_id, :uuid, allow_nil?: false
+    attribute :source_id, :uuid, allow_nil?: false
+    attribute :document_id, :string, allow_nil?: false
+
+    attribute :url, :string, allow_nil?: false
+    attribute :path, :string, allow_nil?: false
+
+    Enum.each(@ops, fn op ->
+      attribute op, :string, allow_nil?: true
+    end)
+  end
+
+  actions do
+    defaults [:read]
+
+    create :create do
+      primary? true
+      accept [:index_id, :source_id, :document_id] ++ [:url, :path] ++ @ops
+      change {Canary.Change.AddToIndex, index_id_attribute: :index_id}
+    end
+
+    destroy :destroy do
+      primary? true
+
+      change {
+        Canary.Change.RemoveFromIndex,
+        source_type: :openapi, index_id_attribute: :index_id
+      }
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_config.ex b/core/lib/canary/sources/openapi_config.ex
new file mode 100644
index 00000000..9f576784
--- /dev/null
+++ b/core/lib/canary/sources/openapi_config.ex
@@ -0,0 +1,30 @@
+defmodule Canary.Sources.OpenAPI.Config do
+  @moduledoc """
+  Embedded configuration of an OpenAPI source: spec URL, served URL and viewer kind.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  attributes do
+    attribute :source_url, :string, allow_nil?: false
+    attribute :served_url, :string, allow_nil?: false
+
+    attribute :served_as, :atom,
+      constraints: [one_of: [:swagger, :redoc, :rapi]],
+      allow_nil?: true
+  end
+
+  actions do
+    defaults [:read]
+
+    create :create do
+      primary? true
+      accept [:source_url, :served_url, :served_as]
+    end
+
+    update :update do
+      primary? true
+      accept [:source_url, :served_url, :served_as]
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_document_meta.ex b/core/lib/canary/sources/openapi_document_meta.ex
new file mode 100644
index 00000000..9768ec00
--- /dev/null
+++ b/core/lib/canary/sources/openapi_document_meta.ex
@@ -0,0 +1,25 @@
+defmodule Canary.Sources.OpenAPI.DocumentMeta do
+  @moduledoc """
+  Embedded document metadata storing the hash of the OpenAPI schema.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  attributes do
+    attribute :hash, :string, allow_nil?: false
+  end
+
+  actions do
+    defaults [:read, :destroy]
+
+    create :create do
+      primary? true
+      accept [:hash]
+    end
+
+    update :update do
+      primary? true
+      accept [:hash]
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_fetcher.ex b/core/lib/canary/sources/openapi_fetcher.ex
new file mode 100644
index 00000000..1119fea1
--- /dev/null
+++ b/core/lib/canary/sources/openapi_fetcher.ex
@@ -0,0 +1,50 @@
+defmodule Canary.Sources.OpenAPI.FetcherResult do
+  @moduledoc """
+  Result of `Canary.Sources.OpenAPI.Fetcher.run/1`: the schema and how it is served.
+  """
+
+  defstruct [:schema, :served_url, :served_as]
+
+  @type t :: %__MODULE__{
+          schema: OpenApiSpex.OpenApi.t(),
+          served_url: String.t(),
+          served_as: atom()
+        }
+end
+
+defmodule Canary.Sources.OpenAPI.Fetcher do
+  @moduledoc """
+  Downloads and parses the OpenAPI document of an `OpenAPI.Config`.
+  """
+
+  alias Canary.Sources.OpenAPI
+
+  @doc """
+  Requests `config.source_url` and builds a schema from the response body.
+
+  Returns `{:ok, %OpenAPI.FetcherResult{}}` for an HTTP 200 response and
+  `{:error, reason}` for any other status or a failed request.
+  """
+  @spec run(struct()) :: {:ok, OpenAPI.FetcherResult.t()} | {:error, term()}
+  def run(%OpenAPI.Config{} = config) do
+    case Req.get(config.source_url) do
+      {:ok, %Req.Response{status: 200, body: body}} ->
+        {:ok,
+         %OpenAPI.FetcherResult{
+           schema: OpenApiSpex.schema_from_map(body),
+           # The spec is downloaded from `source_url`, but chunk URLs must point at
+           # the page where the docs are served.
+           served_url: config.served_url,
+           served_as: config.served_as
+         }}
+
+      # Without these clauses a non-200 `{:ok, response}` would be returned as-is
+      # and look like a success to the caller.
+      {:ok, %Req.Response{status: status}} ->
+        {:error, {:unexpected_status, status}}
+
+      {:error, reason} ->
+        {:error, reason}
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_syncer.ex b/core/lib/canary/sources/openapi_syncer.ex
new file mode 100644
index 00000000..771832af
--- /dev/null
+++ b/core/lib/canary/sources/openapi_syncer.ex
@@ -0,0 +1,64 @@
+defmodule Canary.Sources.OpenAPI.Syncer do
+  @moduledoc """
+  Keeps the stored document of an OpenAPI source in step with a fetched schema.
+  """
+
+  alias Canary.Sources.Document
+  alias Canary.Sources.OpenAPI
+
+  require Ash.Query
+
+  @doc """
+  Creates a new document for `source_id` and deletes its other documents,
+  unless the stored hash already matches the incoming schema.
+  """
+  def run(source_id, %OpenAPI.FetcherResult{} = incomings) do
+    existing_doc =
+      Document
+      |> Ash.Query.filter(source_id == ^source_id)
+      |> Ash.Query.build(select: [:id, :meta])
+      |> Ash.read!()
+      |> Enum.at(0, nil)
+
+    if hash_eq?(existing_doc, incomings) do
+      :ok
+    else
+      create_changeset =
+        Ash.Changeset.for_create(Document, :create_openapi, %{
+          source_id: source_id,
+          fetcher_result: incomings
+        })
+
+      with {:ok, %{id: id}} <- Ash.create(create_changeset),
+           :ok <- remove_docs(source_id, exclude_id: id) do
+        :ok
+      end
+    end
+  end
+
+  defp remove_docs(source_id, opts) do
+    exclude_id = opts[:exclude_id] || ""
+
+    case Document
+         |> Ash.Query.filter(source_id == ^source_id and id != ^exclude_id)
+         |> Ash.bulk_destroy(:destroy, %{}, return_errors?: true) do
+      %Ash.BulkResult{status: :success} -> :ok
+      %Ash.BulkResult{errors: errors} -> {:error, errors}
+    end
+  end
+
+  defp hash_eq?(nil, _), do: false
+
+  defp hash_eq?(
+         %Document{meta: %Ash.Union{type: :openapi, value: %OpenAPI.DocumentMeta{hash: hash_a}}},
+         %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema}
+       ) do
+    hash_b =
+      schema
+      |> Jason.encode!()
+      |> then(&:crypto.hash(:sha256, &1))
+      |> Base.encode16(case: :lower)
+
+    hash_a == hash_b
+  end
+end
diff --git a/core/lib/canary/sources/source.ex b/core/lib/canary/sources/source.ex
index 306e92dd..3d6ddd05 100644
--- a/core/lib/canary/sources/source.ex
+++ b/core/lib/canary/sources/source.ex
@@ -111,6 +111,7 @@ defmodule Canary.Sources.Source do
         worker =
           case type do
             :webpage -> Canary.Workers.WebpageProcessor
+            :openapi -> Canary.Workers.OpenAPIProcessor
             :github_issue -> Canary.Workers.GithubIssueProcessor
             :github_discussion -> Canary.Workers.GithubDiscussionProcessor
           end
@@ -151,6 +152,9 @@ defmodule Canary.Sources.Source do
               :webpage ->
                 Canary.Workers.WebpageProcessor.new(%{source_id: source_id})
 
+              :openapi ->
+                Canary.Workers.OpenAPIProcessor.new(%{source_id: source_id})
+
               :github_issue ->
                 Canary.Workers.GithubIssueProcessor.new(%{source_id: source_id})
 
diff --git a/core/lib/canary/workers/job_reporter.ex b/core/lib/canary/workers/job_reporter.ex
index 3a4ff73d..a3fdd201 100644
--- a/core/lib/canary/workers/job_reporter.ex
+++ b/core/lib/canary/workers/job_reporter.ex
@@ -6,6 +6,7 @@ defmodule Canary.Workers.JobReporter do
   @processors Enum.map(
                 [
                   Workers.WebpageProcessor,
+                  Workers.OpenAPIProcessor,
                   Workers.GithubIssueProcessor,
                   Workers.GithubDiscussionProcessor
                 ],
diff --git a/core/lib/canary/workers/openapi_processor.ex b/core/lib/canary/workers/openapi_processor.ex
new file mode 100644
index 00000000..acd784e2
--- /dev/null
+++ b/core/lib/canary/workers/openapi_processor.ex
@@ -0,0 +1,34 @@
+defmodule Canary.Workers.OpenAPIProcessor do
+  @moduledoc """
+  Oban worker that fetches an OpenAPI source and syncs its documents.
+  """
+
+  use Oban.Worker,
+    queue: :openapi_processor,
+    max_attempts: 2,
+    unique: [
+      period: if(Application.get_env(:canary, :env) == :prod, do: 24 * 60 * 60, else: 10),
+      fields: [:worker, :queue, :args],
+      states: Oban.Job.states() -- [:discarded, :cancelled],
+      timestamp: :scheduled_at
+    ]
+
+  alias Canary.Sources.Source
+  alias Canary.Sources.OpenAPI
+
+  @impl true
+  def perform(%Oban.Job{args: %{"source_id" => id}}) do
+    case Ash.get(Source, id) do
+      # Nothing to process when the source cannot be loaded.
+      {:error, _} -> :ok
+      {:ok, source} -> process(source)
+    end
+  end
+
+  defp process(%Source{id: source_id, config: %Ash.Union{type: :openapi, value: config}}) do
+    # `<-` (not `=`) so a fetch error is returned to Oban instead of raising `MatchError`.
+    with {:ok, %OpenAPI.FetcherResult{} = incomings} <- OpenAPI.Fetcher.run(config) do
+      OpenAPI.Syncer.run(source_id, incomings)
+    end
+  end
+end
diff --git a/core/lib/canary_web/live/source_live/create.ex b/core/lib/canary_web/live/source_live/create.ex
index ba27d07a..7c0164b6 100644
--- a/core/lib/canary_web/live/source_live/create.ex
+++ b/core/lib/canary_web/live/source_live/create.ex
@@ -4,10 +4,17 @@ defmodule CanaryWeb.SourceLive.Create do
 
   @config_types [
     {"Webpage", "webpage"},
+    {"OpenAPI", "openapi"},
     {"Github Issue", "github_issue"},
     {"Github Discussion", "github_discussion"}
   ]
 
+  @openapi_types [
+    {"Swagger", "swagger"},
+    {"Redoc", "redoc"},
+    {"Rapi", "rapi"}
+  ]
+
   @impl true
   def render(assigns) do
     ~H"""
@@ -48,6 +55,28 @@ defmodule CanaryWeb.SourceLive.Create do
                 form_control={%{label: "URL"}}
                 is_full_width
               />
+            <% "openapi" -> %>
+
+
+
             <% "github_issue" -> %>
       assign(assigns)
       |> assign(:config_types, @config_types)
+      |> assign(:openapi_types, @openapi_types)
 
     form =
       Canary.Sources.Source
diff --git a/core/lib/canary_web/live/source_live/detail.ex b/core/lib/canary_web/live/source_live/detail.ex
index 41ed5d2d..fd577ab0 100644
--- a/core/lib/canary_web/live/source_live/detail.ex
+++ b/core/lib/canary_web/live/source_live/detail.ex
@@ -36,6 +36,7 @@ defmodule CanaryWeb.SourceLive.Detail do
           value={
             case @source.config.type do
               :webpage -> "Webpage"
+              :openapi -> "OpenAPI"
               :github_issue -> "Github Issue"
               :github_discussion -> "Github Discussion"
             end
@@ -158,6 +159,32 @@ defmodule CanaryWeb.SourceLive.Detail do
+          <% :openapi -> %>
+
+
+
           <% :github_issue -> %>