diff --git a/core/config/config.exs b/core/config/config.exs
index df9b6aa4..08755ad2 100644
--- a/core/config/config.exs
+++ b/core/config/config.exs
@@ -60,6 +60,7 @@ config :canary, Oban,
default: 10,
github_processor: 2,
webpage_processor: 10,
+ openapi_processor: 10,
email: 10
],
repo: Canary.Repo,
diff --git a/core/lib/canary/application.ex b/core/lib/canary/application.ex
index a8c5705c..54a9a878 100644
--- a/core/lib/canary/application.ex
+++ b/core/lib/canary/application.ex
@@ -10,6 +10,7 @@ defmodule Canary.Application do
attach_oban_telemetry()
:ok = Canary.Index.Collection.ensure(:webpage)
+ :ok = Canary.Index.Collection.ensure(:openapi)
:ok = Canary.Index.Collection.ensure(:github_issue)
:ok = Canary.Index.Collection.ensure(:github_discussion)
:ok = Canary.Index.Stopword.ensure()
diff --git a/core/lib/canary/index/collection.ex b/core/lib/canary/index/collection.ex
index 7b6b42b4..fc39c710 100644
--- a/core/lib/canary/index/collection.ex
+++ b/core/lib/canary/index/collection.ex
@@ -1,5 +1,5 @@
defmodule Canary.Index.Collection do
- def ensure(name) when name in [:webpage, :github_issue, :github_discussion] do
+ def ensure(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
with {:error, _} <- Canary.Index.Client.get_collection(name),
{:error, _} <- Canary.Index.Client.create_collection(name, fields(name)) do
:error
@@ -8,7 +8,7 @@ defmodule Canary.Index.Collection do
end
end
- defp fields(name) when name in [:webpage, :github_issue, :github_discussion] do
+ defp fields(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
# https://typesense.org/docs/27.0/api/collections.html#indexing-all-but-some-fields
shared = [
%{name: "source_id", type: "string"},
@@ -26,6 +26,15 @@ defmodule Canary.Index.Collection do
%{name: "content", type: "string", stem: true}
]
+ :openapi ->
+ [
+ %{name: "path", type: "string", stem: true},
+ %{name: "get", type: "string", stem: true, optional: true},
+ %{name: "post", type: "string", stem: true, optional: true},
+ %{name: "put", type: "string", stem: true, optional: true},
+ %{name: "delete", type: "string", stem: true, optional: true}
+ ]
+
:github_issue ->
[
%{name: "title", type: "string", stem: true},
diff --git a/core/lib/canary/index/document.ex b/core/lib/canary/index/document.ex
index 8988bac0..e50ee87b 100644
--- a/core/lib/canary/index/document.ex
+++ b/core/lib/canary/index/document.ex
@@ -33,6 +33,20 @@ defmodule Canary.Index.Document.Webpage.Meta do
defstruct Shared.meta_fields()
end
+defmodule Canary.Index.Document.OpenAPI do
+  # Index document for one OpenAPI path: shared fields + path + one field per method.
+  @own_fields ~w(path get post put delete)a
+  @derive Jason.Encoder
+  defstruct Canary.Index.Document.Shared.top_level_fields() ++ @own_fields
+end
+
+defmodule Canary.Index.Document.OpenAPI.Meta do
+  # Metadata struct stored in the `meta` field of an OpenAPI index document.
+  alias Canary.Index.Document.Shared, as: SharedFields
+  @derive Jason.Encoder
+  defstruct SharedFields.meta_fields()
+end
+
defmodule Canary.Index.Document.GithubIssue do
alias Canary.Index.Document.Shared
diff --git a/core/lib/canary/index/index.ex b/core/lib/canary/index/index.ex
index 6effeb4e..557138a0 100644
--- a/core/lib/canary/index/index.ex
+++ b/core/lib/canary/index/index.ex
@@ -1,6 +1,7 @@
defmodule Canary.Index do
alias Canary.Sources.Source
alias Canary.Sources.Webpage
+ alias Canary.Sources.OpenAPI
alias Canary.Sources.GithubIssue
alias Canary.Sources.GithubDiscussion
@@ -29,6 +30,29 @@ defmodule Canary.Index do
Client.index_document(:webpage, doc)
end
+  # Indexes one OpenAPI chunk into the `:openapi` collection. The path and the four
+  # per-method fields are copied as-is; tags are always empty for this source type.
+  def insert_document(%OpenAPI.Chunk{} = chunk) do
+    openapi_doc = %Document.OpenAPI{
+      id: chunk.index_id,
+      source_id: chunk.source_id,
+      path: chunk.path,
+      get: chunk.get,
+      post: chunk.post,
+      put: chunk.put,
+      delete: chunk.delete,
+      tags: [],
+      is_empty_tags: true,
+      meta: %Document.OpenAPI.Meta{
+        url: chunk.url,
+        document_id: chunk.document_id,
+        is_parent: false
+      }
+    }
+
+    Client.index_document(:openapi, openapi_doc)
+  end
+
def insert_document(%GithubIssue.Chunk{} = chunk) do
meta = %Document.GithubIssue.Meta{
url: chunk.url,
@@ -72,6 +96,7 @@ defmodule Canary.Index do
def delete_document(source_type, id)
when source_type in [
:webpage,
+ :openapi,
:github_issue,
:github_discussion
] do
@@ -126,29 +151,41 @@ defmodule Canary.Index do
|> Enum.reject(&is_nil/1)
|> Enum.join(" && ")
- query_by = ["title", "content"] |> Enum.join(",")
- query_by_weights = [3, 1] |> Enum.join(",")
-
%{
collection: to_string(type),
q: query,
prefix: true,
- query_by: query_by,
- query_by_weights: query_by_weights,
filter_by: filter_by,
sort_by: "_text_match:desc",
- highlight_fields: "content",
stopwords: Canary.Index.Stopword.id(),
prioritize_exact_match: true,
prioritize_token_position: false,
prioritize_num_matching_fields: false,
max_candidates: 4 * 4
}
- |> add_embedding_args(opts)
+ |> handle_source_type(type)
+ |> handle_embedding(opts)
end)
end
- defp add_embedding_args(args, opts) do
+  # Title/content sources: search both fields, weighting the title 3:1, and
+  # highlight matches in the content.
+  defp handle_source_type(args, type)
+       when type in [:webpage, :github_issue, :github_discussion] do
+    Map.merge(args, %{
+      highlight_fields: "content",
+      query_by: Enum.join(["title", "content"], ","),
+      query_by_weights: Enum.join([3, 1], ",")
+    })
+  end
+
+  # OpenAPI: search the path and every per-method field (methods weighted 2:1 over
+  # the path) and highlight matches in the per-method fields only.
+  defp handle_source_type(args, :openapi) do
+    method_fields = ["get", "post", "put", "delete"]
+
+    Map.merge(args, %{
+      highlight_fields: Enum.join(method_fields, ","),
+      query_by: Enum.join(["path" | method_fields], ","),
+      query_by_weights: Enum.join([1, 2, 2, 2, 2], ",")
+    })
+  end
+
+ defp handle_embedding(args, opts) do
embedding = opts[:embedding]
embedding_alpha = opts[:embedding_alpha] || 0.3
diff --git a/core/lib/canary/searcher.ex b/core/lib/canary/searcher.ex
index 2ee955e2..96f0c34c 100644
--- a/core/lib/canary/searcher.ex
+++ b/core/lib/canary/searcher.ex
@@ -108,6 +108,9 @@ defmodule Canary.Searcher.Default do
:webpage ->
%{}
+ :openapi ->
+ %{}
+
:github_issue ->
%{closed: doc.meta.value.closed}
diff --git a/core/lib/canary/sources/document/create_openapi.ex b/core/lib/canary/sources/document/create_openapi.ex
new file mode 100644
index 00000000..3aab12d3
--- /dev/null
+++ b/core/lib/canary/sources/document/create_openapi.ex
@@ -0,0 +1,108 @@
+defmodule Canary.Sources.Document.CreateOpenAPI do
+  @moduledoc """
+  Ash change that builds a document, and its chunks, from a fetched OpenAPI schema.
+
+  The document is first written with an empty meta and no chunks. In an
+  `after_action` hook one `OpenAPI.Chunk` is bulk-created per schema path, and the
+  document is then updated with those chunks and a SHA-256 hash of the schema.
+
+  Required options: `:source_id_argument`, `:fetcher_result_argument`,
+  `:chunks_attribute` and `:meta_attribute`.
+  """
+
+  use Ash.Resource.Change
+
+  alias Canary.Sources.Document
+  alias Canary.Sources.OpenAPI
+
+  @required_opts [
+    :source_id_argument,
+    :fetcher_result_argument,
+    :chunks_attribute,
+    :meta_attribute
+  ]
+
+  # Validates that every required option is present. The callback contract is
+  # `{:ok, opts} | {:error, term}`, so a bare `:error` is not a valid return value;
+  # the error names the options that are missing.
+  @impl true
+  def init(opts) do
+    case Enum.filter(@required_opts, &is_nil(opts[&1])) do
+      [] -> {:ok, opts}
+      missing -> {:error, "missing required option(s): #{inspect(missing)}"}
+    end
+  end
+
+  @impl true
+  def change(changeset, opts, _context) do
+    source_id = Ash.Changeset.get_argument(changeset, opts[:source_id_argument])
+
+    # Assertive match: anything other than a fetcher result wrapping a parsed spec
+    # is a caller bug and should crash here.
+    %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema, served_url: served_url} =
+      Ash.Changeset.get_argument(changeset, opts[:fetcher_result_argument])
+
+    changeset
+    |> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%OpenAPI.DocumentMeta{}))
+    |> Ash.Changeset.change_attribute(opts[:chunks_attribute], [])
+    |> Ash.Changeset.after_action(fn _changeset, record ->
+      # Hash first so an encoding failure surfaces before any chunk is created.
+      meta = %OpenAPI.DocumentMeta{hash: schema_hash(schema)}
+
+      schema.paths
+      |> Enum.map(&chunk_attrs(&1, source_id, record.id, served_url))
+      |> Ash.bulk_create(OpenAPI.Chunk, :create, return_errors?: true, return_records?: true)
+      |> case do
+        %Ash.BulkResult{status: :success, records: records} ->
+          # Chunks carry `record.id` as their document id, so they are attached
+          # in a follow-up update of the freshly created document.
+          Document.update(record, wrap_union(meta), Enum.map(records, &wrap_union/1))
+
+        %Ash.BulkResult{errors: errors} ->
+          {:error, errors}
+      end
+    end)
+  end
+
+  # Attributes for the chunk describing a single path of the schema.
+  defp chunk_attrs({path, %OpenApiSpex.PathItem{} = item}, source_id, document_id, served_url) do
+    %{
+      source_id: source_id,
+      document_id: document_id,
+      url: render_url(served_url, path),
+      path: path,
+      get: render_operation(item.get),
+      post: render_operation(item.post),
+      put: render_operation(item.put),
+      delete: render_operation(item.delete)
+    }
+  end
+
+  # Lowercase hex SHA-256 of the JSON-encoded schema.
+  defp schema_hash(schema) do
+    schema
+    |> Jason.encode!()
+    |> then(&:crypto.hash(:sha256, &1))
+    |> Base.encode16(case: :lower)
+  end
+
+  # `base_url` with its fragment replaced by a text fragment pointing at `path`.
+  defp render_url(base_url, path) do
+    base_url
+    |> URI.parse()
+    |> Map.put(:fragment, ":~:text=#{path}")
+    |> URI.to_string()
+  end
+
+  defp render_operation(nil), do: nil
+
+  # Flattens an operation into one searchable string, skipping absent parts.
+  defp render_operation(%OpenApiSpex.Operation{} = op) do
+    [op.summary, op.description, op.operationId]
+    |> Enum.reject(&is_nil/1)
+    |> Enum.join(" / ")
+  end
+
+  defp wrap_union(%Ash.Union{} = v), do: v
+  defp wrap_union(v), do: %Ash.Union{type: :openapi, value: v}
+end
diff --git a/core/lib/canary/sources/document/document.ex b/core/lib/canary/sources/document/document.ex
index 2011b880..2f381d02 100644
--- a/core/lib/canary/sources/document/document.ex
+++ b/core/lib/canary/sources/document/document.ex
@@ -46,6 +46,21 @@ defmodule Canary.Sources.Document do
}
end
+ # Creates a document from a fetched OpenAPI schema. `:fetcher_result` is declared
+ # as a plain map; `CreateOpenAPI` pattern-matches it as an `OpenAPI.FetcherResult`.
+ create :create_openapi do
+ argument :source_id, :uuid, allow_nil?: false
+ argument :fetcher_result, :map, allow_nil?: false
+
+ change manage_relationship(:source_id, :source, type: :append)
+
+ # The option values name the arguments/attributes of this action the change uses.
+ change {
+ Canary.Sources.Document.CreateOpenAPI,
+ source_id_argument: :source_id,
+ fetcher_result_argument: :fetcher_result,
+ chunks_attribute: :chunks,
+ meta_attribute: :meta
+ }
+ end
+
create :create_github_issue do
argument :source_id, :uuid, allow_nil?: false
argument :fetcher_results, {:array, :map}, allow_nil?: false
diff --git a/core/lib/canary/sources/openapi_chunk.ex b/core/lib/canary/sources/openapi_chunk.ex
new file mode 100644
index 00000000..091ae9bf
--- /dev/null
+++ b/core/lib/canary/sources/openapi_chunk.ex
@@ -0,0 +1,45 @@
+defmodule Canary.Sources.OpenAPI.Chunk do
+  @moduledoc """
+  Embedded resource holding the indexed text of a single OpenAPI path.
+
+  Besides its ids, URL and path, a chunk has one optional string attribute per
+  HTTP method in `@ops`. Creating a chunk runs `Canary.Change.AddToIndex`;
+  destroying it runs `Canary.Change.RemoveFromIndex` for the `:openapi` source type.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  @ops [:get, :post, :put, :delete]
+  @accepted [:index_id, :source_id, :document_id, :url, :path] ++ @ops
+
+  attributes do
+    attribute :index_id, :uuid, allow_nil?: false
+    attribute :source_id, :uuid, allow_nil?: false
+    attribute :document_id, :string, allow_nil?: false
+
+    attribute :url, :string, allow_nil?: false
+    attribute :path, :string, allow_nil?: false
+
+    # One nullable string attribute per HTTP method.
+    Enum.each(@ops, fn op ->
+      attribute op, :string, allow_nil?: true
+    end)
+  end
+
+  actions do
+    defaults [:read]
+
+    create :create do
+      primary? true
+      accept @accepted
+      change {Canary.Change.AddToIndex, index_id_attribute: :index_id}
+    end
+
+    destroy :destroy do
+      primary? true
+
+      change {Canary.Change.RemoveFromIndex,
+              source_type: :openapi, index_id_attribute: :index_id}
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_config.ex b/core/lib/canary/sources/openapi_config.ex
new file mode 100644
index 00000000..9f576784
--- /dev/null
+++ b/core/lib/canary/sources/openapi_config.ex
@@ -0,0 +1,33 @@
+defmodule Canary.Sources.OpenAPI.Config do
+  @moduledoc """
+  Embedded configuration of an OpenAPI source.
+
+  `source_url` and `served_url` are required strings; `served_as` is optional and,
+  when set, must be one of `:swagger`, `:redoc` or `:rapi`.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  @served_as_options [:swagger, :redoc, :rapi]
+  @writable [:source_url, :served_url, :served_as]
+
+  attributes do
+    attribute :source_url, :string, allow_nil?: false
+    attribute :served_url, :string, allow_nil?: false
+    attribute :served_as, :atom, allow_nil?: true, constraints: [one_of: @served_as_options]
+  end
+
+  actions do
+    defaults [:read]
+
+    create :create do
+      primary? true
+      accept @writable
+    end
+
+    update :update do
+      primary? true
+      accept @writable
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_document_meta.ex b/core/lib/canary/sources/openapi_document_meta.ex
new file mode 100644
index 00000000..9768ec00
--- /dev/null
+++ b/core/lib/canary/sources/openapi_document_meta.ex
@@ -0,0 +1,27 @@
+defmodule Canary.Sources.OpenAPI.DocumentMeta do
+  @moduledoc """
+  Embedded metadata of an OpenAPI document: a single required `hash` string.
+  """
+
+  use Ash.Resource, data_layer: :embedded
+
+  @writable [:hash]
+
+  attributes do
+    attribute :hash, :string, allow_nil?: false
+  end
+
+  actions do
+    defaults [:read, :destroy]
+
+    create :create do
+      primary? true
+      accept @writable
+    end
+
+    update :update do
+      primary? true
+      accept @writable
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_fetcher.ex b/core/lib/canary/sources/openapi_fetcher.ex
new file mode 100644
index 00000000..1119fea1
--- /dev/null
+++ b/core/lib/canary/sources/openapi_fetcher.ex
@@ -0,0 +1,52 @@
+defmodule Canary.Sources.OpenAPI.FetcherResult do
+  @moduledoc """
+  Result of fetching an OpenAPI source: the parsed schema plus how it is served.
+  """
+
+  defstruct [:schema, :served_url, :served_as]
+
+  @type t :: %__MODULE__{
+          schema: OpenApiSpex.OpenApi.t(),
+          served_url: String.t(),
+          served_as: atom()
+        }
+end
+
+defmodule Canary.Sources.OpenAPI.Fetcher do
+  @moduledoc """
+  Downloads and parses the OpenAPI schema described by an `OpenAPI.Config`.
+  """
+
+  alias Canary.Sources.OpenAPI
+
+  @doc """
+  Fetches `config.source_url` and parses the response body as an OpenAPI schema.
+
+  Returns `{:ok, %OpenAPI.FetcherResult{}}` for an HTTP 200 response whose body is a
+  map. A 200 response with any other body gives `{:error, :invalid_schema_body}`, any
+  other status gives `{:error, {:unexpected_response, status}}`, and an `{:error, _}`
+  tuple from `Req.get/1` is returned unchanged.
+  """
+  def run(%OpenAPI.Config{} = config) do
+    case Req.get(config.source_url) do
+      {:ok, %Req.Response{status: 200, body: body}} when is_map(body) ->
+        {:ok,
+         %OpenAPI.FetcherResult{
+           schema: OpenApiSpex.schema_from_map(body),
+           # The served URL, not the URL the raw spec was downloaded from.
+           served_url: config.served_url,
+           served_as: config.served_as
+         }}
+
+      {:ok, %Req.Response{status: 200}} ->
+        {:error, :invalid_schema_body}
+
+      {:ok, %Req.Response{status: status}} ->
+        {:error, {:unexpected_response, status}}
+
+      {:error, _reason} = error ->
+        error
+    end
+  end
+end
diff --git a/core/lib/canary/sources/openapi_syncer.ex b/core/lib/canary/sources/openapi_syncer.ex
new file mode 100644
index 00000000..771832af
--- /dev/null
+++ b/core/lib/canary/sources/openapi_syncer.ex
@@ -0,0 +1,67 @@
+defmodule Canary.Sources.OpenAPI.Syncer do
+  @moduledoc """
+  Keeps the stored document of an OpenAPI source in step with a freshly fetched schema.
+  """
+
+  alias Canary.Sources.Document
+  alias Canary.Sources.OpenAPI
+
+  require Ash.Query
+
+  @doc """
+  Replaces the source's document when the fetched schema differs from the stored one.
+
+  Nothing happens when the stored hash equals the hash of `incomings.schema`. Otherwise
+  a new document is created and every other document of the source is destroyed.
+  Returns `:ok`, or the error from the step that failed.
+  """
+  def run(source_id, %OpenAPI.FetcherResult{} = incomings) do
+    stored =
+      Document
+      |> Ash.Query.filter(source_id == ^source_id)
+      |> Ash.Query.build(select: [:id, :meta])
+      |> Ash.read!()
+      |> List.first()
+
+    if hash_eq?(stored, incomings), do: :ok, else: replace(source_id, incomings)
+  end
+
+  # Creates the new document first; older ones are removed only once that succeeded.
+  defp replace(source_id, incomings) do
+    attrs = %{source_id: source_id, fetcher_result: incomings}
+
+    case Document |> Ash.Changeset.for_create(:create_openapi, attrs) |> Ash.create() do
+      {:ok, %{id: new_id}} -> remove_docs(source_id, exclude_id: new_id)
+      failure -> failure
+    end
+  end
+
+  defp remove_docs(source_id, opts) do
+    exclude_id = opts[:exclude_id] || ""
+
+    result =
+      Document
+      |> Ash.Query.filter(source_id == ^source_id and id != ^exclude_id)
+      |> Ash.bulk_destroy(:destroy, %{}, return_errors?: true)
+
+    case result do
+      %Ash.BulkResult{status: :success} -> :ok
+      %Ash.BulkResult{errors: errors} -> {:error, errors}
+    end
+  end
+
+  defp hash_eq?(nil, _incomings), do: false
+
+  defp hash_eq?(
+         %Document{meta: %Ash.Union{type: :openapi, value: %OpenAPI.DocumentMeta{hash: stored}}},
+         %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema}
+       ) do
+    stored == schema_hash(schema)
+  end
+
+  # Lowercase hex SHA-256 of the JSON-encoded schema.
+  defp schema_hash(schema) do
+    digest = :crypto.hash(:sha256, Jason.encode!(schema))
+    Base.encode16(digest, case: :lower)
+  end
+end
diff --git a/core/lib/canary/sources/source.ex b/core/lib/canary/sources/source.ex
index 306e92dd..3d6ddd05 100644
--- a/core/lib/canary/sources/source.ex
+++ b/core/lib/canary/sources/source.ex
@@ -111,6 +111,7 @@ defmodule Canary.Sources.Source do
worker =
case type do
:webpage -> Canary.Workers.WebpageProcessor
+ :openapi -> Canary.Workers.OpenAPIProcessor
:github_issue -> Canary.Workers.GithubIssueProcessor
:github_discussion -> Canary.Workers.GithubDiscussionProcessor
end
@@ -151,6 +152,9 @@ defmodule Canary.Sources.Source do
:webpage ->
Canary.Workers.WebpageProcessor.new(%{source_id: source_id})
+ :openapi ->
+ Canary.Workers.OpenAPIProcessor.new(%{source_id: source_id})
+
:github_issue ->
Canary.Workers.GithubIssueProcessor.new(%{source_id: source_id})
diff --git a/core/lib/canary/workers/job_reporter.ex b/core/lib/canary/workers/job_reporter.ex
index 3a4ff73d..a3fdd201 100644
--- a/core/lib/canary/workers/job_reporter.ex
+++ b/core/lib/canary/workers/job_reporter.ex
@@ -6,6 +6,7 @@ defmodule Canary.Workers.JobReporter do
@processors Enum.map(
[
Workers.WebpageProcessor,
+ Workers.OpenAPIProcessor,
Workers.GithubIssueProcessor,
Workers.GithubDiscussionProcessor
],
diff --git a/core/lib/canary/workers/openapi_processor.ex b/core/lib/canary/workers/openapi_processor.ex
new file mode 100644
index 00000000..acd784e2
--- /dev/null
+++ b/core/lib/canary/workers/openapi_processor.ex
@@ -0,0 +1,46 @@
+defmodule Canary.Workers.OpenAPIProcessor do
+  @moduledoc """
+  Oban worker that fetches an OpenAPI source and syncs its documents.
+
+  Jobs run on the `:openapi_processor` queue and are unique per worker, queue and
+  args within the configured period (24 hours in `:prod`, 10 seconds otherwise).
+  """
+
+  use Oban.Worker,
+    queue: :openapi_processor,
+    max_attempts: 2,
+    unique: [
+      period: if(Application.get_env(:canary, :env) == :prod, do: 24 * 60 * 60, else: 10),
+      fields: [:worker, :queue, :args],
+      states: Oban.Job.states() -- [:discarded, :cancelled],
+      timestamp: :scheduled_at
+    ]
+
+  alias Canary.Sources.Source
+  alias Canary.Sources.OpenAPI
+
+  @impl true
+  def perform(%Oban.Job{args: %{"source_id" => id}}) do
+    case Ash.get(Source, id) do
+      # Best effort: a source that cannot be loaded is skipped instead of retried.
+      {:error, _} -> :ok
+      {:ok, source} -> process(source)
+    end
+  end
+
+  # Fetches the schema and hands it to the syncer. A failed fetch is returned as an
+  # error tuple so the job fails with that reason instead of raising a `MatchError`.
+  defp process(%Source{id: source_id, config: %Ash.Union{type: :openapi, value: config}}) do
+    case OpenAPI.Fetcher.run(config) do
+      {:ok, %OpenAPI.FetcherResult{} = incomings} ->
+        OpenAPI.Syncer.run(source_id, incomings)
+
+      {:error, _reason} = error ->
+        error
+
+      # Any other shape (e.g. an `{:ok, response}` that is not a fetcher result) must
+      # not be returned as-is: an `{:ok, _}` return would mark the job successful.
+      _unexpected ->
+        {:error, :openapi_fetch_failed}
+    end
+  end
+end
diff --git a/core/lib/canary_web/live/source_live/create.ex b/core/lib/canary_web/live/source_live/create.ex
index ba27d07a..7c0164b6 100644
--- a/core/lib/canary_web/live/source_live/create.ex
+++ b/core/lib/canary_web/live/source_live/create.ex
@@ -4,10 +4,17 @@ defmodule CanaryWeb.SourceLive.Create do
@config_types [
{"Webpage", "webpage"},
+ {"OpenAPI", "openapi"},
{"Github Issue", "github_issue"},
{"Github Discussion", "github_discussion"}
]
+ @openapi_types [
+ {"Swagger", "swagger"},
+ {"Redoc", "redoc"},
+ {"Rapi", "rapi"}
+ ]
+
@impl true
def render(assigns) do
~H"""
@@ -48,6 +55,28 @@ defmodule CanaryWeb.SourceLive.Create do
form_control={%{label: "URL"}}
is_full_width
/>
+ <% "openapi" -> %>
+
+
+
<% "github_issue" -> %>
assign(assigns)
|> assign(:config_types, @config_types)
+ |> assign(:openapi_types, @openapi_types)
form =
Canary.Sources.Source
diff --git a/core/lib/canary_web/live/source_live/detail.ex b/core/lib/canary_web/live/source_live/detail.ex
index 41ed5d2d..fd577ab0 100644
--- a/core/lib/canary_web/live/source_live/detail.ex
+++ b/core/lib/canary_web/live/source_live/detail.ex
@@ -36,6 +36,7 @@ defmodule CanaryWeb.SourceLive.Detail do
value={
case @source.config.type do
:webpage -> "Webpage"
+ :openapi -> "OpenAPI"
:github_issue -> "Github Issue"
:github_discussion -> "Github Discussion"
end
@@ -158,6 +159,32 @@ defmodule CanaryWeb.SourceLive.Detail do
+ <% :openapi -> %>
+
+
+
<% :github_issue -> %>