Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support OpenAPI source #117

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ config :canary, Oban,
default: 10,
github_processor: 2,
webpage_processor: 10,
openapi_processor: 10,
email: 10
],
repo: Canary.Repo,
Expand Down
1 change: 1 addition & 0 deletions core/lib/canary/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ defmodule Canary.Application do
attach_oban_telemetry()

:ok = Canary.Index.Collection.ensure(:webpage)
:ok = Canary.Index.Collection.ensure(:openapi)
:ok = Canary.Index.Collection.ensure(:github_issue)
:ok = Canary.Index.Collection.ensure(:github_discussion)
:ok = Canary.Index.Stopword.ensure()
Expand Down
13 changes: 11 additions & 2 deletions core/lib/canary/index/collection.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defmodule Canary.Index.Collection do
def ensure(name) when name in [:webpage, :github_issue, :github_discussion] do
def ensure(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
with {:error, _} <- Canary.Index.Client.get_collection(name),
{:error, _} <- Canary.Index.Client.create_collection(name, fields(name)) do
:error
Expand All @@ -8,7 +8,7 @@ defmodule Canary.Index.Collection do
end
end

defp fields(name) when name in [:webpage, :github_issue, :github_discussion] do
defp fields(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
# https://typesense.org/docs/27.0/api/collections.html#indexing-all-but-some-fields
shared = [
%{name: "source_id", type: "string"},
Expand All @@ -26,6 +26,15 @@ defmodule Canary.Index.Collection do
%{name: "content", type: "string", stem: true}
]

:openapi ->
[
%{name: "path", type: "string", stem: true},
%{name: "get", type: "string", stem: true, optional: true},
%{name: "post", type: "string", stem: true, optional: true},
%{name: "put", type: "string", stem: true, optional: true},
%{name: "delete", type: "string", stem: true, optional: true}
]

:github_issue ->
[
%{name: "title", type: "string", stem: true},
Expand Down
14 changes: 14 additions & 0 deletions core/lib/canary/index/document.ex
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ defmodule Canary.Index.Document.Webpage.Meta do
defstruct Shared.meta_fields()
end

defmodule Canary.Index.Document.OpenAPI do
  @moduledoc """
  Index document for a single OpenAPI path.

  The struct carries the shared top-level document fields plus `:path` and one
  field per supported HTTP operation (`:get`, `:post`, `:put`, `:delete`).
  It derives `Jason.Encoder`, so it can be serialized to JSON as-is.
  """

  alias Canary.Index.Document.Shared

  # Fields specific to OpenAPI documents, appended after the shared ones.
  @openapi_fields [:path, :get, :post, :put, :delete]

  @derive Jason.Encoder
  defstruct Shared.top_level_fields() ++ @openapi_fields
end

defmodule Canary.Index.Document.OpenAPI.Meta do
  @moduledoc """
  Metadata struct attached to an OpenAPI index document.

  It contains exactly the shared meta fields and derives `Jason.Encoder`.
  """

  alias Canary.Index.Document.Shared

  # No OpenAPI-specific metadata: only the fields every document type shares.
  @meta_fields Shared.meta_fields()

  @derive Jason.Encoder
  defstruct @meta_fields
end

defmodule Canary.Index.Document.GithubIssue do
alias Canary.Index.Document.Shared

Expand Down
53 changes: 45 additions & 8 deletions core/lib/canary/index/index.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
defmodule Canary.Index do
alias Canary.Sources.Source
alias Canary.Sources.Webpage
alias Canary.Sources.OpenAPI
alias Canary.Sources.GithubIssue
alias Canary.Sources.GithubDiscussion

Expand Down Expand Up @@ -29,6 +30,29 @@ defmodule Canary.Index do
Client.index_document(:webpage, doc)
end

# Indexes one OpenAPI chunk into the `:openapi` collection.
# The chunk's path and per-operation texts are copied over unchanged; tags are
# always empty for this source type, and the chunk is never marked as a parent.
def insert_document(%OpenAPI.Chunk{} = chunk) do
  document = %Document.OpenAPI{
    id: chunk.index_id,
    source_id: chunk.source_id,
    path: chunk.path,
    get: chunk.get,
    post: chunk.post,
    put: chunk.put,
    delete: chunk.delete,
    tags: [],
    is_empty_tags: true,
    meta: %Document.OpenAPI.Meta{
      url: chunk.url,
      document_id: chunk.document_id,
      is_parent: false
    }
  }

  Client.index_document(:openapi, document)
end

def insert_document(%GithubIssue.Chunk{} = chunk) do
meta = %Document.GithubIssue.Meta{
url: chunk.url,
Expand Down Expand Up @@ -72,6 +96,7 @@ defmodule Canary.Index do
def delete_document(source_type, id)
when source_type in [
:webpage,
:openapi,
:github_issue,
:github_discussion
] do
Expand Down Expand Up @@ -126,29 +151,41 @@ defmodule Canary.Index do
|> Enum.reject(&is_nil/1)
|> Enum.join(" && ")

query_by = ["title", "content"] |> Enum.join(",")
query_by_weights = [3, 1] |> Enum.join(",")

%{
collection: to_string(type),
q: query,
prefix: true,
query_by: query_by,
query_by_weights: query_by_weights,
filter_by: filter_by,
sort_by: "_text_match:desc",
highlight_fields: "content",
stopwords: Canary.Index.Stopword.id(),
prioritize_exact_match: true,
prioritize_token_position: false,
prioritize_num_matching_fields: false,
max_candidates: 4 * 4
}
|> add_embedding_args(opts)
|> handle_source_type(type)
|> handle_embedding(opts)
end)
end

defp add_embedding_args(args, opts) do
# Adds the per-collection search fields (`:highlight_fields`, `:query_by`,
# `:query_by_weights`) to the search arguments, as comma-joined strings.

# Title/content collections: title is weighted 3, content 1.
defp handle_source_type(args, type)
     when type in [:webpage, :github_issue, :github_discussion] do
  Map.merge(args, %{
    highlight_fields: "content",
    query_by: "title,content",
    query_by_weights: "3,1"
  })
end

# OpenAPI collection: search the path (weight 1) and every operation field
# (weight 2 each); only the operation fields are highlighted.
defp handle_source_type(args, :openapi) do
  operation_fields = ~w(get post put delete)
  weights = [1 | List.duplicate(2, length(operation_fields))]

  Map.merge(args, %{
    highlight_fields: Enum.join(operation_fields, ","),
    query_by: Enum.join(["path" | operation_fields], ","),
    query_by_weights: Enum.join(weights, ",")
  })
end

defp handle_embedding(args, opts) do
embedding = opts[:embedding]
embedding_alpha = opts[:embedding_alpha] || 0.3

Expand Down
3 changes: 3 additions & 0 deletions core/lib/canary/searcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ defmodule Canary.Searcher.Default do
:webpage ->
%{}

:openapi ->
%{}

:github_issue ->
%{closed: doc.meta.value.closed}

Expand Down
90 changes: 90 additions & 0 deletions core/lib/canary/sources/document/create_openapi.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
defmodule Canary.Sources.Document.CreateOpenAPI do
  @moduledoc """
  Ash change that creates the chunks for an OpenAPI document.

  The change reads a source id and an `OpenAPI.FetcherResult` from the
  changeset arguments. Before the action it sets a placeholder meta and an
  empty chunk list; after the action it bulk-creates one `OpenAPI.Chunk` per
  path in the schema and updates the document with the created chunks and a
  meta holding the schema hash.

  ## Options (all required)

    * `:source_id_argument` - name of the argument holding the source id
    * `:fetcher_result_argument` - name of the argument holding the fetcher result
    * `:chunks_attribute` - name of the attribute that stores the chunks
    * `:meta_attribute` - name of the attribute that stores the document meta
  """

  use Ash.Resource.Change

  alias Canary.Sources.Document
  alias Canary.Sources.OpenAPI

  @required_opts [
    :source_id_argument,
    :fetcher_result_argument,
    :chunks_attribute,
    :meta_attribute
  ]

  @doc """
  Validates that every required option is present.

  Returns `{:ok, opts}` or `{:error, message}` naming the missing options.
  """
  @impl true
  def init(opts) do
    case Enum.filter(@required_opts, &is_nil(opts[&1])) do
      [] -> {:ok, opts}
      # The callback contract is `{:ok, opts} | {:error, term}`; a bare `:error` is not valid.
      missing -> {:error, "missing required option(s): #{inspect(missing)}"}
    end
  end

  @doc """
  Sets placeholder meta/chunks on the changeset and registers the after-action
  hook that creates the chunks and updates the document.

  Raises a `MatchError` if the fetcher-result argument is not an
  `OpenAPI.FetcherResult` wrapping an `OpenApiSpex.OpenApi` schema.
  """
  @impl true
  def change(changeset, opts, _context) do
    source_id = Ash.Changeset.get_argument(changeset, opts[:source_id_argument])

    %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema, served_url: served_url} =
      Ash.Changeset.get_argument(changeset, opts[:fetcher_result_argument])

    changeset
    |> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%OpenAPI.DocumentMeta{}))
    |> Ash.Changeset.change_attribute(opts[:chunks_attribute], [])
    |> Ash.Changeset.after_action(fn _changeset, record ->
      # Chunks reference the document id, so they can only be built once the record exists.
      chunks_create_result =
        schema.paths
        |> Enum.map(&chunk_input(&1, source_id, record.id, served_url))
        |> Ash.bulk_create(OpenAPI.Chunk, :create,
          return_errors?: true,
          return_records?: true
        )

      meta = %OpenAPI.DocumentMeta{hash: schema_hash(schema)}

      case chunks_create_result do
        %Ash.BulkResult{status: :success, records: records} ->
          # `Document.update/3` already returns `{:ok, record}` or an error; pass it through.
          Document.update(record, wrap_union(meta), Enum.map(records, &wrap_union/1))

        %Ash.BulkResult{errors: errors} ->
          {:error, errors}
      end
    end)
  end

  # Lowercase hex SHA-256 of the JSON-encoded schema.
  defp schema_hash(schema) do
    schema
    |> Jason.encode!()
    |> then(&:crypto.hash(:sha256, &1))
    |> Base.encode16(case: :lower)
  end

  # Builds the create input for one path of the schema.
  # NOTE(review): only `%OpenApiSpex.PathItem{}` entries are matched; any other
  # value under `paths` raises a `FunctionClauseError`, as in the original code.
  defp chunk_input(
         {path, %OpenApiSpex.PathItem{get: get, post: post, put: put, delete: delete}},
         source_id,
         document_id,
         served_url
       ) do
    %{
      source_id: source_id,
      document_id: document_id,
      url: render_url(served_url, path),
      path: path,
      get: render_operation(get),
      post: render_operation(post),
      put: render_operation(put),
      delete: render_operation(delete)
    }
  end

  # Points the served URL at the given path using a text fragment (`#:~:text=`).
  defp render_url(base_url, path) do
    uri = URI.parse(base_url)

    URI.to_string(%{uri | fragment: ":~:text=" <> encode_text_directive(path)})
  end

  # Text-directive values must be percent-encoded. `-` is additionally escaped
  # because it is directive syntax (prefix-/-suffix) although it is otherwise an
  # unreserved URI character.
  defp encode_text_directive(text) do
    text
    |> URI.encode(&URI.char_unreserved?/1)
    |> String.replace("-", "%2D")
  end

  # Flattens an operation into a single searchable string; absent operations stay nil.
  defp render_operation(nil), do: nil

  defp render_operation(%OpenApiSpex.Operation{} = op) do
    [op.summary, op.description, op.operationId]
    |> Enum.reject(&is_nil/1)
    |> Enum.join(" / ")
  end

  # Wraps a value in the `:openapi` member of the union type; unions pass through untouched.
  defp wrap_union(%Ash.Union{} = v), do: v
  defp wrap_union(v), do: %Ash.Union{type: :openapi, value: v}
end
15 changes: 15 additions & 0 deletions core/lib/canary/sources/document/document.ex
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ defmodule Canary.Sources.Document do
}
end

# Create action for OpenAPI documents.
# Takes the id of the owning source and the fetcher result; both are required.
create :create_openapi do
  argument :source_id, :uuid, allow_nil?: false
  argument :fetcher_result, :map, allow_nil?: false

  # Relate the new document to the source given by the `:source_id` argument.
  change manage_relationship(:source_id, :source, type: :append)

  # Chunk and meta handling is delegated to CreateOpenAPI, which is told which
  # arguments to read from and which attributes to write to.
  change {
    Canary.Sources.Document.CreateOpenAPI,
    source_id_argument: :source_id,
    fetcher_result_argument: :fetcher_result,
    chunks_attribute: :chunks,
    meta_attribute: :meta
  }
end

create :create_github_issue do
argument :source_id, :uuid, allow_nil?: false
argument :fetcher_results, {:array, :map}, allow_nil?: false
Expand Down
37 changes: 37 additions & 0 deletions core/lib/canary/sources/openapi_chunk.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
defmodule Canary.Sources.OpenAPI.Chunk do
  @moduledoc """
  Embedded resource describing one OpenAPI path.

  A chunk stores its index, source and document ids, a URL, the path, and one
  optional string per HTTP operation (`:get`, `:post`, `:put`, `:delete`).
  The primary create action runs `Canary.Change.AddToIndex`; the primary
  destroy action runs `Canary.Change.RemoveFromIndex` for the `:openapi`
  source type.
  """

  use Ash.Resource, data_layer: :embedded

  # HTTP operations that get their own (nullable) string attribute.
  @operation_fields [:get, :post, :put, :delete]

  attributes do
    attribute :index_id, :uuid, allow_nil?: false
    attribute :source_id, :uuid, allow_nil?: false
    attribute :document_id, :string, allow_nil?: false

    attribute :url, :string, allow_nil?: false
    attribute :path, :string, allow_nil?: false

    Enum.each(@operation_fields, fn operation ->
      attribute operation, :string, allow_nil?: true
    end)
  end

  actions do
    defaults [:read]

    create :create do
      primary? true
      accept [:index_id, :source_id, :document_id, :url, :path | @operation_fields]
      change {Canary.Change.AddToIndex, index_id_attribute: :index_id}
    end

    destroy :destroy do
      primary? true

      change {
        Canary.Change.RemoveFromIndex,
        source_type: :openapi, index_id_attribute: :index_id
      }
    end
  end
end
26 changes: 26 additions & 0 deletions core/lib/canary/sources/openapi_config.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
defmodule Canary.Sources.OpenAPI.Config do
  @moduledoc """
  Embedded configuration for an OpenAPI source.

  Holds two required URLs (`:source_url`, `:served_url`) and an optional
  `:served_as` atom restricted to `:swagger`, `:redoc` or `:rapi`.
  NOTE(review): presumably `:source_url` is where the spec is fetched from and
  `:served_url` is where the rendered docs live; confirm against the fetcher.
  """

  use Ash.Resource, data_layer: :embedded

  # Create and update accept exactly the same set of attributes.
  @accepted [:source_url, :served_url, :served_as]

  attributes do
    attribute :source_url, :string, allow_nil?: false
    attribute :served_url, :string, allow_nil?: false

    attribute :served_as, :atom,
      allow_nil?: true,
      constraints: [one_of: [:swagger, :redoc, :rapi]]
  end

  actions do
    defaults [:read]

    create :create do
      primary? true
      accept @accepted
    end

    update :update do
      primary? true
      accept @accepted
    end
  end
end
21 changes: 21 additions & 0 deletions core/lib/canary/sources/openapi_document_meta.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
defmodule Canary.Sources.OpenAPI.DocumentMeta do
  @moduledoc """
  Embedded metadata for an OpenAPI document.

  Its single attribute is a required `:hash` string, accepted by both the
  primary create and the primary update action.
  """

  use Ash.Resource, data_layer: :embedded

  # The only attribute callers may set, on create and on update alike.
  @accepted [:hash]

  attributes do
    attribute :hash, :string, allow_nil?: false
  end

  actions do
    defaults [:read, :destroy]

    create :create do
      primary? true
      accept @accepted
    end

    update :update do
      primary? true
      accept @accepted
    end
  end
end
Loading
Loading