Skip to content

Commit

Permalink
got it working
Browse files Browse the repository at this point in the history
  • Loading branch information
yujonglee committed Oct 5, 2024
1 parent c0399d1 commit 91e4955
Show file tree
Hide file tree
Showing 12 changed files with 251 additions and 14 deletions.
1 change: 1 addition & 0 deletions core/lib/canary/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ defmodule Canary.Application do
attach_oban_telemetry()

:ok = Canary.Index.Collection.ensure(:webpage)
:ok = Canary.Index.Collection.ensure(:openapi)
:ok = Canary.Index.Collection.ensure(:github_issue)
:ok = Canary.Index.Collection.ensure(:github_discussion)
:ok = Canary.Index.Stopword.ensure()
Expand Down
13 changes: 11 additions & 2 deletions core/lib/canary/index/collection.ex
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
defmodule Canary.Index.Collection do
def ensure(name) when name in [:webpage, :github_issue, :github_discussion] do
def ensure(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
with {:error, _} <- Canary.Index.Client.get_collection(name),
{:error, _} <- Canary.Index.Client.create_collection(name, fields(name)) do
:error
Expand All @@ -8,7 +8,7 @@ defmodule Canary.Index.Collection do
end
end

defp fields(name) when name in [:webpage, :github_issue, :github_discussion] do
defp fields(name) when name in [:webpage, :openapi, :github_issue, :github_discussion] do
# https://typesense.org/docs/27.0/api/collections.html#indexing-all-but-some-fields
shared = [
%{name: "source_id", type: "string"},
Expand All @@ -26,6 +26,15 @@ defmodule Canary.Index.Collection do
%{name: "content", type: "string", stem: true}
]

:openapi ->
[
%{name: "path", type: "string", stem: true},
%{name: "get", type: "string", stem: true, optional: true},
%{name: "post", type: "string", stem: true, optional: true},
%{name: "put", type: "string", stem: true, optional: true},
%{name: "delete", type: "string", stem: true, optional: true}
]

:github_issue ->
[
%{name: "title", type: "string", stem: true},
Expand Down
14 changes: 14 additions & 0 deletions core/lib/canary/index/document.ex
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,20 @@ defmodule Canary.Index.Document.Webpage.Meta do
defstruct Shared.meta_fields()
end

defmodule Canary.Index.Document.OpenAPI do
  @moduledoc """
  Search-index document for one OpenAPI path.

  The struct is made of the shared top-level fields plus the path itself and one
  field per indexed HTTP operation (`get`, `post`, `put`, `delete`).
  """

  alias Canary.Index.Document.Shared

  # Fields that only exist on OpenAPI documents, appended after the shared ones.
  @openapi_fields [:path, :get, :post, :put, :delete]

  @derive Jason.Encoder
  defstruct Shared.top_level_fields() ++ @openapi_fields
end

defmodule Canary.Index.Document.OpenAPI.Meta do
  @moduledoc """
  Metadata struct nested under an OpenAPI index document.

  Its fields are exactly the ones returned by `Canary.Index.Document.Shared.meta_fields/0`.
  """

  @derive Jason.Encoder
  defstruct Canary.Index.Document.Shared.meta_fields()
end

defmodule Canary.Index.Document.GithubIssue do
alias Canary.Index.Document.Shared

Expand Down
53 changes: 45 additions & 8 deletions core/lib/canary/index/index.ex
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
defmodule Canary.Index do
alias Canary.Sources.Source
alias Canary.Sources.Webpage
alias Canary.Sources.OpenAPI
alias Canary.Sources.GithubIssue
alias Canary.Sources.GithubDiscussion

Expand Down Expand Up @@ -29,6 +30,29 @@ defmodule Canary.Index do
Client.index_document(:webpage, doc)
end

# Builds the index document for an OpenAPI chunk and sends it to the `:openapi`
# collection. Tags are always empty for this source type, and the chunk is never
# flagged as a parent.
def insert_document(%OpenAPI.Chunk{} = chunk) do
  document = %Document.OpenAPI{
    id: chunk.index_id,
    source_id: chunk.source_id,
    path: chunk.path,
    get: chunk.get,
    post: chunk.post,
    put: chunk.put,
    delete: chunk.delete,
    tags: [],
    is_empty_tags: true,
    meta: %Document.OpenAPI.Meta{
      url: chunk.url,
      document_id: chunk.document_id,
      is_parent: false
    }
  }

  Client.index_document(:openapi, document)
end

def insert_document(%GithubIssue.Chunk{} = chunk) do
meta = %Document.GithubIssue.Meta{
url: chunk.url,
Expand Down Expand Up @@ -72,6 +96,7 @@ defmodule Canary.Index do
def delete_document(source_type, id)
when source_type in [
:webpage,
:openapi,
:github_issue,
:github_discussion
] do
Expand Down Expand Up @@ -126,29 +151,41 @@ defmodule Canary.Index do
|> Enum.reject(&is_nil/1)
|> Enum.join(" && ")

query_by = ["title", "content"] |> Enum.join(",")
query_by_weights = [3, 1] |> Enum.join(",")

%{
collection: to_string(type),
q: query,
prefix: true,
query_by: query_by,
query_by_weights: query_by_weights,
filter_by: filter_by,
sort_by: "_text_match:desc",
highlight_fields: "content",
stopwords: Canary.Index.Stopword.id(),
prioritize_exact_match: true,
prioritize_token_position: false,
prioritize_num_matching_fields: false,
max_candidates: 4 * 4
}
|> add_embedding_args(opts)
|> handle_source_type(type)
|> handle_embedding(opts)
end)
end

defp add_embedding_args(args, opts) do
# Adds the collection-specific search parameters (`:highlight_fields`, `:query_by`
# and `:query_by_weights`) to the search args.
#
# Title/content collections: query both fields, title weighted 3 against content 1,
# and highlight only the content.
defp handle_source_type(args, type)
     when type in [:webpage, :github_issue, :github_discussion] do
  Map.merge(args, %{
    highlight_fields: "content",
    query_by: Enum.join(["title", "content"], ","),
    query_by_weights: Enum.join([3, 1], ",")
  })
end

# OpenAPI collection: query the path plus every operation field, each operation
# weighted 2 against the path's 1, and highlight the operation fields only.
defp handle_source_type(args, :openapi) do
  operations = ["get", "post", "put", "delete"]

  Map.merge(args, %{
    highlight_fields: Enum.join(operations, ","),
    query_by: Enum.join(["path" | operations], ","),
    query_by_weights: Enum.join([1, 2, 2, 2, 2], ",")
  })
end

defp handle_embedding(args, opts) do
embedding = opts[:embedding]
embedding_alpha = opts[:embedding_alpha] || 0.3

Expand Down
3 changes: 3 additions & 0 deletions core/lib/canary/searcher.ex
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ defmodule Canary.Searcher.Default do
:webpage ->
%{}

:openapi ->
%{}

:github_issue ->
%{closed: doc.meta.value.closed}

Expand Down
84 changes: 84 additions & 0 deletions core/lib/canary/sources/document/create_openapi.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
defmodule Canary.Sources.Document.CreateOpenAPI do
  @moduledoc """
  Ash change that turns a fetched OpenAPI schema into a document with one chunk per path.

  ## Options

  All four options are required and must be non-nil:

    * `:source_id_argument` - name of the action argument that holds the source id
    * `:fetcher_result_argument` - name of the action argument that holds the
      `Canary.Sources.OpenAPI.FetcherResult`
    * `:chunks_attribute` - attribute that is set to `[]` on the changeset
    * `:meta_attribute` - attribute that is set to an empty, union-wrapped
      `Canary.Sources.OpenAPI.DocumentMeta` on the changeset

  After the action has run, one `Canary.Sources.OpenAPI.Chunk` is bulk-created for every
  entry of `schema.paths`, and the created record is handed to `Document.update/3` together
  with the schema hash and the union-wrapped chunks.
  """

  use Ash.Resource.Change

  alias Canary.Sources.Document
  alias Canary.Sources.OpenAPI

  @required_opts [
    :source_id_argument,
    :fetcher_result_argument,
    :chunks_attribute,
    :meta_attribute
  ]

  @doc """
  Validates that every required option is present.

  Returns `{:ok, opts}` when all options are set, otherwise `{:error, message}` naming the
  missing ones (the callback contract is `{:ok, opts} | {:error, term}`, not a bare `:error`).
  """
  @impl true
  def init(opts) do
    case Enum.filter(@required_opts, &is_nil(opts[&1])) do
      [] ->
        {:ok, opts}

      missing ->
        {:error, "#{inspect(__MODULE__)} is missing required option(s): #{inspect(missing)}"}
    end
  end

  @doc """
  Initialises the meta/chunks attributes and registers the chunk creation as an
  after-action hook.

  Raises a `MatchError` when the fetcher-result argument is not an
  `OpenAPI.FetcherResult` wrapping an `OpenApiSpex.OpenApi` struct.
  """
  @impl true
  def change(changeset, opts, _context) do
    source_id = Ash.Changeset.get_argument(changeset, opts[:source_id_argument])

    # Assertive match: any other argument shape is a caller bug and should crash loudly.
    %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema} =
      Ash.Changeset.get_argument(changeset, opts[:fetcher_result_argument])

    changeset
    |> Ash.Changeset.change_attribute(opts[:meta_attribute], wrap_union(%OpenAPI.DocumentMeta{}))
    |> Ash.Changeset.change_attribute(opts[:chunks_attribute], [])
    |> Ash.Changeset.after_action(fn _changeset, record ->
      create_chunks(record, source_id, schema)
    end)
  end

  # Creates one chunk per path and stores the chunks and the schema hash on the record.
  defp create_chunks(record, source_id, schema) do
    # Hash first, so an encoding failure happens before any chunk is created.
    meta = %OpenAPI.DocumentMeta{hash: schema_hash(schema)}

    # `paths` may be absent from a spec; treat that as "no chunks".
    (schema.paths || %{})
    |> Enum.map(&chunk_input(&1, source_id, record.id))
    |> Ash.bulk_create(OpenAPI.Chunk, :create,
      return_errors?: true,
      return_records?: true
    )
    |> case do
      %Ash.BulkResult{status: :success, records: records} ->
        Document.update(record, wrap_union(meta), Enum.map(records, &wrap_union/1))

      %Ash.BulkResult{errors: errors} ->
        {:error, errors}
    end
  end

  # Builds the create input for the chunk describing a single path item.
  defp chunk_input(
         {path, %OpenApiSpex.PathItem{get: get, post: post, put: put, delete: delete}},
         source_id,
         document_id
       ) do
    %{
      source_id: source_id,
      document_id: document_id,
      url: "TODO",
      path: path,
      get: render_operation(get),
      post: render_operation(post),
      put: render_operation(put),
      delete: render_operation(delete)
    }
  end

  # Lowercase hex SHA-256 of the JSON-encoded schema.
  defp schema_hash(schema) do
    schema
    |> Jason.encode!()
    |> then(&:crypto.hash(:sha256, &1))
    |> Base.encode16(case: :lower)
  end

  # Flattens an operation into the text that gets indexed; an absent operation stays nil.
  defp render_operation(nil), do: nil

  defp render_operation(%OpenApiSpex.Operation{} = op) do
    [op.summary, op.description, op.operationId]
    |> Enum.reject(&is_nil/1)
    |> Enum.join(" / ")
  end

  # Wraps a value in the `:openapi` union member unless it is already a union.
  defp wrap_union(%Ash.Union{} = v), do: v
  defp wrap_union(v), do: %Ash.Union{type: :openapi, value: v}
end
15 changes: 15 additions & 0 deletions core/lib/canary/sources/document/document.ex
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ defmodule Canary.Sources.Document do
}
end

# Create action for documents backed by an OpenAPI source. Both arguments are required.
create :create_openapi do
argument :source_id, :uuid, allow_nil?: false
# Declared as a plain :map; CreateOpenAPI pattern-matches it as an OpenAPI.FetcherResult.
argument :fetcher_result, :map, allow_nil?: false

# Attach the new document to the source identified by :source_id.
change manage_relationship(:source_id, :source, type: :append)

# The change is told which arguments to read and which attributes to write.
change {
Canary.Sources.Document.CreateOpenAPI,
source_id_argument: :source_id,
fetcher_result_argument: :fetcher_result,
chunks_attribute: :chunks,
meta_attribute: :meta
}
end

create :create_github_issue do
argument :source_id, :uuid, allow_nil?: false
argument :fetcher_results, {:array, :map}, allow_nil?: false
Expand Down
21 changes: 17 additions & 4 deletions core/lib/canary/sources/openapi_chunk.ex
Original file line number Diff line number Diff line change
@@ -1,24 +1,37 @@
# Embedded Ash resource for one OpenAPI path chunk: identifiers, url, path and one
# optional text attribute per HTTP operation listed in @ops.
# NOTE(review): this span looks like a rendered diff with old and new lines interleaved
# (two `defaults` lines, and an `update :update do` opener immediately followed by
# `destroy :destroy do`) — confirm against the real file before relying on it.
defmodule Canary.Sources.OpenAPI.Chunk do
use Ash.Resource, data_layer: :embedded

# HTTP operations that get their own attribute and are accepted by the create action.
@ops [:get, :post, :put, :delete]

attributes do
attribute :index_id, :uuid, allow_nil?: false
attribute :source_id, :uuid, allow_nil?: false
attribute :document_id, :string, allow_nil?: false
attribute :is_parent, :boolean, allow_nil?: false

attribute :tags, {:array, :string}, default: []
attribute :url, :string, allow_nil?: false
attribute :path, :string, allow_nil?: false

# Declares one nullable string attribute per operation in @ops.
Enum.each(@ops, fn op ->
attribute op, :string, allow_nil?: true
end)
end

actions do
defaults [:read, :destroy]
defaults [:read]

create :create do
primary? true
accept [:index_id, :source_id, :document_id] ++ [:url, :path] ++ @ops
# AddToIndex is given the name of the attribute that holds the index id.
change {Canary.Change.AddToIndex, index_id_attribute: :index_id}
end

update :update do
destroy :destroy do
primary? true

# RemoveFromIndex is configured for the :openapi source type and the same index id attribute.
change {
Canary.Change.RemoveFromIndex,
source_type: :openapi, index_id_attribute: :index_id
}
end
end
end
3 changes: 3 additions & 0 deletions core/lib/canary/sources/openapi_document_meta.ex
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ defmodule Canary.Sources.OpenAPI.DocumentMeta do
use Ash.Resource, data_layer: :embedded

# The only attribute is a required string hash. In the OpenAPI syncer it holds the
# lowercase hex SHA-256 of the JSON-encoded schema.
attributes do
attribute :hash, :string, allow_nil?: false
end

# Default read/destroy actions plus primary create and update actions; both of the
# latter accept only :hash.
actions do
defaults [:read, :destroy]

create :create do
primary? true
accept [:hash]
end

update :update do
primary? true
accept [:hash]
end
end
end
56 changes: 56 additions & 0 deletions core/lib/canary/sources/openapi_syncer.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
defmodule Canary.Sources.OpenAPI.Syncer do
  @moduledoc """
  Keeps the stored document of an OpenAPI source in sync with a freshly fetched schema.

  `run/2` compares the hash stored on the first existing document of the source with the
  hash of the incoming schema. When they match nothing happens; otherwise a new document
  is created through the `:create_openapi` action and every other document of the source
  is destroyed.
  """

  alias Canary.Sources.Document
  alias Canary.Sources.OpenAPI

  require Ash.Query

  def run(source_id, %OpenAPI.FetcherResult{} = incomings) do
    current =
      Document
      |> Ash.Query.filter(source_id == ^source_id)
      |> Ash.Query.build(select: [:id, :meta])
      |> Ash.read!()
      |> List.first()

    if hash_eq?(current, incomings) do
      :ok
    else
      replace_document(source_id, incomings)
    end
  end

  # Creates the new document first, then drops every older one; the first failure is
  # returned unchanged.
  defp replace_document(source_id, incomings) do
    params = %{source_id: source_id, fetcher_result: incomings}

    with {:ok, %{id: id}} <-
           Document
           |> Ash.Changeset.for_create(:create_openapi, params)
           |> Ash.create(),
         :ok <- remove_docs(source_id, exclude_id: id) do
      :ok
    end
  end

  # Destroys all documents of the source except the one whose id is given as `:exclude_id`.
  defp remove_docs(source_id, opts) do
    exclude_id = opts[:exclude_id] || ""

    result =
      Document
      |> Ash.Query.filter(source_id == ^source_id and id != ^exclude_id)
      |> Ash.bulk_destroy(:destroy, %{}, return_errors?: true)

    case result do
      %Ash.BulkResult{status: :success} -> :ok
      %Ash.BulkResult{errors: errors} -> {:error, errors}
    end
  end

  # No stored document means there is nothing to compare against.
  defp hash_eq?(nil, _), do: false

  # Compares the stored hash with the lowercase hex SHA-256 of the JSON-encoded schema.
  defp hash_eq?(
         %Document{meta: %Ash.Union{type: :openapi, value: %OpenAPI.DocumentMeta{hash: stored}}},
         %OpenAPI.FetcherResult{schema: %OpenApiSpex.OpenApi{} = schema}
       ) do
    encoded = Jason.encode!(schema)
    digest = :crypto.hash(:sha256, encoded)

    stored == Base.encode16(digest, case: :lower)
  end
end
Loading

0 comments on commit 91e4955

Please sign in to comment.