-- schema.sql: pgvector setup. Run the numbered steps below in order.
-- RUN 1st
-- Enable the pgvector extension, which supplies the `vector` column type, the
-- `<=>` operator and the ivfflat index method used by the later steps.
-- `if not exists` makes this step safe to re-run on a database that already has it.
create extension if not exists vector;
-- RUN 2nd
-- Storage for text chunks and their embeddings (presumably one row per chunk;
-- confirm against the loading scripts).
-- `if not exists` keeps this step re-runnable. Note that it will NOT alter an
-- already-existing tns table whose columns differ from this definition.
create table if not exists tns (
    id bigserial primary key,
    chapter_title text,
    chapter_num bigint,
    section_title text,
    section_num bigint,
    section_url text,
    chunk_num bigint,
    content text,
    content_length bigint,   -- presumably the length of content; unit not shown here, confirm with the loader
    content_tokens bigint,   -- presumably the token count of content; confirm with the loader
    embedding vector (1536)  -- dimension must match query_embedding in tns_search
);
-- RUN 3rd after running the scripts
-- tns_search: return the rows of tns that are most similar to a query embedding.
--
-- Parameters:
--   query_embedding       vector(1536) compared against tns.embedding with `<=>`
--   similarity_threshold  only rows whose similarity is strictly greater than
--                         this value are returned
--   match_count           maximum number of rows returned
--
-- Returns every tns column except embedding, plus
--   similarity = 1 - (tns.embedding <=> query_embedding),
-- ordered by ascending `<=>` distance, i.e. highest similarity first.
--
-- Marked `stable` because the body only reads from tns; without it the function
-- takes the default `volatile` category, which tells the planner it may modify data.
create or replace function tns_search (
    query_embedding vector(1536),
    similarity_threshold float,
    match_count int
)
returns table (
    id bigint,
    chapter_title text,
    chapter_num bigint,
    section_title text,
    section_num bigint,
    section_url text,
    chunk_num bigint,
    content text,
    content_length bigint,
    content_tokens bigint,
    similarity float
)
language plpgsql
stable
as $$
begin
    -- Columns are qualified with the table name on purpose: inside a plpgsql
    -- `returns table` function the output column names (id, content, ...) are
    -- also variables, so unqualified references would be ambiguous.
    return query
    select
        tns.id,
        tns.chapter_title,
        tns.chapter_num,
        tns.section_title,
        tns.section_num,
        tns.section_url,
        tns.chunk_num,
        tns.content,
        tns.content_length,
        tns.content_tokens,
        1 - (tns.embedding <=> query_embedding) as similarity
    from tns
    where 1 - (tns.embedding <=> query_embedding) > similarity_threshold
    -- Ordering by the bare distance expression (no extra sort keys) is the form
    -- the ivfflat index on tns.embedding can serve.
    order by tns.embedding <=> query_embedding
    limit match_count;
end;
$$;
-- RUN 4th
-- ivfflat index on tns.embedding using cosine-distance ops, matching the `<=>`
-- ordering in tns_search.
-- The index is given an explicit name so that `if not exists` can skip it on a
-- re-run; the unnamed form builds an additional duplicate index every time it
-- is executed.
-- lists = 100 is the number of ivfflat lists; TODO confirm it suits the row count.
create index if not exists tns_embedding_idx on tns
    using ivfflat (embedding vector_cosine_ops)
    with (lists = 100);