-
Notifications
You must be signed in to change notification settings - Fork 59
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support sparsevec in weighted vector search function #328
base: main
Are you sure you want to change the base?
Changes from all commits
b689c2d
ec924eb
decf208
c661fea
4a20f39
2eff4f1
17bc1f5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
ARG VERSION=15 | ||
ARG PGVECTOR_VERSION=0.5.1 | ||
ARG PGVECTOR_VERSION=0.7.3-lanterncloud | ||
#fix pg_cron at the latest commit of the time | ||
ARG PG_CRON_COMMIT_SHA=7e91e72b1bebc5869bb900d9253cc9e92518b33f | ||
|
||
|
@@ -31,7 +31,7 @@ RUN gem install pg -- --with-pg-include=/usr/local/pgsql/include/ --with-pg-lib= | |
# hack to make sure postgres user has write access to externally mounted volumes | ||
RUN mkdir /lantern_shared && chown postgres:postgres /lantern_shared | ||
|
||
RUN cd /root/postgresql-15.5/contrib && make install -j | ||
RUN cd /root/postgresql-15.5/contrib && make install | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why change this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The error I get with -j |
||
|
||
# allow non-root users to install in the container to make it easier to run update-tests | ||
RUN chmod -R 777 /usr/local/pgsql/lib/ /usr/local/pgsql/share/extension/ /usr/local/pgsql/include/server/ | ||
|
@@ -41,7 +41,7 @@ USER postgres | |
RUN pip install GitPython libtmux | ||
|
||
# Build & Install pgvector | ||
RUN wget --quiet -O pgvector.tar.gz https://github.com/pgvector/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \ | ||
RUN wget --quiet -O pgvector.tar.gz https://github.com/lanterndata/pgvector/archive/refs/tags/v${PGVECTOR_VERSION}.tar.gz && \ | ||
tar xzf pgvector.tar.gz && \ | ||
(cd pgvector-${PGVECTOR_VERSION} && make -j && make install) | ||
|
||
|
@@ -55,7 +55,7 @@ COPY . . | |
RUN sudo rm -rf build \ | ||
&& mkdir build \ | ||
&& cd build \ | ||
&& cmake -DCMAKE_BUILD_TYPE=Debug .. \ | ||
&& cmake .. \ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. info: Removed '-DCMAKE_BUILD_TYPE=Debug'. This changes the build type from Debug to default (usually Release). Ensure this is intentional and won't affect development workflows. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why change this? The Dockerfile is meant for development and I often use GDB from inside it, so having a build with symbols is often good. Though I usually attach a folder from the host and rebuild the DB for debugging, so it is not a big deal, I would still like to know why the change was made. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The build did not succeed for me without it |
||
&& make -j install | ||
|
||
# Install benchmarking tools in build folder | ||
|
@@ -65,7 +65,7 @@ RUN git clone https://github.com/lanterndata/benchmark build/benchmark \ | |
&& pip install -r external/requirements.txt | ||
|
||
# Install perf | ||
RUN sudo apt update && sudo apt install -y linux-tools-common linux-tools-generic linux-tools-`uname -r` | ||
RUN sudo apt update && sudo apt install -y linux-tools-common linux-tools-generic | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. info: Removed 'linux-tools-`uname -r`' |
||
# in host, enable perf_event paranoid via | ||
# echo -1 | sudo tee /proc/sys/kernel/perf_event_paranoid | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
"""Check Lantern's weighted vector search over a SPARSEVEC and a VECTOR column.

The script connects to a local Postgres instance, (re)creates the `lantern`
extension, populates a small 8-row table holding the same 3-dimensional
points both as dense vectors and as sparse vectors, and then runs every
`lantern.weighted_vector_search*` variant listed in `distance_metrics`.
For each variant the three returned ids and their weighted distances
(recomputed in the SELECT list and rounded to 2 decimals) are compared with
hard-coded expected values.
"""
import psycopg2

# Database connection parameters
db_params = {
    'database': 'postgres',
    'user': 'postgres',  # Update with your username if different
    'password': '',  # Update with your password if required
    'host': 'localhost',
    'port': '5432'
}

# SQL distance operator used for each metric name.
distance_operators = {'l2sq': '<->', 'cos': '<=>', 'hamming': '<+>'}

# The empty string selects the un-suffixed `weighted_vector_search` function,
# which is exercised with the l2sq operator.
distance_metrics = ["", "cos", "l2sq"]

# Query vectors: one sparse, one dense.
query_s = "{1:0.4,2:0.3,3:0.2}/3"
query_v = "[-0.5,-0.1,-0.3]"

# Expected (id, rounded weighted distance) rows for LIMIT 3.
expected_results_cos = [('111', 0.22), ('110', 0.24), ('101', 0.39)]
expected_results_l2sq = [('000', 0.54), ('100', 0.78), ('010', 0.87)]

# Connect to the database
conn = psycopg2.connect(**db_params)
try:
    conn.autocommit = True
    # The cursor context manager closes the cursor on exit; the connection
    # itself is closed in the `finally` below, even if an assertion fails.
    with conn.cursor() as cur:
        # Execute the SQL commands
        cur.execute("""
        DROP EXTENSION IF EXISTS lantern;
        CREATE EXTENSION IF NOT EXISTS vector;
        CREATE EXTENSION IF NOT EXISTS lantern;

        CREATE TABLE IF NOT EXISTS small_world_weighted_search (
        id VARCHAR(3) PRIMARY KEY,
        b BOOLEAN,
        v VECTOR(3),
        s SPARSEVEC(3)
        );

        INSERT INTO small_world_weighted_search (id, b, v, s) VALUES
        ('000', TRUE, '[0,0,0]', '{}/3'),
        ('001', TRUE, '[0,0,1]', '{3:1}/3'),
        ('010', FALSE, '[0,1,0]' , '{2:1}/3'),
        ('011', TRUE, '[0,1,1]', '{2:1,3:1}/3'),
        ('100', FALSE, '[1,0,0]', '{1:1}/3'),
        ('101', FALSE, '[1,0,1]', '{1:1,3:1}/3'),
        ('110', FALSE, '[1,1,0]', '{1:1,2:1}/3'),
        ('111', TRUE, '[1,1,1]', '{1:1,2:1,3:1}/3')
        ON CONFLICT DO NOTHING;
        """)

        for distance_metric in distance_metrics:
            operator = distance_operators[distance_metric or 'l2sq']
            function = f'weighted_vector_search_{distance_metric}' if distance_metric else 'weighted_vector_search'
            # All interpolated values are constants defined in this script.
            query = f"""
            SELECT
            id,
            round(cast(0.9 * (s {operator} '{query_s}'::sparsevec) + 0.1 * (v {operator} '{query_v}'::vector) as numeric), 2) as dist
            FROM lantern.{function}(CAST(NULL as "small_world_weighted_search"), distance_operator=>'{operator}',
            w1=> 0.9, col1=>'s'::text, vec1=>'{query_s}'::sparsevec,
            w2=> 0.1, col2=>'v'::text, vec2=>'{query_v}'::vector
            )
            LIMIT 3;
            """
            cur.execute(query)
            res = cur.fetchall()
            # `round(... as numeric)` comes back as Decimal; compare as floats.
            res = [(key, float(value)) for key, value in res]

            if distance_metric == 'cos':
                expected = expected_results_cos
            else:
                expected = expected_results_l2sq
            assert res == expected, f"{function}: expected {expected}, got {res}"
finally:
    conn.close()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
info: Removed '-j' flag from make install. This might slow down the build process.