Skip to content

Commit

Permalink
Merge pull request #343 from EvgSkv/ti2023
Browse files Browse the repository at this point in the history
Improve DuckDB support.
  • Loading branch information
EvgSkv committed Jun 22, 2024
2 parents 1da5a0d + 114d9aa commit cc889cc
Show file tree
Hide file tree
Showing 20 changed files with 201 additions and 45 deletions.
26 changes: 21 additions & 5 deletions common/concertina_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ def UnderstandIterations(self):

def __init__(self, config, engine, display_mode='colab', iterations=None):
self.config = config
self.recent_display_update_seconds = 0
self.display_update_period = 0.0000000001
self.iterations = iterations or {}
self.action_iteration = None
self.iteration_repetitions = None
Expand Down Expand Up @@ -197,7 +199,7 @@ def RunOneAction(self):
def Run(self):
while self.actions_to_run:
self.RunOneAction()
self.UpdateDisplay()
self.UpdateDisplay(final=True)

def ActionColor(self, a):
if self.action[a].get('type') == 'data':
Expand Down Expand Up @@ -258,9 +260,12 @@ def ColoredNode(node):
else:
assert False, self.display_mode
elif node in self.complete_actions and self.display_mode == 'colab-text' and self.actions_to_run:
if node not in self.engine.completion_time:
suffix = ' (input data)'
else:
suffix = ' (%d ms)' % self.engine.completion_time[node]
return (
'<span style="opacity: 0.6;">' + node +
' (%d ms)' % self.engine.completion_time[node] + '</span>'
'<span style="opacity: 0.6;">' + node + suffix + '</span>'
)
else:
if node in self.complete_actions:
Expand Down Expand Up @@ -305,7 +310,7 @@ def ProgressBar(self):
'.' * (30 - (complete_work * 30 // total_work)) +
']' + ' %.2f%% complete.' % percent_complete)
if total_work == complete_work:
progress_bar = '[' + 'Execution complete.'.center(30, ' ') + ']'
progress_bar = '[' + 'Execution complete.'.center(30, ' ') + ']' + ' ' * 30
return progress_bar

def StateAsSimpleHTML(self):
Expand Down Expand Up @@ -333,7 +338,18 @@ def Display(self):
else:
assert 'Unexpected mode:', self.display_mode

def UpdateDisplay(self):
def UpdateDisplay(self, final=False):
# Seconds elapsed since a fixed reference date; only differences between readings matter.
now = (datetime.datetime.now() -
datetime.datetime(1, 12, 25)).total_seconds()
# The update period starts tiny and grows by 20% per call (capped at 0.5s), so
# the displayed state stays current even if the process fails at an early step.
self.display_update_period = min(0.5, self.display_update_period * 1.2)
if (now - self.recent_display_update_seconds <
self.display_update_period and
not final):
# Avoid frequent display updates slowing down execution.
return
self.recent_display_update_seconds = now
if self.display_mode == 'colab':
update_display(self.AsGraphViz(), display_id=self.display_id)
elif self.display_mode == 'terminal':
Expand Down
22 changes: 16 additions & 6 deletions common/logica_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,13 @@ def SetRunOnlyTests(cls, value):

@classmethod
def RunTest(cls, name, src, predicate, golden, user_flags,
import_root=None, use_concertina=False):
import_root=None, use_concertina=False,
duckify_psql=False):
if cls.RUN_ONLY and name not in cls.RUN_ONLY:
return
RunTest(name, src, predicate, golden, user_flags,
cls.GOLDEN_RUN, cls.ANNOUNCE_TESTS,
import_root, use_concertina)
import_root, use_concertina, duckify_psql)

@classmethod
def RunTypesTest(cls, name, src=None, golden=None):
Expand Down Expand Up @@ -107,19 +108,24 @@ def RunTypesTest(name, src=None, golden=None,
def RunTest(name, src, predicate, golden,
user_flags=None,
overwrite=False, announce=False,
import_root=None, use_concertina=False):
import_root=None, use_concertina=False,
duckify_psql=False):
"""Run one test."""
if announce:
print('Running test:', name)
test_result = '{warning}RUNNING{end}'
print(color.Format('% 50s %s' % (name, test_result)))

if duckify_psql:
duck_src = '/tmp/%s.l' % name
with open(duck_src, 'w') as duck_source:
duck_source.write(open(src).read().replace('"psql"', '"duckdb"'))
src = duck_src
if use_concertina:
result = run_in_terminal.Run(src, predicate, display_mode='silent')
else:
result = logica_lib.RunPredicate(src, predicate,
user_flags=user_flags,
import_root=import_root)
user_flags=user_flags,
import_root=import_root)
# Hacky way to remove query that BQ prints.
if '+---' in result[200:]:
result = result[result.index('+---'):]
Expand All @@ -135,6 +141,10 @@ def RunTest(name, src, predicate, golden,
if result == golden_result:
test_result = '{ok}PASSED{end}'
else:
# print('\n' * 3)
# print(golden_result)
# print(result)
# print('\n' * 3)
p = subprocess.Popen(['diff', '--strip-trailing-cr', '-', golden], stdin=subprocess.PIPE)
p.communicate(result.encode())
if golden_result == 'This file does not exist. (<_<)':
Expand Down
12 changes: 10 additions & 2 deletions compiler/dialect_libraries/duckdb_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,9 @@
"(array_agg({arg_1} order by {value_1}))[1:{lim}]",
{arg_1: a.arg, value_1: a.value, lim: l});
Array(arr) =
SqlExpr("ArgMin({v}, {a})", {a:, v:}) :- Arrow(a, v) == arr;
Array(a) = SqlExpr(
"ARRAY_AGG({value} order by {arg})",
{arg: a.arg, value: a.value});
RecordAsJson(r) = SqlExpr(
"ROW_TO_JSON({r})", {r:});
Expand All @@ -53,4 +54,11 @@
Num(a) = a;
Str(a) = a;
NaturalHash(x) = ToInt64(SqlExpr("hash(cast({x} as string)) // cast(2 as ubigint)", {x:}));
# This is unsafe to use: due to the way Logica compiles, this number
# will be unique for each use of the variable, which can be a pain to debug.
# It is OK to use it as long as you understand and are OK with the difficulty.
UnsafeToUseUniqueNumber() = SqlExpr("nextval('eternal_logical_sequence')", {});
"""
10 changes: 3 additions & 7 deletions compiler/dialects.py
Original file line number Diff line number Diff line change
Expand Up @@ -399,21 +399,17 @@ def BuiltInFunctions(self):
return {
'Set': 'DistinctListAgg({0})',
'Element': "array_extract({0}, {1}+1)",
'Range': ('(select [n] from (with recursive t as'
'(select 0 as n union all '
'select n + 1 as n from t where n + 1 < {0}) '
'select n from t) where n < {0})'),
'Range': 'Range({0})',
'ValueOfUnnested': '{0}.unnested_pod',
'List': '[{0}]',
'Size': 'JSON_ARRAY_LENGTH({0})',
'Join': 'JOIN_STRINGS({0}, {1})',
'Count': 'COUNT(DISTINCT {0})',
'StringAgg': 'GROUP_CONCAT(%s)',
'Sort': 'SortList({0})',
'MagicalEntangle': '(CASE WHEN {1} = 0 THEN {0} ELSE NULL END)',
'Format': 'Printf(%s)',
'Least': 'MIN(%s)',
'Greatest': 'MAX(%s)',
'Least': 'LEAST(%s)',
'Greatest': 'GREATEST(%s)',
'ToString': 'CAST(%s AS TEXT)',
'DateAddDay': "DATE({0}, {1} || ' days')",
'DateDiffDay': "CAST(JULIANDAY({0}) - JULIANDAY({1}) AS INT64)"
Expand Down
7 changes: 4 additions & 3 deletions compiler/rule_translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,11 @@ def HeadToSelect(head):
return (select, aggregated_vars)


def AllMentionedVariables(x, dive_in_combines=False):
def AllMentionedVariables(x, dive_in_combines=False, this_is_select=False):
"""Extracting all variables mentioned in an expression."""
r = []
if isinstance(x, dict) and 'variable' in x:
# In select there can be a variable named variable.
if isinstance(x, dict) and 'variable' in x and not this_is_select:
r.append(x['variable']['var_name'])
if isinstance(x, list):
for v in x:
Expand Down Expand Up @@ -249,7 +250,7 @@ def InternalVariables(self):

def AllVariables(self):
r = set()
r |= AllMentionedVariables(self.select)
r |= AllMentionedVariables(self.select, this_is_select=True)
r |= AllMentionedVariables(self.vars_unification)
r |= AllMentionedVariables(self.constraints)
r |= AllMentionedVariables(self.unnestings)
Expand Down
17 changes: 14 additions & 3 deletions compiler/universe.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ def Preamble(self):
'-- Initializing PostgreSQL environment.\n'
'set client_min_messages to warning;\n'
'create schema if not exists logica_home;\n\n')
elif self.Engine() == 'duckdb':
preamble += (
'-- Initializing DuckDB environment.\n'
'create schema if not exists logica_home;\n'
'create sequence if not exists eternal_logical_sequence;\n\n')
return preamble

def BuildFlagValues(self):
Expand Down Expand Up @@ -273,7 +278,7 @@ def OrderBy(self, predicate_name):
def Dataset(self):
default_dataset = 'logica_test'
# This change is intended for all engines in the future.
if self.Engine() == 'psql':
if self.Engine() in ['psql', 'duckdb']:
default_dataset = 'logica_home'
if self.Engine() == 'sqlite' and 'logica_home' in self.AttachedDatabases():
default_dataset = 'logica_home'
Expand All @@ -296,7 +301,7 @@ def ShouldTypecheck(self):

engine_annotation = list(self.annotations['@Engine'].values())[0]
if 'type_checking' not in engine_annotation:
if engine == 'psql':
if engine in ['psql', 'duckdb']:
return True
else:
return False
Expand Down Expand Up @@ -590,7 +595,13 @@ def CheckDistinctConsistency(self):
def UnfoldRecursion(self, rules):
annotations = Annotations(rules, {})
f = functors.Functors(rules)
return f.UnfoldRecursions(annotations.annotations.get('@Recursive', {}))
depth_map = annotations.annotations.get('@Recursive', {})
# Annotations are not ready at this point.
# if (self.execution.annotations.Engine() == 'duckdb'):
# for p in depth_map:
# # DuckDB struggles with long queries.
# depth_map[p]['iterative'] = True
return f.UnfoldRecursions(depth_map)

def BuildUdfs(self):
"""Build UDF definitions."""
Expand Down
12 changes: 12 additions & 0 deletions integration_tests/duckdb_combine_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
+-------+------+--------------+
| col0 | col1 | col2 |
+-------+------+--------------+
| test1 | 1 | [1] |
| test1 | 2 | [2] |
| test1 | 3 | [3] |
| test1 | 4 | [4] |
| test2 | 1 | [1, 2, 3, 4] |
| test2 | 2 | [1, 3, 4] |
| test2 | 3 | [1, 2, 4] |
| test2 | 4 | [1, 2, 3, 4] |
+-------+------+--------------+
18 changes: 18 additions & 0 deletions integration_tests/duckdb_flow_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
+------+------+--------------------+
| col0 | col1 | logica_value |
+------+------+--------------------+
| 0.0 | 1.0 | 2.9970000000000003 |
| 0.0 | 4.0 | 9.994999999999997 |
| 1.0 | 0.0 | 0.0 |
| 1.0 | 2.0 | 0.0 |
| 1.0 | 5.0 | 9.992999999999999 |
| 2.0 | 1.0 | 6.995999999999998 |
| 2.0 | 3.0 | 2.999 |
| 2.0 | 4.0 | 0.0 |
| 3.0 | 2.0 | 0.0 |
| 3.0 | 5.0 | 0.0 |
| 4.0 | 0.0 | 0.0 |
| 4.0 | 2.0 | 9.994999999999997 |
| 5.0 | 1.0 | 0.0 |
| 5.0 | 3.0 | 9.992999999999999 |
+------+------+--------------------+
6 changes: 6 additions & 0 deletions integration_tests/duckdb_graph_coloring_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
+------+---------------+
| col0 | col1 |
+------+---------------+
| G1 | colorable |
| G2 | not colorable |
+------+---------------+
5 changes: 5 additions & 0 deletions integration_tests/duckdb_pair_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
+------------------------------------------------------------------------------------------------------------------------------+
| logica_value |
+------------------------------------------------------------------------------------------------------------------------------+
| [{'word': 'sun', 'length': 3}, {'word': 'fire', 'length': 4}, {'word': 'wind', 'length': 4}, {'word': 'water', 'length': 5}] |
+------------------------------------------------------------------------------------------------------------------------------+
12 changes: 12 additions & 0 deletions integration_tests/duckdb_purchase_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
+-------------+---------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+
| purchase_id | items | expensive_items | buyer_id |
+-------------+---------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+
| 1 | [{'item': 'Soap', 'quantity': 3, 'price': 20}] | [{'item': 'Soap', 'more_expensive_than': ['Bread', 'Coffee', 'Firewood', 'Milk']}] | 11 |
| 2 | [{'item': 'Milk', 'quantity': 1, 'price': 10}] | [{'item': 'Milk', 'more_expensive_than': ['Bread', 'Coffee']}] | 12 |
| 3 | [{'item': 'Bread', 'quantity': 2, 'price': 5}, {'item': 'Coffee', 'quantity': 1, 'price': 7}] | [{'item': 'Coffee', 'more_expensive_than': ['Bread']}] | 13 |
| 4 | [{'item': 'Firewood', 'quantity': 5, 'price': 15}, {'item': 'Soap', 'quantity': 1, 'price': 20}] | [{'item': 'Firewood', 'more_expensive_than': ['Bread', 'Coffee', 'Milk']}, {'item': 'Soap', 'more_expensive_than': ['Bread', 'Coffee', 'Firewood', 'Milk']}] | 14 |
| 5 | [{'item': 'Bread', 'quantity': 1, 'price': 5}, {'item': 'Coffee', 'quantity': 2, 'price': 7}, {'item': 'Milk', 'quantity': 4, 'price': 10}] | [{'item': 'Coffee', 'more_expensive_than': ['Bread']}, {'item': 'Milk', 'more_expensive_than': ['Bread', 'Coffee']}] | 12 |
| 6 | [{'item': 'Firewood', 'quantity': 1, 'price': 15}, {'item': 'Soap', 'quantity': 3, 'price': 20}] | [{'item': 'Firewood', 'more_expensive_than': ['Bread', 'Coffee', 'Milk']}, {'item': 'Soap', 'more_expensive_than': ['Bread', 'Coffee', 'Firewood', 'Milk']}] | 13 |
| 7 | [{'item': 'Bread', 'quantity': 2, 'price': 5}, {'item': 'Coffee', 'quantity': 1, 'price': 7}, {'item': 'Milk', 'quantity': 1, 'price': 10}] | [{'item': 'Coffee', 'more_expensive_than': ['Bread']}, {'item': 'Milk', 'more_expensive_than': ['Bread', 'Coffee']}] | 14 |
| 8 | [{'item': 'Firewood', 'quantity': 5, 'price': 15}, {'item': 'Soap', 'quantity': 1, 'price': 20}] | [{'item': 'Firewood', 'more_expensive_than': ['Bread', 'Coffee', 'Milk']}, {'item': 'Soap', 'more_expensive_than': ['Bread', 'Coffee', 'Firewood', 'Milk']}] | 11 |
+-------------+---------------------------------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------------------------+----------+
15 changes: 15 additions & 0 deletions integration_tests/duckdb_recursion_test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
+--------+-----------+--------------------------------------------------------------------------------------------+
| vertex | component | distances |
+--------+-----------+--------------------------------------------------------------------------------------------+
| 1 | 1 | [{'y': 5, 'd': 4}, {'y': 4, 'd': 3}, {'y': 3, 'd': 2}, {'y': 2, 'd': 1}, {'y': 1, 'd': 0}] |
| 2 | 1 | [{'y': 5, 'd': 3}, {'y': 4, 'd': 2}, {'y': 3, 'd': 1}, {'y': 2, 'd': 0}, {'y': 1, 'd': 1}] |
| 3 | 1 | [{'y': 5, 'd': 2}, {'y': 4, 'd': 1}, {'y': 3, 'd': 0}, {'y': 2, 'd': 1}, {'y': 1, 'd': 2}] |
| 4 | 1 | [{'y': 5, 'd': 1}, {'y': 4, 'd': 0}, {'y': 3, 'd': 1}, {'y': 2, 'd': 2}, {'y': 1, 'd': 3}] |
| 5 | 1 | [{'y': 5, 'd': 0}, {'y': 4, 'd': 1}, {'y': 3, 'd': 2}, {'y': 2, 'd': 3}, {'y': 1, 'd': 4}] |
| 6 | 6 | [{'y': 8, 'd': 2}, {'y': 7, 'd': 1}, {'y': 6, 'd': 0}] |
| 7 | 6 | [{'y': 8, 'd': 1}, {'y': 7, 'd': 0}, {'y': 6, 'd': 1}] |
| 8 | 6 | [{'y': 8, 'd': 0}, {'y': 7, 'd': 1}, {'y': 6, 'd': 2}] |
| 9 | 9 | [{'y': 11, 'd': 1}, {'y': 10, 'd': 1}, {'y': 9, 'd': 0}] |
| 10 | 9 | [{'y': 11, 'd': 1}, {'y': 10, 'd': 0}, {'y': 9, 'd': 1}] |
| 11 | 9 | [{'y': 11, 'd': 0}, {'y': 10, 'd': 1}, {'y': 9, 'd': 1}] |
+--------+-----------+--------------------------------------------------------------------------------------------+
5 changes: 3 additions & 2 deletions integration_tests/psql_combine_test.l
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ T(4);
@With(R);
R(x, l) :- T(x), l == (if x == 2 || x == 3 then [x] else []);

P1(x, y) :- T(x), y List= x;
P2(x, col1? List= y) distinct :- T(x), y in [1,2,3,4], R(x, l), ~(y in l);
P1(x, y) :- T(x), y Array= x -> x;
P2(x, col1? Array= y -> y) distinct :- T(x), y in [1,2,3,4], R(x, l), ~(y in l);

@OrderBy(Test, "col0", "col1");
Test("test1", x, y) :- P1(x, y);
Test("test2", x, y) :- P2(x, y);
3 changes: 3 additions & 0 deletions integration_tests/psql_graph_coloring_test.l
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
-+-(left:x, right:y) = ToString(x) ++ ":" ++ ToString(y);
G("a" -+- i, "a" -+- (i + 1)) :- i in Range(6);

@Recursive(L, 8, iterative: true);
L(1, 1, 2, 1) distinct;
L(x, y, x * 2, y) distinct :- L(a, b, x, y);
L(x, y, x, y * 3) distinct :- L(a, b, x, y);

G("b" -+- x -+- y, "b" -+- x1 -+- y1) :- L(x, y, x1, y1), x < 6, y < 6;

E(a, b) :- G(a, b) | G(b, a);
Expand All @@ -34,6 +36,7 @@ E(a, b) :- G(a, b) | G(b, a);
# Finding connected components.
#

@Recursive(ComponentOf, 25);
ComponentOf(x) Min= x :- E(x);
ComponentOf(x) Min= ComponentOf(y) :- E(x, y);

Expand Down
13 changes: 7 additions & 6 deletions integration_tests/psql_purchase_test.l
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Items(item: "Bread", price: 5);
Items(item: "Coffee", price: 7);
Items(item: "Firewood", price: 15);

MoreExpensiveThan(item1) List= item2 :-
MoreExpensiveThan(item1) Array= item2 -> item2 :-
Items(item: item1, price: price1),
Items(item: item2, price: price2),
price1 > price2;
Expand Down Expand Up @@ -52,16 +52,17 @@ Buyer(buyer_id: 13, purchase_id: 6);
Buyer(buyer_id: 14, purchase_id: 7);
Buyer(buyer_id: 11, purchase_id: 8);

@OrderBy(Purchase, "purchase_id");
@OrderBy(Purchase, "purchase_id", "items", "expensive_items", "buyer_id");
Purchase(purchase_id:, items:, expensive_items:, buyer_id:) :-
Buyer(buyer_id:, purchase_id:),
items List= (
{item:, quantity:, price:} :-
items Array= (
item -> {item:, quantity:, price:} :-
BuyEvent(purchase_id:, item:, quantity:),
Items(item:, price:)
),
expensive_items List= (
{item:, more_expensive_than: MoreExpensiveThan(item)} :-
expensive_items Array= (
{item:, more_expensive_than:} -> {item:, more_expensive_than:} :-
more_expensive_than = MoreExpensiveThan(item),
item_record in items,
item = item_record.item
);
Expand Down
Loading

0 comments on commit cc889cc

Please sign in to comment.