Skip to content

Commit

Permalink
WIP convert SQL select into data frame expression to support ordering…
Browse files Browse the repository at this point in the history
…, limit, offset
  • Loading branch information
agentm committed Aug 9, 2023
1 parent e0cd333 commit 8913f08
Show file tree
Hide file tree
Showing 6 changed files with 122 additions and 49 deletions.
8 changes: 4 additions & 4 deletions docs/dataframes.markdown
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ The default sort order is `ascending`.
## Examples

```
TutorialD (master/main): :showdataframe s orderby {status}
TutorialD (master/main): :showdataframe (s) orderby {status}
┌──┬───────────┬─────────┬────────────┬────────────────┐
│DF│city::Text↕│s#::Text↕│sname::Text↕│status::Integer⬆│
├──┼───────────┼─────────┼────────────┼────────────────┤
Expand All @@ -27,21 +27,21 @@ TutorialD (master/main): :showdataframe s orderby {status}
│4 │"Athens" │"S5" │"Adams" │30 │
│5 │"Paris" │"S3" │"Blake" │30 │
└──┴───────────┴─────────┴────────────┴────────────────┘
TutorialD (master/main): :showdataframe s{status} orderby {status}
TutorialD (master/main): :showdataframe (s{status}) orderby {status}
┌──┬────────────────┐
│DF│status::Integer⬆│
├──┼────────────────┤
│1 │10 │
│2 │20 │
│3 │30 │
└──┴────────────────┘
TutorialD (master/main): :showdataframe s{status} orderby {status descending} limit 1
TutorialD (master/main): :showdataframe (s{status}) orderby {status descending} limit 1
┌──┬────────────────┐
│DF│status::Integer⬇│
├──┼────────────────┤
│1 │30 │
└──┴────────────────┘
TutorialD (master/main): :showdataframe s{status} orderby {status descending} offset 1 limit 3
TutorialD (master/main): :showdataframe (s{status}) orderby {status descending} offset 1 limit 3
┌──┬────────────────┐
│DF│status::Integer⬇│
├──┼────────────────┤
Expand Down
4 changes: 3 additions & 1 deletion project-m36.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,9 @@ Test-Suite test-sql
import: commontest
type: exitcode-stdio-1.0
main-is: SQL/InterpreterTest.hs
Other-Modules: SQL.Interpreter.Select, SQL.Interpreter.Base, TutorialD.Interpreter.Base, TutorialD.Interpreter.RelationalExpr, TutorialD.Interpreter.Types, SQL.Interpreter.Convert
Other-Modules: SQL.Interpreter.Select, SQL.Interpreter.Base, TutorialD.Interpreter.Base, TutorialD.Interpreter.RelationalExpr, TutorialD.Interpreter.Types, SQL.Interpreter.Convert, TutorialD.Interpreter.DatabaseContextExpr, TutorialD.Interpreter.RODatabaseContextOperator
TutorialD.Printer

Build-Depends: base, HUnit, Cabal, containers, hashable, unordered-containers, mtl, vector, time, bytestring, uuid, stm, deepseq, deepseq-generics, parallel, cassava, attoparsec, gnuplot, directory, temporary, haskeline, megaparsec, text, base64-bytestring, data-interval, filepath, stm-containers, list-t, project-m36, random, MonadRandom, semigroups, parser-combinators, prettyprinter, scientific, recursion-schemes

Test-Suite test-tutoriald
Expand Down
61 changes: 52 additions & 9 deletions src/bin/SQL/Interpreter/Convert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
module SQL.Interpreter.Convert where
import ProjectM36.Base
import ProjectM36.Error
import ProjectM36.DataFrame (DataFrameExpr(..), AttributeOrderExpr(..), AttributeOrder(..),Order(..))
import ProjectM36.AttributeNames as A
import ProjectM36.Attribute as A
import qualified ProjectM36.WithNameExpr as W
Expand Down Expand Up @@ -32,17 +33,22 @@ class SQLConvert sqlexpr where
convert :: TypeForRelExprF -> sqlexpr -> Either SQLError (ConverterF sqlexpr)

instance SQLConvert Select where
type ConverterF Select = RelationalExpr
type ConverterF Select = DataFrameExpr
convert typeF sel = do
projF <- convert typeF (projectionClause sel)
let baseDFExpr = DataFrameExpr { convertExpr = ExistingRelation relationTrue,
orderExprs = [],
offset = Nothing,
limit = Nothing }
case tableExpr sel of
Nothing -> pure $ ExistingRelation relationTrue
Nothing -> pure baseDFExpr
Just tExpr -> do
(rvExpr, withNames) <- convert typeF tExpr
(dfExpr, withNames) <- convert typeF tExpr
let withF = case withNames of
[] -> id
_ -> With withNames
pure (withF (projF rvExpr))
pure (dfExpr { convertExpr = withF (projF (convertExpr dfExpr)) })


tableAliasesAsWithNameAssocs :: TableAliasMap -> Either SQLError WithNamesAssocs
tableAliasesAsWithNameAssocs tmap =
Expand Down Expand Up @@ -98,7 +104,6 @@ instance SQLConvert [SelectItem] where
pure $ acc { taskExtenders = AttributeExtendTupleExpr newAttrName atomExpr : taskExtenders acc,
taskProjections = S.insert (QualifiedProjectionName [ProjectionName newAttrName]) (taskProjections acc)
}
traceShowM ("selItems", selItems)
task <- foldM selItemFolder emptyTask (zip [1::Int ..] selItems)
--apply projections
fProjection <- if S.null (taskProjections task) then
Expand All @@ -123,8 +128,8 @@ instance SQLConvert [SelectItem] where
pure (fProjection . fExtended . fRenames)

instance SQLConvert TableExpr where
type ConverterF TableExpr = (RelationalExpr, WithNamesAssocs)
--does not handle non-relational aspects such as offset, order by, or limit
-- pass the WITH name associations up to the caller because they must be applied after the projections
type ConverterF TableExpr = (DataFrameExpr, WithNamesAssocs)
convert typeF tExpr = do
(fromExpr, tableAliasMap) <- convert typeF (fromClause tExpr)
let tableAliasMap' = M.filterWithKey filterRedundantAlias tableAliasMap
Expand All @@ -139,11 +144,33 @@ instance SQLConvert TableExpr where
restrictPredExpr <- convert typeF whereExpr
pure $ Restrict restrictPredExpr fromExpr
Nothing -> pure fromExpr
pure (expr', withExprs)
orderExprs <- convert typeF (orderByClause tExpr)
let dfExpr = DataFrameExpr { convertExpr = expr',
orderExprs = orderExprs,
offset = offsetClause tExpr,
limit = limitClause tExpr }
pure (dfExpr, withExprs)
--group by
--having


-- | Convert an SQL @ORDER BY@ list into data frame attribute orderings.
-- A sort key must convert to a plain attribute reference; anything else, and
-- any explicit nulls ordering, is rejected with 'NotSupportedError'.
instance SQLConvert [SortExpr] where
  type ConverterF [SortExpr] = [AttributeOrderExpr]
  convert typeF = mapM sortExprToOrder
    where
      sortExprToOrder (SortExpr keyExpr mDir mNulls) = do
        keyAtom <- convert typeF keyExpr
        -- only a bare attribute can serve as a data frame sort key
        attrName <- case keyAtom of
          AttributeAtomExpr nam -> pure nam
          other -> Left (NotSupportedError (T.pack (show other)))
        case mNulls of
          Nothing -> pure ()
          Just nullsOrder -> Left (NotSupportedError (T.pack (show nullsOrder)))
        pure (AttributeOrderExpr attrName (maybe AscendingOrder toOrder mDir))
      -- a missing direction defaults to ascending (handled by 'maybe' above)
      toOrder Ascending = AscendingOrder
      toOrder Descending = DescendingOrder

instance SQLConvert [TableRef] where
-- returns base relation expressions plus top-level renames required
type ConverterF [TableRef] = (RelationalExpr, TableAliasMap)
Expand Down Expand Up @@ -302,6 +329,20 @@ instance SQLConvert RestrictionExpr where
b <- convert typeF exprB
f <- lookupFunc qn
pure (AtomExprPredicate (f [a,b]))
InExpr inOrNotIn sexpr (InList matches') -> do
  -- translate @x IN (a, b, ...)@ into a chain of @eq(x, a) or eq(x, b) ...@
  eqExpr <- convert typeF sexpr
  -- an empty IN list cannot be expressed as a chain of equality tests;
  -- report it as unsupported instead of crashing on a partial pattern match
  (match, matches) <- case reverse matches' of
    [] -> Left (NotSupportedError (T.pack "IN with an empty list"))
    (m:ms) -> pure (m, ms)
  firstItem <- convert typeF match
  let inFunc a b = AtomExprPredicate (FunctionAtomExpr "eq" [a,b] ())
      predExpr' = inFunc eqExpr firstItem
      -- the list was reversed above, so prepending each item restores the
      -- original left-to-right order of the IN list in the resulting predicate
      folder predExpr'' sexprItem = do
        item <- convert typeF sexprItem
        pure $ OrPredicate (inFunc eqExpr item) predExpr''
  res <- foldM folder predExpr' matches --be careful here once we introduce NULLs
  case inOrNotIn of
    In -> pure res
    NotIn -> pure (NotPredicate res)


-- this could be amended to support more complex expressions such as coalesce by returning an [AtomExpr] -> AtomExpr function
lookupFunc :: QualifiedName -> Either SQLError ([AtomExpr] -> AtomExpr)
Expand Down Expand Up @@ -340,6 +381,8 @@ instance SQLConvert ScalarExpr where
b <- convert typeF exprB
f <- lookupFunc qn
pure $ f [a,b]
-- PrefixOperator qn expr -> do


instance SQLConvert JoinOnCondition where
type ConverterF JoinOnCondition = (RelationalExpr -> RelationalExpr)
Expand Down
17 changes: 10 additions & 7 deletions src/bin/TutorialD/Interpreter/RODatabaseContextOperator.hs
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,16 @@ interpretRODatabaseContextOp sessionId conn tutdstring = case parse roDatabaseCo
showDataFrameP :: Parser RODatabaseContextOperator
showDataFrameP = do
colonOp ":showdataframe"
relExpr <- relExprP
reservedOp "orderby"
attrOrdersExpr <- attrOrdersExprP
mbOffset <- optional offsetP
mbLimit <- optional limitP
pure $ ShowDataFrame (DF.DataFrameExpr relExpr attrOrdersExpr mbOffset mbLimit)
dfExpr <- dataFrameP
pure (ShowDataFrame dfExpr)

-- | Parse a data frame expression: a parenthesized relational expression,
-- optionally followed by an ordering clause (see 'attrOrdersExprP') and by
-- limit and offset clauses. The limit and offset clauses are accepted in
-- either order, so both @offset 1 limit 3@ and @limit 10 offset 20@ parse.
dataFrameP :: Parser DF.DataFrameExpr
dataFrameP = do
  relExpr <- parens relExprP
  -- a missing ordering clause means "no ordering"
  attrOrdersExpr <- try attrOrdersExprP <|> pure []
  mbLeadingLimit <- optional limitP
  mbOffset <- optional offsetP
  -- only look for a trailing limit when none preceded the offset, so that
  -- a limit can be given at most once
  mbLimit <- maybe (optional limitP) (pure . Just) mbLeadingLimit
  pure $ DF.DataFrameExpr relExpr attrOrdersExpr mbOffset mbLimit

offsetP :: Parser Integer
offsetP = do
Expand All @@ -221,7 +224,7 @@ limitP = do
natural

attrOrdersExprP :: Parser [DF.AttributeOrderExpr]
attrOrdersExprP = braces (sepBy attrOrderExprP comma)
attrOrdersExprP = reserved "orderby" *> braces (sepBy attrOrderExprP comma)

-- | Parse a single ordering item: an attribute name followed by whatever
-- 'orderP' accepts.
attrOrderExprP :: Parser DF.AttributeOrderExpr
attrOrderExprP = do
  attrName <- identifier
  DF.AttributeOrderExpr attrName <$> orderP
Expand Down
13 changes: 10 additions & 3 deletions src/lib/ProjectM36/DataFrame.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ import Data.Monoid
#endif

data AttributeOrderExpr = AttributeOrderExpr AttributeName Order
deriving (Show, Generic)
deriving (Show, Generic, Eq)

data AttributeOrder = AttributeOrder AttributeName Order
deriving (Show, Generic)
deriving (Show, Generic, Eq)

-- | Sort direction applied to a single attribute in an 'AttributeOrderExpr'
-- or 'AttributeOrder'.
data Order = AscendingOrder | DescendingOrder
deriving (Eq, Show, Generic)
Expand Down Expand Up @@ -127,7 +127,14 @@ data DataFrameExpr = DataFrameExpr {
offset :: Maybe Integer,
limit :: Maybe Integer
}
deriving (Show, Generic)
deriving (Show, Generic, Eq)

-- | Wrap a relational expression in a 'DataFrameExpr' that requests no
-- ordering, no offset, and no limit.
nakedDataFrameExpr :: RelationalExpr -> DataFrameExpr
nakedDataFrameExpr relExpr =
  DataFrameExpr
    { convertExpr = relExpr
    , orderExprs = []
    , offset = Nothing
    , limit = Nothing
    }

dataFrameAsHTML :: DataFrame -> T.Text
-- web browsers don't display tables with empty cells or empty headers, so we have to insert some placeholders- it's not technically the same, but looks as expected in the browser
Expand Down
68 changes: 43 additions & 25 deletions test/SQL/InterpreterTest.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
{-# LANGUAGE OverloadedStrings #-}
import SQL.Interpreter.Select
import SQL.Interpreter.Convert
import TutorialD.Interpreter.RelationalExpr
--import TutorialD.Interpreter.RelationalExpr
import TutorialD.Interpreter.RODatabaseContextOperator
import ProjectM36.RelationalExpression
import ProjectM36.TransactionGraph
import ProjectM36.DateExamples
Expand All @@ -26,53 +27,67 @@ testSelect = TestCase $ do
let p tin = parse selectP "test" tin
readTests = [
-- simple relvar
("SELECT * FROM test", "test"),
("SELECT * FROM test", "(test)"),
-- simple projection
("SELECT a FROM test", "test{a}"),
("SELECT a FROM test", "(test{a})"),
-- restriction
("SELECT a FROM test where b=3","(test where b=3){a}"),
("SELECT a FROM test where b=3","((test where b=3){a})"),
-- restriction
("SELECT a,b FROM test where b>3","(test where gt(@b,3)){a,b}"),
("SELECT a,b FROM test where b>3","((test where gt(@b,3)){a,b})"),
-- extension mixed with projection
("SELECT a,b,10 FROM test","(test:{attr_3:=10}){a,b,attr_3}"),
("SELECT a,b,10 FROM test","((test:{attr_3:=10}){a,b,attr_3})"),
-- column alias
("SELECT a AS x FROM test","(test rename {a as x}){x}"),
("SELECT a AS x FROM test","((test rename {a as x}){x})"),
-- case insensitivity
("sElECt A aS X FRoM TeST","(test rename {a as x}){x}"),
("sElECt A aS X FRoM TeST","((test rename {a as x}){x})"),
--column from aliased table
("SELECT sup.city FROM s AS sup","with (sup as s) ((sup rename {city as `sup.city`}){`sup.city`})"),
("SELECT sup.city FROM s AS sup","(with (sup as s) ((sup rename {city as `sup.city`}){`sup.city`}))"),
--projection with alias
("SELECT sup.city,sup.sname FROM s AS sup","with (sup as s) ((sup rename {city as `sup.city`,sname as `sup.sname`}){`sup.city`,`sup.sname`})"),
("SELECT sup.* FROM s as sup","with (sup as s) (sup{all from sup})"),
("SELECT sup.city,sup.sname FROM s AS sup","(with (sup as s) ((sup rename {city as `sup.city`,sname as `sup.sname`}){`sup.city`,`sup.sname`}))"),
("SELECT sup.* FROM s as sup","(with (sup as s) (sup{all from sup}))"),
-- natural join
("SELECT * FROM s NATURAL JOIN sp","s join sp"),
("SELECT * FROM s NATURAL JOIN sp","(s join sp)"),
-- cross join
("SELECT * FROM s CROSS JOIN sp", "(s rename {s# as `s.s#`}) join sp"),
("SELECT * FROM s CROSS JOIN sp", "((s rename {s# as `s.s#`}) join sp)"),
-- unaliased join using
("SELECT * FROM sp INNER JOIN sp USING (\"s#\")",
"(sp rename {p# as `sp.p#`, qty as `sp.qty`}) join sp"),
"((sp rename {p# as `sp.p#`, qty as `sp.qty`}) join sp)"),
-- unaliased join
("SELECT * FROM sp JOIN s ON s.s# = sp.s#","((((s rename {s# as `s.s#`,sname as `s.sname`,city as `s.city`,status as `s.status`}) join (sp rename {s# as `sp.s#`,p# as `sp.p#`,qty as `sp.qty`})):{join_1:=eq(@`s.s#`,@`sp.s#`)}) where join_1=True) {all but join_1}"),
("SELECT * FROM sp JOIN s ON s.s# = sp.s#","(((((s rename {s# as `s.s#`,sname as `s.sname`,city as `s.city`,status as `s.status`}) join (sp rename {s# as `sp.s#`,p# as `sp.p#`,qty as `sp.qty`})):{join_1:=eq(@`s.s#`,@`sp.s#`)}) where join_1=True) {all but join_1})"),
-- aliased join on
("SELECT * FROM sp AS sp2 JOIN s AS s2 ON s2.s# = sp2.s#",
"with (s2 as s, sp2 as sp) ((((s2 rename {s# as `s2.s#`,sname as `s2.sname`,city as `s2.city`,status as `s2.status`}) join (sp2 rename {s# as `sp2.s#`,p# as `sp2.p#`,qty as `sp2.qty`})):{join_1:=eq(@`s2.s#`,@`sp2.s#`)}) where join_1=True) {all but join_1}"),
"(with (s2 as s, sp2 as sp) ((((s2 rename {s# as `s2.s#`,sname as `s2.sname`,city as `s2.city`,status as `s2.status`}) join (sp2 rename {s# as `sp2.s#`,p# as `sp2.p#`,qty as `sp2.qty`})):{join_1:=eq(@`s2.s#`,@`sp2.s#`)}) where join_1=True) {all but join_1})"),
-- formula extension
("SELECT status+10 FROM s", "(s : {attr_1:=add(@status,10)}) { attr_1 }"),
("SELECT status+10 FROM s", "((s : {attr_1:=add(@status,10)}) { attr_1 })"),
-- extension and formula
("SELECT status+10,city FROM s", "(s : {attr_1:=add(@status,10)}) {city,attr_1}"),
("SELECT status+10,city FROM s", "((s : {attr_1:=add(@status,10)}) {city,attr_1})"),
-- complex join condition
("SELECT * FROM sp JOIN s ON s.s# = sp.s# AND s.s# = sp.s#","((((s rename {s# as `s.s#`,sname as `s.sname`,city as `s.city`,status as `s.status`}) join (sp rename {s# as `sp.s#`,p# as `sp.p#`,qty as `sp.qty`})):{join_1:=and(eq(@`s.s#`,@`sp.s#`),eq(@`s.s#`,@`sp.s#`))}) where join_1=True) {all but join_1}"),
("SELECT * FROM sp JOIN s ON s.s# = sp.s# AND s.s# = sp.s#",
"(((((s rename {s# as `s.s#`,sname as `s.sname`,city as `s.city`,status as `s.status`}) join (sp rename {s# as `sp.s#`,p# as `sp.p#`,qty as `sp.qty`})):{join_1:=and(eq(@`s.s#`,@`sp.s#`),eq(@`s.s#`,@`sp.s#`))}) where join_1=True) {all but join_1})"),
-- TABLE <tablename>
("TABLE s", "s")
("TABLE s", "(s)"),
-- any, all, some
-- IN()
("SELECT * FROM s WHERE s# IN ('S1','S2')", "(s where eq(@s#,\"S1\") or eq(@s#,\"S2\"))"),
-- NOT IN()
("SELECT * FROM s WHERE s# NOT IN ('S1','S2')",
"(s where not (eq(@s#,\"S1\") or eq(@s#,\"S2\")))"),
-- where exists
--("SELECT * FROM s WHERE EXISTS (SELECT * FROM sp WHERE s.s#=sp.s#)","s"),
-- where not exists
-- group by
-- group by having
-- limit
("SELECT * FROM s LIMIT 10","(s) limit 10"),
-- offset
("SELECT * FROM s OFFSET 10","(s) offset 10"),
-- limit offset
("SELECT * FROM s LIMIT 10 OFFSET 20","(s) limit 10 offset 20"),
-- order by
("SELECT * FROM s ORDER BY status","(s) orderby {status}"),
-- order by descending
("SELECT * FROM s ORDER BY status DESC,city","(s) orderby {status descending,city}")
-- CTEs
]
gfEnv = GraphRefRelationalExprEnv {
gre_context = Just dateExamples,
Expand All @@ -82,20 +97,23 @@ testSelect = TestCase $ do
let gfExpr = runProcessExprM (TransactionMarker transId) (processRelationalExpr expr)
runGraphRefRelationalExprM gfEnv (typeForGraphRefRelationalExpr gfExpr)
check (sql, tutd) = do
print sql
--print sql
--parse SQL
select <- case parse (queryExprP <* eof) "test" sql of
Left err -> error (errorBundlePretty err)
Right x -> do
--print x
print x
pure x
--parse tutd
relExpr <- case parse (relExprP <* eof) "test" tutd of
relExpr <- case parse (dataFrameP <* eof) "test" tutd of
Left err -> error (errorBundlePretty err)
Right x -> pure x
Right x -> do
--print x
pure x
selectAsRelExpr <- case convert typeF select of
Left err -> error (show err)
Right x -> pure x
Right x -> do
pure x

--print ("selectAsRelExpr"::String, selectAsRelExpr)
assertEqual (T.unpack sql) relExpr selectAsRelExpr
Expand Down

0 comments on commit 8913f08

Please sign in to comment.