Skip to content

Commit

Permalink
WIP join checkpoint
Browse files Browse the repository at this point in the history
tests pass
enable bulk renaming in Rename expression to make expression equality easier to find
  • Loading branch information
agentm committed Aug 6, 2023
1 parent 426f82f commit 6cd3b92
Show file tree
Hide file tree
Showing 18 changed files with 148 additions and 82 deletions.
1 change: 0 additions & 1 deletion project-m36.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,6 @@ Executable tutd
TutorialD.Interpreter.Types,
TutorialD.Interpreter.TransGraphRelationalOperator,
TutorialD.Interpreter.SchemaOperator,
TutorialD.Printer,
SQL.Interpreter.Base,
SQL.Interpreter.Select,
SQL.Interpreter.Convert
Expand Down
141 changes: 96 additions & 45 deletions src/bin/SQL/Interpreter/Convert.hs
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,16 @@ import Data.Text as T (pack,intercalate,Text,concat)
import ProjectM36.Relation
import Control.Monad (foldM)
import qualified Data.Set as S
import qualified Data.Map as M
import Data.List (foldl')
import qualified Data.Functor.Foldable as Fold

import Debug.Trace

data SQLError = NotSupportedError T.Text |
TypeMismatch AtomType AtomType |
NoSuchFunction QualifiedName |
TypeMismatchError AtomType AtomType |
NoSuchSQLFunctionError QualifiedName |
DuplicateTableReferenceError QualifiedName |
SQLRelationalError RelationalError
deriving (Show, Eq)

Expand Down Expand Up @@ -99,16 +101,24 @@ instance SQLConvert [SelectItem] where
-- apply extensions
let fExtended = foldl' (\acc ext -> (Extend ext) . acc) id (taskExtenders task)
-- apply rename
fRenames <- foldM (\acc (qProjName, (AliasName newName)) -> do
renamesSet <- foldM (\acc (qProjName, (AliasName newName)) -> do
oldName <- convert typeF qProjName
pure $ Rename oldName newName . acc) id (taskRenames task)
pure $ S.insert (oldName, newName) acc) S.empty (taskRenames task)
let fRenames = if S.null renamesSet then id else Rename renamesSet
pure (fExtended . fProjection . fRenames)

instance SQLConvert TableExpr where
type ConverterF TableExpr = (RelationalExpr, WithNamesAssocs)
--does not handle non-relational aspects such as offset, order by, or limit
convert typeF tExpr = do
(fromExpr, withExprs) <- convert typeF (fromClause tExpr)
(fromExpr, tableAliasMap) <- convert typeF (fromClause tExpr)
let tableAliasMap' = M.filterWithKey filterRedundantAlias tableAliasMap
filterRedundantAlias (QualifiedName [nam]) (RelationVariable nam' ())
| nam == nam' = False
filterRedundantAlias _ _ = True
withExprs <- mapM (\(qnam, expr) -> do
nam <- convert typeF qnam
pure (WithNameExpr nam (), expr)) (M.toList tableAliasMap')
expr' <- case whereClause tExpr of
Just whereExpr -> do
restrictPredExpr <- convert typeF whereExpr
Expand All @@ -121,29 +131,35 @@ instance SQLConvert TableExpr where

instance SQLConvert [TableRef] where
-- returns base relation expressions plus top-level renames required
type ConverterF [TableRef] = (RelationalExpr, WithNamesAssocs)
convert _ [] = pure (ExistingRelation relationFalse, [])
type ConverterF [TableRef] = (RelationalExpr, TableAliasMap)
convert _ [] = pure (ExistingRelation relationFalse, M.empty)
convert typeF (firstRef:trefs) = do
--the first table ref must be a straight RelationVariable
(firstRel, withRenames) <- convert typeF firstRef
(expr', withRenames') <- foldM joinTRef (firstRel, withRenames) (zip [1..] trefs)
pure (expr', withRenames')
(firstRel, tableAliases) <- convert typeF firstRef
(expr', tableAliases') <- foldM joinTRef (firstRel, tableAliases) (zip [1..] trefs)
pure (expr', tableAliases')
where
--TODO: if any of the previous relations have overlap in their attribute names, we must change it to prevent a natural join!
joinTRef (rvA,withRenames) (c,tref) = do
let renamerFolder x expr old_name =
let new_name = T.concat [old_name, "_", x, T.pack (show c)]
in
pure $ Rename old_name new_name expr
joinTRef (rvA,tAliases) (c,tref) = do
let attrRenamer x expr attrs = do
renamed <- mapM (renameOneAttr x expr) attrs
pure (Rename (S.fromList renamed) expr)
renameOneAttr x expr old_name = pure (old_name, new_name)
where
new_name = T.concat [prefix, ".", old_name]
prefix = case expr of
RelationVariable rvName () -> rvName
_ -> x -- probably need to return errors for some expressions

case tref of
NaturalJoinTableRef jtref -> do
-- then natural join is the only type of join which the relational algebra supports natively
(rvB, withRenames') <- convert typeF jtref
pure $ (Join rvA rvB, withRenames <> withRenames')
(rvB, tAliases') <- convert typeF jtref
-- merge the aliases collected so far with the ones introduced by the joined table reference; unioning tAliases with itself would silently drop tAliases'
pure (Join rvA rvB, M.union tAliases tAliases')
CrossJoinTableRef jtref -> do
--rename all columns to prefix them with a generated alias to prevent any natural join occurring, then perform normal join
-- we need the type to get all the attribute names for both relexprs
(rvB, withRenames') <- convert typeF jtref
(rvB, tAliases) <- convert typeF jtref
case typeF rvA of
Left err -> Left (SQLRelationalError err)
Right typeA ->
Expand All @@ -154,46 +170,66 @@ instance SQLConvert [TableRef] where
attrsB = A.attributeNameSet (attributes typeB)
attrsIntersection = S.intersection attrsA attrsB
--find intersection of attributes and rename all of them with prefix 'expr'+c+'.'
traceShowM ("cross gonk", attrsIntersection)
exprA <- foldM (renamerFolder "a") rvA (S.toList attrsIntersection)
pure (Join exprA rvB, withRenames')
exprA <- attrRenamer "a" rvA (S.toList attrsIntersection)
pure (Join exprA rvB, tAliases)
InnerJoinTableRef jtref (JoinUsing qnames) -> do
(rvB, withRenames') <- convert typeF jtref
(rvB, tAliases) <- convert typeF jtref
jCondAttrs <- S.fromList <$> mapM (convert typeF) qnames
(attrsIntersection, attrsA, attrsB) <- commonAttributeNames typeF rvA rvB
--rename attributes which are not part of the join condition
--rename attributes used in the join condition
let attrsToRename = S.difference attrsIntersection jCondAttrs
traceShowM ("inner", attrsToRename, attrsIntersection, jCondAttrs)
exprA <- foldM (renamerFolder "a") rvA (S.toList attrsToRename)
pure (Join exprA rvB, withRenames')
InnerJoinTableRef jtref (JoinOn sexpr) -> do
-- traceShowM ("inner", attrsToRename, attrsIntersection, jCondAttrs)
exprA <- attrRenamer "a" rvA (S.toList attrsToRename)
pure (Join exprA rvB, tAliases)

InnerJoinTableRef jtref (JoinOn (JoinOnCondition joinExpr)) -> do
--create a cross join but extend with the boolean sexpr
--extend the table with the join conditions, then join on those
--exception: for simple attribute equality, use regular join renames using JoinOn logic
(rvB, withRenames') <- convert typeF jtref
(rvB, tAliases) <- convert typeF jtref

--rvA and rvB now reference potentially aliased relation variables (needs with clause to execute), but this is useful for making attributes rv-prefixed
-- traceShowM ("converted", rvA, rvB, tAliases)
--extract all table aliases to create a remapping for SQL names discovered in the sexpr
(commonAttrs, attrsA, attrsB) <- commonAttributeNames typeF rvA rvB
let sexpr' = renameIdentifier renamer sexpr
(commonAttrs, attrsA, attrsB) <- commonAttributeNames typeF rvA rvB
-- first, execute the rename, renaming all attributes according to their table aliases
let rvPrefix rvExpr =
case rvExpr of
RelationVariable nam () -> pure nam
x -> Left $ NotSupportedError ("cannot derived name for relational expression " <> T.pack (show x))
rvPrefixA <- rvPrefix rvA
rvPrefixB <- rvPrefix rvB
exprA <- attrRenamer rvPrefixA rvA (S.toList attrsA)
exprB <- attrRenamer rvPrefixB rvB (S.toList attrsB)
-- for the join condition, we can potentially extend to include all the join criteria columns, then project them away after constructing the join condition
let joinExpr' = renameIdentifier renamer joinExpr
renamer n@(QualifiedName [tableAlias,attr]) = --lookup prefixed with table alias
case W.lookup tableAlias withRenames' of
Nothing -> QualifiedName [attr]-- the table was not renamed, but the attribute may have been renamed- how do we know at this point when the sexpr' converter hasn't run yet?!
case M.lookup n tAliases of
-- the table was not renamed, but the attribute may have been renamed
-- find the source of the attribute
Nothing -> n
Just found -> error (show (tableAlias, found))
renamer n@(QualifiedName [attr]) = error (show n)
joinRe <- convert typeF sexpr'
-- traceShowM ("joinExpr'", joinExpr')
joinRe <- convert typeF joinExpr'

--let joinCommonAttrRenamer (RelationVariable rvName ()) old_name =
--rename all common attrs and use the new names in the join condition
exprA <- foldM (renamerFolder "a") rvA (S.toList commonAttrs)
exprB <- foldM (renamerFolder "b") rvB (S.toList commonAttrs)
let allAttrs = S.union attrsA attrsB
firstAvailableName c allAttrs' =
let new_name = T.pack ("join_" <> show c) in
if S.member new_name allAttrs' then
firstAvailableName (c + 1) allAttrs'
else
new_name
extender = AttributeExtendTupleExpr (firstAvailableName 1 allAttrs) joinRe
pure (Join (Extend extender exprA) exprB, withRenames')
joinName = firstAvailableName 1 allAttrs
extender = AttributeExtendTupleExpr joinName joinRe
joinMatchRestriction = Restrict (AttributeEqualityPredicate joinName (ConstructedAtomExpr "True" [] ()))
projectAwayJoinMatch = Project (InvertedAttributeNames (S.fromList [joinName]))
pure (projectAwayJoinMatch (joinMatchRestriction (Extend extender (Join exprB exprA))), tAliases)


--type AttributeNameRemap = M.Map RelVarName AttributeName

-- | Used in join condition detection necessary for renames to enable natural joins.
commonAttributeNames :: TypeForRelExprF -> RelationalExpr -> RelationalExpr -> Either SQLError (S.Set AttributeName, S.Set AttributeName, S.Set AttributeName)
Expand All @@ -208,25 +244,34 @@ commonAttributeNames typeF rvA rvB =
attrsB = A.attributeNameSet (attributes typeB)
pure $ (S.intersection attrsA attrsB, attrsA, attrsB)



-- | Over the course of conversion, we collect all the table aliases we encounter, including non-aliased table references. Each key is the name or alias used in the SQL text and each value is the relational expression it refers to.
type TableAliasMap = M.Map QualifiedName RelationalExpr

-- | Add a table name or alias to the alias map.
--
-- Returns 'DuplicateTableReferenceError' if the name is already a key in the map; otherwise returns the map extended with the new entry.
insertTableAlias :: QualifiedName -> RelationalExpr -> TableAliasMap -> Either SQLError TableAliasMap
insertTableAlias qn expr map'
  | M.member qn map' = Left (DuplicateTableReferenceError qn)
  | otherwise = Right (M.insert qn expr map')

-- convert a TableRef in isolation- to be used with the first TableRef only
instance SQLConvert TableRef where
-- return base relation variable expression plus a function to apply top-level rv renames using WithNameExpr
type ConverterF TableRef = (RelationalExpr, WithNamesAssocs)
type ConverterF TableRef = (RelationalExpr, TableAliasMap)
--SELECT x FROM a,_b_ creates a cross join
convert _ (SimpleTableRef (QualifiedName [nam])) =
pure (RelationVariable nam (), [])
convert _ (SimpleTableRef qn@(QualifiedName [nam])) = do
let rv = RelationVariable nam ()
pure (rv, M.singleton qn rv) -- include with clause even for simple cases because we use this mapping to
convert typeF (AliasedTableRef tnam (AliasName newName)) = do
(rv, withNames) <- convert typeF tnam
pure $ (RelationVariable newName (), (WithNameExpr newName (), rv):withNames)
(rv, _) <- convert typeF tnam
pure $ (RelationVariable newName (), M.singleton (QualifiedName [newName]) rv)
convert _ x = Left $ NotSupportedError (T.pack (show x))


instance SQLConvert RestrictionExpr where
type ConverterF RestrictionExpr = RestrictionPredicateExpr
convert typeF (RestrictionExpr rexpr) = do
let wrongType t = Left $ TypeMismatch t BoolAtomType --must be boolean expression
let wrongType t = Left $ TypeMismatchError t BoolAtomType --must be boolean expression
attrName' (QualifiedName ts) = T.intercalate "." ts
case rexpr of
IntegerLiteral{} -> wrongType IntegerAtomType
Expand All @@ -247,7 +292,7 @@ lookupFunc qname =
case qname of
QualifiedName [nam] ->
case lookup nam sqlFuncs of
Nothing -> Left $ NoSuchFunction qname
Nothing -> Left $ NoSuchSQLFunctionError qname
Just match -> pure match
where
f n args = FunctionAtomExpr n args ()
Expand Down Expand Up @@ -276,6 +321,12 @@ instance SQLConvert ScalarExpr where
f <- lookupFunc qn
pure $ f [a,b]

-- | Convert a SQL @JOIN ... ON@ condition into a function over relational expressions.
--
-- Work in progress: no condition shape is converted yet, so every input is reported as a 'NotSupportedError' instead of crashing on @undefined@ or on an incomplete pattern match.
instance SQLConvert JoinOnCondition where
  type ConverterF JoinOnCondition = (RelationalExpr -> RelationalExpr)
  -- TODO: handle table-qualified column references such as @Identifier (QualifiedName [tAlias, colName])@
  convert _ cond = Left $ NotSupportedError (T.pack (show cond))

instance SQLConvert ProjectionScalarExpr where
type ConverterF ProjectionScalarExpr = AtomExpr
convert typeF expr = do
Expand Down
9 changes: 6 additions & 3 deletions src/bin/SQL/Interpreter/Select.hs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,10 @@ data NullsOrder = NullsFirst | NullsLast
data JoinType = InnerJoin | RightOuterJoin | LeftOuterJoin | FullOuterJoin | CrossJoin | NaturalJoin
deriving (Show, Eq)

data JoinCondition = JoinOn ScalarExpr | JoinUsing [UnqualifiedName]
data JoinCondition = JoinOn JoinOnCondition | JoinUsing [UnqualifiedName]
deriving (Show, Eq)

-- | The scalar expression of a @JOIN ... ON@ clause, wrapped in its own type.
newtype JoinOnCondition = JoinOnCondition ScalarExpr
deriving (Show, Eq)

data Alias = Alias QualifiedName (Maybe AliasName)
Expand All @@ -98,7 +101,7 @@ data ProjectionName = ProjectionName Text | Asterisk
deriving (Show, Eq, Ord)

data QualifiedName = QualifiedName [Text]
deriving (Show, Eq)
deriving (Show, Eq, Ord)

data UnqualifiedName = UnqualifiedName Text
deriving (Show, Eq)
Expand Down Expand Up @@ -165,7 +168,7 @@ fromP = reserved "from" *> ((:) <$> nonJoinTref <*> sepByComma joinP)

joinConditionP :: Parser JoinCondition
joinConditionP = do
(JoinOn <$> (reserved "on" *> scalarExprP)) <|>
(JoinOn <$> (reserved "on" *> (JoinOnCondition <$> scalarExprP))) <|>
JoinUsing <$> (reserved "using" *> parens (sepBy1 unqualifiedNameP comma))

joinTypeP :: Parser JoinType
Expand Down
4 changes: 2 additions & 2 deletions src/bin/TutorialD/Interpreter/RelationalExpr.hs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,9 @@ renameP = do
reservedOp "rename"
renameList <- braces (sepBy renameClauseP comma)
case renameList of
[] -> pure (Restrict TruePredicate) --no-op when rename list is empty
[] -> pure id
renames ->
pure $ \expr -> foldl (\acc (oldAttr, newAttr) -> Rename oldAttr newAttr acc) expr renames
pure $ Rename (S.fromList renames)

whereClauseP :: RelationalMarkerExpr a => Parser (RelationalExprBase a -> RelationalExprBase a)
whereClauseP = reservedOp "where" *> (Restrict <$> restrictionPredicateP)
Expand Down
7 changes: 6 additions & 1 deletion src/bin/TutorialD/Printer.hs
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ instance Pretty RelationalExpr where
pretty (MakeStaticRelation attrs tupSet) = "relation" <> prettyBracesList (A.toList attrs) <> prettyBracesList (asList tupSet)
pretty (Union a b) = parens $ pretty' a <+> "union" <+> pretty' b
pretty (Join a b) = parens $ pretty' a <+> "join" <+> pretty' b
pretty (Rename n1 n2 relExpr) = parens $ pretty relExpr <+> "rename" <+> braces (pretty n1 <+> "as" <+> pretty n2)
pretty (Rename attrs relExpr) = parens $ pretty relExpr <+> "rename" <+> prettyBracesList (map RenameTuple (S.toList attrs))
pretty (Difference a b) = parens $ pretty' a <+> "minus" <+> pretty' b
pretty (Group attrNames attrName relExpr) = parens $ pretty relExpr <+> "group" <+> parens (pretty attrNames <+> "as" <+> pretty attrName)
pretty (Ungroup attrName relExpr) = parens $ pretty' relExpr <+> "ungroup" <+> pretty attrName
Expand Down Expand Up @@ -146,6 +146,11 @@ instance Pretty AtomType where
instance Pretty ExtendTupleExpr where
pretty (AttributeExtendTupleExpr attrName atomExpr) = pretty attrName <> ":=" <> pretty atomExpr

-- | Wrapper around one (old name, new name) rename pair so that it can be pretty-printed as an element of a rename list.
newtype RenameTuple = RenameTuple { _unRenameTuple :: (AttributeName, AttributeName) }

-- | Renders the pair as @old as new@.
instance Pretty RenameTuple where
  pretty renameTuple = pretty oldName <+> "as" <+> pretty newName
    where
      (oldName, newName) = _unRenameTuple renameTuple


instance Pretty RestrictionPredicateExpr where
pretty TruePredicate = "true"
Expand Down
2 changes: 1 addition & 1 deletion src/lib/ProjectM36/AtomFunctions/Primitive.hs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ primitiveAtomFunctions = HS.fromList [
funcBody = body $ relationAtomFunc relationMin
},
Function { funcName = "eq",
funcType = [IntegerAtomType, IntegerAtomType, BoolAtomType],
funcType = [TypeVariableType "a", TypeVariableType "a", BoolAtomType],
funcBody = body $ \case
[i1,i2] -> pure (BoolAtom (i1 == i2))
_ -> Left AtomFunctionTypeMismatchError
Expand Down
2 changes: 1 addition & 1 deletion src/lib/ProjectM36/Base.hs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ data RelationalExprBase a =
--- | Create a join of two relational expressions. The join occurs on attributes which are identical. If the expressions have no overlapping attributes, the join becomes a cross-product of both tuple sets.
Join (RelationalExprBase a) (RelationalExprBase a) |
--- | Rename attributes in bulk: each pair in the set renames one attribute (first element of the pair) to another (second element of the pair).
Rename AttributeName AttributeName (RelationalExprBase a) |
Rename (S.Set (AttributeName, AttributeName)) (RelationalExprBase a) |
--- | Return a relation containing all tuples of the first argument which do not appear in the second argument (minus).
Difference (RelationalExprBase a) (RelationalExprBase a) |
--- | Create a sub-relation composed of the first argument's attributes which will become an attribute of the result expression. The unreferenced attributes are not altered in the result but duplicate tuples in the projection of the expression minus the attribute names are compressed into one. For more information, <https://github.com/agentm/project-m36/blob/master/docs/introduction_to_the_relational_algebra.markdown#group read the relational algebra tutorial.>
Expand Down
9 changes: 6 additions & 3 deletions src/lib/ProjectM36/HashSecurely.hs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ import qualified Data.Set as S
import Data.Time.Calendar
import Data.Time.Clock
import Codec.Winery (Serialise)
import Data.Int (Int64)

newtype SecureHash = SecureHash { _unSecureHash :: B.ByteString }
deriving (Serialise, Show, Eq)
Expand Down Expand Up @@ -86,8 +85,8 @@ instance HashBytes a => HashBytes (RelationalExprBase a) where
hashBytesL ctx "Union" [SHash exprA, SHash exprB]
hashBytes (Join exprA exprB) ctx =
hashBytesL ctx "Join" [SHash exprA, SHash exprB]
hashBytes (Rename nameA nameB expr) ctx =
hashBytesL ctx "Rename" [SHash nameA, SHash nameB, SHash expr]
hashBytes (Rename attrs expr) ctx =
hashBytesL ctx "Rename" [SHash attrs, SHash expr]
hashBytes (Difference exprA exprB) ctx =
hashBytesL ctx "Difference" [SHash exprA, SHash exprB]
hashBytes (Group names name expr) ctx =
Expand Down Expand Up @@ -116,6 +115,10 @@ instance HashBytes a => HashBytes (ExtendTupleExprBase a) where
hashBytes (AttributeExtendTupleExpr name expr) ctx =
hashBytesL ctx "AttributeExtendTupleExpr" [SHash name, SHash expr]

-- | Hash a set of (old name, new name) rename pairs: the pairs are taken in the order produced by 'S.toList' and flattened to old, new, old, new, ... under the tag @RenameAttrSet@.
instance HashBytes (S.Set (AttributeName, AttributeName)) where
  hashBytes attrs ctx = hashBytesL ctx "RenameAttrSet" pairHashes
    where
      pairHashes = V.fromList (concatMap (\(a, b) -> [SHash a, SHash b]) (S.toList attrs))

instance HashBytes a => HashBytes (WithNameExprBase a) where
hashBytes (WithNameExpr rv marker) ctx = hashBytesL ctx "WithNameExpr" [SHash rv, SHash marker]

Expand Down
Loading

0 comments on commit 6cd3b92

Please sign in to comment.