diff --git a/.gitignore b/.gitignore index c6a7d4f3..48fe5ef5 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.jl.cov *.jl.*.cov *.jl.mem +*.idea/ docs/build/ docs/site/ docs/Manifest.toml diff --git a/Manifest.toml b/Manifest.toml new file mode 100644 index 00000000..3a473da0 --- /dev/null +++ b/Manifest.toml @@ -0,0 +1,178 @@ +# This file is machine-generated - editing it directly is not advised + +[[Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" + +[[Compat]] +deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"] +git-tree-sha1 = "49269e311ffe11ac5b334681d212329002a9832a" +uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" +version = "1.5.1" + +[[DataStructures]] +deps = ["InteractiveUtils", "OrderedCollections", "Random", "Serialization", "Test"] +git-tree-sha1 = "ca971f03e146cf144a9e2f2ce59674f5bf0e8038" +uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" +version = "0.15.0" + +[[DataValues]] +deps = ["Dates", "InteractiveUtils", "LinearAlgebra", "Random", "Test"] +git-tree-sha1 = "05e4a87fe52a2af1b4a1ffd3ab2fc996c038b192" +uuid = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5" +version = "0.4.7" + +[[Dates]] +deps = ["Printf"] +uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" + +[[DelimitedFiles]] +deps = ["Mmap"] +uuid = "8bb1440f-4735-579b-a4ab-409b98df4dab" + +[[Distributed]] +deps = ["Random", "Serialization", "Sockets"] +uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" + +[[InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" + +[[IterableTables]] +deps = ["DataValues", "IteratorInterfaceExtensions", "Requires", "TableTraits", "TableTraitsUtils", "Test"] +git-tree-sha1 = "0eec91e8185899f3926f56db515559bfe95b9db7" +uuid = "1c8ee90f-4401-5389-894e-7a04a3dc0f4d" +version = "0.10.0" + 
+[[IteratorInterfaceExtensions]] +deps = ["Test"] +git-tree-sha1 = "5484e5ede2a4137b9643f4d646e8e7b87b794415" +uuid = "82899510-4779-5014-852e-03e436cf321d" +version = "0.1.1" + +[[JSON]] +deps = ["Dates", "Distributed", "Mmap", "Sockets", "Test", "Unicode"] +git-tree-sha1 = "1f7a25b53ec67f5e9422f1f551ee216503f4a0fa" +uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" +version = "0.20.0" + +[[LibGit2]] +uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" + +[[Libdl]] +uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" + +[[LinearAlgebra]] +deps = ["Libdl"] +uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" + +[[Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" + +[[MLStyle]] +deps = ["Statistics", "Test"] +git-tree-sha1 = "bb14ec600351a59326c3058df0e45bd8b4a6dda4" +uuid = "d8e11817-5142-5d16-987a-aa16d5891078" +version = "0.2.4" + +[[MacroTools]] +deps = ["Compat"] +git-tree-sha1 = "c443e1c8d58a4e9f61b708ad0a88286c7042145b" +uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" +version = "0.4.4" + +[[Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" + +[[Missings]] +deps = ["Dates", "InteractiveUtils", "SparseArrays", "Test"] +git-tree-sha1 = "d1d2585677f2bd93a97cfeb8faa7a0de0f982042" +uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" +version = "0.4.0" + +[[Mmap]] +uuid = "a63ad114-7e13-5084-954f-fe012c677804" + +[[OrderedCollections]] +deps = ["Random", "Serialization", "Test"] +git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b" +uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" +version = "1.0.2" + +[[Pkg]] +deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"] +uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" + +[[Printf]] +deps = ["Unicode"] +uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" + +[[QueryOperators]] +deps = ["DataStructures", "DataValues", "IteratorInterfaceExtensions", "TableShowUtils", "Test"] +git-tree-sha1 = "c4b313e8fdead212a6100953b7611e0e8cc79f8f" +uuid = "2aef5ad7-51ca-5a8f-8e88-e75cf067b44b" +version = 
"0.6.0" + +[[REPL]] +deps = ["InteractiveUtils", "Markdown", "Sockets"] +uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" + +[[Random]] +deps = ["Serialization"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" + +[[Requires]] +deps = ["Test"] +git-tree-sha1 = "f6fbf4ba64d295e146e49e021207993b6b48c7d1" +uuid = "ae029012-a4dd-5104-9daa-d747884805df" +version = "0.5.2" + +[[SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" + +[[Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" + +[[SharedArrays]] +deps = ["Distributed", "Mmap", "Random", "Serialization"] +uuid = "1a1011a3-84de-559e-8e89-a11a2f7dc383" + +[[Sockets]] +uuid = "6462fe0b-24de-5631-8697-dd941f90decc" + +[[SparseArrays]] +deps = ["LinearAlgebra", "Random"] +uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" + +[[Statistics]] +deps = ["LinearAlgebra", "SparseArrays"] +uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" + +[[TableShowUtils]] +deps = ["DataValues", "Dates", "JSON", "Markdown", "Test"] +git-tree-sha1 = "6ffc18504eab3f900af2d23adc0e7bddf94bd29b" +uuid = "5e66a065-1f0a-5976-b372-e0b8c017ca10" +version = "0.2.1" + +[[TableTraits]] +deps = ["IteratorInterfaceExtensions", "Test"] +git-tree-sha1 = "eba4b1d0a82bdd773307d652c6e5f8c82104c676" +uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" +version = "0.4.1" + +[[TableTraitsUtils]] +deps = ["DataValues", "IteratorInterfaceExtensions", "Missings", "TableTraits", "Test"] +git-tree-sha1 = "a355f1882d64881a11f853e64dcc353975c4df6e" +uuid = "382cd787-c1b6-5bf2-a167-d5b971a19bda" +version = "0.3.1" + +[[Test]] +deps = ["Distributed", "InteractiveUtils", "Logging", "Random"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[[UUIDs]] +deps = ["Random", "SHA"] +uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" + +[[Unicode]] +uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" diff --git a/REQUIRE b/REQUIRE index 27fef956..6af5dd63 100644 --- a/REQUIRE +++ b/REQUIRE @@ -4,3 +4,4 @@ IterableTables 0.8.2 DataValues 0.4.4 MacroTools 0.4.4 QueryOperators 0.6.0 +MLStyle 0.3.0 
\ No newline at end of file diff --git a/src/Query.jl b/src/Query.jl index 780e6c10..82262269 100644 --- a/src/Query.jl +++ b/src/Query.jl @@ -5,6 +5,7 @@ import IterableTables using DataValues using MacroTools: postwalk using QueryOperators +using MLStyle export @from, @query, @count, Grouping, key diff --git a/src/table_query_macros.jl b/src/table_query_macros.jl index 40495823..e1ae8540 100644 --- a/src/table_query_macros.jl +++ b/src/table_query_macros.jl @@ -23,94 +23,97 @@ julia> df |> @select(startswith("b"), -:bar) |> DataFrame │ 3 │ c │ ``` """ + +@active Predicate(x) begin + (op :: Any, x) = @match x begin + :(-$x) => (:-, x) + :(!$x) => (:!, x) + _ => (nothing, x) + end + res = @match x begin + :(startswith($arg)) => (:startswith, arg) + :(endswith($arg)) => (:endswith, arg) + :(occursin($arg)) => (:occursin, arg) + _ => nothing + end + if res !== nothing + (kind, arg) = res + if arg isa String + arg = QuoteNode(Symbol(arg)) + end + (op, kind, arg) + end +end + + +@active QuoteNodeP(x) begin + x isa QuoteNode ? x.value : nothing +end + macro select(args...) 
- prev = NamedTuple() - for arg in args - if typeof(arg) == Expr && (string(arg) == "everything()") - # select everything - prev = :_ - elseif typeof(arg) == Int - # select by position - if arg > 0 - prev = :( merge($prev, QueryOperators.NamedTupleUtilities.select(_, Val(keys(_)[$arg]))) ) - # remove by position - elseif arg < 0 - sel = ifelse(prev == NamedTuple(), :_, prev) - prev = :( QueryOperators.NamedTupleUtilities.remove($sel, Val(keys($sel)[-$arg])) ) - end - elseif typeof(arg) == QuoteNode - # select by name - prev = :( merge($prev, QueryOperators.NamedTupleUtilities.select(_, Val($(arg)))) ) - else - arg = string(arg) - # select by element type - m_type = match(r":\(:(.+)\)", arg) - # remove by name - m_rem = match(r"^-:(.+)", arg) - # remove by predicate functions - m_rem_pred = match(r"^-\(*(startswith|endswith|occursin)\(\"(.+)\"\)+", arg) - # select by range, with multiple syntaxes supported - m_range = match(r"^:([^,:]+) *: *:([^,:]+)", arg) - m_range_ind = match(r"^([0-9]+) *: *([0-9]+)", arg) - if m_range == nothing && m_range_ind == nothing - m_range = match(r"^rangeat\(:([^,]+), *:([^,]+)\)", arg) - m_range_ind = match(r"^rangeat\(([0-9]+), *([0-9]+)\)", arg) - end - # select by predicate functions - m_pred = match(r"^(startswith|endswith|occursin)\(\"(.+)\"\)", arg) - is_neg_pred = false - if m_pred == nothing - m_pred = match(r"^!\(*(startswith|endswith|occursin)\(\"(.+)\"\)+", arg) - is_neg_pred = true - end + foldl(args, init=NamedTuple()) do prev, arg + @match arg begin + :(everything()) => :_ + + ::Int && if arg > 0 end => + :( merge($prev, QueryOperators.NamedTupleUtilities.select(_, Val(keys(_)[$arg]))) ) - # TODO: eltype - if m_type !== nothing - prev = :( merge($prev, QueryOperators.NamedTupleUtilities.oftype(_, parse(DataType, @datatype($m_type[1])))) ) - elseif m_rem !== nothing - prev = ifelse(prev == NamedTuple(), :_, prev) - prev = :( QueryOperators.NamedTupleUtilities.remove($prev, Val($(QuoteNode(Symbol(m_rem[1]))))) ) - elseif 
m_rem_pred !== nothing - prev = ifelse(prev == NamedTuple(), :_, prev) - if m_rem_pred[1] == "startswith" - prev = :( QueryOperators.NamedTupleUtilities.not_startswith($prev, Val($(QuoteNode(Symbol(m_rem_pred[2]))))) ) - elseif m_rem_pred[1] == "endswith" - prev = :( QueryOperators.NamedTupleUtilities.not_endswith($prev, Val($(QuoteNode(Symbol(m_rem_pred[2]))))) ) - elseif m_rem_pred[1] == "occursin" - prev = :( QueryOperators.NamedTupleUtilities.not_occursin($prev, Val($(QuoteNode(Symbol(m_rem_pred[2]))))) ) + ::Int && if arg < 0 end => + let sel = ifelse(prev == NamedTuple(), :_, prev) + :( QueryOperators.NamedTupleUtilities.remove($sel, Val(keys($sel)[-$arg])) ) end - elseif m_range !== nothing || m_range_ind !== nothing - if m_range_ind !== nothing - a = parse(Int, m_range_ind[1]) - b = parse(Int, m_range_ind[2]) - prev = :( merge($prev, QueryOperators.NamedTupleUtilities.range(_, Val(keys(_)[$a]), Val(keys(_)[$b]))) ) - else - prev = :( merge($prev, QueryOperators.NamedTupleUtilities.range(_, Val($(QuoteNode(Symbol(m_range[1])))), Val($(QuoteNode(Symbol(m_range[2])))))) ) + ::QuoteNode => + :( merge($prev, QueryOperators.NamedTupleUtilities.select(_, Val($(arg)))) ) + + # remove by name + :(-$(name :: QuoteNode)) && if name.value isa Symbol end => + let prev = ifelse(prev == NamedTuple(), :_, prev) + :( QueryOperators.NamedTupleUtilities.remove($prev, Val($name)) ) end - elseif m_pred !== nothing - if is_neg_pred == false - if m_pred[1] == "startswith" - sel = :( QueryOperators.NamedTupleUtilities.startswith(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - elseif m_pred[1] == "endswith" - sel = :( QueryOperators.NamedTupleUtilities.endswith(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - elseif m_pred[1] == "occursin" - sel = :( QueryOperators.NamedTupleUtilities.occursin(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - end + + # select by element type (the captured type expression is interpolated into the call) + :(::$typ) => + :( merge($prev, QueryOperators.NamedTupleUtilities.oftype(_, $typ)) ) + + # select by range, with 
multiple syntaxes supported + :(rangeat($a, $b)) || :($a : $b) => + if a isa Int && b isa Int + :( merge($prev, QueryOperators.NamedTupleUtilities.range(_, Val(keys(_)[$a]), Val(keys(_)[$b]))) ) else - if m_pred[1] == "startswith" - sel = :( QueryOperators.NamedTupleUtilities.not_startswith(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - elseif m_pred[1] == "endswith" - sel = :( QueryOperators.NamedTupleUtilities.not_endswith(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - elseif m_pred[1] == "occursin" - sel = :( QueryOperators.NamedTupleUtilities.not_occursin(_, Val($(QuoteNode(Symbol(m_pred[2]))))) ) - end + :( merge($prev, QueryOperators.NamedTupleUtilities.range(_, Val($a), Val($b))) ) + end + Predicate(op, kind, arg) => + let + pos_f = @match kind begin + :startswith => :(QueryOperators.NamedTupleUtilities.startswith) + :endswith => :(QueryOperators.NamedTupleUtilities.endswith) + :occursin => :(QueryOperators.NamedTupleUtilities.occursin) + end + + neg_f = @match kind begin + :startswith => :(QueryOperators.NamedTupleUtilities.not_startswith) + :endswith => :(QueryOperators.NamedTupleUtilities.not_endswith) + :occursin => :(QueryOperators.NamedTupleUtilities.not_occursin) + end + + # select by predicate functions + select_by_predicate(pred) = Expr(:call, merge, prev, Expr(:call, pred, :_, Expr(:call, Val, arg))) + + @match op begin + if op === nothing end => select_by_predicate(pos_f) + :! => select_by_predicate(neg_f) + + # remove by predicate functions + :- => + let prev = ifelse(prev == NamedTuple(), :_, prev) + Expr(:call, neg_f, prev, Expr(:call, Val, arg)) + end + end + end - prev = :( merge($prev, $sel) ) end end - end - - return :(Query.@map( $prev ) ) + end |> prev -> + :(Query.@map($prev)) end """ @@ -137,24 +140,15 @@ julia> df |> @rename(:foo => :fat, :bar => :ban) |> DataFrame ``` """ macro rename(args...) 
- prev = :_ - for arg in args - n = match(r"^(.+) *=> *:(.+)", string(arg)) - try - # rename by position - n1 = parse(Int, n[1]) - n2 = strip(n[2]) - prev = :( QueryOperators.NamedTupleUtilities.rename($prev, Val(keys(_)[$n1]), Val($(QuoteNode(Symbol(n2))))) ) - catch - # rename by name - m = match(r"^:(.+) *=> *:(.+)", string(arg)) - m1, m2 = strip(m[1]), strip(m[2]) - if m !== nothing - prev = :( QueryOperators.NamedTupleUtilities.rename($prev, Val($(QuoteNode(Symbol(m1)))), Val($(QuoteNode(Symbol(m2))))) ) - end + foldl(args, init = :_) do prev, arg + @match arg begin + :($(n1 :: Int) => $n2) => + :( QueryOperators.NamedTupleUtilities.rename($prev, Val(keys(_)[$n1]), Val($n2)) ) + :($m1 => $m2) => + :( QueryOperators.NamedTupleUtilities.rename($prev, Val($m1), Val($m2))) end - end - return :(Query.@map( $prev ) ) + end |> prev -> + :(Query.@map( $prev ) ) end """ @@ -181,11 +175,12 @@ julia> df |> @mutate(bar = _.foo + 2 * _.bar, bat = "com" * _.bat) |> DataFrame ``` """ macro mutate(args...) - prev = :_ - for arg in args - prev = :( merge($prev, ($(esc(arg.args[1])) = $(arg.args[2]),)) ) - end - return :( Query.@map( $prev ) ) + foldl(args, init=:_) do prev, arg + @match arg begin + :($alias = $expr) => :( merge($prev, ($(esc(alias)) = $(expr),)) ) + end + end |> prev -> + :( Query.@map( $prev ) ) end macro datatype(str)