Skip to content

Commit 55ec83b

Browse files
authored
Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `SortMergeJoinExec` and `SymmetricHashJoinExec` (#15081)
* Implement tree explain for `NestedLoopJoinExec`, `CrossJoinExec`, `SortMergeJoinExec` and `SymmetricHashJoinExec`
* fix issues
* fix issues
* fix
* fix proj
1 parent dfaede0 commit 55ec83b

File tree

5 files changed

+172
-11
lines changed

5 files changed

+172
-11
lines changed

datafusion/physical-plan/src/joins/cross_join.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -237,13 +237,11 @@ impl DisplayAs for CrossJoinExec {
237237
f: &mut std::fmt::Formatter,
238238
) -> std::fmt::Result {
239239
match t {
240-
DisplayFormatType::Default | DisplayFormatType::Verbose => {
240+
DisplayFormatType::Default
241+
| DisplayFormatType::Verbose
242+
| DisplayFormatType::TreeRender => {
241243
write!(f, "CrossJoinExec")
242244
}
243-
DisplayFormatType::TreeRender => {
244-
// TODO: collect info
245-
write!(f, "")
246-
}
247245
}
248246
}
249247
}

datafusion/physical-plan/src/joins/nested_loop_join.rs

+5-2
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,11 @@ impl DisplayAs for NestedLoopJoinExec {
425425
)
426426
}
427427
DisplayFormatType::TreeRender => {
428-
// TODO: collect info
429-
write!(f, "")
428+
if *self.join_type() != JoinType::Inner {
429+
writeln!(f, "join_type={:?}", self.join_type)
430+
} else {
431+
Ok(())
432+
}
430433
}
431434
}
432435
}

datafusion/physical-plan/src/joins/sort_merge_join.rs

+11-2
Original file line numberDiff line numberDiff line change
@@ -370,8 +370,17 @@ impl DisplayAs for SortMergeJoinExec {
370370
)
371371
}
372372
DisplayFormatType::TreeRender => {
373-
// TODO: collect info
374-
write!(f, "")
373+
let on = self
374+
.on
375+
.iter()
376+
.map(|(c1, c2)| format!("({} = {})", c1, c2))
377+
.collect::<Vec<String>>()
378+
.join(", ");
379+
380+
if self.join_type() != JoinType::Inner {
381+
writeln!(f, "join_type={:?}", self.join_type)?;
382+
}
383+
writeln!(f, "on={}", on)
375384
}
376385
}
377386
}

datafusion/physical-plan/src/joins/symmetric_hash_join.rs

+12-2
Original file line numberDiff line numberDiff line change
@@ -381,8 +381,18 @@ impl DisplayAs for SymmetricHashJoinExec {
381381
)
382382
}
383383
DisplayFormatType::TreeRender => {
384-
// TODO: collect info
385-
write!(f, "")
384+
let on = self
385+
.on
386+
.iter()
387+
.map(|(c1, c2)| format!("({} = {})", c1, c2))
388+
.collect::<Vec<String>>()
389+
.join(", ");
390+
391+
writeln!(f, "mode={:?}", self.mode)?;
392+
if *self.join_type() != JoinType::Inner {
393+
writeln!(f, "join_type={:?}", self.join_type)?;
394+
}
395+
writeln!(f, "on={}", on)
386396
}
387397
}
388398
}

datafusion/sqllogictest/test_files/explain_tree.slt

+141
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,44 @@ WITH ORDER (a ASC, b ASC, c ASC)
9595
LOCATION '../core/tests/data/window_2.csv'
9696
OPTIONS ('format.has_header' 'true');
9797

98+
statement ok
99+
CREATE TABLE hashjoin_datatype_table_t1_source(c1 INT, c2 BIGINT, c3 DECIMAL(5,2), c4 VARCHAR)
100+
AS VALUES
101+
(1, 86400000, 1.23, 'abc'),
102+
(2, 172800000, 456.00, 'def'),
103+
(null, 259200000, 789.000, 'ghi'),
104+
(3, null, -123.12, 'jkl')
105+
;
106+
107+
statement ok
108+
CREATE TABLE hashjoin_datatype_table_t1
109+
AS SELECT
110+
arrow_cast(c1, 'Date32') as c1,
111+
arrow_cast(c2, 'Date64') as c2,
112+
c3,
113+
arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
114+
FROM
115+
hashjoin_datatype_table_t1_source
116+
117+
statement ok
118+
CREATE TABLE hashjoin_datatype_table_t2_source(c1 INT, c2 BIGINT, c3 DECIMAL(10,2), c4 VARCHAR)
119+
AS VALUES
120+
(1, 86400000, -123.12, 'abc'),
121+
(null, null, 100000.00, 'abcdefg'),
122+
(null, 259200000, 0.00, 'qwerty'),
123+
(3, null, 789.000, 'qwe')
124+
;
125+
126+
statement ok
127+
CREATE TABLE hashjoin_datatype_table_t2
128+
AS SELECT
129+
arrow_cast(c1, 'Date32') as c1,
130+
arrow_cast(c2, 'Date64') as c2,
131+
c3,
132+
arrow_cast(c4, 'Dictionary(Int32, Utf8)') as c4
133+
FROM
134+
hashjoin_datatype_table_t2_source
135+
98136
######## Begin Queries ########
99137

100138
# Filter
@@ -897,6 +935,109 @@ physical_plan
897935
48)│ format: csv │
898936
49)└───────────────────────────┘
899937

938+
# Query with nested loop join.
939+
query TT
940+
explain select int_col from table1 where exists (select count(*) from table2);
941+
----
942+
logical_plan
943+
01)LeftSemi Join:
944+
02)--TableScan: table1 projection=[int_col], partial_filters=[Boolean(true)]
945+
03)--SubqueryAlias: __correlated_sq_1
946+
04)----Projection:
947+
05)------Aggregate: groupBy=[[]], aggr=[[count(Int64(1))]]
948+
06)--------TableScan: table2 projection=[]
949+
physical_plan
950+
01)┌───────────────────────────┐
951+
02)│ NestedLoopJoinExec │
952+
03)│ -------------------- ├──────────────┐
953+
04)│ join_type: LeftSemi │ │
954+
05)└─────────────┬─────────────┘ │
955+
06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
956+
07)│ DataSourceExec ││ ProjectionExec │
957+
08)│ -------------------- ││ │
958+
09)│ files: 1 ││ │
959+
10)│ format: csv ││ │
960+
11)└───────────────────────────┘└─────────────┬─────────────┘
961+
12)-----------------------------┌─────────────┴─────────────┐
962+
13)-----------------------------│ AggregateExec │
963+
14)-----------------------------└─────────────┬─────────────┘
964+
15)-----------------------------┌─────────────┴─────────────┐
965+
16)-----------------------------│ CoalescePartitionsExec │
966+
17)-----------------------------└─────────────┬─────────────┘
967+
18)-----------------------------┌─────────────┴─────────────┐
968+
19)-----------------------------│ AggregateExec │
969+
20)-----------------------------└─────────────┬─────────────┘
970+
21)-----------------------------┌─────────────┴─────────────┐
971+
22)-----------------------------│ RepartitionExec │
972+
23)-----------------------------└─────────────┬─────────────┘
973+
24)-----------------------------┌─────────────┴─────────────┐
974+
25)-----------------------------│ DataSourceExec │
975+
26)-----------------------------│ -------------------- │
976+
27)-----------------------------│ files: 1 │
977+
28)-----------------------------│ format: parquet │
978+
29)-----------------------------└───────────────────────────┘
979+
980+
# Query with cross join.
981+
query TT
982+
explain select * from table1 cross join table2 ;
983+
----
984+
logical_plan
985+
01)Cross Join:
986+
02)--TableScan: table1 projection=[int_col, string_col, bigint_col, date_col]
987+
03)--TableScan: table2 projection=[int_col, string_col, bigint_col, date_col]
988+
physical_plan
989+
01)┌───────────────────────────┐
990+
02)│ CrossJoinExec ├──────────────┐
991+
03)└─────────────┬─────────────┘ │
992+
04)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
993+
05)│ DataSourceExec ││ RepartitionExec │
994+
06)│ -------------------- ││ │
995+
07)│ files: 1 ││ │
996+
08)│ format: csv ││ │
997+
09)└───────────────────────────┘└─────────────┬─────────────┘
998+
10)-----------------------------┌─────────────┴─────────────┐
999+
11)-----------------------------│ DataSourceExec │
1000+
12)-----------------------------│ -------------------- │
1001+
13)-----------------------------│ files: 1 │
1002+
14)-----------------------------│ format: parquet │
1003+
15)-----------------------------└───────────────────────────┘
1004+
1005+
1006+
# Query with sort merge join.
1007+
statement ok
1008+
set datafusion.optimizer.prefer_hash_join = false;
1009+
1010+
query TT
1011+
explain select * from hashjoin_datatype_table_t1 t1 join hashjoin_datatype_table_t2 t2 on t1.c1 = t2.c1
1012+
----
1013+
logical_plan
1014+
01)Inner Join: t1.c1 = t2.c1
1015+
02)--SubqueryAlias: t1
1016+
03)----TableScan: hashjoin_datatype_table_t1 projection=[c1, c2, c3, c4]
1017+
04)--SubqueryAlias: t2
1018+
05)----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4]
1019+
physical_plan
1020+
01)┌───────────────────────────┐
1021+
02)│ SortMergeJoinExec │
1022+
03)│ -------------------- ├──────────────┐
1023+
04)│ on: (c1@0 = c1@0) │ │
1024+
05)└─────────────┬─────────────┘ │
1025+
06)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1026+
07)│ SortExec ││ SortExec │
1027+
08)│ -------------------- ││ -------------------- │
1028+
09)│ sort keys: [c1@0 ASC] ││ sort keys: [c1@0 ASC] │
1029+
10)└─────────────┬─────────────┘└─────────────┬─────────────┘
1030+
11)┌─────────────┴─────────────┐┌─────────────┴─────────────┐
1031+
12)│ DataSourceExec ││ DataSourceExec │
1032+
13)│ -------------------- ││ -------------------- │
1033+
14)│ bytes: 6040 ││ bytes: 6040 │
1034+
15)│ format: memory ││ format: memory │
1035+
16)│ rows: 1 ││ rows: 1 │
1036+
17)└───────────────────────────┘└───────────────────────────┘
1037+
1038+
statement ok
1039+
set datafusion.optimizer.prefer_hash_join = true;
1040+
9001041
# cleanup
9011042
statement ok
9021043
drop table table1;

0 commit comments

Comments
 (0)