17
17
18
18
use crate :: { Expr , LogicalPlan , SortExpr , Volatility } ;
19
19
use std:: cmp:: Ordering ;
20
- use std:: collections:: HashMap ;
20
+ use std:: collections:: { BTreeSet , HashMap , HashSet } ;
21
21
use std:: sync:: Arc ;
22
22
use std:: {
23
23
fmt:: { self , Display } ,
@@ -28,7 +28,8 @@ use crate::expr::Sort;
28
28
use arrow:: datatypes:: DataType ;
29
29
use datafusion_common:: tree_node:: { Transformed , TreeNodeContainer , TreeNodeRecursion } ;
30
30
use datafusion_common:: {
31
- Constraints , DFSchemaRef , Result , SchemaReference , TableReference ,
31
+ schema_err, Column , Constraints , DFSchema , DFSchemaRef , DataFusionError , Result ,
32
+ SchemaError , SchemaReference , TableReference ,
32
33
} ;
33
34
use sqlparser:: ast:: Ident ;
34
35
@@ -306,6 +307,7 @@ impl CreateExternalTable {
306
307
constraints,
307
308
column_defaults,
308
309
} = fields;
310
+ check_fields_unique ( & schema) ?;
309
311
Ok ( Self {
310
312
name,
311
313
schema,
@@ -544,6 +546,7 @@ impl CreateMemoryTable {
544
546
column_defaults,
545
547
temporary,
546
548
} = fields;
549
+ check_fields_unique ( input. schema ( ) ) ?;
547
550
Ok ( Self {
548
551
name,
549
552
constraints,
@@ -698,6 +701,7 @@ impl CreateView {
698
701
definition,
699
702
temporary,
700
703
} = fields;
704
+ check_fields_unique ( input. schema ( ) ) ?;
701
705
Ok ( Self {
702
706
name,
703
707
input,
@@ -800,6 +804,48 @@ impl CreateViewBuilder {
800
804
} )
801
805
}
802
806
}
807
+ fn check_fields_unique ( schema : & DFSchema ) -> Result < ( ) > {
808
+ // Use tree set for deterministic error messages
809
+ let mut qualified_names = BTreeSet :: new ( ) ;
810
+ let mut unqualified_names = HashSet :: new ( ) ;
811
+ let mut name_occurrences: HashMap < & String , usize > = HashMap :: new ( ) ;
812
+
813
+ for ( qualifier, field) in schema. iter ( ) {
814
+ if let Some ( qualifier) = qualifier {
815
+ // Check for duplicate qualified field names
816
+ if !qualified_names. insert ( ( qualifier, field. name ( ) ) ) {
817
+ return schema_err ! ( SchemaError :: DuplicateQualifiedField {
818
+ qualifier: Box :: new( qualifier. clone( ) ) ,
819
+ name: field. name( ) . to_string( ) ,
820
+ } ) ;
821
+ }
822
+ // Check for duplicate unqualified field names
823
+ } else if !unqualified_names. insert ( field. name ( ) ) {
824
+ return schema_err ! ( SchemaError :: DuplicateUnqualifiedField {
825
+ name: field. name( ) . to_string( )
826
+ } ) ;
827
+ }
828
+ * name_occurrences. entry ( field. name ( ) ) . or_default ( ) += 1 ;
829
+ }
830
+
831
+ for ( qualifier, name) in qualified_names {
832
+ // Check for duplicate between qualified and unqualified field names
833
+ if unqualified_names. contains ( name) {
834
+ return schema_err ! ( SchemaError :: AmbiguousReference {
835
+ field: Column :: new( Some ( qualifier. clone( ) ) , name)
836
+ } ) ;
837
+ }
838
+ // Check for duplicates between qualified names as the qualification will be stripped off
839
+ if name_occurrences[ name] > 1 {
840
+ return schema_err ! ( SchemaError :: QualifiedFieldWithDuplicateName {
841
+ qualifier: Box :: new( qualifier. clone( ) ) ,
842
+ name: name. to_owned( ) ,
843
+ } ) ;
844
+ }
845
+ }
846
+
847
+ Ok ( ( ) )
848
+ }
803
849
804
850
/// Creates a catalog (aka "Database").
805
851
#[ derive( Debug , Clone , PartialEq , Eq , Hash ) ]
@@ -1085,7 +1131,9 @@ impl PartialOrd for CreateIndex {
1085
1131
1086
1132
#[ cfg( test) ]
1087
1133
mod test {
1134
+ use super :: * ;
1088
1135
use crate :: { CreateCatalog , DdlStatement , DropView } ;
1136
+ use arrow:: datatypes:: { DataType , Field , Schema } ;
1089
1137
use datafusion_common:: { DFSchema , DFSchemaRef , TableReference } ;
1090
1138
use std:: cmp:: Ordering ;
1091
1139
@@ -1112,4 +1160,85 @@ mod test {
1112
1160
1113
1161
assert_eq ! ( drop_view. partial_cmp( & catalog) , Some ( Ordering :: Greater ) ) ;
1114
1162
}
1163
+
1164
#[test]
fn test_check_fields_unique() -> Result<()> {
    // Every field used here is a nullable boolean; only the names matter.
    let bool_field = |name: &str| Field::new(name, DataType::Boolean, true);
    // Turns a failed check into its message, passed through `strip_backtrace`.
    let error_text =
        |outcome: Result<()>| outcome.unwrap_err().strip_backtrace().to_string();

    // no duplicate fields, unqualified schema
    let distinct_unqualified =
        DFSchema::try_from(Schema::new(vec![bool_field("c100"), bool_field("c101")]))?;
    check_fields_unique(&distinct_unqualified)?;

    // no duplicate fields, qualified schema
    let distinct_qualified = DFSchema::try_from_qualified_schema(
        "t1",
        &Schema::new(vec![bool_field("c100"), bool_field("c101")]),
    )?;
    check_fields_unique(&distinct_qualified)?;

    // duplicate unqualified field with same qualifier
    let repeated_unqualified = DFSchema::try_from(Schema::new(vec![
        bool_field("c0"),
        bool_field("c1"),
        bool_field("c1"),
        bool_field("c2"),
    ]))?;
    assert_eq!(
        error_text(check_fields_unique(&repeated_unqualified)),
        "Schema error: Schema contains duplicate unqualified field name c1"
    );

    // duplicate qualified field with same qualifier
    let repeated_qualified = DFSchema::try_from_qualified_schema(
        "t1",
        &Schema::new(vec![bool_field("c1"), bool_field("c1")]),
    )?;
    assert_eq!(
        error_text(check_fields_unique(&repeated_qualified)),
        "Schema error: Schema contains duplicate qualified field name t1.c1"
    );

    // duplicate qualified and unqualified field
    let mixed_qualification = DFSchema::from_field_specific_qualified_schema(
        vec![None, Some(TableReference::from("t1"))],
        &Arc::new(Schema::new(vec![bool_field("c1"), bool_field("c1")])),
    )?;
    assert_eq!(
        error_text(check_fields_unique(&mixed_qualification)),
        "Schema error: Schema contains qualified field name t1.c1 and unqualified field name c1 which would be ambiguous"
    );

    // qualified fields with duplicate unqualified names
    let differing_qualifiers = DFSchema::from_field_specific_qualified_schema(
        vec![
            Some(TableReference::from("t1")),
            Some(TableReference::from("t2")),
        ],
        &Arc::new(Schema::new(vec![bool_field("c1"), bool_field("c1")])),
    )?;
    assert_eq!(
        error_text(check_fields_unique(&differing_qualifiers)),
        "Schema error: Schema contains qualified fields with duplicate unqualified names t1.c1"
    );

    Ok(())
}
1115
1244
}
0 commit comments