Skip to content

Commit

Permalink
[CALCITE-6301] Following "must-filter" columns, add "bypass" columns
Browse files Browse the repository at this point in the history
In [CALCITE-6219], we introduced `interface SemanticTable`. A
table that implements this interface can define "must-filter"
fields; a query that does not filter those fields in its
WHERE or HAVING clause will throw a validation error.

In this commit, we allow "bypass" fields. If a query filters
on any bypass field in its WHERE or HAVING clause, then the
must-filter fields are ignored ("defused").

Add `class FilterRequirement`, which holds must-filter
fields, bypass fields, and an internal concept called
"remnant-filter" fields.

Various tidy-ups (Julian Hyde): improve javadoc formatting,
remove accessor methods to public final fields, and use the
new `ImmutableBitSet.stream()` method.

Close apache#3984
  • Loading branch information
olivrlee authored and julianhyde committed Jan 14, 2025
1 parent a601040 commit 7ad5a39
Show file tree
Hide file tree
Showing 12 changed files with 706 additions and 101 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;

Expand Down Expand Up @@ -58,9 +57,11 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
/** As {@link #rowType}, but not necessarily a struct. */
protected @Nullable RelDataType type;

/** Ordinals of fields that must be filtered. Initially the empty set, but
* should typically be re-assigned on validate. */
protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of();
/** Information about what fields need to be filtered and what bypass fields
* can defuse the errors if they are filtered on as an alternative.
* Initialized as an empty object, but typically re-assigned during
* validation. */
protected FilterRequirement filterRequirement = FilterRequirement.EMPTY;

protected final @Nullable SqlNode enclosingNode;

Expand Down Expand Up @@ -164,9 +165,9 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
return ImmutableList.of();
}

@Override public ImmutableBitSet getMustFilterFields() {
return requireNonNull(mustFilterFields,
"mustFilterFields (maybe validation is not complete?)");
@Override public FilterRequirement getFilterRequirement() {
  // Read the field once, then fail fast with a hint if it is unexpectedly
  // null (the message suggests validation may not have finished).
  final FilterRequirement requirement = filterRequirement;
  return requireNonNull(requirement,
      "filterRequirement (maybe validation is not complete?)");
}

@Override public SqlMonotonicity getMonotonicity(String columnName) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.sql.validate;

import org.apache.calcite.util.ImmutableBitSet;

import com.google.common.collect.ImmutableSet;

import java.util.Set;

/**
* Filtering requirements of a query, describing "must-filter" fields and
* "bypass" fields.
*
* <p>"Must-filter" fields must be filtered for a query to be considered valid;
* and "bypass" fields can defuse the errors if they are filtered on as an
* alternative.
*
* <p>Filter requirements originate in a {@link SemanticTable} in the model
* and propagate to any query that uses that table.
*
* <p>For example, consider table {@code t},
* which has a must-filter field {@code f}
* and bypass-fields {@code b0} and {@code b1},
* and the following queries:
*
* <ol>
* <li>Query {@code select f from t}
* is invalid because there is no filter on {@code f}.
*
* <li>Query {@code select * from (select f from t)} gives an error in the
* subquery because there is no filter on {@code f}.
*
* <li>Query {@code select f from t where f = 1} is valid because there is a
* filter on {@code f}.
*
* <li>Query {@code select * from (select f from t) where f = 1} is valid
* because there is a filter on {@code f}.
*
* <li>Query {@code select f from t where b0 = 1} is valid because there is a
* filter on the bypass-field {@code b0}.
* </ol>
*
* <p>{@code FilterRequirement} is immutable, and has an instance {@link #EMPTY}
* with no filters.
*
* <p><b>Notes on remnantFilterFields</b>
*
* <p>{@link #remnantFilterFields} determines whether validation fails at the
* top-level query. A must-filter field is added to it when that field is
* neither selected nor filtered on, but a bypass-field of the same table is
* selected.
*
* <p>A remnant-filter field is no longer accessible by the enclosing query,
* and so the query can no longer be defused by filtering on it. We must keep
* track of the remnant-filter field because the query can still be defused by
* filtering on a bypass-field.
*
* <p>For example, consider table {@code t} with a must-filter field {@code f}
* and bypass-fields {@code b0} and {@code b1}.
*
* <ol>
* <li>Query {@code select b0, b1 from t} results in
* {@code filterFields} = [],
* {@code bypassFields} = [{@code b0}, {@code b1}],
* {@code remnantFilterFields} = [{@code f}].
* The query is invalid because it is a top-level query and
* {@link #remnantFilterFields} is not empty.
*
* <li>Query {@code select * from (select b0, b1 from t) where b0 = 1} is valid.
* When unwrapping the subquery we get the same {@code FilterRequirement}
* as the previous example:
* {@code filterFields} = [],
* {@code bypassFields} = [{@code b0}, {@code b1}],
* {@code remnantFilterFields} = [{@code f}].
* But when unwrapping the top-level query, the filter on {@code b0} defuses
* the {@code remnantFilterField} requirement of [{@code f}] because it
* originated in the same table, resulting in the following:
* {@code filterFields} = [],
* {@code bypassFields} = [{@code b0}, {@code b1}],
* {@code remnantFilterFields} = [].
* The query is valid because {@link #remnantFilterFields} is now empty.
* </ol>
*
* @see SqlValidatorNamespace#getFilterRequirement()
*/
public class FilterRequirement {
  /** Shared instance that carries no must-filter fields, no bypass fields and
   * no remnant-filter fields. */
  public static final FilterRequirement EMPTY =
      new FilterRequirement(
          ImmutableBitSet.of(),
          ImmutableBitSet.of(),
          ImmutableSet.of());

  /** "Must-filter" fields, identified by their ordinals in the row type;
   * a query has to filter on each of them. */
  public final ImmutableBitSet filterFields;

  /** "Bypass" fields, identified by their ordinals in the row type;
   * filtering on one of them defuses the validation errors that
   * {@link #filterFields} would otherwise cause. */
  public final ImmutableBitSet bypassFields;

  /** Fields, held as {@link SqlQualified} instances, that were not defused
   * in the current query but may still be defused if the enclosing query
   * filters on a bypass field. */
  public final ImmutableSet<SqlQualified> remnantFilterFields;

  /**
   * Creates a {@code FilterRequirement}, taking immutable snapshots of all
   * three arguments.
   *
   * @param filterFields Ordinals of the "must-filter" fields
   * @param bypassFields Ordinals of the "bypass" fields
   * @param remnantFilterFields Filter fields that can no longer be filtered
   *     on directly, and can only be defused by filtering on a bypass field
   */
  FilterRequirement(Iterable<Integer> filterFields,
      Iterable<Integer> bypassFields, Set<SqlQualified> remnantFilterFields) {
    // Convert every argument to its immutable form first, in declaration
    // order, and only then publish the results to the final fields.
    final ImmutableBitSet mustFilterOrdinals = ImmutableBitSet.of(filterFields);
    final ImmutableBitSet bypassOrdinals = ImmutableBitSet.of(bypassFields);
    final ImmutableSet<SqlQualified> remnants =
        ImmutableSet.copyOf(remnantFilterFields);

    this.filterFields = mustFilterOrdinals;
    this.bypassFields = bypassOrdinals;
    this.remnantFilterFields = remnants;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) {
}
}

this.mustFilterFields = resolvedNamespace.getMustFilterFields();
filterRequirement = resolvedNamespace.getFilterRequirement();
RelDataType rowType = resolvedNamespace.getRowType();

if (extendList != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@
*/
package org.apache.calcite.sql.validate;

import com.google.common.collect.ImmutableList;

import org.checkerframework.checker.nullness.qual.Nullable;

import java.util.List;

/**
* Extension to {@link SqlValidatorTable} with extra, optional metadata.
*
Expand All @@ -44,4 +48,12 @@ public interface SemanticTable {
default boolean mustFilter(int column) {
  // True exactly when getFilter(column) yields a non-null value.
  final boolean hasFilter = getFilter(column) != null;
  return hasFilter;
}

/**
 * Returns the 0-based ordinals of the columns that, when filtered on,
 * defuse the must-filter columns.
 *
 * <p>This default implementation returns an empty list.
 */
default List<Integer> bypassFieldList() {
  final List<Integer> noBypassFields = ImmutableList.of();
  return noBypassFields;
}
}
Loading

0 comments on commit 7ad5a39

Please sign in to comment.