Skip to content

Commit

Permalink
opt,sql: support hint to avoid full scan
Browse files Browse the repository at this point in the history
Added a hint to avoid full scans (see release note below for details).
To support this change, added a field to memo.Cost with a new type
memo.CostFlags, which contains a number of boolean flags and supports
"multi-dimensional costing". This allows the optimizer to compare plans
based on the flags set in addition to the single-dimensional float64
cost. For example, plans with the new FullScanPenalty cost flag enabled
will always be more expensive than plans without any cost flags, even
if the base float64 cost is lower.

The new CostFlags type also includes a flag for HugeCostPenalty, which
must be set for plans with "hugeCost". This ensures that existing
hints that use hugeCost still work if some other cost flags are set,
since HugeCostPenalty takes precedence over other cost flags.

This new CostFlags field is needed to support hints that do not cause an
error if the optimizer cannot find a plan complying with the hint. The
previous approach of simply using "hugeCost" to avoid certain plans does
not work for such hints: if the penalized plans were unavoidable, we could
not effectively compare plans with cost greater than hugeCost due to loss
of floating point precision.

Informs #79683

Release note (sql change): Added support for a new index hint,
AVOID_FULL_SCAN, which will prevent the optimizer from planning a
full scan for the specified table if any other plan is possible. The
hint can be used in the same way as other existing index hints. For
example, `SELECT * FROM table_name@{AVOID_FULL_SCAN};`. This hint is
similar to NO_FULL_SCAN, but will not error if a full scan cannot be
avoided. Note that normally a full scan of a partial index would not
be considered a "full scan" for the purposes of the NO_FULL_SCAN and
AVOID_FULL_SCAN hints, but if the user has explicitly forced the
partial index via FORCE_INDEX=index_name, we do consider it a full
scan.
  • Loading branch information
rytaft committed Dec 26, 2024
1 parent 271a8f4 commit 85a9bf4
Show file tree
Hide file tree
Showing 24 changed files with 436 additions and 33 deletions.
3 changes: 3 additions & 0 deletions docs/generated/sql/bnf/stmt_block.bnf
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,7 @@ unreserved_keyword ::=
| 'ATTRIBUTE'
| 'AUTOMATIC'
| 'AVAILABILITY'
| 'AVOID_FULL_SCAN'
| 'BACKUP'
| 'BACKUPS'
| 'BACKWARD'
Expand Down Expand Up @@ -3753,6 +3754,7 @@ bare_label_keywords ::=
| 'AUTHORIZATION'
| 'AUTOMATIC'
| 'AVAILABILITY'
| 'AVOID_FULL_SCAN'
| 'BACKUP'
| 'BACKUPS'
| 'BACKWARD'
Expand Down Expand Up @@ -4345,6 +4347,7 @@ index_flags_param ::=
| 'NO_INDEX_JOIN'
| 'NO_ZIGZAG_JOIN'
| 'NO_FULL_SCAN'
| 'AVOID_FULL_SCAN'
| 'FORCE_ZIGZAG'
| 'FORCE_ZIGZAG' '=' index_name

Expand Down
19 changes: 19 additions & 0 deletions pkg/sql/logictest/testdata/logic_test/select
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,25 @@ SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,NO_FULL_SCAN} WHERE b > 0
statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,NO_FULL_SCAN} WHERE a > 0 AND b = 1

# Now avoid full scans with a hint. A full scan should not cause an error.
statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN}

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{AVOID_FULL_SCAN} WHERE a > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_idx,AVOID_FULL_SCAN} WHERE b > 0

statement ok
SELECT * FROM t_disallow_scans@{FORCE_INDEX=b_partial,AVOID_FULL_SCAN} WHERE a > 0 AND b = 1

# Now disable full scans with the session variable.
statement ok
SET disallow_full_table_scans = true;
Expand Down
30 changes: 30 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/delete
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,36 @@ vectorized: true
spans: /1-/1000 /2001-/3000
locking strength: for update

# AVOID_FULL_SCAN also works to ensure a constrained scan.
query T
EXPLAIN (VERBOSE) DELETE FROM xyz@{AVOID_FULL_SCAN} WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000) RETURNING z
----
distribution: local
vectorized: true
·
• project
│ columns: (z)
└── • delete
│ columns: (x, z)
│ estimated row count: 990 (missing stats)
│ from: xyz
│ auto commit
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@xyz_y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

# Testcase for issue 105803.

statement ok
Expand Down
2 changes: 1 addition & 1 deletion pkg/sql/opt/exec/execbuilder/testdata/explain_redact
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,7 @@ update e
query T
EXPLAIN (OPT, MEMO, REDACT) UPDATE e SET e = 'eee' WHERE e > 'a'
----
memo (optimized, ~16KB, required=[presentation: info:17] [distribution: test])
memo (optimized, ~17KB, required=[presentation: info:17] [distribution: test])
├── G1: (explain G2 [distribution: test])
│ └── [presentation: info:17] [distribution: test]
│ ├── best: (explain G2="[distribution: test]" [distribution: test])
Expand Down
35 changes: 35 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/update
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,41 @@ vectorized: true
spans: /1-/1000 /2001-/3000
locking strength: for update

# AVOID_FULL_SCAN also works to ensure a constrained scan.
query T
EXPLAIN (VERBOSE) UPDATE xyz@{AVOID_FULL_SCAN} SET x = 5 WHERE (y > 0 AND y < 1000) OR (y > 2000 AND y < 3000)
----
distribution: local
vectorized: true
·
• update
│ columns: ()
│ estimated row count: 0 (missing stats)
│ table: xyz
│ set: x
│ auto commit
└── • render
│ columns: (x, y, z, x_new)
│ render x_new: 5
│ render x: x
│ render y: y
│ render z: z
└── • index join
│ columns: (x, y, z)
│ estimated row count: 990 (missing stats)
│ table: xyz@xyz_pkey
│ key columns: x
│ locking strength: for update
└── • scan
columns: (x, y)
estimated row count: 990 (missing stats)
table: xyz@y_idx
spans: /1-/1000 /2001-/3000
locking strength: for update

statement ok
CREATE TABLE pks (
k1 INT,
Expand Down
106 changes: 106 additions & 0 deletions pkg/sql/opt/exec/execbuilder/testdata/upsert
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,112 @@ vectorized: true
query error could not produce a query plan conforming to the NO_FULL_SCAN hint
EXPLAIN (VERBOSE) UPSERT INTO indexed@{FORCE_INDEX=secondary,NO_FULL_SCAN} VALUES (1)

# AVOID_FULL_SCAN also works to ensure a constrained scan, and does not error if
# one is not possible.
query T
EXPLAIN (VERBOSE) UPSERT INTO indexed@{AVOID_FULL_SCAN} VALUES (1)
----
distribution: local
vectorized: true
·
• upsert
│ columns: ()
│ estimated row count: 0 (missing stats)
│ into: indexed(a, b, c, d)
│ auto commit
│ arbiter indexes: indexed_pkey
└── • project
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d, b_default, c_default, d_comp, a, check1)
└── • render
│ columns: (check1, column1, b_default, c_default, d_comp, a, b, c, d)
│ render check1: c_default > 0
│ render column1: column1
│ render b_default: b_default
│ render c_default: c_default
│ render d_comp: d_comp
│ render a: a
│ render b: b
│ render c: c
│ render d: d
└── • cross join (left outer)
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d)
│ estimated row count: 1 (missing stats)
├── • values
│ columns: (column1, b_default, c_default, d_comp)
│ size: 4 columns, 1 row
│ row 0, expr 0: 1
│ row 0, expr 1: CAST(NULL AS INT8)
│ row 0, expr 2: 10
│ row 0, expr 3: 11
└── • scan
columns: (a, b, c, d)
estimated row count: 1 (missing stats)
table: indexed@indexed_pkey
spans: /1/0
locking strength: for update

query T
EXPLAIN (VERBOSE) UPSERT INTO indexed@{FORCE_INDEX=secondary,AVOID_FULL_SCAN} VALUES (1)
----
distribution: local
vectorized: true
·
• upsert
│ columns: ()
│ estimated row count: 0 (missing stats)
│ into: indexed(a, b, c, d)
│ auto commit
│ arbiter indexes: indexed_pkey
└── • project
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d, b_default, c_default, d_comp, a, check1)
└── • render
│ columns: (check1, column1, b_default, c_default, d_comp, a, b, c, d)
│ render check1: c_default > 0
│ render column1: column1
│ render b_default: b_default
│ render c_default: c_default
│ render d_comp: d_comp
│ render a: a
│ render b: b
│ render c: c
│ render d: d
└── • cross join (left outer)
│ columns: (column1, b_default, c_default, d_comp, a, b, c, d)
│ estimated row count: 1 (missing stats)
├── • values
│ columns: (column1, b_default, c_default, d_comp)
│ size: 4 columns, 1 row
│ row 0, expr 0: 1
│ row 0, expr 1: CAST(NULL AS INT8)
│ row 0, expr 2: 10
│ row 0, expr 3: 11
└── • filter
│ columns: (a, b, c, d)
│ estimated row count: 1 (missing stats)
│ filter: a = 1
└── • index join
│ columns: (a, b, c, d)
│ estimated row count: 1,000 (missing stats)
│ table: indexed@indexed_pkey
│ key columns: a
└── • scan
columns: (a, b, d)
estimated row count: 1,000 (missing stats)
table: indexed@secondary
spans: FULL SCAN

query T
EXPLAIN (VERBOSE)
INSERT INTO indexed@indexed_pkey AS indexed_pk
Expand Down
47 changes: 42 additions & 5 deletions pkg/sql/opt/memo/cost.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,19 +11,23 @@ import "math"
// particular operator tree.
// TODO: Need more details about what one "unit" of cost means.
type Cost struct {
Cost float64
// CostFlags is used as a placeholder for cost flags that will be added in a
// future commit.
CostFlags int
Cost float64
Flags CostFlags
}

// MaxCost is the maximum possible estimated cost. It's used to suppress memo
// group members during testing, by setting their cost so high that any other
// member will have a lower cost.
var MaxCost = Cost{Cost: math.Inf(+1)}
var MaxCost = Cost{
Cost: math.Inf(+1),
Flags: CostFlags{FullScanPenalty: true, HugeCostPenalty: true},
}

// Less returns true if this cost is lower than the given cost.
func (c Cost) Less(other Cost) bool {
if c.Flags != other.Flags {
return c.Flags.Less(other.Flags)
}
// Two plans with the same cost can have slightly different floating point
// results (e.g. same subcosts being added up in a different order). So we
// treat plans with very similar cost as equal.
Expand All @@ -40,4 +44,37 @@ func (c Cost) Less(other Cost) bool {
// Add accumulates other into c in place: the penalty flags of other are
// merged into c.Flags and the float64 costs are summed.
func (c *Cost) Add(other Cost) {
	c.Flags.Add(other.Flags)
	c.Cost = c.Cost + other.Cost
}

// CostFlags is a set of boolean penalties attached to the cost of an
// operator. Flags are compared before the numeric cost, so a plan carrying a
// penalty is considered more expensive than a plan without it regardless of
// the float64 cost.
type CostFlags struct {
	// FullScanPenalty marks a cost as penalized for a full scan.
	FullScanPenalty bool
	// HugeCostPenalty marks a cost as penalized with "hugeCost". It outranks
	// every other penalty when two sets of flags are compared.
	HugeCostPenalty bool
}

// Less reports whether c represents a strictly lower penalty than other.
//
// HugeCostPenalty is checked first because it indicates that a plan is being
// forced with a hint, and planning will error if the hint cannot be honored.
// FullScanPenalty only breaks ties between flags that agree on
// HugeCostPenalty. Identical flags are never less than each other.
func (c CostFlags) Less(other CostFlags) bool {
	switch {
	case c.HugeCostPenalty != other.HugeCostPenalty:
		// Exactly one side carries the penalty; c is lower iff it is other.
		return other.HugeCostPenalty
	case c.FullScanPenalty != other.FullScanPenalty:
		return other.FullScanPenalty
	default:
		return false
	}
}

// Add merges other into c in place. A penalty is set in the result if it is
// set in either operand.
func (c *CostFlags) Add(other CostFlags) {
	if other.FullScanPenalty {
		c.FullScanPenalty = true
	}
	if other.HugeCostPenalty {
		c.HugeCostPenalty = true
	}
}

// Empty reports whether no penalty flag is set.
func (c CostFlags) Empty() bool {
	return !(c.FullScanPenalty || c.HugeCostPenalty)
}
47 changes: 46 additions & 1 deletion pkg/sql/opt/memo/cost_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ func TestCostLess(t *testing.T) {
{memo.Cost{Cost: 1}, memo.Cost{Cost: 1.00000001}, true},
{memo.Cost{Cost: 1000}, memo.Cost{Cost: 1000.00000000001}, false},
{memo.Cost{Cost: 1000}, memo.Cost{Cost: 1000.00001}, true},
{memo.Cost{Cost: 1.0, Flags: memo.CostFlags{FullScanPenalty: true}}, memo.Cost{Cost: 1.0}, false},
{memo.Cost{Cost: 1.0}, memo.Cost{Cost: 1.0, Flags: memo.CostFlags{HugeCostPenalty: true}}, true},
{memo.Cost{Cost: 1.0, Flags: memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}}, memo.Cost{Cost: 1.0}, false},
{memo.Cost{Cost: 1.0, Flags: memo.CostFlags{FullScanPenalty: true}}, memo.Cost{Cost: 1.0, Flags: memo.CostFlags{HugeCostPenalty: true}}, true},
{memo.MaxCost, memo.Cost{Cost: 1.0}, false},
{memo.Cost{Cost: 0.0}, memo.MaxCost, true},
{memo.MaxCost, memo.MaxCost, false},
{memo.MaxCost, memo.Cost{Cost: 1.0, Flags: memo.CostFlags{FullScanPenalty: true}}, false},
{memo.Cost{Cost: 1.0, Flags: memo.CostFlags{HugeCostPenalty: true}}, memo.MaxCost, true},
}
for _, tc := range testCases {
if tc.left.Less(tc.right) != tc.expected {
Expand All @@ -40,10 +49,46 @@ func TestCostAdd(t *testing.T) {
}{
{memo.Cost{Cost: 1.0}, memo.Cost{Cost: 2.0}, memo.Cost{Cost: 3.0}},
{memo.Cost{Cost: 0.0}, memo.Cost{Cost: 0.0}, memo.Cost{Cost: 0.0}},
{memo.Cost{Cost: -1.0}, memo.Cost{Cost: 1.0}, memo.Cost{Cost: 0.0}},
{memo.Cost{Cost: 1.5}, memo.Cost{Cost: 2.5}, memo.Cost{Cost: 4.0}},
{memo.Cost{Cost: 1.0, Flags: memo.CostFlags{FullScanPenalty: true}}, memo.Cost{Cost: 2.0}, memo.Cost{Cost: 3.0, Flags: memo.CostFlags{FullScanPenalty: true}}},
{memo.Cost{Cost: 1.0}, memo.Cost{Cost: 2.0, Flags: memo.CostFlags{HugeCostPenalty: true}}, memo.Cost{Cost: 3.0, Flags: memo.CostFlags{HugeCostPenalty: true}}},
}
for _, tc := range testCases {
tc.left.Add(tc.right)
if tc.left != tc.expected {
t.Errorf("expected %v.Add(%v) to be %v, got %v", tc.left, tc.right, tc.expected, tc.left)
}
}
}

// TestCostFlagsLess verifies the ordering defined by memo.CostFlags.Less,
// including the rule that HugeCostPenalty takes precedence over
// FullScanPenalty when the two sides carry different penalties.
func TestCostFlagsLess(t *testing.T) {
	testCases := []struct {
		left, right memo.CostFlags
		expected    bool
	}{
		{memo.CostFlags{FullScanPenalty: false, HugeCostPenalty: false}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, true},
		{memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: false, HugeCostPenalty: false}, false},
		{memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, false},
		{memo.CostFlags{FullScanPenalty: false}, memo.CostFlags{FullScanPenalty: true}, true},
		{memo.CostFlags{HugeCostPenalty: false}, memo.CostFlags{HugeCostPenalty: true}, true},
		// Identical empty flags are never less than each other.
		{memo.CostFlags{}, memo.CostFlags{}, false},
		// HugeCostPenalty outranks FullScanPenalty: a plan with only the full
		// scan penalty is cheaper than a plan with only the huge cost penalty.
		{memo.CostFlags{FullScanPenalty: true}, memo.CostFlags{HugeCostPenalty: true}, true},
		{memo.CostFlags{HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: true}, false},
		// With HugeCostPenalty equal on both sides, FullScanPenalty breaks the
		// tie.
		{memo.CostFlags{HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, true},
		{memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, memo.CostFlags{HugeCostPenalty: true}, false},
	}
	for _, tc := range testCases {
		if tc.left.Less(tc.right) != tc.expected {
			t.Errorf("expected %v.Less(%v) to be %v", tc.left, tc.right, tc.expected)
		}
	}
}

func TestCostFlagsAdd(t *testing.T) {
testCases := []struct {
left, right, expected memo.CostFlags
}{
{memo.CostFlags{FullScanPenalty: false, HugeCostPenalty: false}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}},
{memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: false, HugeCostPenalty: false}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}},
{memo.CostFlags{FullScanPenalty: false}, memo.CostFlags{FullScanPenalty: true}, memo.CostFlags{FullScanPenalty: true}},
{memo.CostFlags{HugeCostPenalty: false}, memo.CostFlags{HugeCostPenalty: true}, memo.CostFlags{HugeCostPenalty: true}},
{memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: false}, memo.CostFlags{FullScanPenalty: false, HugeCostPenalty: true}, memo.CostFlags{FullScanPenalty: true, HugeCostPenalty: true}},
}
for _, tc := range testCases {
tc.left.Add(tc.right)
if tc.left != tc.expected {
Expand Down
3 changes: 3 additions & 0 deletions pkg/sql/opt/memo/expr.go
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,9 @@ type ScanFlags struct {
// NoFullScan disallows use of a full scan for scanning this table.
NoFullScan bool

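// AvoidFullScan avoids use of a full scan for scanning this table if any
// other plan is possible. Unlike NoFullScan, it does not cause an error
// when a full scan cannot be avoided.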
AvoidFullScan bool

// ForceIndex forces the use of a specific index (specified in Index).
// ForceIndex and NoIndexJoin cannot both be set at the same time.
ForceIndex bool
Expand Down
Loading

0 comments on commit 85a9bf4

Please sign in to comment.