diff --git a/statistics/column.go b/statistics/column.go index 7aed38d50abc7..c534cbfdd7262 100644 --- a/statistics/column.go +++ b/statistics/column.go @@ -206,7 +206,24 @@ func (c *Column) equalRowCount(sctx sessionctx.Context, val types.Datum, encoded // 3. use uniform distribution assumption for the rest (even when this value is not covered by the range of stats) histNDV := float64(c.Histogram.NDV - int64(c.TopN.Num())) if histNDV <= 0 { - return 0, nil + // If there has been no modifications - return zero + modifiedRows := float64(realtimeRowCount) - c.TotalRowCount() + if modifiedRows == 0 { + return 0, nil + } else if modifiedRows < 0 { + modifiedRows = float64(realtimeRowCount) + } + // ELSE calculate an approximate estimate based upon newly inserted rows. + // + // Reset to the original NDV, or if no NDV - derive an NDV using sqrt + if c.Histogram.NDV > 0 { + histNDV = float64(c.Histogram.NDV) + } else { + histNDV = math.Sqrt(math.Min(c.TotalRowCount(), modifiedRows)) + } + // As a conservative estimate - take the smaller of the original totalRows or the additions. + totalRowCount := math.Min(c.TotalRowCount(), modifiedRows) + return math.Max(1, totalRowCount/histNDV), nil } return c.Histogram.notNullCount() / histNDV, nil } diff --git a/statistics/index.go b/statistics/index.go index 12b92865bded9..e27f258d159f9 100644 --- a/statistics/index.go +++ b/statistics/index.go @@ -218,7 +218,24 @@ func (idx *Index) equalRowCount(sctx sessionctx.Context, b []byte, realtimeRowCo // 3. 
use uniform distribution assumption for the rest (even when this value is not covered by the range of stats) histNDV := float64(idx.Histogram.NDV - int64(idx.TopN.Num())) if histNDV <= 0 { - return 0 + // If there has been no modifications - return zero + modifiedRows := float64(realtimeRowCount) - idx.TotalRowCount() + if modifiedRows == 0 { + return 0 + } else if modifiedRows < 0 { + modifiedRows = float64(realtimeRowCount) + } + // ELSE calculate an approximate estimate based upon newly inserted rows. + // + // Reset to the original NDV, or if no NDV - derive an NDV using sqrt + if idx.Histogram.NDV > 0 { + histNDV = float64(idx.Histogram.NDV) + } else { + histNDV = math.Sqrt(math.Min(idx.TotalRowCount(), modifiedRows)) + } + // As a conservative estimate - take the smaller of the original totalRows or the additions. + totalRowCount := math.Min(idx.TotalRowCount(), modifiedRows) + return math.Max(1, totalRowCount/histNDV) }