From ab1d071f3229add73fe4d747678f0125746fb1f4 Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Fri, 17 Nov 2017 14:55:34 +0800 Subject: [PATCH 1/2] stats: fix estimation in between row count --- statistics/histogram.go | 7 +------ statistics/selectivity_test.go | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index 5cc9eb80c09b6..e525134a8b896 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -309,7 +309,7 @@ func (hg *Histogram) betweenRowCount(sc *variable.StatementContext, a, b types.D return 0, errors.Trace(err) } if lessCountA >= lessCountB { - return hg.inBucketBetweenCount(), nil + return hg.totalRowCount() / float64(hg.NDV), nil } return lessCountB - lessCountA, nil } @@ -325,11 +325,6 @@ func (hg *Histogram) bucketRowCount() float64 { return hg.totalRowCount() / float64(len(hg.Buckets)) } -func (hg *Histogram) inBucketBetweenCount() float64 { - // TODO: Make this estimation more accurate using uniform spread assumption. - return hg.bucketRowCount()/3 + 1 -} - func (hg *Histogram) lowerBound(sc *variable.StatementContext, target types.Datum) (index int, match bool, err error) { index = sort.Search(len(hg.Buckets), func(i int) bool { cmp, err1 := hg.Buckets[i].UpperBound.CompareDatum(sc, &target) diff --git a/statistics/selectivity_test.go b/statistics/selectivity_test.go index 8aef01ad99ec4..74caf5904e95c 100644 --- a/statistics/selectivity_test.go +++ b/statistics/selectivity_test.go @@ -155,7 +155,7 @@ func (s *testSelectivitySuite) TestSelectivity(c *C) { }, { exprs: "a > 1 and b < 2 and c > 3 and d < 4 and e > 5", - selectivity: 0.00123287439, + selectivity: 0.00352249826, }, } for _, tt := range tests { From e36967aaf6bf2b69c6f909f72d7464ac5470d49d Mon Sep 17 00:00:00 2001 From: Haibin Xie Date: Fri, 17 Nov 2017 15:09:25 +0800 Subject: [PATCH 2/2] remove useless function --- statistics/histogram.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/statistics/histogram.go b/statistics/histogram.go index e525134a8b896..caf727bc71a70 100644 --- a/statistics/histogram.go +++ b/statistics/histogram.go @@ -321,10 +321,6 @@ func (hg *Histogram) totalRowCount() float64 { return float64(hg.Buckets[len(hg.Buckets)-1].Count) } -func (hg *Histogram) bucketRowCount() float64 { - return hg.totalRowCount() / float64(len(hg.Buckets)) -} - func (hg *Histogram) lowerBound(sc *variable.StatementContext, target types.Datum) (index int, match bool, err error) { index = sort.Search(len(hg.Buckets), func(i int) bool { cmp, err1 := hg.Buckets[i].UpperBound.CompareDatum(sc, &target)