Skip to content

Commit

Permalink
statistics: fix repetitive selectivity accounting and stabilize the r…
Browse files Browse the repository at this point in the history
  • Loading branch information
sre-bot authored Apr 3, 2020
1 parent cce7ff2 commit 4f03354
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 21 deletions.
73 changes: 52 additions & 21 deletions statistics/selectivity.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ package statistics

import (
"math"
"math/bits"
"sort"

"github.com/pingcap/errors"
"github.com/pingcap/parser/ast"
Expand Down Expand Up @@ -49,6 +51,32 @@ const (
colType
)

// compareType orders two stats types for sorting. It returns 0 when l and r are
// equal, a negative value when l should sort before r, and a positive value when
// l should sort after r. colType sorts first and pkType sorts last.
func compareType(l, r int) int {
	switch {
	case l == r:
		return 0
	case l == colType:
		// A column type precedes everything else.
		return -1
	case l == pkType || r == colType:
		// Either l is the pk type (always last) or r is the column type (always first).
		return 1
	default:
		return -1
	}
}

// MockExprSet builds an exprSet that carries only the given ID, mask and number of columns;
// every other field keeps its zero value. It is only used for test.
func MockExprSet(id int64, m int64, num int) *exprSet {
	set := new(exprSet)
	set.ID = id
	set.mask = m
	set.numCols = num
	return set
}

// MockExprSetSlice allocates a slice of l nil *exprSet entries for the caller to fill in.
// It is only used for test.
func MockExprSetSlice(l int) []*exprSet {
	sets := make([]*exprSet, l)
	return sets
}

// unknownColumnID is a sentinel column ID equal to math.MinInt64.
// NOTE(review): presumably it stands for "no column could be identified" — confirm against getConstantColumnID and its callers.
const unknownColumnID = math.MinInt64

// getConstantColumnID receives two expressions and if one of them is column and another is constant, it returns the
Expand Down Expand Up @@ -201,7 +229,7 @@ func (coll *HistColl) Selectivity(ctx sessionctx.Context, exprs []expression.Exp
sets = append(sets, &exprSet{tp: indexType, ID: id, mask: maskCovered, ranges: ranges, numCols: len(idxInfo.Info.Columns), partCover: partCover})
}
}
sets = getUsableSetsByGreedy(sets)
sets = GetUsableSetsByGreedy(sets)
// Initialize the mask with the full set.
mask := (int64(1) << uint(len(remainedExprs))) - 1
for _, set := range sets {
Expand Down Expand Up @@ -271,48 +299,51 @@ func getMaskAndRanges(ctx sessionctx.Context, exprs []expression.Expression, ran
return mask, ranges, false, nil
}

// getUsableSetsByGreedy will select the indices and pk used for calculate selectivity by greedy algorithm.
func getUsableSetsByGreedy(sets []*exprSet) (newBlocks []*exprSet) {
// GetUsableSetsByGreedy selects, by a greedy algorithm, the index and pk sets that are used to calculate selectivity.
// The input slice is sorted in place by stats type (see compareType) and then by ID before the greedy loop runs.
// The chosen sets are returned in the order in which they were picked.
func GetUsableSetsByGreedy(sets []*exprSet) (newBlocks []*exprSet) {
sort.Slice(sets, func(i int, j int) bool {
if r := compareType(sets[i].tp, sets[j].tp); r != 0 {
return r < 0
}
return sets[i].ID < sets[j].ID
})
// marked[i] records that sets[i] must not be considered again.
marked := make([]bool, len(sets))
// mask holds the expression bits that have not been removed by a chosen set yet.
mask := int64(math.MaxInt64)
for {
// Choose the index that covers the most expressions.
bestID, bestCount, bestTp, bestNumCols := -1, 0, colType, 0
bestID, bestCount, bestTp, bestNumCols, bestMask := -1, 0, colType, 0, int64(0)
for i, set := range sets {
set.mask &= mask
bits := popCount(set.mask)
if marked[i] {
continue
}
// curMask differs from set.mask only when some bit of set.mask is no longer present in mask;
// such a set is marked and skipped.
curMask := set.mask & mask
if curMask != set.mask {
marked[i] = true
continue
}
bits := bits.OnesCount64(uint64(curMask))
// This set cannot cover anything, so just skip it.
if bits == 0 {
marked[i] = true
continue
}
// We greedily select the stats info based on:
// (1): The stats type, always prefer the primary key or index.
// (2): The number of expressions that it covers, the more the better.
// (3): The number of columns that it contains, the fewer the better.
if (bestTp == colType && set.tp != colType) || bestCount < bits || (bestCount == bits && bestNumCols > set.numCols) {
bestID, bestCount, bestTp, bestNumCols = i, bits, set.tp, set.numCols
bestID, bestCount, bestTp, bestNumCols, bestMask = i, bits, set.tp, set.numCols, curMask
}
}
// No remaining set covers any expression, so the selection is finished.
if bestCount == 0 {
break
}

// Update the mask: clear the bits that the chosen set covers.
mask &^= sets[bestID].mask
mask &^= bestMask

newBlocks = append(newBlocks, sets[bestID])
// Remove the chosen one from further consideration.
sets = append(sets[:bestID], sets[bestID+1:]...)
marked[bestID] = true
}
return
}

// popCount returns the number of set bits in the binary representation of x,
// e.g. the result is 2 if x is 3.
//
// x is treated as a 64-bit pattern, so a negative value is counted by its
// two's-complement bits (popCount(-1) == 64) instead of being reported as 0.
func popCount(x int64) int {
	return bits.OnesCount64(uint64(x))
}
18 changes: 18 additions & 0 deletions statistics/selectivity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,3 +468,21 @@ func (s *testSelectivitySuite) TestColumnIndexNullEstimation(c *C) {
" └─TableScan_5 5.00 cop table:t, range:[-inf,+inf], keep order:false",
))
}

// TestSelectivityGreedyAlgo verifies that GetUsableSetsByGreedy returns a single set when every
// candidate mask overlaps, and that the winner does not change when the input is reordered.
func (s *testSelectivitySuite) TestSelectivityGreedyAlgo(c *C) {
	// The masks 3, 5 and 9 all have the lowest bit set; the sets get IDs 1, 2 and 3.
	masks := []int64{3, 5, 9}
	candidates := statistics.MockExprSetSlice(len(masks))
	for i, m := range masks {
		candidates[i] = statistics.MockExprSet(int64(i+1), m, 2)
	}

	// Sets should not overlap on mask, so only the set with ID 1 is chosen.
	picked := statistics.GetUsableSetsByGreedy(candidates)
	c.Assert(len(picked), Equals, 1)
	c.Assert(picked[0].ID, Equals, int64(1))

	// Sets chosen should be stable, so swapping the first two inputs must still yield ID 1.
	candidates[0], candidates[1] = candidates[1], candidates[0]
	picked = statistics.GetUsableSetsByGreedy(candidates)
	c.Assert(len(picked), Equals, 1)
	c.Assert(picked[0].ID, Equals, int64(1))
}

0 comments on commit 4f03354

Please sign in to comment.