Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

infoschema: fill data length fields for tables #7657

Merged
merged 9 commits into from
Sep 13, 2018
108 changes: 87 additions & 21 deletions infoschema/tables.go
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,62 @@ func getRowCountAllTable(ctx sessionctx.Context) (map[int64]uint64, error) {
return rowCountMap, nil
}

// tableHistID identifies one histogram row in mysql.stats_histograms:
// the owning table's ID plus the histogram ID (for non-index histograms
// this is the column ID — see getColLengthAllTables).
type tableHistID struct {
	tableID int64
	histID  int64
}

// getColLengthAllTables loads the accumulated total column size of every
// non-index column from mysql.stats_histograms, keyed by
// (table ID, histogram ID). Negative stored sizes are clamped to zero.
func getColLengthAllTables(ctx sessionctx.Context) (map[tableHistID]int64, error) {
	const query = "select table_id, hist_id, tot_col_size from mysql.stats_histograms where is_index = 0"
	rows, _, err := ctx.(sqlexec.RestrictedSQLExecutor).ExecRestrictedSQL(ctx, query)
	if err != nil {
		return nil, errors.Trace(err)
	}
	result := make(map[tableHistID]int64, len(rows))
	for _, r := range rows {
		size := r.GetInt64(2)
		if size < 0 {
			// tot_col_size can go negative transiently; treat as empty.
			size = 0
		}
		result[tableHistID{tableID: r.GetInt64(0), histID: r.GetInt64(1)}] = size
	}
	return result, nil
}

// getDataAndIndexLength estimates a table's data length and index length
// in bytes. Fixed-size columns contribute rowCount * type size;
// variable-size columns use the accumulated size looked up in
// columnLengthMap by (table ID, column/histogram ID).
func getDataAndIndexLength(info *model.TableInfo, rowCount uint64, columnLengthMap map[tableHistID]int64) (uint64, uint64) {
	// Per-column estimated sizes, keyed by lowercase column name, so
	// index columns can reuse the size of the column they cover.
	colSizes := make(map[string]uint64)
	var dataLength uint64
	for _, col := range info.Columns {
		if col.State != model.StatePublic {
			continue
		}
		var size uint64
		if fixed := col.FieldType.Length(); fixed != types.VarElemLen {
			size = rowCount * uint64(fixed)
		} else {
			size = uint64(columnLengthMap[tableHistID{tableID: info.ID, histID: col.ID}])
		}
		colSizes[col.Name.L] = size
		dataLength += size
	}
	var indexLength uint64
	for _, idx := range info.Indices {
		if idx.State != model.StatePublic {
			continue
		}
		for _, ic := range idx.Columns {
			if ic.Length == types.UnspecifiedLength {
				// Full-column index entry: reuse the column's estimate.
				indexLength += colSizes[ic.Name.L]
			} else {
				// Prefix index entry: ic.Length bytes per row.
				indexLength += rowCount * uint64(ic.Length)
			}
		}
	}
	return dataLength, indexLength
}

func getAutoIncrementID(ctx sessionctx.Context, schema *model.DBInfo, tblInfo *model.TableInfo) (int64, error) {
hasAutoIncID := false
for _, col := range tblInfo.Cols() {
Expand Down Expand Up @@ -720,6 +776,10 @@ func dataForTables(ctx sessionctx.Context, schemas []*model.DBInfo) ([][]types.D
if err != nil {
return nil, errors.Trace(err)
}
colLengthMap, err := getColLengthAllTables(ctx)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current implementation doesn't do any RPC.
After this change, it may take too much time to execute if we have many tables.
And this may run frequently in some applications.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It does. You can take a look at getRowCountAllTable.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I see.

We can cache the result in case it is called too frequently, just like the way GlobalVariableCache does.

if err != nil {
return nil, errors.Trace(err)
}

checker := privilege.GetPrivilegeManager(ctx)

Expand All @@ -744,28 +804,34 @@ func dataForTables(ctx sessionctx.Context, schemas []*model.DBInfo) ([][]types.D
if err != nil {
return nil, errors.Trace(err)
}
rowCount := tableRowsMap[table.ID]
dataLength, indexLength := getDataAndIndexLength(table, rowCount, colLengthMap)
avgRowLength := uint64(0)
if rowCount != 0 {
avgRowLength = dataLength / rowCount
}
record := types.MakeDatums(
catalogVal, // TABLE_CATALOG
schema.Name.O, // TABLE_SCHEMA
table.Name.O, // TABLE_NAME
"BASE TABLE", // TABLE_TYPE
"InnoDB", // ENGINE
uint64(10), // VERSION
"Compact", // ROW_FORMAT
tableRowsMap[table.ID], // TABLE_ROWS
uint64(0), // AVG_ROW_LENGTH
uint64(16384), // DATA_LENGTH
uint64(0), // MAX_DATA_LENGTH
uint64(0), // INDEX_LENGTH
uint64(0), // DATA_FREE
autoIncID, // AUTO_INCREMENT
createTime, // CREATE_TIME
nil, // UPDATE_TIME
nil, // CHECK_TIME
collation, // TABLE_COLLATION
nil, // CHECKSUM
"", // CREATE_OPTIONS
table.Comment, // TABLE_COMMENT
catalogVal, // TABLE_CATALOG
schema.Name.O, // TABLE_SCHEMA
table.Name.O, // TABLE_NAME
"BASE TABLE", // TABLE_TYPE
"InnoDB", // ENGINE
uint64(10), // VERSION
"Compact", // ROW_FORMAT
rowCount, // TABLE_ROWS
avgRowLength, // AVG_ROW_LENGTH
dataLength, // DATA_LENGTH
uint64(0), // MAX_DATA_LENGTH
indexLength, // INDEX_LENGTH
uint64(0), // DATA_FREE
autoIncID, // AUTO_INCREMENT
createTime, // CREATE_TIME
nil, // UPDATE_TIME
nil, // CHECK_TIME
collation, // TABLE_COLLATION
nil, // CHECKSUM
"", // CREATE_OPTIONS
table.Comment, // TABLE_COMMENT
)
rows = append(rows, record)
}
Expand Down
88 changes: 44 additions & 44 deletions infoschema/tables_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,52 +47,9 @@ func (s *testSuite) TestInfoschemaFielValue(c *C) {
Check(testkit.Rows("<nil> <nil> <nil> <nil> <nil>", "<nil> <nil> <nil> <nil> 3", "<nil> <nil> <nil> <nil> 3", "<nil> <nil> <nil> <nil> 4", "<nil> <nil> <nil> <nil> <nil>"))
tk.MustQuery("select CHARACTER_MAXIMUM_LENGTH,CHARACTER_OCTET_LENGTH,NUMERIC_PRECISION,NUMERIC_SCALE,DATETIME_PRECISION from information_schema.COLUMNS where table_name='strschema'").
Check(testkit.Rows("3 3 <nil> <nil> <nil>", "3 3 <nil> <nil> <nil>", "3 3 <nil> <nil> <nil>", "3 3 <nil> <nil> <nil>")) // FIXME: for mysql last two will be "255 255 <nil> <nil> <nil>", "255 255 <nil> <nil> <nil>"
}

func (s *testSuite) TestDataForTableRowsCountField(c *C) {
testleak.BeforeTest()
defer testleak.AfterTest(c)()
store, err := mockstore.NewMockTikvStore()
c.Assert(err, IsNil)
defer store.Close()
session.SetStatsLease(0)
do, err := session.BootstrapSession(store)
c.Assert(err, IsNil)
defer do.Close()

h := do.StatsHandle()
is := do.InfoSchema()
tk := testkit.NewTestKit(c, store)

tk.MustExec("use test")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (c int, d int)")
h.HandleDDLEvent(<-h.DDLEventCh())
tk.MustQuery("select table_rows from information_schema.tables where table_name='t'").Check(
testkit.Rows("0"))
tk.MustExec("insert into t(c, d) values(1, 2), (2, 3), (3, 4)")
h.DumpStatsDeltaToKV(statistics.DumpAll)
h.Update(is)
tk.MustQuery("select table_rows from information_schema.tables where table_name='t'").Check(
testkit.Rows("3"))
tk.MustExec("insert into t(c, d) values(4, 5)")
h.DumpStatsDeltaToKV(statistics.DumpAll)
h.Update(is)
tk.MustQuery("select table_rows from information_schema.tables where table_name='t'").Check(
testkit.Rows("4"))
tk.MustExec("delete from t where c >= 3")
h.DumpStatsDeltaToKV(statistics.DumpAll)
h.Update(is)
tk.MustQuery("select table_rows from information_schema.tables where table_name='t'").Check(
testkit.Rows("2"))
tk.MustExec("delete from t where c=3")
h.DumpStatsDeltaToKV(statistics.DumpAll)
h.Update(is)
tk.MustQuery("select table_rows from information_schema.tables where table_name='t'").Check(
testkit.Rows("2"))

// Test for auto increment ID.
tk.MustExec("drop table t")
tk.MustExec("drop table if exists t")
tk.MustExec("create table t (c int auto_increment primary key, d int)")
tk.MustQuery("select auto_increment from information_schema.tables where table_name='t'").Check(
testkit.Rows("1"))
Expand Down Expand Up @@ -122,3 +79,46 @@ func (s *testSuite) TestDataForTableRowsCountField(c *C) {

tk1.MustQuery("select distinct(table_schema) from information_schema.tables").Check(testkit.Rows("INFORMATION_SCHEMA"))
}

// TestDataForTableStatsField checks that table_rows, avg_row_length,
// data_length and index_length reported by information_schema.tables
// track the statistics collected for the table as rows are inserted and
// deleted.
func (s *testSuite) TestDataForTableStatsField(c *C) {
	testleak.BeforeTest()
	defer testleak.AfterTest(c)()
	store, err := mockstore.NewMockTikvStore()
	c.Assert(err, IsNil)
	defer store.Close()
	session.SetStatsLease(0)
	do, err := session.BootstrapSession(store)
	c.Assert(err, IsNil)
	defer do.Close()

	h := do.StatsHandle()
	is := do.InfoSchema()
	tk := testkit.NewTestKit(c, store)

	statsQuery := "select table_rows, avg_row_length, data_length, index_length from information_schema.tables where table_name='t'"
	// flushAndCheck dumps the pending stats delta to the KV store,
	// reloads the stats cache, and asserts the stats fields of table t.
	flushAndCheck := func(expected string) {
		h.DumpStatsDeltaToKV(statistics.DumpAll)
		h.Update(is)
		tk.MustQuery(statsQuery).Check(testkit.Rows(expected))
	}

	tk.MustExec("use test")
	tk.MustExec("drop table if exists t")
	tk.MustExec("create table t (c int, d int, e char(5), index idx(e))")
	h.HandleDDLEvent(<-h.DDLEventCh())
	// Freshly created table: no rows, no stats.
	tk.MustQuery(statsQuery).Check(testkit.Rows("0 0 0 0"))
	tk.MustExec(`insert into t(c, d, e) values(1, 2, "c"), (2, 3, "d"), (3, 4, "e")`)
	flushAndCheck("3 17 51 3")
	tk.MustExec(`insert into t(c, d, e) values(4, 5, "f")`)
	flushAndCheck("4 17 68 4")
	tk.MustExec("delete from t where c >= 3")
	flushAndCheck("2 17 34 2")
	tk.MustExec("delete from t where c=3")
	flushAndCheck("2 17 34 2")
}
18 changes: 5 additions & 13 deletions statistics/histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,20 +108,12 @@ func (c *Column) AvgColSize(count int64) float64 {
if count == 0 {
return 0
}
switch c.Histogram.tp.Tp {
case mysql.TypeFloat:
return 4
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong, mysql.TypeLonglong,
mysql.TypeDouble, mysql.TypeYear:
return 8
case mysql.TypeDuration, mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
return 16
case mysql.TypeNewDecimal:
return types.MyDecimalStructSize
default:
// Keep two decimal place.
return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
len := c.Histogram.tp.Length()
if len != types.VarElemLen {
return float64(len)
}
// Keep two decimal place.
return math.Round(float64(c.TotColSize)/float64(count)*100) / 100
}

// AppendBucket appends a bucket into `hg`.
Expand Down
20 changes: 20 additions & 0 deletions types/field_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -1421,3 +1421,23 @@ func SetBinChsClnFlag(ft *FieldType) {
ft.Collate = charset.CollationBin
ft.Flag |= mysql.BinaryFlag
}

// VarElemLen indicates this column is a variable length column.
const VarElemLen = -1

// Length is the length of value for the type.
func (ft *FieldType) Length() int {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's misleading because we already have a Flen field in FieldType.

switch ft.Tp {
case mysql.TypeFloat:
return 4
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong,
mysql.TypeLonglong, mysql.TypeDouble, mysql.TypeYear, mysql.TypeDuration:
return 8
case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
return 16
case mysql.TypeNewDecimal:
return MyDecimalStructSize
default:
return VarElemLen
}
}
4 changes: 2 additions & 2 deletions util/chunk/chunk.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ func New(fields []*types.FieldType, cap, maxChunkSize int) *Chunk {
chk.columns = make([]*column, 0, len(fields))
chk.capacity = mathutil.Min(cap, maxChunkSize)
for _, f := range fields {
elemLen := getFixedLen(f)
if elemLen == varElemLen {
elemLen := f.Length()
if elemLen == types.VarElemLen {
chk.columns = append(chk.columns, newVarLenColumn(chk.capacity, nil))
} else {
chk.columns = append(chk.columns, newFixedLenColumn(elemLen, chk.capacity))
Expand Down
22 changes: 1 addition & 21 deletions util/chunk/codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"unsafe"

"github.com/cznic/mathutil"
"github.com/pingcap/tidb/mysql"
"github.com/pingcap/tidb/types"
)

Expand Down Expand Up @@ -124,7 +123,7 @@ func (c *Codec) decodeColumn(buffer []byte, col *column, ordinal int) (remained
}

// decode offsets.
numFixedBytes := getFixedLen(c.colTypes[ordinal])
numFixedBytes := c.colTypes[ordinal].Length()
numDataBytes := numFixedBytes * col.length
if numFixedBytes == -1 {
numOffsetBytes := (col.length + 1) * 4
Expand Down Expand Up @@ -163,25 +162,6 @@ func (c *Codec) bytesToI32Slice(b []byte) (i32s []int32) {
return i32s
}

// varElemLen indicates this column is a variable length column.
const varElemLen = -1

func getFixedLen(colType *types.FieldType) int {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the length in Chunk, but the length in the storage is different.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but we do not maintain the length for fixed length column.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I mean we should use a different function to estimate the average column size in the storage.

switch colType.Tp {
case mysql.TypeFloat:
return 4
case mysql.TypeTiny, mysql.TypeShort, mysql.TypeInt24, mysql.TypeLong,
mysql.TypeLonglong, mysql.TypeDouble, mysql.TypeYear, mysql.TypeDuration:
return 8
case mysql.TypeDate, mysql.TypeDatetime, mysql.TypeTimestamp:
return 16
case mysql.TypeNewDecimal:
return types.MyDecimalStructSize
default:
return varElemLen
}
}

func init() {
for i := 0; i < 128; i++ {
allNotNullBitmap[i] = 0xFF
Expand Down