Skip to content

Commit

Permalink
[fix](statistics)Skip analyze if the collected info is invalid. (apac…
Browse files Browse the repository at this point in the history
  • Loading branch information
Jibing-Li authored Oct 18, 2024
1 parent 21af64b commit 4fceaa0
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,11 @@ protected void runQuery(String sql) {
try (AutoCloseConnectContext a = StatisticsUtil.buildConnectContext(false)) {
stmtExecutor = new StmtExecutor(a.connectContext, sql);
ColStatsData colStatsData = new ColStatsData(stmtExecutor.executeInternalQuery().get(0));
if (!colStatsData.isValid()) {
String message = String.format("ColStatsData is invalid, skip analyzing. %s", colStatsData.toSQL(true));
LOG.warn(message);
throw new RuntimeException(message);
}
// Update index row count after analyze.
if (this instanceof OlapAnalysisTask) {
AnalysisInfo jobInfo = Env.getCurrentEnv().getAnalysisManager().findJobInfo(job.getJobInfo().jobId);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,4 +186,25 @@ public ColumnStatistic toColumnStatistic() {
return ColumnStatistic.UNKNOWN;
}
}

/**
 * Tells whether a statistics literal should be treated as null.
 *
 * @param value the literal to inspect; may be {@code null}
 * @return {@code true} when {@code value} is {@code null} or spells "NULL" in any letter case
 */
public boolean isNull(String value) {
    if (value == null) {
        return true;
    }
    // Historical bug: the literal text "NULL" is also treated as a null value. Planned to be fixed.
    return "NULL".equalsIgnoreCase(value);
}

/**
 * Sanity-checks the collected column statistics.
 *
 * <p>The statistics are rejected when any of the following holds:
 * <ul>
 *   <li>ndv is more than 10 times the row count;</li>
 *   <li>ndv is 0 although a min or max literal is present;</li>
 *   <li>the row count is positive, ndv is 0, min and max are both null, yet the null count
 *       is 0 or less than a tenth of the row count.</li>
 * </ul>
 *
 * @return {@code true} if none of the rules above is violated, {@code false} otherwise
 */
public boolean isValid() {
    if (ndv > 10 * count) {
        LOG.debug("Ndv {} is much larger than count {}", ndv, count);
        return false;
    }
    if (ndv == 0 && (!isNull(minLit) || !isNull(maxLit))) {
        LOG.debug("Ndv is 0 but min or max exists");
        return false;
    }
    if (count > 0 && ndv == 0 && isNull(minLit) && isNull(maxLit)
            && (nullCount == 0 || count > nullCount * 10)) {
        // Report the actual null count; the second placeholder previously received count again.
        LOG.debug("count {} not 0, ndv is 0, min and max are all null, null count {} is too small",
                count, nullCount);
        return false;
    }
    return true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,16 @@
import org.apache.doris.analysis.TableSample;
import org.apache.doris.catalog.Column;
import org.apache.doris.catalog.PrimitiveType;
import org.apache.doris.qe.StmtExecutor;

import com.google.common.collect.Lists;
import mockit.Mock;
import mockit.MockUp;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;

import java.util.List;

public class BaseAnalysisTaskTest {

@Test
Expand Down Expand Up @@ -60,4 +66,43 @@ public void testGetFunctions() {
System.out.println(ndvFunction);
}

/**
 * Verifies that runQuery throws, with the expected message, when the mocked query result
 * yields column statistics whose ndv (1100) exceeds ten times the row count (100).
 */
@Test
public void testInvalidColStats() {
    List<String> values = Lists.newArrayList();
    values.add("id");
    values.add("10000");
    values.add("20000");
    values.add("30000");
    values.add("0");
    values.add("col");
    values.add(null);
    values.add("100"); // count
    values.add("1100"); // ndv
    values.add("300"); // null
    values.add("min");
    values.add("max");
    values.add("400");
    values.add("500");
    ResultRow row = new ResultRow(values);
    List<ResultRow> result = Lists.newArrayList();
    result.add(row);

    // Make the internal query return the single invalid row built above.
    new MockUp<StmtExecutor>() {
        @Mock
        public List<ResultRow> executeInternalQuery() {
            return result;
        }
    };
    BaseAnalysisTask task = new OlapAnalysisTask();
    // assertThrows fails the test by itself when nothing is thrown, replacing try/catch/fail.
    Exception e = Assertions.assertThrows(Exception.class, () -> task.runQuery("test"));
    // Expected value goes first so that a failure report labels the two sides correctly.
    Assertions.assertEquals(
            "ColStatsData is invalid, skip analyzing. "
                    + "('id',10000,20000,30000,0,'col',null,100,1100,300,'min','max',400,'500')",
            e.getMessage());
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -164,4 +164,91 @@ public void testToColumnStatisticNormal(@Mocked StatisticsUtil mockedClass) {
Assertions.assertEquals(400, columnStatistic.dataSize);
Assertions.assertEquals("500", columnStatistic.updatedTime);
}

/**
 * Checks ColStatsData.isNull: a null reference and any casing of "null" are reported as null,
 * while empty, blank and ordinary strings are not.
 */
@Test
public void testIsNull() {
    final ColStatsData colStats = new ColStatsData();

    // Inputs that must be reported as null.
    final String[] nullLike = {null, "null", "NuLl"};
    for (String candidate : nullLike) {
        Assertions.assertTrue(colStats.isNull(candidate));
    }

    // Inputs that must NOT be reported as null.
    final String[] concrete = {"", " ", "123"};
    for (String candidate : concrete) {
        Assertions.assertFalse(colStats.isNull(candidate));
    }
}

/**
 * Walks ColStatsData.isValid() through its rules by mutating one shared value list step by step.
 * Each step builds on the previous state of the list, so the order of the steps matters.
 * Per the inline labels below, index 7 is count, 8 is ndv and 9 is the null count; indexes 10 and 11
 * hold the "min" and "max" literals.
 */
@Test
public void testIsValid() {
List<String> values = Lists.newArrayList();
values.add("id");
values.add("10000");
values.add("20000");
values.add("30000");
values.add("0");
values.add("col");
values.add(null);
values.add("100"); // count
values.add("1100"); // ndv
values.add("300"); // null
values.add("min");
values.add("max");
values.add("400");
values.add("500");
ResultRow row = new ResultRow(values);
ColStatsData data = new ColStatsData(row);
// ndv (1100) > 10 * count (100): invalid.
Assertions.assertFalse(data.isValid());

// Set count = 200, so ndv (1100) <= 10 * count: valid.
values.set(7, "200");
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertTrue(data.isValid());

// Set ndv = 0 while min and max are both still present: invalid.
values.set(8, "0");
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertFalse(data.isValid());

// Set min to null; max is still present with ndv = 0: invalid.
values.set(10, null);
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertFalse(data.isValid());

// Set max to null too; min and max are both null and null count (300) is large enough
// (count 200 <= 300 * 10): valid.
values.set(11, null);
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertTrue(data.isValid());

// Set min back to a non-null literal; ndv = 0 with min present: invalid.
values.set(10, "min");
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertFalse(data.isValid());

// Set min ("nuLl" is treated as null) and max to null, null count = 0 while count > 0: invalid.
values.set(9, "0");
values.set(10, "nuLl");
values.set(11, null);
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertFalse(data.isValid());

// null count = 19, count = 200, so count > null count * 10 (190): invalid.
values.set(9, "19");
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertFalse(data.isValid());

// null count = 21, count = 200, so count < null count * 10 (210): valid.
values.set(9, "21");
row = new ResultRow(values);
data = new ColStatsData(row);
Assertions.assertTrue(data.isValid());

// Stats created with the no-arg constructor (empty table stats) are valid.
data = new ColStatsData();
Assertions.assertTrue(data.isValid());
}
}

0 comments on commit 4fceaa0

Please sign in to comment.