From a7e40dd4d8367e1fbcdd0716fb738bff8e161ddd Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Fri, 23 Aug 2024 11:35:47 +0800 Subject: [PATCH] [fix](ctas) fix NPE when ctas with old planner and varchar issue (#39744) Fix 2 bugs: 1. introduced from #35489 When the target table is unpartitioned with text type column from external table, NPE will be thrown. Only happen when using old planner. The new planner works well. 2. If first column is varchar type, the new planner will change it to string type and then failed because first column can not be string type. --- .../doris/datasource/InternalCatalog.java | 5 ++-- .../plans/commands/CreateTableCommand.java | 17 ++++++++++--- .../jdbc/test_mysql_jdbc_catalog.out | 18 +++++++++++++ .../jdbc/test_mysql_jdbc_catalog.groovy | 25 +++++++++++++++++++ 4 files changed, 60 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java index b76f23b7283927..296c787a54b099 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/InternalCatalog.java @@ -1307,8 +1307,9 @@ public void createTableAsSelect(CreateTableAsSelectStmt stmt) throws DdlExceptio if (resultExpr.getSrcSlotRef() != null && resultExpr.getSrcSlotRef().getTable() != null && !resultExpr.getSrcSlotRef().getTable().isManagedTable()) { - if (createTableStmt.getPartitionDesc().inIdentifierPartitions( - resultExpr.getSrcSlotRef().getColumnName()) + if ((createTableStmt.getPartitionDesc() != null + && createTableStmt.getPartitionDesc().inIdentifierPartitions( + resultExpr.getSrcSlotRef().getColumnName())) || (createTableStmt.getDistributionDesc() != null && createTableStmt.getDistributionDesc().inDistributionColumns( resultExpr.getSrcSlotRef().getColumnName()))) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java index 33d25723893e00..8541dc29d71a46 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java @@ -123,6 +123,8 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { Slot s = slots.get(i); DataType dataType = s.getDataType().conversion(); if (i == 0 && dataType.isStringType()) { + // first column of olap table can not be string type. + // So change it to varchar type. dataType = VarcharType.createVarcharType(ScalarType.MAX_VARCHAR_LENGTH); } else { dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, @@ -135,13 +137,21 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { if (createTableInfo.getPartitionTableInfo().inIdentifierPartitions(s.getName()) || (createTableInfo.getDistribution() != null && createTableInfo.getDistribution().inDistributionColumns(s.getName()))) { - // String type can not be used in partition/distributed column + // String type can not be used in partition/distributed column, // so we replace it to varchar dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, CharacterType.class, VarcharType.MAX_VARCHAR_TYPE); } else { - dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, - CharacterType.class, StringType.INSTANCE); + if (i == 0) { + // first column of olap table can not be string type. + // So change it to varchar type. + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + CharacterType.class, VarcharType.MAX_VARCHAR_TYPE); + } else { + // change varchar/char column from external table to string type + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + CharacterType.class, StringType.INSTANCE); + } } } } else { @@ -212,3 +222,4 @@ public CreateTableInfo getCreateTableInfo() { return createTableInfo; } } + diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out index 440b95d3b5915a..d697c8e5e600ec 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out @@ -433,3 +433,21 @@ doris -- !sql -- +-- !sql -- +int_u bigint Yes true \N +text varchar(65533) Yes true \N +t2 text Yes false \N NONE + +-- !sql -- +varchar varchar(65533) Yes true \N +int_u bigint Yes false \N NONE + +-- !sql -- +int_u bigint Yes true \N +text varchar(65533) Yes true \N +t2 varchar(65533) Yes false \N NONE + +-- !sql -- +varchar varchar(65533) Yes true \N +int_u bigint Yes false \N NONE + diff --git a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy index 81d31fd88c4b3e..a510a74b9f00bc 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy @@ -23,6 +23,7 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc String s3_endpoint = getS3Endpoint() String bucket = getS3BucketName() String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-java-8.0.25.jar" + // String driver_url = "mysql-connector-java-8.0.25.jar" if (enabled != null && enabled.equalsIgnoreCase("true")) { String user = "test_jdbc_user"; String pwd = '123456'; @@ -614,6 +615,30 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc order_qt_sql """select * from mysql_conjuncts.doris_test.text_push where pk <=7;""" + // test create table as select + sql """use internal.${internal_db_name}""" + sql """drop table if exists ctas_partition_text_1""" + sql """drop table if exists ctas_partition_text_2""" + sql """drop table if exists ctas_partition_text_3""" + sql """drop table if exists ctas_partition_text_4""" + sql """set enable_nereids_planner=true""" + // 1. test text type column as distribution col + sql """create table ctas_partition_text_1 distributed by hash(text) buckets 1 properties("replication_num" = "1") as select int_u, text, text as t2 from mysql_conjuncts.doris_test.all_types;""" + qt_sql """desc ctas_partition_text_1""" + // 2. test varchar type column as first col + sql """create table ctas_partition_text_2 distributed by hash(int_u) buckets 1 properties("replication_num" = "1") as select varchar, int_u from mysql_conjuncts.doris_test.all_types;""" + qt_sql """desc ctas_partition_text_2""" + // ctas logic is different between new and old planner. + // so need to test both. + // the old planner's test can be removed once the old planner is removed. + sql """set enable_nereids_planner=false""" + // 1. test text type column as distribution col + sql """create table ctas_partition_text_3 distributed by hash(text) buckets 1 properties("replication_num" = "1") as select int_u, text, text as t2 from mysql_conjuncts.doris_test.all_types;""" + qt_sql """desc ctas_partition_text_3""" + // 2. test varchar type column as first col + sql """create table ctas_partition_text_4 distributed by hash(int_u) buckets 1 properties("replication_num" = "1") as select varchar, int_u from mysql_conjuncts.doris_test.all_types;""" + qt_sql """desc ctas_partition_text_4""" + sql """drop catalog if exists mysql_conjuncts;""" } }