diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7a88b369b843..33f1539ee658 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -902,14 +902,15 @@ jobs: cat < .github/test-pt-matrix.yaml config: - default - - hdp3 - # TODO: config-apache-hive3 suite: - suite-1 - suite-2 - suite-3 # suite-4 does not exist - suite-5 + - suite-6-non-generic + - suite-7-non-generic + - suite-8-non-generic - suite-azure - suite-delta-lake-databricks91 - suite-delta-lake-databricks104 @@ -920,18 +921,22 @@ jobs: - suite-clients - suite-functions - suite-tpch + - suite-tpcds - suite-storage-formats-detailed + - suite-parquet + - suite-oauth2 + - suite-ldap + - suite-compatibility + - suite-all-connectors-smoke + - suite-delta-lake-oss + - suite-kafka + - suite-cassandra + - suite-clickhouse + - suite-mysql + - suite-iceberg + - suite-hudi + - suite-ignite exclude: - - config: default - ignore exclusion if: >- - ${{ github.event_name != 'pull_request' - || github.event.pull_request.head.repo.full_name == github.repository - || contains(github.event.pull_request.labels.*.name, 'tests:all') - || contains(github.event.pull_request.labels.*.name, 'tests:hive') - }} - - - suite: suite-azure - config: default - suite: suite-azure ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || @@ -939,34 +944,22 @@ jobs: secrets.AZURE_ABFS_ACCOUNT != '' || secrets.AZURE_ABFS_ACCESSKEY != '' }} - - suite: suite-gcs - config: default - suite: suite-gcs ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.GCP_CREDENTIALS_KEY != '' }} - - suite: suite-delta-lake-databricks91 - config: hdp3 - suite: suite-delta-lake-databricks91 ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.DATABRICKS_TOKEN != '' }} - - suite: suite-delta-lake-databricks104 - config: hdp3 - suite: suite-delta-lake-databricks104 ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.DATABRICKS_TOKEN != '' }} - - suite: suite-delta-lake-databricks113 - config: hdp3 - suite: suite-delta-lake-databricks113 ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.DATABRICKS_TOKEN != '' }} - - suite: suite-delta-lake-databricks122 - config: hdp3 - suite: suite-delta-lake-databricks122 ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.DATABRICKS_TOKEN != '' }} - - suite: suite-delta-lake-databricks133 - config: hdp3 - suite: suite-delta-lake-databricks133 ignore exclusion if: >- ${{ env.CI_SKIP_SECRETS_PRESENCE_CHECKS != '' || secrets.DATABRICKS_TOKEN != '' }} @@ -983,60 +976,9 @@ jobs: # value of the property, and the exclusion will apply normally. - "false" include: - # this suite is not meant to be run with different configs - - config: default - suite: suite-6-non-generic - # this suite is not meant to be run with different configs - - config: default - suite: suite-7-non-generic - # this suite is not meant to be run with different configs - - config: default - suite: suite-8-non-generic - # this suite is not meant to be run with different configs - - config: default - suite: suite-tpcds - # this suite is not meant to be run with different configs - - config: default - suite: suite-parquet - # this suite is not meant to be run with different configs - - config: default - suite: suite-oauth2 - # this suite is not meant to be run with different configs - - config: default - suite: suite-ldap - # this suite is not meant to be run with different configs - - config: default - suite: suite-compatibility # this suite is designed specifically for apache-hive3. TODO remove the suite once we can run all regular tests on apache-hive3. - config: apache-hive3 suite: suite-hms-only - # this suite is not meant to be run with different configs - - config: default - suite: suite-all-connectors-smoke - # this suite is not meant to be run with different configs - - config: default - suite: suite-delta-lake-oss - # this suite is not meant to be run with different configs - - config: default - suite: suite-kafka - # this suite is not meant to be run with different configs - - config: default - suite: suite-cassandra - # this suite is not meant to be run with different configs - - config: default - suite: suite-clickhouse - # this suite is not meant to be run with different configs - - config: default - suite: suite-mysql - # this suite is not meant to be run with different configs - - config: default - suite: suite-iceberg - # this suite is not meant to be run with different configs - - config: default - suite: suite-hudi - # this suite is not meant to be run with different configs - - config: default - suite: suite-ignite EOF - name: Build PT matrix (all) if: | diff --git a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/Hadoop.java b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/Hadoop.java index c31c6fea2dd4..8b7b3ec66d0c 100644 --- a/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/Hadoop.java +++ b/lib/trino-hdfs/src/test/java/io/trino/filesystem/hdfs/Hadoop.java @@ -32,7 +32,7 @@ public class Hadoop { private static final Logger log = Logger.get(Hadoop.class); - private static final String IMAGE = "ghcr.io/trinodb/testing/hdp2.6-hive:" + getDockerImagesVersion(); + private static final String IMAGE = "ghcr.io/trinodb/testing/hdp3.1-hive:" + getDockerImagesVersion(); private static final int HDFS_PORT = 9000; diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive2OnDataLake.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive2OnDataLake.java deleted file mode 100644 index b55338274da4..000000000000 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHive2OnDataLake.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package io.trino.plugin.hive; - -import io.trino.plugin.hive.containers.HiveHadoop; - -public class TestHive2OnDataLake - extends BaseTestHiveOnDataLake -{ - public TestHive2OnDataLake() - { - super(HiveHadoop.DEFAULT_IMAGE); - } -} diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java index ff1d8992eff3..ea0315b280ca 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/TestHiveAnalyzeCorruptStatistics.java @@ -57,7 +57,7 @@ public void testAnalyzeCorruptColumnStatisticsOnEmptyTable() // ANALYZE and drop_stats are unsupported for tables having broken column statistics assertThatThrownBy(() -> query("ANALYZE " + tableName)) - .hasMessage("%s: Socket is closed by peer.", hiveMinioDataLake.getHiveHadoop().getHiveMetastoreEndpoint()) + .hasMessage("Unexpected 2 statistics for 1 columns") .hasStackTraceContaining("ThriftHiveMetastore.setTableColumnStatistics"); assertThatThrownBy(() -> query("CALL system.drop_stats('tpch', '" + tableName + "')")) @@ -72,9 +72,33 @@ private void prepareBrokenColumnStatisticsTable(String tableName) // Insert duplicated row to simulate broken column statistics status https://github.com/trinodb/trino/issues/13787 assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "1"); - onMetastore("INSERT INTO TAB_COL_STATS " + - "SELECT cs_id + 1, db_name, table_name, column_name, column_type, tbl_id, long_low_value, long_high_value, double_high_value, double_low_value, big_decimal_low_value, big_decimal_high_value, num_nulls, num_distincts, avg_col_len, max_col_len, num_trues, num_falses, last_analyzed " + - "FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"); + onMetastore(""" + INSERT INTO TAB_COL_STATS + SELECT + cs_id + 1, + cat_name, + db_name, + table_name, + column_name, + column_type, + tbl_id, + long_low_value, + long_high_value, + double_high_value, + double_low_value, + big_decimal_low_value, + big_decimal_high_value, + num_nulls, + num_distincts, + bit_vector, + avg_col_len, + max_col_len, + num_trues, + num_falses, + last_analyzed + FROM TAB_COL_STATS + WHERE db_name = 'tpch' AND table_name = '%s' + """.formatted(tableName)); assertEquals(onMetastore("SELECT COUNT(1) FROM TAB_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "2"); } @@ -103,9 +127,34 @@ private void prepareBrokenPartitionStatisticsTable(String tableName) // Insert duplicated row to simulate broken partition statistics status https://github.com/trinodb/trino/issues/13787 assertEquals(onMetastore("SELECT COUNT(1) FROM PART_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "1"); - onMetastore("INSERT INTO PART_COL_STATS " + - "SELECT cs_id + 1, db_name, table_name, partition_name, column_name, column_type, part_id, long_low_value, long_high_value, double_high_value, double_low_value, big_decimal_low_value, big_decimal_high_value, num_nulls, num_distincts, avg_col_len, max_col_len, num_trues, num_falses, last_analyzed " + - "FROM PART_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"); + onMetastore(""" + INSERT INTO PART_COL_STATS + SELECT + cs_id + 1, + cat_name, + db_name, + table_name, + partition_name, + column_name, + column_type, + part_id, + long_low_value, + long_high_value, + double_high_value, + double_low_value, + big_decimal_low_value, + big_decimal_high_value, + num_nulls, + num_distincts, + bit_vector, + avg_col_len, + max_col_len, + num_trues, + num_falses, + last_analyzed + FROM PART_COL_STATS + WHERE db_name = 'tpch' AND table_name = '%s' + """.formatted(tableName)); assertEquals(onMetastore("SELECT COUNT(1) FROM PART_COL_STATS WHERE db_name = 'tpch' AND table_name = '" + tableName + "'"), "2"); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java index 977c88f5e0b6..7959030b8661 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveHadoop.java @@ -33,7 +33,6 @@ public class HiveHadoop { private static final Logger log = Logger.get(HiveHadoop.class); - public static final String DEFAULT_IMAGE = "ghcr.io/trinodb/testing/hdp2.6-hive:" + TestingProperties.getDockerImagesVersion(); public static final String HIVE3_IMAGE = "ghcr.io/trinodb/testing/hdp3.1-hive:" + TestingProperties.getDockerImagesVersion(); public static final String HOST_NAME = "hadoop-master"; @@ -104,7 +103,7 @@ public static class Builder { private Builder() { - this.image = DEFAULT_IMAGE; + this.image = HIVE3_IMAGE; this.hostName = HOST_NAME; this.exposePorts = ImmutableSet.of(HIVE_METASTORE_PORT); } diff --git a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveMinioDataLake.java b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveMinioDataLake.java index 15e7f0178450..2e8bba014380 100644 --- a/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveMinioDataLake.java +++ b/plugin/trino-hive/src/test/java/io/trino/plugin/hive/containers/HiveMinioDataLake.java @@ -51,7 +51,7 @@ public class HiveMinioDataLake public HiveMinioDataLake(String bucketName) { - this(bucketName, HiveHadoop.DEFAULT_IMAGE); + this(bucketName, HiveHadoop.HIVE3_IMAGE); } public HiveMinioDataLake(String bucketName, String hiveHadoopImage) diff --git a/pom.xml b/pom.xml index 95b98e911248..271c98b3c66a 100644 --- a/pom.xml +++ b/pom.xml @@ -189,7 +189,7 @@ 1.37 5.10.0 - 81 + 86 + + hadoop.proxyuser.oozie.hosts + * + + + hadoop.proxyuser.oozie.groups + * + + + + + hadoop.proxyuser.httpfs.hosts + * + + + hadoop.proxyuser.httpfs.groups + * + + + + + hadoop.proxyuser.llama.hosts + * + + + hadoop.proxyuser.llama.groups + * + + + + + hadoop.proxyuser.hue.hosts + * + + + hadoop.proxyuser.hue.groups + * + + + + + hadoop.proxyuser.mapred.hosts + * + + + hadoop.proxyuser.mapred.groups + * + + + + + hadoop.proxyuser.hive.hosts + * + + + + hadoop.proxyuser.hive.groups + * + + + + + hadoop.proxyuser.hdfs.groups + * + + + + hadoop.proxyuser.hdfs.hosts + * + diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/hdfs-site.xml b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/hdfs-site.xml new file mode 100644 index 000000000000..c8f55aff9808 --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/hdfs-site.xml @@ -0,0 +1,23 @@ + + + + dfs.namenode.name.dir + /var/lib/hadoop-hdfs/cache/name/ + + + + dfs.datanode.data.dir + /var/lib/hadoop-hdfs/cache/data/ + + + + fs.viewfs.mounttable.hadoop-viewfs.link./default + hdfs://hadoop-master-2:9000/user/hive/warehouse + + + + + dfs.safemode.threshold.pct + 0 + + diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/update-location.sh b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/update-location.sh new file mode 100644 index 000000000000..d0802cb5c08e --- /dev/null +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hadoop-master-2/update-location.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +/usr/bin/mysqld_safe & +while ! mysqladmin ping -proot --silent; do sleep 1; done + +hive --service metatool -updateLocation hdfs://hadoop-master-2:9000/user/hive/warehouse hdfs://hadoop-master:9000/user/hive/warehouse + +killall mysqld +while pgrep mysqld; do sleep 1; done diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive1.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive1.properties index 28b4d97243bb..a4f1d8374acc 100644 --- a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive1.properties +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive1.properties @@ -2,6 +2,8 @@ connector.name=hive hive.metastore.uri=thrift://hadoop-master:9083 hive.config.resources=/docker/presto-product-tests/conf/presto/etc/hive-default-fs-site.xml hive.metastore-cache-ttl=0s +hive.parquet.time-zone=UTC +hive.rcfile.time-zone=UTC hive.metastore.authentication.type=KERBEROS hive.metastore.thrift.impersonation.enabled=true diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive2.properties b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive2.properties index 172f0cad19d0..b01bac39ace1 100644 --- a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive2.properties +++ b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/environment/two-mixed-hives/hive2.properties @@ -11,3 +11,5 @@ hive.metastore-cache-ttl=0s hive.fs.cache.max-size=10 hive.max-partitions-per-scan=100 hive.max-partitions-for-eager-load=100 +hive.parquet.time-zone=UTC +hive.rcfile.time-zone=UTC diff --git a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/tempto/tempto-configuration-for-hive3.yaml b/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/tempto/tempto-configuration-for-hive3.yaml deleted file mode 100644 index 9142bbddb72f..000000000000 --- a/testing/trino-product-tests-launcher/src/main/resources/docker/presto-product-tests/conf/tempto/tempto-configuration-for-hive3.yaml +++ /dev/null @@ -1,11 +0,0 @@ -hdfs: - webhdfs: - # 9870 is the name node's default port in Hadoop 3 - uri: http://${databases.hive.host}:9870 - -databases: - hive: - prepare_statement: - - USE ${databases.hive.schema} - # Hive 3 gathers stats by default. For test purposes we need to disable this behavior. - - SET hive.stats.column.autogather=false diff --git a/testing/trino-product-tests/README.md b/testing/trino-product-tests/README.md index f8c30a763ffd..03a219b44fe2 100644 --- a/testing/trino-product-tests/README.md +++ b/testing/trino-product-tests/README.md @@ -104,11 +104,7 @@ testing/bin/ptl env list #### Environment config -Most of the Hadoop-based environments can be run in multiple configurations that use different Hadoop distribution: - -- **config-default** - executes tests against vanilla Hadoop distribution -- **config-hdp3** - executes tests against HDP3 distribution of Hadoop - +Most of the Hadoop-based environments can be run in multiple configurations. You can obtain list of available environment configurations using command: ``` diff --git a/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml b/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml index 839c23c7b112..45989ef16dfa 100644 --- a/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml +++ b/testing/trino-product-tests/src/main/resources/tempto-configuration.yaml @@ -1,7 +1,7 @@ hdfs: username: hive webhdfs: - uri: http://${databases.hive.host}:50070 + uri: http://${databases.hive.host}:9870 databases: hive: @@ -14,6 +14,8 @@ databases: schema: default prepare_statement: - USE ${databases.hive.schema} + # Hive 3 gathers stats by default. For test purposes we need to disable this behavior. + - SET hive.stats.column.autogather=false table_manager_type: hive warehouse_directory_path: /user/hive/warehouse inject_stats_for_immutable_tables: true