Skip to content

Commit

Permalink
YDB FQ: avoid using clickhouse-connect library in integration tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vitalyisaev2 authored Oct 23, 2024
1 parent acb4df3 commit 828b029
Show file tree
Hide file tree
Showing 42 changed files with 813 additions and 766 deletions.
2 changes: 0 additions & 2 deletions .github/config/muted_ya.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,6 @@ ydb/library/actors/http/ut HttpProxy.TooLongHeader
ydb/library/actors/http/ut sole chunk chunk
ydb/library/actors/http/ut sole+chunk+chunk
ydb/library/actors/interconnect/ut_huge_cluster HugeCluster.AllToAll
ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse test.py.test_select_datetime[datetime_string_NATIVE-dqrun]
ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse test.py.test_select_datetime[datetime_string_NATIVE-kqprun]
ydb/library/yql/providers/generic/connector/tests/join test.py.test_join[join_ch_ch-dqrun]
ydb/library/yql/tests/sql/hybrid_file/part1 test.py.test[in-in_noansi_join--Debug]
ydb/public/sdk/cpp/client/ydb_persqueue_public/ut/with_offset_ranges_mode_ut [*/*] chunk chunk
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def table_name(self) -> str:
'''
match self.data_source_kind:
case EDataSourceKind.CLICKHOUSE:
return 't' + make_random_string(8)
return self.name_ # without protocol
case EDataSourceKind.MS_SQL_SERVER:
return self.name
case EDataSourceKind.MYSQL:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
DataSourceType,
SelectWhat,
SelectWhere,
makeYdbTypeFromTypeID,
)

from ydb.library.yql.providers.generic.connector.tests.common_test_cases.base import BaseTestCase
Expand Down Expand Up @@ -246,46 +247,65 @@ def _large_table(self) -> Sequence[TestCase]:
schema = Schema(
columns=ColumnList(
Column(
name='col_01_int64',
ydb_type=Type.INT64,
data_source_type=DataSourceType(ch=clickhouse.Int32(), pg=postgresql.Int8()),
name='col_00_int32',
ydb_type=makeYdbTypeFromTypeID(Type.INT32),
data_source_type=DataSourceType(ch=clickhouse.Int64(), pg=postgresql.Int8()),
),
Column(
name='col_02_utf8',
ydb_type=Type.UTF8,
name='col_01_string',
ydb_type=makeYdbTypeFromTypeID(Type.STRING),
data_source_type=DataSourceType(ch=clickhouse.String(), pg=postgresql.Text()),
),
)
)

data_in = generate_table_data(schema=schema, bytes_soft_limit=table_size)

# Assuming that request will look something like:
#
# SELECT * FROM table WHERE id = (SELECT MAX(id) FROM table)
#
# We expect last line to be the answer
data_out = [data_in[-1]]

data_source_kinds = [EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL]

test_case_name = 'large_table'
data_source_kinds = (
EDataSourceKind.CLICKHOUSE,
EDataSourceKind.POSTGRESQL,
)

test_case_name = 'large'
test_cases = []

for data_source_kind in data_source_kinds:
tc = TestCase(
name_=test_case_name,
data_source_kind=data_source_kind,
protocol=EProtocol.NATIVE,
data_in=data_in,
data_out_=data_out,
select_what=SelectWhat.asterisk(schema.columns),
select_where=SelectWhere(
expression_='col_01_int64 IN (SELECT MAX(col_01_int64) FROM {cluster_name}.{table_name})'
),
schema=schema,
pragmas=dict(),
)
match data_source_kind:
case EDataSourceKind.CLICKHOUSE:
tc = TestCase(
name_=test_case_name,
data_source_kind=data_source_kind,
protocol=EProtocol.NATIVE,
data_in=None,
data_out_=[[999999]], # We put 1M of rows in the large table
select_what=SelectWhat(SelectWhat.Item(name='MAX(col_00_int32)', kind='expr')),
select_where=None,
schema=schema,
pragmas=dict(),
)

case EDataSourceKind.POSTGRESQL:
# Assuming that request will look something like:
# `SELECT * FROM table WHERE id = (SELECT MAX(id) FROM table)`
# We expect last line to be the answer
data_in = generate_table_data(schema=schema, bytes_soft_limit=table_size)
data_out = [data_in[-1]]
data_source_kinds = [EDataSourceKind.CLICKHOUSE, EDataSourceKind.POSTGRESQL]

tc = TestCase(
name_=test_case_name,
data_source_kind=data_source_kind,
protocol=EProtocol.NATIVE,
data_in=data_in,
data_out_=data_out,
select_what=SelectWhat.asterisk(schema.columns),
select_where=SelectWhere(
expression_='col_00_int32 IN (SELECT MAX(col_00_int32) FROM {cluster_name}.{table_name})'
),
schema=schema,
pragmas=dict(),
)

case _:
raise ValueError(f'Unknown data source kind: {data_source_kind}')

test_cases.append(tc)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,10 @@

from ydb.library.yql.providers.generic.connector.api.common.data_source_pb2 import EDataSourceKind
from ydb.library.yql.providers.generic.connector.tests.utils.settings import Settings
from ydb.library.yql.providers.generic.connector.tests.utils.clients.clickhouse import Client, make_client

docker_compose_dir: Final = pathlib.Path("ydb/library/yql/providers/generic/connector/tests/datasource/clickhouse")


@pytest.fixture
def settings() -> Settings:
    """Return the Settings built by ``Settings.from_env`` for this test suite.

    The call is given the module-level ``docker_compose_dir`` and is
    restricted to the CLICKHOUSE data source kind.
    """
    clickhouse_only = [EDataSourceKind.CLICKHOUSE]
    return Settings.from_env(
        docker_compose_dir=docker_compose_dir,
        data_source_kinds=clickhouse_only,
    )


@pytest.fixture
def clickhouse_client(settings) -> Client:
    """Yield a client created by ``make_client`` and close it afterwards.

    The client is built from ``settings.clickhouse``.
    NOTE(review): this is a generator fixture, yet the return annotation is
    the plain ``Client`` type -- confirm whether an iterator type is wanted.
    """
    client = make_client(settings.clickhouse)
    yield client
    # Everything after the yield is the fixture's teardown step.
    client.close()
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,18 @@ services:
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
CLICKHOUSE_PASSWORD: password
CLICKHOUSE_USER: user
image: mirror.gcr.io/clickhouse/clickhouse-server:23-alpine@sha256:d75017307e76d1bca81a5ac7ada94620567782c0610541f525d1e443e23f76e3
CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS: 1
image: mirror.gcr.io/clickhouse/clickhouse-server:24.3.12-alpine@sha256:65e5846a0d9672714f2625502b27846563f6d01ec226304cf851aa49004ffde8
volumes:
- ./init:/docker-entrypoint-initdb.d
ports:
- 9000
- 8123
tmpfs:
- /run
- /tmp
- /var
- 9000
- 8123
fq-connector-go:
container_name: fq-tests-ch-fq-connector-go
image: ghcr.io/ydb-platform/fq-connector-go:v0.5.11-rc.5@sha256:c17f67aea314366690545aea1db9f2bf4391ae1269044ebbac7ea2316972e7ff
ports:
- 2130
- 2130
volumes:
- ../../fq-connector-go/:/opt/ydb/cfg/
- ../../fq-connector-go/:/opt/ydb/cfg/
version: "3.4"
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
#!/bin/bash
# Creates and fills the ClickHouse tables in database `db` by piping SQL into
# clickhouse-client, one invocation per table (or per group of tables).
# -e: stop at the first failing command; -x: echo every command as it runs.
set -ex

# Recreate db.primitive_types_non_nullable: an Int32 id followed by one
# non-nullable column per primitive type, filled with two rows (the second
# row uses negative numbers and Cyrillic strings).
# -n lets a single client call run several ';'-separated statements.
# The here-doc delimiter is unquoted, so a trailing backslash joins the next
# line onto the current one before the text reaches the client.
# NOTE(review): presumably the client needs all VALUES tuples on one line -- confirm.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.primitive_types_non_nullable;
CREATE TABLE db.primitive_types_non_nullable (
col_00_id Int32,
col_01_boolean Boolean,
col_02_int8 Int8,
col_03_uint8 UInt8,
col_04_int16 Int16,
col_05_uint16 UInt16,
col_06_int32 Int32,
col_07_uint32 UInt32,
col_08_int64 Int64,
col_09_uint64 UInt64,
col_10_float32 Float32,
col_11_float64 Float64,
col_12_string String,
col_13_fixed_string FixedString(13),
col_14_date Date,
col_15_date32 Date32,
col_16_datetime DateTime,
col_17_datetime64 DateTime64(3)
) ENGINE = MergeTree ORDER BY col_00_id;
INSERT INTO db.primitive_types_non_nullable (*) VALUES
(1, False, 2, 3, 4, 5, 6, 7, 8, 9, 10.10, 11.11, 'az', 'az', '1988-11-20', '1988-11-20', '1988-11-20 12:55:28', '1988-11-20 12:55:28.123') \
(2, True, -2, 3, -4, 5, -6, 7, -8, 9, -10.10, -11.11, 'буки', 'буки', '2023-03-21', '2023-03-21', '2023-03-21 11:21:31', '2023-03-21 11:21:31.456');
EOSQL

# Recreate db.primitive_types_nullable: same column set as the non-nullable
# table, but every column except the id is wrapped in Nullable(...), and the
# date-time columns are declared with an explicit 'UTC' time zone.
# Three rows are inserted; row 2 is NULL in every nullable column.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.primitive_types_nullable;
CREATE TABLE db.primitive_types_nullable (
col_00_id Int32,
col_01_boolean Nullable(Boolean),
col_02_int8 Nullable(Int8),
col_03_uint8 Nullable(UInt8),
col_04_int16 Nullable(Int16),
col_05_uint16 Nullable(UInt16),
col_06_int32 Nullable(Int32),
col_07_uint32 Nullable(UInt32),
col_08_int64 Nullable(Int64),
col_09_uint64 Nullable(UInt64),
col_10_float32 Nullable(Float32),
col_11_float64 Nullable(Float64),
col_12_string Nullable(String),
col_13_fixed_string Nullable(FixedString(13)),
col_14_date Nullable(Date),
col_15_date32 Nullable(Date32),
col_16_datetime Nullable(DateTime('UTC')),
col_17_datetime64 Nullable(DateTime64(6, 'UTC'))
) ENGINE = MergeTree ORDER BY col_00_id;
INSERT INTO db.primitive_types_nullable (*) VALUES
(1, False, 2, 3, 4, 5, 6, 7, 8, 9, 10.10, 11.11, 'az', 'az', '1988-11-20', '1988-11-20', '1988-11-20 12:55:28', '1988-11-20 12:55:28.123') \
(2, NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL) \
(3, True, -2, 3, -4, 5, -6, 7, -8, 9, -10.10, -11.11, 'буки', 'буки', '2023-03-21', '2023-03-21', '2023-03-21 11:21:31', '2023-03-21 11:21:31.456');
EOSQL

# Recreate db.datetime_string with Date, Date32, DateTime and DateTime64(8)
# columns and five rows, each inserted by its own INSERT statement.
# The SQL comments inside the here-doc describe each row relative to the
# supported ranges: too early for both ClickHouse and YQL, possibly too early
# for YQL only, valid for both, too late for YQL only, too late for both.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.datetime_string;
CREATE TABLE db.datetime_string (
col_00_id Int32,
col_01_date Date,
col_02_date32 Date32,
col_03_datetime DateTime,
col_04_datetime64 DateTime64(8)
) ENGINE = MergeTree ORDER BY col_00_id;
/*
Value is too early for both CH and YQL
In this case ClickHouse behaviour is undefined
For Datetime Clickhouse returns bottom bound and
cuts off only date part of value along ClickHouse bottom bound for other types
*/
INSERT INTO db.datetime_string (*) VALUES
(1, '1950-01-10', '1850-01-10', '1950-01-10 12:23:45', '1950-01-10 12:23:45.678910');
/* Value is OK for CH, but can be too early for YQL */
INSERT INTO db.datetime_string (*) VALUES
(2, '1970-01-10', '1950-01-10', '1980-01-10 12:23:45', '1950-01-10 12:23:45.678910');
/* Value is OK for both CH and YQL */
INSERT INTO db.datetime_string (*) VALUES
(3, '2004-01-10', '2004-01-10', '2004-01-10 12:23:45', '2004-01-10 12:23:45.678910');
/* Value is OK for CH, but too late for YQL */
INSERT INTO db.datetime_string (*) VALUES
(4, '2110-01-10', '2110-01-10', '2106-01-10 12:23:45', '2110-01-10 12:23:45.678910');
/*
Value is too late for both YQL and CH.
In this case ClickHouse behaviour is undefined.
*/
INSERT INTO db.datetime_string (*) VALUES
(5, '2150-01-10', '2300-01-10', '2107-01-10 12:23:45', '2300-01-10 12:23:45.678910');
EOSQL

# Recreate db.datetime_YQL with the same schema and the same five rows as
# db.datetime_string, here written as one multi-row INSERT (the trailing
# backslashes join the tuples onto a single line inside the here-doc).
# NOTE(review): presumably the two tables back tests that read date/time
# values in different output formats -- confirm against the test cases.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.datetime_YQL;
CREATE TABLE db.datetime_YQL (
col_00_id Int32,
col_01_date Date,
col_02_date32 Date32,
col_03_datetime DateTime,
col_04_datetime64 DateTime64(8)
) ENGINE = MergeTree ORDER BY col_00_id;
INSERT INTO db.datetime_YQL (*) VALUES
(1, '1950-01-10', '1850-01-10', '1950-01-10 12:23:45', '1950-01-10 12:23:45.678910') \
(2, '1970-01-10', '1950-01-10', '1980-01-10 12:23:45', '1950-01-10 12:23:45.678910') \
(3, '2004-01-10', '2004-01-10', '2004-01-10 12:23:45', '2004-01-10 12:23:45.678910') \
(4, '2110-01-10', '2110-01-10', '2106-01-10 12:23:45', '2110-01-10 12:23:45.678910') \
(5, '2150-01-10', '2300-01-10', '2107-01-10 12:23:45', '2300-01-10 12:23:45.678910');
EOSQL

# Recreate db.constant: a single Int32 column `id` holding the rows 1, 2, 3.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.constant;
CREATE TABLE db.constant (
id Int32,
) ENGINE = MergeTree ORDER BY id;
INSERT INTO db.constant (*) VALUES
(1) \
(2) \
(3);
EOSQL

# Recreate db.counts: a single Float64 column `col` with four rows
# (3.14, 1.0, 2.718 and negative zero).
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.counts;
CREATE TABLE db.counts (
col Float64,
) ENGINE = MergeTree ORDER BY col;
INSERT INTO db.counts (*) VALUES
(3.14) \
(1.0) \
(2.718) \
(-0.0);
EOSQL

# Recreate db.pushdown: an Int32 key plus a Nullable(String) column, filled
# with four rows; the last row stores NULL in the string column.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.pushdown;
CREATE TABLE db.pushdown (
col_00_int32 Int32,
col_01_string Nullable(String)
) ENGINE = MergeTree ORDER BY col_00_int32;
INSERT INTO db.pushdown (*) VALUES
(1, 'one') \
(2, 'two') \
(3, 'three') \
(4, NULL);
EOSQL

# Recreate db.large and fill it server-side with 1,000,000 generated rows:
# col_00_int32 takes the values produced by numbers(1000000) and
# col_01_string is a random printable-ASCII string requested with length 32
# (the surrounding substring(..., 1, 32) keeps at most the first 32 characters).
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.large;
CREATE TABLE db.large (
col_00_int32 Int32,
col_01_string Nullable(String)
) ENGINE = MergeTree ORDER BY col_00_int32;
INSERT INTO db.large
SELECT
number AS col_00_int32,
substring(randomPrintableASCII(32), 1, 32) AS col_01_string
FROM
numbers(1000000);
EOSQL

# Recreate the six db.column_selection_* tables. They all share one schema
# (COL1 Int32, col2 Int32) and the same two rows, (1, 2) and (10, 20); only
# the table name differs, so the SQL is produced in a loop with one
# clickhouse-client call per table, in the original order.
for table_name in \
    column_selection_A_b_C_d_E \
    column_selection_COL1 \
    column_selection_asterisk \
    column_selection_col2_COL1 \
    column_selection_col2 \
    column_selection_col3
do
# The here-doc body and its terminator stay at column 0 on purpose: <<- strips
# leading tabs only, so a space-indented EOSQL would not end the document.
clickhouse-client -n <<-EOSQL
DROP TABLE IF EXISTS db.${table_name};
CREATE TABLE db.${table_name} (COL1 Int32, col2 Int32)
ENGINE = MergeTree ORDER BY COL1;
INSERT INTO db.${table_name} (*) VALUES
(1, 2) \
(10, 20);
EOSQL
done
Loading

0 comments on commit 828b029

Please sign in to comment.