Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Database name overrides #624

Merged
merged 5 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions sink-connector-lightweight/docker/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ auto.create.tables: "true"
# database.connectionTimeZone: The timezone of the MySQL database server used to correctly shift the commit transaction timestamp.
database.connectionTimeZone: "UTC"

# Configuration to override the clickhouse database name for a given MySQL database name. If this configuration is not
# provided, the MySQL database name will be used as the ClickHouse database name.
#clickhouse.database.override.map: "employees:employees2, products:productsnew

# clickhouse.datetime.timezone: This timezone will override the default timezone of ClickHouse server. Timezone columns will be set to this timezone.
#clickhouse.datetime.timezone: "UTC"

Expand Down
2 changes: 1 addition & 1 deletion sink-connector-lightweight/docker/config_local.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ database.server.name: "ER54"
database.include.list: sbtest

# table.include.list An optional list of regular expressions that match fully-qualified table identifiers for tables to be monitored;
table.include.list: "sbtest.table1,sbtest.table2,sbtest.table3"
#table.include.list: "sbtest.table1,sbtest.table2,sbtest.table3"

# Clickhouse Server URL, Specify only the hostname.
clickhouse.server.url: "localhost"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,8 @@ private void setupProcessingThread(ClickHouseSinkConnectorConfig config) {
new ThreadFactoryBuilder().setNameFormat("Sink Connector thread-pool-%d").build();
this.executor = new ClickHouseBatchExecutor(config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()), namedThreadFactory);
for(int i = 0; i < config.getInt(ClickHouseSinkConnectorConfigVariables.THREAD_POOL_SIZE.toString()); i++) {
this.executor.scheduleAtFixedRate(new ClickHouseBatchRunnable(this.records, config, new HashMap()), 0, config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
this.executor.scheduleAtFixedRate(new ClickHouseBatchRunnable(this.records, config, new HashMap()), 0,
config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
}
//this.executor.scheduleAtFixedRate(this.runnable, 0, config.getLong(ClickHouseSinkConnectorConfigVariables.BUFFER_FLUSH_TIME.toString()), TimeUnit.MILLISECONDS);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
package com.altinity.clickhouse.debezium.embedded;

import com.altinity.clickhouse.debezium.embedded.cdc.DebeziumChangeEventCapture;
import com.altinity.clickhouse.debezium.embedded.cdc.DebeziumOffsetStorageIT;
import com.altinity.clickhouse.debezium.embedded.common.PropertiesHelper;
import com.altinity.clickhouse.debezium.embedded.ddl.parser.MySQLDDLParserService;
import com.altinity.clickhouse.debezium.embedded.parser.SourceRecordParserService;
import com.altinity.clickhouse.sink.connector.ClickHouseSinkConnectorConfig;
import com.altinity.clickhouse.sink.connector.db.BaseDbWriter;
import org.apache.log4j.BasicConfigurator;
import org.junit.Assert;
import org.junit.jupiter.api.AfterEach;
Expand All @@ -22,7 +20,6 @@
import org.testcontainers.utility.DockerImageName;

import java.sql.Connection;
import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package com.altinity.clickhouse.debezium.embedded.cdc;

import com.altinity.clickhouse.debezium.embedded.AppInjector;
import com.altinity.clickhouse.debezium.embedded.ClickHouseDebeziumEmbeddedApplication;
import com.altinity.clickhouse.debezium.embedded.ITCommon;
import com.altinity.clickhouse.debezium.embedded.api.DebeziumEmbeddedRestApi;
import com.altinity.clickhouse.debezium.embedded.ddl.parser.DDLParserService;
import com.altinity.clickhouse.debezium.embedded.parser.DebeziumRecordParserService;
import com.altinity.clickhouse.sink.connector.ClickHouseSinkConnectorConfig;
import com.altinity.clickhouse.sink.connector.db.BaseDbWriter;
import com.clickhouse.jdbc.ClickHouseConnection;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.log4j.BasicConfigurator;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testcontainers.clickhouse.ClickHouseContainer;
import org.testcontainers.containers.MySQLContainer;
import org.testcontainers.containers.wait.strategy.HttpWaitStrategy;
import org.testcontainers.junit.jupiter.Container;
import org.testcontainers.utility.DockerImageName;

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import static com.altinity.clickhouse.debezium.embedded.ITCommon.getDebeziumProperties;
import static org.junit.Assert.assertTrue;

public class DatabaseOverrideIT {

private static final Logger log = LoggerFactory.getLogger(DatabaseOverrideIT.class);


protected MySQLContainer mySqlContainer;

@Container
public static ClickHouseContainer clickHouseContainer = new ClickHouseContainer(DockerImageName.parse("clickhouse/clickhouse-server:latest")
.asCompatibleSubstituteFor("clickhouse"))
.withInitScript("init_clickhouse_schema_only_column_timezone.sql")
// .withCopyFileToContainer(MountableFile.forClasspathResource("config.xml"), "/etc/clickhouse-server/config.d/config.xml")
.withUsername("ch_user")
.withPassword("password")
.withExposedPorts(8123);

@BeforeEach
public void startContainers() throws InterruptedException {
mySqlContainer = new MySQLContainer<>(DockerImageName.parse("docker.io/bitnami/mysql:latest")
.asCompatibleSubstituteFor("mysql"))
.withDatabaseName("employees").withUsername("root").withPassword("adminpass")
// .withInitScript("15k_tables_mysql.sql")
.withExtraHost("mysql-server", "0.0.0.0")
.waitingFor(new HttpWaitStrategy().forPort(3306));

BasicConfigurator.configure();
mySqlContainer.start();
clickHouseContainer.start();
Thread.sleep(35000);
}


@DisplayName("Test that validates overriding database name in ClickHouse")
@Test
public void testDatabaseOverride() throws Exception {

Injector injector = Guice.createInjector(new AppInjector());

Properties props = getDebeziumProperties(mySqlContainer, clickHouseContainer);
props.setProperty("snapshot.mode", "schema_only");
props.setProperty("schema.history.internal.store.only.captured.tables.ddl", "true");
props.setProperty("schema.history.internal.store.only.captured.databases.ddl", "true");
props.setProperty("clickhouse.database.override.map", "employees:employees2, products:productsnew");
props.setProperty("database.include.list", "employees, products, customers");

// Override clickhouse server timezone.
ClickHouseDebeziumEmbeddedApplication clickHouseDebeziumEmbeddedApplication = new ClickHouseDebeziumEmbeddedApplication();


ExecutorService executorService = Executors.newFixedThreadPool(1);
executorService.execute(() -> {
try {
clickHouseDebeziumEmbeddedApplication.start(injector.getInstance(DebeziumRecordParserService.class),
injector.getInstance(DDLParserService.class), props, false);
DebeziumEmbeddedRestApi.startRestApi(props, injector, clickHouseDebeziumEmbeddedApplication.getDebeziumEventCapture()
, new Properties());
} catch (Exception e) {
throw new RuntimeException(e);
}

});

Thread.sleep(25000);

// Employees table
Connection conn = ITCommon.connectToMySQL(mySqlContainer);
conn.prepareStatement("create table `newtable`(col1 varchar(255) not null, col2 int, col3 int, primary key(col1))").execute();

// Insert a new row in the table
conn.prepareStatement("insert into newtable values('a', 1, 1)").execute();


conn.prepareStatement("create database products").execute();
conn.prepareStatement("create table products.prodtable(col1 varchar(255) not null, col2 int, col3 int, primary key(col1))").execute();
conn.prepareStatement("insert into products.prodtable values('a', 1, 1)").execute();

conn.prepareStatement("create database customers").execute();
conn.prepareStatement("create table customers.custtable(col1 varchar(255) not null, col2 int, col3 int, primary key(col1))").execute();
conn.prepareStatement("insert into customers.custtable values('a', 1, 1)").execute();

Thread.sleep(10000);

// Validate in Clickhouse the last record written is 29999
String jdbcUrl = BaseDbWriter.getConnectionString(clickHouseContainer.getHost(), clickHouseContainer.getFirstMappedPort(),
"system");
ClickHouseConnection chConn = BaseDbWriter.createConnection(jdbcUrl, "Client_1",
clickHouseContainer.getUsername(), clickHouseContainer.getPassword(), new ClickHouseSinkConnectorConfig(new HashMap<>()));
BaseDbWriter writer = new BaseDbWriter(clickHouseContainer.getHost(), clickHouseContainer.getFirstMappedPort(),
"employees", clickHouseContainer.getUsername(), clickHouseContainer.getPassword(), null, chConn);

long col2 = 0L;
ResultSet version1Result = writer.executeQueryWithResultSet("select col2 from employees2.newtable final where col1 = 'a'");

Check failure on line 128 in sink-connector-lightweight/src/test/java/com/altinity/clickhouse/debezium/embedded/cdc/DatabaseOverrideIT.java

View workflow job for this annotation

GitHub Actions / JUnit Test Report

DatabaseOverrideIT.testDatabaseOverride

Code: 81. DB::Exception: Database employees2 does not exist. Maybe you meant employees?. (UNKNOWN_DATABASE) (version 24.5.1.1763 (official build)) , server ClickHouseNode [uri=http://localhost:32825/system, options={custom_settings=allow_experimental_object_type=1,insert_allow_materialized_columns=1,client_name=Client_1}]@-1544883182
Raw output
java.sql.BatchUpdateException: 
Code: 81. DB::Exception: Database employees2 does not exist. Maybe you meant employees?. (UNKNOWN_DATABASE) (version 24.5.1.1763 (official build))
, server ClickHouseNode [uri=http://localhost:32825/system, options={custom_settings=allow_experimental_object_type=1,insert_allow_materialized_columns=1,client_name=Client_1}]@-1544883182
	at com.altinity.clickhouse.debezium.embedded.cdc.DatabaseOverrideIT.testDatabaseOverride(DatabaseOverrideIT.java:128)
while(version1Result.next()) {
col2 = version1Result.getLong("col2");
}
Thread.sleep(10000);
assertTrue(col2 == 1);

long productsCol2 = 0L;
ResultSet productsVersionResult = writer.executeQueryWithResultSet("select col2 from productsnew.prodtable final where col1 = 'a'");
while(productsVersionResult.next()) {
productsCol2 = productsVersionResult.getLong("col2");
}
assertTrue(productsCol2 == 1);
Thread.sleep(10000);

long customersCol2 = 0L;
ResultSet customersVersionResult = writer.executeQueryWithResultSet("select col2 from customers.custtable final where col1 = 'a'");
while(customersVersionResult.next()) {
customersCol2 = customersVersionResult.getLong("col2");
}
assertTrue(customersCol2 == 1);



clickHouseDebeziumEmbeddedApplication.getDebeziumEventCapture().engine.close();

conn.close();
// Files.deleteIfExists(tmpFilePath);
executorService.shutdown();
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import com.altinity.clickhouse.sink.connector.deduplicator.DeDuplicationPolicy;
import com.altinity.clickhouse.sink.connector.deduplicator.DeDuplicationPolicyValidator;
import com.altinity.clickhouse.sink.connector.validators.DatabaseOverrideValidator;
import com.altinity.clickhouse.sink.connector.validators.KafkaProviderValidator;
import com.altinity.clickhouse.sink.connector.validators.TopicToTableValidator;
import org.apache.kafka.common.config.AbstractConfig;
Expand Down Expand Up @@ -103,6 +104,19 @@ static ConfigDef newConfigDef() {
0,
ConfigDef.Width.NONE,
ClickHouseSinkConnectorConfigVariables.CLICKHOUSE_TOPICS_TABLES_MAP.toString())
// Define overrides map for ClickHouse Database
.define(
ClickHouseSinkConnectorConfigVariables.CLICKHOUSE_DATABASE_OVERRIDE_MAP.toString(),
Type.STRING,
"",
new DatabaseOverrideValidator(),
Importance.LOW,
"Map of source to destination database(override) (optional). Format : comma-separated tuples, e.g."
+ " <src_database-1>:<destination_database-1>,<src_database-2>:<destination_database-2>,... ",
CONFIG_GROUP_CONNECTOR_CONFIG,
0,
ConfigDef.Width.NONE,
ClickHouseSinkConnectorConfigVariables.CLICKHOUSE_DATABASE_OVERRIDE_MAP.toString())
.define(
ClickHouseSinkConnectorConfigVariables.BUFFER_COUNT.toString(),
Type.LONG,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ public enum ClickHouseSinkConnectorConfigVariables {
DEDUPLICATION_POLICY("deduplication.policy"),

CLICKHOUSE_TOPICS_TABLES_MAP("clickhouse.topic2table.map"),
CLICKHOUSE_DATABASE_OVERRIDE_MAP("clickhouse.database.override.map"),

CLICKHOUSE_URL("clickhouse.server.url"),
CLICKHOUSE_USER("clickhouse.server.user"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,68 @@ public class Utils {
// Connector version, change every release
public static final String VERSION = "1.0.0";

/**
* Function to parse the topic to table configuration parameter
*
* @param input Delimiter separated list.
* @return key/value pair of configuration.
*/
public static Map<String, String> parseSourceToDestinationDatabaseMap(String input) throws Exception {
Map<String, String> srcToDestinationMap = new HashMap<>();
boolean isInvalid = false;

if(input == null || input.isEmpty()) {
return srcToDestinationMap;
}

for (String str : input.split(",")) {
String[] tt = str.split(":");

if (tt.length != 2 || tt[0].trim().isEmpty() || tt[1].trim().isEmpty()) {
LOGGER.error(
Logging.logMessage(
"Invalid {} config format: {}",
ClickHouseSinkConnectorConfigVariables.CLICKHOUSE_DATABASE_OVERRIDE_MAP.toString(),
input));
return null;
}

String srcDatabase = tt[0].trim();
String dstDatabase = tt[1].trim();

if (!isValidDatabaseName(srcDatabase)) {
LOGGER.error(
Logging.logMessage(
"database name{} should have at least 2 "
+ "characters, start with _a-zA-Z, and only contains "
+ "_$a-zA-z0-9",
srcDatabase));
isInvalid = true;
}

if (!isValidDatabaseName(dstDatabase)) {
LOGGER.error(
Logging.logMessage(
"database name{} should have at least 2 "
+ "characters, start with _a-zA-Z, and only contains "
+ "_$a-zA-z0-9",
dstDatabase));
isInvalid = true;
}

if (srcToDestinationMap.containsKey(srcDatabase)) {
LOGGER.error(Logging.logMessage("source database name {} is duplicated", srcDatabase));
isInvalid = true;
}

srcToDestinationMap.put(tt[0].trim(), tt[1].trim());
}
if (isInvalid) {
throw new Exception("Invalid clickhouse table");
}
return srcToDestinationMap;
}

/**
* Function to parse the topic to table configuration parameter
*
Expand Down Expand Up @@ -108,4 +170,28 @@ public static boolean isValidTable(String tableName) {
return true;
}

public static boolean isValidDatabaseName(String dbName) {
// Check if the name is empty or longer than 63 characters
if (dbName == null || dbName.isEmpty() || dbName.length() > 63) {
return false;
}

// Check the first character: must be a letter or an underscore
char firstChar = dbName.charAt(0);
if (!(Character.isLetter(firstChar) || firstChar == '_')) {
return false;
}

// Check the remaining characters
for (int i = 1; i < dbName.length(); i++) {
char ch = dbName.charAt(i);
if (!(Character.isLetterOrDigit(ch) || ch == '_' || ch == '.')) {
return false;
}
}

return true;
}


}
Loading
Loading