forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
insert_deduplication_token support in non-replicated MergeTree
- Loading branch information
1 parent
1e06420
commit 0857a8d
Showing
10 changed files
with
165 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
15 changes: 15 additions & 0 deletions
15
tests/queries/0_stateless/02124_insert_deduplication_token.reference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
create and check deduplication | ||
two inserts with exact data, one inserted, one deduplicated by data digest | ||
0 1000 | ||
two inserts with the same dedup token, one inserted, one deduplicated by the token | ||
0 1000 | ||
1 1001 | ||
update dedup token, two inserts with the same dedup token, one inserted, one deduplicated by the token | ||
0 1000 | ||
1 1001 | ||
1 1001 | ||
reset deduplication token and insert new row | ||
0 1000 | ||
1 1001 | ||
1 1001 | ||
2 1002 |
34 changes: 34 additions & 0 deletions
34
tests/queries/0_stateless/02124_insert_deduplication_token.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
-- Checks INSERT deduplication on a non-replicated MergeTree table: first by the
-- digest of the inserted data, then by an explicit insert_deduplication_token.

DROP TABLE IF EXISTS insert_dedup_token SYNC;

SELECT 'create and check deduplication';
-- The deduplication window is set explicitly for this table.
-- NOTE(review): presumably deduplication is disabled for plain MergeTree unless
-- this window is non-zero -- confirm against the MergeTree settings reference.
CREATE TABLE insert_dedup_token
(
    id Int32,
    val UInt32
)
ENGINE = MergeTree()
ORDER BY id
SETTINGS non_replicated_deduplication_window = 0xFFFFFFFF;

-- No token is set yet: the second insert repeats the first one exactly and is
-- expected to be dropped, leaving a single row.
SELECT 'two inserts with exact data, one inserted, one deduplicated by data digest';
INSERT INTO insert_dedup_token (id, val) VALUES (0, 1000);
INSERT INTO insert_dedup_token (id, val) VALUES (0, 1000);
SELECT id, val FROM insert_dedup_token ORDER BY id;

-- The token literal contains an embedded zero byte (\x00). Both inserts share
-- the token but carry different rows; only the first row is expected to be
-- stored, i.e. the token decides, not the data.
SELECT 'two inserts with the same dedup token, one inserted, one deduplicated by the token';
SET insert_deduplication_token = '\x61\x00\x62';
INSERT INTO insert_dedup_token (id, val) VALUES (1, 1001);
INSERT INTO insert_dedup_token (id, val) VALUES (2, 1002);
SELECT id, val FROM insert_dedup_token ORDER BY id;

-- With a different token, (1, 1001) is expected to be stored a second time even
-- though an identical row already exists; (2, 1002) reuses the new token and is
-- expected to be dropped.
SELECT 'update dedup token, two inserts with the same dedup token, one inserted, one deduplicated by the token';
SET insert_deduplication_token = '\x61\x00\x63';
INSERT INTO insert_dedup_token (id, val) VALUES (1, 1001);
INSERT INTO insert_dedup_token (id, val) VALUES (2, 1002);
SELECT id, val FROM insert_dedup_token ORDER BY id;

-- The token is reset to the empty string; (2, 1002) has never been stored, so
-- it is expected to appear now.
SELECT 'reset deduplication token and insert new row';
SET insert_deduplication_token = '';
INSERT INTO insert_dedup_token (id, val) VALUES (2, 1002);
SELECT id, val FROM insert_dedup_token ORDER BY id;

DROP TABLE insert_dedup_token SYNC;
34 changes: 34 additions & 0 deletions
34
tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.reference
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
insert 2 blocks with dedup token, 1 row per block | ||
2 | ||
1 | ||
2 | ||
insert deduplicated by token | ||
2 | ||
1 | ||
2 | ||
insert the same data by providing different dedup token | ||
4 | ||
1 | ||
1 | ||
2 | ||
2 | ||
insert 4 blocks, 2 deduplicated, 2 inserted | ||
6 | ||
1 | ||
1 | ||
2 | ||
2 | ||
3 | ||
4 | ||
disable token based deduplication, insert the same data as with token | ||
10 | ||
1 | ||
1 | ||
1 | ||
2 | ||
2 | ||
2 | ||
3 | ||
3 | ||
4 | ||
4 |
35 changes: 35 additions & 0 deletions
35
tests/queries/0_stateless/02124_insert_deduplication_token_multiple_blocks.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/usr/bin/env bash
# Checks insert_deduplication_token when one INSERT is split into several
# blocks (max_insert_block_size=1, so every row becomes its own block) on a
# non-replicated MergeTree table.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Counts the parts whose min and max block numbers coincide. The
# database = currentDatabase() filter keeps parts of a same-named table in
# another database from being counted.
QUERY_COUNT_ORIGIN_BLOCKS="SELECT COUNT(*) FROM system.parts WHERE database = currentDatabase() AND table = 'block_dedup_token' AND min_block_number == max_block_number;"
QUERY_SELECT_FROM_TABLE_ORDERED="SELECT * FROM block_dedup_token ORDER BY id;"

# insert_rows VALUES_LIST TOKEN
# Sends "INSERT INTO block_dedup_token VALUES <VALUES_LIST>" over HTTP with the
# block-size settings that force one row per block and with TOKEN as
# insert_deduplication_token.
# NOTE(review): the single quotes around the token are inside a double-quoted
# string, so they are sent literally as part of the parameter value. Confirm
# whether the server strips them; if it does not, an empty TOKEN sends the
# two-character value '' rather than an empty token.
insert_rows()
{
    local values_list=$1
    local dedup_token=$2

    echo "INSERT INTO block_dedup_token VALUES ${values_list}" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&max_insert_block_size=1&min_insert_block_size_rows=0&min_insert_block_size_bytes=0&insert_deduplication_token='${dedup_token}'&query=" --data-binary @-
}

# Prints the part count followed by the table contents ordered by id.
show_state()
{
    $CLICKHOUSE_CLIENT --multiquery --query "$QUERY_COUNT_ORIGIN_BLOCKS;$QUERY_SELECT_FROM_TABLE_ORDERED"
}

$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS block_dedup_token SYNC"
$CLICKHOUSE_CLIENT --query="CREATE TABLE block_dedup_token (id Int32) ENGINE=MergeTree() ORDER BY id SETTINGS non_replicated_deduplication_window=0xFFFFFFFF;"

$CLICKHOUSE_CLIENT --query="SELECT 'insert 2 blocks with dedup token, 1 row per block'"
insert_rows '(1), (2)' 'dedup1'
show_state

# Same rows, same token: nothing new is expected to be stored.
$CLICKHOUSE_CLIENT --query="SELECT 'insert deduplicated by token'"
insert_rows '(1), (2)' 'dedup1'
show_state

# Same rows, new token: both blocks are expected to be stored again.
$CLICKHOUSE_CLIENT --query="SELECT 'insert the same data by providing different dedup token'"
insert_rows '(1), (2)' 'dedup2'
show_state

# The first two blocks repeat the previous insert under the same token; only
# the blocks for 3 and 4 are expected to be stored.
$CLICKHOUSE_CLIENT --query="SELECT 'insert 4 blocks, 2 deduplicated, 2 inserted'"
insert_rows '(1), (2), (3), (4)' 'dedup2'
show_state

# Empty token value: all four blocks are expected to be stored.
$CLICKHOUSE_CLIENT --query="SELECT 'disable token based deduplication, insert the same data as with token'"
insert_rows '(1), (2), (3), (4)' ''
show_state

$CLICKHOUSE_CLIENT --query="DROP TABLE block_dedup_token SYNC"