From ca6809a66c62db70376141cee87f0715ef2391d7 Mon Sep 17 00:00:00 2001 From: Jianjun Liao <36503113+Leavrth@users.noreply.github.com> Date: Sat, 14 Sep 2024 06:43:41 +0800 Subject: [PATCH 1/3] This is an automated cherry-pick of #53836 Signed-off-by: ti-chi-bot --- br/pkg/utils/backoff.go | 5 ++ br/tests/br_file_corruption/run.sh | 54 +++++++++++++++++++++ br/tests/br_file_corruption/workload | 12 +++++ br/tests/br_full_ddl/run.sh | 22 +++++++++ br/tests/br_pitr/run.sh | 65 ++++++++++++++++++++++++++ br/tests/br_txn/run.sh | 22 ++++++--- br/tests/run_group_br_tests.sh | 70 ++++++++++++++++++++++++++++ 7 files changed, 244 insertions(+), 6 deletions(-) create mode 100644 br/tests/br_file_corruption/run.sh create mode 100644 br/tests/br_file_corruption/workload create mode 100755 br/tests/run_group_br_tests.sh diff --git a/br/pkg/utils/backoff.go b/br/pkg/utils/backoff.go index f0a81283d68de..ede2acc8cfd61 100644 --- a/br/pkg/utils/backoff.go +++ b/br/pkg/utils/backoff.go @@ -204,6 +204,11 @@ func (bo *importerBackoffer) NextBackoff(err error) time.Duration { } } } + failpoint.Inject("set-import-attempt-to-one", func(_ failpoint.Value) { + if bo.attempt > 1 { + bo.attempt = 1 + } + }) if bo.delayTime > bo.maxDelayTime { return bo.maxDelayTime } diff --git a/br/tests/br_file_corruption/run.sh b/br/tests/br_file_corruption/run.sh new file mode 100644 index 0000000000000..35a7698bb9fef --- /dev/null +++ b/br/tests/br_file_corruption/run.sh @@ -0,0 +1,54 @@ +#!/bin/sh +# +# Copyright 2024 PingCAP, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -eux + +DB="$TEST_NAME" +TABLE="usertable" +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) + +run_sql "CREATE DATABASE $DB;" +go-ycsb load mysql -P $CUR/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB +run_br --pd $PD_ADDR backup full -s "local://$TEST_DIR/$DB" + +filename=$(find $TEST_DIR/$DB -regex ".*.sst" | head -n 1) +filename_temp=$filename"_temp" +filename_bak=$filename"_bak" +echo "corruption" > $filename_temp +cat $filename >> $filename_temp + +# file lost +mv $filename $filename_bak +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" +restore_fail=0 +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 +export GO_FAILPOINTS="" +if [ $restore_fail -ne 1 ]; then + echo 'restore success' + exit 1 +fi + +# file corruption +mv $filename_temp $filename +truncate --size=-11 $filename +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" +restore_fail=0 +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$DB" || restore_fail=1 +export GO_FAILPOINTS="" +if [ $restore_fail -ne 1 ]; then + echo 'restore success' + exit 1 +fi diff --git a/br/tests/br_file_corruption/workload b/br/tests/br_file_corruption/workload new file mode 100644 index 0000000000000..e3fadf9a3d068 --- /dev/null +++ b/br/tests/br_file_corruption/workload @@ -0,0 +1,12 @@ +recordcount=10000 +operationcount=0 +workload=core + +readallfields=true + +readproportion=0 +updateproportion=0 +scanproportion=0 +insertproportion=0 + +requestdistribution=uniform diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh index e0871e91dd589..b43ff76e0067b 100755 --- a/br/tests/br_full_ddl/run.sh +++ b/br/tests/br_full_ddl/run.sh @@ -22,6 +22,11 @@ LOG=/$TEST_DIR/backup.log RESTORE_LOG=LOG=/$TEST_DIR/restore.log BACKUP_STAT=/$TEST_DIR/backup_stat RESOTRE_STAT=/$TEST_DIR/restore_stat +<<<<<<< HEAD +======= +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" +>>>>>>> 5399ca70da9 (br: fix br integration test (#53836)) run_sql "CREATE DATABASE $DB;" go-ycsb load mysql -P tests/$TEST_NAME/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB @@ -38,6 +43,23 @@ for i in $(seq $DDL_COUNT); do fi done +# wait until the index creation/drop is done +retry_cnt=0 +while true; do + run_sql "ADMIN SHOW DDL JOBS WHERE DB_NAME = '$DB' AND TABLE_NAME = '$TABLE' AND STATE != 'synced';" + if grep -Fq "1. row" $res_file; then + cat $res_file + retry_cnt=$((retry_cnt+1)) + if [ "$retry_cnt" -gt 50 ]; then + echo 'the wait lag is too large' + exit 1 + fi + continue + fi + + break +done + # run analyze to generate stats run_sql "analyze table $DB.$TABLE;" # record field0's stats and remove last_update_version diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh index b9eb63f8eda4e..b3d9308b4f4ea 100644 --- a/br/tests/br_pitr/run.sh +++ b/br/tests/br_pitr/run.sh @@ -100,6 +100,7 @@ restart_services echo "run pitr" run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" > $res_file 2>&1 +<<<<<<< HEAD # check something in downstream cluster echo "check br log" check_contains "restore log success summary" @@ -114,3 +115,67 @@ expect_delete_range=$(($incremental_delete_range_count-$prepare_delete_range_cou check_contains "DELETE_RANGE_CNT: $expect_delete_range" ## check feature compatibility between PITR and accelerate indexing bash $CUR/check/check_ingest_repair.sh +======= +check_result + +# start a new cluster for incremental + log +echo "restart a services" +restart_services + +echo "run snapshot restore#2" +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/full" + +echo "run incremental restore + log restore" +run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/inc" > $res_file 2>&1 + +check_result + +# start a new cluster for incremental + log +echo "restart a services" +restart_services + +echo "run snapshot restore#3" +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/full" + +echo "run incremental restore but failed" +restore_fail=0 +run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/inc_fail" || restore_fail=1 +if [ $restore_fail -ne 1 ]; then + echo 'pitr success' + exit 1 +fi + +# start a new cluster for corruption +echo "restart a services" +restart_services + +echo "corrupt a log file" +filename=$(find $TEST_DIR/$PREFIX/log -regex ".*\.log" | grep -v "schema-meta" | tail -n 1) +filename_temp=$filename"_temp" +filename_bak=$filename"_bak" +echo "corruption" > $filename_temp +cat $filename >> $filename_temp + +# file lost +mv $filename $filename_bak +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" +restore_fail=0 +run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" || restore_fail=1 +export GO_FAILPOINTS="" +if [ $restore_fail -ne 1 ]; then + echo 'pitr success' + exit 1 +fi + +# file corruption +mv $filename_temp $filename +truncate --size=-11 $filename +export GO_FAILPOINTS="github.com/pingcap/tidb/br/pkg/utils/set-import-attempt-to-one=return(true)" +restore_fail=0 +run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" || restore_fail=1 +export GO_FAILPOINTS="" +if [ $restore_fail -ne 1 ]; then + echo 'pitr success' + exit 1 +fi +>>>>>>> 5399ca70da9 (br: fix br integration test (#53836)) diff --git a/br/tests/br_txn/run.sh b/br/tests/br_txn/run.sh index 8b15f78764af4..81190458d54cd 100755 --- a/br/tests/br_txn/run.sh +++ b/br/tests/br_txn/run.sh @@ -97,12 +97,22 @@ run_test() { # delete data in range[start-key, end-key) clean "hello" "world" # Ensure the data is deleted - checksum_new=$(checksum "hello" "world") - - if [ "$checksum_new" != "$checksum_empty" ];then - echo "failed to delete data in range after backup" - fail_and_exit - fi + retry_cnt=0 + while true; do + checksum_new=$(checksum "hello" "world") + + if [ "$checksum_new" != "$checksum_empty" ]; then + echo "failed to delete data in range after backup; retry_cnt = $retry_cnt" + retry_cnt=$((retry_cnt+1)) + if [ "$retry_cnt" -gt 50 ]; then + fail_and_exit + fi + sleep 1 + continue + fi + + break + done # restore rawkv echo "restore start..." diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh new file mode 100755 index 0000000000000..04ff8c60701d4 --- /dev/null +++ b/br/tests/run_group_br_tests.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash + +# This script split the integration tests into 9 groups to support parallel group tests execution. +# all the integration tests are located in br/tests directory. only the directories +# containing run.sh will be considered as valid br integration tests. the script will print the total case number + +set -eo pipefail + +# Step 1 +CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +group=$1 +export COV_DIR="/tmp/group_cover" +rm -rf $COV_DIR +mkdir -p $COV_DIR + +# Define groups +# Note: If new group is added, the group name must also be added to CI +# * https://github.com/PingCAP-QE/ci/blob/main/pipelines/pingcap/tidb/latest/pull_br_integration_test.groovy +# Each group of tests consumes as much time as possible, thus reducing CI waiting time. +# Putting multiple light tests together and heavy tests in a separate group. +declare -A groups +groups=( + ["G00"]="br_300_small_tables br_backup_empty br_backup_version br_cache_table br_case_sensitive br_charset_gbk br_check_new_collocation_enable br_history br_gcs br_rawkv" + ["G01"]="br_autoid br_crypter2 br_db br_db_online br_db_online_newkv br_db_skip br_debug_meta br_ebs br_foreign_key br_full br_table_partition br_full_ddl" + ["G02"]="br_full_cluster_restore br_full_index br_incremental_ddl br_pitr_failpoint" + ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup br_mv_index br_other br_partition_add_index br_tidb_placement_policy br_tiflash br_tiflash_conflict' + ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table' + ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter' + ["G06"]='br_tikv_outage br_tikv_outage3' + ["G07"]='br_pitr' + ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption' +) + +# Get other cases not in groups, to avoid missing any case +others=() +for script in "$CUR"/*/run.sh; do + test_name="$(basename "$(dirname "$script")")" + if [[ $test_name != br* ]]; then + continue + fi + # shellcheck disable=SC2076 + if [[ ! " ${groups[*]} " =~ " ${test_name} " ]]; then + others=("${others[@]} ${test_name}") + fi +done + +if [[ "$group" == "others" ]]; then + if [[ -z $others ]]; then + echo "All br integration test cases have been added to groups" + exit 0 + fi + echo "Error: "$others" is not added to any group in br/tests/run_group_br_tests.sh" + exit 1 +elif [[ " ${!groups[*]} " =~ " ${group} " ]]; then + test_names="${groups[${group}]}" + # Run test cases + if [[ -n $test_names ]]; then + echo "" + echo "Run cases: ${test_names}" + for case_name in $test_names; do + echo "Run cases: ${case_name}" + rm -rf /tmp/backup_restore_test + mkdir -p /tmp/backup_restore_test + TEST_NAME=${case_name} ${CUR}/run.sh + done + fi +else + echo "Error: invalid group name: ${group}" + exit 1 +fi From 103d449a86e8aab30a5dd532360a9f9fe26f8161 Mon Sep 17 00:00:00 2001 From: Jianjun Liao Date: Tue, 5 Nov 2024 16:18:06 +0800 Subject: [PATCH 2/3] resolve conflicts Signed-off-by: Jianjun Liao --- br/tests/br_full_ddl/run.sh | 4 -- br/tests/br_pitr/run.sh | 31 --------------- br/tests/run_group_br_tests.sh | 70 ---------------------------------- 3 files changed, 105 deletions(-) delete mode 100755 br/tests/run_group_br_tests.sh diff --git a/br/tests/br_full_ddl/run.sh b/br/tests/br_full_ddl/run.sh index b43ff76e0067b..9f3ab963193b4 100755 --- a/br/tests/br_full_ddl/run.sh +++ b/br/tests/br_full_ddl/run.sh @@ -22,11 +22,7 @@ LOG=/$TEST_DIR/backup.log RESTORE_LOG=LOG=/$TEST_DIR/restore.log BACKUP_STAT=/$TEST_DIR/backup_stat RESOTRE_STAT=/$TEST_DIR/restore_stat -<<<<<<< HEAD -======= -CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) res_file="$TEST_DIR/sql_res.$TEST_NAME.txt" ->>>>>>> 5399ca70da9 (br: fix br integration test (#53836)) run_sql "CREATE DATABASE $DB;" go-ycsb load mysql -P tests/$TEST_NAME/workload -p mysql.host=$TIDB_IP -p mysql.port=$TIDB_PORT -p mysql.user=root -p mysql.db=$DB diff --git a/br/tests/br_pitr/run.sh b/br/tests/br_pitr/run.sh index b3d9308b4f4ea..74b64837df336 100644 --- a/br/tests/br_pitr/run.sh +++ b/br/tests/br_pitr/run.sh @@ -100,7 +100,6 @@ restart_services echo "run pitr" run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/full" > $res_file 2>&1 -<<<<<<< HEAD # check something in downstream cluster echo "check br log" check_contains "restore log success summary" @@ -115,35 +114,6 @@ expect_delete_range=$(($incremental_delete_range_count-$prepare_delete_range_cou check_contains "DELETE_RANGE_CNT: $expect_delete_range" ## check feature compatibility between PITR and accelerate indexing bash $CUR/check/check_ingest_repair.sh -======= -check_result - -# start a new cluster for incremental + log -echo "restart a services" -restart_services - -echo "run snapshot restore#2" -run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/full" - -echo "run incremental restore + log restore" -run_br --pd $PD_ADDR restore point -s "local://$TEST_DIR/$PREFIX/log" --full-backup-storage "local://$TEST_DIR/$PREFIX/inc" > $res_file 2>&1 - -check_result - -# start a new cluster for incremental + log -echo "restart a services" -restart_services - -echo "run snapshot restore#3" -run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/full" - -echo "run incremental restore but failed" -restore_fail=0 -run_br --pd $PD_ADDR restore full -s "local://$TEST_DIR/$PREFIX/inc_fail" || restore_fail=1 -if [ $restore_fail -ne 1 ]; then - echo 'pitr success' - exit 1 -fi # start a new cluster for corruption echo "restart a services" @@ -178,4 +148,3 @@ if [ $restore_fail -ne 1 ]; then echo 'pitr success' exit 1 fi ->>>>>>> 5399ca70da9 (br: fix br integration test (#53836)) diff --git a/br/tests/run_group_br_tests.sh b/br/tests/run_group_br_tests.sh deleted file mode 100755 index 04ff8c60701d4..0000000000000 --- a/br/tests/run_group_br_tests.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash - -# This script split the integration tests into 9 groups to support parallel group tests execution. -# all the integration tests are located in br/tests directory. only the directories -# containing run.sh will be considered as valid br integration tests. the script will print the total case number - -set -eo pipefail - -# Step 1 -CUR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -group=$1 -export COV_DIR="/tmp/group_cover" -rm -rf $COV_DIR -mkdir -p $COV_DIR - -# Define groups -# Note: If new group is added, the group name must also be added to CI -# * https://github.com/PingCAP-QE/ci/blob/main/pipelines/pingcap/tidb/latest/pull_br_integration_test.groovy -# Each group of tests consumes as much time as possible, thus reducing CI waiting time. -# Putting multiple light tests together and heavy tests in a separate group. -declare -A groups -groups=( - ["G00"]="br_300_small_tables br_backup_empty br_backup_version br_cache_table br_case_sensitive br_charset_gbk br_check_new_collocation_enable br_history br_gcs br_rawkv" - ["G01"]="br_autoid br_crypter2 br_db br_db_online br_db_online_newkv br_db_skip br_debug_meta br_ebs br_foreign_key br_full br_table_partition br_full_ddl" - ["G02"]="br_full_cluster_restore br_full_index br_incremental_ddl br_pitr_failpoint" - ["G03"]='br_incompatible_tidb_config br_incremental br_incremental_index br_incremental_only_ddl br_incremental_same_table br_insert_after_restore br_key_locked br_log_test br_move_backup br_mv_index br_other br_partition_add_index br_tidb_placement_policy br_tiflash br_tiflash_conflict' - ["G04"]='br_range br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table' - ["G05"]='br_skip_checksum br_split_region_fail br_systables br_table_filter br_txn br_stats br_clustered_index br_crypter' - ["G06"]='br_tikv_outage br_tikv_outage3' - ["G07"]='br_pitr' - ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption' -) - -# Get other cases not in groups, to avoid missing any case -others=() -for script in "$CUR"/*/run.sh; do - test_name="$(basename "$(dirname "$script")")" - if [[ $test_name != br* ]]; then - continue - fi - # shellcheck disable=SC2076 - if [[ ! " ${groups[*]} " =~ " ${test_name} " ]]; then - others=("${others[@]} ${test_name}") - fi -done - -if [[ "$group" == "others" ]]; then - if [[ -z $others ]]; then - echo "All br integration test cases have been added to groups" - exit 0 - fi - echo "Error: "$others" is not added to any group in br/tests/run_group_br_tests.sh" - exit 1 -elif [[ " ${!groups[*]} " =~ " ${group} " ]]; then - test_names="${groups[${group}]}" - # Run test cases - if [[ -n $test_names ]]; then - echo "" - echo "Run cases: ${test_names}" - for case_name in $test_names; do - echo "Run cases: ${case_name}" - rm -rf /tmp/backup_restore_test - mkdir -p /tmp/backup_restore_test - TEST_NAME=${case_name} ${CUR}/run.sh - done - fi -else - echo "Error: invalid group name: ${group}" - exit 1 -fi From 8e7de5154f981104a55a87e99580d33d1d6f44f2 Mon Sep 17 00:00:00 2001 From: Jianjun Liao Date: Thu, 7 Nov 2024 13:25:46 +0800 Subject: [PATCH 3/3] add test group Signed-off-by: Jianjun Liao --- br/tests/run_group.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/br/tests/run_group.sh b/br/tests/run_group.sh index 8ebd878d8aec0..fee1bc912625d 100755 --- a/br/tests/run_group.sh +++ b/br/tests/run_group.sh @@ -29,7 +29,7 @@ groups=( ["G05"]='br_range br_rawkv br_replica_read br_restore_TDE_enable br_restore_log_task_enable br_s3 br_shuffle_leader br_shuffle_region br_single_table' ["G06"]='br_skip_checksum br_small_batch_size br_split_region_fail br_systables br_table_filter br_txn' ["G07"]='br_clustered_index br_crypter br_table_partition br_tidb_placement_policy br_tiflash br_tikv_outage' - ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom lightning_add_index lightning_alter_random lightning_auto_columns' + ["G08"]='br_tikv_outage2 br_ttl br_views_and_sequences br_z_gc_safepoint br_autorandom br_file_corruption lightning_add_index lightning_alter_random lightning_auto_columns' ["G09"]='lightning_auto_random_default lightning_bom_file lightning_character_sets lightning_check_partial_imported lightning_checkpoint lightning_checkpoint_chunks lightning_checkpoint_columns lightning_checkpoint_dirty_tableid' ["G10"]='lightning_checkpoint_engines lightning_checkpoint_engines_order lightning_checkpoint_error_destroy lightning_checkpoint_parquet lightning_checkpoint_timestamp lightning_checksum_mismatch lightning_cmdline_override lightning_column_permutation lightning_common_handle' ["G11"]='lightning_compress lightning_concurrent-restore lightning_config_max_error lightning_config_skip_csv_header lightning_csv lightning_default-columns lightning_disable_scheduler_by_key_range lightning_disk_quota lightning_distributed_import'