Skip to content

Commit

Permalink
Enhance the inconsistency object script for ecpool and comment the pr…
Browse files Browse the repository at this point in the history
…eempt scrub message test case

Signed-off-by: Srinivasa Bharath Kanta <skanta@redhat.com>
  • Loading branch information
SrinivasaBharath committed Jan 7, 2025
1 parent 58d6d99 commit 0560c3b
Show file tree
Hide file tree
Showing 9 changed files with 124 additions and 50 deletions.
4 changes: 3 additions & 1 deletion suites/pacific/rados/tier-2_rados_test-bugfixes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,10 @@ tests:
plugin: jerasure
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- ecpool
comments: Active bug 2277111
comments: Active bugs 2277111,2335727 and 2335762

- test:
name: Inconsistent objects in replicated pool functionality check
Expand All @@ -160,6 +161,7 @@ tests:
pg_num: 1
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- replicated_pool
comments: Active bug 2316244
Expand Down
4 changes: 3 additions & 1 deletion suites/quincy/rados/tier-2_rados_test-bugfixes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,10 @@ tests:
plugin: jerasure
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- ecpool
comments: Active bug 2277111
comments: Active bugs 2277111,2335727 and 2335762
- test:
name: Inconsistent objects in replicated pool functionality check
desc: Scrub and deep-scrub on inconsistent objects in Replicated pool
Expand All @@ -172,6 +173,7 @@ tests:
pg_num: 1
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- replicated_pool
comments: Active bug 2316244
Expand Down
15 changes: 15 additions & 0 deletions suites/quincy/rados/tier-2_rados_test_omap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,21 @@ tests:
obj_end: 50
num_keys_obj: 100
delete_pool: true
# The following test case is commented out. A Jira ticket has been created to track this task:
# https://issues.redhat.com/browse/RHCEPHQE-17357
# - test:
# name: Preempt scrub messages checks
# desc: Checking preempt messages in the OSDs
# module: test_rados_preempt_scrub.py
# polarion-id: CEPH-83572916
# config:
# verify_osd_preempt:
# configurations:
# pool-1:
# pool_name: preempt_pool
# pool_type: replicated
# pg_num: 16
# delete_pool: true

- test:
name: Omap creations on objects
Expand Down
4 changes: 3 additions & 1 deletion suites/reef/rados/tier-2_rados_test-bugfixes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -157,9 +157,10 @@ tests:
plugin: jerasure
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- ecpool
comments: Active bug 2277111
comments: Active bugs 2277111,2335727 and 2335762

- test:
name: Inconsistent objects in replicated pool functionality check
Expand All @@ -173,6 +174,7 @@ tests:
pg_num: 1
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- replicated_pool
comments: Active bug 2316244
Expand Down
29 changes: 15 additions & 14 deletions suites/reef/rados/tier-2_rados_test_omap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -149,20 +149,21 @@ tests:
obj_end: 50
num_keys_obj: 100
delete_pool: true

- test:
name: Preempt scrub messages checks
desc: Checking preempt messages in the OSDs
module: test_rados_preempt_scrub.py
polarion-id: CEPH-83572916
config:
verify_osd_preempt:
configurations:
pool-1:
pool_name: preempt_pool
pool_type: replicated
pg_num: 16
delete_pool: true
# The following test case is commented out. A Jira ticket has been created to track this task:
# https://issues.redhat.com/browse/RHCEPHQE-17357
# - test:
# name: Preempt scrub messages checks
# desc: Checking preempt messages in the OSDs
# module: test_rados_preempt_scrub.py
# polarion-id: CEPH-83572916
# config:
# verify_osd_preempt:
# configurations:
# pool-1:
# pool_name: preempt_pool
# pool_type: replicated
# pg_num: 16
# delete_pool: true

- test:
name: Omap creations on objects
Expand Down
4 changes: 3 additions & 1 deletion suites/squid/rados/tier-2_rados_test-bugfixes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,10 @@ tests:
plugin: jerasure
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- ecpool
comments: Active bug 2277111
comments: Active bugs 2277111,2335727 and 2335762

- test:
name: Inconsistent objects in replicated pool functionality check
Expand All @@ -186,6 +187,7 @@ tests:
pg_num: 1
disable_pg_autoscale: true
inconsistent_obj_count: 4
debug_enable: False
delete_pool:
- replicated_pool
comments: Active bug 2316244
Expand Down
29 changes: 15 additions & 14 deletions suites/squid/rados/tier-2_rados_test_omap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -150,20 +150,21 @@ tests:
obj_end: 50
num_keys_obj: 100
delete_pool: true

- test:
name: Preempt scrub messages checks
desc: Checking preempt messages in the OSDs
module: test_rados_preempt_scrub.py
polarion-id: CEPH-83572916
config:
verify_osd_preempt:
configurations:
pool-1:
pool_name: preempt_pool
pool_type: replicated
pg_num: 16
delete_pool: true
# The following test case is commented out. A Jira ticket has been created to track this task:
# https://issues.redhat.com/browse/RHCEPHQE-17357
# - test:
# name: Preempt scrub messages checks
# desc: Checking preempt messages in the OSDs
# module: test_rados_preempt_scrub.py
# polarion-id: CEPH-83572916
# config:
# verify_osd_preempt:
# configurations:
# pool-1:
# pool_name: preempt_pool
# pool_type: replicated
# pg_num: 16
# delete_pool: true

- test:
name: Omap creations on objects
Expand Down
79 changes: 63 additions & 16 deletions tests/rados/test_osd_ecpool_inconsistency_scenario.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
This file contains the methods to verify the inconsistent object functionality during scrub/deep-scrub in EC pool.
AS part of verification the script perform the following tasks-
1. Creating omaps
1. Create objects
2. Convert the object in to inconsistent object
3. Use the osd_scrub_auto_repair and osd_scrub_auto_repair_num_errors to perform the PG repair
osd_scrub_auto_repair_num_errors - Setting this to true will enable automatic PG repair when errors are
Expand All @@ -28,6 +28,7 @@
from ceph.rados.objectstoretool_workflows import objectstoreToolWorkflows
from ceph.rados.rados_scrub import RadosScrubber
from tests.rados.monitor_configurations import MonConfigMethods
from tests.rados.stretch_cluster import wait_for_clean_pg_sets
from utility.log import Log
from utility.utils import method_should_succeed

Expand All @@ -49,19 +50,24 @@ def run(ceph_cluster, **kw):
objectstore_obj = objectstoreToolWorkflows(node=cephadm)
mon_obj = MonConfigMethods(rados_obj=rados_obj)
client_node = ceph_cluster.get_nodes(role="client")[0]
scrub_obj = RadosScrubber(node=cephadm)
try:
# set global autoscaler to off
rados_obj.configure_pg_autoscaler(**{"default_mode": "off"})
# Creating ec pool
ec_config = config.get("ec_pool")
pool_name = ec_config["pool_name"]
mon_obj.set_config(section="osd", name="debug_osd", value="20/20")

req_no_of_objects = config.get("inconsistent_obj_count")

if not rados_obj.create_erasure_pool(name=pool_name, **ec_config):
log.error("Failed to create the EC Pool")
return 1

if config.get("debug_enable"):
mon_obj.set_config(section="osd", name="debug_osd", value="20/20")
mon_obj.set_config(section="mgr", name="debug_mgr", value="20/20")

pg_info = rados_obj.create_ecpool_inconsistent_obj(
objectstore_obj, client_node, pool_name, req_no_of_objects
)
Expand All @@ -75,7 +81,9 @@ def run(ceph_cluster, **kw):

pg_id, no_of_objects = pg_info
auto_repair_param_value = no_of_objects - 1
log.info(f"The inconsistent object are created in the pg- {pg_id}")
log.info(
f"The inconsistent object are created in the pg- {pg_id} the number of objects are-{no_of_objects}"
)

# Check the default values of the osd_scrub_auto_repair_num_errors and osd_scrub_auto_repair
auto_repair_num_value = mon_obj.get_config(
Expand Down Expand Up @@ -113,8 +121,15 @@ def run(ceph_cluster, **kw):

mon_obj.set_config(section="osd", name="osd_scrub_auto_repair", value="true")
log.info("The osd_scrub_auto_repair value is set to true")
scrub_obj.set_osd_flags("set", "nodeep-scrub")
obj_count = ""
try:
obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "scrub")
except Exception as e:
log.info(e)

scrub_obj.set_osd_flags("unset", "nodeep-scrub")

obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "scrub")
if obj_count == -1:
log.error(f"The scrub not initiated on the pg-{pg_id}")
rados_obj.log_cluster_health()
Expand All @@ -125,7 +140,17 @@ def run(ceph_cluster, **kw):
f"Scrub repaired the {no_of_objects - obj_count} inconsistent objects"
)
return 1
obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "deep-scrub")

method_should_succeed(wait_for_clean_pg_sets, rados_obj)
scrub_obj.set_osd_flags("set", "noscrub")
time.sleep(5)
try:
obj_count = get_inconsistent_count(
scrub_object, pg_id, rados_obj, "deep-scrub"
)
except Exception as e:
log.info(e)
scrub_obj.set_osd_flags("unset", "noscrub")
if obj_count == -1:
log.error(f"The deep-scrub not initiated on the pg-{pg_id}")
rados_obj.log_cluster_health()
Expand All @@ -137,8 +162,8 @@ def run(ceph_cluster, **kw):
rados_obj.log_cluster_health()
return 1
log.info(
"Verification of Test scenario1- inconsistent objects greater than the "
"osd_scrub_auto_repair_num_errors count completed "
"Test scenario1 :- inconsistent objects greater than the "
"osd_scrub_auto_repair_num_errors count verification completed "
)
# Case2: The inconsistent object count is less than osd_scrub_auto_repair_num_errors count
auto_repair_param_value = no_of_objects + 1
Expand All @@ -154,7 +179,16 @@ def run(ceph_cluster, **kw):
log.info(
f"The osd_scrub_auto_repair_num_errors value is set to {auto_repair_param_value}"
)
obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "scrub")
method_should_succeed(wait_for_clean_pg_sets, rados_obj)
# The code below is commented out due to BZ#2335762 - scrub operations fail to repair inconsistent data in the EC pool
# when the nodeep-scrub flag is enabled. The issue is tracked by Jira task RHCEPHQE-17391.
# scrub_obj.set_osd_flags("set", "nodeep-scrub")
try:
obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "scrub")
except Exception as e:
log.info(e)

# scrub_obj.set_osd_flags("unset", "nodeep-scrub")
if obj_count == -1:
log.error(f"The scrub not initiated on the pg-{pg_id}")
rados_obj.log_cluster_health()
Expand Down Expand Up @@ -185,8 +219,15 @@ def run(ceph_cluster, **kw):
return 1

pg_id, no_of_objects = pg_info
scrub_obj.set_osd_flags("set", "noscrub")
try:
obj_count = get_inconsistent_count(
scrub_object, pg_id, rados_obj, "deep-scrub"
)
except Exception as e:
log.info(e)

obj_count = get_inconsistent_count(scrub_object, pg_id, rados_obj, "deep-scrub")
scrub_obj.set_osd_flags("unset", "noscrub")
if obj_count == -1:
log.error(f"The deep-scrub not initiated on the pg-{pg_id}")
rados_obj.log_cluster_health()
Expand All @@ -212,6 +253,8 @@ def run(ceph_cluster, **kw):
log.info(
"\n\n================ Execution of finally block =======================\n\n"
)
scrub_obj.set_osd_flags("unset", "nodeep-scrub")
scrub_obj.set_osd_flags("unset", "noscrub")
rados_obj.configure_pg_autoscaler(**{"default_mode": "on"})
if config.get("delete_pool"):
method_should_succeed(rados_obj.delete_pool, pool_name)
Expand All @@ -235,9 +278,9 @@ def get_inconsistent_count(scrub_object, pg_id, rados_obj, operation):
"""
operation_chk_flag = False
osd_scrub_min_interval = 120
osd_scrub_max_interval = 1800
osd_deep_scrub_interval = 1800
osd_scrub_min_interval = 5
osd_scrub_max_interval = 600
osd_deep_scrub_interval = 600
(
scrub_begin_hour,
scrub_begin_weekday,
Expand All @@ -254,24 +297,27 @@ def get_inconsistent_count(scrub_object, pg_id, rados_obj, operation):
scrub_object.set_osd_configuration(
"osd_deep_scrub_interval", osd_deep_scrub_interval
)
endTime = datetime.now() + timedelta(minutes=60)
endTime = datetime.now() + timedelta(minutes=40)

while datetime.now() <= endTime:
if operation == "scrub":
log.debug(f"Running scrub on pg : {pg_id}")

if rados_obj.start_check_scrub_complete(
pg_id=pg_id, user_initiated=False, wait_time=3600
pg_id=pg_id, user_initiated=False, wait_time=900
):
log.info(f"Scrub completed on pg : {pg_id}")
operation_chk_flag = True
break
else:
log.debug(f"Running deep-scrub on pg : {pg_id}")
if rados_obj.start_check_deep_scrub_complete(
pg_id=pg_id, user_initiated=False, wait_time=3600
pg_id=pg_id, user_initiated=False, wait_time=2400
):
log.info(f"Deep scrub completed on pg : {pg_id}")
# Increased the wait time due to BZ#2335727 - delay in displaying accurate inconsistent data with the
# 'list-inconsistent-obj' command in an EC pool. Jira tracker: RHCEPHQE-17358
time.sleep(600)
operation_chk_flag = True
break
log.info(f"Wating for the {operation} to complete")
Expand All @@ -283,7 +329,7 @@ def get_inconsistent_count(scrub_object, pg_id, rados_obj, operation):
while chk_count <= 10:
inconsistent_details = rados_obj.get_inconsistent_object_details(pg_id)
log.debug(
f"The inconsistent object details of the pg-{pg_id} - {inconsistent_details}"
f"The inconsistent object details in the pg-{pg_id} is - {inconsistent_details}"
)
obj_count = len(inconsistent_details["inconsistents"])
log.info(f" The inconsistent object count after {operation} is {obj_count}")
Expand Down Expand Up @@ -329,4 +375,5 @@ def set_default_param_value(mon_obj):
mon_obj.remove_config(section="osd", name="osd_deep_scrub_interval")
mon_obj.remove_config(section="osd", name="debug_osd")
mon_obj.remove_config(section="global", name="osd_pool_default_pg_autoscale_mode")
mon_obj.remove_config(section="mgr", name="debug_mgr")
time.sleep(10)
6 changes: 4 additions & 2 deletions tests/rados/test_osd_replicated_inconsistency_scenario.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ def run(ceph_cluster, **kw):
"run the tests"
)
return 1
mon_obj.set_config(section="osd", name="debug_osd", value="20/20")
mon_obj.set_config(section="mgr", name="debug_mgr", value="20/20")
if config.get("debug_enable"):
mon_obj.set_config(section="osd", name="debug_osd", value="20/20")
mon_obj.set_config(section="mgr", name="debug_mgr", value="20/20")

pg_id, inconsistent_obj_count = pg_info
log.info(
f"The inconsistent object count is- {inconsistent_obj_count} on pg -{pg_id}"
Expand Down

0 comments on commit 0560c3b

Please sign in to comment.