Skip to content

Commit

Permalink
Merge pull request #227 from wayyoungboy/2.2.0-qulei
Browse files Browse the repository at this point in the history
2.2.0 add rca scene and update check
  • Loading branch information
Teingi authored Jun 6, 2024
2 parents b1d4d2c + e297185 commit 80e1bb0
Show file tree
Hide file tree
Showing 22 changed files with 1,555 additions and 94 deletions.
1 change: 1 addition & 0 deletions cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,6 +627,7 @@ def __init__(self):
self.parser.add_option('--cases', type='string', help="check observer's cases on package_file")
self.parser.add_option('--obproxy_cases', type='string', help="check obproxy's cases on package_file")
self.parser.add_option('--store_dir', type='string', help='the dir to store check result, current dir by default.', default='./check_report/')
self.parser.add_option('--report_type', type='string', help='The type of the check report, support "table", "json", "xml", "yaml". default table', default='table')
self.parser.add_option('-c', type='string', help='obdiag custom config', default=os.path.expanduser('~/.obdiag/config.yml'))

def init(self, cmd, args):
Expand Down
11 changes: 11 additions & 0 deletions docs/check.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,17 @@ Example:
obdiag check --cases=ad
obdiag check --obproxy_cases=proxy
obdiag check --cases=ad --obproxy_cases=proxy

--report_type={ReportType}

report_type是生成报告的格式,目前支持"table", "json", "xml", "yaml"四种格式
若未设定则默认为table

--store_dir={StoreDir}

store_dir是生成报告的存放路径,若未设定则默认为当前工作目录下的check_report文件夹内


```

### 关联持久化参数:
Expand Down
5 changes: 5 additions & 0 deletions handler/checker/check_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,11 @@ def handle(self):
if not os.path.exists(self.export_report_path):
self.stdio.warn("{0} not exists. mkdir it!".format(self.export_report_path))
os.mkdir(self.export_report_path)
# change self.export_report_type
if Util.get_option(self.options, 'report_type'):
self.export_report_type = Util.get_option(self.options, 'report_type')
if self.export_report_type not in ["table", "json", "xml", "yaml"]:
raise CheckException("report_type must be table, json, xml, yaml")
self.stdio.verbose("export_report_path is " + self.export_report_path)

# get package's by package_name
Expand Down
2 changes: 1 addition & 1 deletion handler/checker/check_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def __init__(self, context, report_target="observer", export_report_path="./chec
now = datetime.datetime.now()
date_format = now.strftime("%Y-%m-%d-%H-%M-%S")

file_name = "/check_report_{0}_".format(report_target) + date_format
file_name = "/obdiag_check_report_{0}_".format(report_target) + date_format
self.report_target = report_target

report_path = self.export_report_path + file_name
Expand Down
70 changes: 70 additions & 0 deletions handler/checker/step/sleep.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# Copyright (c) 2022 OceanBase
# OceanBase Diagnostic Tool is licensed under Mulan PSL v2.
# You can use this software according to the terms and conditions of the Mulan PSL v2.
# You may obtain a copy of Mulan PSL v2 at:
# http://license.coscl.org.cn/MulanPSL2
# THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
# EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
# MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
# See the Mulan PSL v2 for more details.

"""
@time: 2024/05/28
@file: sleep.py
@desc:
"""
import time

from handler.checker.check_exception import StepExecuteFailException
from common.ob_connector import OBConnector
from common.tool import StringUtils
from common.tool import Util


class StepSleepHandler:
    """Checker step that pauses task execution for a configured number of seconds.

    The step configuration is expected to look like ``{"sleep": <int seconds>}``.
    When a shared ``check_obConnector_pool`` is present in the context, a
    connection is borrowed on construction and released after the sleep,
    mirroring the connection lifecycle of the other step handlers.
    """

    def __init__(self, context, step, task_variable_dict):
        """Capture context/cluster info and borrow a connection from the pool.

        :param context: obdiag handler context (provides stdio, cluster_config,
            and the optional ``check_obConnector_pool`` variable).
        :param step: dict describing this step; read by :meth:`execute`.
        :param task_variable_dict: shared task variables, returned unchanged by
            :meth:`update_step_variable_dict`.
        :raises Exception: if the context is unusable or the pool yields no
            connection.
        """
        self.sleep_time = None
        # Default so the finally-block in execute() is safe even when no
        # connector pool is configured in the context.
        self.ob_connector = None
        self.ob_connector_pool = None
        try:
            self.context = context
            self.stdio = context.stdio
            self.ob_cluster = self.context.cluster_config
            self.ob_cluster_name = self.ob_cluster.get("cluster_name")
            self.tenant_mode = None
            self.sys_database = None
            self.database = None
            self.ob_connector_pool = self.context.get_variable('check_obConnector_pool', None)
            if self.ob_connector_pool is not None:
                self.ob_connector = self.ob_connector_pool.get_connection()
                if self.ob_connector is None:
                    raise Exception("self.ob_connector is None.")
        except Exception as e:
            self.stdio.error("StepSleepHandler init fail. Please check the OBCLUSTER conf. Exception : {0} .".format(e))
            raise Exception("StepSleepHandler init fail. Please check the OBCLUSTER conf. Exception : {0} .".format(e))
        self.task_variable_dict = task_variable_dict
        self.enable_dump_db = False
        self.trace_id = None
        self.STAT_NAME = {}
        self.report_file_path = ""
        self.enable_fast_dump = False
        self.ob_major_version = None
        self.step = step

    def execute(self):
        """Sleep for ``step["sleep"]`` seconds, then release the connection.

        :raises StepExecuteFailException: if ``sleep`` is missing or not an int.
        """
        try:
            self.sleep_time = self.step.get("sleep", None)
            if self.sleep_time is None:
                raise StepExecuteFailException("StepSleepHandler execute sleep is not set")
            # Identity check keeps the original semantics of type(...) != int:
            # bool (a subclass of int) is still rejected.
            if type(self.sleep_time) is not int:
                raise StepExecuteFailException("StepSleepHandler execute sleep type must be int")
            time.sleep(self.sleep_time)
            self.stdio.verbose("StepSleepHandler execute: {0}".format(self.sleep_time))
        except Exception as e:
            self.stdio.error("StepSleepHandler execute Exception: {0}".format(e))
            raise StepExecuteFailException("StepSleepHandler execute Exception: {0}".format(e))
        finally:
            # Bug fix: only release when a pool actually exists; the original
            # unconditionally dereferenced a possibly-None pool here, turning
            # every pool-less run into an AttributeError.
            if self.ob_connector_pool is not None:
                self.ob_connector_pool.release_connection(self.ob_connector)

    def update_step_variable_dict(self):
        """Return the (unmodified) shared task variable dict."""
        return self.task_variable_dict
10 changes: 10 additions & 0 deletions handler/checker/tasks/observer/clog/clog_disk_full.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Checker task: flag tenants whose clog (redo log) disk usage has crossed the
# per-server log_disk_utilization_threshold parameter.
info: "Check if there is a problem with clog disk full"
task:
  - version: "[4.0.0.0,*]"
    steps:
      - type: sql
        # Collects the tenant_ids whose LOG_DISK_IN_USE exceeds
        # LOG_DISK_SIZE * (log_disk_utilization_threshold / 100).
        sql: 'SELECT GROUP_CONCAT(DISTINCT u.tenant_id) FROM oceanbase.gv$ob_units u JOIN ( SELECT SVR_IP, SVR_PORT, TENANT_ID, value/100 AS value FROM oceanbase.GV$OB_PARAMETERS WHERE name = "log_disk_utilization_threshold") as c ON u.SVR_IP = c.SVR_IP AND u.SVR_PORT = c.SVR_PORT AND u.TENANT_ID = c.TENANT_ID WHERE u.LOG_DISK_IN_USE > u.LOG_DISK_SIZE * c.value;'
        result:
          set_value: tenant_ids
          # Passes only when the query returned no tenants.
          verify: '[ -z "$tenant_ids" ]'
          err_msg: "The following tenants have experienced clog disk full: #{tenant_ids}. Please check by obdiag rca --scene=clog_disk_full "
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Checker task: warn about observers whose assigned CPU exceeds capacity
# (CPU oversold).
info: 'Check whether there is any observer have CPU oversold.'
task:
  - version: "[4.3.1.0,*]"
    steps:
      - type: sql
        # Bug fix: the alias was garbled as "IP_PORT_COMBINATIONSFROM from"
        # (the FROM keyword fused into the alias and then duplicated).
        sql: "SELECT GROUP_CONCAT(CONCAT(SVR_IP, ':', SVR_PORT) SEPARATOR ', ') AS IP_PORT_COMBINATIONS FROM oceanbase.GV$OB_SERVERS WHERE CPU_ASSIGNED > CPU_CAPACITY;"
        result:
          set_value: CPU_oversold
          # Passes only when no server is oversold.
          verify: '[ -z "$CPU_oversold" ]'
          report_type: warning
          err_msg: 'Some observers have CPU oversold. There are #{CPU_oversold}'
1 change: 1 addition & 0 deletions handler/rca/plugins/gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ def init_parameters(self):
self.conf_map["gather_scope"] = ""
self.conf_map["store_dir"] = self.work_path
self.conf_map["gather_target"] = "observer"
self.greps_key = []

def grep(self, key):
if key is None or len(key) < 1 or type(key) != str:
Expand Down
Loading

0 comments on commit 80e1bb0

Please sign in to comment.