Do not check avg_time_per_image during test (#2665)
* ignore avg_time_per_image during test

* do not call stdev when length of array is less than 2

* ignore avg_time_per_image during regression test
eunwoosh authored Nov 22, 2023
1 parent 0fbfbb1 commit 090ae97
Showing 3 changed files with 24 additions and 18 deletions.
4 changes: 4 additions & 0 deletions tests/regression/regression_command.py
@@ -130,6 +130,8 @@ def regression_openvino_testing(
         model_criteria = criteria[template.name] * (1.0 - reg_threshold)
 
     for k in trained_performance.keys():
+        if k == "avg_time_per_image":
+            continue
         result_dict[k] = round(exported_performance[k], 3)
         if exported_performance[k] < model_criteria:
             regression_result["passed"] = False
@@ -180,6 +182,8 @@ def regression_deployment_testing(
         modified_criteria = model_criteria - (model_criteria * reg_threshold)
 
     for k in exported_performance.keys():
+        if k == "avg_time_per_image":
+            continue
         if isinstance(criteria, dict) and template.name in criteria.keys():
             result_dict[k] = round(deployed_performance[k], 3)
             if deployed_performance[k] < modified_criteria:
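
For a concrete sense of the criterion above, here is a minimal, self-contained sketch. The template name, score, and thresholds are made-up illustration values, not numbers from the repository:

# Hypothetical values, mirroring the pass/fail logic in regression_openvino_testing.
criteria = {"SomeTemplate": 0.85}   # assumed target score for the template
reg_threshold = 0.10                # assumed allowed relative drop

model_criteria = criteria["SomeTemplate"] * (1.0 - reg_threshold)
print(model_criteria)               # 0.765; scores below this fail the regression test

exported_performance = {"f-measure": 0.80, "avg_time_per_image": 0.03}
for k in exported_performance:
    if k == "avg_time_per_image":   # skipped entirely after this commit
        continue
    print(k, exported_performance[k] >= model_criteria)  # f-measure True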
30 changes: 14 additions & 16 deletions tests/test_suite/run_test_command.py
@@ -10,7 +10,7 @@
 import sys
 import torch
 from pathlib import Path
-from typing import Dict
+from typing import Dict, Union
 import onnx
 import onnxruntime
 
@@ -349,11 +349,7 @@ def otx_eval_openvino_testing(
     with open(perf_path) as read_file:
         exported_performance = json.load(read_file)
 
-    for k in trained_performance.keys():
-        assert (
-            exported_performance[k] >= trained_performance[k]
-            or abs(trained_performance[k] - exported_performance[k]) / (trained_performance[k] + 1e-10) <= threshold
-        ), f"{trained_performance[k]=}, {exported_performance[k]=}"
+    compare_model_accuracy(exported_performance, trained_performance, threshold)
 
 
 def otx_demo_testing(template, root, otx_dir, args):
@@ -494,11 +490,7 @@ def otx_eval_deployment_testing(template, root, otx_dir, args, threshold=0.0):
     with open(f"{template_work_dir}/deployed_{template.model_template_id}/performance.json") as read_file:
         deployed_performance = json.load(read_file)
 
-    for k in exported_performance.keys():
-        assert (
-            deployed_performance[k] >= exported_performance[k]
-            or abs(exported_performance[k] - deployed_performance[k]) / (exported_performance[k] + 1e-10) <= threshold
-        ), f"{exported_performance[k]=}, {deployed_performance[k]=}"
+    compare_model_accuracy(deployed_performance, exported_performance, threshold)
 
 
 def otx_demo_deployment_testing(template, root, otx_dir, args):
@@ -745,11 +737,7 @@ def nncf_eval_testing(template, root, otx_dir, args, threshold=0.01):
     with open(f"{template_work_dir}/nncf_{template.model_template_id}/performance.json") as read_file:
         evaluated_performance = json.load(read_file)
 
-    for k in trained_performance.keys():
-        assert (
-            evaluated_performance[k] >= trained_performance[k]
-            or abs(trained_performance[k] - evaluated_performance[k]) / (trained_performance[k] + 1e-10) <= threshold
-        ), f"{trained_performance[k]=}, {evaluated_performance[k]=}"
+    compare_model_accuracy(evaluated_performance, trained_performance, threshold)
 
 
 def nncf_eval_openvino_testing(template, root, otx_dir, args):
@@ -1174,3 +1162,13 @@ def test_default_for_task(self):
             assert num_default_model == 1
 
     return _TestModelTemplates
+
+
+def compare_model_accuracy(performance_to_test: Dict, target_performance: Dict, threshold: Union[float, int]):
+    for k in target_performance.keys():
+        if k == "avg_time_per_image":
+            continue
+        assert (
+            performance_to_test[k] >= target_performance[k]
+            or abs(target_performance[k] - performance_to_test[k]) / (target_performance[k] + 1e-10) <= threshold
+        ), f"{target_performance[k]=}, {performance_to_test[k]=}"
8 changes: 6 additions & 2 deletions tools/experiment.py
@@ -192,11 +192,15 @@ def get_exp_result(self):
     def _calculate_avg_std_per_iter(self):
         if self._iter_time_arr:
             self._exp_result.avg_iter_time = statistics.mean(self._iter_time_arr)
-            self._exp_result.std_iter_time = statistics.stdev(self._iter_time_arr)
+            self._exp_result.std_iter_time = (
+                statistics.stdev(self._iter_time_arr) if len(self._iter_time_arr) > 1 else 0
+            )
 
         if self._data_time_arr:
             self._exp_result.avg_data_time = statistics.mean(self._data_time_arr)
-            self._exp_result.std_data_time = statistics.stdev(self._data_time_arr)
+            self._exp_result.std_data_time = (
+                statistics.stdev(self._data_time_arr) if len(self._data_time_arr) > 1 else 0
+            )
 
     def _parse_eval_output(self, file_path: Path):
         # NOTE: It is assumed that performance.json has key named either score or avg_time_per_image
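
The guard added above matters because statistics.stdev requires at least two data points and raises StatisticsError otherwise, e.g. for an experiment that ran a single iteration. A minimal sketch with a made-up timing value:

import statistics

iter_time_arr = [0.42]  # hypothetical single-iteration run

try:
    statistics.stdev(iter_time_arr)
except statistics.StatisticsError as err:
    print(f"unguarded call fails: {err}")

# The guarded form from the diff falls back to 0 instead of raising.
std_iter_time = statistics.stdev(iter_time_arr) if len(iter_time_arr) > 1 else 0
print(std_iter_time)  # 0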