CMIP-Data-Request · rigoudyg · Jan 24, 2025 · Jan 16, 2025 · Jan 17, 2025 · Jan 23, 2025
diff --git a/data_request_api/__init__.py b/data_request_api/__init__.py
diff --git a/data_request_api/command_line/export_dreq_lists_json.py b/data_request_api/command_line/export_dreq_lists_json.py
@@ -21,7 +21,7 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument('dreq_version', choices=dc.get_versions(), help="data request version")
     parser.add_argument('--opportunities_file', type=str, help="path to JSON file listing opportunities to respond to. If it doesn't exist a template will be created")
-    parser.add_argument('--all_opportunities', action='store_true', help="Respond to all opporunities")
+    parser.add_argument('--all_opportunities', action='store_true', help="Respond to all opportunities")
     parser.add_argument('--experiments', nargs='+', type=str, help='limit output to the specified experiments')
     parser.add_argument('output_file', help='file to write JSON output to')
     return parser.parse_args()

diff --git a/data_request_api/stable/content/dreq_api/consolidate_export.py b/data_request_api/stable/content/dreq_api/consolidate_export.py
@@ -4,7 +4,7 @@
 import warnings
 
 from data_request_api.stable.utilities.logger import get_logger  # noqa
-from data_request_api.stable.content.dreq_api.mapping_table import version_consistency
+from .mapping_table import version_consistency
 
 # UID generation
 default_count = 0

diff --git a/data_request_api/stable/content/dump_transformation.py b/data_request_api/stable/content/dump_transformation.py
@@ -14,15 +14,15 @@
 import re
 from collections import defaultdict
 
-import six
 
 from data_request_api.stable.utilities.logger import get_logger, change_log_level, change_log_file
 from data_request_api.stable.utilities.tools import read_json_input_file_content, write_json_output_file_content
+from .dreq_api import dreq_content as dc
 
 
 def correct_key_string(input_string, *to_remove_strings):
     logger = get_logger()
-    if isinstance(input_string, six.string_types):
+    if isinstance(input_string, str):
         input_string = input_string.lower()
         for to_remove_string in to_remove_strings:
             input_string = input_string.replace(to_remove_string.lower(), "")
@@ -366,7 +366,7 @@ def transform_content_one_base(content):
                         else:
                             logger.error(f"Could not reshape key {key} from id {uid} of element type {subelt}: contains several elements")
                             raise ValueError(f"Could not reshape key {key} from id {uid} of element type {subelt}: contains several elements")
-                    elif isinstance(content[subelt][uid][key], six.string_types):
+                    elif isinstance(content[subelt][uid][key], str):
                         logger.warning(f"Could not reshape key {key} from id {uid} of element type {subelt}: already a string")
                     else:
                         logger.error(f"Could not reshape key {key} from id {uid} of element type {subelt}: not a list")
@@ -440,6 +440,41 @@ def transform_content(content, version):
         raise TypeError(f"Deal with dict types, not {type(content).__name__}")
 
 
+def get_transformed_content(version="latest_stable", export_version="release", use_consolidation=False,
+                            force_retrieve=False, output_dir=None,
+                            default_transformed_content_pattern="{kind}_{export_version}_content.json"):
+    """Return the paths of the transformed DR and VS JSON files of a single data request version.
+
+    Both files live in ``output_dir`` (default: directory of the retrieved content); they are rebuilt when
+    ``force_retrieve`` is true or when at least one of them is missing.
+    Raises ValueError unless ``dc.retrieve`` yields exactly one version.
+    """
+    # Download specified version of data request content (if not locally cached)
+    versions = dc.retrieve(version, export=export_version, consolidate=use_consolidation)
+    # Check that there is only one version associated
+    if len(versions) > 1:
+        raise ValueError("Could only deal with one version.")
+    if len(versions) == 0:
+        raise ValueError("No version found.")
+    version = list(versions)[0]
+    if output_dir is None:
+        output_dir = os.path.dirname(versions[version])
+    # exist_ok closes the check-then-create race; an empty name stands for the current directory
+    os.makedirs(output_dir or os.curdir, exist_ok=True)
+    DR_content, VS_content = [os.path.join(output_dir, default_transformed_content_pattern.format(
+        kind=kind, export_version=export_version)) for kind in ("DR", "VS")]
+    if force_retrieve or not all(os.path.exists(filepath) for filepath in (DR_content, VS_content)):
+        # Remove stale outputs first so that a failing transformation cannot leave outdated files behind
+        for filepath in (DR_content, VS_content):
+            if os.path.exists(filepath):
+                os.remove(filepath)
+        content = dc.load(version, export=export_version, consolidate=use_consolidation)
+        data_request, vocabulary_server = transform_content(content, version)
+        write_json_output_file_content(DR_content, data_request)
+        write_json_output_file_content(VS_content, vocabulary_server)
+    return DR_content, VS_content
+
+
 if __name__ == "__main__":
     change_log_file(default=True)
     change_log_level("debug")

diff --git a/data_request_api/stable/query/data_request.py b/data_request_api/stable/query/data_request.py
@@ -12,14 +12,12 @@
 import os
 from collections import defaultdict
 
-import six
-
 from data_request_api.stable.utilities.logger import get_logger, change_log_file, change_log_level
 from data_request_api.stable.content.dump_transformation import transform_content
 from data_request_api.stable.utilities.tools import read_json_file
 from data_request_api.stable.query.vocabulary_server import VocabularyServer, is_link_id_or_value, build_link_from_id
 
-version = "0.1"
+version = "1.0.1"
 
 
 class ConstantValueObj(object):
@@ -70,13 +68,13 @@ def transform_content(input_dict, dr, force_transform=False):
 		for (key, values) in input_dict.items():
 			if isinstance(values, list):
 				for (i, value) in enumerate(values):
-					if isinstance(value, six.string_types) and (force_transform or is_link_id_or_value(value)[0]):
+					if isinstance(value, str) and (force_transform or is_link_id_or_value(value)[0]):
 						input_dict[key][i] = dr.find_element(key, value)
-					elif isinstance(value, six.string_types):
+					elif isinstance(value, str):
 						input_dict[key][i] = ConstantValueObj(value)
-			elif isinstance(values, six.string_types) and (force_transform or is_link_id_or_value(values)[0]):
+			elif isinstance(values, str) and (force_transform or is_link_id_or_value(values)[0]):
 				input_dict[key] = dr.find_element(key, values)
-			elif isinstance(values, six.string_types):
+			elif isinstance(values, str):
 				input_dict[key] = ConstantValueObj(values)
 		return input_dict
 
@@ -404,14 +402,14 @@ def from_input(cls, json_input, version, **kwargs):
 	@classmethod
 	def from_separated_inputs(cls, DR_input, VS_input, **kwargs):
 		logger = get_logger()
-		if isinstance(DR_input, six.string_types) and os.path.isfile(DR_input):
+		if isinstance(DR_input, str) and os.path.isfile(DR_input):
 			DR = read_json_file(DR_input)
 		elif isinstance(DR_input, dict):
 			DR = copy.deepcopy(DR_input)
 		else:
 			logger.error("DR_input should be either the name of a json file or a dictionary.")
 			raise TypeError("DR_input should be either the name of a json file or a dictionary.")
-		if isinstance(VS_input, six.string_types) and os.path.isfile(VS_input):
+		if isinstance(VS_input, str) and os.path.isfile(VS_input):
 			VS = VocabularyServer.from_input(VS_input)
 		elif isinstance(VS_input, dict):
 			VS = VocabularyServer(copy.deepcopy(VS_input))
@@ -423,10 +421,10 @@ def from_separated_inputs(cls, DR_input, VS_input, **kwargs):
 	@staticmethod
 	def _split_content_from_input_json(input_json, version):
 		logger = get_logger()
-		if not isinstance(version, six.string_types):
+		if not isinstance(version, str):
 			logger.error(f"Version should be a string, not {type(version).__name__}.")
 			raise TypeError(f"Version should be a string, not {type(version).__name__}.")
-		if isinstance(input_json, six.string_types) and os.path.isfile(input_json):
+		if isinstance(input_json, str) and os.path.isfile(input_json):
 			content = read_json_file(input_json)
 		elif isinstance(input_json, dict):
 			content = input_json
@@ -639,7 +637,7 @@ def filter_elements_per_request(self, element_type, requests=dict(), operation="
 				if not isinstance(values, list):
 					values = [values, ]
 				for val in values:
-					if isinstance(val, six.string_types):
+					if isinstance(val, str):
 						new_val = self.find_element(element_type=req, value=val, default=None)
 					else:
 						new_val = val
@@ -736,7 +734,7 @@ def export_summary(self, lines_data, columns_data, output_file, sorting_line="id
 	                   sorting_column="id", title_column="name", filtering_requests=dict(), filtering_operation="all",
 	                   filtering_skip_if_missing=False):
 		logger = get_logger()
-		logger.info(f"Generate summary for {lines_data}/{columns_data}")
+		logger.debug(f"Generate summary for {lines_data}/{columns_data}")
 		filtered_data = self.filter_elements_per_request(element_type=lines_data, requests=filtering_requests,
 		                                                 operation=filtering_operation,
 		                                                 skip_if_missing=filtering_skip_if_missing)
@@ -750,7 +748,7 @@ def export_summary(self, lines_data, columns_data, output_file, sorting_line="id
 		logger.debug(f"{nb_lines} elements found for {lines_data}")
 		logger.debug(f"{len(columns_title)} found elements for {columns_data}")
 
-		logger.info("Generate summary")
+		logger.debug("Generate summary")
 		content = defaultdict(list)
 		for (i, data) in enumerate(columns_datasets):
 			logger.debug(f"Deal with column {i}/{len(columns_title)}")
@@ -764,14 +762,14 @@ def export_summary(self, lines_data, columns_data, output_file, sorting_line="id
 				else:
 					content[line_data_title].append("")
 
-		logger.info("Format summary")
+		logger.debug("Format summary")
 		rep = list()
 		rep.append(";".join([table_title, ] + columns_title))
 		for line_data in filtered_data:
 			line_data_title = str(line_data.__getattr__(title_line))
 			rep.append(";".join([line_data_title, ] + content[line_data_title]))
 
-		logger.info("Write summary")
+		logger.debug("Write summary")
 		with open(output_file, "w") as f:
 			f.write(os.linesep.join(rep))
 

diff --git a/data_request_api/stable/query/vocabulary_server.py b/data_request_api/stable/query/vocabulary_server.py
@@ -10,21 +10,19 @@
 import copy
 from collections import defaultdict
 
-import six
-
 from data_request_api.stable.utilities.logger import get_logger
 from data_request_api.stable.utilities.tools import read_json_file
 
 
 def is_link_id_or_value(elt):
-	if isinstance(elt, six.string_types) and elt.startswith("link::"):
+	if isinstance(elt, str) and elt.startswith("link::"):
 		return True, elt.replace("link::", "")
 	else:
 		return False, elt
 
 
 def build_link_from_id(elt):
-	if not isinstance(elt, six.string_types) or elt.startswith("link::"):
+	if not isinstance(elt, str) or elt.startswith("link::"):
 		return elt
 	else:
 		return f"link::{elt}"
@@ -125,7 +123,7 @@ def get_element(self, element_type, element_id, element_key=None, default=False,
 			if id_type in ["id", ] and element_id in element_type_ids:
 				value = self.vocabulary_server[element_type][element_id]
 				found = True
-			elif isinstance(id_type, six.string_types):
+			elif isinstance(id_type, str):
 				if element_id is None:
 					raise ValueError("None element_id found")
 				value = list()

diff --git a/data_request_api/stable/utilities/logger.py b/data_request_api/stable/utilities/logger.py
@@ -8,7 +8,6 @@
 
 import logging
 import os
-import six
 import sys
 
 
@@ -47,7 +46,7 @@ def get_logger():
 
 
 def log_level_to_int(level):
-    if isinstance(level, six.string_types):
+    if isinstance(level, str):
         if level.lower() in ['debug', ]:
             return logging.DEBUG
         elif level.lower() in ['critical', ]:

diff --git a/data_request_api/stable/utilities/tools.py b/data_request_api/stable/utilities/tools.py
@@ -29,6 +29,12 @@ def read_json_input_file_content(filename):
 
 
 def write_json_output_file_content(filename, content, **kwargs):
+    logger = get_logger()
+    logger.debug(f"Writing file {filename}.")
+    dirname = os.path.dirname(filename)
+    if dirname and not os.path.isdir(dirname):  # a bare file name has no parent directory to create
+        logger.warning(f"Create directory {dirname}")
+        os.makedirs(dirname, exist_ok=True)  # tolerate a concurrent creation of the same directory
     with open(filename, "w") as fic:
         defaults = dict(indent=4, allow_nan=True, sort_keys=True)
         defaults.update(kwargs)

diff --git a/env.yml b/env.yml
@@ -9,3 +9,4 @@ dependencies:
   - requests
   - bs4
   - coverage
+  - pytest
diff --git a/launch_test_with_coverage.sh b/launch_test_with_coverage.sh
@@ -0,0 +1,18 @@
+#!/usr/bin/env bash
+# -*- coding: utf-8 -*-
+
+coverage erase  # discard coverage data left over from previous runs
+
+coverage run  # no arguments: the command comes from command_line in [tool.coverage.run] of pyproject.toml
+# TODO: move `set -e` before the `coverage run` above once the tests are fixed (presumably kept after it so failing tests do not abort the script yet)
+set -e
+coverage run --parallel-mode scripts/database_transformation.py --output_dir="test" --dreq_export_version="raw"
+coverage run --parallel-mode scripts/database_transformation.py --output_dir="test" --dreq_export_version="release"
+coverage run --parallel-mode scripts/workflow_example.py
+rm -f "requested_v1.0.json" "requested_raw.json"
+coverage run --parallel-mode scripts/workflow_example_2.py --output_dir="test" --dreq_export_version="raw"
+coverage run --parallel-mode scripts/workflow_example_2.py --output_dir="test" --dreq_export_version="release"
+
+coverage combine  # merge the data files written by the parallel-mode runs above
+
+coverage html
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,3 +31,33 @@ Issues = "https://github.com/CMIP-Data-Request/CMIP7_DReq_Software/issues"
 # modules and functions that act as command line utilities to be specified here
 # use pip install -e . from the root directory of the repository to set up the entry points
 export_dreq_lists_json = "data_request_api.command_line.export_dreq_lists_json:main"
+
+[tool.pytest.ini_options]
+cache_dir = "tests/.pytest_cache"
+
+[tool.coverage.run]
+branch = true
+parallel = true
+command_line = "-m pytest"
+source = [
+    "data_request_api/stable"
+    ]
+data_file = "tests/.coverage"
+
+[tool.coverage.report]
+fail_under = 50
+exclude_lines = [
+    'if __name__ == "__main__":'
+    ]
+
+[tool.coverage.html]
+directory = "tests/htmlcov"
+
+[tool.coverage.xml]
+output = "tests/coverage.xml"
+
+[tool.coverage.json]
+output = "tests/coverage.json"
+
+[tool.coverage.lcov]
+output = "tests/coverage.lcov"  # under tests/ like the html, xml and json reports
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,3 +9,4 @@ dependencies: @@
       - requests
       - bs4
       - coverage
+      - pytest