Coverage update #42

Merged: 7 commits, merged on Jan 24, 2025
Changes shown from 2 commits
10 changes: 10 additions & 0 deletions .coveragerc
@@ -0,0 +1,10 @@
[run]
parallel = True
command_line = -m pytest
source =
    data_request_api/stable

[report]
fail_under = 50
exclude_lines =
    if __name__ == "__main__":
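
For context, a minimal sketch of what this configuration makes a bare coverage invocation do, expressed with coverage.py's Python API (illustrative only; pytest would normally be launched through the configured command_line):

    import coverage

    cov = coverage.Coverage(config_file=".coveragerc")  # picks up parallel, source, fail_under
    cov.start()
    # ... run the test suite here, e.g. pytest.main() ...
    cov.stop()
    cov.save()  # with parallel = True this writes a per-process .coverage.* data file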
Empty file added data_request_api/__init__.py
Empty file.
11 changes: 4 additions & 7 deletions data_request_api/dev/JA/workflow_example_test.py
@@ -49,14 +49,11 @@
 stop

-import sys
 import json
-add_paths = ['../sandbox/MS/dreq_api/', '../sandbox/JA', '../sandbox/GR']
-for path in add_paths:
-    if path not in sys.path:
-        sys.path.append(path)
-import dreq_content as dc
-import dreq_query as dq
+
+from data_request_api.stable.content.dreq_api import dreq_content as dc
+from data_request_api.stable.query import dreq_query as dq

 from importlib import reload
 reload(dq)
12 changes: 2 additions & 10 deletions data_request_api/stable/content/dreq_api/consolidate_export.py
@@ -1,19 +1,11 @@
 import json
 import os
 import re
-import sys
 import warnings

-# TODO: remove after initial "sandbox" dev period
-add_paths = [os.path.abspath(os.sep.join([os.path.abspath(os.path.dirname(__file__)), "../.."])), ]
-for path in add_paths:
-    if path not in sys.path:
-        sys.path.append(path)
-from utilities.logger import get_logger  # noqa
-# from ...transform.logger import get_logger  # noqa
+from data_request_api.stable.utilities.logger import get_logger  # noqa

-# from .mapping_table import version_consistency
-from mapping_table import version_consistency
+from .mapping_table import version_consistency

 # UID generation
 default_count = 0
16 changes: 4 additions & 12 deletions data_request_api/stable/content/dreq_api/dreq_content.py
@@ -7,21 +7,13 @@
 from filecmp import cmp
 from shutil import move

-# from . import consolidate_export as ce
-import consolidate_export as ce
+from . import consolidate_export as ce
 import pooch
 import requests
 from bs4 import BeautifulSoup
-# from .mapping_table import mapping_table
-from mapping_table import mapping_table
-
-# TODO: remove after initial "sandbox" dev period
-add_paths = [os.path.abspath(os.sep.join([os.path.abspath(os.path.dirname(__file__)), "../.."])), ]
-for path in add_paths:
-    if path not in sys.path:
-        sys.path.append(path)
-from utilities.logger import get_logger  # noqa
-# from ...transform.logger import get_logger  # noqa
+from .mapping_table import mapping_table
+
+from data_request_api.stable.utilities.logger import get_logger  # noqa


 # Suppress pooch info output
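
Since these modules now use package-relative imports, they can no longer be run as loose scripts from inside their directory and must instead be imported through the package; a minimal sketch (the retrieve arguments mirror those used elsewhere in this diff):

    # The package context makes the relative imports inside dreq_content resolvable.
    from data_request_api.stable.content.dreq_api import dreq_content as dc

    versions = dc.retrieve("v1.0", export="release", consolidate=False)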
@@ -1,6 +1,6 @@
 import os

-import dreq_content as dc
+from . import dreq_content as dc
 import pytest
40 changes: 38 additions & 2 deletions data_request_api/stable/content/dump_transformation.py
@@ -16,8 +16,9 @@

 import six

-from utilities.logger import get_logger, change_log_level, change_log_file
-from utilities.tools import read_json_input_file_content, write_json_output_file_content
+from data_request_api.stable.utilities.logger import get_logger, change_log_level, change_log_file
+from data_request_api.stable.utilities.tools import read_json_input_file_content, write_json_output_file_content
+from .dreq_api import dreq_content as dc


 def correct_key_string(input_string, *to_remove_strings):
@@ -438,6 +439,41 @@ def transform_content(content, version):
     raise TypeError(f"Deal with dict types, not {type(content).__name__}")


+def get_transformed_content(version="latest_stable", export_version="release", use_consolidation=False,
+                            force_retrieve=False, output_dir=None,
+                            default_transformed_content_pattern="{kind}_{export_version}_content.json"):
+    # Download specified version of data request content (if not locally cached)
+    versions = dc.retrieve(version, export=export_version, consolidate=use_consolidation)
+
+    # Check that there is only one version associated
+    if len(versions) > 1:
+        raise ValueError("Could only deal with one version.")
+    elif len(versions) == 0:
+        raise ValueError("No version found.")
+    else:
+        version = list(versions)[0]
+        content = versions[version]
+        if output_dir is None:
+            output_dir = os.path.dirname(content)
+        if not os.path.exists(output_dir):
+            os.makedirs(output_dir)
+        DR_content = default_transformed_content_pattern.format(kind="DR", export_version=export_version)
+        VS_content = default_transformed_content_pattern.format(kind="VS", export_version=export_version)
+        DR_content = os.sep.join([output_dir, DR_content])
+        VS_content = os.sep.join([output_dir, VS_content])
+        if force_retrieve or not(all(os.path.exists(filepath) for filepath in [DR_content, VS_content])):
+            if os.path.exists(DR_content):
+                os.remove(DR_content)
+            if os.path.exists(VS_content):
+                os.remove(VS_content)
+        if not(all(os.path.exists(filepath) for filepath in [DR_content, VS_content])):
+            content = dc.load(version, export=export_version, consolidate=use_consolidation)
+            data_request, vocabulary_server = transform_content(content, version)
+            write_json_output_file_content(DR_content, data_request)
+            write_json_output_file_content(VS_content, vocabulary_server)
+        return DR_content, VS_content
+
+
 if __name__ == "__main__":
     change_log_file(default=True)
     change_log_level("debug")
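
A short usage sketch of the new get_transformed_content helper (the version string is illustrative):

    from data_request_api.stable.content.dump_transformation import get_transformed_content

    # Downloads the requested export once (or reuses the cache), transforms it,
    # and returns the paths of the two derived JSON files.
    DR_file, VS_file = get_transformed_content(version="v1.0", export_version="release")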
16 changes: 8 additions & 8 deletions data_request_api/stable/query/data_request.py
@@ -14,10 +14,10 @@

 import six

-from utilities.logger import get_logger, change_log_file, change_log_level
-from content.dump_transformation import transform_content
-from utilities.tools import read_json_file
-from query.vocabulary_server import VocabularyServer, is_link_id_or_value, build_link_from_id
+from data_request_api.stable.utilities.logger import get_logger, change_log_file, change_log_level
+from data_request_api.stable.content.dump_transformation import transform_content
+from data_request_api.stable.utilities.tools import read_json_file
+from data_request_api.stable.query.vocabulary_server import VocabularyServer, is_link_id_or_value, build_link_from_id

 version = "0.1"

@@ -736,7 +736,7 @@ def export_summary(self, lines_data, columns_data, output_file, sorting_line="id",
                        sorting_column="id", title_column="name", filtering_requests=dict(), filtering_operation="all",
                        filtering_skip_if_missing=False):
         logger = get_logger()
-        logger.info(f"Generate summary for {lines_data}/{columns_data}")
+        logger.debug(f"Generate summary for {lines_data}/{columns_data}")
         filtered_data = self.filter_elements_per_request(element_type=lines_data, requests=filtering_requests,
                                                          operation=filtering_operation,
                                                          skip_if_missing=filtering_skip_if_missing)
@@ -750,7 +750,7 @@
         logger.debug(f"{nb_lines} elements found for {lines_data}")
         logger.debug(f"{len(columns_title)} found elements for {columns_data}")

-        logger.info("Generate summary")
+        logger.debug("Generate summary")
         content = defaultdict(list)
         for (i, data) in enumerate(columns_datasets):
             logger.debug(f"Deal with column {i}/{len(columns_title)}")
@@ -764,14 +764,14 @@
             else:
                 content[line_data_title].append("")

-        logger.info("Format summary")
+        logger.debug("Format summary")
         rep = list()
         rep.append(";".join([table_title, ] + columns_title))
         for line_data in filtered_data:
             line_data_title = str(line_data.__getattr__(title_line))
             rep.append(";".join([line_data_title, ] + content[line_data_title]))

-        logger.info("Write summary")
+        logger.debug("Write summary")
         with open(output_file, "w") as f:
             f.write(os.linesep.join(rep))
3 changes: 1 addition & 2 deletions data_request_api/stable/query/dreq_query.py
@@ -17,8 +17,7 @@

 # import dreq_classes
 # reload(dreq_classes)

-# from .dreq_classes import dreq_table, expt_request, UNIQUE_VAR_NAME, PRIORITY_LEVELS
-from dreq_classes import dreq_table, expt_request, UNIQUE_VAR_NAME, PRIORITY_LEVELS
+from .dreq_classes import dreq_table, expt_request, UNIQUE_VAR_NAME, PRIORITY_LEVELS

 DREQ_VERSION = ''  # if a tagged version is being used, set this in calling script
13 changes: 4 additions & 9 deletions data_request_api/stable/query/get_variables_metadata.py
@@ -6,15 +6,10 @@
 import json
 import os
 import hashlib
-add_paths = []
-add_paths.append('../content/dreq_api')
-add_paths.append('../transform')
-for path in add_paths:
-    if path not in sys.path:
-        sys.path.append(path)
-import dreq_content as dc
-import dreq_query as dq
-import dreq_classes
+
+from data_request_api.stable.content.dreq_api import dreq_content as dc
+from . import dreq_query as dq
+from . import dreq_classes

 from collections import OrderedDict

Comment on the relative imports: may be better, in case this script moves:

    from data_request_api.stable.query import dreq_query as dq
    from data_request_api.stable.query import dreq_classes

4 changes: 2 additions & 2 deletions data_request_api/stable/query/vocabulary_server.py
@@ -12,8 +12,8 @@

 import six

-from utilities.logger import get_logger
-from utilities.tools import read_json_file
+from data_request_api.stable.utilities.logger import get_logger
+from data_request_api.stable.utilities.tools import read_json_file


 def is_link_id_or_value(elt):
8 changes: 7 additions & 1 deletion data_request_api/stable/utilities/tools.py
@@ -9,7 +9,7 @@
 import json
 import os

-from utilities.logger import get_logger
+from .logger import get_logger


 def read_json_file(filename):
@@ -29,6 +29,12 @@ def read_json_input_file_content(filename):


 def write_json_output_file_content(filename, content, **kwargs):
+    logger = get_logger()
+    logger.debug(f"Writing file {filename}.")
+    dirname = os.path.dirname(filename)
+    if not os.path.isdir(dirname):
+        logger.warning(f"Create directory {dirname}")
+        os.makedirs(dirname)
     with open(filename, "w") as fic:
         defaults = dict(indent=4, allow_nan=True, sort_keys=True)
         defaults.update(kwargs)
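
A small usage sketch of the hardened writer (the path is illustrative and assumed to have a directory component):

    from data_request_api.stable.utilities.tools import write_json_output_file_content

    # The parent directory no longer needs to exist: it is created (with a warning
    # in the log) before the JSON file is written; kwargs override the json.dump defaults.
    write_json_output_file_content("out/nested/example.json", {"a": 1}, indent=2)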
1 change: 1 addition & 0 deletions env.yml
@@ -9,3 +9,4 @@ dependencies:
 - requests
 - bs4
 - coverage
+- pytest
20 changes: 20 additions & 0 deletions launch_test_with_coverage.sh

Comment: Ran for me, and the html report htmlcov/index.html is very nice! It took a long time to run, maybe 20 min... should it?

Does the six module need to be added as a dependency?

Can this file and .coveragerc be moved into tests/ just to keep the top-level repo dir uncluttered?

Author: I don't think we need to add the six package as a dependency, but I may be wrong since we are on Python 3. Did you have issues with it when you ran the API for the first time?

The htmlcov output directory can be customised through .coveragerc. I will change the default value to a place in the tests directory in my branch. I'm not sure that we can move .coveragerc itself (it won't be taken into account if I do so).

Author: I have checked, and I changed the locations of the temporary files and of .coveragerc too.

Author (@rigoudyg, Jan 23, 2025): To be done once #39 is merged: change the default cache directory for pytest in the pyproject.toml file:

    [tool.pytest.ini_options]
    cache_dir = "tests/.pytest_cache"

Author: The tests take quite long because some scripts perform the splitting between the single database and the several databases.

Comment: I have just tested the launch_test_with_coverage.sh script and it works fine, running all the coverage tests (about 20 min, as for @JamesAnstey) with a nice html view. Well done @rigoudyg 👍

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# -*- coding: utf-8 -*-

coverage erase

coverage run  # runs the command_line configured in .coveragerc, i.e. -m pytest
# To be moved earlier (before the test runs) once the tests are fixed
set -e
coverage run --parallel-mode scripts/database_transformation.py --output_dir="test" --dreq_export_version="raw"
coverage run --parallel-mode scripts/database_transformation.py --output_dir="test" --dreq_export_version="release"
coverage run --parallel-mode scripts/export_dreq_lists_json.py --all_opportunities "v1.0" "result.json"
rm -f result.json
coverage run --parallel-mode scripts/workflow_example.py
rm -f "requested_v1.0.json" "requested_raw.json"
coverage run --parallel-mode scripts/workflow_example_2.py --output_dir="test" --dreq_export_version="raw"
coverage run --parallel-mode scripts/workflow_example_2.py --output_dir="test" --dreq_export_version="release"

coverage combine

coverage html
3 changes: 2 additions & 1 deletion requirements.txt
@@ -5,4 +5,5 @@ openpyxl
 pandas
 requests
 bs4
-coverage
+coverage
+pytest
86 changes: 54 additions & 32 deletions scripts/database_transformation.py
@@ -8,46 +8,68 @@

 import os
 import sys
+import argparse
+import tempfile

-add_paths = ['../data_request_api/stable/content/dreq_api/',
-             '../data_request_api/stable']
-for path in add_paths:
-    if path not in sys.path:
-        sys.path.append(path)
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

-import dreq_content as dc
-from content.dump_transformation import transform_content
-from query.data_request import DataRequest
-from utilities.tools import write_json_output_file_content
-from utilities.logger import change_log_file, change_log_level
+import data_request_api.stable.content.dreq_api.dreq_content as dc
+from data_request_api.stable.content.dump_transformation import transform_content, get_transformed_content
+from data_request_api.stable.query.data_request import DataRequest
+from data_request_api.stable.utilities.tools import write_json_output_file_content
+from data_request_api.stable.utilities.logger import change_log_file, change_log_level

-# Set up log file (default to stdout) and log level
-change_log_file(default=True)
-change_log_level("debug")
-
-### Step 1: Get the content of the DR
-# Define content version to be used
-# use_dreq_version = 'v1.0alpha'
-# use_dreq_version = "first_export"
-# use_dreq_version = 'new_export_15Oct2024'
-use_dreq_version = "v1.0"
-use_export_versions = ["raw", "release"]
-output_directory = f'{dc._dreq_res}/{use_dreq_version}'
-for use_export_version in use_export_versions:
+parser = argparse.ArgumentParser()
+parser.add_argument("--log_level", default="info", help="Log level")
+parser.add_argument("--dreq_version", default="latest_stable", help="Version to be used")
+parser.add_argument("--dreq_export_version", default="release", help="Export version to be used")
+parser.add_argument("--use_consolidation", default=False, help="Should content consolidation be used?")
+parser.add_argument("--output_dir", choices=["default", "test", "customed"], default="default",
+                    help="Output directory management")
+parser.add_argument("--output_dir_customed", default=None, help="Customed output directory to be used")
+args = parser.parse_args()
+
+
+def database_transformation(output_dir, dreq_version="latest_stable", dreq_export_version="release",
+                            use_consolidation=False):
     # Download specified version of data request content (if not locally cached)
-    content = dc.load(use_dreq_version, export=use_export_version, consolidate=False)
+    versions = dc.retrieve(dreq_version, export=dreq_export_version, consolidate=use_consolidation)

-    ### Step 2: Transform content into DR and VS
-    data_request, vocabulary_server = transform_content(content, version=use_dreq_version)
+    for (version, content) in versions.items():
+        # Load the content
+        content = dc.load(version, export=dreq_export_version, consolidate=use_consolidation)

-    ### Step 3: Write down the two files
-    DR_file = os.path.sep.join([output_directory, f"DR_{use_export_version}_content.json"])
-    VS_file = os.path.sep.join([output_directory, f"VS_{use_export_version}_content.json"])
-    write_json_output_file_content(DR_file, data_request)
-    write_json_output_file_content(VS_file, vocabulary_server)
+        # Transform content into DR and VS
+        data_request, vocabulary_server = transform_content(content, version=dreq_version)

-    DR = DataRequest.from_separated_inputs(DR_input=DR_file, VS_input=VS_file)
+        # Write down the two files
+        DR_file = os.path.sep.join([output_dir, version, f"DR_{dreq_export_version}_content.json"])
+        VS_file = os.path.sep.join([output_dir, version, f"VS_{dreq_export_version}_content.json"])
+        write_json_output_file_content(DR_file, data_request)
+        write_json_output_file_content(VS_file, vocabulary_server)
+
+        # Test that the two files do not produce issues with the API
+        DR = DataRequest.from_separated_inputs(DR_input=DR_file, VS_input=VS_file)
+
+
+# Set up log file (default to stdout) and log level
+change_log_file(default=True)
+change_log_level(args.log_level)
+
+if args.output_dir in ["default", "customed"]:

Comment: does this implement this feature from the PR description?

    • be able to specify the directory in which the database are downloaded (useful for users which could use an administrator install and could not write in the source directory)

Author (@rigoudyg, Jan 23, 2025): No, it only deals with the files derived from the export file.

+    if args.output_dir in ["default", ]:
+        output_dir = dc._dreq_res
+    elif args.output_dir_customed is not None:
+        output_dir = args.output_dir_customed
+    else:
+        raise ValueError("If --output_dir='customed', --output_dir_customed should be defined with a not None value.")
+
+    database_transformation(output_dir=output_dir, dreq_version=args.dreq_version,
+                            dreq_export_version=args.dreq_export_version, use_consolidation=args.use_consolidation)
+elif args.output_dir in ["test", ]:
+    with tempfile.TemporaryDirectory() as output_dir:
+        database_transformation(output_dir=output_dir, dreq_version=args.dreq_version,
+                                dreq_export_version=args.dreq_export_version, use_consolidation=args.use_consolidation)
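
For completeness, a hedged sketch of exercising the new command-line interface from Python (it mirrors what launch_test_with_coverage.sh does; the interpreter name and the relative script path are assumptions about the local setup):

    import subprocess

    # Run the refactored script end-to-end in a throwaway directory
    # (--output_dir=test wraps the run in tempfile.TemporaryDirectory).
    subprocess.run(["python", "scripts/database_transformation.py",
                    "--output_dir=test", "--dreq_export_version=release"],
                   check=True)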