Skip to content

Commit

Permalink
refactor: use pydantic for configuration handling #256
Browse files Browse the repository at this point in the history
  • Loading branch information
Viktor Bozhinov committed Nov 4, 2021
1 parent d7f7bef commit 0cca696
Show file tree
Hide file tree
Showing 10 changed files with 150 additions and 137 deletions.
211 changes: 110 additions & 101 deletions datagateway_api/common/config.py
Original file line number Diff line number Diff line change
@@ -1,102 +1,126 @@
from enum import Enum
import json
import logging
from pathlib import Path
import sys
from typing import Optional

import requests
from pydantic import (
BaseModel,
StrictBool,
StrictInt,
StrictStr,
ValidationError,
validator,
)


log = logging.getLogger()


class APIConfigOptions(Enum):
class TestUserCredentials(BaseModel):
    """
    Model for the `test_user_credentials` config option. Holds the username and
    password of the user that authentication-dependent tests sign in as; only
    used during tests, not in the typical running of the API.
    """

    username: StrictStr
    password: StrictStr


class APIConfig(BaseModel):
    """
    Configuration model class that implements pydantic's BaseModel class to allow
    for validation of the API config data using Python type annotations. It ensures
    that all config options exist before getting too far into the setup of the API.
    It takes the backend into account, meaning only the config options for the
    backend in use are required.

    If a mandatory config option is missing or misspelled, or has a wrong value
    type, pydantic raises a validation error with a breakdown of what was wrong and
    the application is exited.

    Config options used for testing are not checked here as they should only be
    used during tests, not in the typical running of the API.

    Some options used when running the API (host, debug_mode etc.) aren't mandatory
    when running the API in production (these options aren't used in the `wsgi.py`
    entrypoint). However, they are required when using `main.py` as an entrypoint.
    In any case of these specific missing config options when using that
    entrypoint, they are checked at API startup so any missing options will be
    caught quickly.
    """

    backend: StrictStr
    client_cache_size: Optional[StrictInt]
    client_pool_init_size: Optional[StrictInt]
    client_pool_max_size: Optional[StrictInt]
    db_url: Optional[StrictStr]
    debug_mode: StrictBool
    flask_reloader: StrictBool
    generate_swagger: StrictBool
    host: StrictStr
    icat_check_cert: Optional[StrictBool]
    icat_url: Optional[StrictStr]
    log_level: StrictStr
    log_location: StrictStr
    port: StrictStr
    test_mechanism: StrictStr
    test_user_credentials: TestUserCredentials

    @classmethod
    def load(cls, path=Path(__file__).parent.parent / "config.json"):
        """
        Loads the config data from the JSON file and returns it as an APIConfig
        pydantic model. Exits the application if it fails to locate the JSON config
        file, if the file contains malformed JSON, or if the APIConfig model
        validation fails.

        :param path: path to the configuration file
        :return: APIConfig model object that contains the config data
        """
        try:
            with open(path, encoding="utf-8") as target:
                data = json.load(target)
            return cls(**data)
        # json.JSONDecodeError is included so that malformed JSON in the config
        # file produces a clean exit message rather than an unhandled traceback
        except (IOError, json.JSONDecodeError, ValidationError) as error:
            sys.exit(
                f"An error occurred while trying to load the config data: {error}",
            )

    @validator("db_url", always=True)
    def require_db_config_value(cls, value, values):  # noqa: B902, N805
        """
        By default the `db_url` config field is optional so that it does not have
        to be present in the config file if `backend` is set to `python_icat`.
        However, if the `backend` is set to `db`, this validator essentially makes
        the `db_url` config field mandatory. This means that an error is raised, at
        which point the application exits, if a `db_url` config value is not
        present in the config file.

        :param cls: :class:`APIConfig` pointer
        :param value: The value of the given config field
        :param values: The config field values loaded before the given config field
        """
        if "backend" in values and values["backend"] == "db" and value is None:
            raise TypeError("field required")
        return value

    @validator(
        "client_cache_size",
        "client_pool_init_size",
        "client_pool_max_size",
        "icat_check_cert",
        "icat_url",
        always=True,
    )
    def require_icat_config_value(cls, value, values):  # noqa: B902, N805
        """
        By default the above config fields that are passed to the `@validator`
        decorator are optional so that they do not have to be present in the config
        file if `backend` is set to `db`. However, if the `backend` is set to
        `python_icat`, this validator essentially makes these config fields
        mandatory. This means that an error is raised, at which point the
        application exits, if any of these config values are not present in the
        config file.

        :param cls: :class:`APIConfig` pointer
        :param value: The value of the given config field
        :param values: The config field values loaded before the given config field
        """
        if "backend" in values and values["backend"] == "python_icat" and value is None:
            raise TypeError("field required")
        return value

    def set_backend_type(self, backend_type):
        """
        Set the backend type at runtime. During tests, the config value for the
        backend type must be fetched. This must be done using this module (rather
        than directly importing and checking the Flask app's config) to avoid
        circular import issues.

        :param backend_type: name of the backend the API should use
        """
        self.backend = backend_type

    class Config:
        # Re-validate fields on assignment so runtime changes (e.g. via
        # `set_backend_type()`) are type-checked too
        validate_assignment = True


# Load and validate the config once at import time so other modules can simply
# `from datagateway_api.common.config import config`
config = APIConfig.load()
3 changes: 2 additions & 1 deletion datagateway_api/common/datagateway_api/icat/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
SkipFilter,
WhereFilter,
)
from datagateway_api.common.helpers import get_icat_properties


log = logging.getLogger()
Expand Down Expand Up @@ -213,7 +214,7 @@ def __init__(self, skip_value):
super().__init__(skip_value)

def apply_filter(self, query):
    """
    Apply this skip filter to the given ICAT query.

    :param query: ICAT query that the skip value should be applied to
    """
    # The server's `maxEntities` property caps how many entities one query may
    # return, so it must be factored in when setting the query's limits
    properties = get_icat_properties(config.icat_url, config.icat_check_cert)
    icat_set_limit(query, self.skip_value, properties["maxEntities"])


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from icat.client import Client
from object_pool import ObjectPool

from datagateway_api.common.config import APIConfigOptions, config
from datagateway_api.common.config import config

log = logging.getLogger()

Expand All @@ -13,8 +13,7 @@ class ICATClient(Client):

def __init__(self):
    # Connect to the ICAT server defined in the API's config
    super().__init__(config.icat_url, checkCert=config.icat_check_cert)
    # When clients are cleaned up, sessions won't be logged out
    self.autoLogout = False
Expand All @@ -36,8 +35,8 @@ def create_client_pool():

return ObjectPool(
ICATClient,
min_init=config.get_config_value(APIConfigOptions.CLIENT_POOL_INIT_SIZE),
max_capacity=config.get_config_value(APIConfigOptions.CLIENT_POOL_MAX_SIZE),
min_init=config.client_pool_init_size,
max_capacity=config.client_pool_max_size,
max_reusable=0,
expires=0,
)
6 changes: 2 additions & 4 deletions datagateway_api/common/datagateway_api/icat/lru_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from cachetools.lru import LRUCache

from datagateway_api.common.config import APIConfigOptions, config
from datagateway_api.common.config import config

log = logging.getLogger()

Expand All @@ -19,9 +19,7 @@ class ExtendedLRUCache(LRUCache):
"""

def __init__(self):
    # Cache capacity is driven by the `client_cache_size` config option
    super().__init__(maxsize=config.client_cache_size)

def popitem(self):
key, client = super().popitem()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging

from datagateway_api.common.config import APIConfigOptions, config
from datagateway_api.common.config import config
from datagateway_api.common.exceptions import (
ApiError,
FilterError,
Expand All @@ -27,7 +27,7 @@ def get_query_filter(request_filter):
:raises FilterError: If the filter name is not recognised
"""

backend_type = config.get_config_value(APIConfigOptions.BACKEND)
backend_type = config.backend
if backend_type == "db":
from datagateway_api.common.datagateway_api.database.filters import (
DatabaseDistinctFieldFilter as DistinctFieldFilter,
Expand Down
14 changes: 14 additions & 0 deletions datagateway_api/common/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from dateutil.tz.tz import tzlocal
from flask import request
from flask_restful import reqparse
import requests
from sqlalchemy.exc import IntegrityError

from datagateway_api.common.datagateway_api.database import models
Expand Down Expand Up @@ -135,6 +136,19 @@ def get_entity_object_from_name(entity_name):
)


def get_icat_properties(icat_url, icat_check_cert):
    """
    Fetch the ICAT server's properties via a plain HTTP request.

    ICAT properties can be retrieved using Python ICAT's client object, however
    this requires the client object to be authenticated which may not always be
    the case when requesting these properties, hence a HTTP request is sent as an
    alternative.

    :param icat_url: base URL of the ICAT server
    :param icat_check_cert: whether the server's TLS certificate is verified
    :return: the ICAT properties, parsed from the JSON response body
    """
    response = requests.get(f"{icat_url}/icat/properties", verify=icat_check_cert)
    return response.json()


def map_distinct_attributes_to_results(distinct_attributes, query_result):
"""
Maps the attribute names from a distinct filter onto the results given by the result
Expand Down
11 changes: 4 additions & 7 deletions datagateway_api/common/logger_setup.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import logging.config
from pathlib import Path

from datagateway_api.common.config import APIConfigOptions, config
from datagateway_api.common.config import config

# Destination of the API's rotating log file, taken from the config
LOG_FILE_NAME = Path(config.log_location)
logger_config = {
"version": 1,
"formatters": {
Expand All @@ -14,18 +14,15 @@
},
"handlers": {
"default": {
"level": config.get_config_value(APIConfigOptions.LOG_LEVEL),
"level": config.log_level,
"formatter": "default",
"class": "logging.handlers.RotatingFileHandler",
"filename": LOG_FILE_NAME,
"maxBytes": 5000000,
"backupCount": 10,
},
},
"root": {
"level": config.get_config_value(APIConfigOptions.LOG_LEVEL),
"handlers": ["default"],
},
"root": {"level": config.log_level, "handlers": ["default"]},
}


Expand Down
Loading

0 comments on commit 0cca696

Please sign in to comment.