Skip to content

Commit

Permalink
Merge pull request #211 from MITLibraries/TIMX-404-establish-feature-…
Browse files Browse the repository at this point in the history
…flagging-pathways

TIMX 404 - Establish feature flag branching for parquet work
  • Loading branch information
ghukill authored Nov 22, 2024
2 parents 68bef31 + d398153 commit d3a6018
Show file tree
Hide file tree
Showing 10 changed files with 745 additions and 329 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ help: ## Print this message
### Dependency commands ###
install: ## Install script and dependencies
pipenv install --dev
pipenv run pre-commit install

update: install ## Update all Python dependencies
pipenv clean
Expand Down
8 changes: 5 additions & 3 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,21 @@ beautifulsoup4 = "*"
click = "*"
jsonlines = "*"
lxml = "*"
lxml-stubs = "*"
pyarrow = "*"
python-dateutil = "*"
sentry-sdk = "*"
smart-open = {version = "*", extras = ["s3"]}
types-python-dateutil = "*"
lxml-stubs = "*"

[dev-packages]
black = "*"
coveralls = "*"
mypy = "*"
pytest = "*"
ipython = "*"
mypy = "*"
pre-commit = "*"
pyarrow-stubs = "*"
pytest = "*"
ruff = "*"
safety = "*"

Expand Down
712 changes: 399 additions & 313 deletions Pipfile.lock

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ STATUS_UPDATE_INTERVAL=### The transform process logs the # of records transform
WORKSPACE=### Set to `dev` for local development, this will be set to `stage` and `prod` in those environments by Terraform.
```

### Optional

```shell
ETL_VERSION=### Version number of the TIMDEX ETL infrastructure. This can be used to align application behavior with the requirements of other applications in the TIMDEX ETL pipeline.
```

## CLI commands

### `transform`
Expand Down
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ select = ["ALL", "PT"]

ignore = [
# default
"ANN101",
"ANN102",
"COM812",
"COM812",
"D107",
"N812",
"PTH",
Expand All @@ -44,7 +42,7 @@ ignore = [
"PLR0913",
"PLR0915",
"S320",
"S321",
"S321",
]

# allow autofix behavior for specified rules
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
@pytest.fixture(autouse=True)
def _test_env(monkeypatch):
monkeypatch.setenv("WORKSPACE", "test")
# NOTE: FEATURE FLAG: remove after v2 work is complete
monkeypatch.setenv("ETL_VERSION", "1")


@pytest.fixture(autouse=True, scope="session")
Expand Down
92 changes: 92 additions & 0 deletions tests/test_temporary_feature_flagging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# ruff: noqa: PLR2004

from unittest.mock import patch

import pytest

from transmogrifier.cli import main
from transmogrifier.config import get_etl_version

# NOTE: FEATURE FLAG: this test file can be removed completely after v2 parquet work.


@pytest.fixture
def mocked_v1_write_method():
with patch(
"transmogrifier.cli.Transformer.transform_and_write_output_files"
) as mocked_method:
yield mocked_method


@pytest.fixture
def mocked_v2_write_method():
with patch(
"transmogrifier.cli.Transformer.write_to_parquet_dataset"
) as mocked_method:
yield mocked_method


def test_etl_version_helper_function_no_env_var_is_v1(monkeypatch):
monkeypatch.delenv("ETL_VERSION")
assert get_etl_version() == 1


def test_etl_version_helper_function_env_var_is_1_is_v1(monkeypatch):
monkeypatch.setenv("ETL_VERSION", "1")
assert get_etl_version() == 1


def test_etl_version_helper_function_env_var_is_2_is_v2(monkeypatch):
monkeypatch.setenv("ETL_VERSION", "2")
assert get_etl_version() == 2


@pytest.mark.parametrize(
"env_value",
[
"pumpkin_pie", # throws ValueError because not integer
"3", # throws ValueError because not 1 or 2
],
)
def test_etl_version_helper_function_env_var_value_is_unsupported(env_value, monkeypatch):
monkeypatch.setenv("ETL_VERSION", env_value)
with pytest.raises(ValueError): # noqa: PT011
get_etl_version()


def test_cli_etl_version_v1_invokes_v1_code(
mocked_v1_write_method, monkeypatch, runner, tmp_path
):
monkeypatch.setenv("ETL_VERSION", "1")
mocked_v1_write_method.side_effect = Exception("Do not proceed")
runner.invoke(
main,
[
"-i",
"/does/not/exist/alma-2023-01-13-full-extracted-records-to-index_01.xml",
"-o",
"/does/not/exist/libguides.json",
"-s",
"libguides",
],
)
mocked_v1_write_method.assert_called()


def test_cli_etl_version_v2_invokes_v2_code(
mocked_v2_write_method, monkeypatch, runner, tmp_path
):
monkeypatch.setenv("ETL_VERSION", "2")
mocked_v2_write_method.side_effect = Exception("Do not proceed")
runner.invoke(
main,
[
"-i",
"/does/not/exist/alma-2023-01-13-full-extracted-records-to-index_01.xml",
"-o",
"/does/not/exist/dataset",
"-s",
"libguides",
],
)
mocked_v2_write_method.assert_called()
18 changes: 16 additions & 2 deletions transmogrifier/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@

import click

from transmogrifier.config import SOURCES, configure_logger, configure_sentry
from transmogrifier.config import (
SOURCES,
configure_logger,
configure_sentry,
get_etl_version,
)
from transmogrifier.sources.transformer import Transformer

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -55,7 +60,15 @@ def main(
logger.info("Running transform for source %s", source)

transformer = Transformer.load(source, input_file, run_id=run_id)
transformer.transform_and_write_output_files(output_file)

# NOTE: FEATURE FLAG: branching logic will be removed after v2 work is complete
etl_version = get_etl_version()
match etl_version:
case 1:
transformer.transform_and_write_output_files(output_file)
case 2:
transformer.write_to_parquet_dataset(output_file)

logger.info(
(
"Completed transform, total records processed: %d, "
Expand All @@ -68,6 +81,7 @@ def main(
transformer.skipped_record_count,
len(transformer.deleted_records),
)

elapsed_time = perf_counter() - start_time
logger.info(
"Total time to complete transform: %s", str(timedelta(seconds=elapsed_time))
Expand Down
9 changes: 9 additions & 0 deletions transmogrifier/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,12 @@ def load_external_config(
else:
message = f"Unrecognized file_type parameter: {file_type}"
raise ValueError(message)


# NOTE: FEATURE FLAG: function will be removed after v2 work is complete
def get_etl_version() -> Literal[1, 2]:
etl_version = int(os.environ.get("ETL_VERSION", "1"))
if etl_version not in [1, 2]:
message = f"ETL_VERSION '{etl_version}' not supported"
raise ValueError(message)
return etl_version # type: ignore[return-value]
Loading

0 comments on commit d3a6018

Please sign in to comment.