Fix linter and dependency issues (DOI-DO#8)
carlosfelix2 committed Dec 5, 2024
1 parent 7830484 commit 1f1a9c9
Showing 7 changed files with 619 additions and 730 deletions.
3 changes: 3 additions & 0 deletions data/data-pipeline/data_pipeline/application.py
@@ -58,6 +58,7 @@
help=dataset_cli_help,
)


@click.group()
def cli():
"""Defines a click group for the commands below"""
@@ -415,6 +416,7 @@ def clear_data_source_cache(dataset: str):

log_goodbye()


@cli.command(
help="Generate scoring and tiles",
)
@@ -441,6 +443,7 @@ def full_run(ctx, use_cache):
ctx.invoke(etl_run, dataset=None, use_cache=use_cache)
ctx.invoke(full_post_etl)


def log_title(title: str, subtitle: str = None):
"""Logs a title in our fancy title format"""
logger.info("-" * LOG_LINE_WIDTH)
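The changes to application.py above only add blank lines. They follow PEP 8's convention of two blank lines around top-level functions and decorated commands, which formatters and linters such as black or pycodestyle's E302/E305 checks enforce. A minimal sketch of the convention, with illustrative command names that are not from this repository:

import click


@click.group()
def cli():
    """Top-level command group (illustrative)."""


@cli.command(help="Example subcommand")
def example():
    """Two blank lines separate each top-level definition."""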
2 changes: 2 additions & 0 deletions data/data-pipeline/data_pipeline/etl/downloader.py
@@ -10,6 +10,7 @@

logger = get_module_logger(__name__)


class Downloader:
"""A simple class to encapsulate the download capabilities of the application"""

@@ -44,6 +45,7 @@ def download_file_from_url(
file_contents = response.content
logger.debug("Downloaded.")
else:
# pylint: disable-next=broad-exception-raised
raise Exception(
f"HTTP response {response.status_code} from url {file_url}. Info: {response.content}"
)
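The # pylint: disable-next=broad-exception-raised comments introduced in this commit silence pylint's broad-exception-raised check (W0719 in recent pylint releases) for the single statement that follows, rather than for a whole block or file. A minimal standalone sketch of the pattern, using a made-up function that is not part of this repository:

def fetch(status_code: int, body: bytes) -> bytes:
    """Return the body on success, otherwise raise (illustrative only)."""
    if status_code == 200:
        return body
    # The directive applies only to the next line; defining a narrower
    # exception type would avoid the warning altogether.
    # pylint: disable-next=broad-exception-raised
    raise Exception(f"HTTP response {status_code}")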
@@ -39,6 +39,7 @@ def _choose_best_mask(
for mask in masks_in_priority_order:
if any(geo_df[mask][column_to_impute].notna()):
return mask
# pylint: disable-next=broad-exception-raised
raise Exception("No mask found")


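For context, the method touched above returns the first mask, in priority order, under which the column being imputed has any non-null data, and raises when no mask qualifies. A self-contained sketch of that selection pattern, with made-up column names and data:

import pandas as pd

df = pd.DataFrame({"income": [None, None, 52000.0], "state": ["PR", "PR", "NY"]})
masks_in_priority_order = [df["state"] == "PR", df["state"] == "NY"]

for mask in masks_in_priority_order:
    if df.loc[mask, "income"].notna().any():
        best_mask = mask
        break
else:
    # Mirrors the broad exception (and the pylint directive) used above.
    # pylint: disable-next=broad-exception-raised
    raise Exception("No mask found")

# best_mask ends up selecting the NY row, since the PR rows have no income data.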
21 changes: 13 additions & 8 deletions data/data-pipeline/data_pipeline/score/score_narwhal.py
@@ -1014,12 +1014,12 @@ def _mark_tribal_dacs(self) -> None:
)

def _mark_territory_dacs(self) -> None:
"""Territory tracts that are flagged as low income are Score N communities.
"""
"""Territory tracts that are flagged as low income are Score N communities."""
self.df[field_names.SCORE_N_COMMUNITIES] = np.where(
self.df[field_names.GEOID_TRACT_FIELD]
.str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)) &
self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
)
& self.df[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED],
True,
self.df[field_names.SCORE_N_COMMUNITIES],
)
@@ -1049,9 +1049,14 @@ def _mark_poverty_flag(self) -> None:
combined_column_name=field_names.COMBINED_POVERTY_LESS_THAN_200_FPL_FIELD_2010,
threshold_cutoff_for_island_areas=self.LOW_INCOME_THRESHOLD,
)
self.df.loc[self.df[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)),
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED] = (
self.df[island_areas_poverty_200_criteria_field_name] >= self.LOW_INCOME_THRESHOLD
self.df.loc[
self.df[field_names.GEOID_TRACT_FIELD].str.startswith(
tuple(constants.TILES_ISLAND_AREA_FIPS_CODES)
),
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED,
] = (
self.df[island_areas_poverty_200_criteria_field_name]
>= self.LOW_INCOME_THRESHOLD
)

def _get_percent_of_tract_that_is_dac(self) -> float:
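Both reformatted blocks above rely on the same pandas idiom: build a boolean mask from GEOID prefixes with Series.str.startswith(tuple(...)), then feed it to np.where or use it as a row selector in a .loc assignment. A self-contained sketch with made-up FIPS prefixes, column names, and threshold rather than the project's real constants:

import numpy as np
import pandas as pd

ISLAND_FIPS = ("60", "66", "69", "78")  # illustrative territory prefixes
LOW_INCOME_THRESHOLD = 0.50             # illustrative cutoff

df = pd.DataFrame(
    {
        "tract_geoid": ["60050951100", "36087011302", "78010990000"],
        "poverty_share": [0.62, 0.40, 0.55],
        "score_n": [False, False, False],
        "poverty_flag": [False, False, False],
    }
)

island_mask = df["tract_geoid"].str.startswith(ISLAND_FIPS)

# np.where keeps the existing value wherever the condition is False.
df["score_n"] = np.where(
    island_mask & (df["poverty_share"] >= LOW_INCOME_THRESHOLD),
    True,
    df["score_n"],
)

# The .loc assignment only touches the masked (territory) rows;
# every other row keeps its previous flag.
df.loc[island_mask, "poverty_flag"] = (
    df["poverty_share"] >= LOW_INCOME_THRESHOLD
)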
@@ -2,7 +2,6 @@
import pandas as pd
import pytest
from data_pipeline.config import settings
from data_pipeline.etl.score import constants
from data_pipeline.etl.score.etl_score import ScoreETL
from data_pipeline.score import field_names
from data_pipeline.score.score_narwhal import ScoreNarwhal
@@ -13,9 +12,11 @@

TEST_DATA_FOLDER = settings.APP_ROOT / "tests" / "score" / "test_utils" / "data"


@pytest.fixture
def toy_score_df(scope="module"):
return pd.read_csv(TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
return pd.read_csv(
TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
dtype={field_names.GEOID_TRACT_FIELD: str},
)
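One aside on the fixture above: as written, scope="module" is an ordinary default argument of toy_score_df rather than a fixture scope, since pytest only honours scope when it is passed to the pytest.fixture decorator. A sketch of the conventional form, reusing the names already shown in this file:

@pytest.fixture(scope="module")
def toy_score_df():
    return pd.read_csv(
        TEST_DATA_FOLDER / "test_drop_tracts_from_percentile.csv",
        dtype={field_names.GEOID_TRACT_FIELD: str},
    )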

@@ -84,41 +85,46 @@ def test_drop_all_tracts(toy_score_df):


def test_mark_territory_dacs():
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
test_data = pd.read_csv(
TEST_DATA_FOLDER / "test_mark_territory_dacs.csv",
dtype={field_names.GEOID_TRACT_FIELD: str},
)
# Sanity check on the input data
assert not test_data[field_names.SCORE_N_COMMUNITIES].all()

scorer = ScoreNarwhal(test_data)
scorer._mark_territory_dacs()
territory_filter = test_data[field_names.GEOID_TRACT_FIELD].str.startswith(tuple(constants.TILES_ISLAND_AREA_FIPS_CODES))
# Check territories are set to true
expected_new_dacs_filter = (
test_data[field_names.GEOID_TRACT_FIELD].isin(['60050951100', '66010951100', '69110001101', '78010990000'])
expected_new_dacs_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
["60050951100", "66010951100", "69110001101", "78010990000"]
)
assert test_data.loc[expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
assert test_data.loc[
expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
].all()
# Non-territories are still false
assert not test_data.loc[~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES].all()
assert not test_data.loc[
~expected_new_dacs_filter, field_names.SCORE_N_COMMUNITIES
].all()


def test_mark_poverty_flag():
test_data = pd.read_csv(TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
test_data = pd.read_csv(
TEST_DATA_FOLDER / "test_mark_poverty_flag.csv",
dtype={field_names.GEOID_TRACT_FIELD: str},
)
# Sanity check on the input data
assert not test_data[field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()

scorer = ScoreNarwhal(test_data)
scorer._mark_poverty_flag()
expected_low_income_filter = (
test_data[field_names.GEOID_TRACT_FIELD].isin(['36087011302', '66010951100', '78010990000'])
expected_low_income_filter = test_data[field_names.GEOID_TRACT_FIELD].isin(
["36087011302", "66010951100", "78010990000"]
)
# Three tracts are set to true
assert (
test_data[expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
)
assert test_data[expected_low_income_filter][
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
].all()
# Everything else is false
assert (
not test_data[~expected_low_income_filter][field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED].all()
)
assert not test_data[~expected_low_income_filter][
field_names.FPL_200_SERIES_IMPUTED_AND_ADJUSTED
].all()
