Complete unit test coverage
troyraen committed Jan 4, 2025
1 parent a0bbd66 commit 4a3445a
Showing 2 changed files with 36 additions and 37 deletions.

tests/hats_import/verification/test_run_verification.py (30 additions & 37 deletions)
@@ -22,18 +22,13 @@ def test_runner(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    # _test = verifier.results_df.test == "schema consistency"
-    # _target = verifier.results_df.target == "constructed_truth_schema"
-    # assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
-    assert not verifier.results_df.passed.all(), "invalid catalog passed"
+    assert not verifier.results_df.passed.all(), "bad catalog passed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

@@ -43,27 +38,27 @@ def test_test_file_sets(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"
     bad_files = {"Norder=0/Dir=0/Npix=11.extra_file.parquet", "Norder=0/Dir=0/Npix=11.missing_file.parquet"}
     assert bad_files == set(verifier.results_df.bad_files.squeeze()), "bad_files failed"


 def test_test_is_valid_catalog(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
-    """`hats.is_valid_catalog` should pass for valid catalogs, fail for catalogs without ancillary files."""
+    """`hats.is_valid_catalog` should pass for good catalogs, fail for catalogs without ancillary files."""
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"


 def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
@@ -74,7 +69,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     verifier = runner.Verifier.from_args(args)
     verifier.test_num_rows()
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
+    assert all_passed, "good catalog failed"

     args = VerificationArguments(
         input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path, truth_total_rows=131
@@ -83,7 +78,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     verifier.test_num_rows()
     results = verifier.results_df
     all_failed = not results.passed.any()
-    assert all_failed, "invalid catalog passed"
+    assert all_failed, "bad catalog passed"

     targets = {"file footers vs _metadata", "file footers vs truth"}
     assert targets == set(results.target), "wrong targets"
@@ -100,6 +95,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
 @pytest.mark.parametrize("check_metadata", [(False,), (True,)])
 def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     """Schema tests should pass if all column names, dtypes, and (optionally) metadata match, else fail."""
+    # Show that a good catalog passes
     args = VerificationArguments(
         input_catalog_path=small_sky_object_catalog,
         output_path=tmp_path,
@@ -108,13 +104,9 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    # _test = verifier.results_df.test == "schema consistency"
-    # _target = verifier.results_df.target == "constructed_truth_schema"
-    # assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"

+    # Show that bad schemas fail.
     args = VerificationArguments(
         input_catalog_path=bad_schemas_dir,
         output_path=tmp_path,
@@ -123,26 +115,27 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     results = verifier.results_df
-    all_failed = not any(results.passed)
-    assert all_failed, "invalid catalog passed"

-    targets_failed = {"constructed_truth_schema", "_common_metadata vs truth", "file footers vs truth"}
-    if not check_metadata:
-        targets_passed = {"_metadata vs truth"}
+    # Expecting _common_metadata and some file footers to always fail
+    # and _metadata to fail if check_metadata is true.
+    expect_failed = ["_common_metadata vs truth", "file footers vs truth"]
+    if check_metadata:
+        expect_passed = []
+        expect_failed = expect_failed + ["_metadata vs truth"]
     else:
-        targets_passed = set()
-        targets_failed = targets_failed.union({"_metadata vs truth"})
-    assert targets_passed.union(targets_failed) == set(results.target), "wrong targets"
-    assert all(results.loc[results.target.isin(targets_passed)].passed), "valid targets failed"
-    assert not any(results.loc[results.target.isin(targets_failed)].passed), "invalid targets passed"
-
-    target = "file footers vs truth"
-    result = results.loc[results.target == target].squeeze()
-    expected_bad_files = {
+        expect_passed = ["_metadata vs truth"]
+    assert set(expect_passed + expect_failed) == set(results.target), "wrong targets"
+    assert all(results.loc[results.target.isin(expect_passed)].passed), "good targets failed"
+    assert not any(results.loc[results.target.isin(expect_failed)].passed), "bad targets passed"
+
+    # Expecting data files with wrong columns or dtypes to always fail
+    # and files with wrong metadata to fail if check_metadata is true.
+    result = results.loc[results.target == "file footers vs truth"].squeeze()
+    expected_bad_files = [
         "Norder=0/Dir=0/Npix=11.extra_column.parquet",
         "Norder=0/Dir=0/Npix=11.missing_column.parquet",
         "Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet",
-    }
+    ]
     if check_metadata:
-        expected_bad_files = expected_bad_files.union({"Norder=0/Dir=0/Npix=11.no_metadata.parquet"})
-    assert expected_bad_files == set(result.bad_files), "wrong bad_files"
+        expected_bad_files = expected_bad_files + ["Norder=0/Dir=0/Npix=11.wrong_metadata.parquet"]
+    assert set(expected_bad_files) == set(result.bad_files), "wrong bad_files"
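
The tests above all follow one pattern: build VerificationArguments, either run the whole suite with runner.run (which also writes the CSV report) or construct a runner.Verifier and call individual checks, then inspect results_df (columns passed, test, target, bad_files). A minimal sketch of that flow follows; the import module paths and the catalog/output paths are assumptions for illustration, not taken from this commit.

from hats_import.verification import run_verification as runner  # assumed module path
from hats_import.verification.arguments import VerificationArguments  # assumed module path

args = VerificationArguments(
    input_catalog_path="path/to/hats_catalog",  # placeholder
    output_path="path/to/verification_output",  # placeholder
)

# Full run, as test_runner does: returns the verifier and writes a CSV report.
verifier = runner.run(args, write_mode="w")
print(verifier.results_df.passed.all())

# Individual checks, as the other tests do.
verifier = runner.Verifier.from_args(args)
verifier.test_num_rows()
verifier.test_schemas(check_metadata=True)
failed = verifier.results_df.loc[~verifier.results_df.passed, ["test", "target", "bad_files"]]
print(failed)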

tests/hats_import/verification/test_verification_arguments.py (6 additions & 0 deletions)
@@ -27,6 +27,12 @@ def test_invalid_paths(tmp_path, small_sky_object_catalog):
     with pytest.raises(ValueError, match="input_catalog_path must be an existing directory"):
         VerificationArguments(input_catalog_path="path", output_path=f"{tmp_path}/path")

+    # Truth schema is not an existing file
+    with pytest.raises(ValueError, match="truth_schema must be an existing file or directory"):
+        VerificationArguments(
+            input_catalog_path=small_sky_object_catalog, output_path=tmp_path, truth_schema="path"
+        )
+

 @pytest.mark.timeout(5)
 def test_good_paths(tmp_path, small_sky_object_catalog):
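
The added test pins down validation of the truth_schema argument: it must name an existing file or directory. A hedged sketch of supplying it when running verification; the paths and file name are placeholders, and the import path is assumed as above.

from hats_import.verification.arguments import VerificationArguments  # assumed module path

# truth_schema points at an existing file or directory whose schema serves as
# the comparison truth for the schema checks (the "vs truth" targets above).
args = VerificationArguments(
    input_catalog_path="path/to/hats_catalog",
    output_path="path/to/verification_output",
    truth_schema="path/to/truth_schema.parquet",
)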
