Complete unit test coverage
troyraen committed Jan 4, 2025
1 parent a0bbd66 commit 4a3445a
Showing 2 changed files with 36 additions and 37 deletions.

tests/hats_import/verification/test_run_verification.py (30 additions & 37 deletions)
@@ -22,18 +22,13 @@ def test_runner(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    # _test = verifier.results_df.test == "schema consistency"
-    # _target = verifier.results_df.target == "constructed_truth_schema"
-    # assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
-    assert not verifier.results_df.passed.all(), "invalid catalog passed"
+    assert not verifier.results_df.passed.all(), "bad catalog passed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"

@@ -43,27 +38,27 @@ def test_test_file_sets(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"
     bad_files = {"Norder=0/Dir=0/Npix=11.extra_file.parquet", "Norder=0/Dir=0/Npix=11.missing_file.parquet"}
     assert bad_files == set(verifier.results_df.bad_files.squeeze()), "bad_files failed"


 def test_test_is_valid_catalog(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
-    """`hats.is_valid_catalog` should pass for valid catalogs, fail for catalogs without ancillary files."""
+    """`hats.is_valid_catalog` should pass for good catalogs, fail for catalogs without ancillary files."""
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"

     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"


 def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
@@ -74,7 +69,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     verifier = runner.Verifier.from_args(args)
     verifier.test_num_rows()
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
+    assert all_passed, "good catalog failed"

     args = VerificationArguments(
         input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path, truth_total_rows=131
@@ -83,7 +78,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     verifier.test_num_rows()
     results = verifier.results_df
     all_failed = not results.passed.any()
-    assert all_failed, "invalid catalog passed"
+    assert all_failed, "bad catalog passed"

     targets = {"file footers vs _metadata", "file footers vs truth"}
     assert targets == set(results.target), "wrong targets"
@@ -100,6 +95,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
 @pytest.mark.parametrize("check_metadata", [(False,), (True,)])
 def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     """Schema tests should pass if all column names, dtypes, and (optionally) metadata match, else fail."""
+    # Show that a good catalog passes
     args = VerificationArguments(
         input_catalog_path=small_sky_object_catalog,
         output_path=tmp_path,
@@ -108,13 +104,9 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    # _test = verifier.results_df.test == "schema consistency"
-    # _target = verifier.results_df.target == "constructed_truth_schema"
-    # assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"

+    # Show that bad schemas fail.
     args = VerificationArguments(
         input_catalog_path=bad_schemas_dir,
         output_path=tmp_path,
@@ -123,26 +115,27 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     results = verifier.results_df
-    all_failed = not any(results.passed)
-    assert all_failed, "invalid catalog passed"

-    targets_failed = {"constructed_truth_schema", "_common_metadata vs truth", "file footers vs truth"}
-    if not check_metadata:
-        targets_passed = {"_metadata vs truth"}
+    # Expecting _common_metadata and some file footers to always fail
+    # and _metadata to fail if check_metadata is true.
+    expect_failed = ["_common_metadata vs truth", "file footers vs truth"]
+    if check_metadata:
+        expect_passed = []
+        expect_failed = expect_failed + ["_metadata vs truth"]
     else:
-        targets_passed = set()
-        targets_failed = targets_failed.union({"_metadata vs truth"})
-    assert targets_passed.union(targets_failed) == set(results.target), "wrong targets"
-    assert all(results.loc[results.target.isin(targets_passed)].passed), "valid targets failed"
-    assert not any(results.loc[results.target.isin(targets_failed)].passed), "invalid targets passed"
-
-    target = "file footers vs truth"
-    result = results.loc[results.target == target].squeeze()
-    expected_bad_files = {
+        expect_passed = ["_metadata vs truth"]
+    assert set(expect_passed + expect_failed) == set(results.target), "wrong targets"
+    assert all(results.loc[results.target.isin(expect_passed)].passed), "good targets failed"
+    assert not any(results.loc[results.target.isin(expect_failed)].passed), "bad targets passed"
+
+    # Expecting data files with wrong columns or dtypes to always fail
+    # and files with wrong metadata to fail if check_metadata is true.
+    result = results.loc[results.target == "file footers vs truth"].squeeze()
+    expected_bad_files = [
         "Norder=0/Dir=0/Npix=11.extra_column.parquet",
         "Norder=0/Dir=0/Npix=11.missing_column.parquet",
         "Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet",
-    }
+    ]
     if check_metadata:
-        expected_bad_files = expected_bad_files.union({"Norder=0/Dir=0/Npix=11.no_metadata.parquet"})
-    assert expected_bad_files == set(result.bad_files), "wrong bad_files"
+        expected_bad_files = expected_bad_files + ["Norder=0/Dir=0/Npix=11.wrong_metadata.parquet"]
+    assert set(expected_bad_files) == set(result.bad_files), "wrong bad_files"
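
The tests above all follow one pattern: build VerificationArguments, either run the whole suite with runner.run (which also writes the CSV report) or construct a runner.Verifier and call individual checks, then inspect results_df (columns passed, test, target, bad_files). A minimal sketch of that flow follows; the import module paths and the catalog/output paths are assumptions for illustration, not taken from this commit.

from hats_import.verification import run_verification as runner  # assumed module path
from hats_import.verification.arguments import VerificationArguments  # assumed module path

args = VerificationArguments(
    input_catalog_path="path/to/hats_catalog",  # placeholder
    output_path="path/to/verification_output",  # placeholder
)

# Full run, as test_runner does: returns the verifier and writes a CSV report.
verifier = runner.run(args, write_mode="w")
print(verifier.results_df.passed.all())

# Individual checks, as the other tests do.
verifier = runner.Verifier.from_args(args)
verifier.test_num_rows()
verifier.test_schemas(check_metadata=True)
failed = verifier.results_df.loc[~verifier.results_df.passed, ["test", "target", "bad_files"]]
print(failed)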

tests/hats_import/verification/test_verification_arguments.py (6 additions & 0 deletions)
@@ -27,6 +27,12 @@ def test_invalid_paths(tmp_path, small_sky_object_catalog):
     with pytest.raises(ValueError, match="input_catalog_path must be an existing directory"):
         VerificationArguments(input_catalog_path="path", output_path=f"{tmp_path}/path")

+    # Truth schema is not an existing file
+    with pytest.raises(ValueError, match="truth_schema must be an existing file or directory"):
+        VerificationArguments(
+            input_catalog_path=small_sky_object_catalog, output_path=tmp_path, truth_schema="path"
+        )
+

 @pytest.mark.timeout(5)
 def test_good_paths(tmp_path, small_sky_object_catalog):
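
The added test pins down validation of the truth_schema argument: it must name an existing file or directory. A hedged sketch of supplying it when running verification; the paths and file name are placeholders, and the import path is assumed as above.

from hats_import.verification.arguments import VerificationArguments  # assumed module path

# truth_schema points at an existing file or directory whose schema serves as
# the comparison truth for the schema checks (the "vs truth" targets above).
args = VerificationArguments(
    input_catalog_path="path/to/hats_catalog",
    output_path="path/to/verification_output",
    truth_schema="path/to/truth_schema.parquet",
)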
