diff --git a/tests/hats_import/verification/test_run_verification.py b/tests/hats_import/verification/test_run_verification.py
index 3a46155..71573b0 100644
--- a/tests/hats_import/verification/test_run_verification.py
+++ b/tests/hats_import/verification/test_run_verification.py
@@ -22,18 +22,13 @@ def test_runner(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    #     _test = verifier.results_df.test == "schema consistency"
-    #     _target = verifier.results_df.target == "constructed_truth_schema"
-    #     assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"
 
     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.run(args, write_mode="w")
-    assert not verifier.results_df.passed.all(), "invalid catalog passed"
+    assert not verifier.results_df.passed.all(), "bad catalog passed"
     written_results = pd.read_csv(args.output_path / args.output_filename)
     assert written_results[result_cols].equals(verifier.results_df[result_cols]), "report failed"
 
@@ -43,27 +38,27 @@ def test_test_file_sets(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"
 
     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_file_sets()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"
     bad_files = {"Norder=0/Dir=0/Npix=11.extra_file.parquet", "Norder=0/Dir=0/Npix=11.missing_file.parquet"}
     assert bad_files == set(verifier.results_df.bad_files.squeeze()), "bad_files failed"
 
 
 def test_test_is_valid_catalog(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
-    """`hats.is_valid_catalog` should pass for valid catalogs, fail for catalogs without ancillary files."""
+    """`hats.is_valid_catalog` should pass for good catalogs, fail for catalogs without ancillary files."""
     args = VerificationArguments(input_catalog_path=small_sky_object_catalog, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert passed, "valid catalog failed"
+    assert passed, "good catalog failed"
 
     args = VerificationArguments(input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path)
     verifier = runner.Verifier.from_args(args)
     passed = verifier.test_is_valid_catalog()
-    assert not passed, "invalid catalog passed"
+    assert not passed, "bad catalog passed"
 
 
 def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_path):
@@ -74,7 +69,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_p
     verifier = runner.Verifier.from_args(args)
     verifier.test_num_rows()
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
+    assert all_passed, "good catalog failed"
 
     args = VerificationArguments(
         input_catalog_path=wrong_files_and_rows_dir, output_path=tmp_path, truth_total_rows=131
@@ -83,7 +78,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_p
     verifier.test_num_rows()
     results = verifier.results_df
     all_failed = not results.passed.any()
-    assert all_failed, "invalid catalog passed"
+    assert all_failed, "bad catalog passed"
     targets = {"file footers vs _metadata", "file footers vs truth"}
     assert targets == set(results.target), "wrong targets"
 
@@ -100,6 +95,7 @@ def test_test_num_rows(small_sky_object_catalog, wrong_files_and_rows_dir, tmp_p
 @pytest.mark.parametrize("check_metadata", [(False,), (True,)])
 def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check_metadata):
     """Schema tests should pass if all column names, dtypes, and (optionally) metadata match, else fail."""
+    # Show that a good catalog passes
     args = VerificationArguments(
         input_catalog_path=small_sky_object_catalog,
         output_path=tmp_path,
@@ -108,13 +104,9 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     all_passed = verifier.results_df.passed.all()
-    assert all_passed, "valid catalog failed"
-    # # [FIXME] pandas metadata is unexpectedly missing hats columns
-    # if not all_passed:
-    #     _test = verifier.results_df.test == "schema consistency"
-    #     _target = verifier.results_df.target == "constructed_truth_schema"
-    #     assert verifier.results_df.loc[~(_test & _target)].passed.all()
+    assert all_passed, "good catalog failed"
 
+    # Show that bad schemas fail.
     args = VerificationArguments(
         input_catalog_path=bad_schemas_dir,
         output_path=tmp_path,
@@ -123,26 +115,27 @@ def test_test_schemas(small_sky_object_catalog, bad_schemas_dir, tmp_path, check
     verifier = runner.Verifier.from_args(args)
     verifier.test_schemas(check_metadata=check_metadata)
     results = verifier.results_df
-    all_failed = not any(results.passed)
-    assert all_failed, "invalid catalog passed"
-    targets_failed = {"constructed_truth_schema", "_common_metadata vs truth", "file footers vs truth"}
-    if not check_metadata:
-        targets_passed = {"_metadata vs truth"}
+    # Expecting _common_metadata and some file footers to always fail
+    # and _metadata to fail if check_metadata is true.
+    expect_failed = ["_common_metadata vs truth", "file footers vs truth"]
+    if check_metadata:
+        expect_passed = []
+        expect_failed = expect_failed + ["_metadata vs truth"]
     else:
-        targets_passed = set()
-        targets_failed = targets_failed.union({"_metadata vs truth"})
-    assert targets_passed.union(targets_failed) == set(results.target), "wrong targets"
-    assert all(results.loc[results.target.isin(targets_passed)].passed), "valid targets failed"
-    assert not any(results.loc[results.target.isin(targets_failed)].passed), "invalid targets passed"
-
-    target = "file footers vs truth"
-    result = results.loc[results.target == target].squeeze()
-    expected_bad_files = {
+        expect_passed = ["_metadata vs truth"]
+    assert set(expect_passed + expect_failed) == set(results.target), "wrong targets"
+    assert all(results.loc[results.target.isin(expect_passed)].passed), "good targets failed"
+    assert not any(results.loc[results.target.isin(expect_failed)].passed), "bad targets passed"
+
+    # Expecting data files with wrong columns or dtypes to always fail
+    # and files with wrong metadata to fail if check_metadata is true.
+    result = results.loc[results.target == "file footers vs truth"].squeeze()
+    expected_bad_files = [
         "Norder=0/Dir=0/Npix=11.extra_column.parquet",
         "Norder=0/Dir=0/Npix=11.missing_column.parquet",
         "Norder=0/Dir=0/Npix=11.wrong_dtypes.parquet",
-    }
+    ]
     if check_metadata:
-        expected_bad_files = expected_bad_files.union({"Norder=0/Dir=0/Npix=11.no_metadata.parquet"})
-    assert expected_bad_files == set(result.bad_files), "wrong bad_files"
+        expected_bad_files = expected_bad_files + ["Norder=0/Dir=0/Npix=11.wrong_metadata.parquet"]
+    assert set(expected_bad_files) == set(result.bad_files), "wrong bad_files"
 
diff --git a/tests/hats_import/verification/test_verification_arguments.py b/tests/hats_import/verification/test_verification_arguments.py
index 919dde5..f28ef92 100644
--- a/tests/hats_import/verification/test_verification_arguments.py
+++ b/tests/hats_import/verification/test_verification_arguments.py
@@ -27,6 +27,12 @@ def test_invalid_paths(tmp_path, small_sky_object_catalog):
     with pytest.raises(ValueError, match="input_catalog_path must be an existing directory"):
         VerificationArguments(input_catalog_path="path", output_path=f"{tmp_path}/path")
 
+    # Truth schema is not an existing file
+    with pytest.raises(ValueError, match="truth_schema must be an existing file or directory"):
+        VerificationArguments(
+            input_catalog_path=small_sky_object_catalog, output_path=tmp_path, truth_schema="path"
+        )
+
 
 @pytest.mark.timeout(5)
 def test_good_paths(tmp_path, small_sky_object_catalog):