From 6b7753cbc614e85c6331572a9df745a4f5a51a41 Mon Sep 17 00:00:00 2001
From: delucchi-cmu <delucchi@andrew.cmu.edu>
Date: Tue, 18 Apr 2023 10:04:22 -0400
Subject: [PATCH 1/5] Write final parquet with pandas metadata.

---
 src/hipscat_import/catalog/map_reduce.py      |  7 ++++-
 .../hipscat_import/catalog/test_map_reduce.py |  6 +++--
 tests/hipscat_import/conftest.py              | 26 +++++++++++++++++++
 3 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hipscat_import/catalog/map_reduce.py
index 5e441d99..1169a098 100644
--- a/src/hipscat_import/catalog/map_reduce.py
+++ b/src/hipscat_import/catalog/map_reduce.py
@@ -212,7 +212,12 @@ def reduce_pixel_shards(
         "Npix",
         [np.full(rows_written, fill_value=destination_pixel_number, dtype=np.int32)],
     )
-    pq.write_table(merged_table, where=destination_file)
+    if add_hipscat_index:
+        merged_table.to_pandas().set_index("_hipscat_index").sort_index().to_parquet(
+            destination_file
+        )
+    else:
+        merged_table.to_pandas().to_parquet(destination_file)
 
     del merged_table, tables
 
diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hipscat_import/catalog/test_map_reduce.py
index 4093494a..d2af2caf 100644
--- a/tests/hipscat_import/catalog/test_map_reduce.py
+++ b/tests/hipscat_import/catalog/test_map_reduce.py
@@ -210,7 +210,9 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
     assert_parquet_file_ids(output_file, "id", expected_ids)
 
 
-def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
+def test_reduce_hipscat_index(
+    parquet_shards_dir, assert_parquet_file_ids, assert_parquet_file_index, tmp_path
+):
     """Test reducing into one large pixel"""
     mr.reduce_pixel_shards(
         cache_path=parquet_shards_dir,
@@ -268,7 +270,7 @@ def test_reduce_hipscat_index(parquet_shards_dir, assert_parquet_file_ids, tmp_p
         13564690156971098112,
         13557377060258709504,
     ]
-    assert_parquet_file_ids(output_file, "_hipscat_index", expected_indexes)
+    assert_parquet_file_index(output_file, expected_indexes)
 
 
 def test_reduce_bad_expectation(parquet_shards_dir, tmp_path):
diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py
index 864b86bd..e5a7bc05 100644
--- a/tests/hipscat_import/conftest.py
+++ b/tests/hipscat_import/conftest.py
@@ -171,3 +171,29 @@ def assert_parquet_file_ids(file_name, id_column, expected_ids):
         npt.assert_array_equal(ids, expected_ids)
 
     return assert_parquet_file_ids
+
+
+@pytest.fixture
+def assert_parquet_file_index():
+    def assert_parquet_file_index(file_name, expected_values):
+        """
+        Convenience method to read a parquet file and compare the index values to
+        a list of expected objects.
+
+        Args:
+            file_name (str): fully-specified path of the file to read
+            expected_values (:obj:`int[]`): list of expected values in index
+        """
+        assert os.path.exists(file_name), f"file not found [{file_name}]"
+
+        data_frame = pd.read_parquet(file_name, engine="pyarrow")
+        values = data_frame.index.values.tolist()
+        expected_values.sort()
+
+        assert len(values) == len(
+            expected_values
+        ), f"object list not the same size ({len(values)} vs {len(expected_values)})"
+
+        npt.assert_array_equal(values, expected_values)
+
+    return assert_parquet_file_index

From 3c5dd455c854a2c88f1ff71d6a6572f140fe1242 Mon Sep 17 00:00:00 2001
From: delucchi-cmu <delucchi@andrew.cmu.edu>
Date: Tue, 18 Apr 2023 12:16:41 -0400
Subject: [PATCH 2/5] Increase test coverage.

---
 .../hipscat_import/catalog/test_map_reduce.py | 34 ++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/tests/hipscat_import/catalog/test_map_reduce.py b/tests/hipscat_import/catalog/test_map_reduce.py
index d2af2caf..fcada968 100644
--- a/tests/hipscat_import/catalog/test_map_reduce.py
+++ b/tests/hipscat_import/catalog/test_map_reduce.py
@@ -4,6 +4,7 @@
 
 import hipscat.pixel_math as hist
 import numpy.testing as npt
+import pandas as pd
 import pyarrow as pa
 import pytest
 
@@ -198,6 +199,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
         destination_pixel_number=11,
         destination_pixel_size=131,
         output_path=tmp_path,
+        add_hipscat_index=True,
         ra_column="ra",
         dec_column="dec",
         id_column="id",
@@ -213,7 +215,7 @@ def test_reduce_order0(parquet_shards_dir, assert_parquet_file_ids, tmp_path):
 def test_reduce_hipscat_index(
     parquet_shards_dir, assert_parquet_file_ids, assert_parquet_file_index, tmp_path
 ):
-    """Test reducing into one large pixel"""
+    """Test reducing with or without a _hipscat_index field"""
     mr.reduce_pixel_shards(
         cache_path=parquet_shards_dir,
         origin_pixel_numbers=[47],
@@ -271,6 +273,36 @@ def test_reduce_hipscat_index(
         13557377060258709504,
     ]
     assert_parquet_file_index(output_file, expected_indexes)
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    assert data_frame.index.name == "_hipscat_index"
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["id", "ra", "dec", "ra_error", "dec_error", "Norder", "Dir", "Npix"],
+    )
+
+    mr.reduce_pixel_shards(
+        cache_path=parquet_shards_dir,
+        origin_pixel_numbers=[47],
+        destination_pixel_order=0,
+        destination_pixel_number=11,
+        destination_pixel_size=18,
+        output_path=tmp_path,
+        add_hipscat_index=False,  ## different from above
+        ra_column="ra",
+        dec_column="dec",
+        id_column="id",
+        delete_input_files=False,
+    )
+
+    assert_parquet_file_ids(output_file, "id", expected_ids)
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    ## No index name.
+    assert data_frame.index.name is None
+    ## Data fields are the same.
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["id", "ra", "dec", "ra_error", "dec_error", "Norder", "Dir", "Npix"],
+    )
 
 
 def test_reduce_bad_expectation(parquet_shards_dir, tmp_path):

From dfa77240e39e6e0c26f882099c3601f4429afc0b Mon Sep 17 00:00:00 2001
From: delucchi-cmu <delucchi@andrew.cmu.edu>
Date: Wed, 19 Apr 2023 13:51:59 -0400
Subject: [PATCH 3/5] Add tests for pandas-indexed parquet input.

---
 src/hipscat_import/catalog/map_reduce.py      |  73 ++++---
 .../catalog/test_file_readers.py              |   9 +-
 .../catalog/test_resume_files.py              |  26 +--
 .../hipscat_import/catalog/test_run_import.py | 183 +++++++++++++++++-
 tests/hipscat_import/conftest.py              |  13 +-
 .../{shard_1.parquet => shard_0_0.parquet}    | Bin 4852 -> 4852 bytes
 .../{shard_3.parquet => shard_1_0.parquet}    | Bin 4976 -> 4976 bytes
 .../{shard_2.parquet => shard_2_0.parquet}    | Bin 4908 -> 4908 bytes
 .../{shard_0.parquet => shard_3_0.parquet}    | Bin 4893 -> 4893 bytes
 .../{shard_4.parquet => shard_4_0.parquet}    | Bin 4978 -> 4978 bytes
 .../{shard_1.parquet => shard_0_0.parquet}    | Bin 4887 -> 4887 bytes
 .../{shard_3.parquet => shard_1_0.parquet}    | Bin 4887 -> 4887 bytes
 .../{shard_2.parquet => shard_2_0.parquet}    | Bin 4840 -> 4840 bytes
 .../{shard_0.parquet => shard_3_0.parquet}    | Bin 4841 -> 4841 bytes
 .../{shard_4.parquet => shard_4_0.parquet}    | Bin 4890 -> 4890 bytes
 .../{shard_1.parquet => shard_0_0.parquet}    | Bin 4969 -> 4969 bytes
 .../{shard_3.parquet => shard_1_0.parquet}    | Bin 4878 -> 4878 bytes
 .../{shard_2.parquet => shard_2_0.parquet}    | Bin 4972 -> 4972 bytes
 .../{shard_0.parquet => shard_3_0.parquet}    | Bin 4958 -> 4958 bytes
 .../{shard_4.parquet => shard_4_0.parquet}    | Bin 4841 -> 4841 bytes
 .../{shard_1.parquet => shard_0_0.parquet}    | Bin 4796 -> 4796 bytes
 .../{shard_3.parquet => shard_1_0.parquet}    | Bin 4796 -> 4796 bytes
 .../{shard_2.parquet => shard_2_0.parquet}    | Bin 4823 -> 4823 bytes
 .../{shard_0.parquet => shard_3_0.parquet}    | Bin 4851 -> 4851 bytes
 .../{shard_4.parquet => shard_4_0.parquet}    | Bin 4869 -> 4869 bytes
 .../data/test_formats/multiindex.parquet      | Bin 0 -> 4036 bytes
 .../data/test_formats/pandasindex.parquet     | Bin 0 -> 4636 bytes
 27 files changed, 253 insertions(+), 51 deletions(-)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/{shard_1.parquet => shard_0_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/{shard_3.parquet => shard_1_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/{shard_2.parquet => shard_2_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/{shard_0.parquet => shard_3_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/{shard_4.parquet => shard_4_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/{shard_1.parquet => shard_0_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/{shard_3.parquet => shard_1_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/{shard_2.parquet => shard_2_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/{shard_0.parquet => shard_3_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/{shard_4.parquet => shard_4_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/{shard_1.parquet => shard_0_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/{shard_3.parquet => shard_1_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/{shard_2.parquet => shard_2_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/{shard_0.parquet => shard_3_0.parquet} (96%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/{shard_4.parquet => shard_4_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/{shard_1.parquet => shard_0_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/{shard_3.parquet => shard_1_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/{shard_2.parquet => shard_2_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/{shard_0.parquet => shard_3_0.parquet} (97%)
 rename tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/{shard_4.parquet => shard_4_0.parquet} (97%)
 create mode 100644 tests/hipscat_import/data/test_formats/multiindex.parquet
 create mode 100644 tests/hipscat_import/data/test_formats/pandasindex.parquet

diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hipscat_import/catalog/map_reduce.py
index 1169a098..91b3d7da 100644
--- a/src/hipscat_import/catalog/map_reduce.py
+++ b/src/hipscat_import/catalog/map_reduce.py
@@ -30,6 +30,22 @@ def _get_pixel_directory(cache_path: FilePointer, pixel: np.int64):
     )
 
 
+def _has_named_index(dataframe):
+    """Heuristic to determine if a dataframe has some meaningful index.
+    
+    This will reject dataframes with no index name for a single index,
+    or empty names for multi-index ([] or [None]).    
+    """
+    if dataframe.index.name is not None:
+        ## Single index with a given name.
+        return True
+    if len(dataframe.index.names) == 0:
+        return False
+    if dataframe.index.names[0] is not None:
+        return True
+    return False
+
+
 def map_to_pixels(
     input_file: FilePointer,
     file_reader,
@@ -107,7 +123,10 @@ def map_to_pixels(
                 output_file = file_io.append_paths_to_pointer(
                     pixel_dir, f"shard_{shard_suffix}_{chunk_number}.parquet"
                 )
-                filtered_data.to_parquet(output_file)
+                if _has_named_index(filtered_data):
+                    filtered_data.to_parquet(output_file, index=True)
+                else:
+                    filtered_data.to_parquet(output_file, index=False)
             del filtered_data, data_indexes
 
         ## Pesky memory!
@@ -181,45 +200,35 @@ def reduce_pixel_shards(
             f" Expected {destination_pixel_size}, wrote {rows_written}"
         )
 
+    dataframe = merged_table.to_pandas()
     if id_column:
-        merged_table = merged_table.sort_by(id_column)
+        dataframe = dataframe.sort_values(id_column)
     if add_hipscat_index:
-        merged_table = merged_table.append_column(
-            "_hipscat_index",
-            [
-                pixel_math.compute_hipscat_id(
-                    merged_table[ra_column].to_pylist(),
-                    merged_table[dec_column].to_pylist(),
-                )
-            ],
+        dataframe["_hipscat_index"] = pixel_math.compute_hipscat_id(
+            dataframe[ra_column].values,
+            dataframe[dec_column].values,
         )
-        merged_table = merged_table.sort_by("_hipscat_index")
-    merged_table = merged_table.append_column(
-        "Norder",
-        [np.full(rows_written, fill_value=destination_pixel_order, dtype=np.int32)],
+
+    dataframe["Norder"] = np.full(
+        rows_written, fill_value=destination_pixel_order, dtype=np.int32
     )
-    merged_table = merged_table.append_column(
-        "Dir",
-        [
-            np.full(
-                rows_written,
-                fill_value=int(destination_pixel_number / 10_000) * 10_000,
-                dtype=np.int32,
-            )
-        ],
+    dataframe["Dir"] = np.full(
+        rows_written,
+        fill_value=int(destination_pixel_number / 10_000) * 10_000,
+        dtype=np.int32,
     )
-    merged_table = merged_table.append_column(
-        "Npix",
-        [np.full(rows_written, fill_value=destination_pixel_number, dtype=np.int32)],
+    dataframe["Npix"] = np.full(
+        rows_written, fill_value=destination_pixel_number, dtype=np.int32
     )
+
     if add_hipscat_index:
-        merged_table.to_pandas().set_index("_hipscat_index").sort_index().to_parquet(
-            destination_file
-        )
-    else:
-        merged_table.to_pandas().to_parquet(destination_file)
+        ## If we had a meaningful index before, preserve it as a column.
+        if _has_named_index(dataframe):
+            dataframe = dataframe.reset_index()
+        dataframe = dataframe.set_index("_hipscat_index").sort_index()
+    dataframe.to_parquet(destination_file)
 
-    del merged_table, tables
+    del dataframe, merged_table, tables
 
     if delete_input_files:
         for pixel in origin_pixel_numbers:
diff --git a/tests/hipscat_import/catalog/test_file_readers.py b/tests/hipscat_import/catalog/test_file_readers.py
index affc7c23..87ef60b6 100644
--- a/tests/hipscat_import/catalog/test_file_readers.py
+++ b/tests/hipscat_import/catalog/test_file_readers.py
@@ -9,9 +9,12 @@
 import pytest
 from hipscat.catalog import CatalogParameters
 
-from hipscat_import.catalog.file_readers import (CsvReader, FitsReader,
-                                                 ParquetReader,
-                                                 get_file_reader)
+from hipscat_import.catalog.file_readers import (
+    CsvReader,
+    FitsReader,
+    ParquetReader,
+    get_file_reader,
+)
 
 
 def test_unknown_file_type():
diff --git a/tests/hipscat_import/catalog/test_resume_files.py b/tests/hipscat_import/catalog/test_resume_files.py
index bb8b8000..5412772b 100644
--- a/tests/hipscat_import/catalog/test_resume_files.py
+++ b/tests/hipscat_import/catalog/test_resume_files.py
@@ -4,18 +4,20 @@
 import numpy.testing as npt
 import pytest
 
-from hipscat_import.catalog.resume_files import (clean_resume_files,
-                                                 is_mapping_done,
-                                                 is_reducing_done,
-                                                 read_histogram,
-                                                 read_mapping_keys,
-                                                 read_reducing_keys,
-                                                 set_mapping_done,
-                                                 set_reducing_done,
-                                                 write_histogram,
-                                                 write_mapping_done_key,
-                                                 write_mapping_start_key,
-                                                 write_reducing_key)
+from hipscat_import.catalog.resume_files import (
+    clean_resume_files,
+    is_mapping_done,
+    is_reducing_done,
+    read_histogram,
+    read_mapping_keys,
+    read_reducing_keys,
+    set_mapping_done,
+    set_reducing_done,
+    write_histogram,
+    write_mapping_done_key,
+    write_mapping_start_key,
+    write_reducing_key,
+)
 
 
 def test_mapping_done(tmp_path):
diff --git a/tests/hipscat_import/catalog/test_run_import.py b/tests/hipscat_import/catalog/test_run_import.py
index 8db3d6d9..619947ad 100644
--- a/tests/hipscat_import/catalog/test_run_import.py
+++ b/tests/hipscat_import/catalog/test_run_import.py
@@ -4,6 +4,7 @@
 import shutil
 
 import pandas as pd
+import numpy.testing as npt
 import pytest
 
 import hipscat_import.catalog.resume_files as rf
@@ -105,7 +106,7 @@ def test_resume_dask_runner(
 
     # Check that the partition info file exists
     expected_partition_lines = [
-        "Norder,Dir,Npix,num_objects",
+        "Norder,Dir,Npix,num_rows",
         "0,0,11,131",
     ]
     partition_filename = os.path.join(args.catalog_path, "partition_info.csv")
@@ -187,7 +188,7 @@ def test_dask_runner(
 
     # Check that the partition info file exists
     expected_lines = [
-        "Norder,Dir,Npix,num_objects",
+        "Norder,Dir,Npix,num_rows",
         "0,0,11,131",
     ]
     metadata_filename = os.path.join(args.catalog_path, "partition_info.csv")
@@ -245,7 +246,7 @@ def test_dask_runner_source_table(
 
     # Check that the partition info file exists
     expected_lines = [
-        "Norder,Dir,Npix,num_objects",
+        "Norder,Dir,Npix,num_rows",
         "0,0,4,50",
         "1,0,47,2395",
         "2,0,176,385",
@@ -326,3 +327,179 @@ def test_dask_runner_mixed_schema_csv(
     )
 
     assert_parquet_file_ids(output_file, "id", [*range(700, 708)])
+
+
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+@pytest.mark.timeout(10)
+def test_dask_runner_preserve_index(
+    dask_client,
+    formats_pandasindex,
+    assert_parquet_file_ids,
+    assert_parquet_file_index,
+    tmp_path,
+):
+    """Test basic execution, with input with pandas metadata"""
+
+    expected_indexes = [
+        "star1_1",
+        "star1_2",
+        "star1_3",
+        "star1_4",
+        "galaxy1_1",
+        "galaxy1_2",
+        "galaxy2_1",
+        "galaxy2_2",
+    ]
+    assert_parquet_file_index(formats_pandasindex, expected_indexes)
+    data_frame = pd.read_parquet(formats_pandasindex, engine="pyarrow")
+    assert data_frame.index.name == "obs_id"
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["obj_id", "band", "ra", "dec", "mag"],
+    )
+
+    ## Don't generate a hipscat index. Verify that the original index remains.
+    args = ImportArguments(
+        output_catalog_name="pandasindex",
+        input_file_list=[formats_pandasindex],
+        input_format="parquet",
+        id_column="obs_id",
+        add_hipscat_index=False,
+        output_path=tmp_path,
+        dask_tmp=tmp_path,
+        highest_healpix_order=1,
+        progress_bar=False,
+    )
+
+    runner.run_with_client(args, dask_client)
+
+    # Check that the catalog parquet file exists
+    output_file = os.path.join(
+        args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet"
+    )
+
+    assert_parquet_file_index(output_file, expected_indexes)
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    assert data_frame.index.name == "obs_id"
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["obj_id", "band", "ra", "dec", "mag", "Norder", "Dir", "Npix"],
+    )
+
+    ## DO generate a hipscat index. Verify that the original index is preserved in a column.
+    args = ImportArguments(
+        output_catalog_name="pandasindex_preserve",
+        input_file_list=[formats_pandasindex],
+        input_format="parquet",
+        id_column="obs_id",
+        add_hipscat_index=True,
+        output_path=tmp_path,
+        dask_tmp=tmp_path,
+        highest_healpix_order=1,
+        progress_bar=False,
+    )
+
+    runner.run_with_client(args, dask_client)
+
+    # Check that the catalog parquet file exists
+    output_file = os.path.join(
+        args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet"
+    )
+
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    assert data_frame.index.name == "_hipscat_index"
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["obs_id", "obj_id", "band", "ra", "dec", "mag", "Norder", "Dir", "Npix"],
+    )
+    assert_parquet_file_ids(output_file, "obs_id", expected_indexes)
+
+
+@pytest.mark.filterwarnings("ignore::DeprecationWarning")
+@pytest.mark.timeout(10)
+def test_dask_runner_multiindex(
+    dask_client,
+    formats_multiindex,
+    assert_parquet_file_ids,
+    assert_parquet_file_index,
+    tmp_path,
+):
+    """Test basic execution, with input with pandas metadata"""
+
+    index_arrays = [
+        [
+            "star1",
+            "star1",
+            "star1",
+            "star1",
+            "galaxy1",
+            "galaxy1",
+            "galaxy2",
+            "galaxy2",
+        ],
+        ["r", "r", "i", "i", "r", "r", "r", "r"],
+    ]
+    expected_indexes = list(zip(index_arrays[0], index_arrays[1]))
+    assert_parquet_file_index(formats_multiindex, expected_indexes)
+    data_frame = pd.read_parquet(formats_multiindex, engine="pyarrow")
+    assert data_frame.index.names == ["obj_id", "band"]
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["ra", "dec", "mag"],
+    )
+
+    ## Don't generate a hipscat index. Verify that the original index remains.
+    args = ImportArguments(
+        output_catalog_name="multiindex",
+        input_file_list=[formats_multiindex],
+        input_format="parquet",
+        id_column=["obj_id", "band"],
+        add_hipscat_index=False,
+        output_path=tmp_path,
+        dask_tmp=tmp_path,
+        highest_healpix_order=1,
+        progress_bar=False,
+    )
+
+    runner.run_with_client(args, dask_client)
+
+    # Check that the catalog parquet file exists
+    output_file = os.path.join(
+        args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet"
+    )
+
+    assert_parquet_file_index(output_file, expected_indexes)
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    assert data_frame.index.names == ["obj_id", "band"]
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["ra", "dec", "mag", "Norder", "Dir", "Npix"],
+    )
+
+    ## DO generate a hipscat index. Verify that the original index is preserved in a column.
+    args = ImportArguments(
+        output_catalog_name="multiindex_preserve",
+        input_file_list=[formats_multiindex],
+        input_format="parquet",
+        id_column=["obj_id", "band"],
+        add_hipscat_index=True,
+        output_path=tmp_path,
+        dask_tmp=tmp_path,
+        highest_healpix_order=1,
+        progress_bar=False,
+    )
+
+    runner.run_with_client(args, dask_client)
+
+    # Check that the catalog parquet file exists
+    output_file = os.path.join(
+        args.catalog_path, "Norder=0", "Dir=0", "Npix=11.parquet"
+    )
+
+    data_frame = pd.read_parquet(output_file, engine="pyarrow")
+    assert data_frame.index.name == "_hipscat_index"
+    npt.assert_array_equal(
+        data_frame.columns,
+        ["obj_id", "band", "ra", "dec", "mag", "Norder", "Dir", "Npix"],
+    )
+    assert_parquet_file_ids(output_file, "obj_id", index_arrays[0])
diff --git a/tests/hipscat_import/conftest.py b/tests/hipscat_import/conftest.py
index e5a7bc05..d7d7ce8c 100644
--- a/tests/hipscat_import/conftest.py
+++ b/tests/hipscat_import/conftest.py
@@ -69,6 +69,16 @@ def formats_fits(test_data_dir):
     return os.path.join(test_data_dir, "test_formats", "small_sky.fits")
 
 
+@pytest.fixture
+def formats_pandasindex(test_data_dir):
+    return os.path.join(test_data_dir, "test_formats", "pandasindex.parquet")
+
+
+@pytest.fixture
+def formats_multiindex(test_data_dir):
+    return os.path.join(test_data_dir, "test_formats", "multiindex.parquet")
+
+
 @pytest.fixture
 def small_sky_parts_dir(test_data_dir):
     return os.path.join(test_data_dir, "small_sky_parts")
@@ -87,7 +97,7 @@ def parquet_shards_dir(test_data_dir):
 @pytest.fixture
 def parquet_shards_shard_44_0(test_data_dir):
     return os.path.join(
-        test_data_dir, "parquet_shards", "dir_0", "pixel_44", "shard_0.parquet"
+        test_data_dir, "parquet_shards", "dir_0", "pixel_44", "shard_3_0.parquet"
     )
 
 
@@ -188,6 +198,7 @@ def assert_parquet_file_index(file_name, expected_values):
 
         data_frame = pd.read_parquet(file_name, engine="pyarrow")
         values = data_frame.index.values.tolist()
+        values.sort()
         expected_values.sort()
 
         assert len(values) == len(
diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_1.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_0_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_1.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_0_0.parquet
index 0ecc22babdd32f4611321ba83669d833abefbb7f..89f0a5b8c69f7cfae1d068d17bd1939d4ef0e1f9 100644
GIT binary patch
delta 30
lcmeyO`bBj^3^%Kho`Igh=0xryRyNO4UuTo@%{K+4nE;>B35x&#

delta 30
lcmeyO`bBj^3^%Kxp0S?k=0xryRyNmC-%1n1%{K+4nE;?M34Q<o

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_3.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_1_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_3.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_1_0.parquet
index aaadca53adccec0ba31b31e2113bccf74fa6d001..37c12e8409c8084da3e0d5532bc096e796ffa30d 100644
GIT binary patch
delta 30
mcmeyM_CamK4Q^H=Jp(<1&3CzrSlK*FeVt9pH-`#JGXVgw@Co_=

delta 30
mcmeyM_CamK4Q^ILJ!3u7&3CzrSlL`leJf22H-`#JGXVgxPzmh-

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_2.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_2_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_2.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_2_0.parquet
index a0ddff49fa038aef14a3097fff5549084616ab6d..e779d36e88fe7e3a18a172ccfe42f2d9ba2ec92f 100644
GIT binary patch
delta 30
mcmZ3ZwnlBk6mC`{Jp(<1%`>@+SlK*FeVt9pHwy|%GXVgVlnAB(

delta 30
mcmZ3ZwnlBk6mC{SJ!3u7%`>@+SlL`leJf22Hwy|%GXVgV^az;%

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_0.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_3_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_0.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_3_0.parquet
index bb3df3aa84eb8d8a79f687c3322a23061def53ff..968f28ef411efd39a90bf56df4e420e511e2f3c4 100644
GIT binary patch
delta 30
lcmbQMHdk#!D>ti=o`Igh<}U6cRyNO4UuTo@%}j#QOaPCI2wMOE

delta 30
lcmbQMHdk#!D>tj5p0S?k<}U6cRyNmC-%1n1%}j#QOaPDT2u=V1

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_4.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_4_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_4.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_44/shard_4_0.parquet
index ec155e0bbb13555adc24951435259bca5922472d..dfd66ff1b6406d70b07cdc23c89c457c2ca7461d 100644
GIT binary patch
delta 30
mcmeyQ_DOBSEpApLJp(<1&G)&BSlK*FeVt9pH-`&KGXVgxz6t~Y

delta 30
mcmeyQ_DOBSEpAprJ!3u7&G)&BSlL`leJf22H-`&KGXVgy9trmV

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_1.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_0_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_1.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_0_0.parquet
index 09d4b0a25160954499c0308301f3b637215d514c..c375d53c549173e7032c6342aaf48e4a156ae76a 100644
GIT binary patch
delta 30
lcmbQPHeGE)JvXb7o`Igh<`(WERyNO4UuTo@&3^=>nE;U<2|NG*

delta 30
lcmbQPHeGE)JvXbNp0S?k<`(WERyNmC-%1n1&3^=>nE;V~2`>Nu

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_3.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_1_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_3.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_1_0.parquet
index 0681496c9667828be057cea8b1863fdb2d17633e..462df74d38d9ff0092289e4acc832daa0f392aab 100644
GIT binary patch
delta 30
lcmbQPHeGE)JvXb7o`Igh<`(WERyNO4UuTo@&3^=>nE;U<2|NG*

delta 30
lcmbQPHeGE)JvXbNp0S?k<`(WERyNmC-%1n1&3^=>nE;V~2`>Nu

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_2.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_2_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_2.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_2_0.parquet
index e9d7f7587b84ee1e11e14dd4c457f60cc2ebc80e..70cb9f45fc9939e4bfca26ab1892193be82d5105 100644
GIT binary patch
delta 30
lcmaE%`a*R>05_|Vo`Igh=1}e;RyNO4UuTo@&1VIqnE;yo2~Pk3

delta 30
lcmaE%`a*R>05_|lp0S?k=1}e;RyNmC-%1n1&1VIqnE;zz2|@q>

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_0.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_3_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_0.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_3_0.parquet
index cd743ec245888bf5b2d6e90cc20631b144654fd0..8b75a91c17a2014c67e20ea525a03eff64dfd9ac 100644
GIT binary patch
delta 30
lcmaE<`cid6AUCU#o`Igh<}mIeRyNO4UuTo@&F2K9nE;z)2~+?8

delta 30
lcmaE<`cid6AUCU_p0S?k<}mIeRyNmC-%1n1&F2K9nE;!_2}b|`

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_4.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_4_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_4.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_45/shard_4_0.parquet
index 8178d38a3600064341ca4dc1af034c5861860cb7..90628d3bb62aaa3a36351830fa9812c04eca1028 100644
GIT binary patch
delta 30
lcmbQGHcM?o6E~}oo`Igh=63EPRyNO4UuTo@&Hn|YnE;Yg2}=L~

delta 30
lcmbQGHcM?o6E~}&p0S?k=63EPRyNmC-%1n1&Hn|YnE;Zr2|fS-

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_1.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_0_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_1.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_0_0.parquet
index 4fc74471138d3f72ea82a6311b9ed573b80b3ae5..e086dc79ae343b0c77726ed8a3801de98e611be9 100644
GIT binary patch
delta 30
mcmaE<_EK%bMQ&CjJp(<1&DXe#SlK*FeVt9pH~R@nGXVgu9|_k0

delta 30
mcmaE<_EK%bMQ&C@J!3u7&DXe#SlL`leJf22H~R@nGXVgue+kL}

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_3.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_1_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_3.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_1_0.parquet
index eed0e75dbea0d5be45759d3c8a038172d37d1954..7a57bb06cfe95bf8150b35621accdf348f6df149 100644
GIT binary patch
delta 30
lcmeBE>r>lM#?5M^XP{@WxthC(mCdu%*V&|e^Jf8RCIF3p2@e1O

delta 30
lcmeBE>r>lM#?5M|XRK$sxthC(mCd!(x6;IL^Jf8RCIF4!2?78B

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_2.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_2_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_2.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_2_0.parquet
index 026b29d02faf44516d0be3122d2cc068fd22d202..a56718d6193be4f3af8fe4c6624eab0d5078a48a 100644
GIT binary patch
delta 30
mcmaE(_C{^P6>e4|Jp(<1%{RG=SlK*FeVt9pHwOwzGXVgvR0-w)

delta 30
mcmaE(_C{^P6>e5TJ!3u7%{RG=SlL`leJf22HwOwzGXVgvv<cY&

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_0.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_3_0.parquet
similarity index 96%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_0.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_3_0.parquet
index 6fb211f700d4a6f261126adc43fcc8924951e41d..0088271b0231a97c2e10e84563263da7d5d5a229 100644
GIT binary patch
delta 30
mcmcboc28}?F>Y2PJp(<1&8NAGSlK*FeVt9pH#-YTGXVgpvI(F7

delta 30
mcmcboc28}?F>Y2vJ!3u7&8NAGSlL`leJf22H#-YTGXVgq5($$4

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_4.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_4_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_4.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_46/shard_4_0.parquet
index 86829624309e509d841abb9775c04d5caffb669f..c3b514c4dfce7285f06ca9ba41ec20b6db4aecae 100644
GIT binary patch
delta 30
lcmaE<`cid6AUCU#o`Igh<}mIeRyNO4UuTo@&F2K9nE;z)2~+?8

delta 30
lcmaE<`cid6AUCU_p0S?k<}mIeRyNmC-%1n1&F2K9nE;!_2}b|`

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_1.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_0_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_1.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_0_0.parquet
index ead589ed97a767816ffcb41ce3c7b02baf657d6e..f23ccdee35f289a18dd856c2ba850be4f083915b 100644
GIT binary patch
delta 30
lcmdm^x<_?`3OB2fo`IghW=-xQRyNO4UuTo@%?kyjnE;7N2y*}c

delta 30
lcmdm^x<_?`3OB2vp0S?kW=-xQRyNmC-%1n1%?kyjnE;8Y2xb5P

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_3.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_1_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_3.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_1_0.parquet
index 5ea6f49a0ade30111d694088f28973d752e37d74..0132fecfea9ca0a39c856748eefb4076b9cae90c 100644
GIT binary patch
delta 30
lcmdm^x<_?`3OB2fo`IghW=-xQRyNO4UuTo@%?kyjnE;7N2y*}c

delta 30
lcmdm^x<_?`3OB2vp0S?kW=-xQRyNmC-%1n1%?kyjnE;8Y2xb5P

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_2.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_2_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_2.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_2_0.parquet
index 274baece84ce3659cfdf765a268ffd3a9bc5e22f..8802636455ca5a47951c36aca036cbde4a61893f 100644
GIT binary patch
delta 30
lcmcbvdR=vcJvXb7o`IghW*6=vRyNO4UuTo@&3go-nE;d~2>Jj3

delta 30
lcmcbvdR=vcJvXbNp0S?kW*6=vRyNmC-%1n1&3go-nE;fA2<-p>

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_0.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_3_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_0.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_3_0.parquet
index e20c765496c01bd511cdf4537967d3c5c76cbd8c..726c615856018b18b1c2138bb5e98ba66a282a62 100644
GIT binary patch
delta 30
lcmeyY`dM{DG&ie}o`Igh<^=8{RyNO4UuTo@%{K(3nE;<^35Eaw

delta 30
lcmeyY`dM{DG&ifEp0S?k<^=8{RyNmC-%1n1%{K(3nE;>433&hj

diff --git a/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_4.parquet b/tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_4_0.parquet
similarity index 97%
rename from tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_4.parquet
rename to tests/hipscat_import/data/parquet_shards/dir_0/pixel_47/shard_4_0.parquet
index fd5da403319906f2a30ddb65252b5359e89c0ffc..72a901355147ea644802af67c2ff3e543d8caee7 100644
GIT binary patch
delta 30
lcmZowYgOBj%gt(}XP{@Wxrn=nmCdu%*V&|e^D6;qCIE@x2;u+$

delta 30
lcmZowYgOBj%gt)2XRK$sxrn=nmCd!(x6;IL^D6;qCIE^+2-N@p

diff --git a/tests/hipscat_import/data/test_formats/multiindex.parquet b/tests/hipscat_import/data/test_formats/multiindex.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4c5444ec770ab83b76829e48cf059b3d08fb05b3
GIT binary patch
literal 4036
zcmcInZ)_7~7{7K<_D^jH*R*B{vp_SG4cl%7$_MQAM!VLoY@O@%3cB1~|J`<XE!TE!
zL6Ut@@xL)j3@RZ;jWGtvJ{V(+G-!wj3G!9Y%zu0kO^h)ZhaY_2yX)Gmoq@nE+x6~w
zp7(ivzxVmQPq#55;=tG8b2|JfT?jYg*hUQV{PFPF_m6)cyCi!~=O292`i1PVf4(q*
zUz9y3<da|CydZmyUis$tJiISDvG5qgWuLtqIs<XvZMpH|w_wLH+=SoX4+)L90XOuR
z+iMxk?X{dHtgnR?!?3PCJ=~4Dwzi|^7HwzsJ$R?ZWyA~?vtDL!y~$`gtv7wDx1G}2
z+HWVbr5~7LxE_O>5k4@i1@otm83xY1NLF8X{<?JOCRy#bEjWMuhpg^;WoDyek*xl(
zxcBN;*U9QLy_a8l?Mbrg)jt<Z-yo~>`BM*Udzq{@Gp|LO+iN)y^JZ%k^N$U6=52{2
z*FgR4^|pW8fO~V6RdU}F_iIM|8FP&D{Qc@j?`*lkdghMJ{#jr-&-~R3Z!Cm3&qeF&
z=RHQwv*)d^AO9u5c^3Tni|_u-dY;%nH~CYPYti*u26KBYC&JfkZQ_2avw^$Fq#L*&
z@38%~0k}73StWN%?tAc|5J(g0eRJOisGnv8rd)AYy0$vk!)w04$PQ@cCfr<?@eZtS
zz2Y!$(83|IbE_&>ecRXvwiEaC;N6;94Ow=2iccueKhc@yb+*sCZNGK(w8+vDB9smx
z#RA06tvaYAqZ<#PHsYpbpmefX$Ahn<SB0W$v)ktHt0P&*GLnj;Y?{;AuH9>!zZbz!
z*R?8c5nPzrkmoW<k-<ALymOl#oj&yeiBWrk)?j!yI5o1{uuu;GQA|oz&0sQGdMpRl
z^f24j_Pp5ER+4v5Id{}~Q?<inw6&WSIyPaLrQdAGfpVC<`9s5DD=#II<tZ*J6h$d-
z-QBz2stVlN-)ltzvpxn7s(VU_O)61`tr8<9Azms90z{<vq>z|eZd0pF6)HItXe~-S
zmrW#5VyP(RDoYM1nB)q`E+z|1f+;W%e%P9nAe@)dXzk>qzJXoVBS-3BmZPxB3|hYo
zg=SlUA|+&*!j7S46b+6B7>M4Hen6v9v%)E9uA~|7aw#~5&80NsT`pDc`SLBS+gB94
zy=FG;qN1#(hSjWhUQ5x5oMZ}FS<#Zfvoa%Bkd$&2MwYWBw6n8GInQS$WOZ-1Z?{{a
z(^jo`gU_k@FmpE%iR`0xs{_=0vil))xrn_4L5#yMx*j2jA;Oy`d<2mu$h2bfdYAc2
z$OvmRzkhsKeTTb}Mezp+qC~(t_*9a>r?Lp{H2iu?H07G1W1|E?mSDYiXFwE+aes6u
z<_yoWBhdmYjq>|=FB=fuJR`VsaWaq(kU<`;3kY`fJk2`G4wiDI7_h}-8G)+=b_9F{
z$mPng)FfJ8Oi<-~h$Q&9C<v@Sl43<)AwB_dewTywmrx!JOE{Gn5lV`W3_4g*fcz05
zz?agH&%wsLuGnD2#raWQf@ht<gK;WsSN#ql;r9WbNG33yO(!csffd8EaX)ZEn8;u>
zx|c5OBZKZ#bQ$(TD4rV3^GmqMV1=GgxQYxF&BuN2B27626c><!8Ks`&&9Gu5!#Ujc
zWX!Aj#CV={`t0H9aUStyrD%a05gIv(tlynU4AU(!Lwuec0c<17u*}4%Q6*o~{*l2Y
zALsW$?|g?E_a`ovXX)6uGpzOv`rs%fqyK}KirQ%^l1cdYF7+)nQ9^rJ=Px=ku%D=B
z&3jY#wd&^z?#=yNfwvHcUIHgO^g`20*&QO0PHWmyH4t+OGpYvSxj0pxi8)vCx9XV`
zEix3+$>eH1o2A8^Ko4p?y`?=0f;QCn7ClqX57P8n;nDQXiBagKyP}<kJVSx5V?mdy
zW3LOIE9@Yr@J@))3g;ANSstC0fGadj6i4B#ARX7W6&E-uoMW9s%K01Rhe{B~r_{*>
z^NJuU=oUcp2PPO?FkRq60bUM`I`rU;Ae%CT(0r<0Xa*9r2s&0Q!!(M|jKK}18=q9F
zj3_>}AbM1)__g_j-jxxD{A3BOS4?HN6T~FCHPk~hlVB;IL<Dz2<Suv%5YDA~qe>Bl
z)Y52%5QN1a5$MveU&~LTq!0)5m4>ORQU)?LkX}Nq&%w&Z`XqYiM1?lup{`fwy*9E5
s0yA&UG4jD;vM|8qas$fuPOtXG(yM&qSPsA+){Q^ddtDed2LHeR2Y~9Ty#N3J

literal 0
HcmV?d00001

diff --git a/tests/hipscat_import/data/test_formats/pandasindex.parquet b/tests/hipscat_import/data/test_formats/pandasindex.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2fdc75a61d6bd7f1dac08f57239ed21f1b73a678
GIT binary patch
literal 4636
zcmcgwU2NOd6{cjzb{uDQgHn@P4Kak2Ffb>RXvwS6q7_A3GF4l#l|&S|bt*~85^a)F
zsQ<XZ{8Y3kiel|h3|oPs7%*&k7`8qPMIHt{w80Q;4+Hj4plcVP*?<k}I$-Ottwqpt
zFGc-WGSDDSY3u6poO93lzH`q#7xo5|3XtRE<79A+oFIu41aWeHk|2nJq)E+Iz;o;r
zeFR=BRmlvH1o;5zULa@TXM&vYE)nxbu);mrF$c@(Q%)FX9nP_htJF1zmz?p0W{C-p
zd!i(7XVo0(bj`Zna=6}fP#J1!ln>7W03t{y0mCeO5d^$L;58%$fYYZOAT!aRg2@Te
zH2}$Bx*aO$j?P&?9Aj<j##j%>xGWhKil%GNLEUyzH=GEDwQgkaJqP)$V-3Kd@R5H$
zap9GJZ2#x?X5{2Y=dZs0o*DVejemXn>G#dZcfR-7#UI}^BlrDk>+3&y*Npt+`lnC*
z?2l&T^iTQHJMWm0Kdb3me|X!B0BRTcU=Ou>%3kT7vTa<%{0J%VIyP_d8XJ4=`v0lh
z6JBz<lVPVR;q3mx>AL2m{&s|Vd)#{muD=lVuYNfG$*T{s?YeY!ecH>mZztbMUHKo|
zroZdocyy0#zm|H>J2k_$|N6%G2S--e_N(U#>z};9wr@p$Q~%|CY<rlxy}~_Z`$E)*
zr9;&JJ>8=|mdh)B+;1MGu1x{=VJ`>x{zBY;@N?Jhr;AENn*3q*{@=-w-(K>*82VU_
zlux|=(w{z)Bc+w*$H{;a`R`u%`CA{#5ki|j_vlA*WcS5Z`sF8;5!-HS+*7tsgl||n
z#QpDI>Eo_Tg+A`@9;e=%0q(<I4)8sad)q<Q?TUZee-r?8j(9e3<~Tvnp5p|4=0PGz
zXKB<fa2v=%yZD-GVX-F&?%(buVt&%H4Z4qZJr2>kdZKq;p>I^|zH!~@+H+FBzMr}}
zi;nClSEJZ?o(U1|iHcIp>k>IbkTVZE?RNAqvF3&FScE%+Etm!yI8X$MCaAnRGe~yV
z0m9X>cg=dd9__Fi-d#1Ud)pJJdwcAtdt2Gmp;ngb*%{X?HRZZ_>;Z!CEVw7iaJ5Jk
z_xC4veX5bmH?vAftLa9?cXsYuKC6y>Pc6*(EO8dw<gs?wXGnS;?2MYGfyI`Z*K*lG
zndLoOZIzLsuV$!9DVO(wwNcZ{t*!zt+PqRlay?&_a#B?S^Ic!w0CUwSptTn-#7;lr
zyK<!mW)KB{MVB@5F@U5`ephG^%PR{2s>9HSF6H59w)AjgnbL41OSUi0mR6FgOF?8k
znBhJ}Uy6rFLDv|L)Da$XC}>N=p*q4t4%MmS5v76pAwsb);GGpJ&LXZ$GcQ$3Cazsg
zl}*WPq0%k4B-1Q4P@H<+tf(aeNyBISXZ$`WDLZ(dj<$4!dxlA+&hclhi@<&J_&3ng
zVqy%#Y{CaUPclr9i58Hoz_0}@i$(|Z4J2IY>em~~);o;Y3)XWC(_mm7d}Al@jTgZv
zz$e-e_|T4!S!bBUCajNslhd`D7*7W?bh0k5rmM2CuAWn)GN*@CB@ix)Y$CVDGO9p_
zD>*(;%g~m+n6d2gtwc@W0~)toD#%8<BD0MGr;BBgZ|-F1P*LVDB7e0U->e`%N<0Sf
zVwW?EsgM#!I!4X$p)&a25zliKj!l4Vppj39{rOB3`$bqJJ-wY{vA!y1Qbq9L7tV7_
z+-gM?`OvmZr^>RfV1Ma!D!z81pl)RX1K5=o*vG2C*~Z1FGM%^toOU>M6z+X`BU+U8
zR1xyh!)Byw5}&M#zy<Mx98{H6Z7a8`HGt0&@Tm)WSrZmFEj^$13w)}Wi?4L^vUR>u
z=+7gkCm@bxFY1~m$5UIf9;<fzsb~)|-Y*M}ktiONA4OlmyhiP-s5k3EW|K}@`!-@<
z#fnVF{K@S*$xTWVTDT5$iBDI=Sh!n%oZi)S>O9%M54s!=7jw&k-{KMq$eC!UzYh1|
z9jvoB*vFgUH5Q$jzWU)~#zDD}ZQZDS()V#6uA6;$16nQ~gR`3|cF)?Xzk=$&uQuv}
zk=&B_18ZkMKWfkJ>MVA0l!1EY^khrqV}5y&Lv;>0YnBD5BgNRkXB5r|%oFvTwU@M*
zNnDora1C+ews&glY_P!@z>iJz_EJx`6k4mxDmqtOYi*kWyF?iEIIF)c2hpsK1`hSV
zsDh@@?clMRBx=3z5{;&?5H&^AFYPCeCeRZuqn-fM0<z~fU_kA4a~(TDjl0*HVTyF|
zKv8V4HDnKOEW<6s>=Br*EJM?VAb2UBqE`!5bVuaZnLxeczrdDYhVd~?Y=rUm;bV~$
z&Ky?4eF6PlK=+3A6qW^h{Q>;&LP3Wfj8`mk$R30j(R~4aRxL7+tZvYi7+w^$@1IUA
z>pnrf4b>Y6@e*uGxKEZP^HMEeJ*||>r}4cx*SQ$y@Qvxw;IE&3zeE83z}tZT3w;iI
CI4f=d

literal 0
HcmV?d00001


From abd99444c586aaa15de6e6cf7d1a25222e9bc8c2 Mon Sep 17 00:00:00 2001
From: delucchi-cmu <delucchi@andrew.cmu.edu>
Date: Wed, 19 Apr 2023 14:08:14 -0400
Subject: [PATCH 4/5] Formatting and coverage.

---
 src/hipscat_import/catalog/map_reduce.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hipscat_import/catalog/map_reduce.py
index 91b3d7da..4e1d715e 100644
--- a/src/hipscat_import/catalog/map_reduce.py
+++ b/src/hipscat_import/catalog/map_reduce.py
@@ -32,18 +32,18 @@ def _get_pixel_directory(cache_path: FilePointer, pixel: np.int64):
 
 def _has_named_index(dataframe):
     """Heuristic to determine if a dataframe has some meaningful index.
-    
+
     This will reject dataframes with no index name for a single index,
-    or empty names for multi-index ([] or [None]).    
+    or empty names for multi-index ([] or [None]).
     """
     if dataframe.index.name is not None:
         ## Single index with a given name.
         return True
-    if len(dataframe.index.names) == 0:
+    if len(dataframe.index.names) == 0 or all(
+        name is None for name in dataframe.index.names
+    ):
         return False
-    if dataframe.index.names[0] is not None:
-        return True
-    return False
+    return True
 
 
 def map_to_pixels(

From 22f8bca971029b0f57dd7186dfb11706041fb64d Mon Sep 17 00:00:00 2001
From: delucchi-cmu <delucchi@andrew.cmu.edu>
Date: Wed, 19 Apr 2023 15:18:56 -0400
Subject: [PATCH 5/5] Expand docstring

---
 src/hipscat_import/catalog/map_reduce.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/hipscat_import/catalog/map_reduce.py b/src/hipscat_import/catalog/map_reduce.py
index 4e1d715e..b2856453 100644
--- a/src/hipscat_import/catalog/map_reduce.py
+++ b/src/hipscat_import/catalog/map_reduce.py
@@ -34,7 +34,7 @@ def _has_named_index(dataframe):
     """Heuristic to determine if a dataframe has some meaningful index.
 
     This will reject dataframes with no index name for a single index,
-    or empty names for multi-index ([] or [None]).
+    or empty names for multi-index (e.g. [] or [None]).
     """
     if dataframe.index.name is not None:
         ## Single index with a given name.
@@ -150,6 +150,24 @@ def reduce_pixel_shards(
 ):
     """Reduce sharded source pixels into destination pixels.
 
+    In addition to combining multiple shards of data into a single
+    parquet file, this method will add a few new columns:
+
+        - `Norder` - the healpix order for the pixel
+        - `Dir` - the directory part, corresponding to the pixel
+        - `Npix` - the healpix pixel
+        - `_hipscat_index` - optional - a spatially-correlated
+            64-bit index field.
+
+    Notes on `_hipscat_index`:
+
+        - if we generate the field, we will promote any previous
+            *named* pandas index field(s) to a column with
+            that name.
+        - see `hipscat.pixel_math.hipscat_id`
+            for more in-depth discussion of this field.
+
+
     Args:
         cache_path (str): where to read intermediate files
         origin_pixel_numbers (list[int]): high order pixels, with object
@@ -160,8 +178,12 @@ def reduce_pixel_shards(
             for the catalog's final pixel
         output_path (str): where to write the final catalog pixel data
         id_column (str): column for survey identifier, or other sortable column
+        add_hipscat_index (bool): should we add a _hipscat_index column to
+            the resulting parquet file?
         delete_input_files (bool): should we delete the intermediate files
             used as input for this method.
+        use_schema_file (str): use the parquet schema from the indicated
+            parquet file.
 
     Raises:
         ValueError: if the number of rows written doesn't equal provided