Support zero column RecordBatches in pyarrow integration (use RecordBatchOptions when converting a pyarrow RecordBatch) #6320

Merged: 6 commits on Aug 31, 2024
Changes from 3 commits
29 changes: 29 additions & 0 deletions arrow-pyarrow-integration-testing/tests/test_sql.py
@@ -476,6 +476,35 @@ def test_tensor_array():

    del b


def test_empty_recordbatch_with_row_count():
Contributor:

I suppose CI is likely always testing with the most recent version of pyarrow, and thus we only really test with the PyCapsule Interface, not with the pyarrow-specific FFI. If you want to ensure you're testing the PyCapsule Interface, you can create a wrapper class around a pa.RecordBatch that only exposes the PyCapsule dunder method:

https://github.com/pola-rs/polars/blob/b2550a092e34aa40f8786f45ff67cab96c93695d/py-polars/tests/unit/constructors/test_constructors.py#L1661-L1676

Then you can be assured that

rust.round_trip_record_batch(PyCapsuleArrayHolder(batch))

is testing the PyCapsule Interface.
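
A minimal sketch of such a wrapper, modeled on the linked polars test (the class name PyCapsuleArrayHolder is assumed here to match the usage above):

class PyCapsuleArrayHolder:
    """Wrap an Arrow-compatible object, exposing only the PyCapsule dunder."""

    def __init__(self, obj):
        self._obj = obj

    def __arrow_c_array__(self, requested_schema=None):
        # Delegate to the wrapped object's PyCapsule export; pyarrow-specific
        # attributes such as num_rows and _export_to_c stay hidden.
        return self._obj.__arrow_c_array__(requested_schema=requested_schema)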

Contributor Author:

It looks like CI runs with at least pyarrow 13 (the last release before capsules) and pyarrow 14:

https://github.com/apache/arrow-rs/actions/runs/10603372118?pr=6320

"""
The result of a `count` on a dataset is a RecordBatch with no columns but with `num_rows` set
"""

# If you know how to create an empty RecordBatch with a specific number of rows, please share
# Create an empty schema with no fields
schema = pa.schema([])

# Create an empty RecordBatch with 0 columns
record_batch = pa.RecordBatch.from_arrays([], schema=schema)

# Set the desired number of rows by creating a table and slicing
num_rows = 5 # Replace with your desired number of rows
empty_table = pa.Table.from_batches([record_batch]).slice(0, num_rows)

# Get the first batch from the table which will have the desired number of rows
batch = empty_table.to_batches()[0]

b = rust.round_trip_record_batch(batch)
assert b == batch
assert b.schema == batch.schema
assert b.schema.metadata == batch.schema.metadata

assert b.num_rows == num_rows

del b

def test_record_batch_reader():
"""
Python -> Rust -> Python
Expand Down
16 changes: 13 additions & 3 deletions arrow/src/pyarrow.rs
@@ -59,7 +59,7 @@ use std::convert::{From, TryFrom};
use std::ptr::{addr_of, addr_of_mut};
use std::sync::Arc;

-use arrow_array::{RecordBatchIterator, RecordBatchReader, StructArray};
+use arrow_array::{RecordBatchIterator, RecordBatchOptions, RecordBatchReader, StructArray};
use pyo3::exceptions::{PyTypeError, PyValueError};
use pyo3::ffi::Py_uintptr_t;
use pyo3::import_exception;
@@ -333,6 +333,15 @@ impl<T: ToPyArrow> ToPyArrow for Vec<T> {

impl FromPyArrow for RecordBatch {
    fn from_pyarrow_bound(value: &Bound<PyAny>) -> PyResult<Self> {
        // Technically `num_rows` is an attribute of `pyarrow.RecordBatch`.
        // Other Python classes can implement the PyCapsule interface without
        // exposing it; for those, the lookup fails quietly and no explicit
        // row count is set.
        let row_count = value
            .getattr("num_rows")
            .ok()
            .and_then(|x| x.extract().ok());
        let options = RecordBatchOptions::default().with_row_count(row_count);

Contributor:

My initial thought is that the PyCapsule interface should handle this, and so this should not be before checking for the pycapsule dunder. If this breaks via the C data interface, I'd like to look for a fix to that.

Contributor:

I'd strongly prefer a non-pyarrow-specific solution to this, or else we'll get the same failure from other Arrow producers.

In kylebarron/arro3#177 I added some tests to arro3 to make sure my (arrow-rs derived) FFI can handle this. It's a bit annoying: the ArrayData will have positive length but then once you import that with makeData, you'll have a StructArray with length 0. I think your most recent commit fixes this.
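
For reference, RecordBatchOptions is the arrow-rs API this fix relies on; a minimal sketch (assuming only the public arrow-array/arrow-schema APIs) of building a zero-column RecordBatch with an explicit row count:

use std::sync::Arc;

use arrow_array::{RecordBatch, RecordBatchOptions};
use arrow_schema::Schema;

fn zero_column_batch(num_rows: usize) -> RecordBatch {
    // With no columns the row count cannot be inferred, so it must be
    // supplied explicitly through RecordBatchOptions.
    let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
    RecordBatch::try_new_with_options(Arc::new(Schema::empty()), vec![], &options)
        .expect("a zero-column batch with an explicit row count is valid")
}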

        // Newer versions of PyArrow as well as other libraries with Arrow data implement this
        // method, so prefer it over _export_to_c.
        // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
@@ -371,7 +380,7 @@ impl FromPyArrow for RecordBatch {
                0,
                "Cannot convert nullable StructArray to RecordBatch, see StructArray documentation"
            );
-            return RecordBatch::try_new(schema, columns).map_err(to_py_err);
+            return RecordBatch::try_new_with_options(schema, columns, &options).map_err(to_py_err);
        }

        validate_class("RecordBatch", value)?;
@@ -386,7 +395,8 @@
            .map(|a| Ok(make_array(ArrayData::from_pyarrow_bound(&a)?)))
            .collect::<PyResult<_>>()?;

-        let batch = RecordBatch::try_new(schema, arrays).map_err(to_py_err)?;
+        let batch =
+            RecordBatch::try_new_with_options(schema, arrays, &options).map_err(to_py_err)?;
        Ok(batch)
    }
}