diff --git a/ci/scripts/csharp_test.sh b/ci/scripts/csharp_test.sh
index 9e4e35dd40d12..e4bed4e35e3d7 100755
--- a/ci/scripts/csharp_test.sh
+++ b/ci/scripts/csharp_test.sh
@@ -21,6 +21,17 @@ set -ex
source_dir=${1}/csharp
+# Python and PyArrow are required for the C Data Interface tests.
+# Honour a caller-supplied PYTHON; otherwise prefer python3 and fall back to python.
+if [ -z "${PYTHON}" ]; then
+  if type python3 > /dev/null 2>&1; then
+    export PYTHON=python3
+  else
+    export PYTHON=python
+  fi
+fi
+# ${PYTHON} is intentionally left unquoted so a multi-word launcher keeps working.
+${PYTHON} -m pip install pyarrow find-libpython
+# Assign first, export second: `export VAR=$(cmd)` returns the exit status of
+# `export`, which would hide a failing find_libpython from `set -e`.
+PYTHONNET_PYDLL=$(${PYTHON} -m find_libpython)
+export PYTHONNET_PYDLL
+
pushd ${source_dir}
dotnet test
for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchema.cs b/csharp/src/Apache.Arrow/C/CArrowSchema.cs
new file mode 100644
index 0000000000000..af01247800655
--- /dev/null
+++ b/csharp/src/Apache.Arrow/C/CArrowSchema.cs
@@ -0,0 +1,124 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+using System;
+using System.Runtime.InteropServices;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.C
+{
+ /// <summary>
+ /// An Arrow C Data Interface Schema, which represents a type, field, or schema.
+ /// </summary>
+ /// <remarks>
+ /// This is used to export <see cref="ArrowType"/>, <see cref="Field"/>, or
+ /// <see cref="Schema"/> to other languages. It matches the layout of the
+ /// ArrowSchema struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h.
+ /// The declaration order of the instance fields is part of that layout and must not change.
+ /// </remarks>
+ [StructLayout(LayoutKind.Sequential)]
+ public unsafe struct CArrowSchema
+ {
+     // Null-terminated UTF-8 strings (see StringUtil); name and metadata may be null.
+     public byte* format;
+     public byte* name;
+     public byte* metadata;
+     // Bit set built from the ArrowFlag* constants below.
+     public long flags;
+     public long n_children;
+     public CArrowSchema** children;
+     public CArrowSchema* dictionary;
+     // Producer-supplied callback; a null value marks the struct as released/uninitialized.
+     public delegate* unmanaged[Stdcall]<CArrowSchema*, void> release;
+     public void* private_data;
+
+     /// <summary>
+     /// For dictionary-encoded types, whether the ordering of dictionary indices is semantically meaningful.
+     /// </summary>
+     public const long ArrowFlagDictionaryOrdered = 1;
+     /// <summary>
+     /// Whether this field is semantically nullable (regardless of whether it actually has null values)
+     /// </summary>
+     public const long ArrowFlagNullable = 2;
+     /// <summary>
+     /// For map types, whether the keys within each map value are sorted.
+     /// </summary>
+     public const long ArrowFlagMapKeysSorted = 4;
+
+     /// <summary>
+     /// Allocate and zero-initialize an unmanaged pointer of this type.
+     /// </summary>
+     /// <remarks>
+     /// This pointer must later be freed by <see cref="Free"/>.
+     /// </remarks>
+     /// <returns>A pointer to a struct whose fields are all null/zero.</returns>
+     public static CArrowSchema* Create()
+     {
+         var ptr = (CArrowSchema*)Marshal.AllocHGlobal(sizeof(CArrowSchema));
+
+         // AllocHGlobal does not clear memory; default(CArrowSchema) zeroes every field,
+         // which in particular leaves `release` null ("not initialized").
+         *ptr = default(CArrowSchema);
+
+         return ptr;
+     }
+
+     /// <summary>
+     /// Free a pointer that was allocated in <see cref="Create"/>.
+     /// </summary>
+     /// <remarks>
+     /// Do not call this on a pointer that was allocated elsewhere.
+     /// If the struct has not been released yet, its release callback is invoked first.
+     /// Passing a null pointer is a no-op.
+     /// </remarks>
+     /// <param name="schema">Pointer previously returned by <see cref="Create"/>, or null.</param>
+     public static void Free(CArrowSchema* schema)
+     {
+         if (schema == null)
+         {
+             return;
+         }
+
+         if (schema->release != null)
+         {
+             // Call release if not already called.
+             schema->release(schema);
+         }
+         Marshal.FreeHGlobal((IntPtr)schema);
+     }
+
+     /// <summary>
+     /// Get the value of a particular flag.
+     /// </summary>
+     /// <remarks>
+     /// Known valid flags are <see cref="ArrowFlagDictionaryOrdered" />,
+     /// <see cref="ArrowFlagNullable" />, and <see cref="ArrowFlagMapKeysSorted" />.
+     /// </remarks>
+     /// <param name="flag">Bit mask to test.</param>
+     /// <returns>True when every bit of <paramref name="flag"/> is set in <see cref="flags"/>.</returns>
+     public readonly bool GetFlag(long flag)
+     {
+         return (flags & flag) == flag;
+     }
+
+     /// <summary>
+     /// Return the child pointer at index <paramref name="i"/> after bounds checking.
+     /// </summary>
+     /// <exception cref="ArgumentOutOfRangeException">
+     /// The index is negative or not below <see cref="n_children"/>, or the children array is null.
+     /// </exception>
+     internal readonly CArrowSchema* GetChild(long i)
+     {
+         // The unsigned comparison rejects negative indices and indices >= n_children in one test.
+         if ((ulong)i >= (ulong)n_children)
+         {
+             throw new ArgumentOutOfRangeException(nameof(i), i, "Child index out of bounds.");
+         }
+         if (children == null)
+         {
+             throw new ArgumentOutOfRangeException(nameof(i), i, "Child index out of bounds: the children pointer is null.");
+         }
+
+         return children[i];
+     }
+ }
+}
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
new file mode 100644
index 0000000000000..5c517f418503a
--- /dev/null
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -0,0 +1,278 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Runtime.InteropServices;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.C
+{
+ public static class CArrowSchemaExporter
+ {
+ /// <summary>
+ /// Managed signature of the C <c>release</c> callback stored in a <see cref="CArrowSchema"/>.
+ /// </summary>
+ private unsafe delegate void ReleaseSchemaCallback(CArrowSchema* schema);
+
+ // The delegate is rooted in a static field for the lifetime of the process. A native function
+ // pointer obtained from a delegate is only valid while that delegate is alive; a temporary
+ // delegate could be collected while foreign code still holds the pointer.
+ private static unsafe readonly ReleaseSchemaCallback s_releaseSchema = new ReleaseSchemaCallback(ReleaseCArrowSchema);
+ private static readonly IntPtr s_releaseSchemaPtr =
+     Marshal.GetFunctionPointerForDelegate<ReleaseSchemaCallback>(s_releaseSchema);
+
+ /// <summary>
+ /// Export a type to a <see cref="CArrowSchema"/>.
+ /// </summary>
+ /// <param name="datatype">The datatype to export</param>
+ /// <param name="schema">An allocated but uninitialized CArrowSchema pointer.</param>
+ /// <exception cref="ArgumentNullException">Either argument is null.</exception>
+ /// <exception cref="ArgumentException">The target struct already has a release callback set.</exception>
+ /// <remarks>
+ /// If the export fails part-way, everything allocated so far is freed and the struct is left
+ /// in the released state (<c>release == null</c>) before the exception propagates.
+ /// </remarks>
+ /// <example>
+ /// <code>
+ /// CArrowSchema* exportPtr = CArrowSchema.Create();
+ /// CArrowSchemaExporter.ExportType(dataType, exportPtr);
+ /// foreign_import_function(exportPtr);
+ /// CArrowSchema.Free(exportPtr);
+ /// </code>
+ /// </example>
+ public static unsafe void ExportType(IArrowType datatype, CArrowSchema* schema)
+ {
+     if (datatype == null)
+     {
+         throw new ArgumentNullException(nameof(datatype));
+     }
+     if (schema == null)
+     {
+         throw new ArgumentNullException(nameof(schema));
+     }
+     if (schema->release != null)
+     {
+         throw new ArgumentException("Cannot export schema to a struct that is already initialized.");
+     }
+
+     // Evaluate everything that can reject the type before touching unmanaged memory.
+     string format = GetFormat(datatype);
+     long flags = GetFlags(datatype);
+
+     // Put the struct into a fully defined state first; the caller's memory may be uninitialized.
+     schema->name = null;
+     schema->metadata = null;
+     schema->flags = flags;
+     schema->n_children = 0;
+     schema->children = null;
+     schema->dictionary = null;
+     schema->private_data = null;
+     schema->format = StringUtil.ToCStringUtf8(format);
+
+     // Installing the callback now lets the failure path below reuse the normal release logic.
+     schema->release = (delegate* unmanaged[Stdcall]<CArrowSchema*, void>)s_releaseSchemaPtr;
+
+     try
+     {
+         schema->children = ConstructChildren(datatype, out var numChildren);
+         schema->n_children = numChildren;
+
+         schema->dictionary = ConstructDictionary(datatype);
+     }
+     catch
+     {
+         // Frees format/children/dictionary and resets release to null.
+         ReleaseCArrowSchema(schema);
+         throw;
+     }
+ }
+
+ /// <summary>
+ /// Export a field to a <see cref="CArrowSchema"/>.
+ /// </summary>
+ /// <param name="field">The field to export</param>
+ /// <param name="schema">An allocated but uninitialized CArrowSchema pointer.</param>
+ /// <exception cref="ArgumentNullException"><paramref name="field"/> or <paramref name="schema"/> is null.</exception>
+ /// <example>
+ /// <code>
+ /// CArrowSchema* exportPtr = CArrowSchema.Create();
+ /// CArrowSchemaExporter.ExportField(field, exportPtr);
+ /// foreign_import_function(exportPtr);
+ /// CArrowSchema.Free(exportPtr);
+ /// </code>
+ /// </example>
+ public static unsafe void ExportField(Field field, CArrowSchema* schema)
+ {
+     if (field == null)
+     {
+         throw new ArgumentNullException(nameof(field));
+     }
+
+     // ExportType validates `schema` and fills in everything that depends only on the data type.
+     ExportType(field.DataType, schema);
+
+     // Overlay the field-level attributes on top of the type-level export.
+     schema->name = StringUtil.ToCStringUtf8(field.Name);
+     // TODO: field metadata
+     schema->metadata = null;
+     // Recompute flags so the field's own nullability replaces the type-only default.
+     schema->flags = GetFlags(field.DataType, field.IsNullable);
+ }
+
+ /// <summary>
+ /// Export a schema to a <see cref="CArrowSchema"/>.
+ /// </summary>
+ /// <param name="schema">The schema to export</param>
+ /// <param name="out_schema">An allocated but uninitialized CArrowSchema pointer.</param>
+ /// <exception cref="ArgumentNullException"><paramref name="schema"/> or <paramref name="out_schema"/> is null.</exception>
+ /// <remarks>
+ /// The schema is exported as a struct type whose children are the schema's fields.
+ /// </remarks>
+ /// <example>
+ /// <code>
+ /// CArrowSchema* exportPtr = CArrowSchema.Create();
+ /// CArrowSchemaExporter.ExportSchema(schema, exportPtr);
+ /// foreign_import_function(exportPtr);
+ /// CArrowSchema.Free(exportPtr);
+ /// </code>
+ /// </example>
+ public static unsafe void ExportSchema(Schema schema, CArrowSchema* out_schema)
+ {
+     if (schema == null)
+     {
+         throw new ArgumentNullException(nameof(schema));
+     }
+     if (out_schema == null)
+     {
+         throw new ArgumentNullException(nameof(out_schema));
+     }
+
+     var structType = new StructType(schema.FieldsList);
+     // TODO: top-level metadata
+     ExportType(structType, out_schema);
+ }
+
+ /// <summary>
+ /// Map a <see cref="TimeUnit"/> to the single-character unit code used in format strings.
+ /// </summary>
+ /// <exception cref="InvalidDataException">The unit is not one of the four known values.</exception>
+ private static char FormatTimeUnit(TimeUnit unit)
+ {
+     switch (unit)
+     {
+         case TimeUnit.Second:
+             return 's';
+         case TimeUnit.Millisecond:
+             return 'm';
+         case TimeUnit.Microsecond:
+             return 'u';
+         case TimeUnit.Nanosecond:
+             return 'n';
+         default:
+             throw new InvalidDataException($"Unsupported time unit for export: {unit}");
+     }
+ }
+
+ /// <summary>
+ /// Build the C Data Interface format string for <paramref name="datatype"/>.
+ /// </summary>
+ /// <remarks>
+ /// Numeric parameters are formatted with the invariant culture because the format string is a
+ /// machine-readable wire format. For dictionary types the format of the index type is returned.
+ /// NOTE(review): the case order is kept from the original; if some type classes derive from
+ /// others listed later (e.g. decimals from fixed-size binary), reordering would change results.
+ /// </remarks>
+ /// <exception cref="NotImplementedException">The type has no export mapping yet.</exception>
+ /// <exception cref="InvalidDataException">A time-like type carries an unknown time unit.</exception>
+ private static string GetFormat(IArrowType datatype)
+ {
+     switch (datatype)
+     {
+         case NullType _: return "n";
+         case BooleanType _: return "b";
+         // Integers
+         case Int8Type _: return "c";
+         case UInt8Type _: return "C";
+         case Int16Type _: return "s";
+         case UInt16Type _: return "S";
+         case Int32Type _: return "i";
+         case UInt32Type _: return "I";
+         case Int64Type _: return "l";
+         case UInt64Type _: return "L";
+         // Floats
+         case HalfFloatType _: return "e";
+         case FloatType _: return "f";
+         case DoubleType _: return "g";
+         // Decimal
+         case Decimal128Type decimalType:
+             return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale}");
+         case Decimal256Type decimalType:
+             return FormattableString.Invariant($"d:{decimalType.Precision},{decimalType.Scale},256");
+         // Binary
+         case BinaryType _: return "z";
+         case StringType _: return "u";
+         case FixedSizeBinaryType binaryType:
+             return FormattableString.Invariant($"w:{binaryType.ByteWidth}");
+         // Date
+         case Date32Type _: return "tdD";
+         case Date64Type _: return "tdm";
+         // Time
+         case Time32Type timeType:
+             return $"tt{FormatTimeUnit(timeType.Unit)}";
+         case Time64Type timeType:
+             // Same prefix as Time32, but allowed time units are different.
+             return $"tt{FormatTimeUnit(timeType.Unit)}";
+         // Timestamp
+         case TimestampType timestampType:
+             // A null timezone formats as the empty string, giving "ts<unit>:".
+             return $"ts{FormatTimeUnit(timestampType.Unit)}:{timestampType.Timezone}";
+         // Nested
+         case ListType _: return "+l";
+         case StructType _: return "+s";
+         // Dictionary
+         case DictionaryType dictionaryType:
+             return GetFormat(dictionaryType.IndexType);
+         default:
+             throw new NotImplementedException($"Exporting {datatype.Name} not implemented");
+     }
+ }
+
+ /// <summary>
+ /// Compute the <see cref="CArrowSchema.flags"/> bit set for a data type.
+ /// </summary>
+ /// <param name="datatype">Type being exported.</param>
+ /// <param name="nullable">Whether to set the nullable bit; defaults to true.</param>
+ /// <exception cref="NotSupportedException">The type id is Map.</exception>
+ private static long GetFlags(IArrowType datatype, bool nullable = true)
+ {
+     long result = nullable ? CArrowSchema.ArrowFlagNullable : 0;
+
+     if (datatype is DictionaryType dictionary && dictionary.Ordered)
+     {
+         result |= CArrowSchema.ArrowFlagDictionaryOrdered;
+     }
+
+     if (datatype.TypeId == ArrowTypeId.Map)
+     {
+         // TODO: when we implement MapType, make sure to set the KEYS_SORTED flag.
+         throw new NotSupportedException("Exporting MapTypes is not supported.");
+     }
+
+     return result;
+ }
+
+ /// <summary>
+ /// Allocate and export one child <see cref="CArrowSchema"/> per field of a nested type.
+ /// </summary>
+ /// <param name="datatype">Type whose children are exported.</param>
+ /// <param name="numChildren">Receives the number of children (0 for non-nested types).</param>
+ /// <returns>
+ /// An unmanaged array of child pointers, or null when <paramref name="datatype"/> is not nested.
+ /// </returns>
+ /// <exception cref="NotSupportedException">The nested type has no fields.</exception>
+ /// <remarks>
+ /// If exporting any child fails, every child allocated so far and the pointer array itself
+ /// are freed before the exception propagates.
+ /// </remarks>
+ private static unsafe CArrowSchema** ConstructChildren(IArrowType datatype, out long numChildren)
+ {
+     if (!(datatype is NestedType nestedType))
+     {
+         numChildren = 0;
+         return null;
+     }
+
+     IReadOnlyList<Field> fields = nestedType.Fields;
+     int numFields = fields.Count;
+     numChildren = numFields;
+     if (numFields == 0)
+     {
+         throw new NotSupportedException("Exporting nested data types with zero children.");
+     }
+
+     var pointerList = (CArrowSchema**)Marshal.AllocHGlobal(numFields * IntPtr.Size);
+
+     // Null out every slot so the failure path can tell which children were allocated.
+     for (int i = 0; i < numFields; i++)
+     {
+         pointerList[i] = null;
+     }
+
+     try
+     {
+         for (int i = 0; i < numFields; i++)
+         {
+             // Record the pointer before exporting so a failing export is still cleaned up.
+             pointerList[i] = CArrowSchema.Create();
+             ExportField(fields[i], pointerList[i]);
+         }
+     }
+     catch
+     {
+         for (int i = 0; i < numFields; i++)
+         {
+             if (pointerList[i] != null)
+             {
+                 // Free releases the child first if its export had completed.
+                 CArrowSchema.Free(pointerList[i]);
+             }
+         }
+         Marshal.FreeHGlobal((IntPtr)pointerList);
+         throw;
+     }
+
+     return pointerList;
+ }
+
+ /// <summary>
+ /// Allocate and export the value-type schema of a dictionary type.
+ /// </summary>
+ /// <returns>
+ /// A newly allocated schema describing the dictionary's value type, or null when
+ /// <paramref name="datatype"/> is not a <see cref="DictionaryType"/>.
+ /// </returns>
+ private static unsafe CArrowSchema* ConstructDictionary(IArrowType datatype)
+ {
+     if (!(datatype is DictionaryType dictType))
+     {
+         return null;
+     }
+
+     CArrowSchema* cSchema = CArrowSchema.Create();
+     try
+     {
+         ExportType(dictType.ValueType, cSchema);
+     }
+     catch
+     {
+         // Do not leak the freshly allocated struct when the value type cannot be exported.
+         CArrowSchema.Free(cSchema);
+         throw;
+     }
+
+     return cSchema;
+ }
+
+ /// <summary>
+ /// Release callback installed on schemas exported by this class: frees the strings,
+ /// children and dictionary owned by the struct and marks it released.
+ /// </summary>
+ /// <remarks>
+ /// Does nothing for a null pointer or a struct whose release callback is already null.
+ /// The struct's own memory is not freed here.
+ /// </remarks>
+ private static unsafe void ReleaseCArrowSchema(CArrowSchema* schema)
+ {
+     if (schema == null || schema->release == null)
+     {
+         return;
+     }
+
+     // Strings were allocated with AllocHGlobal by StringUtil.ToCStringUtf8.
+     Marshal.FreeHGlobal((IntPtr)schema->format);
+     Marshal.FreeHGlobal((IntPtr)schema->name);
+     Marshal.FreeHGlobal((IntPtr)schema->metadata);
+     schema->format = null;
+     schema->name = null;
+     schema->metadata = null;
+
+     if (schema->n_children > 0)
+     {
+         int index = 0;
+         while (index < schema->n_children)
+         {
+             // Free releases the child (recursively) and then frees its struct memory.
+             CArrowSchema.Free(schema->GetChild(index));
+             index++;
+         }
+         Marshal.FreeHGlobal((IntPtr)schema->children);
+     }
+
+     if (schema->dictionary != null)
+     {
+         CArrowSchema.Free(schema->dictionary);
+     }
+
+     // Reset the remaining fields; a null release pointer marks the struct as released.
+     schema->flags = 0;
+     schema->n_children = 0;
+     schema->dictionary = null;
+     schema->children = null;
+     schema->release = null;
+ }
+ }
+}
\ No newline at end of file
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
new file mode 100644
index 0000000000000..8e0b5e21b2383
--- /dev/null
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
@@ -0,0 +1,301 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow.C
+{
+ public static class CArrowSchemaImporter
+ {
+ /// <summary>
+ /// Import C pointer as an <see cref="ArrowType"/>.
+ /// </summary>
+ /// <remarks>
+ /// This will call the release callback on the passed struct, even if
+ /// this function fails.
+ /// </remarks>
+ /// <examples>
+ /// Typically, you will allocate an uninitialized CArrowSchema pointer,
+ /// pass that to external function, and then use this method to import
+ /// the result.
+ ///
+ /// <code>
+ /// CArrowSchema* importedPtr = CArrowSchema.Create();
+ /// foreign_export_function(importedPtr);
+ /// ArrowType importedType = CArrowSchemaImporter.ImportType(importedPtr);
+ /// CArrowSchema.Free(importedPtr);
+ /// </code>
+ /// </examples>
+ public static unsafe ArrowType ImportType(CArrowSchema* ptr)
+ {
+     // Disposing the root wrapper invokes the struct's release callback.
+     using (var rootSchema = new ImportedArrowSchema(ptr))
+     {
+         return rootSchema.GetAsType();
+     }
+ }
+
+ /// <summary>
+ /// Import C pointer as a <see cref="Field"/>.
+ /// </summary>
+ /// <remarks>
+ /// This will call the release callback on the passed struct, even if
+ /// this function fails.
+ /// </remarks>
+ /// <examples>
+ /// Typically, you will allocate an uninitialized CArrowSchema pointer,
+ /// pass that to external function, and then use this method to import
+ /// the result.
+ ///
+ /// <code>
+ /// CArrowSchema* importedPtr = CArrowSchema.Create();
+ /// foreign_export_function(importedPtr);
+ /// Field importedField = CArrowSchemaImporter.ImportField(importedPtr);
+ /// CArrowSchema.Free(importedPtr);
+ /// </code>
+ /// </examples>
+ public static unsafe Field ImportField(CArrowSchema* ptr)
+ {
+     // Disposing the root wrapper invokes the struct's release callback.
+     using (var rootSchema = new ImportedArrowSchema(ptr))
+     {
+         return rootSchema.GetAsField();
+     }
+ }
+
+ /// <summary>
+ /// Import C pointer as a <see cref="Schema"/>.
+ /// </summary>
+ /// <remarks>
+ /// This will call the release callback on the passed struct, even if
+ /// this function fails.
+ /// </remarks>
+ /// <exception cref="ArgumentException">
+ /// The pointer is null, the struct was already released, or the imported type is not a struct type.
+ /// </exception>
+ /// <examples>
+ /// Typically, you will allocate an uninitialized CArrowSchema pointer,
+ /// pass that to external function, and then use this method to import
+ /// the result.
+ ///
+ /// <code>
+ /// CArrowSchema* importedPtr = CArrowSchema.Create();
+ /// foreign_export_function(importedPtr);
+ /// Schema importedSchema = CArrowSchemaImporter.ImportSchema(importedPtr);
+ /// CArrowSchema.Free(importedPtr);
+ /// </code>
+ /// </examples>
+ public static unsafe Schema ImportSchema(CArrowSchema* ptr)
+ {
+     using var importedSchema = new ImportedArrowSchema(ptr);
+     return importedSchema.GetAsSchema();
+ }
+
+ /// <summary>
+ /// Wrapper around a <see cref="CArrowSchema"/> pointer that converts it to managed Arrow
+ /// types. Only a root wrapper invokes the struct's release callback on disposal.
+ /// </summary>
+ private sealed unsafe class ImportedArrowSchema : IDisposable
+ {
+     private readonly CArrowSchema* _cSchema;
+     private readonly bool _isRoot;
+
+     /// <summary>Wrap a root-level schema.</summary>
+     /// <exception cref="ArgumentException">The pointer is null or the struct is already released.</exception>
+     public ImportedArrowSchema(CArrowSchema* cSchema)
+     {
+         if (cSchema == null)
+         {
+             throw new ArgumentException("Passed null pointer for cSchema.");
+         }
+         _cSchema = cSchema;
+         if (_cSchema->release == null)
+         {
+             throw new ArgumentException("Tried to import a schema that has already been released.");
+         }
+         _isRoot = true;
+     }
+
+     /// <summary>Wrap a schema, stating explicitly whether it is the root of the import.</summary>
+     public ImportedArrowSchema(CArrowSchema* handle, bool isRoot) : this(handle)
+     {
+         _isRoot = isRoot;
+     }
+
+     /// <summary>Invoke the release callback if this is an unreleased root schema.</summary>
+     public void Dispose()
+     {
+         // We only call release on a root-level schema, not child ones.
+         if (_isRoot && _cSchema->release != null)
+         {
+             _cSchema->release(_cSchema);
+         }
+     }
+
+     /// <summary>Convert the wrapped struct to an <see cref="ArrowType"/>.</summary>
+     /// <exception cref="InvalidDataException">The struct content is malformed.</exception>
+     /// <exception cref="NotSupportedException">The format string denotes a type that cannot be imported yet.</exception>
+     public ArrowType GetAsType()
+     {
+         string format = StringUtil.PtrToStringUtf8(_cSchema->format);
+         if (format == null)
+         {
+             throw new InvalidDataException("Schema format string must not be null.");
+         }
+
+         // A non-null dictionary pointer means `format` describes the index type.
+         if (_cSchema->dictionary != null)
+         {
+             return ImportDictionaryType(format);
+         }
+
+         // Special handling for nested types
+         if (format == "+l")
+         {
+             if (_cSchema->n_children != 1)
+             {
+                 throw new InvalidDataException("Expected list type to have exactly one child.");
+             }
+             return new ListType(ImportChildField(0, "list"));
+         }
+         if (format == "+s")
+         {
+             var childFields = new List<Field>();
+             for (long i = 0; i < _cSchema->n_children; i++)
+             {
+                 childFields.Add(ImportChildField(i, "struct"));
+             }
+             return new StructType(childFields);
+         }
+         // TODO: Map type and large list type
+
+         // Parameterised formats. Ordinal comparison: these are protocol tokens, not text.
+         if (format.StartsWith("d:", StringComparison.Ordinal))
+         {
+             return ParseDecimalType(format);
+         }
+         if (format.StartsWith("ts", StringComparison.Ordinal))
+         {
+             return ParseTimestampType(format);
+         }
+         if (format.StartsWith("w:", StringComparison.Ordinal))
+         {
+             int width = int.Parse(format.Substring(2), System.Globalization.CultureInfo.InvariantCulture);
+             return new FixedSizeBinaryType(width);
+         }
+
+         return format switch
+         {
+             // Primitives
+             "n" => NullType.Default,
+             "b" => BooleanType.Default,
+             "c" => Int8Type.Default,
+             "C" => UInt8Type.Default,
+             "s" => Int16Type.Default,
+             "S" => UInt16Type.Default,
+             "i" => Int32Type.Default,
+             "I" => UInt32Type.Default,
+             "l" => Int64Type.Default,
+             "L" => UInt64Type.Default,
+             "e" => HalfFloatType.Default,
+             "f" => FloatType.Default,
+             "g" => DoubleType.Default,
+             // Binary data
+             "z" => BinaryType.Default,
+             //"Z" => new LargeBinaryType() // Not yet implemented
+             "u" => StringType.Default,
+             //"U" => new LargeStringType(), // Not yet implemented
+             // Date and time
+             "tdD" => Date32Type.Default,
+             "tdm" => Date64Type.Default,
+             "tts" => new Time32Type(TimeUnit.Second),
+             "ttm" => new Time32Type(TimeUnit.Millisecond),
+             "ttu" => new Time64Type(TimeUnit.Microsecond),
+             "ttn" => new Time64Type(TimeUnit.Nanosecond),
+             // TODO: duration not yet implemented
+             "tiM" => new IntervalType(IntervalUnit.YearMonth),
+             "tiD" => new IntervalType(IntervalUnit.DayTime),
+             //"tin" => new IntervalType(IntervalUnit.MonthDayNanosecond), // Not yet implemented
+             _ => throw new NotSupportedException("Data type is not yet supported in import.")
+         };
+     }
+
+     /// <summary>Convert the wrapped struct to a <see cref="Field"/>; a null name becomes "".</summary>
+     public Field GetAsField()
+     {
+         string name = StringUtil.PtrToStringUtf8(_cSchema->name);
+         string fieldName = string.IsNullOrEmpty(name) ? "" : name;
+
+         bool nullable = _cSchema->GetFlag(CArrowSchema.ArrowFlagNullable);
+
+         return new Field(fieldName, GetAsType(), nullable);
+     }
+
+     /// <summary>Convert the wrapped struct to a <see cref="Schema"/>.</summary>
+     /// <exception cref="ArgumentException">The imported type is not a struct type.</exception>
+     public Schema GetAsSchema()
+     {
+         ArrowType fullType = GetAsType();
+         if (fullType is StructType structType)
+         {
+             return new Schema(structType.Fields, default);
+         }
+         else
+         {
+             throw new ArgumentException("Imported type is not a struct type, so it cannot be converted to a schema.");
+         }
+     }
+
+     // Builds a DictionaryType from the index format plus the attached dictionary schema.
+     private ArrowType ImportDictionaryType(string format)
+     {
+         ArrowType indicesType = format switch
+         {
+             "c" => Int8Type.Default,
+             "C" => UInt8Type.Default,
+             "s" => Int16Type.Default,
+             "S" => UInt16Type.Default,
+             "i" => Int32Type.Default,
+             "I" => UInt32Type.Default,
+             "l" => Int64Type.Default,
+             "L" => UInt64Type.Default,
+             _ => throw new InvalidDataException($"Indices must be an integer, but got format string {format}"),
+         };
+
+         var dictionarySchema = new ImportedArrowSchema(_cSchema->dictionary, isRoot: false);
+         ArrowType dictionaryType = dictionarySchema.GetAsType();
+
+         bool ordered = _cSchema->GetFlag(CArrowSchema.ArrowFlagDictionaryOrdered);
+
+         return new DictionaryType(indicesType, dictionaryType, ordered);
+     }
+
+     // Imports child `index` as a Field; `parentKind` only feeds the error message.
+     private Field ImportChildField(long index, string parentKind)
+     {
+         CArrowSchema* child = _cSchema->GetChild(index);
+         if (child == null)
+         {
+             throw new InvalidDataException($"Expected {parentKind} type child to be non-null.");
+         }
+         return new ImportedArrowSchema(child, isRoot: false).GetAsField();
+     }
+
+     // Parses "d:precision,scale" or "d:precision,scale,bitWidth" (bit width defaults to 128).
+     private static ArrowType ParseDecimalType(string format)
+     {
+         string[] parameters = format.Substring(2).Split(',');
+         if (parameters.Length < 2 || parameters.Length > 3)
+         {
+             throw new InvalidDataException($"Invalid decimal format string {format}");
+         }
+
+         var invariant = System.Globalization.CultureInfo.InvariantCulture;
+         int precision = int.Parse(parameters[0], invariant);
+         int scale = int.Parse(parameters[1], invariant);
+         int bitWidth = parameters.Length == 3 ? int.Parse(parameters[2], invariant) : 128;
+
+         switch (bitWidth)
+         {
+             case 128:
+                 return new Decimal128Type(precision, scale);
+             case 256:
+                 return new Decimal256Type(precision, scale);
+             default:
+                 // Previously any width other than 256 was silently imported as 128-bit.
+                 throw new NotSupportedException($"Decimal bit width {bitWidth} is not supported in import.");
+         }
+     }
+
+     // Parses "ts<unit>:<timezone>"; the timezone may be empty and may itself contain ':'.
+     private static ArrowType ParseTimestampType(string format)
+     {
+         if (format.Length < 4 || format[3] != ':')
+         {
+             throw new InvalidDataException($"Invalid timestamp format string {format}");
+         }
+
+         TimeUnit timeUnit = format[2] switch
+         {
+             's' => TimeUnit.Second,
+             'm' => TimeUnit.Millisecond,
+             'u' => TimeUnit.Microsecond,
+             'n' => TimeUnit.Nanosecond,
+             _ => throw new InvalidDataException($"Unsupported time unit for import: {format[2]}"),
+         };
+
+         // Everything after the first ':' is the timezone, so offsets like "+05:30" stay intact.
+         string timezone = format.Substring(4);
+         return new TimestampType(timeUnit, timezone);
+     }
+ }
+ }
+}
\ No newline at end of file
diff --git a/csharp/src/Apache.Arrow/C/StringUtil.cs b/csharp/src/Apache.Arrow/C/StringUtil.cs
new file mode 100644
index 0000000000000..9c16493fbb312
--- /dev/null
+++ b/csharp/src/Apache.Arrow/C/StringUtil.cs
@@ -0,0 +1,63 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Runtime.InteropServices;
+
+// Adapted from:
+// https://github.com/G-Research/ParquetSharp/blob/467d99298fb5a5b9d5935b9c8dbde95e63954dd3/csharp/StringUtil.cs
+
+namespace Apache.Arrow.C
+{
+
+ /// <summary>
+ /// Helpers for converting between managed strings and null-terminated UTF-8 C strings.
+ /// </summary>
+ internal static class StringUtil
+ {
+     /// <summary>
+     /// Copy <paramref name="str"/> into newly allocated unmanaged memory as null-terminated UTF-8.
+     /// </summary>
+     /// <returns>Pointer allocated with <see cref="Marshal.AllocHGlobal(int)"/>; free it with <see cref="Marshal.FreeHGlobal"/>.</returns>
+     /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
+     public static unsafe byte* ToCStringUtf8(string str)
+     {
+         if (str == null)
+         {
+             throw new ArgumentNullException(nameof(str));
+         }
+
+         var utf8 = System.Text.Encoding.UTF8;
+         int byteCount = utf8.GetByteCount(str);
+         // One extra byte for the terminating NUL.
+         byte* byteArray = (byte*)Marshal.AllocHGlobal(byteCount + 1);
+
+         fixed (char* chars = str)
+         {
+             utf8.GetBytes(chars, str.Length, byteArray, byteCount);
+         }
+
+         // Need to make sure it is null-terminated.
+         byteArray[byteCount] = 0;
+
+         return byteArray;
+     }
+
+     /// <summary>
+     /// Decode a null-terminated UTF-8 C string into a managed string.
+     /// </summary>
+     /// <returns>The decoded string, or null when <paramref name="ptr"/> is null.</returns>
+     public static unsafe string PtrToStringUtf8(byte* ptr)
+     {
+ #if NETSTANDARD2_1_OR_GREATER
+         // Marshal.PtrToStringUTF8 takes an IntPtr, so the byte* must be cast explicitly.
+         return Marshal.PtrToStringUTF8((IntPtr)ptr);
+ #else
+         if (ptr == null)
+         {
+             return null;
+         }
+
+         // Scan for the terminating NUL to find the byte length.
+         int length;
+         for (length = 0; ptr[length] != '\0'; ++length)
+         {
+         }
+
+         return System.Text.Encoding.UTF8.GetString(ptr, length);
+ #endif
+     }
+ }
+}
diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index a34eaca7a6c5c..cdbfe479470a4 100644
--- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -13,6 +13,8 @@
all
runtime; build; native; contentfiles; analyzers
+
+
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
new file mode 100644
index 0000000000000..82e0f37dd7278
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -0,0 +1,329 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.C;
+using Apache.Arrow.Types;
+using Python.Runtime;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ public class CDataSchemaPythonTest
+ {
+ /// <summary>
+ /// Runs before each test: skips when no Python runtime is configured (unless running
+ /// under GitHub Actions), then initializes the embedded Python engine.
+ /// </summary>
+ public CDataSchemaPythonTest()
+ {
+     string pythonDll = Environment.GetEnvironmentVariable("PYTHONNET_PYDLL");
+     string githubActions = Environment.GetEnvironmentVariable("GITHUB_ACTIONS");
+
+     // We only skip if this is not in CI
+     bool skipTests = pythonDll == null && githubActions != "true";
+     Skip.If(skipTests, "PYTHONNET_PYDLL not set; skipping C Data Interface tests.");
+
+     PythonEngine.Initialize();
+ }
+
+ /// <summary>
+ /// Build the managed reference schema used by every test in this class: one field per
+ /// supported type, in a fixed order, with the given names and nullability.
+ /// </summary>
+ private static Schema GetTestSchema()
+ {
+     using (Py.GIL())
+     {
+         var columns = new (string Name, IArrowType Type, bool Nullable)[]
+         {
+             ("null", NullType.Default, true),
+             ("bool", BooleanType.Default, true),
+             ("i8", Int8Type.Default, true),
+             ("u8", UInt8Type.Default, true),
+             ("i16", Int16Type.Default, true),
+             ("u16", UInt16Type.Default, true),
+             ("i32", Int32Type.Default, true),
+             ("u32", UInt32Type.Default, true),
+             ("i64", Int64Type.Default, true),
+             ("u64", UInt64Type.Default, true),
+
+             ("f16", HalfFloatType.Default, true),
+             ("f32", FloatType.Default, true),
+             ("f64", DoubleType.Default, true),
+
+             ("decimal128_19_3", new Decimal128Type(19, 3), true),
+             ("decimal256_19_3", new Decimal256Type(19, 3), true),
+             ("decimal256_40_2", new Decimal256Type(40, 2), false),
+
+             ("binary", BinaryType.Default, false),
+             ("string", StringType.Default, false),
+             ("fw_binary_10", new FixedSizeBinaryType(10), false),
+
+             ("date32", Date32Type.Default, false),
+             ("date64", Date64Type.Default, false),
+             ("time32_s", new Time32Type(TimeUnit.Second), false),
+             ("time32_ms", new Time32Type(TimeUnit.Millisecond), false),
+             ("time64_us", new Time64Type(TimeUnit.Microsecond), false),
+             ("time64_ns", new Time64Type(TimeUnit.Nanosecond), false),
+
+             ("timestamp_ns", new TimestampType(TimeUnit.Nanosecond, ""), false),
+             ("timestamp_us", new TimestampType(TimeUnit.Microsecond, ""), false),
+             ("timestamp_us_paris", new TimestampType(TimeUnit.Microsecond, "Europe/Paris"), true),
+
+             ("list_string", new ListType(StringType.Default), false),
+             ("list_list_i32", new ListType(new ListType(Int32Type.Default)), false),
+
+             ("dict_string", new DictionaryType(Int32Type.Default, StringType.Default, false), false),
+             ("dict_string_ordered", new DictionaryType(Int32Type.Default, StringType.Default, true), false),
+             ("list_dict_string", new ListType(new DictionaryType(Int32Type.Default, StringType.Default, false)), false),
+
+             // Checking wider characters.
+             ("hello 你好 😄", BooleanType.Default, true),
+         };
+
+         var builder = new Schema.Builder();
+         foreach (var (name, type, nullable) in columns)
+         {
+             builder = builder.Field(f => f.Name(name).DataType(type).Nullable(nullable));
+         }
+
+         return builder.Build();
+     }
+ }
+
+ // Lazily yields one pyarrow field per entry of the managed test schema, in the same
+ // order and with the same names/nullability, so tests can zip the two sequences.
+ // This is an iterator: nothing runs until the result is enumerated, and because the
+ // yields sit inside the using block, the Py.GIL() scope is entered on the first
+ // MoveNext and only left when enumeration finishes or the enumerator is disposed.
+ private static IEnumerable GetPythonFields()
+ {
+ using (Py.GIL())
+ {
+ dynamic pa = Py.Import("pyarrow");
+ // The pyarrow attribute named "null" is fetched via GetAttr rather than member
+ // syntax -- presumably because `null` is a C# keyword.
+ yield return pa.field("null", pa.GetAttr("null").Invoke(), true);
+ yield return pa.field("bool", pa.bool_(), true);
+ yield return pa.field("i8", pa.int8(), true);
+ yield return pa.field("u8", pa.uint8(), true);
+ yield return pa.field("i16", pa.int16(), true);
+ yield return pa.field("u16", pa.uint16(), true);
+ yield return pa.field("i32", pa.int32(), true);
+ yield return pa.field("u32", pa.uint32(), true);
+ yield return pa.field("i64", pa.int64(), true);
+ yield return pa.field("u64", pa.uint64(), true);
+
+ yield return pa.field("f16", pa.float16(), true);
+ yield return pa.field("f32", pa.float32(), true);
+ yield return pa.field("f64", pa.float64(), true);
+
+ yield return pa.field("decimal128_19_3", pa.decimal128(19, 3), true);
+ yield return pa.field("decimal256_19_3", pa.decimal256(19, 3), true);
+ yield return pa.field("decimal256_40_2", pa.decimal256(40, 2), false);
+
+ yield return pa.field("binary", pa.binary(), false);
+ yield return pa.field("string", pa.utf8(), false);
+ yield return pa.field("fw_binary_10", pa.binary(10), false);
+
+ yield return pa.field("date32", pa.date32(), false);
+ yield return pa.field("date64", pa.date64(), false);
+ yield return pa.field("time32_s", pa.time32("s"), false);
+ yield return pa.field("time32_ms", pa.time32("ms"), false);
+ yield return pa.field("time64_us", pa.time64("us"), false);
+ yield return pa.field("time64_ns", pa.time64("ns"), false);
+
+ // The first two timestamps pass no timezone argument.
+ yield return pa.field("timestamp_ns", pa.timestamp("ns"), false);
+ yield return pa.field("timestamp_us", pa.timestamp("us"), false);
+ yield return pa.field("timestamp_us_paris", pa.timestamp("us", "Europe/Paris"), true);
+
+ yield return pa.field("list_string", pa.list_(pa.utf8()), false);
+ yield return pa.field("list_list_i32", pa.list_(pa.list_(pa.int32())), false);
+
+ // Third argument of pa.dictionary is the `ordered` flag.
+ yield return pa.field("dict_string", pa.dictionary(pa.int32(), pa.utf8(), false), false);
+ yield return pa.field("dict_string_ordered", pa.dictionary(pa.int32(), pa.utf8(), true), false);
+ yield return pa.field("list_dict_string", pa.list_(pa.dictionary(pa.int32(), pa.utf8(), false)), false);
+
+ // Non-ASCII field name, mirroring the "wider characters" entry of the managed schema.
+ yield return pa.field("hello 你好 😄", pa.bool_(), true);
+ }
+ }
+
+ /// <summary>
+ /// Build a pyarrow schema from the fields produced by <see cref="GetPythonFields"/>.
+ /// </summary>
+ private static dynamic GetPythonSchema()
+ {
+     using (Py.GIL())
+     {
+         dynamic pyarrow = Py.Import("pyarrow");
+         // Materialize the lazy field sequence so pyarrow receives a concrete list.
+         var fieldList = GetPythonFields().ToList();
+         dynamic pySchema = pyarrow.schema(fieldList);
+         return pySchema;
+     }
+ }
+
+ // Schemas created in Python, used in CSharp
+ /// <summary>
+ /// Exports each pyarrow data type through the C Data Interface and checks that the
+ /// imported managed type matches the corresponding field type of the reference schema.
+ /// </summary>
+ [SkippableFact]
+ public unsafe void ImportType()
+ {
+     Schema schema = GetTestSchema();
+     IEnumerable<dynamic> pyFields = GetPythonFields();
+
+     foreach ((Field field, dynamic pyField) in schema.FieldsList
+         .Zip(pyFields))
+     {
+         CArrowSchema* cSchema = CArrowSchema.Create();
+         try
+         {
+             using (Py.GIL())
+             {
+                 dynamic pyDatatype = pyField.type;
+                 // Python expects the pointer as an integer
+                 long longPtr = ((IntPtr)cSchema).ToInt64();
+                 pyDatatype._export_to_c(longPtr);
+             }
+
+             var dataTypeComparer = new ArrayTypeComparer(field.DataType);
+             ArrowType importedType = CArrowSchemaImporter.ImportType(cSchema);
+             dataTypeComparer.Visit(importedType);
+
+             if (importedType is DictionaryType importedDictType)
+             {
+                 Assert.Equal(((DictionaryType)field.DataType).Ordered, importedDictType.Ordered);
+             }
+         }
+         finally
+         {
+             // Since we allocated, we are responsible for freeing the pointer,
+             // including when an assertion above fails.
+             CArrowSchema.Free(cSchema);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Exports each pyarrow field through the C Data Interface and checks that the imported
+ /// managed field equals the corresponding field of the reference schema.
+ /// </summary>
+ [SkippableFact]
+ public unsafe void ImportField()
+ {
+     Schema schema = GetTestSchema();
+     IEnumerable<dynamic> pyFields = GetPythonFields();
+
+     foreach ((Field field, dynamic pyField) in schema.FieldsList
+         .Zip(pyFields))
+     {
+         CArrowSchema* cSchema = CArrowSchema.Create();
+         try
+         {
+             using (Py.GIL())
+             {
+                 // Python expects the pointer as an integer
+                 long longPtr = ((IntPtr)cSchema).ToInt64();
+                 pyField._export_to_c(longPtr);
+             }
+
+             Field importedField = CArrowSchemaImporter.ImportField(cSchema);
+             FieldComparer.Compare(field, importedField);
+         }
+         finally
+         {
+             // Since we allocated, we are responsible for freeing the pointer,
+             // including when the comparison above fails.
+             CArrowSchema.Free(cSchema);
+         }
+     }
+ }
+
+        // Round-trips a whole schema from Python into C#: the pyarrow schema from
+        // GetPythonSchema() is exported into a freshly allocated CArrowSchema, read back with
+        // CArrowSchemaImporter.ImportSchema, and compared with GetTestSchema().
+        [SkippableFact]
+        public unsafe void ImportSchema()
+        {
+            Schema schema = GetTestSchema();
+            dynamic pySchema = GetPythonSchema();
+
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            try
+            {
+                using (Py.GIL())
+                {
+                    // Python expects the pointer as an integer
+                    long longPtr = ((IntPtr)cSchema).ToInt64();
+                    pySchema._export_to_c(longPtr);
+                }
+
+                Schema importedSchema = CArrowSchemaImporter.ImportSchema(cSchema);
+                SchemaComparer.Compare(schema, importedSchema);
+            }
+            finally
+            {
+                // Since we allocated, we are responsible for freeing the pointer,
+                // including when the export, the import, or the comparison above throws.
+                CArrowSchema.Free(cSchema);
+            }
+        }
+
+
+ // Schemas created in CSharp, exported to Python
+        // Round-trips each data type from C# into Python: CArrowSchemaExporter.ExportType
+        // fills a freshly allocated CArrowSchema, pyarrow imports it through
+        // pa.DataType._import_from_c, and the result is compared with the type of the matching
+        // Python field. Afterwards the struct is checked to be in its released state.
+        [SkippableFact]
+        public unsafe void ExportType()
+        {
+            Schema schema = GetTestSchema();
+            var pyFields = GetPythonFields();
+
+            foreach ((Field field, dynamic pyField) in schema.FieldsList
+                .Zip(pyFields))
+            {
+                IArrowType datatype = field.DataType;
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportType(datatype, cSchema);
+
+                    // For Python, we need to provide the pointer
+                    long longPtr = ((IntPtr)cSchema).ToInt64();
+
+                    using (Py.GIL())
+                    {
+                        dynamic pa = Py.Import("pyarrow");
+                        dynamic expectedPyType = pyField.type;
+                        dynamic exportedPyType = pa.DataType._import_from_c(longPtr);
+                        Assert.True(exportedPyType == expectedPyType);
+
+                        if (pa.types.is_dictionary(exportedPyType))
+                        {
+                            Assert.Equal(expectedPyType.ordered, exportedPyType.ordered);
+                        }
+                    }
+
+                    // Python should have called release once `exportedPyType` went out-of-scope.
+                    Assert.True(cSchema->release == null);
+                    Assert.True(cSchema->format == null);
+                    Assert.Equal(0, cSchema->flags);
+                    Assert.Equal(0, cSchema->n_children);
+                    Assert.True(cSchema->dictionary == null);
+                }
+                finally
+                {
+                    // Since we allocated, we are responsible for freeing the pointer,
+                    // including when the export, the Python import, or an assertion above throws.
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+        }
+
+        // Round-trips each field from C# into Python: CArrowSchemaExporter.ExportField fills a
+        // freshly allocated CArrowSchema, pyarrow imports it through pa.Field._import_from_c,
+        // and the result is compared with the matching Python field. Afterwards the struct is
+        // checked to be in its released state.
+        [SkippableFact]
+        public unsafe void ExportField()
+        {
+            Schema schema = GetTestSchema();
+            var pyFields = GetPythonFields();
+
+            foreach ((Field field, dynamic pyField) in schema.FieldsList
+                .Zip(pyFields))
+            {
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportField(field, cSchema);
+
+                    // For Python, we need to provide the pointer
+                    long longPtr = ((IntPtr)cSchema).ToInt64();
+
+                    using (Py.GIL())
+                    {
+                        dynamic pa = Py.Import("pyarrow");
+                        dynamic exportedPyField = pa.Field._import_from_c(longPtr);
+                        Assert.True(exportedPyField == pyField);
+                    }
+
+                    // Python should have called release once `exportedPyField` went out-of-scope.
+                    Assert.True(cSchema->name == null);
+                    Assert.True(cSchema->release == null);
+                    Assert.True(cSchema->format == null);
+                }
+                finally
+                {
+                    // Since we allocated, we are responsible for freeing the pointer,
+                    // including when the export, the Python import, or an assertion above throws.
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+        }
+
+        // Round-trips a whole schema from C# into Python: CArrowSchemaExporter.ExportSchema
+        // fills a freshly allocated CArrowSchema, pyarrow imports it through
+        // pa.Schema._import_from_c, and the result is compared with GetPythonSchema().
+        [SkippableFact]
+        public unsafe void ExportSchema()
+        {
+            Schema schema = GetTestSchema();
+            dynamic pySchema = GetPythonSchema();
+
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            try
+            {
+                CArrowSchemaExporter.ExportSchema(schema, cSchema);
+
+                // For Python, we need to provide the pointer
+                long longPtr = ((IntPtr)cSchema).ToInt64();
+
+                using (Py.GIL())
+                {
+                    dynamic pa = Py.Import("pyarrow");
+                    dynamic exportedPySchema = pa.Schema._import_from_c(longPtr);
+                    Assert.True(exportedPySchema == pySchema);
+                }
+            }
+            finally
+            {
+                // Since we allocated, we are responsible for freeing the pointer,
+                // including when the export, the Python import, or the assertion above throws.
+                CArrowSchema.Free(cSchema);
+            }
+        }
+ }
+}
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs
new file mode 100644
index 0000000000000..357a18816cacc
--- /dev/null
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs
@@ -0,0 +1,119 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Runtime.InteropServices;
+using Apache.Arrow.C;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+    // Tests for the C Data Interface schema struct (CArrowSchema) that do not need Python:
+    // initial state after Create(), flag export, and release-callback handling on import.
+    // Every test frees its allocation in a finally block so a failing assertion cannot leak it.
+    public class CDataSchemaTest
+    {
+        // Checks that the members inspected below are all null/zero right after Create().
+        [Fact]
+        public unsafe void InitializeZeroed()
+        {
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            try
+            {
+                Assert.True(cSchema->format == null);
+                Assert.True(cSchema->name == null);
+                Assert.True(cSchema->metadata == null);
+                Assert.Equal(0, cSchema->flags);
+                Assert.Equal(0, cSchema->n_children);
+                Assert.True(cSchema->children == null);
+                Assert.True(cSchema->dictionary == null);
+                Assert.True(cSchema->release == null);
+                Assert.True(cSchema->private_data == null);
+            }
+            finally
+            {
+                CArrowSchema.Free(cSchema);
+            }
+        }
+
+        // Checks that the nullable and dictionary-ordered flags read back through GetFlag()
+        // match the field / dictionary type that was exported.
+        [Fact]
+        public unsafe void FlagsSet()
+        {
+            // Non-nullable field
+            {
+                var nonNullField = new Field("non_null", Int32Type.Default, false);
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportField(nonNullField, cSchema);
+                    Assert.False(cSchema->GetFlag(CArrowSchema.ArrowFlagNullable));
+                }
+                finally
+                {
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+
+            // Nullable field
+            {
+                var nullableField = new Field("nullable", Int32Type.Default, true);
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportField(nullableField, cSchema);
+                    Assert.True(cSchema->GetFlag(CArrowSchema.ArrowFlagNullable));
+                }
+                finally
+                {
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+
+            // dictionary ordered
+            {
+                var orderedDictionary = new DictionaryType(Int32Type.Default, StringType.Default, true);
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportType(orderedDictionary, cSchema);
+                    Assert.True(cSchema->GetFlag(CArrowSchema.ArrowFlagDictionaryOrdered));
+                }
+                finally
+                {
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+
+            // dictionary unordered
+            {
+                var unorderedDictionary = new DictionaryType(Int32Type.Default, StringType.Default, false);
+                CArrowSchema* cSchema = CArrowSchema.Create();
+                try
+                {
+                    CArrowSchemaExporter.ExportType(unorderedDictionary, cSchema);
+                    Assert.False(cSchema->GetFlag(CArrowSchema.ArrowFlagDictionaryOrdered));
+                }
+                finally
+                {
+                    CArrowSchema.Free(cSchema);
+                }
+            }
+        }
+
+        // Checks that importing a validly exported type clears the struct's release pointer.
+        [Fact]
+        public unsafe void CallsReleaseForValid()
+        {
+            CArrowSchema* cSchema = CArrowSchema.Create();
+            try
+            {
+                CArrowSchemaExporter.ExportType(Int32Type.Default, cSchema);
+                Assert.False(cSchema->release == null);
+                CArrowSchemaImporter.ImportType(cSchema);
+                Assert.True(cSchema->release == null);
+            }
+            finally
+            {
+                CArrowSchema.Free(cSchema);
+            }
+        }
+
+        [Fact]
+        public unsafe void CallsReleaseForInvalid()
+        {
+            // Make sure we call release callback, even if the imported schema
+            // is invalid.
+            CArrowSchema* cSchema = CArrowSchema.Create();
+
+            bool wasCalled = false;
+            // The parameter is named differently from the outer local to avoid shadowing it.
+            var releaseCallback = (CArrowSchema* releasedSchema) =>
+            {
+                wasCalled = true;
+                releasedSchema->release = null;
+            };
+
+            try
+            {
+                cSchema->release = (delegate* unmanaged[Stdcall])Marshal.GetFunctionPointerForDelegate(
+                    releaseCallback);
+
+                Assert.Throws(() =>
+                {
+                    CArrowSchemaImporter.ImportType(cSchema);
+                });
+                Assert.True(wasCalled);
+            }
+            finally
+            {
+                CArrowSchema.Free(cSchema);
+
+                // The garbage collector does not track the native function pointer, so the
+                // delegate must stay reachable until nothing can call through it anymore.
+                GC.KeepAlive(releaseCallback);
+            }
+        }
+    }
+}