Skip to content

Commit

Permalink
docs: document C data interface
Browse files Browse the repository at this point in the history
  • Loading branch information
wjones127 committed Feb 24, 2023
1 parent 64f56d5 commit e4f2528
Show file tree
Hide file tree
Showing 2 changed files with 102 additions and 49 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,21 @@
using Apache.Arrow.Types;

[UnmanagedFunctionPointer(CallingConvention.StdCall)]
public delegate void ReleaseFFIArrowSchema(IntPtr schema);
public delegate void ReleaseCArrowSchema(IntPtr schema);

namespace Apache.Arrow.C
{


/// <summary>
/// An Arrow C Data Interface Schema, which represents a type, field, or schema.
/// </summary>
///
/// <remarks>
/// This is used to export <see cref="ArrowType"/>, <see cref="Field"/>, or
/// <see cref="Schema"/> to other languages. It matches the layout of the
/// ArrowSchema struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
unsafe public struct FFIArrowSchema
unsafe public struct CArrowSchema
{
[MarshalAs(UnmanagedType.LPStr)]
public string format;
Expand All @@ -44,7 +51,7 @@ unsafe public struct FFIArrowSchema
public IntPtr dictionary;
[MarshalAs(UnmanagedType.FunctionPtr)]

public ReleaseFFIArrowSchema release;
public ReleaseCArrowSchema release;
// Check this out: https://github.com/G-Research/ParquetSharp/blob/386d91bd5e6fe6cb81583803447023c1359957c8/csharp/ParquetHandle.cs#L8
public IntPtr private_data;

Expand Down Expand Up @@ -87,9 +94,9 @@ private static long GetFlags(IArrowType datatype, bool nullable = true)

for (var i = 0; i < n_fields; i++)
{
var ffi_schema = new FFIArrowSchema();
FFIArrowSchema.ExportField(fields[i], out ffi_schema);
IntPtr exported_schema = ffi_schema.AllocateAsPtr();
var c_schema = new CArrowSchema();
CArrowSchema.ExportField(fields[i], out c_schema);
IntPtr exported_schema = c_schema.AllocateAsPtr();
pointer_list[i] = exported_schema;
}

Expand All @@ -105,18 +112,23 @@ private static IntPtr ConstructDictionary(IArrowType datatype)
{
if (datatype is DictionaryType)
{
var ffi_schema = new FFIArrowSchema();
var c_schema = new CArrowSchema();
var value_type = ((DictionaryType)datatype).ValueType;
FFIArrowSchema.ExportDataType(value_type, out ffi_schema);
return ffi_schema.AllocateAsPtr();
CArrowSchema.ExportDataType(value_type, out c_schema);
return c_schema.AllocateAsPtr();
}
else
{
return IntPtr.Zero;
}
}

public static void ExportDataType(IArrowType datatype, out FFIArrowSchema schema)
/// <summary>
/// Initialize the exported C schema as an Arrow type.
/// </summary>
/// <param name="datatype">The Arrow type to export.</param>
/// <param name="schema">An uninitialized CArrowSchema.</param>
public static void ExportDataType(IArrowType datatype, out CArrowSchema schema)
{
schema.format = GetFormat(datatype);
schema.name = null;
Expand All @@ -130,26 +142,32 @@ public static void ExportDataType(IArrowType datatype, out FFIArrowSchema schema

schema.release = (IntPtr self) =>
{
var schema = Marshal.PtrToStructure<FFIArrowSchema>(self);
var schema = Marshal.PtrToStructure<CArrowSchema>(self);
if (schema.n_children > 0)
{
for (int i = 0; i < schema.n_children; i++)
{
FreePtr(schema.children[i]);
}
Marshal.FreeHGlobal((IntPtr)schema.children);
}

if (schema.dictionary != IntPtr.Zero)
{
FreePtr(schema.dictionary);
}
Marshal.DestroyStructure<FFIArrowSchema>(self);
Marshal.DestroyStructure<CArrowSchema>(self);
};

schema.private_data = IntPtr.Zero;
}

public static void ExportField(Field field, out FFIArrowSchema schema)
/// <summary>
/// Initialize the exported C schema as a field.
/// </summary>
/// <param name="field">Field to export.</param>
/// <param name="schema">An uninitialized CArrowSchema.</param>
public static void ExportField(Field field, out CArrowSchema schema)
{
ExportDataType(field.DataType, out schema);
schema.name = field.Name;
Expand All @@ -158,23 +176,41 @@ public static void ExportField(Field field, out FFIArrowSchema schema)
schema.flags = GetFlags(field.DataType, field.IsNullable);
}

public static void ExportSchema(Schema schema, out FFIArrowSchema out_schema)
/// <summary>
/// Initialize the exported C schema from a <see cref="Schema"/>, exporting it as a
/// struct type constructed from the schema's fields.
/// </summary>
/// <param name="schema">Schema to export.</param>
/// <param name="out_schema">An uninitialized CArrowSchema.</param>
public static void ExportSchema(Schema schema, out CArrowSchema out_schema)
{
    // TODO: top-level metadata
    // Wrap the schema's fields in a struct type and export that type.
    ExportDataType(new StructType(schema.Fields.Values.ToList()), out out_schema);
}

/// <summary>
/// Allocate an unmanaged pointer and copy this instance's data to it.
/// </summary>
/// <remarks>
/// To avoid a memory leak, you must call <see cref="FreePtr"/> on this
/// pointer when done using it.
/// </remarks>
public IntPtr AllocateAsPtr()
{
IntPtr ptr = Marshal.AllocHGlobal(Marshal.SizeOf(this));
Marshal.StructureToPtr<FFIArrowSchema>(this, ptr, false);
Marshal.StructureToPtr<CArrowSchema>(this, ptr, false);
return ptr;
}

/// <summary>
/// Free a pointer that was allocated in <see cref="AllocateAsPtr"/>.
/// </summary>
/// <remarks>
/// Do not call this on a pointer that was allocated elsewhere.
/// </remarks>
public static void FreePtr(IntPtr ptr)
{
var schema = Marshal.PtrToStructure<FFIArrowSchema>(ptr);
var schema = Marshal.PtrToStructure<CArrowSchema>(ptr);
if (schema.release != null)
{
// Call release if not already called.
Expand All @@ -190,7 +226,7 @@ public static void FreePtr(IntPtr ptr)
/// <returns></returns>
public IntPtr Export(IntPtr ptr)
{
Marshal.StructureToPtr<FFIArrowSchema>(this, ptr, false);
Marshal.StructureToPtr<CArrowSchema>(this, ptr, false);
return ptr;
}

Expand Down Expand Up @@ -302,15 +338,32 @@ public void Visit(IArrowType type)
}
}

public class ImportedArrowSchema : IDisposable
/// <summary>
/// A <see cref="CArrowSchema"/> imported from somewhere else.
/// </summary>
///
/// <example>
/// Typically, when importing a schema we will allocate an uninitialized
/// <see cref="CArrowSchema"/>, pass the pointer to the foreign function,
/// then construct this class with the initialized pointer.
///
/// <code>
/// var c_schema = new CArrowSchema();
/// IntPtr imported_ptr = c_schema.AllocateAsPtr();
/// foreign_export_function(imported_ptr);
/// var imported_type = new ImportedArrowSchema(imported_ptr);
/// ArrowType arrow_type = imported_type.GetAsType();
/// </code>
/// </example>
public sealed class ImportedArrowSchema : IDisposable
{
private FFIArrowSchema _data;
private CArrowSchema _data;
private IntPtr _handle;
private bool _is_root;

public ImportedArrowSchema(IntPtr handle)
{
_data = Marshal.PtrToStructure<FFIArrowSchema>(handle);
_data = Marshal.PtrToStructure<CArrowSchema>(handle);
if (_data.release == null)
{
throw new Exception("Tried to import a schema that has already been released.");
Expand Down Expand Up @@ -353,7 +406,7 @@ public ArrowType GetAsType()
var dictionary_schema = new ImportedArrowSchema(_data.dictionary, /*is_root*/ false);
var dictionary_type = dictionary_schema.GetAsType();

bool ordered = (_data.flags & FFIArrowSchema.ARROW_FLAG_NULLABLE) == FFIArrowSchema.ARROW_FLAG_NULLABLE;
bool ordered = (_data.flags & CArrowSchema.ARROW_FLAG_NULLABLE) == CArrowSchema.ARROW_FLAG_NULLABLE;

return new DictionaryType(indices_type, dictionary_type, ordered);
}
Expand Down Expand Up @@ -444,7 +497,7 @@ public Field GetAsField()
{
string field_name = string.IsNullOrEmpty(_data.name) ? "" : _data.name;

bool nullable = (_data.flags & FFIArrowSchema.ARROW_FLAG_NULLABLE) == FFIArrowSchema.ARROW_FLAG_NULLABLE;
bool nullable = (_data.flags & CArrowSchema.ARROW_FLAG_NULLABLE) == CArrowSchema.ARROW_FLAG_NULLABLE;

return new Field(field_name, GetAsType(), nullable);
}
Expand Down
50 changes: 25 additions & 25 deletions csharp/test/Apache.Arrow.Tests/CDataInterfaceSchemaTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ public void ImportType()
foreach ((Field field, dynamic py_field) in schema.Fields.Values.AsEnumerable()
.Zip(py_fields))
{
var ffi_schema = new FFIArrowSchema();
IntPtr imported_ptr = ffi_schema.AllocateAsPtr();
var c_schema = new CArrowSchema();
IntPtr imported_ptr = c_schema.AllocateAsPtr();

using (Py.GIL())
{
Expand All @@ -148,7 +148,7 @@ public void ImportType()
dataTypeComparer.Visit(imported_type.GetAsType());

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(imported_ptr);
CArrowSchema.FreePtr(imported_ptr);
}
}

Expand All @@ -162,8 +162,8 @@ public void ImportField()
foreach ((Field field, dynamic py_field) in schema.Fields.Values.AsEnumerable()
.Zip(py_fields))
{
var ffi_schema = new FFIArrowSchema();
IntPtr imported_ptr = ffi_schema.AllocateAsPtr();
var c_schema = new CArrowSchema();
IntPtr imported_ptr = c_schema.AllocateAsPtr();

using (Py.GIL())
{
Expand All @@ -174,7 +174,7 @@ public void ImportField()
FieldComparer.Compare(field, imported_field.GetAsField());

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(imported_ptr);
CArrowSchema.FreePtr(imported_ptr);
}
}

Expand All @@ -185,8 +185,8 @@ public void ImportSchema()
Schema schema = GetTestSchema();
dynamic py_schema = GetPythonSchema();

var ffi_schema = new FFIArrowSchema();
IntPtr imported_ptr = ffi_schema.AllocateAsPtr();
var c_schema = new CArrowSchema();
IntPtr imported_ptr = c_schema.AllocateAsPtr();

using (Py.GIL())
{
Expand All @@ -197,7 +197,7 @@ public void ImportSchema()
SchemaComparer.Compare(schema, imported_field.GetAsSchema());

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(imported_ptr);
CArrowSchema.FreePtr(imported_ptr);
}


Expand All @@ -213,8 +213,8 @@ public void ExportType()
.Zip(py_fields))
{
IArrowType datatype = field.DataType;
var exported_type = new FFIArrowSchema();
FFIArrowSchema.ExportDataType(datatype, out exported_type);
var exported_type = new CArrowSchema();
CArrowSchema.ExportDataType(datatype, out exported_type);

// For Python, we need to provide the pointer
IntPtr exported_ptr = exported_type.AllocateAsPtr();
Expand All @@ -228,15 +228,15 @@ public void ExportType()
}

// Python should have called release once `exported_py_type` went out-of-scope.
var ffi_schema = Marshal.PtrToStructure<FFIArrowSchema>(exported_ptr);
Assert.Null(ffi_schema.release);
Assert.Null(ffi_schema.format);
Assert.Equal(0, ffi_schema.flags);
Assert.Equal(0, ffi_schema.n_children);
Assert.Equal(IntPtr.Zero, ffi_schema.dictionary);
var c_schema = Marshal.PtrToStructure<CArrowSchema>(exported_ptr);
Assert.Null(c_schema.release);
Assert.Null(c_schema.format);
Assert.Equal(0, c_schema.flags);
Assert.Equal(0, c_schema.n_children);
Assert.Equal(IntPtr.Zero, c_schema.dictionary);

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(exported_ptr);
CArrowSchema.FreePtr(exported_ptr);
}
}

Expand All @@ -250,8 +250,8 @@ public void ExportField()
foreach ((Field field, dynamic py_field) in schema.Fields.Values.AsEnumerable()
.Zip(py_fields))
{
var exported_field = new FFIArrowSchema();
FFIArrowSchema.ExportField(field, out exported_field);
var exported_field = new CArrowSchema();
CArrowSchema.ExportField(field, out exported_field);

// For Python, we need to provide the pointer
var exported_ptr = exported_field.AllocateAsPtr();
Expand All @@ -264,13 +264,13 @@ public void ExportField()
}

// Python should have called release once `exported_py_type` went out-of-scope.
var ffi_schema = Marshal.PtrToStructure<FFIArrowSchema>(exported_ptr);
var ffi_schema = Marshal.PtrToStructure<CArrowSchema>(exported_ptr);
Assert.Null(ffi_schema.name);
Assert.Null(ffi_schema.release);
Assert.Null(ffi_schema.format);

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(exported_ptr);
CArrowSchema.FreePtr(exported_ptr);
}
}

Expand All @@ -281,8 +281,8 @@ public void ExportSchema()
Schema schema = GetTestSchema();
dynamic py_schema = GetPythonSchema();

var exported_schema = new FFIArrowSchema();
FFIArrowSchema.ExportSchema(schema, out exported_schema);
var exported_schema = new CArrowSchema();
CArrowSchema.ExportSchema(schema, out exported_schema);

// For Python, we need to provide the pointer
var exported_ptr = exported_schema.AllocateAsPtr();
Expand All @@ -295,7 +295,7 @@ public void ExportSchema()
}

// Since we allocated, we are responsible for freeing the pointer.
FFIArrowSchema.FreePtr(exported_ptr);
CArrowSchema.FreePtr(exported_ptr);
}

}
Expand Down

0 comments on commit e4f2528

Please sign in to comment.