Skip to content

Commit

Permalink
apacheGH-34737: [C#] C Data interface for schemas and types (apache#3…
Browse files Browse the repository at this point in the history
…4133)

### Rationale for this change

This starts the C Data Interface implementation for C# with integration for `ArrowSchema`. `ArrowArray` will come in a follow-up PR.

### What changes are included in this PR?

* Adds classes `CArrowSchema` and `ImportedArrowSchema`, which allow interacting with the C Data Interface `ArrowSchema` struct.
* Adds integration tests with PyArrow, inspired by the similar integration tests in [arrow-rs](https://github.com/apache/arrow-rs/blob/master/arrow/src/pyarrow.rs)

### Are these changes tested?

Yes, the PyArrow integration tests validate the functionality.

### Are there any user-facing changes?

This only adds new APIs, and doesn't change any existing ones.

* Closes: apache#33856
* Closes: apache#34737

Lead-authored-by: Will Jones <willjones127@gmail.com>
Co-authored-by: Weston Pace <weston.pace@gmail.com>
Signed-off-by: Eric Erhardt <eric.erhardt@microsoft.com>
  • Loading branch information
2 people authored and ArgusLi committed May 15, 2023
1 parent d93c357 commit 5dbeb4f
Show file tree
Hide file tree
Showing 8 changed files with 1,227 additions and 0 deletions.
11 changes: 11 additions & 0 deletions ci/scripts/csharp_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,17 @@ set -ex

source_dir=${1}/csharp

# Python and PyArrow are required for C Data Interface tests.
# Honor a caller-supplied PYTHON; otherwise prefer python3 and fall back to python.
if [ -z "${PYTHON}" ]; then
  if type python3 > /dev/null 2>&1; then
    export PYTHON=python3
  else
    export PYTHON=python
  fi
fi
# ${PYTHON} is intentionally left unquoted so a multi-word override still splits
# into command and arguments.
${PYTHON} -m pip install pyarrow find-libpython
# Assign first, then export: `export VAR=$(cmd)` yields the exit status of
# `export`, so under `set -e` a failing find_libpython would go unnoticed and
# PYTHONNET_PYDLL would be exported empty.
PYTHONNET_PYDLL=$(${PYTHON} -m find_libpython)
export PYTHONNET_PYDLL

pushd ${source_dir}
dotnet test
for pdb in artifacts/Apache.Arrow/*/*/Apache.Arrow.pdb; do
Expand Down
124 changes: 124 additions & 0 deletions csharp/src/Apache.Arrow/C/CArrowSchema.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

using System;
using System.Runtime.InteropServices;
using Apache.Arrow.Types;

namespace Apache.Arrow.C
{
/// <summary>
/// An Arrow C Data Interface Schema, which represents a type, field, or schema.
/// </summary>
/// <remarks>
/// This is used to export <see cref="ArrowType"/>, <see cref="Field"/>, or
/// <see cref="Schema"/> to other languages. It matches the layout of the
/// ArrowSchema struct described in https://github.com/apache/arrow/blob/main/cpp/src/arrow/c/abi.h,
/// so the order and types of the fields below must not be changed.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public unsafe struct CArrowSchema
{
    public byte* format;
    public byte* name;
    public byte* metadata;
    public long flags;
    public long n_children;
    public CArrowSchema** children;
    public CArrowSchema* dictionary;
    // NOTE(review): the linked abi.h appears to declare this callback without an
    // explicit calling convention; confirm Stdcall is intended on 32-bit Windows.
    public delegate* unmanaged[Stdcall]<CArrowSchema*, void> release;
    public void* private_data;

    /// <summary>
    /// Allocate and zero-initialize an unmanaged pointer of this type.
    /// </summary>
    /// <remarks>
    /// This pointer must later be freed by <see cref="Free"/>.
    /// </remarks>
    /// <returns>A pointer to a newly allocated struct whose fields are all zero/null.</returns>
    public static CArrowSchema* Create()
    {
        var ptr = (CArrowSchema*)Marshal.AllocHGlobal(sizeof(CArrowSchema));

        // AllocHGlobal does not clear memory. Assigning default zeroes every
        // field at once, including any field added to the struct later.
        *ptr = default;

        return ptr;
    }

    /// <summary>
    /// Free a pointer that was allocated in <see cref="Create"/>.
    /// </summary>
    /// <remarks>
    /// Do not call this on a pointer that was allocated elsewhere.
    /// If the release callback is still set, it is invoked before the memory is freed.
    /// Passing a null pointer is a no-op.
    /// </remarks>
    /// <param name="schema">Pointer previously returned by <see cref="Create"/>, or null.</param>
    public static void Free(CArrowSchema* schema)
    {
        if (schema == null)
        {
            // Nothing to release or free; avoids dereferencing a null pointer below.
            return;
        }

        if (schema->release != null)
        {
            // Call release if not already called.
            schema->release(schema);
        }

        Marshal.FreeHGlobal((IntPtr)schema);
    }

    /// <summary>
    /// For dictionary-encoded types, whether the ordering of dictionary indices is semantically meaningful.
    /// </summary>
    public const long ArrowFlagDictionaryOrdered = 1;

    /// <summary>
    /// Whether this field is semantically nullable (regardless of whether it actually has null values)
    /// </summary>
    public const long ArrowFlagNullable = 2;

    /// <summary>
    /// For map types, whether the keys within each map value are sorted.
    /// </summary>
    public const long ArrowFlagMapKeysSorted = 4;

    /// <summary>
    /// Get the value of a particular flag.
    /// </summary>
    /// <remarks>
    /// Known valid flags are <see cref="ArrowFlagDictionaryOrdered" />,
    /// <see cref="ArrowFlagNullable" />, and <see cref="ArrowFlagMapKeysSorted" />.
    /// </remarks>
    /// <param name="flag">Bit mask to test against <see cref="flags"/>.</param>
    /// <returns>True when every bit of <paramref name="flag"/> is set in <see cref="flags"/>.</returns>
    public readonly bool GetFlag(long flag)
    {
        return (flags & flag) == flag;
    }

    /// <summary>
    /// Get the pointer stored at position <paramref name="i"/> of <see cref="children"/>.
    /// </summary>
    /// <param name="i">Zero-based child index; must be less than <see cref="n_children"/>.</param>
    /// <returns>The child pointer stored at that index.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="i"/> is negative or not less than <see cref="n_children"/>, or
    /// <see cref="children"/> is null.
    /// </exception>
    internal readonly CArrowSchema* GetChild(long i)
    {
        // The unsigned comparison rejects negative indices and indices >= n_children in one test.
        if ((ulong)i >= (ulong)n_children)
        {
            // Use the (paramName, actualValue, message) overload: the single-string
            // overload would treat the message as the parameter name.
            throw new ArgumentOutOfRangeException(nameof(i), i, "Child index out of bounds.");
        }
        if (children == null)
        {
            throw new ArgumentOutOfRangeException(nameof(i), i, $"Child index '{i}' out of bounds.");
        }

        return children[i];
    }
}
}
Loading

0 comments on commit 5dbeb4f

Please sign in to comment.