Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: Add TableDefinition wrapper for python #5892

Merged
merged 22 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 92 additions & 34 deletions py/server/deephaven/column.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@

""" This module implements the Column class and functions that work with Columns. """

from dataclasses import dataclass, field
from enum import Enum
from functools import cached_property
from typing import Sequence, Any

import jpy

import deephaven.dtypes as dtypes
from deephaven import DHError
from deephaven.dtypes import DType
from deephaven.dtypes import _instant_array
from deephaven.dtypes import DType, _instant_array, from_jtype
from deephaven._wrapper import JObjectWrapper

_JColumnHeader = jpy.get_type("io.deephaven.qst.column.header.ColumnHeader")
_JColumn = jpy.get_type("io.deephaven.qst.column.Column")
Expand All @@ -32,47 +32,105 @@ def __repr__(self):
return self.name


@dataclass
class Column:
""" A Column object represents a column definition in a Deephaven Table. """
name: str
data_type: DType
component_type: DType = None
column_type: ColumnType = ColumnType.NORMAL
class Column(JObjectWrapper):
"""A Column object represents a column definition in a Deephaven Table."""

j_object_type = _JColumnDefinition

def __init__(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need a constructor that directly wraps an existing _JColumnDefinition.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This includes that case:

Column(j_column_definition=...)

And we use this construction as the basis for TableDefinition as a Mapping[str, Column].

It looks funky because we need to maintain compatibility with the existing constructor.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We just agreed on a call to introduce a superclass called ColumnDefinition, maybe, in order to preserve the existing behavior and also provide the kind of wrapper we want users to use.

self,
name: str = None,
data_type: DType = None,
component_type: DType = None,
column_type: ColumnType = ColumnType.NORMAL,
j_column_definition: jpy.JType = None,
):
if j_column_definition:
if name or data_type or component_type or column_type != ColumnType.NORMAL:
raise DHError(
"Must specify the data fields or j_column_definition for Column, but not both."
)
self.name = j_column_definition.getName()
self.j_column_definition = j_column_definition
else:
if not name or not data_type:
raise DHError("Must specify both name and data_type")
if hasattr(data_type.j_type, "jclass"):
j_data_type = data_type.j_type.jclass
else:
j_data_type = data_type.qst_type.clazz()
j_component_type = (
component_type.qst_type.clazz() if component_type else None
)
j_column_type = column_type.value
self.name = name
self.j_column_definition = _JColumnDefinition.fromGenericType(
name, j_data_type, j_component_type, j_column_type
)

@property
def j_column_header(self):
return _JColumnHeader.of(self.name, self.data_type.qst_type)
def j_object(self) -> jpy.JType:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pydoc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we pydoc any of our j_object property methods?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think so, but do a quick check.

return self.j_column_definition

@property
def j_column_definition(self):
if hasattr(self.data_type.j_type, 'jclass'):
j_data_type = self.data_type.j_type.jclass
else:
j_data_type = self.data_type.qst_type.clazz()
j_component_type = self.component_type.qst_type.clazz() if self.component_type else None
j_column_type = self.column_type.value
return _JColumnDefinition.fromGenericType(self.name, j_data_type, j_component_type, j_column_type)
@cached_property
def data_type(self) -> DType:
chipkent marked this conversation as resolved.
Show resolved Hide resolved
return from_jtype(self.j_column_definition.getDataType())

@cached_property
def component_type(self) -> DType:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pydoc

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll note, this was also missing before from Column.

return from_jtype(self.j_column_definition.getComponentType())

@dataclass
class InputColumn(Column):
""" An InputColumn represents a user defined column with some input data. """
input_data: Any = field(default=None)
@cached_property
def column_type(self) -> ColumnType:
chipkent marked this conversation as resolved.
Show resolved Hide resolved
return ColumnType(self.j_column_definition.getColumnType())

def __post_init__(self):
@property
def j_column_header(self):
return _JColumnHeader.of(self.name, self.data_type.qst_type)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we don't need to worry about the mutability of @cached_property, should we use that here too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like more of an internal property and only used when creating an InputColumn; I don't think we expect users to call j_column_header? I would be okay removing it or forcing it to start w/ _, and I think tracing through the callstack, it doesn't make sense to cache it (I don't think we'll ever re-use it).

Where we might conceivably re-use it in the future is if we had a helper from Column -> InputColumn. IE,

my_col : InputColumn = Column("Foo", dtypes.int32).with_data([1, 2, 3])

But at least with the current interfaces, users are always going through InputColumn constructor or helper methods (bool_col, byte_col, etc).


def _to_j_column(self, input_data: Any = None):
if input_data is None:
return _JColumn.empty(self.j_column_header)
if self.data_type.is_primitive:
return _JColumn.ofUnsafe(
self.name,
dtypes.array(
self.data_type,
input_data,
remap=dtypes.null_remap(self.data_type),
),
)
return _JColumn.of(
self.j_column_header,
dtypes.array(self.data_type, input_data),
)


class InputColumn(JObjectWrapper):
""" An InputColumn represents a user defined column with some input data."""
devinrsmith marked this conversation as resolved.
Show resolved Hide resolved

j_object_type = _JColumn

def __init__(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ditto, this is supported via j_column_definition

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We said this may not actually be a wrapper. I'm not clear that it's a wrapper around a JColumnDefinition, regardless.

self,
name: str = None,
data_type: DType = None,
component_type: DType = None,
column_type: ColumnType = ColumnType.NORMAL,
j_column_definition: jpy.JType = None,
devinrsmith marked this conversation as resolved.
Show resolved Hide resolved
input_data: Any = None,
):
try:
if self.input_data is None:
self.j_column = _JColumn.empty(self.j_column_header)
else:
if self.data_type.is_primitive:
self.j_column = _JColumn.ofUnsafe(self.name, dtypes.array(self.data_type, self.input_data,
remap=dtypes.null_remap(self.data_type)))
else:
self.j_column = _JColumn.of(self.j_column_header, dtypes.array(self.data_type, self.input_data))
column_definition = Column(name, data_type, component_type, column_type, j_column_definition)
self.name = column_definition.name
self.j_column = column_definition._to_j_column(input_data)
except Exception as e:
raise DHError(e, f"failed to create an InputColumn ({self.name}).") from e

@property
def j_object(self) -> jpy.JType:
return self.j_column


def bool_col(name: str, data: Sequence) -> InputColumn:
""" Creates an input column containing Boolean data.
Expand Down
Loading
Loading