Skip to content
This repository has been archived by the owner on Jan 27, 2022. It is now read-only.

Commit

Permalink
feat: Converting Pandas DataFrames to Datatables in Python JSON output
Browse files Browse the repository at this point in the history
  • Loading branch information
beneboy committed Sep 2, 2019
1 parent a9ac2a2 commit 39406e5
Showing 1 changed file with 45 additions and 2 deletions.
47 changes: 45 additions & 2 deletions py/stencila/schema/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,54 @@
import typing

from . import types
from .types import Node, Entity
from .types import Node, Entity, Datatable, DatatableColumn, BooleanSchema, IntegerSchema, NumberSchema, StringSchema, \
ArraySchema

try:
from pandas import DataFrame
import numpy

def to_dict(node: Entity) -> dict:
pandas_available = True
except ImportError:
class DataFrame:
pass

pandas_available = False


def data_frame_to_data_table(df: DataFrame) -> Datatable:
    """Convert a Pandas DataFrame into a Datatable.

    Each column of `df` becomes a `DatatableColumn` carrying the column's
    name, its values as a plain Python list, and an `ArraySchema` whose
    `items` describe the element type:

    - boolean dtypes  -> `BooleanSchema` (values coerced with `bool`)
    - integer dtypes  -> `IntegerSchema` (values coerced with `int`)
    - float dtypes    -> `NumberSchema`  (values coerced with `float`)
    - NumPy unicode string dtypes -> `StringSchema` (values left as-is)
    - anything else   -> `items=None`    (values left as-is)

    The dtype is classified through `numpy.dtype.kind` rather than by
    comparing against scalar type aliases: the `numpy.bool8` and
    `numpy.unicode_` aliases were removed in NumPy 2.0, so merely
    referencing them raises `AttributeError`.
    """
    columns = []

    for column_name in df.columns:
        column = df[column_name]
        values = column.tolist()

        dtype = column.dtype
        # Only genuine NumPy dtypes are classified. Pandas extension dtypes
        # (nullable integers, categoricals, ...) fall through to the untyped
        # branch, since their values may not survive int()/float() coercion.
        kind = dtype.kind if isinstance(dtype, numpy.dtype) else None

        if kind == "b":
            schema = BooleanSchema()
            values = [bool(row) for row in values]
        elif kind in ("i", "u"):
            # Signed and unsigned integers of any width.
            schema = IntegerSchema()
            values = [int(row) for row in values]
        elif kind == "f":
            schema = NumberSchema()
            values = [float(row) for row in values]
        elif kind == "U":
            schema = StringSchema()
        else:
            schema = None

        columns.append(
            DatatableColumn(column_name, values, schema=ArraySchema(items=schema))
        )

    return Datatable(columns)


def to_dict(node: typing.Any) -> dict:
"""Convert an Entity node to a dictionary"""
if pandas_available:
if isinstance(node, DataFrame):
node = data_frame_to_data_table(node)

node_dict = {
"type": node.__class__.__name__
}
Expand Down

0 comments on commit 39406e5

Please sign in to comment.