From 523f73f063431dd309b43a3d6ca789e498137d6e Mon Sep 17 00:00:00 2001 From: Michael Ringgaard Date: Tue, 4 Dec 2018 13:18:06 +0100 Subject: [PATCH] Python API for Myelin (#302) --- doc/guide/flowasm.txt | 166 ++++++ doc/guide/flowin.svg | 249 +++++++++ doc/guide/flowout.svg | 181 +++++++ doc/guide/myelin.md | 269 +++++++-- python/myelin/__init__.py | 5 + python/myelin/builder.py | 55 +- python/myelin/flow.py | 136 ++++- python/myelin/lexical_encoder.py | 2 + python/task/wiki.py | 1 + sling/myelin/kernel/gradients.cc | 4 +- sling/pyapi/BUILD | 5 + sling/pyapi/pyapi.cc | 19 +- sling/pyapi/pymyelin.cc | 903 +++++++++++++++++++++++++++++++ sling/pyapi/pymyelin.h | 278 ++++++++++ 14 files changed, 2194 insertions(+), 79 deletions(-) create mode 100644 doc/guide/flowasm.txt create mode 100644 doc/guide/flowin.svg create mode 100644 doc/guide/flowout.svg create mode 100644 sling/pyapi/pymyelin.cc create mode 100644 sling/pyapi/pymyelin.h diff --git a/doc/guide/flowasm.txt b/doc/guide/flowasm.txt new file mode 100644 index 00000000..236b26a9 --- /dev/null +++ b/doc/guide/flowasm.txt @@ -0,0 +1,166 @@ +0000000000000000 : + 0: push rbp + 1: mov rbp,rdi + +0000000000000004 : + 4: mov rdi,rbp + 7: movabs rsi,0x0 9: R_X86_64_64 f/W + 11: movabs r9,0x0 13: R_X86_64_64 f/b + 1b: lea r8,[rbp+0x100] + 22: vxorps ymm13,ymm13,ymm13 + 27: xor rcx,rcx + 2a: vmovaps ymm0,YMMWORD PTR [r9+rcx*1] + 30: vmovaps ymm1,YMMWORD PTR [r9+rcx*1+0x20] + 37: vmovaps ymm2,YMMWORD PTR [r9+rcx*1+0x40] + 3e: vmovaps ymm3,YMMWORD PTR [r9+rcx*1+0x60] + 45: vmovaps ymm4,YMMWORD PTR [r9+rcx*1+0x80] + 4f: vmovaps ymm5,YMMWORD PTR [r9+rcx*1+0xa0] + 59: vmovaps ymm6,YMMWORD PTR [r9+rcx*1+0xc0] + 63: vmovaps ymm7,YMMWORD PTR [r9+rcx*1+0xe0] + 6d: mov rdx,rsi + 70: xor rax,rax + 73: vbroadcastss ymm12,DWORD PTR [rdi+rax*1] + 79: vfmadd231ps ymm0,ymm12,YMMWORD PTR [rdx] + 7e: vfmadd231ps ymm1,ymm12,YMMWORD PTR [rdx+0x20] + 84: vfmadd231ps ymm2,ymm12,YMMWORD PTR [rdx+0x40] + 8a: vfmadd231ps ymm3,ymm12,YMMWORD PTR 
[rdx+0x60] + 90: vfmadd231ps ymm4,ymm12,YMMWORD PTR [rdx+0x80] + 99: vfmadd231ps ymm5,ymm12,YMMWORD PTR [rdx+0xa0] + a2: vfmadd231ps ymm6,ymm12,YMMWORD PTR [rdx+0xc0] + ab: vfmadd231ps ymm7,ymm12,YMMWORD PTR [rdx+0xe0] + b4: add rdx,0x400 + bb: add rax,0x4 + bf: cmp rax,0x100 + c5: jl 73 + c7: vmaxps ymm0,ymm0,ymm13 + cc: vmovaps YMMWORD PTR [r8+rcx*1],ymm0 + d2: vmaxps ymm1,ymm1,ymm13 + d7: vmovaps YMMWORD PTR [r8+rcx*1+0x20],ymm1 + de: vmaxps ymm2,ymm2,ymm13 + e3: vmovaps YMMWORD PTR [r8+rcx*1+0x40],ymm2 + ea: vmaxps ymm3,ymm3,ymm13 + ef: vmovaps YMMWORD PTR [r8+rcx*1+0x60],ymm3 + f6: vmaxps ymm4,ymm4,ymm13 + fb: vmovaps YMMWORD PTR [r8+rcx*1+0x80],ymm4 + 105: vmaxps ymm5,ymm5,ymm13 + 10a: vmovaps YMMWORD PTR [r8+rcx*1+0xa0],ymm5 + 114: vmaxps ymm6,ymm6,ymm13 + 119: vmovaps YMMWORD PTR [r8+rcx*1+0xc0],ymm6 + 123: vmaxps ymm7,ymm7,ymm13 + 128: vmovaps YMMWORD PTR [r8+rcx*1+0xe0],ymm7 + 132: add rsi,0x100 + 139: add rcx,0x100 + 140: cmp rcx,0x400 + 147: jl 2a + +000000000000014d : + 14d: lea rcx,[rbp+0x100] + 154: vmovaps ymm0,YMMWORD PTR [rip+0x1a4] # 300 + 15c: xor rax,rax + 15f: vmaxps ymm0,ymm0,YMMWORD PTR [rcx+rax*1] + 164: add rax,0x20 + 168: cmp rax,0x400 + 16e: jl 15f + 170: vperm2f128 ymm1,ymm0,ymm0,0x1 + 176: vmaxps ymm0,ymm0,ymm1 + 17a: vpermilps ymm1,ymm0,0xe + 180: vmaxps ymm0,ymm0,ymm1 + 184: vpermilps ymm1,ymm0,0x1 + 18a: vmaxps ymm0,ymm0,ymm1 + 18e: vmovss DWORD PTR [rbp+0x500],xmm0 + +0000000000000196 : + 196: lea rcx,[rbp+0x100] + 19d: lea rdx,[rbp+0x520] + 1a4: vxorps ymm14,ymm14,ymm14 + 1a9: vmovaps ymm0,YMMWORD PTR [rip+0x16f] # 320 + 1b1: vmovaps ymm1,YMMWORD PTR [rip+0x187] # 340 + 1b9: vmovaps ymm2,YMMWORD PTR [rip+0x19f] # 360 + 1c1: vmovaps ymm3,YMMWORD PTR [rip+0x1b7] # 380 + 1c9: vmovaps ymm4,YMMWORD PTR [rip+0x1cf] # 3a0 + 1d1: vmovaps ymm5,YMMWORD PTR [rip+0x1e7] # 3c0 + 1d9: vmovaps ymm6,YMMWORD PTR [rip+0x1ff] # 3e0 + 1e1: vmovaps ymm7,YMMWORD PTR [rip+0x217] # 400 + 1e9: vbroadcastss ymm8,DWORD PTR [rbp+0x500] + 1f2: xor rax,rax + 
1f5: vmovaps ymm9,YMMWORD PTR [rcx+rax*1] + 1fa: vsubps ymm9,ymm9,ymm8 + 1ff: vminps ymm10,ymm9,ymm1 + 203: vmaxps ymm10,ymm10,ymm0 + 207: vmovaps ymm11,ymm10 + 20c: vfmadd213ps ymm11,ymm3,ymm2 + 211: vroundps ymm11,ymm11,0x1 + 217: vmovaps ymm12,ymm11 + 21c: vfmadd213ps ymm12,ymm4,ymm10 + 221: vmulps ymm10,ymm12,ymm12 + 226: vmovaps ymm13,ymm5 + 22a: vfmadd213ps ymm13,ymm12,ymm6 + 22f: vfmadd213ps ymm13,ymm12,ymm7 + 234: vfmadd213ps ymm13,ymm12,YMMWORD PTR [rip+0x1e3] # 420 + 23d: vfmadd213ps ymm13,ymm12,YMMWORD PTR [rip+0x1fa] # 440 + 246: vfmadd213ps ymm13,ymm12,YMMWORD PTR [rip+0x111] # 360 + 24f: vfmadd213ps ymm13,ymm10,ymm12 + 254: vaddps ymm13,ymm13,YMMWORD PTR [rip+0x204] # 460 + 25c: vaddps ymm11,ymm11,YMMWORD PTR [rip+0x21c] # 480 + 264: vcvttps2dq ymm11,ymm11 + 269: vpslld ymm11,ymm11,0x17 + 26f: vmulps ymm13,ymm13,ymm11 + 274: vmaxps ymm13,ymm13,ymm9 + 279: vmovaps YMMWORD PTR [rdx+rax*1],ymm13 + 27e: vaddps ymm14,ymm14,ymm13 + 283: add rax,0x20 + 287: cmp rax,0x400 + 28d: jl 1f5 + 293: vperm2f128 ymm15,ymm14,ymm14,0x1 + 299: vhaddps ymm14,ymm14,ymm15 + 29e: vhaddps ymm14,ymm14,ymm14 + 2a3: vhaddps ymm14,ymm14,ymm14 + 2a8: vmovss DWORD PTR [rbp+0x500],xmm14 + +00000000000002b0 : + 2b0: vmovss xmm0,DWORD PTR [rip+0x1e8] # 4a0 + 2b8: vdivss xmm1,xmm0,DWORD PTR [rbp+0x500] + 2c0: vmovss DWORD PTR [rbp+0x500],xmm1 + +00000000000002c8 : + 2c8: lea rcx,[rbp+0x520] + 2cf: vbroadcastss ymm0,DWORD PTR [rbp+0x500] + 2d8: xor rax,rax + 2db: vmulps ymm1,ymm0,YMMWORD PTR [rcx+rax*1] + 2e0: vmovaps YMMWORD PTR [rcx+rax*1],ymm1 + 2e5: add rax,0x20 + 2e9: cmp rax,0x400 + 2ef: jl 2db + 2f1: pop rbp + 2f2: ret + +00000000000002f3 : + ... + 2ff: ................ + 30f: ................ + 31f: ................ + 32f: ................ + 33f: ....B...B...B... + 34f: B...B...B...B... + 35f: B...?...?...?... + 36f: ?...?...?...?... + 37f: ?;..?;..?;..?;.. + 38f: ?;..?;..?;..?;.. 
+ 39f: ?.r1..r1..r1..r1 + 3af: ..r1..r1..r1..r1 + 3bf: .giP9giP9giP9giP + 3cf: 9giP9giP9giP9giP + 3df: 9.C.:.C.:.C.:.C. + 3ef: :.C.:.C.:.C.:.C. + 3ff: :...<...<...<... + 40f: <...<...<...<... + 41f: <..*=..*=..*=..* + 42f: =..*=..*=..*=..* + 43f: =..*>..*>..*>..* + 44f: >..*>..*>..*>..* + 45f: >...?...?...?... + 46f: ?...?...?...?... + 47f: ?...B...B...B... + 48f: B...B...B...B... + 49f: B...? + diff --git a/doc/guide/flowin.svg b/doc/guide/flowin.svg new file mode 100644 index 00000000..e77230b0 --- /dev/null +++ b/doc/guide/flowin.svg @@ -0,0 +1,249 @@ + + + + + + +flow + +cluster_0 + + +f + + + + +f/Sum + + +Sum +float32 + + + + +f/Reciprocal + + +Reciprocal +float32 + + + + +f/Sum->f/Reciprocal + + + + + + + +f/Relu + + +Relu +float32[1x64] + + + + +f/Sub + + +Sub +float32[1x64] + + + + +f/Relu->f/Sub + + + + + + + +f/Max + + +Max +float32 + + + + +f/Relu->f/Max + + + + + + + +f/Exp + + +Exp +float32[1x64] + + + + +f/Sub->f/Exp + + + + + + + +f/y + + +Mul +float32[1x64] + + + + +v:f/y:0 + + +y:0 +float32[1x64] + + + + +f/y->v:f/y:0 + + + + + + + +f/Max->f/Sub + + + + + + + +f/MatMul + + +MatMul +float32[1x64] + + + + +f/Add + + +Add +float32[1x64] + + + + +f/MatMul->f/Add + + + + + + + +f/Add->f/Relu + + + + + + + +f/Reciprocal->f/y + + + + + + + +f/Exp->f/Sum + + + + + + + +f/Exp->f/y + + + + + + + +v:f/W + + +W +float32[256x64] + + + + +v:f/W->f/MatMul + + + + + + + +v:f/x + + +x +float32[1x256] + + + + +v:f/x->f/MatMul + + + + + + + +v:f/b + + +b +float32[64] + + + + +v:f/b->f/Add + + + + + + + + diff --git a/doc/guide/flowout.svg b/doc/guide/flowout.svg new file mode 100644 index 00000000..4c18d47c --- /dev/null +++ b/doc/guide/flowout.svg @@ -0,0 +1,181 @@ + + + + + + +flow + +cluster_0 + + +f + + + + +f/MatMul + + +MatMulAddRelu +float32[1x256] + + + + +f/Max + + +Max +float32 + + + + +f/MatMul->f/Max + + + + + + + +f/Sub + + +@0=Exp(Sub(%0,%1));@1=Sum(@0) +float32[1x256] + + + + +f/MatMul->f/Sub + + + + + + + +f/Max->f/Sub + + + + + + + +f/Reciprocal + + 
+Reciprocal +float32 + + + + +f/Sub->f/Reciprocal + + + + + + + +f/y + + +Mul +float32[1x256] + + + + +f/Sub->f/y + + + + + + + +f/Reciprocal->f/y + + + + + + + +v:f/y:0 + + +y:0 +float32[1x256] + + + + +f/y->v:f/y:0 + + + + + + + +v:f/W + + +W +float32[64x256] + + + + +v:f/W->f/MatMul + + + + + + + +v:f/x + + +x +float32[1x64] + + + + +v:f/x->f/MatMul + + + + + + + +v:f/b + + +b +float32[256] + + + + +v:f/b->f/MatMul + + + + + + + + diff --git a/doc/guide/myelin.md b/doc/guide/myelin.md index 667ade0c..b0546581 100644 --- a/doc/guide/myelin.md +++ b/doc/guide/myelin.md @@ -8,8 +8,9 @@ when generating the code so it can take advantage of specialized features like SSE, AVX, and FMA3. Myelin can be used at inference time (as opposed to training time) to speed up -neural network computations. The neural network is stored in a _.flow_ file -which is loaded and compiled into a _network_ at runtime by Myelin. +neural network computations. The neural network can be stored in a _.flow_ file +which can then later be loaded and compiled into a _network_ at runtime by +Myelin. ## Platform @@ -18,7 +19,216 @@ Languages: C++, assembler, Python
CPU: Intel x64 or compatible
Build system: Bazel
-## Creating flow files +## Using Myelin in Python + +Myelin represents a computation graph using a _flow_. The graph is divivded into +_functions_ which can be computed independently. A function is a set of +_operations_ with tensor inputs and outputs. The tensor inputs and outputs are +_variables_ in the flow. Variables can either be global constant tensor, e.g. +learned weights in a neural network, or parameter tensors, which are local to +the function. + +### Building a flow + +Let's consider a simple neural network with a single linear layer with a +softmax on top: +``` +y = softmax(relu(x * W + b)) +``` +This can be computed with the following flow graph: + +![input flow](flowin.svg) + +The graph only shows the input and output variables (green and blue), and the +global variables (rectangles), but does not show the intermediate variables +between the tensor operations. The softmax is also expanded into more basic +operations, i.e.: +``` +softmax(x) = normalize(exp(x - max(x))) +normalize(x) = x * (1 / sum(x)) +``` +You can use a `myelin.Builder` for constructing a flow function for this +computation: + +```python +import sling +import sling.myelin as myelin +import numpy as np + +# Build flow. +flow = myelin.Flow() + +# Create builder for function. +f = myelin.Builder(flow, "f") +``` + +The weights in W and b can be initialized from NumPy arrays or any other +objects that support the +[Python buffer protocol](https://docs.python.org/2/c-api/buffer.html): + +```python +# Initialize weights. +W = f.array("W", np.random.rand(64, 256).astype(np.float32)) +b = f.array("b", np.random.rand(256).astype(np.float32)) +``` + +Next, we create an input variable `x` and build up the computation using the +builder: + +```python +# Create input variable x as a float[1,64] tensor. 
+x = f.var("x", myelin.DT_FLOAT, [1, 64]) + +# Compute y=softmax(relu(x * W + b)) +y = f.softmax(f.relu(f.add(f.matmul(x, W), b)), name="y") +``` + +### Compiling a flow into a network + +The flow is just a specification of the computation. It needs to be compiled +into a _network_. The Myelin JIT compiler converts the flow into assembly +code for executing the computation. Each function is compiled into a _cell_ +which contains the data layout for the cell as well as the code for the +computation: + +```python +# Compile flow to network. +compiler = myelin.Compiler() +net = compiler.compile(flow) +cell = net.cell("f") +``` + +The flow is first analyzed by the Myelin JIT compiler which transforms the +flow graph into an optimized form using more specialized operations: + +![output flow](flowout.svg) + +In this example, the `MatMul`, `Add`, and `Relu` operations are converted into +a combined kernel doing all three in one operation. The `Exp`, `Sub`, and +`Sum` operations are also turned into a `Calculate` operation computing +`@0=Exp(Sub(%0,%1));@1=Sum(@0)` as one element-wise operation. 
+ +For each function, the compiler determines the optimal layout of the cell +instance data and selects kernels for implementing the operations and the +order of computation: +``` +cell f { // size 2336 + input var f/x: float32[1x64] // offset 0 size 256 alignment 32 row-major + var f/Relu:0: float32[1x256] // offset 256 size 1024 alignment 32 row-major + var f/Max:0: float32 // offset 1280 size 4 alignment 4 row-major linked to f/Sum:0 + union f/Sum:0: float32 // offset 1280 size 4 alignment 4 row-major linked to f/Reciprocal:0 + union f/Reciprocal:0: float32 // offset 1280 size 4 alignment 4 row-major linked to f/Max:0 + var f/Exp:0: float32[1x256] // offset 1312 size 1024 alignment 32 row-major linked to f/y:0 + union f/y:0: float32[1x256] // offset 1312 size 1024 alignment 32 row-major linked to f/Exp:0 + + const f/W: float32[64x256] // size 65536 alignment 32 row-major + const f/b: float32[256] // size 1024 alignment 32 row-major + + f/Relu:0 = AVXFltVecMatMulAddRelu[U8V](f/x, f/W, f/b) + f/Max:0 = MaxExpr[VFltAVX256](f/Relu:0) + f/Exp:0, f/Sum:0 = Calculate[VFltAVX256](f/Relu:0, f/Max:0) + f/Reciprocal:0 = ReciprocalExpr[FltAVX](f/Sum:0) + f/y:0 = MulExpr[VFltAVX256](f/Exp:0, f/Reciprocal:0) +} +``` + +Finally, the Myelin JIT compiler converts the optimized operations into +[assembler code](flowasm.txt) using the selected kernel generators. The code +generated for each function depends the negotiated layout and alignment of the +input and output tensors as well as the features support by the CPU (SSE, AVX, +AVX2, FMA3, AVX512, etc.). + +### Computing using network cell instances + +In order to do any computation with the compiled network, you need to create +a cell _instance_. If a cell is like a class, then an instance is like an object +of that class. A cell instance has memory for storing all the local variables +of a cell. You can create multiple instances of a cell, each with their own +set of local variables. + +```python +# Create new data instance. 
+data = cell.instance() + +# Set input. +xdata = data[x] +for i in xrange(64): xdata[0, i] = 5 + +# Run computation for data instance. +data.compute() + +# Print result. +ydata = data[y] +print "y", ydata +print "argmax", np.asarray(ydata).argmax() +``` + +The index operator on the cell object (e.g. `data[x]`) returns a _tensor_ object +for the variable in the cell instance with that name. +Alternatively, a numeric tensor parameter id can be used as as the index key. +The `cell.index(name)` method can be used for looking up tensor parameter ids in +advance, and looking up tensors by parameter ids is faster than looking up +tensors by name. +If the index key is neither a string not an integer, the repr() function of the +index key is used for determining the tensor name. + +The tensor is a view into the data in the instance for the variable. The tensor +elements can be read or modified using the index operator, e.g. +`xdata[0, i] = 5`. The tensor object also supports the Python buffer interface, +so you can create a NumPy array sharing the underlying data, e.g. +`np.asarray(ydata)`. You can use the `name()`, `rank()`, `shape()`, and +`type()` methods for inspecting the tensor format. + +The `compute()` method is used for running the cell instance computation, i.e. +compute the output tensor variables from the inputs tensor variables. +A cell instances can be reused for multiple computations. The `clear()` method +can be used for clearing all the tensors in the instance. + +### Putting it all together + +```python +import sling +import sling.myelin as myelin +import numpy as np + +# Build flow. +flow = myelin.Flow() + +# Create builder for function. +f = myelin.Builder(flow, "f") + +# Initialize weights. +W = f.array("W", np.random.rand(64, 256).astype(np.float32)) +b = f.array("b", np.random.rand(256).astype(np.float32)) + +# Create input variable x as a float[1,64] tensor. 
+x = f.var("x", myelin.DT_FLOAT, [1, 64]) + +# Compute y=softmax(relu(x * W + b)) +y = f.softmax(f.relu(f.add(f.matmul(x, W), b)), name="y") + +# Compile flow to network. +compiler = myelin.Compiler() +net = compiler.compile(flow) +cell = net.cell("f") + +# Create new data instance. +data = cell.instance() + +# Set input. +xdata = data[x] +for i in xrange(64): xdata[0, i] = 5 + +# Run computation for data instance. +data.compute() + +# Print result. +ydata = data[y] +print "y", ydata +print "argmax", np.asarray(ydata).argmax() +``` + +## Creating a flow file from a Tensorflow graph Myelin uses [flow files](#flow-file-format) to store neural networks. A Tensorflow graph can be stored as a flow file using the myelin Python module. @@ -68,34 +278,9 @@ and `Add` _operations_ to this function. It will also add `W` and `b` as constant _variables_ to the flow with the trained weights. The resulting flow is then saved to the file _/tmp/model.flow_. -If the Tensorflow graph has been saved to a checkpoint using a TF Saver object, -you can load the checkpoint and only store the parts needed for inference as -a flow file: - -```python -import tensorflow as tf -from sling.myelin import Flow -from sling.myelin.tf import Extractor - -# Load Tensorflow checkpoint. -sess = tf.Session() -saver = tf.train.import_meta_graph('/tmp/mnist.ckpt.meta') -saver.restore(sess, '/tmp/mnist.ckpt') +## Using Myelin in C++ -# Create Myelin flow. -flow = Flow() -extractor = Extractor(sess, flow) - -# Extract flow from graph. -inputs = [sess.graph.get_tensor_by_name("x:0")] -outputs = [sess.graph.get_tensor_by_name("y:0")] -extractor.add(flow.func("classifier"), inputs, outputs) - -# Save flow. 
-flow.save("/tmp/mnist.flow") -``` - -## Setting up a kernel library +### Setting up a kernel library ```c++ #include "sling/myelin/compute.h" @@ -115,7 +300,7 @@ used on any x64 processor as well as specialized kernels for CPUs with add your own kernel generators and graph transformations for custom ops or for generating optimized code for special cases of standard ops. -## Compiling a network +### Compiling a network ```c++ // Load and compile neural network. @@ -142,7 +327,7 @@ After the network has been compiled, the parameters can be looked up in the cell or network. The `Tensor` object then knows the location of the parameter in the compiled flow. -## Computing cell functions +### Computing cell functions ```c++ // Create instance of neural network cell for classifying input. @@ -190,32 +375,32 @@ flow = "flow" <#cnxs> cnx* <#blobs> blob* (from version 4) -var = - <#flags> (IN=1, OUT=2, REF=4, LEARNABLE=8 UNIQUE=16, from version 5) +var = <#flags> (IN=1, OUT=2, REF=4, LEARNABLE=8 UNIQUE=16, from version 5) + <#aliases> <#bytes> value -op = - <#flags> (unused, from version 5) +op = <#flags> (unused, from version 5) + <#inputs> * <#outputs> * <#attrs> attr* -blob = - <#flags> (unused, from version 5) +blob = <#flags> (unused, from version 5) + <#attrs> attr* <#bytes> data -func = - <#flags> (TRAINING=1, from version 5) +func = <#flags> (TRAINING=1, from version 5) + <#ops> -cnx = - <#flags> (unused, from version 5) +cnx = <#flags> (unused, from version 5) + <#vars> shape = <#dims> * diff --git a/python/myelin/__init__.py b/python/myelin/__init__.py index c940658e..41d5f80b 100644 --- a/python/myelin/__init__.py +++ b/python/myelin/__init__.py @@ -1,2 +1,7 @@ +from .. 
import pysling as api + from builder import * from flow import * + +Compiler=api.Compiler + diff --git a/python/myelin/builder.py b/python/myelin/builder.py index 5034e0e1..3de8cdf7 100644 --- a/python/myelin/builder.py +++ b/python/myelin/builder.py @@ -22,6 +22,18 @@ DT_INT = "int32" DT_FLOAT = "float32" +typemap = { + "f": "float32", + "d": "float64", + "i": "int32", + "l": "int32", + "B": "uint8", + "h": "int16", + "b": "int8", + "q": "int64", + "?": "bool", +} + class Builder: def __init__(self, flow, func): self.flow = flow @@ -90,6 +102,15 @@ def const(self, value, dtype=None, shape=None): var.data = value return var + def array(self, name, value): + # Make constant from object with buffer support. + view = memoryview(value) + dtype = typemap[view.format] + shape = list(view.shape) + var = self.flow.var(self.func.name + "/" + name, dtype, shape) + var.data = value + return var + def opname(self, optype): name = self.func.name + '/' + optype if name not in self.flow.ops: return name @@ -145,7 +166,7 @@ def gather(self, embedding, indices, oov=None, name=None): inputs = [embedding, indices] if oov is not None: inputs.append(oov) - result = self.op('Gather', inputs, name) + result = self.op("Gather", inputs, name) result.type = embedding.type if len(embedding.shape) == 2 and len(indices.shape) == 2: result.shape = [indices.shape[1], embedding.shape[1]] @@ -153,8 +174,8 @@ def gather(self, embedding, indices, oov=None, name=None): result.shape = [0] return result - def gather_sum(self, embedding, indices, name=None): - result = self.op('GatherSum', [embedding, indices], name) + def pooling_gather(self, optype, embedding, indices, name=None): + result = self.op(optype, [embedding, indices], name) result.type = embedding.type if len(embedding.shape) == 2: result.shape = [1, embedding.shape[1]] @@ -162,6 +183,15 @@ def gather_sum(self, embedding, indices, name=None): result.shape = [0] return result + def gather_sum(self, embedding, indices, name=None): + return 
self.pooling_gather("GatherSum", embedding, indices, name) + + def gather_max(self, embedding, indices, name=None): + return self.pooling_gather("GatherMax", embedding, indices, name) + + def gather_avg(self, embedding, indices, name=None): + return self.pooling_gather("GatherAvg", embedding, indices, name) + def matmul(self, x, y, name=None): result = self.op("MatMul", [x, y], name) result.type = x.type @@ -254,17 +284,28 @@ def select(self, c, x, name=None): def identity(self, x, name=None): return self.op("Identity", [x], name) + def reduce(self, optype, x, name=None): + v = self.op(optype, [x], name) + v.shape = [] + return v + def sum(self, x, name=None): - return self.op("Sum", [x], name) + return self.reduce("Sum", x, name) def product(self, x, name=None): - return self.op("Product", [x], name) + return self.reduce("Product", x, name) def min(self, x, name=None): - return self.op("Min", [x], name) + return self.reduce("Min", x, name) def max(self, x, name=None): - return self.op("Max", [x], name) + return self.reduce("Max", x, name) + + def normalize(self, x, name=None): + return self.mul(x, self.rcp(self.sum(x)), name) + + def softmax(self, x, name=None): + return self.normalize(self.exp(self.sub(x, self.max(x))), name) def ref(self, instance, var, name=None): r = self.op("Reference", [instance], name) diff --git a/python/myelin/flow.py b/python/myelin/flow.py index c00b8862..55cae993 100644 --- a/python/myelin/flow.py +++ b/python/myelin/flow.py @@ -21,7 +21,7 @@ from struct import unpack from struct import unpack_from -class File: +class FileWriter: """Flow file writer.""" def __init__(self, file): @@ -123,19 +123,53 @@ def read_string(self): return '' -class Variable: +class Variable(object): """Flow variable.""" def __init__(self, name): """Initialize new variable.""" self.name = name + self.flags = 0 + self.aliases = [] self.type = None self.shape = [] - self.ref = False self.data = None self.producer = None self.consumers = [] + @property + def 
input(self): + return (self.flags & 1) != 0 + + @input.setter + def input(self, value): + if value: + self.flags |= 1 + else: + self.flags &= ~1 + + @property + def output(self): + return (self.flags & 2) != 0 + + @output.setter + def output(self, value): + if value: + self.flags |= 2 + else: + self.flags &= ~2 + + @property + def ref(self): + return (self.flags & 4) != 0 + + @ref.setter + def ref(self, value): + if value: + self.flags |= 4 + else: + self.flags &= ~4 + def shape_defined(self): for d in self.shape: if d == -1: return False @@ -147,7 +181,6 @@ def __repr__(self): def __str__(self): s = "var " + self.name + " : " + self.typestr() if self.data is not None: - #s += " " + str(self.data.nbytes) + "bytes" s += " = " + str(self.data) s += " {\n" if self.producer != None: @@ -165,12 +198,13 @@ def typestr(self): return t -class Operation: +class Operation(object): """Flow operation with inputs and outputs.""" def __init__(self, name): """Initialize new operation.""" self.name = name + self.flags = 0 self.type = None self.inputs = [] self.outputs = [] @@ -213,12 +247,13 @@ def __str__(self): return s -class Function: +class Function(object): """Flow function with operations.""" def __init__(self, name): """Initialize new function.""" self.name = name + self.flags = 0 self.ops = [] def add(self, op): @@ -234,25 +269,34 @@ def __str__(self): return s -class Connector: +class Connector(object): """Flow connector with linked variables.""" def __init__(self, name): """Initialize new connector.""" self.name = name + self.flags = 0 self.links = [] def add(self, var): """Add linked variable to connector.""" self.links.append(var) + def __str__(self): + s = "connector " + self.name + " {\n" + for l in self.links: + s += " " + l.name + "\n" + s += "}\n" + return s + -class Blob: +class Blob(object): """Blob for storing extra data like lexicons and feature maps.""" def __init__(self, name): """Initialize new blob.""" self.name = name + self.flags = 0 self.type = "" 
self.data = None self.attrs = {} @@ -268,11 +312,23 @@ def get_attr(self, name): """Get blob attribute as a string or None.""" return self.attrs.get(name, None) + def __str__(self): + s = "blob " + self.name + " : " + self.type + if self.data is not None: + s += " = " + str(self.data) + s += " {\n" + for a in self.attrs: + s += " " + a + " = " + self.attrs[a] + "\n" + s += "}\n" + return s + + class Flow: """Flow with variables, operations, and functions.""" def __init__(self): """Initialize empty flow.""" + self.flags = 0 self.vars = {} self.ops = {} self.funcs = {} @@ -435,17 +491,20 @@ def save(self, filename): """Write flow to file.""" # Write flow file header - f = File(filename) + f = FileWriter(filename) f.write('flow') - f.write_int(4) + f.write_int(5) + f.write_int(self.flags) # Write variables. f.write_int(len(self.vars)) for name in self.vars: var = self.vars[name] + f.write_int(var.flags) f.write_string(var.name) - f.write_int(0) # no aliases - f.write_string("&" + var.type if var.ref else var.type) + f.write_int(len(var.aliases)) + for alias in var.aliases: f.write_string(alias) + f.write_string(var.type) f.write_int(len(var.shape)) for d in var.shape: f.write_int(d) f.write_object(var.data) @@ -454,6 +513,7 @@ def save(self, filename): f.write_int(len(self.ops)) for name in self.ops: op = self.ops[name] + f.write_int(op.flags) f.write_string(op.name) f.write_string(op.type) f.write_int(len(op.inputs)) @@ -471,6 +531,7 @@ def save(self, filename): f.write_int(len(self.funcs)) for name in self.funcs: func = self.funcs[name] + f.write_int(func.flags) f.write_string(func.name) f.write_int(len(func.ops)) for op in func.ops: @@ -480,6 +541,7 @@ def save(self, filename): f.write_int(len(self.cnxs)) for name in self.cnxs: cnx = self.cnxs[name] + f.write_int(cnx.flags) f.write_string(cnx.name) f.write_int(len(cnx.links)) for link in cnx.links: @@ -489,6 +551,7 @@ def save(self, filename): f.write_int(len(self.blobs)) for name in self.blobs: blob = 
self.blobs[name] + f.write_int(blob.flags) f.write_string(blob.name) f.write_string(blob.type) f.write_int(len(blob.attrs)) @@ -506,33 +569,41 @@ def load(self, filename): assert magic == 'flow', magic version = f.read_int() - assert version == 4, version + assert version == 4 or version == 5, version + if version >= 5: self.flags = f.read_int() num_vars = f.read_int() for _ in xrange(num_vars): + flags = 0 + if version >= 5: flags = f.read_int() name = f.read_string() - assert f.read_int() == 0 + num_aliases = f.read_int() + aliases = [] + for i in xrange(num_aliases): + aliases.append(f.read_string()) t = f.read_string() - ref = False if t[0] == '&': - ref = True + flags |= 4 t = t[1:] shape_size = f.read_int() shape = [] for _ in xrange(shape_size): shape.append(f.read_int()) - var = self.var(name, type=t, shape=shape) - if ref: var.ref = True - data_size = f.read_long() - var.data = f.slice(data_size) # avoid creating a copy + var.flags = flags + size = f.read_long() + if size > 0: + var.data = f.slice(size) # avoid creating a copy num_ops = f.read_int() for _ in xrange(num_ops): + flags = 0 + if version >= 5: flags = f.read_int() name = f.read_string() op = self.op(name) - + op.flags = flags op.type = f.read_string() + num_in = f.read_int() for _ in xrange(num_in): op.add_input(self.var(name=f.read_string())) @@ -549,29 +620,42 @@ def load(self, filename): num_funcs = f.read_int() for _ in xrange(num_funcs): - func = self.func(name=f.read_string()) + flags = 0 + if version >= 5: flags = f.read_int() + name = f.read_string() + func = self.func(name) + func.flags = flags n = f.read_int() for _ in xrange(n): func.add(self.op(f.read_string())) num_cnxs = f.read_int() for _ in xrange(num_cnxs): - cnx = self.cnx(f.read_string()) + flags = 0 + if version >= 5: flags = f.read_int() + name = f.read_string() + cnx = self.cnx(name) + cnx.flags = flags n = f.read_int() for _ in xrange(n): cnx.add(self.var(f.read_string())) num_blobs = f.read_int() for _ in 
xrange(num_blobs): - blob = self.blob(f.read_string()) + flags = 0 + if version >= 5: flags = f.read_int() + name = f.read_string() + blob = self.blob(name) + blob.flags = flags blob.type = f.read_string() n = f.read_int() for _ in xrange(n): name = f.read_string() val = f.read_string() blob.add_attr(name, val) - data_size = f.read_long() - blob.data = f.slice(data_size) # avoid creating a copy + size = f.read_long() + if size > 0: + blob.data = f.slice(size) # avoid creating a copy def __str__(self): s = "" diff --git a/python/myelin/lexical_encoder.py b/python/myelin/lexical_encoder.py index cf560f9d..ef5dc4fa 100644 --- a/python/myelin/lexical_encoder.py +++ b/python/myelin/lexical_encoder.py @@ -80,6 +80,8 @@ def read_file(filename): self.feature_vector = bldr.concat(concat_args) bldr.rename(self.feature_vector, "feature_vector") self.feature_vector.ref = True + self.feature_vector.input = True + self.feature_vector.output = True # Add BiLSTM. lr = builder.Builder(flow, "lstm/lr") diff --git a/python/task/wiki.py b/python/task/wiki.py index cc918cdc..da00aec1 100644 --- a/python/task/wiki.py +++ b/python/task/wiki.py @@ -531,6 +531,7 @@ def item_names(self, language=None): lang: /lang/ sources: ... count: ... + form: ... } ... 
} diff --git a/sling/myelin/kernel/gradients.cc b/sling/myelin/kernel/gradients.cc index 80d969c2..ccc10cc5 100644 --- a/sling/myelin/kernel/gradients.cc +++ b/sling/myelin/kernel/gradients.cc @@ -112,11 +112,11 @@ void sqrt_grad(Flow::Operation *op, Gradients *g) { } // y = 1 / x -// dx = -dy / x^2 +// dx = -dy / x^2 = -dy * y^2 void reciprocal_grad(Flow::Operation *op, Gradients *g) { auto x = op->inputs[0]; auto y = op->outputs[0]; - g->add(x, g->Neg(g->Div(g->d(y), g->Square(g->v(x))))); + g->add(x, g->Neg(g->Mul(g->d(y), g->Square(g->v(y))))); } // y = -x diff --git a/sling/pyapi/BUILD b/sling/pyapi/BUILD index 05808b26..4677936b 100644 --- a/sling/pyapi/BUILD +++ b/sling/pyapi/BUILD @@ -9,6 +9,7 @@ cc_library( "pydate.cc", "pyframe.cc", "pymisc.cc", + "pymyelin.cc", "pyparser.cc", "pyphrase.cc", "pyrecordio.cc", @@ -22,6 +23,7 @@ cc_library( "pydate.h", "pyframe.h", "pymisc.h", + "pymyelin.h", "pyparser.h", "pyphrase.h", "pyrecordio.h", @@ -36,6 +38,9 @@ cc_library( "//sling/file:recordio", "//sling/frame", "//sling/http:http-server", + "//sling/myelin:flow", + "//sling/myelin:compiler", + "//sling/myelin:compute", "//sling/nlp/document", "//sling/nlp/wiki:phrase-table", "//sling/nlp/document:document-tokenizer", diff --git a/sling/pyapi/pyapi.cc b/sling/pyapi/pyapi.cc index ecc97c05..8eab898b 100644 --- a/sling/pyapi/pyapi.cc +++ b/sling/pyapi/pyapi.cc @@ -25,6 +25,7 @@ #include "sling/pyapi/pyarray.h" #include "sling/pyapi/pydate.h" #include "sling/pyapi/pyframe.h" +#include "sling/pyapi/pymyelin.h" #include "sling/pyapi/pyparser.h" #include "sling/pyapi/pyphrase.h" #include "sling/pyapi/pyrecordio.h" @@ -54,24 +55,38 @@ static PyMethodDef py_funcs[] = { static void RegisterPythonModule() { PyObject *module = Py_InitModule3("pysling", py_funcs, "SLING"); + PyStore::Define(module); PySymbols::Define(module); PyFrame::Define(module); PySlots::Define(module); PyArray::Define(module); PyItems::Define(module); + PyTokenizer::Define(module); + 
PyParser::Define(module); + PyPhraseMatch::Define(module); PyPhraseTable::Define(module); - PyParser::Define(module); + PyRecordReader::Define(module); - PyRecordDatabase::Define(module); PyRecordWriter::Define(module); + PyRecordDatabase::Define(module); + PyCalendar::Define(module); PyDate::Define(module); + PyWikiConverter::Define(module); PyFactExtractor::Define(module); PyTaxonomy::Define(module); + + PyCompiler::Define(module); + PyNetwork::Define(module); + PyCell::Define(module); + PyInstance::Define(module); + PyChannel::Define(module); + PyTensor::Define(module); + #ifndef SLING_GOOGLE3 PyJob::Define(module); PyResource::Define(module); diff --git a/sling/pyapi/pymyelin.cc b/sling/pyapi/pymyelin.cc new file mode 100644 index 00000000..d01d5ab7 --- /dev/null +++ b/sling/pyapi/pymyelin.cc @@ -0,0 +1,903 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "sling/pyapi/pymyelin.h" + +#include "sling/myelin/flow.h" + +namespace sling { + +using namespace myelin; + +// Python type declarations. 
+PyTypeObject PyCompiler::type; +PyMethodTable PyCompiler::methods; + +PyTypeObject PyNetwork::type; +PyMappingMethods PyNetwork::mapping; +PyMethodTable PyNetwork::methods; + +PyTypeObject PyCell::type; +PyMethodTable PyCell::methods; + +PyTypeObject PyInstance::type; +PyMappingMethods PyInstance::mapping; +PyMethodTable PyInstance::methods; + +PyTypeObject PyChannel::type; +PyMappingMethods PyChannel::mapping; +PyMethodTable PyChannel::methods; + +PyTypeObject PyTensor::type; +PyMappingMethods PyTensor::mapping; +PyBufferProcs PyTensor::buffer; +PyMethodTable PyTensor::methods; + +void PyCompiler::Define(PyObject *module) { + InitType(&type, "sling.Compiler", sizeof(PyCompiler), true); + type.tp_init = method_cast(&PyCompiler::Init); + type.tp_dealloc = method_cast(&PyCompiler::Dealloc); + + methods.AddO("compile", &PyCompiler::Compile); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Compiler"); +} + +int PyCompiler::Init(PyObject *args, PyObject *kwds) { + // Initialize compiler. + compiler = new Compiler(); + compiler->set_perf_flopctr(false); + + return 0; +} + +void PyCompiler::Dealloc() { + delete compiler; + Free(); +} + +PyObject *PyCompiler::Compile(PyObject *arg) { + // Import Python-based flow into a Myelin flow. + Flow flow; + PyBuffers buffers(&flow); + if (!ImportFlow(arg, &flow, &buffers)) return nullptr; + + // Compile flow to network. + Network *net = new Network(); + compiler->Compile(&flow, net); + + // Return compiled network. + PyNetwork *pynet = PyObject_New(PyNetwork, &PyNetwork::type); + pynet->Init(net); + return pynet->AsObject(); +} + +bool PyCompiler::ImportFlow(PyObject *pyflow, Flow *flow, PyBuffers *buffers) { + // Get variables. 
+ PyObject *pyvars = PyAttr(pyflow, "vars"); + std::unordered_map varmap; + Py_ssize_t pos = 0; + PyObject *pyvar; + while (PyDict_Next(pyvars, &pos, nullptr, &pyvar)) { + const char *name = PyStrAttr(pyvar, "name"); + string type = PyStrAttr(pyvar, "type"); + auto &t = TypeTraits::of(type); + + PyObject *pyshape = PyAttr(pyvar, "shape"); + Shape shape; + for (int i = 0; i < PyList_Size(pyshape); ++i) { + int dim = PyInt_AsLong(PyList_GetItem(pyshape, i)); + if (dim == -1) dim = 1; + shape.add(dim); + } + Py_DECREF(pyshape); + + auto *var = flow->AddVariable(name, t.type(), shape); + var->flags = PyIntAttr(pyvar, "flags"); + varmap[pyvar] = var; + + PyObject *pydata = PyAttr(pyvar, "data"); + if (pydata != Py_None) { + var->data = buffers->GetData(pydata, &var->size); + if (var->data == nullptr) return false; + } + Py_DECREF(pydata); + } + Py_DECREF(pyvars); + + // Get operations. + PyObject *pyops = PyAttr(pyflow, "ops"); + std::unordered_map opmap; + pos = 0; + PyObject *pyop; + while (PyDict_Next(pyops, &pos, nullptr, &pyop)) { + const char *name = PyStrAttr(pyop, "name"); + const char *type = PyStrAttr(pyop, "type"); + + auto *op = flow->AddOperation(name, type); + op->flags = PyIntAttr(pyop, "flags"); + opmap[pyop] = op; + + PyObject *pyinputs = PyAttr(pyop, "inputs"); + for (int i = 0; i < PyList_Size(pyinputs); ++i) { + Flow::Variable *input = varmap[PyList_GetItem(pyinputs, i)]; + CHECK(input != nullptr); + op->AddInput(input); + } + Py_DECREF(pyinputs); + + PyObject *pyoutputs = PyAttr(pyop, "outputs"); + for (int i = 0; i < PyList_Size(pyoutputs); ++i) { + Flow::Variable *output = varmap[PyList_GetItem(pyoutputs, i)]; + CHECK(output != nullptr); + op->AddOutput(output); + } + Py_DECREF(pyoutputs); + + if (!ImportAttributes(pyop, op)) return false; + } + Py_DECREF(pyops); + + // Get functions. 
+ PyObject *pyfuncs = PyAttr(pyflow, "funcs"); + pos = 0; + PyObject *pyfunc; + while (PyDict_Next(pyfuncs, &pos, nullptr, &pyfunc)) { + const char *name = PyStrAttr(pyfunc, "name"); + + auto *func = flow->AddFunction(name); + func->flags = PyIntAttr(pyfunc, "flags"); + + PyObject *pyops = PyAttr(pyfunc, "ops"); + for (int i = 0; i < PyList_Size(pyops); ++i) { + Flow::Operation *op = opmap[PyList_GetItem(pyops, i)]; + CHECK(op != nullptr); + func->AddOperation(op); + } + Py_DECREF(pyops); + } + Py_DECREF(pyfuncs); + + // Get connectors. + PyObject *pycnxs = PyAttr(pyflow, "cnxs"); + pos = 0; + PyObject *pycnx; + while (PyDict_Next(pycnxs, &pos, nullptr, &pycnx)) { + const char *name = PyStrAttr(pycnx, "name"); + + auto *cnx = flow->AddConnector(name); + cnx->flags = PyIntAttr(pycnx, "flags"); + + PyObject *pylinks = PyAttr(pycnx, "links"); + for (int i = 0; i < PyList_Size(pylinks); ++i) { + Flow::Variable *var = varmap[PyList_GetItem(pylinks, i)]; + CHECK(var != nullptr); + cnx->AddLink(var); + } + Py_DECREF(pylinks); + } + Py_DECREF(pycnxs); + + // Get blobs. 
+ PyObject *pyblobs = PyAttr(pyflow, "blobs"); + pos = 0; + PyObject *pyblob; + while (PyDict_Next(pyblobs, &pos, nullptr, &pyblob)) { + const char *name = PyStrAttr(pyblob, "name"); + const char *type = PyStrAttr(pyblob, "type"); + + auto *blob = flow->AddBlob(name, type); + blob->flags = PyIntAttr(pyblob, "flags"); + + PyObject *pydata = PyAttr(pyblob, "data"); + if (pydata != Py_None) { + blob->data = buffers->GetData(pydata, &blob->size); + if (blob->data == nullptr) return false; + } + Py_DECREF(pydata); + + if (!ImportAttributes(pyblob, blob)) return false; + } + Py_DECREF(pyblobs); + + return true; +} + +bool PyCompiler::ImportAttributes(PyObject *obj, Attributes *attrs) { + PyObject *pyattrs = PyAttr(obj, "attrs"); + Py_ssize_t pos = 0; + PyObject *pyname; + PyObject *pyvalue; + while (PyDict_Next(pyattrs, &pos, &pyname, &pyvalue)) { + const char *name = PyString_AsString(pyname); + if (name == nullptr) return false; + const char *value = PyString_AsString(pyvalue); + if (value == nullptr) return false; + attrs->SetAttr(name, value); + } + + return true; +} + +const char *PyCompiler::PyStrAttr(PyObject *obj, const char *name) { + PyObject *attr = PyAttr(obj, name); + const char *str = attr == Py_None ? 
"" : PyString_AsString(attr); + CHECK(str != nullptr) << name; + Py_DECREF(attr); + return str; +} + +int PyCompiler::PyIntAttr(PyObject *obj, const char *name) { + PyObject *attr = PyAttr(obj, name); + int value = PyNumber_AsSsize_t(attr, nullptr); + Py_DECREF(attr); + return value; +} + +PyObject *PyCompiler::PyAttr(PyObject *obj, const char *name) { + PyObject *attr = PyObject_GetAttrString(obj, name); + CHECK(attr != nullptr) << name; + return attr; +} + +void PyNetwork::Define(PyObject *module) { + InitType(&type, "sling.Network", sizeof(PyNetwork), false); + type.tp_init = method_cast(&PyNetwork::Init); + type.tp_dealloc = method_cast(&PyNetwork::Dealloc); + + type.tp_as_mapping = &mapping; + mapping.mp_subscript = method_cast(&PyNetwork::LookupTensor); + + methods.AddO("cell", &PyNetwork::LookupCell); + methods.Add("profile", &PyNetwork::Profile); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Network"); +} + +int PyNetwork::Init(Network *net) { + this->net = net; + return 0; +} + +void PyNetwork::Dealloc() { + delete net; + Free(); +} + +PyObject *PyNetwork::LookupTensor(PyObject *key) { + // Look up tensor in network. + Tensor *tensor = GetTensor(key, nullptr); + if (tensor == nullptr) return nullptr; + + // Get tensor data buffer. + if (tensor->placement() == DEVICE) Py_RETURN_NONE; + char *ptr = tensor->data(); + if (ptr == nullptr) Py_RETURN_NONE; + if (tensor->ref()) { + if (tensor->ref_placement() == DEVICE) Py_RETURN_NONE; + ptr = *reinterpret_cast(ptr); + } + if (ptr == nullptr) Py_RETURN_NONE; + + // Return tensor data. + PyTensor *pytensor = PyObject_New(PyTensor, &PyTensor::type); + pytensor->Init(this->AsObject(), ptr, tensor); + return pytensor->AsObject(); +} + +PyObject *PyNetwork::LookupCell(PyObject *key) { + // Get cell name. + const char *name = PyString_AsString(key); + if (name == nullptr) return nullptr; + + // Look up cell in network. 
+ Cell *cell = net->LookupCell(name); + if (cell == nullptr) { + PyErr_SetString(PyExc_TypeError, "Unknown cell"); + return nullptr; + } + + // Return cell wrapper. + PyCell *pycell = PyObject_New(PyCell, &PyCell::type); + pycell->Init(this, cell); + return pycell->AsObject(); +} + +PyObject *PyNetwork::Profile() { + return AllocateString(ProfileReport(*net)); +} + +Tensor *PyNetwork::GetTensor(PyObject *key, const Cell *cell) { + // Get tensor name. If the key is a string, this is used for looking up the + // tensor by name. If key is an integer, it is used as an index into the + // parameter array of the network. Otherwise, the repr() method + // is used for computing the name of the tensor. + Tensor *tensor; + if (PyInt_Check(key)) { + int index = PyInt_AsLong(key); + auto &params = net->parameters(); + if (index < 0 || index >= params.size()) { + PyErr_SetString(PyExc_IndexError, "Invalid parameter tensor index"); + return nullptr; + } + tensor = params[index]; + } else if (PyString_Check(key)) { + const char *name = PyString_AsString(key); + if (name == nullptr) return nullptr; + tensor = net->LookupParameter(name); + } else { + PyObject *repr = PyObject_Repr(key); + if (repr == nullptr) return nullptr; + const char *name = PyString_AsString(repr); + if (name == nullptr) { + Py_DECREF(repr); + return nullptr; + } + tensor = net->LookupParameter(name); + Py_DECREF(repr); + } + + if (tensor == nullptr) { + PyErr_SetString(PyExc_ValueError, "Unknown tensor"); + return nullptr; + } + + if (tensor->cell() != cell) { + if (cell == nullptr) { + PyErr_SetString(PyExc_TypeError, "Tensor is not a global tensor"); + } else { + PyErr_SetString(PyExc_TypeError, "Tensor does not belong to cell"); + } + return nullptr; + } + + return tensor; +} + +void PyCell::Define(PyObject *module) { + InitType(&type, "sling.Cell", sizeof(PyCell), false); + type.tp_init = method_cast(&PyCell::Init); + type.tp_dealloc = method_cast(&PyCell::Dealloc); + + methods.Add("instance", 
&PyCell::NewInstance); + methods.Add("channel", &PyCell::NewChannel); + methods.AddO("index", &PyCell::Index); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Cell"); +} + +int PyCell::Init(PyNetwork *pynet, myelin::Cell *cell) { + this->cell = cell; + this->pynet = pynet; + Py_INCREF(pynet); + return 0; +} + +void PyCell::Dealloc() { + Py_DECREF(pynet); + Free(); +} + +PyObject *PyCell::NewInstance() { + PyInstance *pyinstance = PyObject_New(PyInstance, &PyInstance::type); + pyinstance->Init(this); + return pyinstance->AsObject(); +} + +PyObject *PyCell::NewChannel(PyObject *args) { + // Get tensor name for channel and optionally size. + PyObject *key = nullptr; + int size = 0; + if (!PyArg_ParseTuple(args, "O|i", &key, &size)) return nullptr; + + // Look up tensor in network. + Tensor *tensor = pynet->GetTensor(key, cell); + if (tensor == nullptr) return nullptr; + + // Create new channel. + PyChannel *pychannel = PyObject_New(PyChannel, &PyChannel::type); + pychannel->Init(pynet, tensor, size); + return pychannel->AsObject(); +} + +PyObject *PyCell::Index(PyObject *key) { + // Look up tensor in network. + Tensor *tensor = pynet->GetTensor(key, cell); + if (tensor == nullptr) return nullptr; + + // Find parameter tensor index. 
+ int index = -1; + auto ¶ms = pynet->net->parameters(); + for (int i = 0; i < params.size(); ++i) { + if (params[i] == tensor) { + index = i; + break; + } + } + return PyInt_FromLong(index); +} + +void PyInstance::Define(PyObject *module) { + InitType(&type, "sling.Instance", sizeof(PyInstance), false); + type.tp_init = method_cast(&PyInstance::Init); + type.tp_dealloc = method_cast(&PyInstance::Dealloc); + type.tp_str = method_cast(&PyInstance::Str); + type.tp_repr = method_cast(&PyInstance::Str); + + type.tp_as_mapping = &mapping; + mapping.mp_subscript = method_cast(&PyInstance::LookupTensor); + + methods.Add("compute", &PyInstance::Compute); + methods.Add("clear", &PyInstance::Clear); + methods.Add("connect", &PyInstance::Connect); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Instance"); +} + +int PyInstance::Init(PyCell *pycell) { + this->pycell = pycell; + Py_INCREF(pycell); + data = new Instance(pycell->cell); + data->Clear(); + return 0; +} + +void PyInstance::Dealloc() { + delete data; + Py_DECREF(pycell); + Free(); +} + +PyObject *PyInstance::LookupTensor(PyObject *key) { + // Look up tensor in network. + Tensor *tensor = pycell->pynet->GetTensor(key, data->cell()); + if (tensor == nullptr) return nullptr; + + // Get tensor data buffer. + if (tensor->placement() == DEVICE) Py_RETURN_NONE; + char *ptr = data->GetAddress(tensor); + if (ptr == nullptr) Py_RETURN_NONE; + if (tensor->ref()) { + if (tensor->ref_placement() == DEVICE) Py_RETURN_NONE; + ptr = *reinterpret_cast(ptr); + } + if (ptr == nullptr) Py_RETURN_NONE; + + // Return tensor data. + PyTensor *pytensor = PyObject_New(PyTensor, &PyTensor::type); + pytensor->Init(this->AsObject(), ptr, tensor); + return pytensor->AsObject(); +} + +PyObject *PyInstance::Connect(PyObject *args) { + // Get arguments: tensor name, channel, index. 
+ PyObject *key; + PyChannel *pychannel; + int index; + if (!PyArg_ParseTuple(args, "OOi", &key, &pychannel, &index)) return nullptr; + if (!PyChannel::TypeCheck(pychannel)) return nullptr; + + // Look up tensor in network. + Tensor *tensor = pycell->pynet->GetTensor(key, data->cell()); + if (tensor == nullptr) return nullptr; + + // Check index. + if (index < 0 || index >= pychannel->channel->size()) { + PyErr_SetString(PyExc_IndexError, "Invalid channel element index"); + return nullptr; + } + + // Set reference tensor to element in channel. + data->Set(tensor, pychannel->channel, index); + + Py_RETURN_NONE; +} + +PyObject *PyInstance::Compute() { + data->Compute(); + Py_RETURN_NONE; +} + +PyObject *PyInstance::Clear() { + data->Clear(); + Py_RETURN_NONE; +} + +PyObject *PyInstance::Str() { + return AllocateString(data->ToString()); +} + +void PyChannel::Define(PyObject *module) { + InitType(&type, "sling.Channel", sizeof(PyChannel), false); + type.tp_init = method_cast(&PyChannel::Init); + type.tp_dealloc = method_cast(&PyChannel::Dealloc); + + type.tp_as_mapping = &mapping; + mapping.mp_length = method_cast(&PyChannel::Size); + mapping.mp_subscript = method_cast(&PyChannel::Lookup); + + methods.Add("resize", &PyChannel::Resize); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Channel"); +} + +int PyChannel::Init(PyNetwork *pynet, Tensor *format, int size) { + this->pynet = pynet; + Py_INCREF(pynet); + channel = new Channel(format); + if (size > 0) channel->resize(size); + return 0; +} + +void PyChannel::Dealloc() { + delete channel; + Py_DECREF(pynet); + Free(); +} + +PyObject *PyChannel::Size() { + return PyInt_FromLong(channel->size()); +} + +PyObject *PyChannel::Lookup(PyObject *key) { + // Get index. 
+ int index = PyInt_AsLong(key); + if (index == -1 && PyErr_Occurred()) return nullptr; + if (index < 0 || index >= channel->size()) { + PyErr_SetString(PyExc_IndexError, "Invalid channel element index"); + return nullptr; + } + + // Cannot access channel elements in device. + if (channel->placement() == DEVICE) Py_RETURN_NONE; + + // Return element as tensor. + char *ptr = channel->at(index); + PyTensor *pytensor = PyObject_New(PyTensor, &PyTensor::type); + pytensor->Init(this->AsObject(), ptr, channel->format()); + return pytensor->AsObject(); +} + +PyObject *PyChannel::Resize(PyObject *args) { + // Get new channel size. + int size = 0; + if (!PyArg_ParseTuple(args, "i", &size)) return nullptr; + if (size < 0) size = 0; + + // Resize channel. + channel->resize(size); + + Py_RETURN_NONE; +} + +void PyTensor::Define(PyObject *module) { + InitType(&type, "sling.Tensor", sizeof(PyTensor), false); + type.tp_init = method_cast(&PyTensor::Init); + type.tp_dealloc = method_cast(&PyTensor::Dealloc); + type.tp_str = method_cast(&PyTensor::Str); + type.tp_repr = method_cast(&PyTensor::Str); + + type.tp_as_mapping = &mapping; + mapping.mp_subscript = method_cast(&PyTensor::GetElement); + mapping.mp_ass_subscript = method_cast(&PyTensor::SetElement); + + type.tp_as_buffer = &buffer; + type.tp_flags |= Py_TPFLAGS_HAVE_NEWBUFFER; + buffer.bf_getbuffer = + method_cast(&PyTensor::GetBuffer); + buffer.bf_releasebuffer = + method_cast(&PyTensor::ReleaseBuffer); + + methods.Add("name", &PyTensor::Name); + methods.Add("rank", &PyTensor::Rank); + methods.Add("shape", &PyTensor::Shape); + methods.Add("type", &PyTensor::Type); + type.tp_methods = methods.table(); + + RegisterType(&type, module, "Tensor"); +} + +int PyTensor::Init(PyObject *owner, char *data, const Tensor *format) { + this->owner = owner; + this->data = data; + this->format = format; + if (owner) Py_INCREF(owner); + shape = nullptr; + strides = nullptr; + return 0; +} + +void PyTensor::Dealloc() { + if (shape) 
free(shape); + if (strides) free(strides); + if (owner) Py_DECREF(owner); + Free(); +} + +PyObject *PyTensor::Name() { + return AllocateString(format->name()); +} + +PyObject *PyTensor::Rank() { + return PyInt_FromLong(format->rank()); +} + +PyObject *PyTensor::Shape() { + PyObject *dims = PyList_New(format->rank()); + for (int d = 0; d < format->rank(); ++d) { + PyList_SetItem(dims, d, PyInt_FromLong(format->dim(d))); + } + return dims; +} + +PyObject *PyTensor::Type() { + return AllocateString(TypeTraits::of(format->type()).name()); +} + +PyObject *PyTensor::Str() { + return AllocateString(format->ToString(data, false)); +} + +PyObject *PyTensor::GetElement(PyObject *index) { + // Get reference to tensor element. + char *ptr = GetAddress(index); + if (ptr == nullptr) return nullptr; + + // Return element. + switch (format->type()) { + case DT_FLOAT: + return PyFloat_FromDouble(*reinterpret_cast(ptr)); + case DT_DOUBLE: + return PyFloat_FromDouble(*reinterpret_cast(ptr)); + case DT_INT32: + return PyInt_FromLong(*reinterpret_cast(ptr)); + case DT_UINT8: + return PyInt_FromLong(*reinterpret_cast(ptr)); + case DT_INT16: + return PyInt_FromLong(*reinterpret_cast(ptr)); + case DT_INT8: + return PyInt_FromLong(*reinterpret_cast(ptr)); + case DT_INT64: + return PyLong_FromLongLong(*reinterpret_cast(ptr)); + case DT_BOOL: + return PyBool_FromLong(*reinterpret_cast(ptr)); + default: + PyErr_SetString(PyExc_ValueError, "Unsupported element type"); + return nullptr; + } +} + +int PyTensor::SetElement(PyObject *index, PyObject *value) { + // Elements cannot be deleted. + if (value == nullptr) { + PyErr_SetString(PyExc_ValueError, "Cannot delete values from tensor"); + return -1; + } + + // Get reference to tensor element. + char *ptr = GetAddress(index); + if (ptr == nullptr) return -1; + + // Return element. 
+ switch (format->type()) { + case DT_FLOAT: { + float v = PyFloat_AsDouble(value); + if (v == -1.0 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_DOUBLE: { + double v = PyFloat_AsDouble(value); + if (v == -1.0 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_INT32: { + int v = PyInt_AsLong(value); + if (v == -1 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_UINT8: { + int v = PyInt_AsLong(value); + if (v == -1 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_INT16: { + int v = PyInt_AsLong(value); + if (v == -1 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_INT8: { + int v = PyInt_AsLong(value); + if (v == -1 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_INT64: { + int64 v = PyLong_AsLongLong(value); + if (v == -1 && PyErr_Occurred()) return -1; + *reinterpret_cast(ptr) = v; + break; + } + case DT_BOOL: { + int v = PyObject_IsTrue(value); + if (v == -1) return -1; + *reinterpret_cast(ptr) = v; + break; + } + default: + PyErr_SetString(PyExc_ValueError, "Unsupported element type"); + return -1; + } + + return 0; +} + +char *PyTensor::GetAddress(PyObject *index) { + int rank = format->rank(); + if (rank == 0) { + // Ignore index for scalars. + return data; + } else if (rank == 1) { + // Get single-dimensional index. + int idx = PyInt_AsLong(index); + if (idx == -1 && PyErr_Occurred()) return nullptr; + if (idx < 0) idx += format->dim(0); + if (idx < 0 || idx >= format->dim(0)) { + PyErr_SetString(PyExc_IndexError, "Invalid tensor index"); + return nullptr; + } + return data + format->offset(idx); + } else if (PyTuple_Check(index)) { + // Get multi-dimensional index. 
+ int size = PyTuple_Size(index); + if (size != rank) { + PyErr_SetString(PyExc_IndexError, "Wrong number of indices"); + return nullptr; + } + size_t ofs = 0; + for (int d = 0; d < rank; ++d) { + int idx = PyInt_AsLong(PyTuple_GetItem(index, d)); + if (idx == -1 && PyErr_Occurred()) return nullptr; + if (idx < 0) idx += format->dim(d); + if (idx < 0 || idx >= format->dim(d)) { + PyErr_SetString(PyExc_IndexError, "Invalid tensor index"); + return nullptr; + } + ofs += idx * format->stride(d); + } + return data + ofs; + } else { + PyErr_SetString(PyExc_IndexError, "Invalid tensor index"); + return nullptr; + } +} + +int PyTensor::GetBuffer(Py_buffer *view, int flags) { + memset(view, 0, sizeof(Py_buffer)); + view->buf = data; + view->obj = AsObject(); + view->len = format->size(); + view->readonly = 0; + + if (flags != PyBUF_SIMPLE) { + int dims = format->rank(); + view->itemsize = format->element_size(); + + if (flags & PyBUF_FORMAT) { + view->format = GetFormat(); + } + + if (flags & PyBUF_ND) { + view->ndim = dims; + if (dims > 0) view->shape = GetShape(); + } + + if (flags & PyBUF_STRIDES) { + if ((flags & PyBUF_C_CONTIGUOUS) == PyBUF_C_CONTIGUOUS) { + if (format->order() != ROW_MAJOR) { + PyErr_SetString(PyExc_TypeError, "Buffer is not row-major"); + return -1; + } + } + if ((flags & PyBUF_F_CONTIGUOUS) == PyBUF_F_CONTIGUOUS) { + if (format->order() != COLUMN_MAJOR) { + PyErr_SetString(PyExc_TypeError, "Buffer is not column-major"); + return -1; + } + } + + if (dims > 0) view->strides = GetStrides(); + } + } + + Py_INCREF(view->obj); + return 0; +} + +void PyTensor::ReleaseBuffer(Py_buffer *view) { +} + +Py_ssize_t *PyTensor::GetShape() { + if (shape == nullptr) { + int dims = format->rank(); + shape = static_cast(malloc(dims * sizeof(Py_ssize_t))); + for (int d = 0; d < dims; ++d) shape[d] = format->dim(d); + } + return shape; +} + +Py_ssize_t *PyTensor::GetStrides() { + if (strides == nullptr) { + int dims = format->rank(); + strides = static_cast(malloc(dims 
* sizeof(Py_ssize_t))); + for (int d = 0; d < dims; ++d) strides[d] = format->stride(d); + } + return strides; +} + +PyBuffers::~PyBuffers() { + for (auto *view : views_) { + PyBuffer_Release(view); + delete view; + } + for (auto *ref : refs_) { + Py_DECREF(ref); + } +} + +char *PyBuffers::GetData(PyObject *obj, size_t *size) { + if (PyObject_CheckBuffer(obj)) { + // Get data using Python buffer protocol. + Py_buffer *view = new Py_buffer; + if (PyObject_GetBuffer(obj, view, PyBUF_C_CONTIGUOUS) == -1) { + delete view; + return nullptr; + } + views_.push_back(view); + *size = view->len; + return static_cast(view->buf); + } else if (PyString_Check(obj)) { + // Get string buffer. + char *data; + Py_ssize_t length; + if (PyString_AsStringAndSize(obj, &data, &length) == -1) return nullptr; + Py_INCREF(obj); + refs_.push_back(obj); + *size = length; + return data; + } else if (PyFloat_Check(obj)) { + float v = PyFloat_AsDouble(obj); + *size = sizeof(float); + return flow_->AllocateMemory(&v, sizeof(float)); + } else if (PyInt_Check(obj)) { + int v = PyInt_AsLong(obj); + *size = sizeof(int); + return flow_->AllocateMemory(&v, sizeof(int)); + } else { + PyErr_SetString(PyExc_TypeError, "Cannot get data from object"); + return nullptr; + } +} + +} // namespace sling + diff --git a/sling/pyapi/pymyelin.h b/sling/pyapi/pymyelin.h new file mode 100644 index 00000000..45d23490 --- /dev/null +++ b/sling/pyapi/pymyelin.h @@ -0,0 +1,278 @@ +// Copyright 2018 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef SLING_PYAPI_PYMYELIN_H_ +#define SLING_PYAPI_PYMYELIN_H_ + +#include "sling/myelin/compiler.h" +#include "sling/pyapi/pybase.h" + +namespace sling { + +// Utility class for holding on to internal memory buffers defined in other +// Python objects. This uses the Python buffer interface to get direct access +// to the internal memory representation of other Python objects like +// memoryview and numpy arrays, so these do not need to be copied in the +// Myelin flows. +class PyBuffers { + public: + PyBuffers(myelin::Flow *flow) : flow_(flow) {} + ~PyBuffers(); + char *GetData(PyObject *obj, size_t *size); + private: + myelin::Flow *flow_; + std::vector views_; + std::vector refs_; +}; + +// Python wrapper for Myelin compiler. +struct PyCompiler : public PyBase { + // Initialize wrapper. + int Init(PyObject *args, PyObject *kwds); + + // Deallocate wrapper. + void Dealloc(); + + // Compile flow. + PyObject *Compile(PyObject *arg); + + // Import Python flow into Myelin flow. + static bool ImportFlow(PyObject *pyflow, myelin::Flow *flow, + PyBuffers *buffers); + + // Import attributes for flow artifact. + static bool ImportAttributes(PyObject *obj, myelin::Attributes *attrs); + + // Get string attribute for object. + static const char *PyStrAttr(PyObject *obj, const char *name); + + // Get integer attribute for object. + static int PyIntAttr(PyObject *obj, const char *name); + + // Get attribute for object. Returns new reference. + static PyObject *PyAttr(PyObject *obj, const char *name); + + // Myelin compiler. + myelin::Compiler *compiler; + + // Registration. + static PyTypeObject type; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +// Python wrapper for Myelin network. +struct PyNetwork : public PyBase { + // Initialize wrapper. + int Init(myelin::Network *net); + + // Deallocate wrapper. 
+ void Dealloc(); + + // Look up global tensor in network. + PyObject *LookupTensor(PyObject *key); + + // Look up cell in network. + PyObject *LookupCell(PyObject *key); + + // Return profile report if profiling is enabled. + PyObject *Profile(); + + // Get named tensor in cell or a global tensor if cell is null. + myelin::Tensor *GetTensor(PyObject *key, const myelin::Cell *cell); + + // Myelin network. + myelin::Network *net; + + // Registration. + static PyTypeObject type; + static PyMappingMethods mapping; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +// Python wrapper for Myelin cell. +struct PyCell : public PyBase { + // Initialize wrapper. + int Init(PyNetwork *pynet, myelin::Cell *cell); + + // Deallocate wrapper. + void Dealloc(); + + // Return new data instance for cell. + PyObject *NewInstance(); + + // Return new channel. + PyObject *NewChannel(PyObject *args); + + // Return parameter tensor index. This can be used as a key for looking up + // tensors in instances. + PyObject *Index(PyObject *key); + + // Myelin cell. + myelin::Cell *cell; + + // Network that owns the cell. + PyNetwork *pynet; + + // Registration. + static PyTypeObject type; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +// Python wrapper for Myelin instance. +struct PyInstance : public PyBase { + // Initialize wrapper. + int Init(PyCell *pycell); + + // Deallocate wrapper. + void Dealloc(); + + // Look up local tensor in instance. + PyObject *LookupTensor(PyObject *key); + + // Connect channel element to reference tensor in instance. + PyObject *Connect(PyObject *args); + + // Run cell computation on instance. + PyObject *Compute(); + + // Clear instance. + PyObject *Clear(); + + // Return data instance as string. + PyObject *Str(); + + // Myelin data instance. + myelin::Instance *data; + + // Cell for the instance. + PyCell *pycell; + + // Registration. 
+ static PyTypeObject type; + static PyMappingMethods mapping; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +// Python wrapper for Myelin channel. +struct PyChannel : public PyBase { + // Initialize wrapper. + int Init(PyNetwork *pynet, myelin::Tensor *format, int size); + + // Deallocate wrapper. + void Dealloc(); + + // Return channel size. + PyObject *Size(); + + // Return channel element. + PyObject *Lookup(PyObject *key); + + // Resize channel. + PyObject *Resize(PyObject *args); + + // Myelin channel data. + myelin::Channel *channel; + + // Network for channel. + PyNetwork *pynet; + + // Type checking. + static bool TypeCheck(PyBase *object) { + return PyBase::TypeCheck(object, &type); + } + static bool TypeCheck(PyObject *object) { + return PyBase::TypeCheck(object, &type); + } + + // Registration. + static PyTypeObject type; + static PyMappingMethods mapping; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +// Python wrapper for Myelin tensor data. +struct PyTensor : public PyBase { + // Initialize wrapper. + int Init(PyObject *owner, char *data, const myelin::Tensor *format); + + // Deallocate wrapper. + void Dealloc(); + + // Return tensor name. + PyObject *Name(); + + // Return tensor rank. + PyObject *Rank(); + + // Return tensor shape. + PyObject *Shape(); + + // Return tensor data type. + PyObject *Type(); + + // Return tensor as string. + PyObject *Str(); + + // Get element from tensor. + PyObject *GetElement(PyObject *index); + + // Assign value to tensor element. + int SetElement(PyObject *index, PyObject *value); + + // Buffer interface for accessing tensor data. + int GetBuffer(Py_buffer *view, int flags); + void ReleaseBuffer(Py_buffer *view); + + // Get shape and strides. They are allocated lazily. + Py_ssize_t *GetShape(); + Py_ssize_t *GetStrides(); + + // Return tensor type as Python type format string. 
+ char *GetFormat() { + return const_cast(myelin::TypeTraits::of(format->type()).pytype()); + } + + // Get address of element in tensor. + char *GetAddress(PyObject *index); + + // Reference for keeping data alive. + PyObject *owner; + + // Raw data for tensor. + char *data; + + // Tensor format. + const myelin::Tensor *format; + + // Shape and strides in Python format. + Py_ssize_t *shape; + Py_ssize_t *strides; + + // Registration. + static PyTypeObject type; + static PyMappingMethods mapping; + static PyBufferProcs buffer; + static PyMethodTable methods; + static void Define(PyObject *module); +}; + +} // namespace sling + +#endif // SLING_PYAPI_PYMYELIN_H_ +