diff --git a/src/laser/private/ast_utils.nim b/src/laser/private/ast_utils.nim
new file mode 100644
index 000000000..61cb9ab1b
--- /dev/null
+++ b/src/laser/private/ast_utils.nim
@@ -0,0 +1,33 @@
+# Laser
+# Copyright (c) 2018 Mamy André-Ratsimbazafy
+# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
+# This file may not be copied, modified, or distributed except according to those terms.
+
+import macros
+
+proc pop*(tree: var NimNode): NimNode =
+  ## varargs[untyped] consumes all arguments so the actual value should be popped
+  ## https://github.com/nim-lang/Nim/issues/5855
+  result = tree[tree.len-1]
+  tree.del(tree.len-1)
+
+proc replaceNodes*(ast: NimNode, replacements: NimNode, to_replace: NimNode): NimNode =
+  # Args:
+  #   - The full syntax tree
+  #   - an array of replacement values
+  #   - an array of identifiers to replace
+  proc inspect(node: NimNode): NimNode =
+    case node.kind:
+    of {nnkIdent, nnkSym}:
+      for i, c in to_replace:
+        if node.eqIdent($c):
+          return replacements[i]
+      return node
+    of nnkEmpty: return node
+    of nnkLiterals: return node
+    else:
+      var rTree = node.kind.newTree()
+      for child in node:
+        rTree.add inspect(child)
+      return rTree
+  result = inspect(ast)
diff --git a/src/laser/tensor/allocator.nim b/src/laser/tensor/allocator.nim
index 22b641255..72ff8123a 100644
--- a/src/laser/tensor/allocator.nim
+++ b/src/laser/tensor/allocator.nim
@@ -11,7 +11,7 @@ import
 
 proc finalizer[T](storage: CpuStorage[T]) =
   static: assert T.supportsCopyMem, "Tensors of seq, strings, ref types and types with non-trivial destructors cannot be finalized by this proc"
-  if storage.memowner and not storage.memalloc.isNil:
+  if storage.isMemOwner and not storage.memalloc.isNil:
     storage.memalloc.deallocShared()
 
 proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
@@ -21,8 +21,9 @@ proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
   ## are always zero-initialized. This prevents potential GC issues.
   when T.supportsCopyMem:
     new(storage, finalizer[T])
-    storage.memalloc = allocShared0(sizeof(T) * size + LASER_MEM_ALIGN - 1)
-    storage.memowner = true
+    {.noSideEffect.}:
+      storage.memalloc = allocShared(sizeof(T) * size + LASER_MEM_ALIGN - 1)
+      storage.isMemOwner = true
     storage.raw_buffer = align_raw_data(T, storage.memalloc)
   else: # Always 0-initialize Tensors of seq, strings, ref types and types with non-trivial destructors
     new(storage)
diff --git a/src/laser/tensor/datatypes.nim b/src/laser/tensor/datatypes.nim
index 51d83136e..f1b70954f 100644
--- a/src/laser/tensor/datatypes.nim
+++ b/src/laser/tensor/datatypes.nim
@@ -25,7 +25,7 @@ type
     when supportsCopyMem(T):
       raw_buffer*: ptr UncheckedArray[T] # 8 bytes
      memalloc*: pointer                  # 8 bytes
-      memowner*: bool                    # 1 byte
+      isMemOwner*: bool                  # 1 byte
    else: # Tensors of strings, other ref types or non-trivial destructors
      raw_buffer*: seq[T]                 # 8 bytes (16 for seq v2 backed by destructors?)
diff --git a/src/private/nested_containers.nim b/src/private/nested_containers.nim
deleted file mode 100644
index 8a5a8b871..000000000
--- a/src/private/nested_containers.nim
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2017 the Arraymancer contributors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ../tensor/backend/metadataArray
-
-# Tools to manipulate deep nested containers
-
-iterator flatIter*(s: string): string {.noSideEffect.} =
-  yield s
-
-iterator flatIter*[T](s: openarray[T]): auto {.noSideEffect.}=
-  ## Inline iterator on any-depth seq or array
-  ## Returns values in order
-  for item in s:
-    when item is array|seq:
-      for subitem in flatIter(item):
-        yield subitem
-    else:
-      yield item
-
-proc shape*(s: string, parent_shape: MetadataArray = initMetadataArray(0)): MetadataArray {.noSideEffect.}=
-  ## Handle strings
-  const z = initMetadataArray(0)
-  if parent_shape == z:
-    result = z
-    result.len = 1
-    result[0] = 1
-  else: return parent_shape
-
-proc shape*[T](s: openarray[T], parent_shape: MetadataArray = initMetadataArray(0)): MetadataArray {.noSideEffect.}=
-
-  result = parent_shape # Note result = parent_shape & s.len breaks at a random but deterministic point with C++ backend
-  result.add(s.len)     # on the full test suite
-
-  when (T is seq|array):
-    result = shape(s[0], result)
-
-# proc shape*[T: not char](s: openarray[T], parent_shape: seq[int] = @[]): seq[int] {.noSideEffect.}=
-#   ## Helper function to get the shape of nested arrays/sequences
-#   ## C convention. Last index is the fastest changing (columns in 2D, depth in 3D) - Rows (slowest), Columns, Depth (fastest)
-#   ## The second argument "shape" is used for recursive call on nested arrays/sequences
-#   # Dimension check is using only the first nested element so further checking
-#   # must be one to confirm that the total number of elements match the shape.
-
-#   result = parent_shape # Note result = parent_shape & s.len breaks at a random but deterministic point with C++ backend
-#   result.add(s.len)     # on the full test suite
-
-#   when (T is seq|array):
-#     result = shape(s[0], result)
-
-# proc shape*(s: string|seq[char], parent_shape: seq[int] = @[]): seq[int] {.noSideEffect.}=
-#   ## Handle char / string
-#   if parent_shape == @[]:
-#     return @[1]
-#   else: return parent_shape
diff --git a/src/tensor/accessors_macros_syntax.nim b/src/tensor/accessors_macros_syntax.nim
index 15ededde2..2b5672fc9 100644
--- a/src/tensor/accessors_macros_syntax.nim
+++ b/src/tensor/accessors_macros_syntax.nim
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../private/[nested_containers, functional],
-        ./backend/metadataArray,
-        ./private/p_checks,
+import ./private/p_checks,
        ./data_structure
 
 # ## This file adds slicing syntactic sugar.
@@ -214,4 +212,3 @@ proc `^`*(s: Slice): SteppedSlice {.noSideEffect, inline.} =
   ## Internal: Prefix to a to indicate starting the slice at "a" away from the end
   ## Note: This does not automatically inverse stepping, what if we want ^5..^1
   return SteppedSlice(a: s.a, b: s.b, step: 1, a_from_end: true)
-
diff --git a/src/tensor/data_structure.nim b/src/tensor/data_structure.nim
index 7ddb422e6..367119e23 100644
--- a/src/tensor/data_structure.nim
+++ b/src/tensor/data_structure.nim
@@ -12,15 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import  ../laser/dynamic_stack_arrays,
-        ../laser/tensor/datatypes,
-        nimblas, complex
+import
+  # Internal
+  ../laser/dynamic_stack_arrays,
+  ../laser/tensor/datatypes,
+  ../private/sequninit,
+  # Third-party
+  nimblas,
+  # Standard library
+  std/[complex, typetraits]
 
 export nimblas.OrderType, complex
+export datatypes, dynamic_stack_arrays
 
 type
   # On CPU, the tensor datastructures and basic accessors
   # are defined in laser/tensor/datatypes
+  MetadataArray*{.deprecated: "Use Metadata instead".} = Metadata
 
   CudaStorage*[T: SomeFloat] = object
     ## Opaque seq-like structure for storage on the Cuda backend.
@@ -76,43 +84,55 @@ type
 # Field accessors
 # ###############
 
-proc data*[T](t: Tensor[T]): seq[T] {.inline, noSideEffect, noInit.} =
+proc data*[T](t: Tensor[T]): seq[T] {.inline, noInit, deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get tensor raw data
   # This is intended for library writer
-  shallowCopy(result, t.storage.Fdata)
-
-proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noSideEffect, noInit.} =
+  when supportsCopyMem(T):
+    result = newSeqUninit[T](t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc data*[T](t: var Tensor[T]): var seq[T] {.deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get mutable tensor raw data
   # This is intended for library writer
-  shallowCopy(result, t.storage.Fdata)
-
-proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
+  when supportsCopyMem(T):
+    result = newSeqUninit[T](t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw instead".} =
   # Set tensor raw data
   # This is intended for library writer
-  t.storage.Fdata = s
+  assert s.len > 0
+  when T.supportsCopyMem:
+    t.copyFromRaw(s[0].unsafeAddr, s.len)
+  else:
+    t.storage.raw_buffer = s
 
 # ################
 # Tensor Metadata
 # ################
 
-proc rank*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - Its rank
-  ##
-  ##   - 0 for scalar (unfortunately cannot be stored)
-  ##   - 1 for vector
-  ##   - 2 for matrices
-  ##   - N for N-dimension array
-  t.shape.len
+# rank, size, is_C_contiguous defined in laser
 
-proc size*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - The total number of elements it contains
-  t.shape.product
+proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
  ## Check if the tensor follows Fortran convention / is column major
  var z = 1
  for i in 0..
 float64), we must force T #68
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = T(rand(max_or_range)) # Due to automatic converter (float32 -> float64), we must force T #68
 
 proc randomTensor*[T:SomeFloat](shape: varargs[int], max: T): Tensor[T] {.noInit.} =
   ## Creates a new float Tensor filled with values between 0 and max.
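# The sketch below (not part of the diff) illustrates the initialization pattern that the
# randomTensor hunks above and below converge on after the laser migration: compute the
# shape/strides metadata and element count with initTensorMetadata, allocate the backing
# CpuStorage with allocCpuStorage, then write into raw_buffer directly.
# `newTensorFilled` is a hypothetical helper used only for illustration; the import path
# assumes the file sits next to init_cpu.nim under src/tensor/.
import ../laser/tensor/[datatypes, allocator, initialization]

proc newTensorFilled[T: SomeNumber](value: T, shape: varargs[int]): Tensor[T] {.noInit.} =
  var size: int
  initTensorMetadata(result, size, shape)   # fills result.shape/strides/offset, returns element count
  allocCpuStorage(result.storage, size)     # aligned allocation; the finalizer frees it when owning
  for i in 0 ..< size:
    result.storage.raw_buffer[i] = value    # same raw_buffer loop as the random initializers around this sketch

# For example, `newTensorFilled(1.0, 2, 3)` would yield a 2x3 float64 tensor of ones.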
@@ -260,8 +256,12 @@ proc randomTensor*[T](shape: varargs[int], sample_source: openarray[T]): Tensor[
   ## - a sample_source
   ## Result:
   ## - A tensor of the input shape filled with random values from ``sample_source``
-  tensorCpu(shape, result)
-  result.storage.Fdata = newSeqWith(result.size, sample(sample_source))
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = sample(sample_source)
 
 proc randomNormal(mean = 0.0, std = 1.0): float =
   ## Random number in the normal distribution using Box-Muller method
@@ -288,5 +288,9 @@ proc randomNormalTensor*[T:SomeFloat](shape: varargs[int], mean:T = 0, std:T = 1
   ## - the standard deviation (default 1)
   ## Result:
   ## - A tensor of the input shape filled with random values in the normal distribution
-  tensorCpu(shape, result)
-  result.storage.Fdata = newSeqWith(result.size, T(randomNormal(mean.float, std.float)))
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = T(randomNormal(mean.float, std.float))
diff --git a/src/tensor/optim_ops_fusion.nim b/src/tensor/optim_ops_fusion.nim
index 8d4fe48c2..0f0448a35 100644
--- a/src/tensor/optim_ops_fusion.nim
+++ b/src/tensor/optim_ops_fusion.nim
@@ -12,13 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../private/[nested_containers, ast_utils],
-       ./backend/metadataArray,
+import ../laser/private/nested_containers,
+       ../laser/tensor/[allocator, initialization],
        ./private/p_checks,
-       ./private/p_init_cpu,
        ./data_structure,
        ./operators_blas_l2l3,
-       sequtils
+       sequtils, typetraits
 
 #################################################
 ## Operations fusion
@@ -77,16 +76,22 @@ template rewriteTensor_MultiplyAdd_inplace*{C += `*`(A,B)}[T](
 ## initialization
 
 template toTensorReshapeImpl(oa: typed, shape: varargs[int]): untyped =
-  let data = toSeq(flatIter(oa))
-  let seq_shape = shape.toMetadataArray
-  when compileOption("boundChecks"):
-    check_nested_elements(seq_shape, data.len)
-
-  var t: Tensor[type(data[0])]
-  tensorCpu(seq_shape, t)
-  shallowCopy(t.data, data)
-  t
+  var t: Tensor[typeof(flatIter(oa))]
+  var size: int
+  initTensorMetadata(t, size, shape)
+  allocCpuStorage(t.storage, size)
+  var i = 0
+  for val in flatIter(oa):
+    assert i < size
+    t.storage.raw_buffer[i] = val
+    i += 1
+  assert i == size
+  t
 
 proc toTensorReshape(oa: string, shape: varargs[int]): auto {.noInit,noSideEffect.}=
   ## Fuse toTensor and reshape in one operation.
diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim
index 791130aab..d0a1d3509 100644
--- a/src/tensor/private/p_accessors.nim
+++ b/src/tensor/private/p_accessors.nim
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import ../backend/[global_config, memory_optimization_hints],
-        ../backend/metadataArray,
        ../data_structure,
        ./p_checks
 
diff --git a/src/tensor/private/p_accessors_macros_read.nim b/src/tensor/private/p_accessors_macros_read.nim
index 00a900b34..1a0c2eeb1 100644
--- a/src/tensor/private/p_accessors_macros_read.nim
+++ b/src/tensor/private/p_accessors_macros_read.nim
@@ -17,7 +17,6 @@ import ../../private/ast_utils,
        ../data_structure, ../init_cpu,
        ../accessors_macros_syntax,
-       ../backend/metadataArray,
        ./p_checks, ./p_accessors,
        ./p_accessors_macros_desugar,
        sequtils, macros
diff --git a/src/tensor/private/p_accessors_macros_write.nim b/src/tensor/private/p_accessors_macros_write.nim
index 6ad63b4b8..6e7ce3549 100644
--- a/src/tensor/private/p_accessors_macros_write.nim
+++ b/src/tensor/private/p_accessors_macros_write.nim
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/[nested_containers, ast_utils],
+import ../../laser/private/nested_containers,
+       ../../private/ast_utils,
        ../data_structure, ../accessors_macros_syntax,
-       ../backend/metadataArray,
        ./p_accessors_macros_desugar,
        ./p_accessors_macros_read,
        ./p_checks,
diff --git a/src/tensor/private/p_checks.nim b/src/tensor/private/p_checks.nim
index b3e4abb26..0568fd7cb 100644
--- a/src/tensor/private/p_checks.nim
+++ b/src/tensor/private/p_checks.nim
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/[functional, nested_containers],
-       ../backend/metadataArray,
+import ../../laser/private/nested_containers,
        ../data_structure
 
 include ./p_checks_cuda, ./p_checks_opencl
diff --git a/src/tensor/private/p_init_cpu.nim b/src/tensor/private/p_init_cpu.nim
deleted file mode 100644
index 9f72937b4..000000000
--- a/src/tensor/private/p_init_cpu.nim
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2017 the Arraymancer contributors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ../../private/nested_containers,
-       ../backend/metadataArray,
-       ../data_structure,
-       ./p_checks,
-       nimblas,
-       sequtils
-
-proc tensorCpu*[T](out_shape: varargs[int], result: var Tensor[T], layout: OrderType = rowMajor) {.inline, noSideEffect.} =
-  result.shape.copyFrom(out_shape)
-  shape_to_strides(result.shape, layout, result.strides)
-  result.offset = 0
-
-proc tensorCpu*[T](out_shape: MetadataArray, result: var Tensor[T], layout: OrderType = rowMajor) {.inline, noSideEffect.} =
-  result.shape.copyFrom(out_shape)
-  shape_to_strides(result.shape, layout, result.strides)
-  result.offset = 0
-
-template toTensorCpu*(s: typed): untyped =
-  let shape = s.shape
-  let data = toSeq(flatIter(s))
-
-  when compileOption("boundChecks"):
-    check_nested_elements(shape, data.len)
-
-  var t: Tensor[type(data[0])]
-  tensorCpu(shape, t)
-  t.data = data
-  t
diff --git a/src/tensor/private/p_shapeshifting.nim b/src/tensor/private/p_shapeshifting.nim
index a9dc6ccdd..98274d312 100644
--- a/src/tensor/private/p_shapeshifting.nim
+++ b/src/tensor/private/p_shapeshifting.nim
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/sequninit,
-       ../backend/metadataArray,
+import ../../laser/tensor/[allocator, initialization],
+       ../../private/sequninit,
        ../data_structure, ../higher_order_applymap,
        ../init_cpu,
-       ./p_init_cpu,
        ./p_checks,
        nimblas
 
@@ -24,8 +23,11 @@ proc contiguousImpl*[T](t: Tensor[T], layout: OrderType, result: var Tensor[T])
   if layout == rowMajor:
     result = t.map_inline(x)
   else: # colMajor
-    tensorCpu(t.shape, result, layout)
-    result.data = newSeqUninit[T](result.size)
+    var size: int
+    initTensorMetadata(result, size, t.shape)
+    # TODO: init with colMajor layout
+    result.strides.reversed
+    allocCpuStorage(result.storage, size)
     apply2_inline(result, t):
       y
 
@@ -77,7 +79,7 @@ proc broadcastImpl*(t: var AnyTensor, shape: varargs[int]|MetadataArray) {.noSid
 proc broadcast2Impl*[T](a, b: AnyTensor[T], result: var tuple[a, b: AnyTensor[T]]) {.noSideEffect.}=
   let rank = max(a.rank, b.rank)
 
-  var shapeA, stridesA, shapeB, stridesB = initMetadataArray(rank) # initialized with 0
+  var shapeA, stridesA, shapeB, stridesB = Metadata(len: rank) # initialized with 0
 
   for i in 0..
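# A minimal sketch (not part of the diff) of the Metadata value that broadcast2Impl above now
# uses in place of initMetadataArray. It assumes Metadata/DynamicStackArray are reachable through
# the `export datatypes, dynamic_stack_arrays` added in data_structure.nim, and that `len`, `[]`
# and `[]=` behave as used throughout this diff. The concrete numbers are illustrative only.
import ../laser/dynamic_stack_arrays, ../laser/tensor/datatypes

var strides = Metadata(len: 3)   # object construction: the backing array is default (zero) initialized,
                                 # which is what the "# initialized with 0" comment relies on
assert strides.len == 3
assert strides[0] == 0 and strides[1] == 0 and strides[2] == 0

# Broadcasting a contiguous (1, 2, 3) tensor against a (4, 2, 3) one keeps the real strides on the
# matching dimensions and leaves a 0 stride on the broadcast dimension, so the same data is
# re-read for every index along it.
strides[1] = 3
strides[2] = 1                   # strides[0] stays 0: dimension 0 is broadcast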
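# A minimal sketch (not part of the diff) of what the isMemOwner flag renamed above is for,
# assuming the CpuStorage fields declared in laser/tensor/datatypes.nim: only the storage that
# owns memalloc releases it in its finalizer, so a non-owning view over the same buffer can be
# collected without a double free. The `view` construction below is hypothetical illustration,
# not an API introduced by this PR.
import ../laser/tensor/[datatypes, allocator]

var owner: CpuStorage[float32]
allocCpuStorage(owner, 100)        # sets owner.isMemOwner = true; the finalizer will deallocShared
assert owner.isMemOwner

var view: CpuStorage[float32]
new(view)                          # no finalizer registered for the view
view.memalloc = owner.memalloc
view.raw_buffer = owner.raw_buffer
view.isMemOwner = false            # never frees: the shared allocation is released once, by owner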
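# A minimal sketch (not part of the diff) of flatIter, which toTensorReshapeImpl and the checks
# above now import from laser/private/nested_containers instead of the deleted
# src/private/nested_containers; the deleted implementation shows the semantics assumed here:
# a depth-first, in-order walk over every scalar element of an arbitrarily nested array/seq.
import ../laser/private/nested_containers

let nested = @[@[1, 2, 3], @[4, 5, 6]]
var flat: seq[int]
for x in flatIter(nested):
  flat.add x
assert flat == @[1, 2, 3, 4, 5, 6]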
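# A minimal sketch (not part of the diff) of replaceNodes from the new laser/private/ast_utils.nim:
# it rebuilds a syntax tree, swapping a list of identifiers for caller-supplied replacement nodes.
# The macro `withXReplaced` below is hypothetical, written only to show how the proc is called.
import macros
import ../laser/private/ast_utils

macro withXReplaced(value: untyped, expression: untyped): untyped =
  ## Substitute every identifier `x` inside `expression` with `value`.
  let toReplace    = nnkBracket.newTree(ident"x")
  let replacements = nnkBracket.newTree(value)
  result = replaceNodes(expression, replacements, toReplace)

let y = withXReplaced(10, x * x + 1)
assert y == 101   # `x * x + 1` was rewritten to `10 * 10 + 1` before typechecking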