diff --git a/src/laser/private/ast_utils.nim b/src/laser/private/ast_utils.nim
new file mode 100644
index 000000000..61cb9ab1b
--- /dev/null
+++ b/src/laser/private/ast_utils.nim
@@ -0,0 +1,33 @@
+# Laser
+# Copyright (c) 2018 Mamy André-Ratsimbazafy
+# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
+# This file may not be copied, modified, or distributed except according to those terms.
+
+import macros
+
+proc pop*(tree: var NimNode): NimNode =
+  ## varargs[untyped] consumes all arguments so the actual value should be popped
+  ## https://github.com/nim-lang/Nim/issues/5855
+  result = tree[tree.len-1]
+  tree.del(tree.len-1)
+
+proc replaceNodes*(ast: NimNode, replacements: NimNode, to_replace: NimNode): NimNode =
+  # Args:
+  #   - The full syntax tree
+  #   - an array of replacement values
+  #   - an array of identifiers to replace
+  proc inspect(node: NimNode): NimNode =
+    case node.kind:
+    of {nnkIdent, nnkSym}:
+      for i, c in to_replace:
+        if node.eqIdent($c):
+          return replacements[i]
+      return node
+    of nnkEmpty: return node
+    of nnkLiterals: return node
+    else:
+      var rTree = node.kind.newTree()
+      for child in node:
+        rTree.add inspect(child)
+      return rTree
+  result = inspect(ast)
diff --git a/src/laser/tensor/allocator.nim b/src/laser/tensor/allocator.nim
index 22b641255..72ff8123a 100644
--- a/src/laser/tensor/allocator.nim
+++ b/src/laser/tensor/allocator.nim
@@ -11,7 +11,7 @@ import
 
 proc finalizer[T](storage: CpuStorage[T]) =
   static: assert T.supportsCopyMem, "Tensors of seq, strings, ref types and types with non-trivial destructors cannot be finalized by this proc"
-  if storage.memowner and not storage.memalloc.isNil:
+  if storage.isMemOwner and not storage.memalloc.isNil:
     storage.memalloc.deallocShared()
 
 proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
@@ -21,8 +21,9 @@ proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
   ## are always zero-initialized. This prevents potential GC issues.
   when T.supportsCopyMem:
     new(storage, finalizer[T])
-    storage.memalloc = allocShared0(sizeof(T) * size + LASER_MEM_ALIGN - 1)
-    storage.memowner = true
+    {.noSideEffect.}:
+      storage.memalloc = allocShared(sizeof(T) * size + LASER_MEM_ALIGN - 1)
+      storage.isMemOwner = true
     storage.raw_buffer = align_raw_data(T, storage.memalloc)
   else: # Always 0-initialize Tensors of seq, strings, ref types and types with non-trivial destructors
     new(storage)
diff --git a/src/laser/tensor/datatypes.nim b/src/laser/tensor/datatypes.nim
index 51d83136e..f1b70954f 100644
--- a/src/laser/tensor/datatypes.nim
+++ b/src/laser/tensor/datatypes.nim
@@ -25,7 +25,7 @@ type
     when supportsCopyMem(T):
       raw_buffer*: ptr UncheckedArray[T] # 8 bytes
      memalloc*: pointer                  # 8 bytes
-      memowner*: bool                    # 1 byte
+      isMemOwner*: bool                  # 1 byte
    else: # Tensors of strings, other ref types or non-trivial destructors
      raw_buffer*: seq[T]                 # 8 bytes (16 for seq v2 backed by destructors?)
diff --git a/src/private/nested_containers.nim b/src/private/nested_containers.nim
deleted file mode 100644
index 8a5a8b871..000000000
--- a/src/private/nested_containers.nim
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright 2017 the Arraymancer contributors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ../tensor/backend/metadataArray
-
-# Tools to manipulate deep nested containers
-
-iterator flatIter*(s: string): string {.noSideEffect.} =
-  yield s
-
-iterator flatIter*[T](s: openarray[T]): auto {.noSideEffect.}=
-  ## Inline iterator on any-depth seq or array
-  ## Returns values in order
-  for item in s:
-    when item is array|seq:
-      for subitem in flatIter(item):
-        yield subitem
-    else:
-      yield item
-
-proc shape*(s: string, parent_shape: MetadataArray = initMetadataArray(0)): MetadataArray {.noSideEffect.}=
-  ## Handle strings
-  const z = initMetadataArray(0)
-  if parent_shape == z:
-    result = z
-    result.len = 1
-    result[0] = 1
-  else: return parent_shape
-
-proc shape*[T](s: openarray[T], parent_shape: MetadataArray = initMetadataArray(0)): MetadataArray {.noSideEffect.}=
-
-  result = parent_shape # Note result = parent_shape & s.len breaks at a random but deterministic point with C++ backend
-  result.add(s.len)     # on the full test suite
-
-  when (T is seq|array):
-    result = shape(s[0], result)
-
-# proc shape*[T: not char](s: openarray[T], parent_shape: seq[int] = @[]): seq[int] {.noSideEffect.}=
-#   ## Helper function to get the shape of nested arrays/sequences
-#   ## C convention. Last index is the fastest changing (columns in 2D, depth in 3D) - Rows (slowest), Columns, Depth (fastest)
-#   ## The second argument "shape" is used for recursive call on nested arrays/sequences
-#   # Dimension check is using only the first nested element so further checking
-#   # must be one to confirm that the total number of elements match the shape.
-
-#   result = parent_shape # Note result = parent_shape & s.len breaks at a random but deterministic point with C++ backend
-#   result.add(s.len)     # on the full test suite
-
-#   when (T is seq|array):
-#     result = shape(s[0], result)
-
-# proc shape*(s: string|seq[char], parent_shape: seq[int] = @[]): seq[int] {.noSideEffect.}=
-#   ## Handle char / string
-#   if parent_shape == @[]:
-#     return @[1]
-#   else: return parent_shape
diff --git a/src/tensor/accessors_macros_syntax.nim b/src/tensor/accessors_macros_syntax.nim
index 15ededde2..2b5672fc9 100644
--- a/src/tensor/accessors_macros_syntax.nim
+++ b/src/tensor/accessors_macros_syntax.nim
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../private/[nested_containers, functional],
-        ./backend/metadataArray,
-        ./private/p_checks,
+import ./private/p_checks,
        ./data_structure
 
 # ## This file adds slicing syntactic sugar.
@@ -214,4 +212,3 @@ proc `^`*(s: Slice): SteppedSlice {.noSideEffect, inline.} =
   ## Internal: Prefix to a to indicate starting the slice at "a" away from the end
   ## Note: This does not automatically inverse stepping, what if we want ^5..^1
   return SteppedSlice(a: s.a, b: s.b, step: 1, a_from_end: true)
-
diff --git a/src/tensor/data_structure.nim b/src/tensor/data_structure.nim
index 7ddb422e6..367119e23 100644
--- a/src/tensor/data_structure.nim
+++ b/src/tensor/data_structure.nim
@@ -12,15 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import  ../laser/dynamic_stack_arrays,
-        ../laser/tensor/datatypes,
-        nimblas, complex
+import
+  # Internal
+  ../laser/dynamic_stack_arrays,
+  ../laser/tensor/datatypes,
+  ../private/sequninit,
+  # Third-party
+  nimblas,
+  # Standard library
+  std/[complex, typetraits]
 
 export nimblas.OrderType, complex
+export datatypes, dynamic_stack_arrays
 
 type
   # On CPU, the tensor datastructures and basic accessors
   # are defined in laser/tensor/datatypes
+  MetadataArray*{.deprecated: "Use Metadata instead".} = Metadata
 
   CudaStorage*[T: SomeFloat] = object
     ## Opaque seq-like structure for storage on the Cuda backend.
@@ -76,43 +84,55 @@ type
 # Field accessors
 # ###############
 
-proc data*[T](t: Tensor[T]): seq[T] {.inline, noSideEffect, noInit.} =
+proc data*[T](t: Tensor[T]): seq[T] {.inline, noInit, deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get tensor raw data
   # This is intended for library writer
-  shallowCopy(result, t.storage.Fdata)
-
-proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noSideEffect, noInit.} =
+  when supportsCopyMem(T):
+    result = newSeqUninit[T](t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc data*[T](t: var Tensor[T]): var seq[T] {.deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get mutable tensor raw data
   # This is intended for library writer
-  shallowCopy(result, t.storage.Fdata)
-
-proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
+  when supportsCopyMem(T):
+    result = newSeqUninit[T](t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw instead".} =
   # Set tensor raw data
   # This is intended for library writer
-  t.storage.Fdata = s
+  assert s.len > 0
+  when T.supportsCopyMem:
+    t.copyFromRaw(s[0].unsafeAddr, s.len)
+  else:
+    t.storage.raw_buffer = s
 
 # ################
 # Tensor Metadata
 # ################
 
-proc rank*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - Its rank
-  ##
-  ##   - 0 for scalar (unfortunately cannot be stored)
-  ##   - 1 for vector
-  ##   - 2 for matrices
-  ##   - N for N-dimension array
-  t.shape.len
+# rank, size, is_C_contiguous defined in laser
 
-proc size*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - The total number of elements it contains
-  t.shape.product
+proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
  ## Check if the tensor follows Fortran convention / is column major
  var z = 1
  for i in 0..
 float64), we must force T #68
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = T(rand(max_or_range)) # Due to automatic converter (float32 -> float64), we must force T #68
 
 proc randomTensor*[T:SomeFloat](shape: varargs[int], max: T): Tensor[T] {.noInit.} =
   ## Creates a new float Tensor filled with values between 0 and max.
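# The sketch below (not part of the diff) illustrates the initialization pattern that the
# randomTensor hunks above and below converge on after the laser migration: compute the
# shape/strides metadata and element count with initTensorMetadata, allocate the backing
# CpuStorage with allocCpuStorage, then write into raw_buffer directly.
# `newTensorFilled` is a hypothetical helper used only for illustration; the import path
# assumes the file sits next to init_cpu.nim under src/tensor/.
import ../laser/tensor/[datatypes, allocator, initialization]

proc newTensorFilled[T: SomeNumber](value: T, shape: varargs[int]): Tensor[T] {.noInit.} =
  var size: int
  initTensorMetadata(result, size, shape)   # fills result.shape/strides/offset, returns element count
  allocCpuStorage(result.storage, size)     # aligned allocation; the finalizer frees it when owning
  for i in 0 ..< size:
    result.storage.raw_buffer[i] = value    # same raw_buffer loop as the random initializers around this sketch

# For example, `newTensorFilled(1.0, 2, 3)` would yield a 2x3 float64 tensor of ones.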
@@ -260,8 +256,12 @@ proc randomTensor*[T](shape: varargs[int], sample_source: openarray[T]): Tensor[
   ## - a sample_source
   ## Result:
   ## - A tensor of the input shape filled with random values from ``sample_source``
-  tensorCpu(shape, result)
-  result.storage.Fdata = newSeqWith(result.size, sample(sample_source))
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = sample(sample_source)
 
 proc randomNormal(mean = 0.0, std = 1.0): float =
   ## Random number in the normal distribution using Box-Muller method
@@ -288,5 +288,9 @@ proc randomNormalTensor*[T:SomeFloat](shape: varargs[int], mean:T = 0, std:T = 1
   ## - the standard deviation (default 1)
   ## Result:
   ## - A tensor of the input shape filled with random values in the normal distribution
-  tensorCpu(shape, result)
-  result.storage.Fdata = newSeqWith(result.size, T(randomNormal(mean.float, std.float)))
+  var size: int
+  initTensorMetadata(result, size, shape)
+  allocCpuStorage(result.storage, size)
+
+  for i in 0 ..< size:
+    result.storage.raw_buffer[i] = T(randomNormal(mean.float, std.float))
diff --git a/src/tensor/optim_ops_fusion.nim b/src/tensor/optim_ops_fusion.nim
index 8d4fe48c2..0f0448a35 100644
--- a/src/tensor/optim_ops_fusion.nim
+++ b/src/tensor/optim_ops_fusion.nim
@@ -12,13 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../private/[nested_containers, ast_utils],
-       ./backend/metadataArray,
+import ../laser/private/nested_containers,
+       ../laser/tensor/[allocator, initialization],
        ./private/p_checks,
-       ./private/p_init_cpu,
        ./data_structure,
        ./operators_blas_l2l3,
-       sequtils
+       sequtils, typetraits
 
 #################################################
 ## Operations fusion
@@ -77,16 +76,22 @@ template rewriteTensor_MultiplyAdd_inplace*{C += `*`(A,B)}[T](
 ## initialization
 
 template toTensorReshapeImpl(oa: typed, shape: varargs[int]): untyped =
-  let data = toSeq(flatIter(oa))
-  let seq_shape = shape.toMetadataArray
-  when compileOption("boundChecks"):
-    check_nested_elements(seq_shape, data.len)
-
-  var t: Tensor[type(data[0])]
-  tensorCpu(seq_shape, t)
-  shallowCopy(t.data, data)
-  t
+  var t: Tensor[typeof(flatIter(oa))]
+  var size: int
+  initTensorMetadata(t, size, shape)
+  allocCpuStorage(t.storage, size)
+  var i = 0
+  for val in flatIter(oa):
+    assert i < size
+    t.storage.raw_buffer[i] = val
+    i += 1
+  assert i == size
+  t
 
 proc toTensorReshape(oa: string, shape: varargs[int]): auto {.noInit,noSideEffect.}=
   ## Fuse toTensor and reshape in one operation.
diff --git a/src/tensor/private/p_accessors.nim b/src/tensor/private/p_accessors.nim
index 791130aab..d0a1d3509 100644
--- a/src/tensor/private/p_accessors.nim
+++ b/src/tensor/private/p_accessors.nim
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import ../backend/[global_config, memory_optimization_hints],
-        ../backend/metadataArray,
        ../data_structure,
        ./p_checks
 
diff --git a/src/tensor/private/p_accessors_macros_read.nim b/src/tensor/private/p_accessors_macros_read.nim
index 00a900b34..1a0c2eeb1 100644
--- a/src/tensor/private/p_accessors_macros_read.nim
+++ b/src/tensor/private/p_accessors_macros_read.nim
@@ -17,7 +17,6 @@ import ../../private/ast_utils,
        ../data_structure, ../init_cpu,
        ../accessors_macros_syntax,
-       ../backend/metadataArray,
        ./p_checks, ./p_accessors,
        ./p_accessors_macros_desugar,
        sequtils, macros
diff --git a/src/tensor/private/p_accessors_macros_write.nim b/src/tensor/private/p_accessors_macros_write.nim
index 6ad63b4b8..6e7ce3549 100644
--- a/src/tensor/private/p_accessors_macros_write.nim
+++ b/src/tensor/private/p_accessors_macros_write.nim
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/[nested_containers, ast_utils],
+import ../../laser/private/nested_containers,
+       ../../private/ast_utils,
        ../data_structure, ../accessors_macros_syntax,
-       ../backend/metadataArray,
        ./p_accessors_macros_desugar,
        ./p_accessors_macros_read,
        ./p_checks,
diff --git a/src/tensor/private/p_checks.nim b/src/tensor/private/p_checks.nim
index b3e4abb26..0568fd7cb 100644
--- a/src/tensor/private/p_checks.nim
+++ b/src/tensor/private/p_checks.nim
@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/[functional, nested_containers],
-       ../backend/metadataArray,
+import ../../laser/private/nested_containers,
        ../data_structure
 
 include ./p_checks_cuda, ./p_checks_opencl
diff --git a/src/tensor/private/p_init_cpu.nim b/src/tensor/private/p_init_cpu.nim
deleted file mode 100644
index 9f72937b4..000000000
--- a/src/tensor/private/p_init_cpu.nim
+++ /dev/null
@@ -1,42 +0,0 @@
-# Copyright 2017 the Arraymancer contributors
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import ../../private/nested_containers,
-       ../backend/metadataArray,
-       ../data_structure,
-       ./p_checks,
-       nimblas,
-       sequtils
-
-proc tensorCpu*[T](out_shape: varargs[int], result: var Tensor[T], layout: OrderType = rowMajor) {.inline, noSideEffect.} =
-  result.shape.copyFrom(out_shape)
-  shape_to_strides(result.shape, layout, result.strides)
-  result.offset = 0
-
-proc tensorCpu*[T](out_shape: MetadataArray, result: var Tensor[T], layout: OrderType = rowMajor) {.inline, noSideEffect.} =
-  result.shape.copyFrom(out_shape)
-  shape_to_strides(result.shape, layout, result.strides)
-  result.offset = 0
-
-template toTensorCpu*(s: typed): untyped =
-  let shape = s.shape
-  let data = toSeq(flatIter(s))
-
-  when compileOption("boundChecks"):
-    check_nested_elements(shape, data.len)
-
-  var t: Tensor[type(data[0])]
-  tensorCpu(shape, t)
-  t.data = data
-  t
diff --git a/src/tensor/private/p_shapeshifting.nim b/src/tensor/private/p_shapeshifting.nim
index a9dc6ccdd..98274d312 100644
--- a/src/tensor/private/p_shapeshifting.nim
+++ b/src/tensor/private/p_shapeshifting.nim
@@ -12,11 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../../private/sequninit,
-       ../backend/metadataArray,
+import ../../laser/tensor/[allocator, initialization],
+       ../../private/sequninit,
        ../data_structure, ../higher_order_applymap,
        ../init_cpu,
-       ./p_init_cpu,
        ./p_checks,
        nimblas
 
@@ -24,8 +23,11 @@ proc contiguousImpl*[T](t: Tensor[T], layout: OrderType, result: var Tensor[T])
   if layout == rowMajor:
     result = t.map_inline(x)
   else: # colMajor
-    tensorCpu(t.shape, result, layout)
-    result.data = newSeqUninit[T](result.size)
+    var size: int
+    initTensorMetadata(result, size, t.shape)
+    # TODO: init with colMajor layout
+    result.strides.reversed
+    allocCpuStorage(result.storage, size)
     apply2_inline(result, t):
       y
 
@@ -77,7 +79,7 @@ proc broadcastImpl*(t: var AnyTensor, shape: varargs[int]|MetadataArray) {.noSid
 proc broadcast2Impl*[T](a, b: AnyTensor[T], result: var tuple[a, b: AnyTensor[T]]) {.noSideEffect.}=
   let rank = max(a.rank, b.rank)
 
-  var shapeA, stridesA, shapeB, stridesB = initMetadataArray(rank) # initialized with 0
+  var shapeA, stridesA, shapeB, stridesB = Metadata(len: rank) # initialized with 0
 
   for i in 0..
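# A minimal sketch (not part of the diff) of the Metadata value that broadcast2Impl above now
# uses in place of initMetadataArray. It assumes Metadata/DynamicStackArray are reachable through
# the `export datatypes, dynamic_stack_arrays` added in data_structure.nim, and that `len`, `[]`
# and `[]=` behave as used throughout this diff. The concrete numbers are illustrative only.
import ../laser/dynamic_stack_arrays, ../laser/tensor/datatypes

var strides = Metadata(len: 3)   # object construction: the backing array is default (zero) initialized,
                                 # which is what the "# initialized with 0" comment relies on
assert strides.len == 3
assert strides[0] == 0 and strides[1] == 0 and strides[2] == 0

# Broadcasting a contiguous (1, 2, 3) tensor against a (4, 2, 3) one keeps the real strides on the
# matching dimensions and leaves a 0 stride on the broadcast dimension, so the same data is
# re-read for every index along it.
strides[1] = 3
strides[2] = 1                   # strides[0] stays 0: dimension 0 is broadcast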
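# A minimal sketch (not part of the diff) of what the isMemOwner flag renamed above is for,
# assuming the CpuStorage fields declared in laser/tensor/datatypes.nim: only the storage that
# owns memalloc releases it in its finalizer, so a non-owning view over the same buffer can be
# collected without a double free. The `view` construction below is hypothetical illustration,
# not an API introduced by this PR.
import ../laser/tensor/[datatypes, allocator]

var owner: CpuStorage[float32]
allocCpuStorage(owner, 100)        # sets owner.isMemOwner = true; the finalizer will deallocShared
assert owner.isMemOwner

var view: CpuStorage[float32]
new(view)                          # no finalizer registered for the view
view.memalloc = owner.memalloc
view.raw_buffer = owner.raw_buffer
view.isMemOwner = false            # never frees: the shared allocation is released once, by owner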
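# A minimal sketch (not part of the diff) of flatIter, which toTensorReshapeImpl and the checks
# above now import from laser/private/nested_containers instead of the deleted
# src/private/nested_containers; the deleted implementation shows the semantics assumed here:
# a depth-first, in-order walk over every scalar element of an arbitrarily nested array/seq.
import ../laser/private/nested_containers

let nested = @[@[1, 2, 3], @[4, 5, 6]]
var flat: seq[int]
for x in flatIter(nested):
  flat.add x
assert flat == @[1, 2, 3, 4, 5, 6]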
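# A minimal sketch (not part of the diff) of replaceNodes from the new laser/private/ast_utils.nim:
# it rebuilds a syntax tree, swapping a list of identifiers for caller-supplied replacement nodes.
# The macro `withXReplaced` below is hypothetical, written only to show how the proc is called.
import macros
import ../laser/private/ast_utils

macro withXReplaced(value: untyped, expression: untyped): untyped =
  ## Substitute every identifier `x` inside `expression` with `value`.
  let toReplace    = nnkBracket.newTree(ident"x")
  let replacements = nnkBracket.newTree(value)
  result = replaceNodes(expression, replacements, toReplace)

let y = withXReplaced(10, x * x + 1)
assert y == 101   # `x * x + 1` was rewritten to `10 * 10 + 1` before typechecking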