Stash changes: showstopper upstream bug nim-lang/Nim#13095
mratsim committed Jan 10, 2020
1 parent 3060546 commit d3f078a
Showing 16 changed files with 199 additions and 273 deletions.
33 changes: 33 additions & 0 deletions src/laser/private/ast_utils.nim
@@ -0,0 +1,33 @@
# Laser
# Copyright (c) 2018 Mamy André-Ratsimbazafy
# Distributed under the Apache v2 License (license terms are at http://www.apache.org/licenses/LICENSE-2.0).
# This file may not be copied, modified, or distributed except according to those terms.

import macros

proc pop*(tree: var NimNode): NimNode =
  ## varargs[untyped] consumes all arguments so the actual value should be popped
  ## https://github.com/nim-lang/Nim/issues/5855
  result = tree[tree.len-1]
  tree.del(tree.len-1)
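
For context, a minimal usage sketch (the lastArg macro below is hypothetical, not part of this commit):

import macros
# assumes the `pop` helper above is in scope

macro lastArg(args: varargs[untyped]): untyped =
  ## varargs[untyped] swallows every argument, so the trailing
  ## one is peeled off the captured tree with `pop`.
  var tree = args.copyNimTree()
  result = tree.pop()

echo lastArg(1, 2, 3)  # prints 3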

proc replaceNodes*(ast: NimNode, replacements: NimNode, to_replace: NimNode): NimNode =
  # Args:
  # - The full syntax tree
  # - an array of replacement values
  # - an array of identifiers to replace
  proc inspect(node: NimNode): NimNode =
    case node.kind:
    of {nnkIdent, nnkSym}:
      for i, c in to_replace:
        if node.eqIdent($c):
          return replacements[i]
      return node
    of nnkEmpty: return node
    of nnkLiterals: return node
    else:
      var rTree = node.kind.newTree()
      for child in node:
        rTree.add inspect(child)
      return rTree
  result = inspect(ast)
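
A similar hedged sketch for replaceNodes (the renameXtoY macro is illustrative only):

import macros
# assumes the `replaceNodes` proc above is in scope

macro renameXtoY(body: untyped): untyped =
  ## Substitutes every `x` identifier in `body` with `y`.
  replaceNodes(body,
    replacements = nnkBracket.newTree(ident"y"),
    to_replace = nnkBracket.newTree(ident"x"))

let y = 10
echo renameXtoY(x * 2)  # rewritten to `y * 2`, prints 20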
7 changes: 4 additions & 3 deletions src/laser/tensor/allocator.nim
@@ -11,7 +11,7 @@ import
 proc finalizer[T](storage: CpuStorage[T]) =
   static: assert T.supportsCopyMem, "Tensors of seq, strings, ref types and types with non-trivial destructors cannot be finalized by this proc"
 
-  if storage.memowner and not storage.memalloc.isNil:
+  if storage.isMemOwner and not storage.memalloc.isNil:
     storage.memalloc.deallocShared()
 
 proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
@@ -21,8 +21,9 @@ proc allocCpuStorage*[T](storage: var CpuStorage[T], size: int) =
   ## are always zero-initialized. This prevents potential GC issues.
   when T.supportsCopyMem:
     new(storage, finalizer[T])
-    storage.memalloc = allocShared0(sizeof(T) * size + LASER_MEM_ALIGN - 1)
-    storage.memowner = true
+    {.noSideEffect.}:
+      storage.memalloc = allocShared(sizeof(T) * size + LASER_MEM_ALIGN - 1)
+      storage.isMemOwner = true
     storage.raw_buffer = align_raw_data(T, storage.memalloc)
   else: # Always 0-initialize Tensors of seq, strings, ref types and types with non-trivial destructors
     new(storage)
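
The `+ LASER_MEM_ALIGN - 1` over-allocation is the usual manual-alignment trick: allocate a few spare bytes, then round the start address up to the alignment boundary. A standalone sketch of the idea (MemAlign and alignedStart are illustrative stand-ins, not laser's actual align_raw_data):

const MemAlign = 64  # stand-in for LASER_MEM_ALIGN

proc alignedStart[T](p: pointer): ptr UncheckedArray[T] =
  # Round the raw address up to the next MemAlign boundary.
  let address = cast[uint](p)
  let aligned = (address + uint(MemAlign) - 1) and not (uint(MemAlign) - 1)
  cast[ptr UncheckedArray[T]](aligned)

let raw = allocShared(sizeof(float64) * 100 + MemAlign - 1)
let buf = alignedStart[float64](raw)  # 64-byte aligned, still holds 100 floats
buf[0] = 1.0
deallocShared(raw)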
2 changes: 1 addition & 1 deletion src/laser/tensor/datatypes.nim
@@ -25,7 +25,7 @@ type
     when supportsCopyMem(T):
       raw_buffer*: ptr UncheckedArray[T] # 8 bytes
       memalloc*: pointer # 8 bytes
-      memowner*: bool # 1 byte
+      isMemOwner*: bool # 1 byte
     else: # Tensors of strings, other ref types or non-trivial destructors
       raw_buffer*: seq[T] # 8 bytes (16 for seq v2 backed by destructors?)
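
The `when supportsCopyMem(T)` split routes trivially copyable element types to a raw shared-memory buffer and GC-managed types to a plain seq. A quick sketch of the trait driving the branch:

import typetraits

echo supportsCopyMem(float32)   # true:  raw ptr UncheckedArray storage
echo supportsCopyMem(string)    # false: seq-backed storage
echo supportsCopyMem(seq[int])  # false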

66 changes: 0 additions & 66 deletions src/private/nested_containers.nim

This file was deleted.

5 changes: 1 addition & 4 deletions src/tensor/accessors_macros_syntax.nim
@@ -12,9 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../private/[nested_containers, functional],
-       ./backend/metadataArray,
-       ./private/p_checks,
+import ./private/p_checks,
        ./data_structure
 
 # ## This file adds slicing syntactic sugar.
@@ -214,4 +212,3 @@ proc `^`*(s: Slice): SteppedSlice {.noSideEffect, inline.} =
   ## Internal: Prefix to a to indicate starting the slice at "a" away from the end
   ## Note: This does not automatically inverse stepping, what if we want ^5..^1
   return SteppedSlice(a: s.a, b: s.b, step: 1, a_from_end: true)
-
116 changes: 56 additions & 60 deletions src/tensor/data_structure.nim
@@ -12,15 +12,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import ../laser/dynamic_stack_arrays,
-       ../laser/tensor/datatypes,
-       nimblas, complex
+import
+  # Internal
+  ../laser/dynamic_stack_arrays,
+  ../laser/tensor/datatypes,
+  ../private/sequninit,
+  # Third-party
+  nimblas,
+  # Standard library
+  std/[complex, typetraits]
 
 export nimblas.OrderType, complex
+export datatypes, dynamic_stack_arrays
 
 type
+  # On CPU, the tensor datastructures and basic accessors
+  # are defined in laser/tensor/datatypes
+  MetadataArray*{.deprecated: "Use Metadata instead".} = Metadata
 
   CudaStorage*[T: SomeFloat] = object
     ## Opaque seq-like structure for storage on the Cuda backend.
@@ -76,43 +84,55 @@ type
 # Field accessors
 # ###############
 
-proc data*[T](t: Tensor[T]): seq[T] {.inline, noSideEffect, noInit.} =
+proc data*[T](t: Tensor[T]): seq[T] {.inline, noInit, deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get tensor raw data
   # This is intended for library writers
-  shallowCopy(result, t.storage.Fdata)
-
-proc data*[T](t: var Tensor[T]): var seq[T] {.inline, noSideEffect, noInit.} =
+  when supportsCopyMem:
+    result = newSeqUninit(t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc data*[T](t: var Tensor[T]): var seq[T] {.deprecated: "This used to be a way to extract raw data without copy. Use the raw pointer instead.".} =
   # Get mutable tensor raw data
   # This is intended for library writers
-  shallowCopy(result, t.storage.Fdata)
-
-proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.inline, noSideEffect.}=
+  when supportsCopyMem:
+    result = newSeqUninit(t.size)
+    for i in 0 ..< t.size:
+      result[i] = t.storage.raw_buffer[i]
+  else:
+    shallowCopy(result, t.storage.raw_buffer)
+
+proc `data=`*[T](t: var Tensor[T], s: seq[T]) {.deprecated: "Use copyFromRaw instead".} =
   # Set tensor raw data
   # This is intended for library writers
-  t.storage.Fdata = s
+  assert s.len > 0
+  when T.supportsCopyMem:
+    t.copyFromRaw(s[0].addr, s.len)
+  else:
+    t.storage.raw_buffer = s
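
A migration sketch for the deprecated setter, assuming the copyFromRaw call shape used above (illustrative only):

import arraymancer

var t = newTensor[float32](2, 3)
let src = @[1'f32, 2, 3, 4, 5, 6]

# before: t.data = src  (deprecated)
# after:  copy straight from the seq's backing buffer
t.copyFromRaw(src[0].unsafeAddr, src.len)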

 # ################
 # Tensor Metadata
 # ################
 
-proc rank*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - Its rank
-  ##
-  ##   - 0 for scalar (unfortunately cannot be stored)
-  ##   - 1 for vector
-  ##   - 2 for matrices
-  ##   - N for N-dimension array
-  t.shape.len
+# rank, size, is_C_contiguous defined in laser
 
-proc size*(t: AnyTensor): int {.noSideEffect, inline.}=
-  ## Input:
-  ##   - A tensor
-  ## Returns:
-  ##   - The total number of elements it contains
-  t.shape.product
+proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
+  ## Check if the tensor follows Fortran convention / is column major
+  var z = 1
+  for i in 0..<t.shape.len:
+    # 1. We should ignore strides on dimensions of size 1
+    # 2. Strides always must have the size equal to the product of the next dimensions
+    if t.shape[i] != 1 and t.strides[i] != z:
+      return false
+    z *= t.shape[i]
+  return true
+
+proc isContiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
+  ## Check if the tensor is contiguous
+  return t.is_C_contiguous or t.is_F_contiguous
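
As a worked example of the stride checks: a 2x3 tensor with strides [3, 1] is C-contiguous, while strides [1, 2] make it F-contiguous. A standalone re-implementation of the column-major check on plain arrays:

proc isFContig(shape, strides: openArray[int]): bool =
  # mirrors is_F_contiguous above
  var z = 1
  for i in 0 ..< shape.len:
    if shape[i] != 1 and strides[i] != z:
      return false
    z *= shape[i]
  true

echo isFContig([2, 3], [1, 2])  # true  (column major)
echo isFContig([2, 3], [3, 1])  # false (row major)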

 proc shape_to_strides*(shape: MetadataArray, layout: OrderType = rowMajor, result: var MetadataArray) {.noSideEffect.} =
   ## Input:
@@ -121,8 +141,8 @@ proc shape_to_strides*(shape: MetadataArray, layout: OrderType = rowMajor, result: var MetadataArray) {.noSideEffect.} =
   ## Returns:
   ##   - The strides in C or Fortran order corresponding to this shape and layout
   ##
-  ## Arraymancer defaults to rowMajor. Temporarily, CudaTensors are colMajor by default.
-  # See Design document for further considerations.
+  ## Arraymancer defaults to rowMajor. Temporarily, CudaTensors are colMajor by default.
+  # See Design document for further considerations.
   var accum = 1
   result.len = shape.len
 
@@ -137,55 +157,31 @@ proc shape_to_strides*(shape: MetadataArray, layout: OrderType = rowMajor, result: var MetadataArray) {.noSideEffect.} =
     accum *= shape[i]
   return
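
Concretely, for shape [2, 3, 4] the loop yields strides [12, 4, 1] in rowMajor order and [1, 2, 6] in colMajor order. A standalone sketch of the same computation:

proc stridesOf(shape: openArray[int], rowMajor: bool): seq[int] =
  result = newSeq[int](shape.len)
  var accum = 1
  if rowMajor:
    for i in countdown(shape.len - 1, 0):  # innermost dimension is contiguous
      result[i] = accum
      accum *= shape[i]
  else:
    for i in 0 ..< shape.len:              # outermost dimension is contiguous
      result[i] = accum
      accum *= shape[i]

echo stridesOf([2, 3, 4], rowMajor = true)   # @[12, 4, 1]
echo stridesOf([2, 3, 4], rowMajor = false)  # @[1, 2, 6]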

-proc is_C_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
-  ## Check if the tensor follows C convention / is row major
-  var z = 1
-  for i in countdown(t.shape.high,0):
-    # 1. We should ignore strides on dimensions of size 1
-    # 2. Strides always must have the size equal to the product of the next dimensions
-    if t.shape[i] != 1 and t.strides[i] != z:
-      return false
-    z *= t.shape[i]
-  return true
-
-proc is_F_contiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
-  ## Check if the tensor follows Fortran convention / is column major
-  var z = 1
-  for i in 0..<t.shape.len:
-    # 1. We should ignore strides on dimensions of size 1
-    # 2. Strides always must have the size equal to the product of the next dimensions
-    if t.shape[i] != 1 and t.strides[i] != z:
-      return false
-    z *= t.shape[i]
-  return true
-
-proc isContiguous*(t: AnyTensor): bool {.noSideEffect, inline.}=
-  ## Check if the tensor is contiguous
-  return t.is_C_contiguous or t.is_F_contiguous

 # ##################
 # Raw pointer access
 # ##################
 
 # TODO: proper getters and setters, that also update Nim refcount
 # for interoperability of Arraymancer buffers with other frameworks
 
 proc get_data_ptr*[T](t: AnyTensor[T]): ptr T {.noSideEffect, inline.}=
   ## Input:
   ##   - A tensor
   ## Returns:
   ##   - A pointer to the real start of its data (no offset)
-  unsafeAddr(t.storage.Fdata[0])
+  cast[ptr T](t.storage.raw_buffer)
 
 proc get_offset_ptr*[T](t: AnyTensor[T]): ptr T {.noSideEffect, inline.}=
   ## Input:
   ##   - A tensor
   ## Returns:
   ##   - A pointer to the offset start of its data
-  unsafeAddr(t.storage.Fdata[t.offset])
+  t.storage.raw_buffer[t.offset].unsafeAddr
 
-proc dataArray*[T](t: Tensor[T]): ptr UncheckedArray[T] {.noSideEffect, inline.}=
+proc dataArray*[T](t: Tensor[T]): ptr UncheckedArray[T] {.noSideEffect, inline, deprecated: "Use unsafe_raw_data instead".}=
   ## Input:
   ##   - A tensor
   ## Returns:
   ##   - A pointer to the offset start of the data.
   ##     Return value supports array indexing.
-  cast[ptr UncheckedArray[T]](t.storage.Fdata[t.offset].unsafeAddr)
+  (ptr UncheckedArray[T])(t.unsafe_raw_data)
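
A brief usage sketch for the accessors above, writing through the raw view of a tensor (illustrative only):

import arraymancer

var t = newTensor[float64](4)
let buf = cast[ptr UncheckedArray[float64]](t.get_offset_ptr)
for i in 0 ..< t.size:
  buf[i] = float64(i)  # direct writes, bypassing Tensor accessors
echo t[2]              # 2.0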