From c22d899160f0f54774f9c4a052bb55051e2e4341 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 10:30:37 +0100
Subject: [PATCH 1/9] update arraymancer dep, remove our `len` for `Tensor`

---
 datamancer.nimble            | 2 +-
 src/datamancer/dataframe.nim | 6 ------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/datamancer.nimble b/datamancer.nimble
index 6867d84..d8d7176 100644
--- a/datamancer.nimble
+++ b/datamancer.nimble
@@ -11,7 +11,7 @@ srcDir        = "src"
 
 requires "nim >= 1.2.0"
 requires "https://github.com/Vindaar/seqmath >= 0.1.11"
-requires "arraymancer >= 0.7.1"
+requires "arraymancer >= 0.7.28"
 
 task test, "Run standard tests":
   exec "nim c -r tests/testDf.nim"
diff --git a/src/datamancer/dataframe.nim b/src/datamancer/dataframe.nim
index 349c5b5..8214d30 100644
--- a/src/datamancer/dataframe.nim
+++ b/src/datamancer/dataframe.nim
@@ -121,12 +121,6 @@ proc shallowCopy*[C: ColumnLike](df: DataTable[C]): DataTable[C] =
 
 # ---------- General convenience helpers ----------
 
-func len*[T](t: Tensor[T]): int =
-  ## Helper proc for 1D `Tensor[T]` to return the length of the vector, which
-  ## corresponds to a length of a DF column.
-  assert t.rank == 1
-  result = t.size
-
 proc contains*[C: ColumnLike](df: DataTable[C], key: string): bool =
   ## Checks if the `key` names column in the `DataFrame`.
   result = df.data.hasKey(key)

From a3ef72f39c787a6e3357f1f517ee1d4c1d5a2fed Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 11:37:03 +0100
Subject: [PATCH 2/9] [formula] check if proc to match types forbidden by
 {.error.}, ignore

---
 src/datamancer/formula.nim | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/src/datamancer/formula.nim b/src/datamancer/formula.nim
index 282b137..22e9194 100644
--- a/src/datamancer/formula.nim
+++ b/src/datamancer/formula.nim
@@ -629,10 +629,29 @@ proc maybeAddSpecialTypes(possibleTypes: var PossibleTypes, n: NimNode) =
   if strVal in ["<", ">", ">=", "<=", "==", "!="]:
     for dtype in Dtypes:
       possibleTypes.add ProcType(inputTypes: @[ident(dtype),
-                                                ident(dtype)],
+                                               ident(dtype)],
                                   isGeneric: false,
                                   resType: some(ident("bool")))
 
+proc isForbiddenByErrorPragma*(n: NimNode): bool =
+  ## Checks whether the given procedure is actually forbidden by usage of the `{.error: "".}` pragma.
+  ## This happens e.g. in arraymancer for the `+` and similar operations between Tensor and Scalar
+  ## nowadays.
+  ##
+  ## An example of such a body:
+  ##
+  ##    StmtList
+  ##      CommentStmt "Mathematical addition of tensors and scalars is undefined. Must use a broadcasted addition instead"
+  ##      Pragma
+  ##        ExprColonExpr
+  ##          Ident "error"
+  ##          StrLit "To add a tensor to a scalar you must use the `+.` operator (instead of a plain `+` operator)"
+  ##
+  let body = n.body
+  if body.kind == nnkStmtList: # magic procs can have an empty body!
+    result = body[^1].kind == nnkPragma and body[^1][0].kind == nnkExprColonExpr and
+      body[^1][0][0].kind in {nnkIdent, nnkSym} and body[^1][0][0].strVal == "error"
+
 proc findType(n: NimNode, numArgs: int): PossibleTypes =
   ## This procedure tries to find type information about a given NimNode.
   var possibleTypes = PossibleTypes()
@@ -684,6 +703,8 @@ proc findType(n: NimNode, numArgs: int): PossibleTypes =
       let tImpl = ch.getImpl
       case tImpl.kind
       of nnkProcDef, nnkFuncDef:
+        if tImpl.isForbiddenByErrorPragma(): continue # Forbidden by `{.error: "".}`, skip this
+
         let pt = determineTypeFromProc(tImpl, numArgs)
         if pt.isSome:
           possibleTypes.add pt.get

From 0ff1347d44e9b786d21efba8fc935e5cd6084aa0 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 11:37:29 +0100
Subject: [PATCH 3/9] [tests] update test cases for {.error.} pragma
 regressions

Without the previous commit of checking for the error pragma the
original tests did now not pass. We were generating code `Tensor +
Scalar` in the previous version for this. But that was never actually
intended. Technically it was simply allowed and 'correct' given our
logic and available tools in arraymancer.
---
 tests/testDf.nim | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tests/testDf.nim b/tests/testDf.nim
index 1a0eee0..6e6a851 100644
--- a/tests/testDf.nim
+++ b/tests/testDf.nim
@@ -1987,15 +1987,32 @@ suite "Formulas":
     ## This test is really only to test that the `mutate` formula shown here is
     ## actually compiled correctly into a mapping operation, with or without
     ## user given `~`
+    let mpgDf = readCsv("data/mpg.csv")
+    block WithName:
+      let df = mpgDf
+        .group_by("class")
+        .mutate(f{float -> float: "subMeanHwy" ~ idx(`cty`) + mean(df["hwy"])})
+        .arrange("class")
+      check df.len == 234
+      check df["subMeanHwy", float][0 ..< 5] == [40.8, 39.8, 40.8, 39.8, 39.8].toTensor
     block:
-      let df = readCsv("data/mpg.csv")
+      ## Check that it also works correctly without `idx`!
+      let df = mpgDf
         .group_by("class")
         .mutate(f{float -> float: "subMeanHwy" ~ `cty` + mean(df["hwy"])})
         .arrange("class")
       check df.len == 234
       check df["subMeanHwy", float][0 ..< 5] == [40.8, 39.8, 40.8, 39.8, 39.8].toTensor
     block:
-      let df = readCsv("data/mpg.csv")
+      ## And same with direct `col`
+      let df = mpgDf
+        .group_by("class")
+        .mutate(f{float -> float: "subMeanHwy" ~ `cty` + mean(col("hwy"))})
+        .arrange("class")
+      check df.len == 234
+      check df["subMeanHwy", float][0 ..< 5] == [40.8, 39.8, 40.8, 39.8, 39.8].toTensor
+    block NoName: ## without an explicit name
+      let df = mpgDf
         .group_by("class")
         .mutate(f{float -> float: `cty` + mean(df["hwy"])})
         .arrange("class")

From 03a5b2a6cfa9bac709e49459c73c8d80ae0b89be Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 11:38:39 +0100
Subject: [PATCH 4/9] update changelog

---
 changelog.org | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/changelog.org b/changelog.org
index fbaeead..f304df5 100644
--- a/changelog.org
+++ b/changelog.org
@@ -1,3 +1,9 @@
+* v0.4.1
+- remove our ~len~ for ~Tensor~ due to addition in arraymaner now
+  (update arraymancer dep to ~>= 0.7.28~)
+- improvement to formula logic to detect procedures with ~{.error:
+  "".}~ pragma usage and ignore them for type matching (necessary due
+  to forbidding e.g. ~+~ in arraymancer for ~Tensor + Scalar~ now)  
 * v0.4.0
 - add ~shuffle~ to shuffle a DF. Either using stdlib global RNG or
   given RNG

From a6ba1a7651f386252528c98553e24b2f365bb7b4 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 11:40:26 +0100
Subject: [PATCH 5/9] update github CI to run 1.6, 2.0 and devel

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index c712c1e..d249aaf 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,7 +20,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        branch: [version-1-4, devel]
+        branch: [version-1-6, version-2-0, devel]
         target: [linux, macos, windows]
         include:
           - target: linux

From 8bb28a93822aef52a367cb35a45b51a0edf90604 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 13:04:42 +0100
Subject: [PATCH 6/9] [tests] disable `max(c"x")` using tests for 1.6, 2.0

On versions other than devel, `max` here is being bound to the new
`max` that takes `varargs[Tensor]` and produces `Tensor`, breaking the formula!
---
 tests/testDf.nim | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/tests/testDf.nim b/tests/testDf.nim
index 6e6a851..f97bd54 100644
--- a/tests/testDf.nim
+++ b/tests/testDf.nim
@@ -6,6 +6,11 @@ from os import removeFile
 when not declared(AssertionDefect):
   type AssertionDefect = AssertionError
 
+template onlyDevel(body: untyped): untyped =
+  ## Used to disable some tests on older nim versions
+  when (NimMajor, NimMinor, NimPatch) >= (2, 1, 0):
+    body
+
 suite "Column":
   test "Constant columns":
     let c = constantColumn(12, 100)
@@ -901,18 +906,35 @@ suite "DataTable tests":
   test "Filter - comparisons using function":
     let x = toSeq(0 .. 100)
     let df = toDf(x)
-    let dfFilter = df.filter(f{float: c"x" >= max(c"x") * 0.5})
-    check dfFilter["x"].toTensor(int) == toTensor toSeq(50 .. 100)
-
+    block:
+      let dfFilter = df.filter(f{float: c"x" >= max(col("x")) * 0.5})
+      check dfFilter["x"].toTensor(int) == toTensor toSeq(50 .. 100)
+    onlyDevel:
+      block:
+        let dfFilter = df.filter(f{float: c"x" >= max(col("x")) * 0.5})
+        check dfFilter["x"].toTensor(int) == toTensor toSeq(50 .. 100)
   test "Filter - data types":
     let x = toSeq(0 .. 100)
     let df = toDf(x)
-    let dfFiltered = df.filter(f{float: c"x" >= max(c"x") * 0.5})
-    check dfFiltered["x"].kind == colInt
-    let dfReduced1 = df.summarize(f{int: max(c"x")})
-    check dfReduced1["(max x)"].kind == colInt
-    let dfReduced2 = df.summarize(f{float: max(c"x")})
-    check dfReduced2["(max x)"].kind == colFloat
+    block:
+      let dfFiltered = df.filter(f{float: c"x" >= max(col("x")) * 0.5})
+      check dfFiltered["x"].kind == colInt
+      let dfReduced1 = df.summarize(f{int: max(col("x"))})
+      check dfReduced1["(max (col x))"].kind == colInt
+      let dfReduced2 = df.summarize(f{float: max(col("x"))})
+      check dfReduced2["(max (col x))"].kind == colFloat
+    ## Test that `c"x"` works on devel. On older Nim (1.6, 2.0) the `max` is bound directly
+    ## to `max(varargs[Tensor[T]]): Tensor[T]`, which breaks the formula!
+    ## This `max` is a recent addition in arraymancer.
+    ## On devel this seems fixed.
+    onlyDevel:
+      block:
+        let dfFiltered = df.filter(f{float: c"x" >= max(c"x") * 0.5})
+        check dfFiltered["x"].kind == colInt
+        let dfReduced1 = df.summarize(f{int: max(c"x")})
+        check dfReduced1["(max x)"].kind == colInt
+        let dfReduced2 = df.summarize(f{float: max(c"x")})
+        check dfReduced2["(max x)"].kind == colFloat
 
   test "Transmute - float arithmetic":
     let x = toSeq(0 ..< 100)

From 06eaca7b351087e4c75003b1084eb65f9c9168a4 Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 13:06:53 +0100
Subject: [PATCH 7/9] update changelog

---
 changelog.org | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/changelog.org b/changelog.org
index f304df5..32ea516 100644
--- a/changelog.org
+++ b/changelog.org
@@ -3,7 +3,14 @@
   (update arraymancer dep to ~>= 0.7.28~)
 - improvement to formula logic to detect procedures with ~{.error:
   "".}~ pragma usage and ignore them for type matching (necessary due
-  to forbidding e.g. ~+~ in arraymancer for ~Tensor + Scalar~ now)  
+  to forbidding e.g. ~+~ in arraymancer for ~Tensor + Scalar~ now)
+
+*Note*: If you are on Nim 1.6 or 2.0 (but not after) and you are using
+ a formula with ~max(c"foo")~ where ~foo~ is a column name, you will
+ have to replace that by ~max(col("foo"))~. Due to a new addition of a
+ ~max(varargs[Tensor]]): Tensor~ type procedure in arraymancer, for
+ some reason older Nim versions bind the symbol ~max~ fully to that
+ ~varargs~ version. That breaks previous formulas.
 * v0.4.0
 - add ~shuffle~ to shuffle a DF. Either using stdlib global RNG or
   given RNG

From 161802f7eb522b68e3592e55843f7c7b85cf23ce Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 13:18:41 +0100
Subject: [PATCH 8/9] [tests] update other tests accordingly

---
 tests/tests.nim        | 17 ++++++++++++---
 tests/testsFormula.nim | 47 ++++++++++++++++++++++++++++++++----------
 2 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/tests/tests.nim b/tests/tests.nim
index cdf3256..80914e7 100644
--- a/tests/tests.nim
+++ b/tests/tests.nim
@@ -11,6 +11,11 @@ proc almostEq(a, b: float, epsilon = 1e-8): bool =
   if not result:
     echo "Comparison failed: a = ", a, ", b = ", b
 
+template onlyDevel(body: untyped): untyped =
+  ## Used to disable some tests on older nim versions
+  when (NimMajor, NimMinor, NimPatch) >= (2, 1, 0):
+    body
+
 suite "Value":
   let
     v1 = %~ 1
@@ -210,9 +215,15 @@ suite "Formula":
     check h.val == %~ false
     check h.name == "(== (%~ tup.a) (%~ tup.b))"
 
-    let f2 = f{float: "min" << min(c"runTimes")}
-    check $f2 == "min" # LHS of formula
-    check f2.name == "(<< min (min runTimes))"
+    block:
+      let f2 = f{float: "min" << min(col("runTimes"))}
+      check $f2 == "min" # LHS of formula
+      check f2.name == "(<< min (min (col runTimes)))"
+    onlyDevel:
+      block:
+        let f2 = f{float: "min" << min(c"runTimes")}
+        check $f2 == "min" # LHS of formula
+        check f2.name == "(<< min (min runTimes))"
 
 
   test "Evaluate raw formula (no DF column dependency)":
diff --git a/tests/testsFormula.nim b/tests/testsFormula.nim
index 0626cfd..cdf27de 100644
--- a/tests/testsFormula.nim
+++ b/tests/testsFormula.nim
@@ -11,6 +11,11 @@ template fails(body: untyped): untyped =
   else:
     check true
 
+template onlyDevel(body: untyped): untyped =
+  ## Used to disable some tests on older nim versions
+  when (NimMajor, NimMinor, NimPatch) >= (2, 1, 0):
+    body
+
 when (NimMajor, NimMinor, NimPatch) < (1, 6, 0):
   proc isNan(x: float): bool =
     result = classify(x) == fcNaN
@@ -79,9 +84,9 @@ suite "Formulas":
       # - prefix, automatic type deduction
       let fn = f{ not idx("f") }
       check fn.evaluate(df).bCol == [true, false, true].toTensor
-    block:
-      let fn = f{ idx("x") >= max(col("x")) * 0.5 }
-
+    onlyDevel:
+      block:
+        let fn = f{ idx("x") >= max(col("x")) * 0.5 }
     block:
       let fn = f{ parseInt(idx("a")) > 2 }
 
@@ -117,16 +122,32 @@ suite "Formulas":
       # - type deduction based on `idx` in specific argument of a typically overloaded
       #   symbol. Can be deduced due to only single overload matching the arguments
       proc someInt(): int = 2
-      proc max(x: int, y: string, z: float, b: int): int =
-        result = 5
-      let fn = f{ max(idx("a"), "hello", 5.5, someInt()) }
-      check fn.evaluate(df).iCol == [5, 5, 5].toTensor
+
+      ## On none devel, `max` and `min` are broken
+      onlyDevel:
+        block:
+          proc max(x: int, y: string, z: float, b: int): int =
+            result = 5
+
+          let fn = f{ max(idx("a"), "hello", 5.5, someInt()) }
+          check fn.evaluate(df).iCol == [5, 5, 5].toTensor
+      block:
+        proc getAnInt(x: int, y: string, z: float, b: int): int =
+          result = 5
+
+        let fn = f{ getAnInt(idx("a"), "hello", 5.5, someInt()) }
+        check fn.evaluate(df).iCol == [5, 5, 5].toTensor
 
     block:
       # - automatically determines that `a` should be read as `int`
       # - formula is mapping
-      let fn = f{ max(idx("a"), 2) }
-      check fn.evaluate(df).iCol == [2, 2, 3].toTensor
+      onlyDevel:
+        block:
+          let fn = f{ max(idx("a"), 2) }
+          check fn.evaluate(df).iCol == [2, 2, 3].toTensor
+      block: # on 1.6, 2.0 requires explicit type hint. Should work though!
+        let fn = f{int: max(idx("a"), 2) }
+        check fn.evaluate(df).iCol == [2, 2, 3].toTensor
 
   test "Formula with an if expression accessing multiple columns":
     block:
@@ -174,8 +195,12 @@ suite "Formulas":
       check fn.evaluate(df).fCol == [5.5, 5.5, 5.5].toTensor
 
   test "`max` overload is resolved in context of infix with float":
-    block:
-      let fn = f{ `a` >= max(`a`) * 0.5 }
+    onlyDevel:
+      block:
+        let fn = f{ `a` >= max(`a`) * 0.5 }
+        check fn.evaluate(df).bCol == [false, true, true].toTensor
+    block: # For 1.6, 2.0 does not work anymore!
+      let fn = f{float: `a` >= max(col("a")) * 0.5 }
       check fn.evaluate(df).bCol == [false, true, true].toTensor
 
     block:

From 66c393e5556f1b8accffcffd8642b84e8af9dedb Mon Sep 17 00:00:00 2001
From: Vindaar <basti90@gmail.com>
Date: Mon, 19 Feb 2024 13:19:24 +0100
Subject: [PATCH 9/9] update changelog

---
 changelog.org | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/changelog.org b/changelog.org
index 32ea516..701c3f3 100644
--- a/changelog.org
+++ b/changelog.org
@@ -11,6 +11,15 @@
  ~max(varargs[Tensor]]): Tensor~ type procedure in arraymancer, for
  some reason older Nim versions bind the symbol ~max~ fully to that
  ~varargs~ version. That breaks previous formulas.
+ In formulas using ~max~ (or similarly ~min~) it may also be necessary
+ to supply type hints, even if they were not required previously for
+ related reasons. E.g. replace:
+ ~f{ max(idx("a"), 2) }~
+ by
+ ~f{int: max(idx("a"), 2) }~
+ to indicate the column ~"a"~ should be treated as integers.
+ Given that this is fixed on Nim devel, I won't attempt a hacky
+ workaround. Please just update your Nim version or your formulas. :( 
 * v0.4.0
 - add ~shuffle~ to shuffle a DF. Either using stdlib global RNG or
   given RNG