Skip to content

Commit

Permalink
[df] fix spread implementation again, fix test case
Browse files Browse the repository at this point in the history
  • Loading branch information
Vindaar committed Feb 9, 2024
1 parent 266ea21 commit 10613fc
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 8 deletions.
4 changes: 2 additions & 2 deletions src/datamancer/dataframe.nim
Original file line number Diff line number Diff line change
Expand Up @@ -2311,8 +2311,8 @@ proc spread*[C: ColumnLike; T](df: DataTable[C], namesFrom, valuesFrom: string,
# bind `items` here to make it available in calling scope without `import sets`
bind items
let newCols = toSeq(items(dfGrouped.groupMap[namesFrom]))
# 4. and length of resulting DF by getting class with most counts
let dfOutlen = df.count(namesFrom)["n", int].max
# 4. number of output rows: unique value combinations of the remaining columns
let dfOutlen = df.select(restKeys).unique().len
# 5. create result DF from input column types
for c in restKeys:
result[c] = newColumn(df[c].kind, dfOutlen)
Expand Down
15 changes: 9 additions & 6 deletions tests/testDf.nim
Original file line number Diff line number Diff line change
Expand Up @@ -1041,7 +1041,7 @@ suite "DataTable tests":
## TODO: support NULL values instead of filling by default T(0)
block:
let data = """
Type Septem Line Fake ε_cut FractionPass
Type Septem Line Fake ε_cut FractionPass
LineReal false true Real 1 0.2204
LineFake false true Fake 1 0.8622
SeptemReal true false Real 1 0.2315
Expand All @@ -1051,14 +1051,17 @@ SeptemLineFake true true Fake 1
"""
let df = parseCsvString(data, sep = ' ')
let exp = """
Type Septem Line ε_cut Real Fake
LineReal false true 1 0.2204 0.8622
SeptemReal true false 1 0.2315 0.7255
SeptemLineReal true true 1 0.1368 0.7763
Type Septem Line ε_cut Real Fake
LineFake false true 1 0 0.8622
LineReal false true 1 0.2204 0
SeptemFake true false 1 0 0.7763
SeptemLineFake true true 1 0 0.7255
SeptemLineReal true true 1 0.1368 0
SeptemReal true false 1 0.2315 0
"""
let dfExp = parseCsvString(exp, sep = ' ')
let dfRes = df.spread("Fake", "FractionPass")
check dfRes.len == 3
check dfRes.len == 6
check dfRes.getKeys().len == 6
check dfRes.getKeys() == dfExp.getKeys()
check equal(dfRes, dfExp)
Expand Down

0 comments on commit 10613fc

Please sign in to comment.