Commit

Merge pull request #810 from sean-morris/master
Updates to Otter Grade CSV
chrispyles authored Jul 11, 2024
2 parents 65c5ef8 + 48f4a9e commit 5b09787
Showing 5 changed files with 100 additions and 21 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,9 @@

**v5.6.0 (unreleased):**

* Updated Otter Grade CSV to include the number of points per question in the first row
* Updated Otter Grade CSV to include a total points column
* Updated Otter Grade CSV to round percentages to four decimal places
* Updated Otter Grade CSV output to label submissions by notebook name instead of file path and to sort rows by notebook name, per [#738](https://github.com/ucbds-infra/otter-grader/issues/738) (an illustrative layout follows this list)
* Added backwards compatibility to Otter Grade for autograder configuration zip files generated in previous major versions of Otter-Grader
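
For context (not part of the commit), a `final_grades.csv` produced with these changes might look like the following; the notebook names, question columns, and scores are hypothetical:

```
file,q1,q2,q3,total_points_earned,percent_correct
points-per-question,2.0,3.0,5.0,10.0,NA
hw01-alice.ipynb,2.0,3.0,4.0,9.0,0.9
hw01-bob.ipynb,1.0,2.5,5.0,8.5,0.85
```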

7 changes: 4 additions & 3 deletions otter/grade/__init__.py
@@ -7,7 +7,7 @@
from typing import List, Optional, Tuple, Union

from .containers import launch_containers
from .utils import merge_csv, prune_images
from .utils import merge_csv, prune_images, SCORES_DICT_FILE_KEY, SCORES_DICT_PERCENT_CORRECT_KEY, SCORES_DICT_TOTAL_POINTS_KEY

from ..run.run_autograder.autograder_config import AutograderConfig
from ..utils import assert_path_exists, loggers
@@ -129,11 +129,12 @@ def main(
# Merge dataframes
output_df = merge_csv(grade_dfs)
cols = output_df.columns.tolist()
output_df = output_df[cols[-1:] + cols[:-1]]
question_cols = sorted(c for c in cols if c not in {SCORES_DICT_FILE_KEY, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY})
output_df = output_df[[SCORES_DICT_FILE_KEY, *question_cols, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY]]

# write to CSV file
output_df.to_csv(os.path.join(output_dir, "final_grades.csv"), index=False)

# return percentage if a single file was graded
if len(paths) == 1 and os.path.isfile(paths[0]):
return output_df["percent_correct"][0]
return output_df[SCORES_DICT_PERCENT_CORRECT_KEY][1]
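
The reordering above can be illustrated with a standalone sketch (hypothetical column names and values, reusing the constant names from `otter/grade/utils.py`): the `file` column comes first, the question columns are sorted by name, and the two summary columns are appended at the end.

```python
# Illustrative sketch of the new column ordering; not part of the commit.
import pandas as pd

SCORES_DICT_FILE_KEY = "file"
SCORES_DICT_TOTAL_POINTS_KEY = "total_points_earned"
SCORES_DICT_PERCENT_CORRECT_KEY = "percent_correct"

# A merged frame with columns in arbitrary order, as merge_csv might return it.
output_df = pd.DataFrame([{
    "q2": 3.0,
    "file": "hw01-alice.ipynb",
    "percent_correct": 0.9,
    "q1": 2.0,
    "total_points_earned": 4.5,
}])

cols = output_df.columns.tolist()
question_cols = sorted(
    c for c in cols
    if c not in {SCORES_DICT_FILE_KEY, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY}
)
output_df = output_df[[SCORES_DICT_FILE_KEY, *question_cols, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY]]

print(output_df.columns.tolist())
# ['file', 'q1', 'q2', 'total_points_earned', 'percent_correct']
```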
19 changes: 6 additions & 13 deletions otter/grade/containers.py
@@ -2,7 +2,6 @@

import json
import os
import pandas as pd
import pathlib
import pkg_resources
import shutil
@@ -14,7 +13,7 @@
from textwrap import indent
from typing import List, Optional

from .utils import OTTER_DOCKER_IMAGE_NAME
from .utils import OTTER_DOCKER_IMAGE_NAME, merge_scores_to_df

from ..run.run_autograder.autograder_config import AutograderConfig
from ..utils import loggers, OTTER_CONFIG_FILENAME
@@ -85,7 +84,7 @@ def launch_containers(
Grade submissions in parallel Docker containers.
This function runs ``num_containers`` Docker containers in parallel to grade the student
submissions in ``submissions_dir`` using the autograder configuration file at ``ag_zip_path``.
If indicated, it copies the PDFs generated of the submissions out of their containers.
Args:
@@ -117,9 +116,8 @@

# stop execution while containers are running
finished_futures = wait(futures)

# return list of dataframes
return [df.result() for df in finished_futures[0]]
scores = [f.result() for f in finished_futures[0]]
return merge_scores_to_df(scores)


def grade_submission(
@@ -217,12 +215,7 @@ def kill_container():
with open(results_path, "rb") as f:
scores = dill.load(f)

scores_dict = scores.to_dict()
scores_dict["percent_correct"] = scores.total / scores.possible

scores_dict = {t: [scores_dict[t]["score"]] if type(scores_dict[t]) == dict else scores_dict[t] for t in scores_dict}
scores_dict["file"] = nb_name
df = pd.DataFrame(scores_dict)
scores.file = nb_name

if pdf_dir:
os.makedirs(pdf_dir, exist_ok=True)
@@ -234,4 +227,4 @@ def kill_container():
os.remove(results_path)
os.remove(temp_subm_path)

return df
return scores
47 changes: 45 additions & 2 deletions otter/grade/utils.py
@@ -4,11 +4,21 @@
import pandas as pd
import re

from typing import List
from python_on_whales import docker

from ..test_files import GradingResults

OTTER_DOCKER_IMAGE_NAME = "otter-grade"

POINTS_POSSIBLE_LABEL = "points-per-question"

SCORES_DICT_FILE_KEY = "file"

SCORES_DICT_TOTAL_POINTS_KEY = "total_points_earned"

SCORES_DICT_PERCENT_CORRECT_KEY = "percent_correct"


def list_files(path):
"""
@@ -35,8 +45,12 @@ def merge_csv(dataframes):
``pandas.core.frame.DataFrame``: A merged dataframe resulting from 'stacking' all input dataframes
"""
final_dataframe = pd.concat(dataframes, axis=0, join='inner').sort_values(by="file")
return final_dataframe
final_dataframe = pd.concat(dataframes, axis=0, join='inner')
do_not_sort = final_dataframe[final_dataframe['file'] == POINTS_POSSIBLE_LABEL]
sort_these = final_dataframe[final_dataframe['file'] != POINTS_POSSIBLE_LABEL]
df_sorted = sort_these.sort_values(by="file")
df_final = pd.concat([do_not_sort, df_sorted], ignore_index=True)
return df_final


def prune_images(force=False):
Expand All @@ -63,3 +77,32 @@ def prune_images(force=False):

else:
print("Prune cancelled.")


def merge_scores_to_df(scores: List[GradingResults]) -> pd.DataFrame:
"""
Convert a list of ``GradingResults`` objects to a scores dataframe, including a row
with the total point values for each question.
Args:
scores (``list[otter.test_files.GradingResults]``): the score objects to merge
Returns:
``pd.DataFrame``: the scores dataframe
"""
full_df = []
pts_poss_dict = {t: [scores[0].to_dict()[t]["possible"]] for t in scores[0].to_dict()}
pts_poss_dict[SCORES_DICT_FILE_KEY] = POINTS_POSSIBLE_LABEL
pts_poss_dict[SCORES_DICT_PERCENT_CORRECT_KEY] = "NA"
pts_poss_dict[SCORES_DICT_TOTAL_POINTS_KEY] = scores[0].possible
pts_poss_df = pd.DataFrame(pts_poss_dict)
full_df.append(pts_poss_df)
for grading_result in scores:
scores_dict = grading_result.to_dict()
scores_dict = {t: [scores_dict[t]["score"]] for t in scores_dict}
scores_dict[SCORES_DICT_PERCENT_CORRECT_KEY] = round(grading_result.total / grading_result.possible, 4)
scores_dict[SCORES_DICT_TOTAL_POINTS_KEY] = grading_result.total
scores_dict[SCORES_DICT_FILE_KEY] = grading_result.file
df_scores = pd.DataFrame(scores_dict)
full_df.append(df_scores)
return full_df
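
A minimal sketch of how the new helpers fit together (not part of the commit, and assuming otter-grader with this change is installed); `StubResults` is a hypothetical stand-in for `otter.test_files.GradingResults`, exposing only the attributes `merge_scores_to_df` relies on:

```python
# Hypothetical end-to-end sketch: build a points-per-question row plus one row
# per submission, then sort the submission rows by notebook name.
from otter.grade.utils import merge_csv, merge_scores_to_df


class StubResults:
    """Hypothetical stand-in for otter.test_files.GradingResults."""

    def __init__(self, file, questions):
        self.file = file             # notebook name, as set by grade_submission
        self._questions = questions  # question name -> {"score": ..., "possible": ...}

    def to_dict(self):
        return self._questions

    @property
    def total(self):
        return sum(q["score"] for q in self._questions.values())

    @property
    def possible(self):
        return sum(q["possible"] for q in self._questions.values())


scores = [
    StubResults("hw01-bob.ipynb", {"q1": {"score": 1.0, "possible": 2.0}, "q2": {"score": 3.0, "possible": 3.0}}),
    StubResults("hw01-alice.ipynb", {"q1": {"score": 2.0, "possible": 2.0}, "q2": {"score": 2.5, "possible": 3.0}}),
]

# merge_scores_to_df prepends the points-per-question row; merge_csv keeps that
# row first and sorts the remaining rows by notebook name.
print(merge_csv(merge_scores_to_df(scores)))
```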
45 changes: 42 additions & 3 deletions test/test_grade/test_integration.py
@@ -14,6 +14,7 @@

from otter.generate import main as generate
from otter.grade import main as grade
from otter.grade.utils import POINTS_POSSIBLE_LABEL
from otter.run.run_autograder.autograder_config import AutograderConfig
from otter.utils import loggers

@@ -168,6 +169,9 @@ def test_notebooks_with_pdfs(expected_points):
)
assert sorted(dir1_contents) == sorted(dir2_contents), f"'{FILE_MANAGER.get_path('notebooks/')}' and 'test/submission_pdfs' have different contents"

# check that the row with point totals for each question exists
assert any(POINTS_POSSIBLE_LABEL in row for row in df_test.itertuples(index=False))


@mock.patch("otter.grade.launch_containers")
def test_single_notebook_grade(mocked_launch_grade):
@@ -182,7 +186,19 @@ def test_single_notebook_grade(mocked_launch_grade):
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 15.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 4.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 0.933333,
"total_points_earned": 14.0,
"file": "passesAll.ipynb",
}])

@@ -211,7 +227,7 @@ def test_single_notebook_grade(mocked_launch_grade):
)

mocked_launch_grade.assert_called_with(notebook_path, [notebook_path], **kw_expected)
assert output == 1.0
assert output == 0.933333


@mock.patch("otter.grade.launch_containers")
@@ -220,6 +236,17 @@ def test_config_overrides(mocked_launch_grade):
Checks that the CLI flags are converted to config overrides correctly.
"""
mocked_launch_grade.return_value = [pd.DataFrame([{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 15.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
@@ -228,6 +255,7 @@
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"total_points_earned": 15.0,
"file": "passesAll.ipynb",
}])]

@@ -274,6 +302,17 @@ def test_config_overrides_integration():

got = pd.read_csv("test/final_grades.csv")
want = pd.DataFrame([{
"q1": 0.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 13.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 0.0,
"q2": 2.0,
"q3": 2.0,
@@ -282,11 +321,11 @@
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"total_points_earned": 13.0,
"file": os.path.splitext(os.path.basename(ZIP_SUBM_PATH))[0],
}])

# Sort the columns by label so the dataframes can be compared with ==.
got = got.reindex(sorted(got.columns), axis=1)
want = want.reindex(sorted(want.columns), axis=1)

assert got.equals(want)
