Commit

Merge pull request #810 from sean-morris/master
Updates to Otter Grade CSV
chrispyles authored Jul 11, 2024
2 parents 65c5ef8 + 48f4a9e commit 5b09787
Showing 5 changed files with 100 additions and 21 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,9 @@

**v5.6.0 (unreleased):**

* Updated Otter Grade CSV to include the number of points per question in the first row
* Updated Otter Grade CSV to include a total points column
* Updated Otter Grade CSV to round percentages to four decimal places
* Updated Otter Grade CSV output to label submissions by notebook name instead of file path and to sort rows by notebook name, per [#738](https://github.com/ucbds-infra/otter-grader/issues/738) (an illustrative layout follows this list)
* Added backwards compatibility to Otter Grade for autograder configuration zip files generated in previous major versions of Otter-Grader
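
For context (not part of the commit), a `final_grades.csv` produced with these changes might look like the following; the notebook names, question columns, and scores are hypothetical:

```
file,q1,q2,q3,total_points_earned,percent_correct
points-per-question,2.0,3.0,5.0,10.0,NA
hw01-alice.ipynb,2.0,3.0,4.0,9.0,0.9
hw01-bob.ipynb,1.0,2.5,5.0,8.5,0.85
```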

7 changes: 4 additions & 3 deletions otter/grade/__init__.py
@@ -7,7 +7,7 @@
from typing import List, Optional, Tuple, Union

from .containers import launch_containers
from .utils import merge_csv, prune_images
from .utils import merge_csv, prune_images, SCORES_DICT_FILE_KEY, SCORES_DICT_PERCENT_CORRECT_KEY, SCORES_DICT_TOTAL_POINTS_KEY

from ..run.run_autograder.autograder_config import AutograderConfig
from ..utils import assert_path_exists, loggers
@@ -129,11 +129,12 @@ def main(
# Merge dataframes
output_df = merge_csv(grade_dfs)
cols = output_df.columns.tolist()
output_df = output_df[cols[-1:] + cols[:-1]]
question_cols = sorted(c for c in cols if c not in {SCORES_DICT_FILE_KEY, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY})
output_df = output_df[[SCORES_DICT_FILE_KEY, *question_cols, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY]]

# write to CSV file
output_df.to_csv(os.path.join(output_dir, "final_grades.csv"), index=False)

# return percentage if a single file was graded
if len(paths) == 1 and os.path.isfile(paths[0]):
return output_df["percent_correct"][0]
return output_df[SCORES_DICT_PERCENT_CORRECT_KEY][1]
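
The reordering above can be illustrated with a standalone sketch (hypothetical column names and values, reusing the constant names from `otter/grade/utils.py`): the `file` column comes first, the question columns are sorted by name, and the two summary columns are appended at the end.

```python
# Illustrative sketch of the new column ordering; not part of the commit.
import pandas as pd

SCORES_DICT_FILE_KEY = "file"
SCORES_DICT_TOTAL_POINTS_KEY = "total_points_earned"
SCORES_DICT_PERCENT_CORRECT_KEY = "percent_correct"

# A merged frame with columns in arbitrary order, as merge_csv might return it.
output_df = pd.DataFrame([{
    "q2": 3.0,
    "file": "hw01-alice.ipynb",
    "percent_correct": 0.9,
    "q1": 2.0,
    "total_points_earned": 4.5,
}])

cols = output_df.columns.tolist()
question_cols = sorted(
    c for c in cols
    if c not in {SCORES_DICT_FILE_KEY, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY}
)
output_df = output_df[[SCORES_DICT_FILE_KEY, *question_cols, SCORES_DICT_TOTAL_POINTS_KEY, SCORES_DICT_PERCENT_CORRECT_KEY]]

print(output_df.columns.tolist())
# ['file', 'q1', 'q2', 'total_points_earned', 'percent_correct']
```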
19 changes: 6 additions & 13 deletions otter/grade/containers.py
@@ -2,7 +2,6 @@

import json
import os
import pandas as pd
import pathlib
import pkg_resources
import shutil
@@ -14,7 +13,7 @@
from textwrap import indent
from typing import List, Optional

from .utils import OTTER_DOCKER_IMAGE_NAME
from .utils import OTTER_DOCKER_IMAGE_NAME, merge_scores_to_df

from ..run.run_autograder.autograder_config import AutograderConfig
from ..utils import loggers, OTTER_CONFIG_FILENAME
@@ -85,7 +84,7 @@ def launch_containers(
Grade submissions in parallel Docker containers.
This function runs ``num_containers`` Docker containers in parallel to grade the student
submissions in ``submissions_dir`` using the autograder configuration file at ``ag_zip_path``.
If indicated, it copies the PDFs generated of the submissions out of their containers.
Args:
@@ -117,9 +116,8 @@

# stop execution while containers are running
finished_futures = wait(futures)

# return list of dataframes
return [df.result() for df in finished_futures[0]]
scores = [f.result() for f in finished_futures[0]]
return merge_scores_to_df(scores)


def grade_submission(
@@ -217,12 +215,7 @@ def kill_container():
with open(results_path, "rb") as f:
scores = dill.load(f)

scores_dict = scores.to_dict()
scores_dict["percent_correct"] = scores.total / scores.possible

scores_dict = {t: [scores_dict[t]["score"]] if type(scores_dict[t]) == dict else scores_dict[t] for t in scores_dict}
scores_dict["file"] = nb_name
df = pd.DataFrame(scores_dict)
scores.file = nb_name

if pdf_dir:
os.makedirs(pdf_dir, exist_ok=True)
@@ -234,4 +227,4 @@ def kill_container():
os.remove(results_path)
os.remove(temp_subm_path)

return df
return scores
47 changes: 45 additions & 2 deletions otter/grade/utils.py
@@ -4,11 +4,21 @@
import pandas as pd
import re

from typing import List
from python_on_whales import docker

from ..test_files import GradingResults

OTTER_DOCKER_IMAGE_NAME = "otter-grade"

POINTS_POSSIBLE_LABEL = "points-per-question"

SCORES_DICT_FILE_KEY = "file"

SCORES_DICT_TOTAL_POINTS_KEY = "total_points_earned"

SCORES_DICT_PERCENT_CORRECT_KEY = "percent_correct"


def list_files(path):
"""
@@ -35,8 +45,12 @@ def merge_csv(dataframes):
``pandas.core.frame.DataFrame``: A merged dataframe resulting from 'stacking' all input dataframes
"""
final_dataframe = pd.concat(dataframes, axis=0, join='inner').sort_values(by="file")
return final_dataframe
final_dataframe = pd.concat(dataframes, axis=0, join='inner')
do_not_sort = final_dataframe[final_dataframe['file'] == POINTS_POSSIBLE_LABEL]
sort_these = final_dataframe[final_dataframe['file'] != POINTS_POSSIBLE_LABEL]
df_sorted = sort_these.sort_values(by="file")
df_final = pd.concat([do_not_sort, df_sorted], ignore_index=True)
return df_final


def prune_images(force=False):
Expand All @@ -63,3 +77,32 @@ def prune_images(force=False):

else:
print("Prune cancelled.")


def merge_scores_to_df(scores: List[GradingResults]) -> pd.DataFrame:
"""
Convert a list of ``GradingResults`` objects to a scores dataframe, including a row
with the total point values for each question.
Args:
scores (``list[otter.test_files.GradingResults]``): the score objects to merge
Returns:
``pd.DataFrame``: the scores dataframe
"""
full_df = []
pts_poss_dict = {t: [scores[0].to_dict()[t]["possible"]] for t in scores[0].to_dict()}
pts_poss_dict[SCORES_DICT_FILE_KEY] = POINTS_POSSIBLE_LABEL
pts_poss_dict[SCORES_DICT_PERCENT_CORRECT_KEY] = "NA"
pts_poss_dict[SCORES_DICT_TOTAL_POINTS_KEY] = scores[0].possible
pts_poss_df = pd.DataFrame(pts_poss_dict)
full_df.append(pts_poss_df)
for grading_result in scores:
scores_dict = grading_result.to_dict()
scores_dict = {t: [scores_dict[t]["score"]] for t in scores_dict}
scores_dict[SCORES_DICT_PERCENT_CORRECT_KEY] = round(grading_result.total / grading_result.possible, 4)
scores_dict[SCORES_DICT_TOTAL_POINTS_KEY] = grading_result.total
scores_dict[SCORES_DICT_FILE_KEY] = grading_result.file
df_scores = pd.DataFrame(scores_dict)
full_df.append(df_scores)
return full_df
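
A minimal sketch of how the new helpers fit together (not part of the commit, and assuming otter-grader with this change is installed); `StubResults` is a hypothetical stand-in for `otter.test_files.GradingResults`, exposing only the attributes `merge_scores_to_df` relies on:

```python
# Hypothetical end-to-end sketch: build a points-per-question row plus one row
# per submission, then sort the submission rows by notebook name.
from otter.grade.utils import merge_csv, merge_scores_to_df


class StubResults:
    """Hypothetical stand-in for otter.test_files.GradingResults."""

    def __init__(self, file, questions):
        self.file = file             # notebook name, as set by grade_submission
        self._questions = questions  # question name -> {"score": ..., "possible": ...}

    def to_dict(self):
        return self._questions

    @property
    def total(self):
        return sum(q["score"] for q in self._questions.values())

    @property
    def possible(self):
        return sum(q["possible"] for q in self._questions.values())


scores = [
    StubResults("hw01-bob.ipynb", {"q1": {"score": 1.0, "possible": 2.0}, "q2": {"score": 3.0, "possible": 3.0}}),
    StubResults("hw01-alice.ipynb", {"q1": {"score": 2.0, "possible": 2.0}, "q2": {"score": 2.5, "possible": 3.0}}),
]

# merge_scores_to_df prepends the points-per-question row; merge_csv keeps that
# row first and sorts the remaining rows by notebook name.
print(merge_csv(merge_scores_to_df(scores)))
```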
45 changes: 42 additions & 3 deletions test/test_grade/test_integration.py
@@ -14,6 +14,7 @@

from otter.generate import main as generate
from otter.grade import main as grade
from otter.grade.utils import POINTS_POSSIBLE_LABEL
from otter.run.run_autograder.autograder_config import AutograderConfig
from otter.utils import loggers

@@ -168,6 +169,9 @@ def test_notebooks_with_pdfs(expected_points):
)
assert sorted(dir1_contents) == sorted(dir2_contents), f"'{FILE_MANAGER.get_path('notebooks/')}' and 'test/submission_pdfs' have different contents"

# check that the row with point totals for each question exists
assert any(POINTS_POSSIBLE_LABEL in row for row in df_test.itertuples(index=False))


@mock.patch("otter.grade.launch_containers")
def test_single_notebook_grade(mocked_launch_grade):
@@ -182,7 +186,19 @@ def test_single_notebook_grade(mocked_launch_grade):
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 15.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 4.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 0.933333,
"total_points_earned": 14.0,
"file": "passesAll.ipynb",
}])

@@ -211,7 +227,7 @@ def test_single_notebook_grade(mocked_launch_grade):
)

mocked_launch_grade.assert_called_with(notebook_path, [notebook_path], **kw_expected)
assert output == 1.0
assert output == 0.933333


@mock.patch("otter.grade.launch_containers")
@@ -220,6 +236,17 @@ def test_config_overrides(mocked_launch_grade):
Checks that the CLI flags are converted to config overrides correctly.
"""
mocked_launch_grade.return_value = [pd.DataFrame([{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 15.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 2.0,
"q2": 2.0,
"q3": 2.0,
@@ -228,6 +255,7 @@
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"total_points_earned": 15.0,
"file": "passesAll.ipynb",
}])]

@@ -274,6 +302,17 @@ def test_config_overrides_integration():

got = pd.read_csv("test/final_grades.csv")
want = pd.DataFrame([{
"q1": 0.0,
"q2": 2.0,
"q3": 2.0,
"q4": 1.0,
"q6": 5.0,
"q2b": 2.0,
"q7": 1.0,
"percent_correct": float('nan'),
"total_points_earned": 13.0,
"file": POINTS_POSSIBLE_LABEL,
},{
"q1": 0.0,
"q2": 2.0,
"q3": 2.0,
@@ -282,11 +321,11 @@
"q2b": 2.0,
"q7": 1.0,
"percent_correct": 1.0,
"total_points_earned": 13.0,
"file": os.path.splitext(os.path.basename(ZIP_SUBM_PATH))[0],
}])

# Sort the columns by label so the dataframes can be compared with ==.
got = got.reindex(sorted(got.columns), axis=1)
want = want.reindex(sorted(want.columns), axis=1)

assert got.equals(want)
