449 lines
18 KiB
Python
449 lines
18 KiB
Python
"""
Library to do grading of Python programs.

Usage (see grader.py):

    # create a grader (command-line arguments are read from sys.argv by default)
    grader = Grader()

    # add a basic test
    grader.add_basic_part(number, grade_func, max_points, max_seconds, description="a basic test")

    # add a hidden test
    grader.add_hidden_part(number, grade_func, max_points, max_seconds, description="a hidden test")

    # add a manual grading part (no grading function, no time limit)
    grader.add_manual_part(number, max_points, description="written problem")

    # run grading
    grader.grade()
"""
|
|
|
|
import argparse
|
|
import datetime
|
|
import gc
|
|
import json
|
|
import os
|
|
import signal
|
|
import sys
|
|
import traceback
|
|
|
|
default_max_seconds = 5  # Default time limit for an automatically graded part, in seconds
TOLERANCE = 1e-4  # Absolute tolerance used when deciding whether two floats are equal

# Grading modes; a mode may be given as the first positional command-line argument.
BASIC_MODE = 'basic'  # basic tests only
AUTO_MODE = 'auto'  # basic + hidden tests
ALL_MODE = 'all'  # basic + hidden + manual parts
|
|
|
|
|
|
def is_traceback_item_grader(item):
    """Tell whether a traceback entry belongs to the grading system itself.

    Such entries are left out of the stack traces reported as feedback.
    ``item`` is indexed positionally: element 0 is used as the file name and
    is matched against the ``graderUtil.py`` suffix.
    """
    filename = item[0]
    return filename.endswith('graderUtil.py')
|
|
|
|
|
|
def is_collection(x):
    """Return True when ``x`` is a list or a tuple (subclasses included)."""
    return isinstance(x, (list, tuple))
|
|
|
|
|
|
def is_equal(true_answer, pred_answer, tolerance=TOLERANCE):
    """Return whether two answers are equal, comparing floats approximately.

    Comparison rules, tried in order:

    * If either value is a ``float``, the values are equal when their absolute
      difference is strictly below ``tolerance``.  A value that cannot be
      subtracted from a float (e.g. ``None`` or a string) is simply unequal.
    * Two lists/tuples of the same length are compared element by element.
    * Two dicts are equal when they have the same keys and equal values.
    * Two numpy arrays are equal when their shapes match and their elements
      (converted with ``tolist()``) are equal under the rules above.
    * Anything else falls back to ``==``.

    ``tolerance`` applies at every nesting level.
    """
    # Handle floats specially
    if isinstance(true_answer, float) or isinstance(pred_answer, float):
        try:
            return abs(true_answer - pred_answer) < tolerance
        except TypeError:
            # The other operand is not numeric (e.g. None): a mismatch, not a crash.
            return False

    # Recurse on collections to deal with floats inside them
    if is_collection(true_answer) and is_collection(pred_answer) and len(true_answer) == len(pred_answer):
        return all(is_equal(a, b, tolerance) for a, b in zip(true_answer, pred_answer))

    if isinstance(true_answer, dict) and isinstance(pred_answer, dict):
        if len(true_answer) != len(pred_answer):
            return False
        for key, value in true_answer.items():
            # A missing key must not be confused with a key mapped to None.
            if key not in pred_answer:
                return False
            if not is_equal(value, pred_answer[key], tolerance):
                return False
        return True

    # Numpy array comparison (numpy is imported only when an array shows up)
    if type(true_answer).__name__ == 'ndarray':
        import numpy as np
        if isinstance(true_answer, np.ndarray) and isinstance(pred_answer, np.ndarray):
            if true_answer.shape != pred_answer.shape:
                return False
            # tolist() yields nested lists of Python scalars (a bare scalar for
            # a 0-d array), so the generic rules above handle every rank.
            return is_equal(true_answer.tolist(), pred_answer.tolist(), tolerance)

    # Do normal comparison
    return true_answer == pred_answer
|
|
|
|
|
|
# Support for running a function with a limit of max_seconds.
class TimeoutFunctionException(Exception):
    """Signals that a time-limited call ran longer than it was allowed to."""
|
|
|
|
|
|
class TimeoutFunction:
    """Callable wrapper that runs ``function`` under a time limit.

    Where ``signal.SIGALRM`` is available, an alarm is armed for
    ``max_seconds + 1`` seconds and the call is interrupted with
    ``TimeoutFunctionException`` when it fires.  Elsewhere (e.g. Windows) the
    call cannot be interrupted: it runs to completion and the exception is
    raised afterwards if it took longer than ``max_seconds + 1`` seconds.
    """

    def __init__(self, function, max_seconds):
        """Remember the callable and its time limit (whole seconds)."""
        self.max_seconds = max_seconds
        self.function = function

    @staticmethod
    def handle_max_seconds(signum, frame):
        """SIGALRM handler: announce the timeout and abort the running call."""
        print('TIMEOUT!')
        raise TimeoutFunctionException()

    def __call__(self, *args, **kwargs):
        """Call the wrapped function with the given arguments.

        Returns whatever the function returns.  Raises
        ``TimeoutFunctionException`` when the time limit is exceeded; any
        exception raised by the function itself propagates unchanged.
        """
        if not hasattr(signal, 'SIGALRM'):
            # No SIGALRM on this platform (Windows): we cannot stop the call
            # after max_seconds, but we can still report the overrun afterwards.
            time_start = datetime.datetime.now()
            result = self.function(*args, **kwargs)
            time_end = datetime.datetime.now()
            if time_end - time_start > datetime.timedelta(seconds=self.max_seconds + 1):
                raise TimeoutFunctionException()
            return result

        signal.signal(signal.SIGALRM, self.handle_max_seconds)
        signal.alarm(self.max_seconds + 1)
        try:
            return self.function(*args, **kwargs)
        finally:
            # Always disarm the alarm, even when the function raised, so that it
            # cannot fire later in unrelated code.
            signal.alarm(0)
|
|
|
|
|
|
class Part:
    """One gradable unit of an assignment: its specification plus its result.

    A part with a grading function is automatic (either basic or hidden,
    depending on ``basic``); a part without one is manual.
    """

    def __init__(self, number, grade_func, max_points, max_seconds, extra_credit, description, basic):
        """Validate the specification and initialise an empty grading result.

        Raises ``Exception`` when ``number`` is not a string, ``grade_func`` is
        neither None nor callable, ``max_points`` is not an int/float, or
        ``max_seconds`` is neither None nor an int.  A missing description is
        only reported on standard output.
        """
        if not isinstance(number, str):
            raise Exception("Invalid number: %s" % number)
        if not (grade_func is None or callable(grade_func)):
            raise Exception("Invalid grade_func: %s" % grade_func)
        if not isinstance(max_points, (int, float)):
            raise Exception("Invalid max_points: %s" % max_points)
        if not (max_seconds is None or isinstance(max_seconds, int)):
            raise Exception("Invalid max_seconds: %s" % max_seconds)
        if not description:
            print('ERROR: description required for part {}'.format(number))

        # --- Specification of the part ---
        self.number = number  # Unique identifier for this part
        self.description = description  # Human-readable description
        self.grade_func = grade_func  # Function called to grade the part (None for manual parts)
        self.max_points = max_points  # Maximum number of points attainable
        self.max_seconds = max_seconds  # Time allowed for the student's code, in seconds
        self.extra_credit = extra_credit  # Whether this is an extra credit problem
        self.basic = basic  # Whether this is a basic (as opposed to hidden) test

        # --- Result of grading the part ---
        self.points = 0
        self.seconds = 0
        self.side = None  # Side information
        self.messages = []
        self.failed = False

    def fail(self):
        """Mark this part as failed."""
        self.failed = True

    def is_auto(self):
        """Whether the part is graded by a function (basic or hidden)."""
        return self.grade_func is not None

    def is_manual(self):
        """Whether the part has no grading function."""
        return self.grade_func is None

    def is_basic(self):
        """Whether the part is automatically graded and flagged as basic."""
        return self.grade_func is not None and self.basic

    def is_hidden(self):
        """Whether the part is automatically graded and not flagged as basic."""
        return self.grade_func is not None and not self.basic
|
|
|
|
|
|
class Grader:
    """Collects the parts of an assignment, runs them and reports the scores.

    Command-line arguments (``args``, defaulting to ``sys.argv``):

    * ``--js`` / ``--json``: write the result to ``grader-<mode>.js`` /
      ``grader-<mode>.json``.
    * ``--summary``: report the specification of the parts without running them.
    * first positional argument: either a mode (``basic``, ``auto``, ``all``)
      or the number of the single part to run (in ``auto`` mode).
    """

    # Total of non-extra-credit points an assignment is expected to be worth;
    # the --summary report warns when the parts add up to something else.
    EXPECTED_TOTAL_POINTS = 75

    def __init__(self, args=None):
        """Parse the command line and set up an empty grader."""
        if args is None:
            args = sys.argv
        self.parts = []  # Parts (to be added)
        self.useSolution = False  # Set to true if we are actually evaluating the hidden test cases

        parser = argparse.ArgumentParser()
        parser.add_argument('--js', action='store_true', help='Write JS file with information about this assignment')
        parser.add_argument('--json', action='store_true',
                            help='Write JSON file with information about this assignment')
        parser.add_argument('--summary', action='store_true', help='Don\'t actually run code')
        parser.add_argument('remainder', nargs=argparse.REMAINDER)
        self.params = parser.parse_args(args[1:])

        remainder = self.params.remainder
        if len(remainder) < 1:
            self.mode = AUTO_MODE
            self.selectedPartName = None
        elif remainder[0] in (BASIC_MODE, AUTO_MODE, ALL_MODE):
            self.mode = remainder[0]
            self.selectedPartName = None
        else:
            # Anything that is not a mode is taken to be a part number.
            self.mode = AUTO_MODE
            self.selectedPartName = remainder[0]

        self.messages = []  # General messages
        self.currentPart = None  # Which part we're grading
        self.fatalError = False  # Set this if we should just stop immediately

    def add_basic_part(self, number, grade_func, max_points=1, max_seconds=default_max_seconds, extra_credit=False,
                       description=""):
        """Add a basic test case. The test will be visible to students."""
        self.assert_new_number(number)
        part = Part(number, grade_func, max_points, max_seconds, extra_credit, description, basic=True)
        self.parts.append(part)

    def add_hidden_part(self, number, grade_func, max_points=1, max_seconds=default_max_seconds, extra_credit=False,
                        description=""):
        """Add a hidden test case. The output should NOT be visible to students
        and so should be inside a BEGIN_HIDE block."""
        self.assert_new_number(number)
        part = Part(number, grade_func, max_points, max_seconds, extra_credit, description, basic=False)
        self.parts.append(part)

    def add_manual_part(self, number, max_points, extra_credit=False, description=""):
        """Add a manual part (no grading function and no time limit)."""
        self.assert_new_number(number)
        part = Part(number, None, max_points, None, extra_credit, description, basic=False)
        self.parts.append(part)

    def assert_new_number(self, number):
        """Raise ``Exception`` if a part with this number was already added."""
        if any(part.number == number for part in self.parts):
            raise Exception("Part number %s already exists" % number)

    def load(self, module_name):
        """Try to import the module submitted by the student.

        Returns the module, or None after recording a failure and setting
        ``fatalError`` when the import raised or tried to exit the interpreter.
        KeyboardInterrupt is deliberately not intercepted, so the grader can
        still be aborted by the user.
        """
        try:
            return __import__(module_name)
        except Exception as e:
            self.fail("Threw exception when importing '%s': %s" % (module_name, e))
        except SystemExit:
            self.fail("Threw exception when importing '%s'" % module_name)
        self.fatalError = True
        return None

    @staticmethod
    def _cancel_alarm():
        """Disarm any pending SIGALRM; a no-op where signal.alarm does not exist."""
        if hasattr(signal, 'alarm'):
            signal.alarm(0)

    def grade_part(self, part):
        """Run one part under its time limit and record points, time and messages."""
        print('----- START PART %s%s: %s' % (
            part.number, ' (extra credit)' if part.extra_credit else '', part.description))
        self.currentPart = part

        timed_out = False
        start_time = datetime.datetime.now()
        try:
            TimeoutFunction(part.grade_func, part.max_seconds)()  # Call the part's function
        except KeyboardInterrupt:
            raise
        except MemoryError:
            self._cancel_alarm()
            gc.collect()
            self.fail('Memory limit exceeded.')
        except TimeoutFunctionException:
            self._cancel_alarm()
            timed_out = True
            self.fail('Time limit (%s seconds) exceeded.' % part.max_seconds)
        except Exception as e:
            self._cancel_alarm()
            self.fail('Exception thrown: %s -- %s' % (str(type(e)), str(e)))
            self.print_exception()
        except SystemExit:
            # Catch SystemExit raised by exit(), quit() or sys.exit()
            # This class is not a subclass of Exception and we don't
            # expect students to raise it.
            self._cancel_alarm()
            self.fail('Unexpected exit.')
            self.print_exception()
        end_time = datetime.datetime.now()
        # Whole elapsed seconds; total_seconds() (unlike .seconds) stays sane
        # if the wall clock was adjusted backwards during the run.
        part.seconds = max(0, int((end_time - start_time).total_seconds()))

        # The alarm only fires one second after the limit, so also fail parts
        # that finished but took longer than allowed -- unless the timeout has
        # already been reported above.  Parts without a limit are exempt.
        if part.max_seconds is not None and part.seconds > part.max_seconds and not timed_out:
            self._cancel_alarm()
            self.fail('Time limit (%s seconds) exceeded.' % part.max_seconds)

        if part.is_hidden() and not self.useSolution:
            display_points = '???/%s points (hidden test ungraded)' % part.max_points
        else:
            display_points = '%s/%s points' % (part.points, part.max_points)
        print('----- END PART %s [%s]' % (
            part.number, display_points))
        print()

    def get_selected_parts(self):
        """Return the parts matching the selected part name (if any) and the mode."""
        parts = []
        for part in self.parts:
            if self.selectedPartName is not None and self.selectedPartName != part.number:
                continue
            if self.mode == BASIC_MODE:
                if part.is_basic():
                    parts.append(part)
            elif self.mode == AUTO_MODE:
                if part.is_auto():
                    parts.append(part)
            elif self.mode == ALL_MODE:
                parts.append(part)
            else:
                raise Exception("Invalid mode: {}".format(self.mode))
        return parts

    def grade(self):
        """Grade the selected parts (unless --summary) and emit the result."""
        parts = self.get_selected_parts()

        result = {'mode': self.mode}

        # Grade it!
        if not self.params.summary and not self.fatalError:
            print('========== START GRADING')
            for part in parts:
                # Manual parts have nothing to execute; they only appear in the result.
                if part.is_auto():
                    self.grade_part(part)

            # When students have it (not useSolution), only include basic tests.
            active_parts = [part for part in parts if self.useSolution or part.basic]

            total_points = sum(part.points for part in active_parts if not part.extra_credit)
            extra_credit = sum(part.points for part in active_parts if part.extra_credit)
            max_total_points = sum(part.max_points for part in active_parts if not part.extra_credit)
            max_extra_credit = sum(part.max_points for part in active_parts if part.extra_credit)

            print('========== END GRADING [%s/%s points + %s/%s extra credit]' %
                  (total_points, max_total_points, extra_credit, max_extra_credit))

        result_parts = []
        leaderboard = []
        for part in parts:
            r = {'number': part.number, 'name': part.description}

            if self.params.summary:
                # Just print out specification of the part
                r['description'] = part.description
                r['max_seconds'] = part.max_seconds
                r['max_points'] = part.max_points
                r['extra_credit'] = part.extra_credit
                r['basic'] = part.basic
            else:
                r['score'] = part.points
                # Force max_score to be 0 for extra credits for displaying correct total points on Gradescope
                r['max_score'] = 0 if (part.extra_credit and self.mode == AUTO_MODE) else part.max_points
                r["visibility"] = "after_published" if part.is_hidden() else "visible"
                r['seconds'] = part.seconds
                if part.side is not None:
                    r['side'] = part.side
                r['output'] = "\n".join(part.messages)

            if part.side is not None:
                for k in part.side:
                    leaderboard.append({"name": k, "value": part.side[k]})
            result_parts.append(r)
        result['tests'] = result_parts
        result['leaderboard'] = leaderboard

        self.output(self.mode, result)

        if self.params.summary:
            self._print_point_summary('points', False)
            self._print_point_summary('extra credit', True)

    def _print_point_summary(self, name, select_extra_credit):
        """Print the maximum points of all regular (or all extra credit) parts."""
        parts_to_display = [p for p in self.parts if p.extra_credit == select_extra_credit]
        max_basic_points = sum(p.max_points for p in parts_to_display if p.is_basic())
        max_hidden_points = sum(p.max_points for p in parts_to_display if p.is_hidden())
        max_manual_points = sum(p.max_points for p in parts_to_display if p.is_manual())
        max_total_points_found = max_basic_points + max_hidden_points + max_manual_points
        print("Total %s (basic auto/coding + hidden auto/coding + manual/written): %d + %d + %d = %d" %
              (name, max_basic_points, max_hidden_points, max_manual_points, max_total_points_found))
        if not select_extra_credit and max_total_points_found != self.EXPECTED_TOTAL_POINTS:
            print('WARNING: max_total_points = {} is not {}'.format(
                max_total_points_found, self.EXPECTED_TOTAL_POINTS))

    def output(self, mode, result):
        """Write ``result`` as JSON and/or JS, as requested on the command line."""
        if self.params.json:
            path = 'grader-{}.json'.format(mode)
            with open(path, 'w') as out:
                print(json.dumps(result), file=out)
            print('Wrote to %s' % path)
        if self.params.js:
            path = 'grader-{}.js'.format(mode)
            with open(path, 'w') as out:
                print('var ' + mode + 'Result = ' + json.dumps(result) + ';', file=out)
            print('Wrote to %s' % path)

    # Called by the grader to modify state of the current part

    def add_points(self, amt):
        """Add ``amt`` points to the current part."""
        self.currentPart.points += amt

    def assign_full_credit(self):
        """Give the current part its maximum points unless it has already failed."""
        if not self.currentPart.failed:
            self.currentPart.points = self.currentPart.max_points
        return True

    def assign_partial_credit(self, credit):
        """Set the points of the current part to ``credit``."""
        self.currentPart.points = credit
        return True

    def set_side(self, side):
        """Attach side information to the current part."""
        self.currentPart.side = side

    @staticmethod
    def truncate_string(string, length=200):
        """Return ``string`` as text, cut to ``length`` characters plus '...'.

        Non-string values (None, numbers, lists, ...) are converted with
        ``str()`` first, so any answer can be reported safely.
        """
        text = str(string)
        if len(text) <= length:
            return text
        return text[:length] + '...'

    def require_is_numeric(self, answer):
        """Pass when ``answer`` is an int or a float; fail otherwise."""
        if isinstance(answer, (int, float)):
            return self.assign_full_credit()
        return self.fail("Expected either int or float, but got '%s'" % self.truncate_string(answer))

    def require_is_one_of(self, true_answers, pred_answer):
        """Pass when ``pred_answer`` is contained in ``true_answers``."""
        if pred_answer in true_answers:
            return self.assign_full_credit()
        return self.fail("Expected one of %s, but got '%s'" % (
            self.truncate_string(true_answers), self.truncate_string(pred_answer)))

    def require_is_equal(self, true_answer, pred_answer, tolerance=TOLERANCE):
        """Pass when the answers are equal according to ``is_equal``."""
        if is_equal(true_answer, pred_answer, tolerance):
            return self.assign_full_credit()
        return self.fail("Expected '%s', but got '%s'" % (
            self.truncate_string(str(true_answer)), self.truncate_string(str(pred_answer))))

    def require_is_less_than(self, less_than_quantity, pred_answer):
        """Pass when ``pred_answer`` is strictly below ``less_than_quantity``."""
        if pred_answer < less_than_quantity:
            return self.assign_full_credit()
        return self.fail("Expected to be < %f, but got %f" % (less_than_quantity, pred_answer))

    def require_is_greater_than(self, greater_than_quantity, pred_answer):
        """Pass when ``pred_answer`` is strictly above ``greater_than_quantity``."""
        if pred_answer > greater_than_quantity:
            return self.assign_full_credit()
        return self.fail("Expected to be > %f, but got %f" %
                         (greater_than_quantity, pred_answer))

    def require_is_true(self, pred_answer):
        """Pass when ``pred_answer`` is truthy."""
        if pred_answer:
            return self.assign_full_credit()
        return self.fail("Expected to be true, but got false")

    def fail(self, message):
        """Record a failure message and zero out the current part; returns False."""
        print('FAIL:', message)
        self.add_message(message)
        if self.currentPart:
            self.currentPart.points = 0
            self.currentPart.fail()
        return False

    def print_exception(self):
        """Report the current exception's traceback, minus grader-internal frames."""
        tb = [item for item in traceback.extract_tb(sys.exc_info()[2]) if not is_traceback_item_grader(item)]
        for item in traceback.format_list(tb):
            self.fail('%s' % item)

    def add_message(self, message):
        """Store a message on the current part, or on the grader if no part is active."""
        if not self.useSolution:
            print(message)
        if self.currentPart:
            self.currentPart.messages.append(message)
        else:
            self.messages.append(message)