diff --git a/scripts/find_grade_gain.py b/scripts/find_grade_gain.py index f4d4a47..3297f8d 100644 --- a/scripts/find_grade_gain.py +++ b/scripts/find_grade_gain.py @@ -1,6 +1,7 @@ import matplotlib.pyplot as plt from scipy import stats import numpy as np +import argparse def main(): @@ -13,16 +14,37 @@ def main(): hours_studied = np.array([1, 2.5, 3.5, 4.5, 5.5, 6.5]) gpa = np.array([2.94, 2.91, 2.97, 2.86, 3.25, 3.18]) + # Parse command line arguments + + parser = argparse.ArgumentParser() + parser.add_argument("--plot", action="store_true") + + args = parser.parse_args() + + # Compute Spearman rank order correlation + + corr, p = stats.spearmanr(hours_studied, gpa) + + print("======== Spearman rank order correlation ========") + print(f"Correlation: {corr}") + print(f"p-value: {p}") + + # Perform linear regression + slope, intercept, r, p, std_err = stats.linregress(hours_studied, gpa) - print(f"GPA/hour (slope) of best fit line: {slope}") + print("======== Linear regression ========") + print(f"slope: {slope:.8f} points/hour = {slope / (60 * 60):.8f} points/second") + # Printing the p-value here doesn't make much sense, because we don't know + # whether the assumptions for the test are satisfied - plt.plot(hours_studied, gpa, label="Plot from publication") - plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit") - plt.xlabel("Hours studied") - plt.ylabel("GPA") - plt.legend() - plt.show() + if args.plot: + plt.plot(hours_studied, gpa, label="Plot from publication") + plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit") + plt.xlabel("Hours studied") + plt.ylabel("GPA") + plt.legend() + plt.show() if __name__ == "__main__":