bib-paper/scripts/find_grade_gain.py

52 lines
1.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import argparse
def main(argv=None):
    """Estimate how GPA changes with hours studied, using the data from [1].

    Prints the Spearman rank-order correlation between hours studied and GPA
    together with its p-value, then the slope of a least-squares linear fit
    expressed in points/hour and points/second. When ``--plot`` is given, the
    data points and the fitted line are additionally shown with matplotlib.

    Args:
        argv: Command-line arguments to parse, without the program name.
            When ``None`` (the default), argparse falls back to
            ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    Returns:
        None. All results are written to standard output.

    References:
        [1] H. Schuman, E. Walsh, C. Olson, and B. Etheridge, "Effort and
        Reward: The Assumption that College Grades Are Affected by Quantity
        of Study*," Social Forces, vol. 63, no. 4, pp. 945-966, June 1985.
    """
    # Parse the command line first so that invalid options or --help exit
    # before any computation is done.
    parser = argparse.ArgumentParser()
    parser.add_argument("--plot", action="store_true")
    args = parser.parse_args(argv)

    # Data points taken from [1, p. 950]
    hours_studied = np.array([1, 2.5, 3.5, 4.5, 5.5, 6.5])
    gpa = np.array([2.94, 2.91, 2.97, 2.86, 3.25, 3.18])

    # Compute Spearman rank order correlation
    corr, p_value = stats.spearmanr(hours_studied, gpa)
    print("======== Spearman rank order correlation ========")
    print(f"Correlation: {corr}")
    print(f"p-value: {p_value}")

    # Perform linear regression. Only the slope and intercept are used:
    # printing the regression p-value doesn't make much sense, because we
    # don't know whether the assumptions for the test are satisfied.
    fit = stats.linregress(hours_studied, gpa)
    slope = fit.slope
    intercept = fit.intercept
    print("======== Linear regression ========")
    # 60 * 60 converts the per-hour slope into a per-second slope.
    print(f"slope: {slope:.8f} points/hour = {slope / (60 * 60):.8f} points/second")

    if args.plot:
        plt.plot(hours_studied, gpa, label="Plot from publication")
        plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit")
        plt.xlabel("Hours studied")
        plt.ylabel("GPA")
        plt.legend()
        plt.show()
# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()