# bib-paper/scripts/find_grade_gain.py

import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import argparse


def main(argv=None):
    """
    Estimate how GPA changes with hours studied, using the data from [1].

    Prints the Spearman rank order correlation (with its p-value) between
    hours studied and GPA, followed by the slope of a linear least-squares
    fit expressed in points/hour and points/second. When ``--plot`` is
    given, the data and the fitted line are also shown in a matplotlib
    window.

    Args:
        argv: Command line arguments to parse, without the program name.
            When ``None`` (the default), argparse falls back to
            ``sys.argv[1:]``, so calling ``main()`` behaves as before.

    [1] H. Schuman, E. Walsh, C. Olson, and B. Etheridge, “Effort and Reward:
        The Assumption that College Grades Are Affected by Quantity of Study*,”
        Social Forces, vol. 63, no. 4, pp. 945–966, June 1985.
    """
    # [1, p. 950]
    hours_studied = np.array([1, 2.5, 3.5, 4.5, 5.5, 6.5])
    gpa = np.array([2.94, 2.91, 2.97, 2.86, 3.25, 3.18])

    # Parse command line arguments

    parser = argparse.ArgumentParser(
        description="Estimate the GPA gain per hour of study."
    )
    parser.add_argument(
        "--plot",
        action="store_true",
        help="show the data and the fitted regression line",
    )

    args = parser.parse_args(argv)

    # Compute Spearman rank order correlation

    corr, p = stats.spearmanr(hours_studied, gpa)

    print("======== Spearman rank order correlation ========")
    print(f"Correlation: {corr}")
    print(f"p-value: {p}")

    # Perform linear regression

    # Only the slope and intercept are needed; reading them from the result
    # object avoids unused locals and keeps the Spearman p-value above from
    # being silently overwritten.
    fit = stats.linregress(hours_studied, gpa)
    slope, intercept = fit.slope, fit.intercept

    print("======== Linear regression ========")
    print(f"slope: {slope:.8f} points/hour = {slope / (60 * 60):.8f} points/second")
    # Printing the p-value here doesn't make much sense, because we don't know
    # whether the assumptions for the test are satisfied

    if args.plot:
        plt.plot(hours_studied, gpa, label="Plot from publication")
        plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit")
        plt.xlabel("Hours studied")
        plt.ylabel("GPA")
        plt.legend()
        plt.show()


# Run the analysis only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()