import argparse

import matplotlib.pyplot as plt
import numpy as np
from scipy import stats


def _show_plot(hours_studied, gpa, slope, intercept):
    """Draw the published data together with the fitted regression line."""
    plt.plot(hours_studied, gpa, label="Plot from publication")
    plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit")
    plt.xlabel("Hours studied")
    plt.ylabel("GPA")
    plt.legend()
    plt.show()


def main(argv=None):
    """Print rank-correlation and linear-regression results for GPA vs. hours studied.

    The data points are taken from [1, p. 950]. The Spearman rank order
    correlation (with its p-value) and the slope of a least-squares line are
    printed to standard output. With ``--plot`` the data and the fitted line
    are also shown in a matplotlib window.

    Args:
        argv: Command line arguments, excluding the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    [1] H. Schuman, E. Walsh, C. Olson, and B. Etheridge, “Effort and Reward:
    The Assumption that College Grades Are Affected by Quantity of Study*,”
    Social Forces, vol. 63, no. 4, pp. 945–966, June 1985.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("--plot", action="store_true")
    args = parser.parse_args(argv)

    # [1, p. 950]
    hours_studied = np.array([1, 2.5, 3.5, 4.5, 5.5, 6.5])
    gpa = np.array([2.94, 2.91, 2.97, 2.86, 3.25, 3.18])

    # Compute Spearman rank order correlation
    corr, p_value = stats.spearmanr(hours_studied, gpa)
    print("======== Spearman rank order correlation ========")
    print(f"Correlation: {corr}")
    print(f"p-value: {p_value}")

    # Perform linear regression. Only slope and intercept are read from the
    # result, so the Spearman p-value above is not shadowed by the
    # regression's own p-value.
    fit = stats.linregress(hours_studied, gpa)
    slope, intercept = fit.slope, fit.intercept
    print("======== Linear regression ========")
    print(f"slope: {slope:.8f} points/hour = {slope / (60 * 60):.8f} points/second")
    # Printing the p-value here doesn't make much sense, because we don't know
    # whether the assumptions for the test are satisfied

    if args.plot:
        _show_plot(hours_studied, gpa, slope, intercept)


if __name__ == "__main__":
    main()