52 lines
1.5 KiB
Python
52 lines
1.5 KiB
Python
import matplotlib.pyplot as plt
|
||
from scipy import stats
|
||
import numpy as np
|
||
import argparse
|
||
|
||
|
||
def main():
|
||
"""
|
||
[1] H. Schuman, E. Walsh, C. Olson, and B. Etheridge, “Effort and Reward:
|
||
The Assumption that College Grades Are Affected by Quantity of Study*,”
|
||
Social Forces, vol. 63, no. 4, pp. 945–966, June 1985.
|
||
"""
|
||
# [1, p. 950]
|
||
hours_studied = np.array([1, 2.5, 3.5, 4.5, 5.5, 6.5])
|
||
gpa = np.array([2.94, 2.91, 2.97, 2.86, 3.25, 3.18])
|
||
|
||
# Parse command line arguments
|
||
|
||
parser = argparse.ArgumentParser()
|
||
parser.add_argument("--plot", action="store_true")
|
||
|
||
args = parser.parse_args()
|
||
|
||
# Compute Spearman rank order correlation
|
||
|
||
corr, p = stats.spearmanr(hours_studied, gpa)
|
||
|
||
print("======== Spearman rank order correlation ========")
|
||
print(f"Correlation: {corr}")
|
||
print(f"p-value: {p}")
|
||
|
||
# Perform linear regression
|
||
|
||
slope, intercept, r, p, std_err = stats.linregress(hours_studied, gpa)
|
||
|
||
print("======== Linear regression ========")
|
||
print(f"slope: {slope:.8f} points/hour = {slope / (60 * 60):.8f} points/second")
|
||
# Printing the p-value here doesn't make much sense, because we don't know
|
||
# whether the assumptions for the test are satisfied
|
||
|
||
if args.plot:
|
||
plt.plot(hours_studied, gpa, label="Plot from publication")
|
||
plt.plot(hours_studied, slope * hours_studied + intercept, label="Best fit")
|
||
plt.xlabel("Hours studied")
|
||
plt.ylabel("GPA")
|
||
plt.legend()
|
||
plt.show()
|
||
|
||
|
||
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|