From 3181b2ac4ef26c1a0e26b05e11fa61be313e29f4 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos <an.tsouchlos@gmail.com>
Date: Wed, 12 Mar 2025 16:31:16 +0100
Subject: [PATCH] Add mean bottle size, mathematical model

---
 paper.tex                             | 106 +++++++++++++++---------
 res/full_participant_measurement.csv  | 114 ++++++++++++++++++++++++++
 scripts/calculate_mean_bottle_size.py |  45 ++++++++++
 scripts/perform_hypothesis_tests.py   |   2 +-
 4 files changed, 227 insertions(+), 40 deletions(-)
 create mode 100644 res/full_participant_measurement.csv
 create mode 100644 scripts/calculate_mean_bottle_size.py

diff --git a/paper.tex b/paper.tex
index dd9b97d..4f7d364 100644
--- a/paper.tex
+++ b/paper.tex
@@ -159,10 +159,13 @@ of the choice of hydration strategy of the participants: $S_\text{L}$ denotes
 pressing the left button of the water dispenser, $S_\text{R}$ the right one,
 and $S_\text{B}$ pressing both buttons.
 
-As is always the case with measurements, care must be taken not to alter
-quantities by measuring them. To this end, we made sure only to take system
-measurements in the absence of participants and to only record data on the
-behaviour of participants discreetly.
+For the system measurement, $10$ datapoints were recorded for each strategy;
+for the behavioral measurement, $113$ datapoints were recorded in total.
+
+% As is always the case with measurements, care must be taken not to alter
+% quantities by measuring them. To this end, we made sure only to take system
+% measurements in the absence of participants and to only record data on the
+% behaviour of participants discreetly.
 
 % TODO: Describe the actual measurement setup? (e.g., filling up a 0.7l bottle
 % and timing with a standard smartphone timer)
@@ -177,8 +180,8 @@ behaviour of participants discreetly.
 
     \begin{tikzpicture}
         \begin{axis}[
-            width=0.85\columnwidth,
-            height=0.4\columnwidth,
+            width=0.8\columnwidth,
+            height=0.35\columnwidth,
             boxplot/draw direction = x,
             grid,
             ytick = {1, 2, 3},
@@ -199,11 +202,13 @@ behaviour of participants discreetly.
         \end{axis}
     \end{tikzpicture}
 
-    \caption{Flow rate of the water dispenser depending on the button pressed.}
+    \vspace*{-3mm}
+
+    \caption{Flow rate of the water dispenser depending on the hydration strategy.}
     \label{fig:System}
 \end{figure}
 
-\begin{figure}[H]
+\begin{figure}
     \centering
 
     \begin{tikzpicture}
@@ -211,31 +216,39 @@ behaviour of participants discreetly.
             ybar,
             bar width=15mm,
             width=\columnwidth,
-            height=0.4\columnwidth,
+            height=0.35\columnwidth,
             area style,
             xtick = {0, 1, 2},
             grid,
             ymin = 0,
             enlarge x limits=0.3,
-            xticklabels = {Left button, Right button, Both buttons},
-            ylabel = {No. of presses},
+            xticklabels = {\footnotesize{$S_\text{L}$ (Left button)}, \footnotesize{$S_\text{R}$ (Right button)}, \footnotesize{$S_\text{B}$ (Both buttons)}},
+            ylabel = {No. chosen},
         ]
             \addplot+[ybar,mark=no,fill=scol1] table[skip first n=1, col sep=comma, x=button, y=count]
                 {res/left_right_distribution.csv};
         \end{axis}
     \end{tikzpicture}
 
+    \vspace*{-3mm}
+
     \caption{Distribution of the choice of hydration strategy.}
     \label{fig:Behavior}
 \end{figure}
 
-Fig. \ref{fig:System} indicates that $S_\text{L}$ is the slowest
-strategy, while $S_\text{R}$ and $S_\text{B}$ are similar.
-Due to the small sample size ($N=10$) and the unknown distribution, the test
-we chose to verify this observation is a Mann-Whitney U test. We found that
-$S _\text{L}$ is faster than $S_\text{R}$ with a significance of $p < 0.0001$,
-while no significant statement could be made about $S_\text{R}$ and
-$S_\text{B}$.
+
+Fig. \ref{fig:System} shows the results of the system measurement.
+We observe that $S_\text{L}$ is the slowest strategy, while $S_\text{R}$
+and $S_\text{B}$ are similar. Due to the small sample size and the
+unknown distribution, the test we chose to verify this observation is a
+Mann-Whitney U test. We found that $S_\text{L}$ is slower than $S_\text{R}$ with a
+significance of $p < 0.0001$, while no significant statement could be made
+about $S_\text{R}$ and $S_\text{B}$.
+
+Fig. \ref{fig:Behavior} shows the results of the behavioral measurement.
+During this part of the experiment, we also measured the time each participant
+needed to fill up their bottle. Using the measured flowrates, we calculated
+the mean bottle size to be $\SI{673.92}{\milli\liter}$.
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -243,25 +256,40 @@ $S_\text{B}$.
 
 
 We examine the effects of the choice of hydration strategy. To
-this end, we first estimate the amount of time saved by choosing a certain
-strategy and relate that to a possible gain in academic performance, i.e.,
-grades.%
+this end, we start by estimating the time savings possible by always
+choosing the fastest strategy:%
 %
-\todo{
-\begin{itemize}
-    \item ``We measured the average bottle size''
-    \item Quantify relationship: Compute average time saving by using right
-        button $\rightarrow$ translate into grade gain
-    \item People using the left button slow down the entire queue
-        behind them, not only themselves
-\end{itemize}
-}%
+% We can model the time needed for one person to refill their bottle as a random
+% variable (RV) $T_1 = V/R$ and the time saved by choosing the fastest strategy
+% as $\Delta T_1 = T_1 - V/\max r$, where $V$ and $R$ are RVs representing the
+% bottle volume and flowrate. The potential time saving for the last person in a
+% queue of $N$ people is thus $\Delta T_N = N\cdot\Delta T_1$. We can then model
+% the total time savings as $\Delta T_\text{tot} = \sum_{n=1}^{N} \Delta T_n$,
+% where N is an RV describing the queue length. Assuming the independence of all
+% RVs we can compute the mean total time savings as
+%
+\begin{gather*}
+    T_1 = V/R, \hspace{3mm} \Delta T_1 = T_1 - V/\max r, \hspace{3mm} \Delta T_n = n \cdot \Delta T_1 \\
+    \Delta T_\text{tot} = \sum_{n=1}^{N} \Delta T_n = \sum_{n=1}^{N} n \cdot \Delta T_1 = \Delta T_1 \frac{N\mleft( N+1 \mright)}{2} \\
+    E\mleft\{ \Delta T_\text{tot} \mright\} = E\mleft\{ \Delta T_1 \mright\} \cdot \mleft[ E\mleft\{ N^2 \mright\} + E\mleft\{ N \mright\} \mright]/2
+    ,%
+\end{gather*}
+%
+where $V$ and $R$ are random variables (RVs) representing the volume of a
+bottle and the flowrate, $\Delta T_n$ describes the time the last of $n$
+people saves, $\Delta T_\text{tot}$ the total time savings and $N$ the length
+of the queue. It is plausible to assume independence of $R$, $V$, and $N$.
 
 Many attempts have been made in the literature to relate the time spent
 studying to academic achievement -  see, e.g.
 \cite{schuman_effort_1985, zulauf_use_1999, michaels_academic_1989, dickinson_effect_1990}.
 The overwhelming consensus is that there is a significant relationship,
 though it is a weak one.
+%
+\todo{
+\begin{itemize}
+    \item Compute possible grade gain
+\end{itemize}}
 %Many of the studies were only performed over
 % a period of one week or even day, so we believe care should be taken when
 % generlizing these results. Nevertheless, the overwhelming consensus in the
@@ -273,20 +301,20 @@ though it is a weak one.
 
 
 In this study, we investigated how the choice of hydration strategy affects
-the average academic performance of a student. We found that always choosing to
+the average academic performance. We found that always choosing to
 press the right button leads to an average time gain of \todo{\SI{10}{\second}}
 per day, which translates into a grade improvement of $\todo{0.001}$ levels.
 We thus propose a novel and broadly applicable strategy to boost the average
 academic performance of KIT students: always pressing the right button.
 
-Further research is needed to develop a better model of how the choice of
-hydration strategy is related to academic performance. We
-suspect that there is a compounding effect that leads to $S_\text{L}$ being an
-even worse choice of hydration strategy: When the queue is long, students are
-less likely to refill their empty water bottles, leading to reduced mental
-ability. Nevertheless, we believe that with this work we have laid a solid
-foundation and hope that our results will find widespread acceptance among the
-local student population.
+% Further research is needed to develop a better model of how the choice of
+% hydration strategy is related to academic performance. We
+% suspect that there is a compounding effect that leads to $S_\text{L}$ being an
+% even worse choice of hydration strategy: When the queue is long, students are
+% less likely to refill their empty water bottles, leading to reduced mental
+% ability. Nevertheless, we believe that with this work we have laid a solid
+% foundation and hope that our results will find widespread acceptance among the
+% local student population.
 
 
 %
diff --git a/res/full_participant_measurement.csv b/res/full_participant_measurement.csv
new file mode 100644
index 0000000..f0da16f
--- /dev/null
+++ b/res/full_participant_measurement.csv
@@ -0,0 +1,114 @@
+time,button
+28,left
+22,left
+17,left
+40,left
+24,left
+41,left
+11,left
+11,left
+26.56,left
+37,left
+30,left
+30,left
+8,left
+21,left
+20,left
+19,left
+28,left
+20,left
+21,left
+16.43,left
+16,left
+29,left
+20,left
+24,left
+22,left
+15,left
+13,left
+22,left
+23,left
+40,left
+19.8,left
+35.38,left
+21,left
+16.3,left
+29.3,left
+30.3,left
+30.2,left
+25,left
+14,left
+14.1,left
+40,left
+24.4,left
+5.2,left
+50,left
+29.7,left
+39,left
+17,left
+40.7,left
+27.3,left
+19.8,left
+7.55,right
+14,right
+9,right
+13,right
+5,right
+13,right
+13.58,right
+15.58,right
+25,right
+20,right
+14,right
+13,right
+14,right
+13.3,right
+19,right
+13,right
+10,right
+15,right
+14,right
+19.4,right
+12.8,right
+13.5,right
+19.31,right
+27.5,right
+13.1,right
+23.6,right
+15,right
+18.7,right
+18,right
+12.7,right
+40.3,right
+12.86,right
+22.9,right
+10,right
+20,right
+12,right
+19,right
+39.8,right
+20,both
+20,both
+15,both
+19,both
+13,both
+7,both
+15,both
+17.3,both
+12,both
+23,both
+11.26,both
+35.66,both
+13.54,both
+27.81,both
+16.83,both
+17.13,both
+17.8,both
+39,both
+11,both
+13.6,both
+21.7,both
+14.25,both
+12,both
+12.9,both
+12.35,both
diff --git a/scripts/calculate_mean_bottle_size.py b/scripts/calculate_mean_bottle_size.py
new file mode 100644
index 0000000..70b59f5
--- /dev/null
+++ b/scripts/calculate_mean_bottle_size.py
@@ -0,0 +1,45 @@
+import numpy as np
+import pandas as pd
+
+
+filename_participants = "res/full_participant_measurement.csv"  # columns: time,button
+
+filename_left = "res/flowrate_left.csv"  # main() reads column "flowrate"
+filename_right = "res/flowrate_right.csv"  # main() reads column "flowrate"
+filename_both = "res/flowrate_both.csv"  # main() reads column "flowrate"
+
+
+def main():
+    """Print the mean bottle size estimated from fill-up times and mean flowrates."""
+    # Get bottle fillup times, split by the value of the "button" column
+    df_part = pd.read_csv(filename_participants)
+
+    times_left = np.array(df_part[df_part["button"] == "left"]["time"])
+    times_right = np.array(df_part[df_part["button"] == "right"]["time"])
+    times_both = np.array(df_part[df_part["button"] == "both"]["time"])
+
+    # Get mean flowrates
+    # (one mean per strategy, taken over the "flowrate" column of its CSV)
+    df_left = pd.read_csv(filename_left)
+    df_right = pd.read_csv(filename_right)
+    df_both = pd.read_csv(filename_both)
+
+    flowrate_left = np.mean(np.array(df_left["flowrate"]))
+    flowrate_right = np.mean(np.array(df_right["flowrate"]))
+    flowrate_both = np.mean(np.array(df_both["flowrate"]))
+
+    # Calculate mean bottle size
+    # (size estimate per row = fill-up time * mean flowrate of the button used)
+    sizes_left = times_left * flowrate_left
+    sizes_right = times_right * flowrate_right
+    sizes_both = times_both * flowrate_both
+    # Pool all estimates so that every recorded fill-up has equal weight in the mean
+    sizes = np.concatenate([sizes_left, sizes_right, sizes_both])
+
+    mean_size = np.mean(sizes)
+
+    print(f"Mean bottle size: {mean_size}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/perform_hypothesis_tests.py b/scripts/perform_hypothesis_tests.py
index 252a7e5..98d17ba 100644
--- a/scripts/perform_hypothesis_tests.py
+++ b/scripts/perform_hypothesis_tests.py
@@ -16,7 +16,7 @@ def main():
     flowrate_right = np.array(df_right["flowrate"])
 
     df_both = pd.read_csv(filename_both)
-    flowrate_both = np.array(df_right["flowrate"])
+    flowrate_both = np.array(df_both["flowrate"])
     
     U_lr, p_lr = mannwhitneyu(flowrate_left, flowrate_both, method="exact")
     U_rb, p_rb = mannwhitneyu(flowrate_right, flowrate_both, method="exact")