From e39ae190f3fa046e58d43178188b546c0326a7ce Mon Sep 17 00:00:00 2001 From: Andreas Tsouchlos Date: Thu, 12 Feb 2026 23:21:15 +0100 Subject: [PATCH] Add complete theory for exercise 1 --- src/2026-02-13/presentation.tex | 479 ++++++++++++++++++++++++++------ 1 file changed, 388 insertions(+), 91 deletions(-) diff --git a/src/2026-02-13/presentation.tex b/src/2026-02-13/presentation.tex index e0a660d..c4db74f 100644 --- a/src/2026-02-13/presentation.tex +++ b/src/2026-02-13/presentation.tex @@ -80,6 +80,20 @@ } } +\newlength{\depthofprodsign} +\setlength{\depthofprodsign}{\depthof{$\prod$}} +\newlength{\totalheightofprodsign} +\newcommand{\nprod}[1][1.4]{ + \mathop{ + \raisebox + {-#1\depthofprodsign+1\depthofprodsign} + {\scalebox + {#1} + {$\displaystyle\prod$}% + } + } +} + % \tikzstyle{every node}=[font=\small] % \captionsetup[sub]{font=small} @@ -122,7 +136,8 @@ \begin{itemize} \item Einfache Stichprobe \begin{gather*} - X_1, \ldots, X_N \hspace{2mm}\overbrace{\text{unabhäng und haben + X_1, \ldots, X_N + \hspace{2mm}\overbrace{\text{unabhängig und haben dieselbe Verteilung}}^{\text{``iid.''}} \hspace*{5mm} \rightarrow\hspace*{5mm} \bm{X} := @@ -157,7 +172,7 @@ X_1 \\ \vdots \\ X_N - \end{pmatrix}\sim f_{\bm{X}}$ + \end{pmatrix}\sim P_{\bm{X}}$ }; \node[right=of model] (x) { @@ -246,7 +261,7 @@ X_1 \\ \vdots \\ X_N - \end{pmatrix}\sim f_{\bm{X}}$ + \end{pmatrix}\sim P_{\bm{X}}$ }; \draw[ @@ -266,7 +281,7 @@ \end{frame} \begin{frame} - \frametitle{Punktschätzer I} + \frametitle{Punktschätzer} \vspace*{-10mm} @@ -276,77 +291,272 @@ \begin{figure}[H] \centering - \begin{tikzpicture} - \node[ - rectangle, - densely dashed, - draw, - inner sep=5mm, - ] (x) { - $ - \bm{x} = - \begin{pmatrix} - 26{,}2 \\ - 27{,}8 \\ - 25{,}7 \\ - \vdots - \end{pmatrix} - $ - }; + \only<1>{ + \begin{tikzpicture} + \node[ + rectangle, + densely dashed, + draw, + inner sep=5mm, + ] (x) { + $ + \bm{x} = + \begin{pmatrix} + 26{,}2 \\ + 27{,}8 \\ + 25{,}7 \\ + \vdots + \end{pmatrix} + $ + 
+                    };
-            \node[
-                rectangle,
-                right=of x,
-                minimum width=5cm, minimum height=2cm,
-                draw=kit-green, fill=kit-green!20,
-                line width=1pt,
-                align=center,
-                inner sep=3mm
-            ] (est) {Schätzer\\[5mm] $T(\bm{x}) =
-                \displaystyle\frac{1}{N}
-                \sum_{i=0}^{N} x_i$};
+                    \node[
+                        draw opacity=0,
+                        fill opacity=0,
+                        rectangle,
+                        right=of x,
+                        minimum width=5cm, minimum height=2cm,
+                        draw=kit-green, fill=kit-green!20,
+                        line width=1pt,
+                        align=center,
+                        inner sep=3mm
+                    ] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
+                        \displaystyle\frac{1}{N}
+                        \nsum_{i=1}^{N} x_i$};
-            \node[
-                above=of est,
-                rectangle,
-                densely dashed,
-                draw,
-                inner sep=5mm,
-            ] (model) {
-                $X_i \sim \mathcal{N}(\mu = \vartheta, \sigma^2 = 1)$
-            };
+                    \node[
+                        draw opacity=0,
+                        fill opacity=0,
+                        above=of est,
+                        rectangle,
+                        densely dashed,
+                        draw,
+                        inner sep=5mm,
+                    ] (model) {
+                        $X_i \sim \mathcal{N}(\mu = \vartheta,
+                        \sigma^2 = 1)$
+                    };
-            \node[right=of est] (theta) {$\hat{\vartheta}
-                = 26{,}0$};
+                    \node[right=of est, draw opacity=0, fill
+                        opacity=0] (theta) {$\hat{\vartheta} = 26{,}0$};
-            \node[below] at (x.south) {Beobachtung};
-            \node[above] at (model.north) {Parametrisiertes Modell};
+                    \node[below] at (x.south) {Beobachtung};
+                    \node[above, draw opacity=0, fill opacity=0]
+                        at (model.north) {Parametrisiertes Modell};
+                \end{tikzpicture}
+            }%
+            \only<2>{
+                \begin{tikzpicture}
+                    \node[
+                        rectangle,
+                        densely dashed,
+                        draw,
+                        inner sep=5mm,
+                    ] (x) {
+                        $
+                        \bm{x} =
+                        \begin{pmatrix}
+                            26{,}2 \\
+                            27{,}8 \\
+                            25{,}7 \\
+                            \vdots
+                        \end{pmatrix}
+                        $
+                    };
-            \draw[-{Latex}, line width=1pt] (x) -- (est);
-            \draw[-{Latex}, line width=1pt] (model) -- (est);
-            \draw[-{Latex}, line width=1pt] (model) -- (est);
-            \draw[-{Latex}, line width=1pt] (est) -- (theta);
-        \end{tikzpicture}
+                    \node[
+                        draw opacity=0,
+                        fill opacity=0,
+                        rectangle,
+                        right=of x,
+                        minimum width=5cm, minimum height=2cm,
+                        draw=kit-green, fill=kit-green!20,
+                        line width=1pt,
+                        align=center,
+                        inner sep=3mm
+                    ] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
+                        \displaystyle\frac{1}{N}
+                        \nsum_{i=1}^{N} x_i$};
+
+                    \node[
+                        above=of est,
+                        rectangle,
+                        densely dashed,
+                        draw,
+                        inner sep=5mm,
+                    ] (model) {
+                        $X_i \sim \mathcal{N}(\mu = \vartheta,
+                        \sigma^2 = 1)$
+                    };
+
+                    \node[right=of est, draw opacity=0, fill
+                        opacity=0] (theta) {$\hat{\vartheta}
+                        = 26{,}0$};
+
+                    \node[below] at (x.south) {Beobachtung};
+                    \node[above] at (model.north) {Parametrisiertes Modell};
+                \end{tikzpicture}
+            }%
+            \only<3->{
+                \begin{tikzpicture}
+                    \node[
+                        rectangle,
+                        densely dashed,
+                        draw,
+                        inner sep=5mm,
+                    ] (x) {
+                        $
+                        \bm{x} =
+                        \begin{pmatrix}
+                            26{,}2 \\
+                            27{,}8 \\
+                            25{,}7 \\
+                            \vdots
+                        \end{pmatrix}
+                        $
+                    };
+
+                    \node[
+                        rectangle,
+                        right=of x,
+                        minimum width=5cm, minimum height=2cm,
+                        draw=kit-green, fill=kit-green!20,
+                        line width=1pt,
+                        align=center,
+                        inner sep=3mm
+                    ] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
+                        \displaystyle\frac{1}{N}
+                        \nsum_{i=1}^{N} x_i$};
+
+                    \node[
+                        above=of est,
+                        rectangle,
+                        densely dashed,
+                        draw,
+                        inner sep=5mm,
+                    ] (model) {
+                        $X_i \sim \mathcal{N}(\mu = \vartheta,
+                        \sigma^2 = 1)$
+                    };
+
+                    \node[right=of est] (theta) {$\hat{\vartheta}
+                        = 26{,}0$};
+
+                    \node[below] at (x.south) {Beobachtung};
+                    \node[above] at (model.north) {Parametrisiertes Modell};
+
+                    \draw[-{Latex}, line width=1pt] (x) -- (est);
+                    \draw[-{Latex}, line width=1pt] (model) -- (est);
+                    \draw[-{Latex}, line width=1pt] (model) -- (est);
+                    \draw[-{Latex}, line width=1pt] (est) -- (theta);
+                \end{tikzpicture}
+            }
         \end{figure}
         \pause
+        \pause
         \item Punktschätzer: Rechenvorschrift zur Berechnung von Parametern aus Beobachtungen \\
+        \pause
         $\rightarrow$ Schätzer hängen von den Realisierungen ab und sind damit selbst auch zufällig \\
-        $\rightarrow$ Schätzer haben selbst einen Erwartungswert
-        und eine Varianz
+        $\rightarrow$ Schätzer haben einen Erwartungswert und eine Varianz
     \end{itemize}
 \end{frame}
 
 \begin{frame}
-    \frametitle{Punktschätzer II}
+    \frametitle{Likelihood und Log-Likelihood (Diskret)}
+
+    \vspace*{-10mm}
+
+
\begin{itemize} + \item Maximum Likelihood (ML) Schätzer\\ + \begin{minipage}{0.21\textwidth} + \phantom{a} + \end{minipage} + \begin{minipage}{0.16\textwidth} + \centering + \begin{align*} + \hat{\vartheta}_\text{ML} + = \argmax_\vartheta \hspace{2mm} P(\bm{X} = \bm{x} + \vert \vartheta) + \end{align*} + \end{minipage}% + \visible<2->{ + \begin{minipage}{0.15\textwidth} + \centering + \begin{align*} + \hspace*{-3mm} = \argmax_\vartheta + \hspace{2mm} L_{\bm{x}} (\vartheta) + \end{align*} + \end{minipage}% + } + \visible<3->{ + \begin{minipage}{0.13\textwidth} + \centering + \begin{align*} + \hspace*{-10mm} = \argmax_\vartheta + \hspace{2mm} l_{\bm{x}} (\vartheta) + \end{align*} + \end{minipage}% + } + + \begin{figure}[H] + \centering + ``Welches $\vartheta$ maximiert die + Wahrscheinlichkeit die beobachtete Realisierung zu bekommen?'' + \end{figure} + \pause + \item Likelihoodfunktion + \end{itemize} + + \vspace*{5mm} + + \begin{minipage}{0.5\textwidth} + \centering + \begin{align*} + L_{\bm{x}}(\vartheta) = P(\bm{X} = \bm{x} \vert + \vartheta) \overset{X_i \text{ + iid.}}{=\joinrel=\joinrel=} \nprod_{i=1}^{N} + P(X_i = x_i \vert \vartheta) + \end{align*} + \end{minipage}% + \begin{minipage}{0.5\textwidth} + \centering + \begin{lightgrayhighlightbox} + \vspace*{-3mm} + Beispiel + + \vspace*{-10mm} + \begin{gather*} + X_i \sim \text{\normalfont Binomial} (p = \vartheta, K) \\ + L_{\bm{x}}(\vartheta) = P(\bm{X}=\bm{x} \vert \vartheta) = + \nprod_{i=1}^{N} + \binom{K}{x_i}\vartheta^{x_i}(1-\vartheta)^{K-x_i} + \end{gather*} + \vspace*{-10mm} + \end{lightgrayhighlightbox} + \end{minipage}% + + \vspace*{5mm} + + \begin{itemize} + \pause + \item Log-Likelihoodfunktion + \begin{align*} + l_{\bm{x}}(\vartheta) = \ln \left( L_{\bm{x}}(\vartheta) \right) + \end{align*} + \end{itemize} +\end{frame} + +\begin{frame} + \frametitle{Eigenschaften von Punktschätzern} \vspace*{-10mm} \begin{itemize} \item Erwartungtreue \begin{gather*} - E(\hat{\vartheta}) = E\big( T(\bm{X}) \big) 
= \vartheta
+                    E(\hat{\vartheta}) = E\big( T_N(\bm{X}) \big) = \vartheta
                 \end{gather*}
                 \begin{figure}[H]
@@ -359,7 +569,7 @@
             \item Konsistenz
                 \begin{gather*}
                     \lim_{N\rightarrow \infty} P_\vartheta \big( \lvert
-                    T_N - \vartheta \rvert \ge \varepsilon \big) = 0
+                    \hat{\vartheta} - \vartheta \rvert \ge \varepsilon \big) = 0
                 \end{gather*}
                 \begin{figure}[H]
@@ -371,22 +581,108 @@
             \vspace*{10mm}
             \pause
             \item Effizienz (für erwartungtreue Schätzer)
-                \begin{gather*}
-                    V(\hat{\vartheta}) = \frac{1}{J(\vartheta)},
-                    \hspace*{5mm} J(\vartheta) = - E\left(
-                        \frac{\partial^2}{\partial \vartheta^2}
-                        \ln \mleft( f_\vartheta (\bm{X}) \mright)
-                    \right)
-                \end{gather*}
-
-                \begin{figure}[H]
-                    \centering
-                    ``Für jedes N hat der Schätzer jeweils die
-                    kleinstmögliche Varianz''
-                \end{figure}
+                \begin{minipage}{0.68\textwidth}
+                    \begin{gather*}
+                        V(\hat{\vartheta}) = \frac{1}{J(\vartheta)},
+                        \hspace*{5mm} J(\vartheta) = - E\left(
+                            \frac{\partial^2}{\partial \vartheta^2}
+                            l_{\bm{X}}(\vartheta)
+                        \right)
+                    \end{gather*}
+                    \begin{figure}[H]
+                        \centering
+                        ``Für jedes fixe N hat der Schätzer jeweils die
+                        kleinstmögliche Varianz''
+                    \end{figure}
+                \end{minipage}%
+                \begin{minipage}{0.3\textwidth}
+                    \begin{lightgrayhighlightbox}
+                        Cramér-Rao Ungleichung \\
+                        \vspace*{-6mm}
+                        \begin{gather*}
+                            V(\hat{\vartheta}) \ge \frac{1}{J(\vartheta)}
+                        \end{gather*}
+                        \vspace*{-10mm}
+                    \end{lightgrayhighlightbox}
+                \end{minipage}
     \end{itemize}
 \end{frame}
+\begin{frame}
+    \frametitle{Zusammenfassung}
+
+    \vspace*{-10mm}
+
+    \begin{columns}
+        \column{\kitthreecolumns}
+            % \begin{greenblock}{Einfache Stichprobe}
+            %     \vspace*{-8mm}
+            %     \begin{gather*}
+            %         \bm{X} =
+            %         \begin{pmatrix}
+            %             X_1 \\
+            %             \vdots \\
+            %             X_N
+            %         \end{pmatrix},\hspace{5mm}
+            %         X_1, \ldots, X_N \text{ iid.}
+            %     \end{gather*}
+            %     \vspace*{-3mm}
+            % \end{greenblock}
+            \begin{greenblock}{Likelihood und co.
(diskret)}
+                \vspace*{-10mm}
+                \begin{align*}
+                    \text{Likelihoodfunktion: } &L_{\bm{x}} (\vartheta) = P\left(
+                        \bm{X} = \bm{x}
+                    \vert \vartheta \right) \\[3mm]
+                    \text{Log-Likelihoodfunktion: } &l_{\bm{x}}
+                        (\vartheta) = \ln \left( L_{\bm{x}}
+                        (\vartheta) \right) \\[3mm]
+                    \text{ML-Schätzer: } &\hat{\vartheta}_\text{ML} =
+                        \argmax_\vartheta
+                        \hspace{2mm} l_{\bm{x}} (\vartheta)
+                \end{align*}
+                \vspace*{-6mm}
+            \end{greenblock}
+            \begin{greenblock}{Eigenschaften von Schätzern}
+                \vspace*{-10mm}
+                \begin{align*}
+                    \text{Erwartungstreue: } & E\left( \hat{\vartheta}
+                        \right) = \vartheta \\
+                    \text{Konsistenz: } & \lim_{N\rightarrow \infty}
+                        P\left( \lvert \hat{\vartheta}
+                        - \vartheta \rvert \ge \varepsilon
+                        \right) = 0 \\
+                    \text{Effizienz: } & V(\hat{\vartheta}) =
+                        \frac{1}{J(\vartheta)},\hspace{5mm} J(\vartheta) = - E\left(
+                        \frac{\partial^2}{\partial \vartheta^2}
+                        l_{\bm{x}}(\vartheta) \right)
+                \end{align*}
+                \vspace*{-3mm}
+            \end{greenblock}
+        \column{\kitthreecolumns}
+            \begin{greenblock}{Erwartungswert \& Varianz Rechenregeln}
+                \vspace*{-10mm}
+                \begin{align*}
+                    E(aX) &= aE(X) \\
+                    E(X + b) &= E(X) + b \\
+                    E(X + Y) &= E(X) + E(Y) \\[5mm]
+                    V(aX) &= a^2V(X) \\
+                    V(X + b) &= V(X) \\
+                    V(X + Y) &= V(X) + V(Y) \quad (X, Y \text{ unabhängig})
+                \end{align*}
+                \vspace*{-8mm}
+            \end{greenblock}
+            \begin{greenblock}{Tschebyscheff Ungleichung}
+                \vspace*{-8mm}
+                \begin{align*}
+                    P\left( \lvert X - E(X) \rvert \ge \varepsilon \right) \le
+                    \frac{V(X)}{\varepsilon^2}
+                \end{align*}
+                \vspace*{-6mm}
+            \end{greenblock}
+    \end{columns}
+\end{frame}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \subsection{Aufgabe}
@@ -447,22 +743,22 @@
             \begin{align*}
                 \hspace*{-77mm}
                 L_{\bm{x}}(\lambda) &= P(\bm{X} = \bm{x} | \lambda) =
-                \prod_{i=1}^{N} P(X_i=x_i | \lambda) =
-                \prod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!} e^{-\lambda}
+                \nprod_{i=1}^{N} P(X_i=x_i | \lambda) =
+                \nprod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!} e^{-\lambda}
             \end{align*}
             \vspace*{-3mm}
             \pause
             \begin{align*}
                 l_{\bm{x}}(\lambda) &= \ln \left(
L_{\bm{x}}(\lambda) \right) = \ln \left( - \prod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!} + \nprod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!} e^{-\lambda} \right) = - \sum_{i=1}^{N}\left[\ln \left( e^{-\lambda} \right) + + \nsum_{i=1}^{N}\left[\ln \left( e^{-\lambda} \right) + \ln \left( \lambda^{x_i} \right) - \ln \left( x_i! \right)\right] - = - N \lambda + \sum_{i=1}^{N} \left[ x_i \ln \left( - \lambda \right) - \sum_{n=1}^{x_i} \ln \left( n + = - N \lambda + \nsum_{i=1}^{N} \left[ x_i \ln \left( + \lambda \right) - \nsum_{n=1}^{x_i} \ln \left( n \right) \right] \end{align*} \vspace*{5mm} @@ -472,18 +768,19 @@ \begin{array}{l} \displaystyle\frac{\partial l_{\bm{x}}(\lambda)}{\lambda} = -N + - \frac{1}{\lambda} \sum_{i=1}^{N} x_i \overset{!}{=} 0 - \Rightarrow \lambda = \frac{\sum_{i=1}^{N} x_i}{N} \\[7mm] + \frac{1}{\lambda} \nsum_{i=1}^{N} x_i \overset{!}{=} 0 + \Rightarrow \lambda = \frac{1}{N} \nsum_{i=1}^{N} + x_i \\[7mm] \displaystyle\frac{\partial^2 l_{\bm{x}}(\lambda)}{\partial - \lambda^2} = - \frac{1}{\lambda^2} \sum_{i=1}^{N} x_i < 0 + \lambda^2} = - \frac{1}{\lambda^2} \nsum_{i=1}^{N} x_i < 0 \end{array} % tex-fmt: off \right\} % tex-fmt: on \Rightarrow \hat{\lambda}_\text{ML} = \argmax_\lambda \hspace{2mm} l_{\bm{x}}(\lambda) = - \frac{\sum_{i=1}^{N} x_i}{N} + \frac{1}{N} \nsum_{i=1}^{N} x_i % % \hat{\lambda}_\text{ML} = \argmax_\lambda % \hspace{2mm} \ln \left( l_{\bm{x}} (\lambda) \right) @@ -507,8 +804,8 @@ \pause \begin{gather*} E(\hat{\lambda}_\text{ML}) = E \left(\frac{1}{N} - \sum_{i=1}^{N} X_i \right) - = \frac{1}{N} \sum_{i=1}^{N} E(X_i) = \frac{1}{N} + \nsum_{i=1}^{N} X_i \right) + = \frac{1}{N} \nsum_{i=1}^{N} E(X_i) = \frac{1}{N} \cdot N \lambda = \lambda \hspace{7mm}\Rightarrow\hspace{7mm} \text{Schätzer ist erwartungstreu} @@ -523,7 +820,7 @@ \begin{minipage}{0.16\textwidth} \begin{gather*} E\left( \lvert \hat{\lambda}_\text{ML} - \lambda - \rvert > \varepsilon + \rvert \ge \varepsilon \right) \end{gather*} \end{minipage}% @@ -532,7 +829,7 @@ 
\begin{gather*} = E\left( \lvert \hat{\lambda}_\text{ML} - E\left(\hat{\lambda}_\text{ML}\right) \rvert - > \varepsilon + \ge \varepsilon \right) \le \frac{V\left(\hat{\lambda}_\text{ML}\right)}{\varepsilon^2} @@ -542,8 +839,8 @@ \pause \begin{gather*} V\left(\hat{\lambda}_\text{ML}\right) = V \left( - \frac{1}{N} \sum_{i=1}^{N} X_i \right) = - \frac{1}{N^2} \sum_{i=1}^{N} V(X_i) = + \frac{1}{N} \nsum_{i=1}^{N} X_i \right) = + \frac{1}{N^2} \nsum_{i=1}^{N} V(X_i) = \frac{N\lambda}{N^2} = \frac{\lambda}{N} \end{gather*} \pause @@ -564,10 +861,10 @@ \begin{gather*} J\left( \lambda \right) = - E \left( - \frac{\partial^2}{\partial \lambda^2} l_{\bm{x}} + \frac{\partial^2}{\partial \lambda^2} l_{\bm{X}} (\lambda) \right) - = - E \left( \frac{1}{\lambda^2} \sum_{i=1}^{N} X_i \right) - = \frac{1}{\lambda^2} \sum_{i=1}^{N} E\left( X_i + = E \left( \frac{1}{\lambda^2} \nsum_{i=1}^{N} X_i \right) + = \frac{1}{\lambda^2} \nsum_{i=1}^{N} E\left( X_i \right) = \frac{N}{\lambda} \end{gather*} \pause @@ -786,8 +1083,8 @@ \vspace*{-8mm} \pause \begin{gather*} - \overline{z} = \frac{1}{N} \sum_{i=1}^{N} z_{1,i} = 25 \\ - s^2 = \frac{1}{N-1} \sum_{i=1}^{N} \left( z_{1,i} - + \overline{z} = \frac{1}{N} \nsum_{i=1}^{N} z_{1,i} = 25 \\ + s^2 = \frac{1}{N-1} \nsum_{i=1}^{N} \left( z_{1,i} - \overline{z} \right)^2 = 4 \end{gather*} \end{minipage}% @@ -811,8 +1108,8 @@ \vspace*{-8mm} \pause \begin{gather*} - \overline{z} = \frac{1}{N} \sum_{i=1}^{N} z_{1,i} = 34{,}875 \\ - s^2 = \frac{1}{N-1} \sum_{i=1}^{N} \left( z_{1,i} - + \overline{z} = \frac{1}{N} \nsum_{i=1}^{N} z_{1,i} = 34{,}875 \\ + s^2 = \frac{1}{N-1} \nsum_{i=1}^{N} \left( z_{1,i} - \overline{z} \right)^2 \approx 1525{,}84 \end{gather*} \end{minipage}