\ifdefined\ishandout
|
|
\documentclass[de, handout]{CELbeamer}
|
|
\else
|
|
\documentclass[de]{CELbeamer}
|
|
\fi
|
|
|
|
%
|
|
%
|
|
% CEL Template
|
|
%
|
|
%
|
|
|
|
\newcommand{\templates}{preambles}
|
|
\input{\templates/packages.tex}
|
|
\input{\templates/macros.tex}
|
|
|
|
\grouplogo{CEL_logo.pdf}
|
|
|
|
\groupname{Communication Engineering Lab (CEL)}
|
|
\groupnamewidth{80mm}
|
|
|
|
\fundinglogos{}
|
|
|
|
%
|
|
%
|
|
% Document setup
|
|
%
|
|
%
|
|
|
|
\usepackage{tikz}
|
|
\usepackage{tikz-3dplot}
|
|
\usetikzlibrary{spy, external, intersections, positioning}
|
|
|
|
% \ifdefined\ishandout\else
|
|
% \tikzexternalize
|
|
% \fi
|
|
|
|
\usepackage{pgfplots}
|
|
\pgfplotsset{compat=newest}
|
|
\usepgfplotslibrary{fillbetween}
|
|
\usepgfplotslibrary{groupplots}
|
|
\usepgfplotslibrary{statistics}
|
|
|
|
\usepackage{enumerate}
|
|
\usepackage{listings}
|
|
\usepackage{subcaption}
|
|
\usepackage{bbm}
|
|
\usepackage{multirow}
|
|
\usepackage{xcolor}
|
|
\usepackage{amsmath}
|
|
\usepackage{graphicx}
|
|
\usepackage{calc}
|
|
\usepackage{amssymb}
|
|
|
|
\title{WT Tutorium 7}
|
|
\author[Tsouchlos]{Andreas Tsouchlos}
|
|
\date[]{13. Februar 2026}
|
|
|
|
%
|
|
%
|
|
% Custom commands
|
|
%
|
|
%
|
|
|
|
\input{lib/latex-common/common.tex}
|
|
\pgfplotsset{colorscheme/rocket}
|
|
|
|
\newcommand{\res}{src/2026-02-13/res}
|
|
|
|
\newlength{\depthofsumsign}
|
|
\setlength{\depthofsumsign}{\depthof{$\sum$}}
|
|
\newlength{\totalheightofsumsign}
|
|
\newcommand{\nsum}[1][1.4]{
|
|
\mathop{
|
|
\raisebox
|
|
{-#1\depthofsumsign+1\depthofsumsign}
|
|
{\scalebox
|
|
{#1}
|
|
{$\displaystyle\sum$}%
|
|
}
|
|
}
|
|
}
|
|
|
|
\newlength{\depthofprodsign}
|
|
\setlength{\depthofprodsign}{\depthof{$\prod$}}
|
|
\newlength{\totalheightofprodsign}
|
|
\newcommand{\nprod}[1][1.4]{
|
|
\mathop{
|
|
\raisebox
|
|
{-#1\depthofprodsign+1\depthofprodsign}
|
|
{\scalebox
|
|
{#1}
|
|
{$\displaystyle\prod$}%
|
|
}
|
|
}
|
|
}
|
|
|
|
% \tikzstyle{every node}=[font=\small]
|
|
% \captionsetup[sub]{font=small}
|
|
|
|
\newlength{\hght}
|
|
\newlength{\wdth}
|
|
|
|
\newcommand{\canceltotikz}[3][.5ex]{
|
|
\setlength{\hght}{\heightof{$#3$}}
|
|
\setlength{\wdth}{\widthof{$#3$}}
|
|
\makebox[0pt][l]{
|
|
\tikz[baseline]{\draw[-latex](0,-#1)--(\wdth,\hght+#1)
|
|
node[shift={(2mm,2mm)}]{#2};
|
|
}
|
|
}#3
|
|
}
|
|
|
|
%
|
|
%
|
|
% Document body
|
|
%
|
|
%
|
|
|
|
\begin{document}
|
|
|
|
\begin{frame}[title white vertical, picture=images/IMG_7801-cut]
|
|
\titlepage
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{Aufgabe 1}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{Theorie Wiederholung}
|
|
|
|
\ifdefined\ishandout
|
|
\begin{frame}
|
|
\frametitle{Wahrscheinlichkeitstheorie und Statistik}
|
|
|
|
\vspace*{-5mm}
|
|
|
|
\begin{itemize}
|
|
\item Einfache Stichprobe
|
|
\begin{gather*}
|
|
X_1, \ldots, X_N
|
|
\hspace{2mm}\overbrace{\text{unabhängig und haben
|
|
dieselbe Verteilung}}^{\text{``iid.''}}
|
|
\hspace*{5mm} \rightarrow\hspace*{5mm}
|
|
\bm{X} :=
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
\end{itemize}
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\begin{subfigure}{0.5\textwidth}
|
|
\centering
|
|
|
|
\begin{itemize}
|
|
\item Wahrscheinlichkeitstheorie
|
|
\end{itemize}
|
|
\vspace*{2mm}
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
minimum width=7cm, minimum height=4cm,
|
|
line width=1pt,
|
|
draw=kit-blue, fill=kit-blue!20,
|
|
] (model) {
|
|
$\bm{X} =
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}\sim P_{\bm{X}}$
|
|
};
|
|
|
|
\node[right=of model] (x) {
|
|
$\bm{x} =
|
|
\begin{pmatrix}
|
|
x_1 \\
|
|
\vdots \\
|
|
x_N
|
|
\end{pmatrix}$
|
|
};
|
|
|
|
\draw[-{Latex}, line width=1pt] (model) -- (x);
|
|
\node[above=22mm of model.center] {Modell};
|
|
\node[above=20.8mm of x.center] {Beobachtung};
|
|
\end{tikzpicture}%
|
|
\vspace*{15mm}
|
|
\end{subfigure}%
|
|
\vspace*{-12.6mm}%
|
|
\begin{subfigure}{0.5\textwidth}
|
|
\centering
|
|
|
|
\begin{itemize}
|
|
\item Statistik
|
|
\end{itemize}
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
minimum width=7.5cm, minimum height=4.5cm,
|
|
line width=1pt,
|
|
draw=kit-orange, fill=kit-orange!20,
|
|
] (real) {};
|
|
|
|
\node[right=of real] (x) {
|
|
$\bm{x} =
|
|
\begin{pmatrix}
|
|
x_1 \\
|
|
\vdots \\
|
|
x_N
|
|
\end{pmatrix}$
|
|
};
|
|
|
|
\draw[-{Latex}, line width=1pt] (real) -- (x);
|
|
\node[above=23mm of real.center] {``Echte Welt''};
|
|
\node[above=21.8mm of x.center] {Beobachtung};
|
|
|
|
\node[
|
|
rectangle,
|
|
minimum width=6.5cm, minimum height=3.5cm,
|
|
line width=1pt,
|
|
draw=kit-blue, fill=kit-blue!20,
|
|
densely dashed,
|
|
] (model) at (real) {
|
|
$\bm{X} =
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}\sim P_{\bm{X}}$
|
|
};
|
|
|
|
\draw[
|
|
line width=1pt, densely dashed,
|
|
] (x.south)
|
|
edge[-{Latex}, bend left]
|
|
node[below] {Modellierung}
|
|
(model.south);
|
|
\end{tikzpicture}
|
|
\vspace*{1mm}
|
|
\end{subfigure}
|
|
\end{figure}
|
|
\end{frame}
|
|
\else
|
|
\begin{frame}
|
|
\frametitle{Wahrscheinlichkeitstheorie und Statistik}
|
|
|
|
\vspace*{-5mm}
|
|
|
|
\begin{itemize}
|
|
\item Einfache Stichprobe
|
|
\begin{gather*}
|
|
X_1, \ldots, X_N
|
|
\hspace{2mm}\overbrace{\text{unabhängig und haben
|
|
dieselbe Verteilung}}^{\text{``iid.''}}
|
|
\hspace*{5mm} \rightarrow\hspace*{5mm}
|
|
\bm{X} :=
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
\end{itemize}
|
|
|
|
\pause
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\begin{subfigure}{0.5\textwidth}
|
|
\centering
|
|
|
|
\begin{itemize}
|
|
\item Wahrscheinlichkeitstheorie
|
|
\end{itemize}
|
|
\vspace*{2mm}
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
minimum width=7cm, minimum height=4cm,
|
|
line width=1pt,
|
|
draw=kit-blue, fill=kit-blue!20,
|
|
] (model) {
|
|
$\bm{X} =
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}\sim P_{\bm{X}}$
|
|
};
|
|
|
|
\node[right=of model] (x) {
|
|
$\bm{x} =
|
|
\begin{pmatrix}
|
|
x_1 \\
|
|
\vdots \\
|
|
x_N
|
|
\end{pmatrix}$
|
|
};
|
|
|
|
\draw[-{Latex}, line width=1pt] (model) -- (x);
|
|
\node[above=22mm of model.center] {Modell};
|
|
\node[above=20.8mm of x.center] {Beobachtung};
|
|
\end{tikzpicture}%
|
|
\vspace*{15mm}
|
|
\end{subfigure}%
|
|
\only<2>{\hspace*{16cm}}%
|
|
\only<3->{\vspace*{-12.6mm}}%
|
|
\begin{subfigure}{0.5\textwidth}
|
|
\centering
|
|
|
|
\only<3>{
|
|
\begin{itemize}
|
|
\item Statistik
|
|
\end{itemize}
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
minimum width=7.5cm, minimum height=4.5cm,
|
|
line width=1pt,
|
|
draw=kit-orange, fill=kit-orange!20,
|
|
] (real) {};
|
|
|
|
\node[right=of real] (x) {
|
|
$\bm{x} =
|
|
\begin{pmatrix}
|
|
x_1 \\
|
|
\vdots \\
|
|
x_N
|
|
\end{pmatrix}$
|
|
};
|
|
|
|
\draw[-{Latex}, line width=1pt] (real) -- (x);
|
|
\node[above=23mm of real.center] {``Echte Welt''};
|
|
\node[above=21.8mm of x.center] {Beobachtung};
|
|
|
|
\node[below=25.5mm of real.center]
|
|
{\phantom{Modellierung}};
|
|
\end{tikzpicture}
|
|
}%
|
|
\only<4->{
|
|
\begin{itemize}
|
|
\item Statistik
|
|
\end{itemize}
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
minimum width=7.5cm, minimum height=4.5cm,
|
|
line width=1pt,
|
|
draw=kit-orange, fill=kit-orange!20,
|
|
] (real) {};
|
|
|
|
\node[right=of real] (x) {
|
|
$\bm{x} =
|
|
\begin{pmatrix}
|
|
x_1 \\
|
|
\vdots \\
|
|
x_N
|
|
\end{pmatrix}$
|
|
};
|
|
|
|
\draw[-{Latex}, line width=1pt] (real) -- (x);
|
|
\node[above=23mm of real.center] {``Echte Welt''};
|
|
\node[above=21.8mm of x.center] {Beobachtung};
|
|
|
|
\node[
|
|
rectangle,
|
|
minimum width=6.5cm, minimum height=3.5cm,
|
|
line width=1pt,
|
|
draw=kit-blue, fill=kit-blue!20,
|
|
densely dashed,
|
|
] (model) at (real) {
|
|
$\bm{X} =
|
|
\begin{pmatrix}
|
|
X_1 \\
|
|
\vdots \\
|
|
X_N
|
|
\end{pmatrix}\sim P_{\bm{X}}$
|
|
};
|
|
|
|
\draw[
|
|
line width=1pt, densely dashed,
|
|
] (x.south)
|
|
edge[-{Latex}, bend left]
|
|
node[below] {Modellierung}
|
|
(model.south);
|
|
\end{tikzpicture}
|
|
}
|
|
\vspace*{1mm}
|
|
\end{subfigure}
|
|
\only<3->{
|
|
\vspace*{12.5mm}
|
|
}
|
|
\end{figure}
|
|
\end{frame}
|
|
\fi
|
|
|
|
\ifdefined\ishandout
|
|
\begin{frame}
|
|
\frametitle{Punktschätzer}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Beispiel: Temperaturschätzung
|
|
\vspace*{-5mm}
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (x) {
|
|
$
|
|
\bm{x} =
|
|
\begin{pmatrix}
|
|
26{,}2 \\
|
|
27{,}8 \\
|
|
25{,}7 \\
|
|
\vdots
|
|
\end{pmatrix}
|
|
$
|
|
};
|
|
|
|
\node[
|
|
rectangle,
|
|
right=of x,
|
|
minimum width=5cm, minimum height=2cm,
|
|
draw=kit-green, fill=kit-green!20,
|
|
line width=1pt,
|
|
align=center,
|
|
inner sep=3mm
|
|
] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
|
|
\displaystyle\frac{1}{N}
|
|
\nsum_{i=1}^{N} x_i$};
|
|
|
|
\node[
|
|
above=of est,
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (model) {
|
|
$X_i \sim \mathcal{N}(\mu = \vartheta,
|
|
\sigma^2 = 1)$
|
|
};
|
|
|
|
\node[right=of est] (theta) {$\hat{\vartheta}
|
|
= 26{,}0$};
|
|
|
|
\node[below] at (x.south) {Beobachtung};
|
|
\node[above] at (model.north) {Parametrisiertes Modell};
|
|
|
|
\draw[-{Latex}, line width=1pt] (x) -- (est);
|
|
\draw[-{Latex}, line width=1pt] (model) -- (est);
|
|
|
|
\draw[-{Latex}, line width=1pt] (est) -- (theta);
|
|
\end{tikzpicture}
|
|
\end{figure}
|
|
\item Punktschätzer: Rechenvorschrift zur Berechnung von
|
|
Parametern aus Beobachtungen \\
|
|
$\rightarrow$ Schätzer hängen von den Realisierungen ab
|
|
und sind damit selbst auch zufällig \\
|
|
$\rightarrow$ Schätzer haben einen Erwartungswert und eine Varianz
|
|
\end{itemize}
|
|
\end{frame}
|
|
\else
|
|
\begin{frame}
|
|
\frametitle{Punktschätzer}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Beispiel: Temperaturschätzung
|
|
\vspace*{-5mm}
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\only<1>{
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (x) {
|
|
$
|
|
\bm{x} =
|
|
\begin{pmatrix}
|
|
26{,}2 \\
|
|
27{,}8 \\
|
|
25{,}7 \\
|
|
\vdots
|
|
\end{pmatrix}
|
|
$
|
|
};
|
|
|
|
\node[
|
|
draw opacity=0,
|
|
fill opacity=0,
|
|
rectangle,
|
|
right=of x,
|
|
minimum width=5cm, minimum height=2cm,
|
|
draw=kit-green, fill=kit-green!20,
|
|
line width=1pt,
|
|
align=center,
|
|
inner sep=3mm
|
|
] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
|
|
\displaystyle\frac{1}{N}
|
|
\nsum_{i=1}^{N} x_i$};
|
|
|
|
\node[
|
|
draw opacity=0,
|
|
fill opacity=0,
|
|
above=of est,
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (model) {
|
|
$X_i \sim \mathcal{N}(\mu = \vartheta,
|
|
\sigma^2 = 1)$
|
|
};
|
|
|
|
\node[right=of est, draw opacity=0, fill
|
|
opacity=0] (theta) {$\hat{\vartheta} = 26{,}0$};
|
|
|
|
\node[below] at (x.south) {Beobachtung};
|
|
\node[above, draw opacity=0, fill opacity=0]
|
|
at (model.north) {Parametrisiertes Modell};
|
|
\end{tikzpicture}
|
|
}%
|
|
\only<2>{
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (x) {
|
|
$
|
|
\bm{x} =
|
|
\begin{pmatrix}
|
|
26{,}2 \\
|
|
27{,}8 \\
|
|
25{,}7 \\
|
|
\vdots
|
|
\end{pmatrix}
|
|
$
|
|
};
|
|
|
|
\node[
|
|
draw opacity=0,
|
|
fill opacity=0,
|
|
rectangle,
|
|
right=of x,
|
|
minimum width=5cm, minimum height=2cm,
|
|
draw=kit-green, fill=kit-green!20,
|
|
line width=1pt,
|
|
align=center,
|
|
inner sep=3mm
|
|
] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
|
|
\displaystyle\frac{1}{N}
|
|
\nsum_{i=1}^{N} x_i$};
|
|
|
|
\node[
|
|
above=of est,
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (model) {
|
|
$X_i \sim \mathcal{N}(\mu = \vartheta,
|
|
\sigma^2 = 1)$
|
|
};
|
|
|
|
\node[right=of est, draw opacity=0, fill
|
|
opacity=0] (theta) {$\hat{\vartheta}
|
|
= 26{,}0$};
|
|
|
|
\node[below] at (x.south) {Beobachtung};
|
|
\node[above] at (model.north) {Parametrisiertes Modell};
|
|
\end{tikzpicture}
|
|
}%
|
|
\only<3->{
|
|
\begin{tikzpicture}
|
|
\node[
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (x) {
|
|
$
|
|
\bm{x} =
|
|
\begin{pmatrix}
|
|
26{,}2 \\
|
|
27{,}8 \\
|
|
25{,}7 \\
|
|
\vdots
|
|
\end{pmatrix}
|
|
$
|
|
};
|
|
|
|
\node[
|
|
rectangle,
|
|
right=of x,
|
|
minimum width=5cm, minimum height=2cm,
|
|
draw=kit-green, fill=kit-green!20,
|
|
line width=1pt,
|
|
align=center,
|
|
inner sep=3mm
|
|
] (est) {Schätzer\\[5mm] $T_N(\bm{x}) =
|
|
\displaystyle\frac{1}{N}
|
|
\nsum_{i=1}^{N} x_i$};
|
|
|
|
\node[
|
|
above=of est,
|
|
rectangle,
|
|
densely dashed,
|
|
draw,
|
|
inner sep=5mm,
|
|
] (model) {
|
|
$X_i \sim \mathcal{N}(\mu = \vartheta,
|
|
\sigma^2 = 1)$
|
|
};
|
|
|
|
\node[right=of est] (theta) {$\hat{\vartheta}
|
|
= 26{,}0$};
|
|
|
|
\node[below] at (x.south) {Beobachtung};
|
|
\node[above] at (model.north) {Parametrisiertes Modell};
|
|
|
|
\draw[-{Latex}, line width=1pt] (x) -- (est);
|
|
\draw[-{Latex}, line width=1pt] (model) -- (est);
|
|
|
|
\draw[-{Latex}, line width=1pt] (est) -- (theta);
|
|
\end{tikzpicture}
|
|
}
|
|
\end{figure}
|
|
\pause
|
|
\pause
|
|
\item Punktschätzer: Rechenvorschrift zur Berechnung von
|
|
Parametern aus Beobachtungen \\
|
|
\pause
|
|
$\rightarrow$ Schätzer hängen von den Realisierungen ab
|
|
und sind damit selbst auch zufällig \\
|
|
$\rightarrow$ Schätzer haben einen Erwartungswert und eine Varianz
|
|
\end{itemize}
|
|
\end{frame}
|
|
\fi
|
|
|
|
\begin{frame}
|
|
\frametitle{Likelihood und Log-Likelihood (Diskret)}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Maximum Likelihood (ML) Schätzer\\
|
|
\begin{minipage}{0.21\textwidth}
|
|
\phantom{a}
|
|
\end{minipage}
|
|
\begin{minipage}{0.16\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
\hat{\vartheta}_\text{ML}
|
|
= \argmax_\vartheta \hspace{2mm} P(\bm{X} = \bm{x}
|
|
\vert \vartheta)
|
|
\end{align*}
|
|
\end{minipage}%
|
|
\visible<2->{
|
|
\begin{minipage}{0.15\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
\hspace*{-3mm} = \argmax_\vartheta
|
|
\hspace{2mm} L_{\bm{x}} (\vartheta)
|
|
\end{align*}
|
|
\end{minipage}%
|
|
}
|
|
\visible<3->{
|
|
\begin{minipage}{0.13\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
\hspace*{-10mm} = \argmax_\vartheta
|
|
\hspace{2mm} l_{\bm{x}} (\vartheta)
|
|
\end{align*}
|
|
\end{minipage}%
|
|
}
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
``Welches $\vartheta$ maximiert die
|
|
Wahrscheinlichkeit, die beobachtete Realisierung zu bekommen?''
|
|
\end{figure}
|
|
\pause
|
|
\item Likelihoodfunktion
|
|
\end{itemize}
|
|
|
|
\vspace*{5mm}
|
|
|
|
\begin{minipage}{0.5\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
L_{\bm{x}}(\vartheta) = P(\bm{X} = \bm{x} \vert
|
|
\vartheta) \overset{X_i \text{
|
|
iid.}}{=\joinrel=\joinrel=} \nprod_{i=1}^{N}
|
|
P(X_i = x_i \vert \vartheta)
|
|
\end{align*}
|
|
\end{minipage}%
|
|
\begin{minipage}{0.5\textwidth}
|
|
\centering
|
|
\begin{lightgrayhighlightbox}
|
|
\vspace*{-3mm}
|
|
Beispiel
|
|
|
|
\vspace*{-10mm}
|
|
\begin{gather*}
|
|
X_i \sim \text{\normalfont Binomial} (p = \vartheta, K) \\
|
|
L_{\bm{x}}(\vartheta) = P(\bm{X}=\bm{x} \vert \vartheta) =
|
|
\nprod_{i=1}^{N}
|
|
\binom{K}{x_i}\vartheta^{x_i}(1-\vartheta)^{K-x_i}
|
|
\end{gather*}
|
|
\vspace*{-10mm}
|
|
\end{lightgrayhighlightbox}
|
|
\end{minipage}%
|
|
|
|
\vspace*{5mm}
|
|
|
|
\begin{itemize}
|
|
\pause
|
|
\item Log-Likelihoodfunktion
|
|
\begin{align*}
|
|
l_{\bm{x}}(\vartheta) = \ln \left( L_{\bm{x}}(\vartheta) \right)
|
|
\end{align*}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Eigenschaften von Punktschätzern}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Erwartungstreue
|
|
\begin{gather*}
|
|
E(\hat{\vartheta}) = E\big( T_N(\bm{X}) \big) = \vartheta
|
|
\end{gather*}
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
``Im Mittel gibt der Schätzer den richtigen Wert zurück''
|
|
\end{figure}
|
|
|
|
\vspace*{10mm}
|
|
\pause
|
|
\item Konsistenz
|
|
\begin{gather*}
|
|
\lim_{N\rightarrow \infty} P \big( \lvert
|
|
\hat{\vartheta} - \vartheta \rvert \ge \varepsilon \big) = 0
|
|
\end{gather*}
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
``Der Schätzer streut weniger, je mehr Realisierungen
|
|
betrachtet werden''
|
|
\end{figure}
|
|
|
|
\vspace*{10mm}
|
|
\pause
|
|
\item Effizienz (für erwartungstreue Schätzer)
|
|
\begin{minipage}{0.68\textwidth}
|
|
\begin{gather*}
|
|
V(\hat{\vartheta}) = \frac{1}{J(\vartheta)},
|
|
\hspace*{5mm} J(\vartheta) = - E\left(
|
|
\frac{\partial^2}{\partial \vartheta^2}
|
|
l_{\bm{X}}(\vartheta)
|
|
\right)
|
|
\end{gather*}
|
|
\begin{figure}[H]
|
|
\centering
|
|
``Für jedes fixe N hat der Schätzer jeweils die
|
|
kleinstmögliche Varianz''
|
|
\end{figure}
|
|
\end{minipage}%
|
|
\begin{minipage}{0.3\textwidth}
|
|
\begin{lightgrayhighlightbox}
|
|
Cramér-Rao Ungleichung \\
|
|
\vspace*{-6mm}
|
|
\begin{gather*}
|
|
V(\hat{\vartheta}) \ge \frac{1}{J(\vartheta)}
|
|
\end{gather*}
|
|
\vspace*{-10mm}
|
|
\end{lightgrayhighlightbox}
|
|
\end{minipage}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Zusammenfassung}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{columns}
|
|
\column{\kitthreecolumns}
|
|
% \begin{greenblock}{Einfache Stichprobe}
|
|
% \vspace*{-8mm}
|
|
% \begin{gather*}
|
|
% \bm{X} =
|
|
% \begin{pmatrix}
|
|
% X_1 \\
|
|
% \vdots \\
|
|
% X_N
|
|
% \end{pmatrix},\hspace{5mm}
|
|
% X_1, \ldots, X_N \text{ iid.}
|
|
% \end{gather*}
|
|
% \vspace*{-3mm}
|
|
% \end{greenblock}
|
|
\begin{greenblock}{Likelihood und co. (diskret)}
|
|
\vspace*{-10mm}
|
|
\begin{align*}
|
|
\text{Likelihoodfunktion: } &L_{\bm{x}} (\vartheta) = P\left(
|
|
\bm{X} = \bm{x}
|
|
\vert \vartheta \right) \\[3mm]
|
|
\text{Log-Likelihoodfunktion: } &l_{\bm{x}}
|
|
(\vartheta) = \ln \left( L_{\bm{x}}
|
|
(\vartheta) \right) \\[3mm]
|
|
\text{ML-Schätzer: } &\hat{\vartheta}_\text{ML} =
|
|
\argmax_\vartheta
|
|
\hspace{2mm} l_{\bm{x}} (\vartheta)
|
|
\end{align*}
|
|
\vspace*{-6mm}
|
|
\end{greenblock}
|
|
\begin{greenblock}{Eigenschaften von Schätzern}
|
|
\vspace*{-10mm}
|
|
\begin{align*}
|
|
\text{Erwartungstreue: } & E\left( \hat{\vartheta}
|
|
\right) = \vartheta \\
|
|
\text{Konsistenz: } & \lim_{N\rightarrow \infty}
|
|
P\left( \lvert \hat{\vartheta}
|
|
- \vartheta \rvert \ge \varepsilon
|
|
\right) = 0 \\
|
|
\text{Effizienz: } & V(\hat{\vartheta}) =
|
|
\frac{1}{J(\vartheta)},\hspace{5mm} J(\vartheta) = - E\left(
|
|
\frac{\partial^2}{\partial \vartheta^2}
|
|
l_{\bm{x}}(\vartheta) \right)
|
|
\end{align*}
|
|
\vspace*{-3mm}
|
|
\end{greenblock}
|
|
\column{\kitthreecolumns}
|
|
\begin{greenblock}{Erwartungswert \& Varianz Rechenregeln}
|
|
\vspace*{-10mm}
|
|
\begin{align*}
|
|
E(aX) &= aE(X) \\
|
|
E(X + b) &= E(X) + b \\
|
|
E(X + Y) &= E(X) + E(Y) \\[5mm]
|
|
V(aX) &= a^2V(X) \\
|
|
V(X + b) &= V(X) \\
|
|
V(X + Y) &= V(X) + V(Y) \quad (X, Y \text{ unabhängig})
|
|
\end{align*}
|
|
\vspace*{-8mm}
|
|
\end{greenblock}
|
|
\begin{greenblock}{Tschebyscheff Ungleichung}
|
|
\vspace*{-8mm}
|
|
\begin{align*}
|
|
P\left( \lvert X - E(X) \rvert \ge \varepsilon \right) \le
|
|
\frac{V(X)}{\varepsilon^2}
|
|
\end{align*}
|
|
\vspace*{-6mm}
|
|
\end{greenblock}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{Aufgabe}
|
|
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 1: Punktschätzer}
|
|
|
|
Die Anzahl der Studierenden, die zur Mittagszeit in der KIT-Mensa
|
|
essen gehen, sei näherungsweise Poissonverteilt mit unbekanntem
|
|
Parameter $\lambda > 0$, wobei $\lambda$ die mittlere Ankunftsrate an
|
|
Studierenden pro Minute ist.
|
|
\begin{gather*}
|
|
X_i \sim \text{Poisson}(\lambda),\hspace*{10mm} P(X_i = k
|
|
\vert \lambda) = \frac{\lambda^k}{k!}
|
|
e^{-\lambda},\hspace*{3mm} k\in \mathbb{N}_0
|
|
\end{gather*}
|
|
Aus $N$ statistisch unabhängigen Messungen $x_i$ soll nun die mittlere
|
|
Ankunftsrate mithilfe eines ML-Schätzers geschätzt werden.
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\item Bestimmen Sie die Log-Likelihoodfunktion für $N$
|
|
Messwerte und damit den ML-Schätzer für die
|
|
Ankunftsrate $\lambda$.
|
|
\item Zeigen Sie, dass der Schätzer erwartungstreu ist.
|
|
\item Ist der ML-Schätzer konsistent?
|
|
\item Ist der ML-Schätzer effizient?
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 1: Punktschätzer}
|
|
|
|
\vspace*{-15mm}
|
|
|
|
Die Anzahl der Studierenden, die zur Mittagszeit in der KIT-Mensa
|
|
essen gehen, sei näherungsweise Poissonverteilt mit unbekanntem
|
|
Parameter $\lambda > 0$, wobei $\lambda$ die mittlere Ankunftsrate an
|
|
Studierenden pro Minute ist.
|
|
\begin{gather*}
|
|
X_i \sim \text{Poisson}(\lambda),\hspace*{10mm} P(X_i = k
|
|
\vert \lambda) = \frac{\lambda^k}{k!}
|
|
e^{-\lambda},\hspace*{3mm} k\in \mathbb{N}_0
|
|
\end{gather*}
|
|
Aus $N$ statistisch unabhängigen Messungen $x_i$ soll nun die mittlere
|
|
Ankunftsrate mithilfe eines ML-Schätzers geschätzt werden.
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\item Bestimmen Sie die Log-Likelihoodfunktion für $N$
|
|
Messwerte und damit den ML-Schätzer für die
|
|
Ankunftsrate $\lambda$.
|
|
\vspace*{5mm}
|
|
\pause
|
|
\begin{align*}
|
|
\hspace*{-77mm}
|
|
L_{\bm{x}}(\lambda) &= P(\bm{X} = \bm{x} | \lambda) =
|
|
\nprod_{i=1}^{N} P(X_i=x_i | \lambda) =
|
|
\nprod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!} e^{-\lambda}
|
|
\end{align*}
|
|
\vspace*{-3mm}
|
|
\pause
|
|
\begin{align*}
|
|
l_{\bm{x}}(\lambda) &= \ln \left(
|
|
L_{\bm{x}}(\lambda) \right) = \ln \left(
|
|
\nprod_{i=1}^{N} \frac{\lambda^{x_i}}{x_i!}
|
|
e^{-\lambda} \right)
|
|
=
|
|
\nsum_{i=1}^{N}\left[\ln \left( e^{-\lambda} \right) +
|
|
\ln \left( \lambda^{x_i} \right)
|
|
- \ln \left( x_i! \right)\right]
|
|
= - N \lambda + \nsum_{i=1}^{N} \left[ x_i \ln \left(
|
|
\lambda \right) - \nsum_{n=1}^{x_i} \ln \left( n
|
|
\right) \right]
|
|
\end{align*}
|
|
\vspace*{5mm}
|
|
\pause
|
|
\begin{gather*}
|
|
\left.
|
|
\begin{array}{l}
|
|
\displaystyle\frac{\partial
|
|
l_{\bm{x}}(\lambda)}{\partial \lambda} = -N +
|
|
\frac{1}{\lambda} \nsum_{i=1}^{N} x_i \overset{!}{=} 0
|
|
\Rightarrow \lambda = \frac{1}{N} \nsum_{i=1}^{N}
|
|
x_i \\[7mm]
|
|
\displaystyle\frac{\partial^2
|
|
l_{\bm{x}}(\lambda)}{\partial
|
|
\lambda^2} = - \frac{1}{\lambda^2} \nsum_{i=1}^{N} x_i < 0
|
|
\end{array}
|
|
% tex-fmt: off
|
|
\right\}
|
|
% tex-fmt: on
|
|
\Rightarrow \hat{\lambda}_\text{ML} =
|
|
\argmax_\lambda \hspace{2mm} l_{\bm{x}}(\lambda) =
|
|
\frac{1}{N} \nsum_{i=1}^{N} x_i
|
|
%
|
|
% \hat{\lambda}_\text{ML} = \argmax_\lambda
|
|
% \hspace{2mm} \ln \left( l_{\bm{x}} (\lambda) \right)
|
|
\end{gather*}
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
% TODO: Erwartungswert Rechenregeln in Zusammenfassung
|
|
% TODO: Tschebyscheff Ungleichung in Theorie und Zusammenfassung
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 1: Punktschätzer}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\setcounter{enumi}{1}
|
|
\item Zeigen Sie, dass der Schätzer erwartungstreu ist.
|
|
\pause
|
|
\begin{gather*}
|
|
E(\hat{\lambda}_\text{ML}) = E \left(\frac{1}{N}
|
|
\nsum_{i=1}^{N} X_i \right)
|
|
= \frac{1}{N} \nsum_{i=1}^{N} E(X_i) = \frac{1}{N}
|
|
\cdot N \lambda = \lambda
|
|
\hspace{7mm}\Rightarrow\hspace{7mm} \text{Schätzer
|
|
ist erwartungstreu}
|
|
\end{gather*}
|
|
\pause
|
|
\vspace*{-5mm}
|
|
\item Ist der ML-Schätzer konsistent? \\[-5mm]
|
|
\pause
|
|
\begin{minipage}{0.24\textwidth}
|
|
\phantom{a}
|
|
\end{minipage}
|
|
\begin{minipage}{0.16\textwidth}
|
|
\begin{gather*}
|
|
P\left( \lvert \hat{\lambda}_\text{ML} - \lambda
|
|
\rvert \ge \varepsilon
|
|
\right)
|
|
\end{gather*}
|
|
\end{minipage}%
|
|
\pause %
|
|
\begin{minipage}{0.22\textwidth}
|
|
\begin{gather*}
|
|
= P\left( \lvert \hat{\lambda}_\text{ML} -
|
|
E\left(\hat{\lambda}_\text{ML}\right) \rvert
|
|
\ge \varepsilon
|
|
\right)
|
|
\le
|
|
\frac{V\left(\hat{\lambda}_\text{ML}\right)}{\varepsilon^2}
|
|
\end{gather*}
|
|
\end{minipage} \\[2mm]
|
|
|
|
\pause
|
|
\begin{gather*}
|
|
V\left(\hat{\lambda}_\text{ML}\right) = V \left(
|
|
\frac{1}{N} \nsum_{i=1}^{N} X_i \right) =
|
|
\frac{1}{N^2} \nsum_{i=1}^{N} V(X_i) =
|
|
\frac{N\lambda}{N^2} = \frac{\lambda}{N}
|
|
\end{gather*}
|
|
\pause
|
|
\begin{gather*}
|
|
P\left( \lvert \hat{\lambda}_\text{ML} - \lambda
|
|
\rvert \ge \varepsilon
|
|
\right) \le \frac{\lambda}{N \varepsilon^2}
|
|
\overset{N\rightarrow
|
|
\infty}{\relbar\joinrel\relbar\joinrel\relbar\joinrel\rightarrow}
|
|
0
|
|
\hspace{7mm} \Rightarrow \hspace{7mm} \text{Schätzer
|
|
ist konsistent}
|
|
\end{gather*}
|
|
\pause
|
|
\vspace*{-5mm}
|
|
\item Ist der ML-Schätzer effizient?
|
|
\pause
|
|
\begin{gather*}
|
|
J\left( \lambda \right) = - E
|
|
\left(
|
|
\frac{\partial^2}{\partial \lambda^2} l_{\bm{X}}
|
|
(\lambda) \right)
|
|
= E \left( \frac{1}{\lambda^2} \nsum_{i=1}^{N} X_i \right)
|
|
= \frac{1}{\lambda^2} \nsum_{i=1}^{N} E\left( X_i
|
|
\right) = \frac{N}{\lambda}
|
|
\end{gather*}
|
|
\pause
|
|
\begin{gather*}
|
|
V\left( \hat{\lambda}_\text{ML} \right)
|
|
% tex-fmt: off
|
|
\overset{\text{c)}}{=}
|
|
% tex-fmt: on
|
|
\frac{\lambda}{N} = \frac{1}{J\left( \lambda \right)}
|
|
\hspace{7mm} \Rightarrow \hspace{7mm} \text{Schätzer
|
|
ist effizient}
|
|
\end{gather*}
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{Aufgabe 2}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{Theorie Wiederholung}
|
|
|
|
\begin{frame}
|
|
\frametitle{Empirische Kenngrößen I}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Empirischer Erwartungswert
|
|
\end{itemize}
|
|
|
|
\begin{minipage}{0.47\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
\overline{x} = \frac{1}{N} \nsum_{i=1}^{N} x_i
|
|
\end{align*}
|
|
\end{minipage}%
|
|
\begin{minipage}{0.53\textwidth}
|
|
\centering
|
|
\begin{lightgrayhighlightbox}
|
|
\vspace*{-3mm}
|
|
Erinnerung: Erwartungswert (diskret)
|
|
\begin{align*}
|
|
E(X) = \nsum_{n=1}^{\infty} x_n P(X=x_n)
|
|
\end{align*}
|
|
\vspace*{-10mm}
|
|
\end{lightgrayhighlightbox}
|
|
\end{minipage}%
|
|
|
|
\vspace*{10mm}
|
|
|
|
\pause
|
|
\begin{itemize}
|
|
\item Empirische Varianz
|
|
\end{itemize}
|
|
|
|
\begin{minipage}{0.47\textwidth}
|
|
\centering
|
|
\begin{align*}
|
|
s^2 = \frac{1}{N-1} \nsum_{i=1}^{N} (x_i - \overline{x})^2
|
|
\end{align*}
|
|
\end{minipage}%
|
|
\begin{minipage}{0.53\textwidth}
|
|
\centering
|
|
\begin{lightgrayhighlightbox}
|
|
\vspace*{-3mm}
|
|
Erinnerung: Varianz (diskret)
|
|
\begin{align*}
|
|
V(X) = E\left( \left( X - E(X) \right)^2
|
|
\right) = \nsum_{n=1}^{\infty} \left( x_n -
|
|
E(X) \right)^2 P(X=x_n)
|
|
\end{align*}
|
|
\vspace*{-10mm}
|
|
\end{lightgrayhighlightbox}
|
|
\end{minipage}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Empirische Kenngrößen II}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{itemize}
|
|
\item Geordnete Stichprobe
|
|
\begin{align*}
|
|
\begin{pmatrix}
|
|
x_1 & \cdots & x_N
|
|
\end{pmatrix}
|
|
\hspace{10mm} \rightarrow \hspace{10mm}
|
|
\begin{pmatrix}
|
|
x_{(1)} & \cdots & x_{(N)}
|
|
\end{pmatrix}, \hspace{5mm} x_{(1)} \le \cdots \le x_{(N)}
|
|
\end{align*}
|
|
\pause
|
|
\item Empirischer Median
|
|
\begin{align*}
|
|
x_{1/2} =
|
|
\begin{cases}
|
|
x_{\left( \frac{N+1}{2} \right)}, & N \text{
|
|
ungerade} \\[3mm]
|
|
\frac{1}{2} \left( x_{\left( \frac{N}{2} \right)}
|
|
+ x_{\left( \frac{N}{2} +1 \right)} \right), & N
|
|
\text{ gerade}
|
|
\end{cases}
|
|
\end{align*}
|
|
\pause
|
|
\item $p$-Quantil
|
|
\begin{align*}
|
|
x_{p} =
|
|
\begin{cases}
|
|
x_{\left( \lfloor Np + 1 \rfloor \right)}, & Np
|
|
\notin \mathbb{N} \\[3mm]
|
|
\frac{1}{2} \left( x_{\left( Np \right)}
|
|
+ x_{\left( Np + 1 \right)} \right), & Np \in \mathbb{N}
|
|
\end{cases}
|
|
\end{align*}
|
|
\pause
|
|
\item Quartilsabstand
|
|
\begin{align*}
|
|
x_{3/4} - x_{1/4}
|
|
\end{align*}
|
|
\end{itemize}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Boxplots}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
\begin{tikzpicture}
|
|
\begin{axis}[
|
|
width=24cm,
|
|
height=6cm,
|
|
clip=false,
|
|
xticklabel=\empty,
|
|
yticklabel=\empty,
|
|
]
|
|
\addplot+ [
|
|
mark=*,
|
|
kit-red,
|
|
boxplot prepared={
|
|
lower whisker=5,
|
|
lower quartile=7,
|
|
median=8.5,
|
|
upper quartile=9.5,
|
|
upper whisker=10,
|
|
},
|
|
boxplot/every median/.style={draw=kit-blue,line width=2pt},
|
|
boxplot/every whisker/.style={draw=kit-green,line
|
|
width=1pt},
|
|
boxplot/every box/.style={black,line width=1pt},
|
|
] table [row sep=\\,y index=0] {
|
|
data\\ 1\\ 3\\
|
|
};
|
|
|
|
\node at (7.5,0) (median)
|
|
{\textcolor{kit-blue}{Median: $x_{1/2}$}};
|
|
|
|
\node[below right=0cm and 0cm of median,align=center]
|
|
(uw) {
|
|
\textcolor{kit-green}{Größte normale Beobachtung:}\\
|
|
$\textcolor{kit-green}{x_{3/4} + \frac{3}{2}
|
|
\left( x_{3/4} - x_{1/4} \right)}$
|
|
};
|
|
\node[below left=0cm and 0cm of median,align=center] (lw)
|
|
{
|
|
\textcolor{kit-green}{Kleinste normale Beobachtung:}\\
|
|
$\textcolor{kit-green}{x_{1/4} - \frac{3}{2} \left(
|
|
x_{3/4} - x_{1/4} \right)}$
|
|
};
|
|
|
|
\node at (9.78, 2) (uq)
|
|
{Oberes Quartil: $x_{3/4}$};
|
|
\node[left=of uq] (lq)
|
|
{Unteres Quartil: $x_{1/4}$};
|
|
|
|
\node[above left=0cm and 0cm of lw] (out)
|
|
{\textcolor{kit-red}{Ausreißer}};
|
|
|
|
\draw[kit-blue, line width=1pt] (axis cs: 8.4,0.65) -- (median);
|
|
\draw[kit-green, line width=1pt] (axis cs: 5,0.65) -- (lw);
|
|
\draw[kit-green, line width=1pt] (axis cs: 10,0.65) -- (uw);
|
|
|
|
\draw[kit-red, line width=1pt] (axis cs: 1.08,0.9) -- (out);
|
|
\draw[kit-red, line width=1pt] (axis cs: 2.9,0.9) -- (out);
|
|
|
|
\draw[line width=1pt] (axis cs: 7,1.42) -- (lq);
|
|
\draw[line width=1pt] (axis cs: 9.5,1.42) -- (uq);
|
|
\end{axis}
|
|
\end{tikzpicture}
|
|
\end{figure}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Zusammenfassung}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\begin{columns}
|
|
\column{\kitfourcolumns}
|
|
\begin{greenblock}{Empirische Kenngrößen}
|
|
\vspace*{-8mm}
|
|
\begin{align*}
|
|
\text{Empirischer Erwartungswert: } & \overline{x} = \frac{1}{N}
|
|
\nsum_{i=1}^{N} x_i \\[3mm]
|
|
\text{Empirische Varianz: } & s^2 = \frac{1}{N-1}
|
|
\nsum_{i=1}^{N} \left( x_i - \overline{x} \right)^2 \\[3mm]
|
|
p\text{-Quantil: }
|
|
& x_p =
|
|
\begin{cases}
|
|
x_{\left( \lfloor Np + 1 \rfloor \right)}, & Np
|
|
\notin \mathbb{N} \\[3mm]
|
|
\frac{1}{2} \left( x_{\left( Np \right)}
|
|
+ x_{\left( Np + 1 \right)} \right), & Np \in \mathbb{N}
|
|
\end{cases} \\[3mm]
|
|
\text{Median: } & x_{1/2} \\[3mm]
|
|
\text{Quartilsabstand: } & x_{3/4} - x_{1/4}
|
|
\end{align*}
|
|
\vspace*{-4mm}
|
|
\end{greenblock}
|
|
\column{\kittwocolumns}
|
|
\begin{greenblock}{Boxplot}
|
|
\begin{figure}[H]
|
|
\centering
|
|
\begin{tikzpicture}
|
|
\begin{axis}[
|
|
width=3cm,
|
|
height=12cm,
|
|
boxplot/draw direction=y,
|
|
clip=false,
|
|
xtick=\empty,
|
|
ytick=\empty,
|
|
axis lines=none,
|
|
]
|
|
\addplot+ [
|
|
mark=*,
|
|
kit-red,
|
|
boxplot prepared={
|
|
lower whisker=5,
|
|
lower quartile=7,
|
|
median=8.5,
|
|
upper quartile=9.5,
|
|
upper whisker=10,
|
|
},
|
|
boxplot/every
|
|
median/.style={draw=kit-blue,line width=2pt},
|
|
boxplot/every
|
|
whisker/.style={draw=kit-green,line width=1pt},
|
|
boxplot/every box/.style={black,line width=1pt},
|
|
] table [row sep=\\,y index=0] {
|
|
data\\ 1\\ 3\\
|
|
};
|
|
|
|
\node[right] (median) at (2.5,8.5)
|
|
{$\textcolor{kit-blue}{x_{1/2}}$};
|
|
\node[right] (lq) at (2.5,7) {$x_{1/4}$};
|
|
\node[right] (uq) at (2.5,9.5) {$x_{3/4}$};
|
|
\node[right] (lw) at (2.5,5)
|
|
{
|
|
$\textcolor{kit-green}{
|
|
x_{1/4} - \frac{3}{2} \left( x_{3/4} -
|
|
x_{1/4} \right)
|
|
}$
|
|
};
|
|
\node[right] (uw) at (2.5,10.6)
|
|
{
|
|
$\textcolor{kit-green}{
|
|
x_{3/4} + \frac{3}{2} \left( x_{3/4} -
|
|
x_{1/4} \right)
|
|
}$
|
|
};
|
|
\node[right] (out) at (2.5,2)
|
|
{
|
|
\textcolor{kit-red}{Ausreißer}
|
|
};
|
|
|
|
\draw[kit-blue,line width=1pt] (1.6,8.5) -- (median);
|
|
\draw[line width=1pt] (1.6,9.5) -- (uq);
|
|
\draw[line width=1pt] (1.6,7) -- (lq);
|
|
\draw[kit-green,line width=1pt] (1.6,5) -- (lw);
|
|
\draw[kit-green,line width=1pt] (1.6,10) -- (uw);
|
|
\draw[kit-red,line width=1pt] (1.3,1) -- (out);
|
|
\draw[kit-red,line width=1pt] (1.3,3) -- (out);
|
|
\end{axis}
|
|
\end{tikzpicture}
|
|
\end{figure}
|
|
\end{greenblock}
|
|
\end{columns}
|
|
\end{frame}
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\subsection{Aufgabe}
|
|
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 2: Deskriptive Statistik}
|
|
|
|
\vspace*{-15mm}
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\item Nennen Sie zwei Bedingungen, die erfüllt sein müssen,
|
|
damit eine Stichprobe als einfache
|
|
Stichprobe gilt. Wie muss eine Stichprobe vorverarbeitet
|
|
werden, um daraus den Median
|
|
oder Quantile bestimmen zu können?
|
|
\end{enumerate}
|
|
|
|
\vspace*{5mm}
|
|
\begin{figure}[H]
|
|
\centering
|
|
\includegraphics[scale=1.4]{res/boxplot.pdf}
|
|
\end{figure}
|
|
|
|
\begin{enumerate}
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\setcounter{enumi}{1}
|
|
\item Lesen Sie aus dem Boxplot folgende Werte ab: den
|
|
Median, die untere Quartilsgrenze, die
|
|
größte normale Beobachtung.
|
|
\end{enumerate}
|
|
|
|
\vspace*{5mm}
|
|
|
|
Die Zufallsvariable $Z \in \mathbb{N}$ beschreibt die
|
|
Studiendauer am KIT bis zum Abschluss der Promotion. Eine
|
|
einfache Zufallsstichprobe mit $n = 6$ Studierenden ergab die
|
|
folgenden Studiendauern:
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
|
|
Durch fehlerhaftes Eintragen wurden für zwei weitere Studierende


die Studiendauern $0$ und $129$ vermerkt. Die erweiterte
|
|
Stichprobe lautet:
|
|
\vspace*{-5mm}
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24 & 0 & 129
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
|
|
\vspace*{5mm}
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\setcounter{enumi}{2}
|
|
\item Berechnen Sie für beide Stichproben die empirische
|
|
Varianz und den Quartilsabstand. Erklären Sie anhand der
|
|
Ergebnisse einen Vorteil des Quartilsabstands gegenüber
|
|
der Varianz als Maß für die Streuung.
|
|
\end{enumerate}
|
|
\end{frame}
|
|
|
|
% TODO: Boxplot erklären
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 2: Deskriptive Statistik}
|
|
|
|
\vspace*{-15mm}
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\item Nennen Sie zwei Bedingungen, die erfüllt sein müssen,
|
|
damit eine Stichprobe als einfache
|
|
Stichprobe gilt. Wie muss eine Stichprobe vorverarbeitet
|
|
werden, um daraus den Median
|
|
oder Quantile bestimmen zu können?
|
|
\end{enumerate}
|
|
|
|
\vspace*{5mm}
|
|
|
|
\pause
|
|
\begin{minipage}{0.25\textwidth}
|
|
\phantom{a}
|
|
\end{minipage}
|
|
\begin{minipage}{0.5\textwidth}
|
|
\centering
|
|
\begin{itemize}
|
|
\item Die Messungen müssen unabhängig und identisch verteilt sein
|
|
\item Die Stichprobe muss sortiert werden
|
|
\end{itemize}
|
|
\end{minipage}
|
|
|
|
\pause
|
|
|
|
\vspace*{15mm}
|
|
\begin{figure}[H]
|
|
\centering
|
|
\includegraphics[scale=1.4]{res/boxplot.pdf}
|
|
\end{figure}
|
|
|
|
\begin{enumerate}
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\setcounter{enumi}{1}
|
|
\item Lesen Sie aus dem Boxplot folgende Werte ab: den
|
|
Median, die untere Quartilsgrenze, die
|
|
größte normale Beobachtung.
|
|
\end{enumerate}
|
|
|
|
\vspace*{-10mm}
|
|
|
|
\pause
|
|
\begin{align*}
|
|
\text{Median: } \hspace{5mm}&5 \\
|
|
\text{Untere Quartilsgrenze: } \hspace{5mm}&3 \\
|
|
\text{Größte normale Beobachtung: } \hspace{5mm}&9
|
|
\end{align*}
|
|
\end{frame}
|
|
|
|
\begin{frame}
|
|
\frametitle{Aufgabe 2: Deskriptive Statistik}
|
|
|
|
\vspace*{-17mm}
|
|
|
|
Die Zufallsvariable $Z \in \mathbb{N}$ beschreibt die
|
|
Studiendauer am KIT bis zum Abschluss der Promotion. Eine
|
|
einfache Zufallsstichprobe mit $n = 6$ Studierenden ergab die
|
|
folgenden Studiendauern:
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
|
|
Durch fehlerhaftes Eintragen wurden für zwei weitere Studierende


die Studiendauern $0$ und $129$ vermerkt. Die erweiterte
|
|
Stichprobe lautet:
|
|
\vspace*{-5mm}
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24 & 0 & 129
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
|
|
\vspace*{5mm}
|
|
|
|
\begin{enumerate}%
|
|
% tex-fmt: off
|
|
[a{)}]
|
|
% tex-fmt: on
|
|
\setcounter{enumi}{2}
|
|
\item Berechnen Sie für beide Stichproben die empirische
|
|
Varianz und den Quartilsabstand. Erklären Sie anhand der
|
|
Ergebnisse einen Vorteil des Quartilsabstands gegenüber
|
|
der Varianz als Maß für die Streuung.
|
|
\end{enumerate}
|
|
%
|
|
\vspace*{-3mm}
|
|
\pause
|
|
\begin{minipage}{0.5\textwidth}
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24
|
|
\end{pmatrix}\\
|
|
\rightarrow
|
|
\begin{pmatrix}
|
|
22 & 24 & 25 & 25 & 26 & 28
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
\vspace*{-10mm}
|
|
\pause
|
|
\begin{gather*}
|
|
z_{3/4} - z_{1/4} = 26 - 24 = 2
|
|
\end{gather*}
|
|
\vspace*{-8mm}
|
|
\pause
|
|
\begin{gather*}
|
|
\overline{z} = \frac{1}{N} \nsum_{i=1}^{N} z_{1,i} = 25 \\
|
|
s^2 = \frac{1}{N-1} \nsum_{i=1}^{N} \left( z_{1,i} -
|
|
\overline{z} \right)^2 = 4
|
|
\end{gather*}
|
|
\end{minipage}%
|
|
\pause
|
|
\begin{minipage}{0.5\textwidth}
|
|
\begin{gather*}
|
|
z_1 =
|
|
\begin{pmatrix}
|
|
28 & 22 & 25 & 26 & 25 & 24 & 0 & 129
|
|
\end{pmatrix}\\
|
|
\rightarrow
|
|
\begin{pmatrix}
|
|
0 & 22 & 24 & 25 & 25 & 26 & 28 & 129
|
|
\end{pmatrix}
|
|
\end{gather*}
|
|
\vspace*{-10mm}
|
|
\pause
|
|
\begin{gather*}
|
|
z_{3/4} - z_{1/4} = \frac{26 + 28}{2} - \frac{22 + 24}{2} = 4
|
|
\end{gather*}
|
|
\vspace*{-8mm}
|
|
\pause
|
|
\begin{gather*}
|
|
\overline{z} = \frac{1}{N} \nsum_{i=1}^{N} z_{1,i} = 34{,}875 \\
|
|
s^2 = \frac{1}{N-1} \nsum_{i=1}^{N} \left( z_{1,i} -
|
|
\overline{z} \right)^2 \approx 1525{,}84
|
|
\end{gather*}
|
|
\end{minipage}
|
|
\end{frame}
|
|
|
|
\end{document}
|
|
|