First round of corrections
parent d36cd83cf6
commit af45d0ce19
@@ -31,6 +31,15 @@
 long = binary phase-shift keying
 }
 
+%
+% C
+%
+
+\DeclareAcronym{CN}{
+short = CN,
+long = check node
+}
+
 %
 % F
 %
@@ -87,3 +96,13 @@
 short = PDF,
 long = probability density function
 }
+
+%
+% V
+%
+
+\DeclareAcronym{VN}{
+short = VN,
+long = variable node
+}
+
@@ -26,30 +26,32 @@ Generally, the original decoding problem considered is either the \ac{MAP} or
 the \ac{ML} decoding problem:%
 %
 \begin{align*}
-\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{c \in \mathcal{C}}
-f_{\boldsymbol{C} \mid \boldsymbol{Y}} \left( \boldsymbol{c} \mid \boldsymbol{y} \right)\\
-\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{c \in \mathcal{C}}
-f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
+\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
+P \left(\boldsymbol{C} = \boldsymbol{c} \mid \boldsymbol{Y} = \boldsymbol{y}
+\right)\\
+\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
+f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
+\right)
 .\end{align*}%
 %
 The goal is to arrive at a formulation where a certain objective function
-$f$ must be minimized under certain constraints:%
+$g \left( \cdot \right) $ must be minimized under certain constraints:%
 %
 \begin{align*}
-\text{minimize}\hspace{2mm} &f\left( \boldsymbol{c} \right)\\
-\text{subject to}\hspace{2mm} &\boldsymbol{c} \in D
+\text{minimize}\hspace{2mm} &g\left( \tilde{\boldsymbol{c}} \right)\\
+\text{subject to}\hspace{2mm} &\tilde{\boldsymbol{c}} \in D
 ,\end{align*}%
 %
-where $D$ is the domain of values attainable for $\boldsymbol{c}$ and represents the
-constraints.
+where $D \subseteq \mathbb{R}^n$ is the domain of values attainable for $\tilde{\boldsymbol{c}}$
+and represents the constraints.
 
 In contrast to the established message-passing decoding algorithms,
-the viewpoint then changes from observing the decoding process in its
-tanner graph representation (as shown in figure \ref{fig:dec:tanner})
+the perspective then changes from observing the decoding process in its
+Tanner graph representation with \acp{VN} and \acp{CN} (as shown in figure \ref{fig:dec:tanner})
 to a spatial representation (figure \ref{fig:dec:spatial}),
 where the codewords are some of the vertices of a hypercube.
-The goal is to find the point $\boldsymbol{c}$,
-which minimizes the objective function $f$.
+The goal is to find the point $\tilde{\boldsymbol{c}}$,
+which minimizes the objective function $g\left( \cdot \right) $.
 
 %
 % Figure showing decoding space
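To keep the corrected formulation concrete, here is a minimal numpy sketch of the brute-force rule above (not part of the commit). The [7, 4] parity-check matrix, the noise level, and the BPSK-over-AWGN assumption ($\boldsymbol{x} = \left( -1 \right)^{\boldsymbol{c}}$, under which maximizing $f_{\boldsymbol{Y} \mid \boldsymbol{C}}$ reduces to a nearest-codeword search) are all illustrative assumptions.

    import itertools
    import numpy as np

    # Hypothetical [7, 4] parity-check matrix; any small binary H works here.
    H = np.array([[1, 1, 0, 1, 1, 0, 0],
                  [1, 0, 1, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 0, 1]])

    def codewords(H):
        """Enumerate C = {c in {0,1}^n : Hc = 0 (mod 2)} exhaustively."""
        n = H.shape[1]
        return [np.array(c) for c in itertools.product((0, 1), repeat=n)
                if not ((H @ np.array(c)) % 2).any()]

    def ml_decode(y, H):
        """Brute-force ML: under BPSK + AWGN, argmax_c f(y | c) is the
        codeword whose bipolar image (-1)^c lies closest to y."""
        return min(codewords(H), key=lambda c: np.sum((y - (-1.0) ** c) ** 2))

    rng = np.random.default_rng(1)
    c_true = codewords(H)[5]
    y = (-1.0) ** c_true + 0.5 * rng.standard_normal(H.shape[1])
    print(c_true, ml_decode(y, H))

The exhaustive search over $2^k$ codewords is exactly what the relaxations discussed below are meant to avoid.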
@@ -70,15 +72,18 @@ which minimizes the objective function $f$.
 \begin{tikzpicture}[scale=1, transform shape]
 \node[checknode,
 label={[below, label distance=-0.4cm, align=center]
-$c$\\$\left( x_1 + x_2 + x_3 = 0 \right) $}]
-(c) at (0, 0) {};
-\node[variablenode, label={$x_1$}] (x1) at (-2, 2) {};
-\node[variablenode, label={$x_2$}] (x2) at (0, 2) {};
-\node[variablenode, label={$x_3$}] (x3) at (2, 2) {};
+CN\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
+(cn) at (0, 0) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_1 \right)$}]
+(c1) at (-2, 2) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_2 \right)$}]
+(c2) at (0, 2) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_3 \right)$}]
+(c3) at (2, 2) {};
 
-\draw (c) -- (x1);
-\draw (c) -- (x2);
-\draw (c) -- (x3);
+\draw (cn) -- (c1);
+\draw (cn) -- (c2);
+\draw (cn) -- (c3);
 \end{tikzpicture}
 
 \caption{Tanner graph representation of a single parity-check code}
@@ -148,7 +153,7 @@ which minimizes the objective function $f$.
 
 \node[color=KITgreen, fill=KITgreen,
 draw, circle, inner sep=0pt, minimum size=4pt] (c) at (0.9, 0.7, 1) {};
-\node[color=KITgreen, right=0cm of c] {$\boldsymbol{c}$};
+\node[color=KITgreen, right=0cm of c] {$\tilde{\boldsymbol{c}}$};
 \end{tikzpicture}
 
 \caption{Spatial representation of a single parity-check code}
@@ -157,6 +162,7 @@ which minimizes the objective function $f$.
 
 \caption{Different representations of the decoding problem}
 \end{figure}
+\todo{Rename $c$ to e.g. $h$ or remove it completely?}
 
 
 
@@ -184,17 +190,15 @@ making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
 \label{eq:lp:ml}
 .\end{align}%
 %
-Assuming a memoryless channel, \ref{eq:lp:ml} can be rewritten in terms
+Assuming a memoryless channel, equation (\ref{eq:lp:ml}) can be rewritten in terms
 of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
 %
 \begin{align*}
 \hat{\boldsymbol{c}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
-\sum_{i=1}^{n} \gamma_i y_i,%
+\sum_{i=1}^{n} \gamma_i c_i,%
 \hspace{5mm} \gamma_i = \ln\left(
-\frac{f_{\boldsymbol{Y} | \boldsymbol{C}}
-\left( Y_i = y_i \mid C_i = 0 \right) }
-{f_{\boldsymbol{Y} | \boldsymbol{C}}
-\left( Y_i = y_i | C_i = 1 \right) } \right)
+\frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
+{f_{Y_i | C_i} \left( y_i \mid C_i = 1 \right) } \right)
 .\end{align*}
 %
 The authors propose the following cost function%
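As a numerical cross-check of the LLR form: for BPSK over AWGN the LLR collapses to the closed form $\gamma_i = 2 y_i / \sigma^2$; this closed form is an assumption of the sketch below, not a statement from the hunk above.

    import itertools
    import numpy as np

    def llr_awgn(y, sigma2):
        """gamma_i = ln f(y_i | C_i = 0) - ln f(y_i | C_i = 1); for BPSK
        x_i = (-1)^{c_i} over AWGN this reduces to 2 y_i / sigma^2."""
        return 2.0 * y / sigma2

    def ml_decode_llr(y, H, sigma2):
        """Linear-cost form of ML decoding: argmin_c sum_i gamma_i c_i."""
        gamma = llr_awgn(y, sigma2)
        cws = (np.array(c)
               for c in itertools.product((0, 1), repeat=H.shape[1])
               if not ((H @ np.array(c)) % 2).any())
        return min(cws, key=lambda c: float(gamma @ c))

Both this decoder and the likelihood form return the same codeword, since the linear cost differs from the log-likelihood only by terms independent of $\boldsymbol{c}$.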
@@ -203,7 +207,7 @@ have the same meaning.}
 for the \ac{LP} decoding problem:%
 %
 \begin{align*}
-\sum_{i=1}^{n} \gamma_i c_i
+g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
 .\end{align*}
 %
 With this cost function, the exact integer linear program formulation of \ac{ML}
@@ -227,13 +231,13 @@ decoding, redefining the constraints in terms of the \text{codeword polytope}
 %
 \begin{align*}
 \text{poly}\left( \mathcal{C} \right) = \left\{
-\sum_{c \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
+\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
 \text{ : } \lambda_{\boldsymbol{c}} \ge 0,
 \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
 ,\end{align*} %
 %
 which represents the \textit{convex hull} of all possible codewords,
-i.e. the convex set of linear combinations of all codewords.
+i.e., the set of all convex combinations of the codewords.
 However, since the number of constraints needed to characterize the codeword
 polytope is exponential in the code length, this formulation is relaxed further.
 By observing that each check node defines its own local single parity-check
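The convex-hull definition can be probed directly: any weights $\lambda_{\boldsymbol{c}} \ge 0$ summing to one yield a point of $\text{poly}\left( \mathcal{C} \right)$. A small sketch with a hypothetical two-check code:

    import itertools
    import numpy as np

    H = np.array([[1, 1, 1, 0],
                  [0, 1, 1, 1]])                 # hypothetical example code
    C = [np.array(c) for c in itertools.product((0, 1), repeat=4)
         if not ((H @ np.array(c)) % 2).any()]

    rng = np.random.default_rng(0)
    lam = rng.dirichlet(np.ones(len(C)))         # lambda_c >= 0, sum = 1
    point = sum(l * c for l, c in zip(lam, C))   # an element of poly(C)
    print(point)                                 # fractional in general

Since a linear program attains its optimum at a vertex, optimizing over this exact polytope recovers a codeword; the difficulty is the exponential number of constraints noted above.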
@@ -244,12 +248,14 @@ This consideration leads to constraints, that can be described as follows
 \cite[Sec. II, A]{efficient_lp_dec_admm}:%
 %
 \begin{align*}
-\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j}
+\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}
 \hspace{5mm}\forall j\in \mathcal{J}
 ,\end{align*}%
+\todo{Explicitly state that the first relaxation is essentially just lifting the integer
+requirement}%
 where $\boldsymbol{T}_j$ is the \textit{transfer matrix}, which selects the
 neighboring variable nodes
-of check node $j$%
+of check node $j$
 \footnote{For example, if the $j$th row of the parity-check matrix
 $\boldsymbol{H}$ was $\boldsymbol{h}_j =
 \begin{bmatrix} 0 & 1 & 0 & 1 & 0 & 1 & 0 \end{bmatrix}$,
@@ -259,17 +265,17 @@ the transfer matrix would be $\boldsymbol{T}_j =
 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
 \end{bmatrix} $ (example taken from \cite[Sec. II, A]{efficient_lp_dec_admm}).}
-(i.e. the relevant components of $\boldsymbol{c}$ for parity-check $j$)
-and $\mathcal{P}_{d}$ is the \textit{check polytope}, the convex hull of all
-binary vectors of length $d$ with even parity%
+(i.e., the relevant components of $\boldsymbol{c}$ for parity-check $j$)
+and $\mathcal{P}_{d_j}$ is the \textit{check polytope}, the convex hull of all
+binary vectors of length $d_j$ with even parity%
 \footnote{Essentially $\mathcal{P}_{d_j}$ is the set of vectors that satisfy
 parity-check $j$, but extended to the continuous domain.}%
 .
 
 In figure \ref{fig:dec:poly}, the two relaxations are compared for an
-example code.
+exemplary code.
 Figure \ref{fig:dec:poly:exact} shows the codeword polytope
-$\text{poly}\left( \mathcal{C} \right) $, i.e. the constraints for the
+$\text{poly}\left( \mathcal{C} \right) $, i.e., the constraints for the
 equivalent linear program to exact \ac{ML} decoding - only valid codewords are
 feasible solutions.
 Figure \ref{fig:dec:poly:local} shows the local codeword polytope of each check
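The transfer matrix in the footnote is mechanical to construct; this short sketch reproduces the quoted example:

    import numpy as np

    def transfer_matrix(h_row):
        """T_j has one row per participating variable node; each row
        selects one component of c entering parity-check j."""
        idx = np.flatnonzero(h_row)
        T = np.zeros((idx.size, h_row.size), dtype=int)
        T[np.arange(idx.size), idx] = 1
        return T

    h_j = np.array([0, 1, 0, 1, 0, 1, 0])   # the row quoted in the footnote
    print(transfer_matrix(h_j))             # rows select c_2, c_4 and c_6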
@@ -577,16 +583,16 @@ figure \ref{fig:dec:poly:relaxed}.%
 It can be seen that the relaxed codeword polytope $\overline{Q}$ introduces
 vertices with fractional values;
 these represent erroneous non-codeword solutions to the linear program and
-correspond to the so-called \textit{pseudocodewords} introduced in
+correspond to the so-called \textit{pseudo-codewords} introduced in
 \cite{feldman_paper}.
 However, since for \ac{LDPC} codes $\overline{Q}$ scales linearly with $n$ instead of
 exponentially, it is a lot more tractable for practical applications.
 
-The resulting formulation of the relaxed optimization problem is the following:%
+The resulting formulation of the relaxed optimization problem becomes:%
 %
 \begin{align*}
-\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
-\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j},
+\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\
+\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j},
 \hspace{5mm}j\in\mathcal{J}
 .\end{align*}%
 
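Small instances of this relaxed problem can be handed directly to a stock LP solver. The sketch below assumes the odd-subset ("forbidden set") inequalities from \cite{feldman_paper} as the explicit description of $\mathcal{P}_{d_j}$, which the hunk above does not spell out; $\boldsymbol{H}$ and the LLR vector are hypothetical.

    import itertools
    import numpy as np
    from scipy.optimize import linprog

    def lp_decode(gamma, H):
        r"""Relaxed LP decoding: minimize gamma^T c over 0 <= c_i <= 1,
        subject to sum_{i in S} c_i - sum_{i in N(j)\S} c_i <= |S| - 1
        for every check j and every odd-sized subset S of N(j)."""
        m, n = H.shape
        A, b = [], []
        for j in range(m):
            nbrs = np.flatnonzero(H[j])
            for r in range(1, nbrs.size + 1, 2):        # odd |S| only
                for S in itertools.combinations(nbrs, r):
                    row = np.zeros(n)
                    row[list(S)] = 1.0
                    row[np.setdiff1d(nbrs, S)] = -1.0
                    A.append(row)
                    b.append(len(S) - 1.0)
        res = linprog(gamma, A_ub=np.array(A), b_ub=np.array(b),
                      bounds=[(0.0, 1.0)] * n)
        return res.x

    H = np.array([[1, 1, 0, 1, 1, 0, 0],
                  [1, 0, 1, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 0, 1]])
    gamma = np.array([0.9, -1.1, 0.3, 1.2, 0.7, -0.2, 0.8])
    print(lp_decode(gamma, H))

An integral solution comes with Feldman's ML certificate; fractional coordinates indicate one of the pseudo-codeword vertices discussed above.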
@@ -633,28 +639,28 @@ determined by the channel model.
 The prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$ is also
 known, since the equal probability assumption is made on
 $\mathcal{C}\left( \boldsymbol{H} \right)$.
-However, because the considered domain is continuous,
+However, since the considered domain is continuous,
 the prior \ac{PDF} cannot be ignored as a constant during the minimization
 as is often done, and has a rather unwieldy representation:%
 %
 \begin{align}
 f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) =
-\frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| }
-\sum_{c \in \mathcal{C}\left( \boldsymbol{H} \right) }
+\frac{1}{\left| \mathcal{C} \right| }
+\sum_{\boldsymbol{c} \in \mathcal{C} }
 \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right)
 \label{eq:prox:prior_pdf}
 .\end{align}%
 %
 In order to rewrite the prior \ac{PDF}
 $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$,
-the so-called \textit{code-constraint polynomial} is introduced:%
+the so-called \textit{code-constraint polynomial} is introduced as:%
 %
 \begin{align*}
 h\left( \boldsymbol{x} \right) =
 \underbrace{\sum_{j=1}^{n} \left( x_j^2-1 \right) ^2}_{\text{Bipolar constraint}}
 + \underbrace{\sum_{i=1}^{m} \left[
 \left( \prod_{j\in \mathcal{A}
-\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity Constraint}}%
+\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity constraint}}%
 .\end{align*}%
 %
 The intention of this function is to provide a way to penalize vectors far
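The code-constraint polynomial translates directly into numpy, reading $\mathcal{A}\left( i \right)$ off as the support of the $i$th row of $\boldsymbol{H}$ ($\boldsymbol{x}$ is a vector in the bipolar domain; the small $\boldsymbol{H}$ is a hypothetical example):

    import numpy as np

    def h(x, H):
        """Code-constraint polynomial: the bipolar term drives each x_j
        toward {-1, +1}; the parity term drives prod_{j in A(i)} x_j
        toward +1, i.e. even parity in the bipolar domain."""
        bipolar = np.sum((x ** 2 - 1.0) ** 2)
        parity = sum((np.prod(x[np.flatnonzero(H[i])]) - 1.0) ** 2
                     for i in range(H.shape[0]))
        return bipolar + parity

    H = np.array([[1, 1, 1, 0], [0, 1, 1, 1]])     # hypothetical example
    print(h((-1.0) ** np.array([1, 1, 0, 1]), H))  # codeword image -> 0.0

h vanishes exactly on the bipolar images of codewords and grows with the distance from them, which is the penalization the following paragraph describes.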
@@ -662,20 +668,20 @@ from a codeword and favor those close to one.
 In order to achieve this, the polynomial is composed of two parts: one term
 representing the bipolar constraint, providing for a discrete solution of the
 continuous optimization problem, and one term representing the parity
-constraint, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
-The prior \ac{PDF} is then approximated using the code-constraint polynomial:%
+constraints, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
+The prior \ac{PDF} is then approximated using the code-constraint polynomial as:%
 %
 \begin{align}
 f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)
-\approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }%
+\approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) }%
 \label{eq:prox:prior_pdf_approx}
 .\end{align}%
 %
 The authors justify this approximation by arguing that for
 $\gamma \rightarrow \infty$, the approximation in equation
-\ref{eq:prox:prior_pdf_approx} approaches the original function in equation
-\ref{eq:prox:prior_pdf}.
-This approximation can then be plugged into equation \ref{eq:prox:vanilla_MAP}
+(\ref{eq:prox:prior_pdf_approx}) approaches the original function in equation
+(\ref{eq:prox:prior_pdf}).
+This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP})
 and the likelihood can be rewritten using the negative log-likelihood
 $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left(
@@ -683,8 +689,8 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 %
 \begin{align*}
 \hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
-e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
-e^{-\gamma h\left( \boldsymbol{x} \right) } \\
+\mathrm{e}^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
+\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) } \\
 &= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
 L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)
@@ -692,10 +698,10 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 .\end{align*}%
 %
 Thus, with proximal decoding, the objective function
-$f\left( \boldsymbol{x} \right)$ considered is%
+$g\left( \boldsymbol{x} \right)$ considered is%
 %
 \begin{align}
-f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{x} \mid \boldsymbol{y} \right)
+g\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)%
 \label{eq:prox:objective_function}
 \end{align}%
@@ -703,14 +709,14 @@ $f\left( \boldsymbol{x} \right)$ considered is%
 and the decoding problem is reformulated to%
 %
 \begin{align*}
-\text{minimize}\hspace{2mm} &L\left( \boldsymbol{x} \mid \boldsymbol{y} \right)
+\text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)\\
 \text{subject to}\hspace{2mm} &\boldsymbol{x} \in \mathbb{R}^n
 .\end{align*}
 %
 
 For the solution of the approximate \ac{MAP} decoding problem, the two parts
-of \ref{eq:prox:objective_function} are considered separately:
+of equation (\ref{eq:prox:objective_function}) are considered separately:
 the minimization of the objective function occurs in an alternating
 fashion, switching between the negative log-likelihood
 $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled
@@ -737,8 +743,8 @@ It is then immediately approximated with gradient-descent:%
 \argmin_{\boldsymbol{t} \in \mathbb{R}^n}
 \left( \gamma h\left( \boldsymbol{t} \right) +
 \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\
-&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{r} \right),
-\hspace{5mm} \gamma \text{ small}
+&\approx \boldsymbol{r} - \gamma \nabla h \left( \boldsymbol{r} \right),
+\hspace{5mm} \gamma > 0, \text{ small}
 .\end{align*}%
 %
 The second step thus becomes%
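A sketch of this gradient step: $\nabla h$ follows from the chain rule, with a leave-one-out product for the parity term; the default step size and the small-$\gamma$ regime are assumptions, as in the excerpt.

    import numpy as np

    def grad_h(x, H):
        """Gradient of the code-constraint polynomial h."""
        g = 4.0 * x * (x ** 2 - 1.0)               # bipolar term
        for i in range(H.shape[0]):
            idx = np.flatnonzero(H[i])
            p = np.prod(x[idx])
            for j in idx:                          # parity term, chain rule
                others = np.prod(x[np.setdiff1d(idx, j)])
                g[j] += 2.0 * (p - 1.0) * others
        return g

    def prox_step(r, H, gamma=0.05):
        """One gradient step approximating the proximal operator:
        prox_{gamma h}(r) ~= r - gamma * grad_h(r) for small gamma > 0."""
        return r - gamma * grad_h(r, H)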
@@ -775,7 +781,9 @@ is%
 %
 \begin{align*}
 f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-= \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 }{\sigma^2}}
+= \frac{1}{\sqrt{2\pi\sigma^2}}\mathrm{e}^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x}
+\rVert^2 }
+{2\sigma^2}}
 .\end{align*}
 %
 Thus, the gradient of the negative log-likelihood becomes%
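The gradient this last sentence leads into follows from the Gaussian \ac{PDF} above: up to additive constants, $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = \lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 / \left( 2\sigma^2 \right)$, so $\nabla_{\boldsymbol{x}} L = \left( \boldsymbol{x}-\boldsymbol{y} \right) / \sigma^2$ (presumably what the cut-off formula states). A toy end-to-end loop, reusing grad_h from the sketch above; the step sizes and iteration count are assumptions.

    import numpy as np

    def grad_L_awgn(x, y, sigma2):
        """Gradient of L(y|x) = ||y - x||^2 / (2 sigma^2) + const."""
        return (x - y) / sigma2

    def proximal_decode(y, H, sigma2, gamma=0.05, iters=200):
        """Alternating gradient steps on L(y|x) and on gamma * h(x),
        with grad_h as defined in the previous sketch."""
        x = y.astype(float).copy()
        for _ in range(iters):
            r = x - gamma * grad_L_awgn(x, y, sigma2)  # likelihood step
            x = r - gamma * grad_h(r, H)               # code-constraint step
        return (x < 0).astype(int)                     # bipolar -> bits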
@@ -73,6 +73,8 @@ Lastly, the optimization methods utilized are described.
 \label{fig:notation}
 \end{figure}
 
+\todo{Note about $\tilde{\boldsymbol{c}}$ (and maybe $\tilde{\boldsymbol{x}}$?)}
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Channel Coding with LDPC Codes}