Added general methodology for optimization decoding; minor changes to proximal decoding
parent 05be3d21b6
commit 4584989ae5
@@ -1,18 +1,50 @@
\chapter{Decoding Techniques}%
\label{chapter:decoding_techniques}

In this chapter, the decoding techniques examined in this work are detailed.
First, an overview of the general methodology of using optimization methods
for channel decoding is given. Afterwards, the specific decoding techniques
themselves are explained.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Decoding using Optimization Methods}%
\label{sec:dec:Decoding using Optimization Methods}

%
% TODOs
% General methodology
%

\begin{itemize}
	\item General methodology
\end{itemize}
The general idea behind using optimization methods for channel decoding
is to reformulate the decoding problem as an optimization problem.
This new formulation can then be solved with one of the many
available optimization algorithms.

Generally, the original decoding problem considered is either the \ac{MAP} or
the \ac{ML} decoding problem:%
%
\begin{align*}
	\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
	f_{\boldsymbol{X} \mid \boldsymbol{Y}} \left( \boldsymbol{x} \mid \boldsymbol{y} \right)\\
	\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
	f_{\boldsymbol{Y} \mid \boldsymbol{X}} \left( \boldsymbol{y} \mid \boldsymbol{x} \right)
.\end{align*}
%
The goal is to arrive at a formulation in which a certain objective function
$f$ has to be minimized under certain constraints:%
%
\begin{align*}
	\text{minimize } f\left( \boldsymbol{x} \right)\\
	\text{subject to \ldots}
.\end{align*}

In contrast to the established message-passing decoding algorithms,
the viewpoint then changes from observing the decoding process in its
Tanner graph based representation (as shown in figure \ref{fig:dec:tanner})
to a spatial representation, where the codewords are some of the vertices
of a hypercube and the goal is to find the point $\boldsymbol{x}$
\todo{$\boldsymbol{x}$? Or some other variable?}
which minimizes the objective function $f$ (as shown in figure \ref{fig:dec:spacial}).

%
% Figure showing decoding space
@@ -21,48 +53,80 @@
\begin{figure}[H]
	\centering

	\begin{subfigure}[c]{0.47\textwidth}
		\centering

		\tikzstyle{checknode} = [color=KITblue, fill=KITblue,
			draw, regular polygon, regular polygon sides=4,
			inner sep=0pt, minimum size=12pt]
		\tikzstyle{variablenode} = [color=KITgreen, fill=KITgreen,
			draw, circle, inner sep=0pt, minimum size=10pt]

		\begin{tikzpicture}[scale=1, transform shape]
			\node[checknode,
				label={[below, label distance=-0.4cm, align=center]
				$c$\\$\left( x_1 + x_2 + x_3 = 0 \right) $}]
				(c) at (0, 0) {};
			\node[variablenode, label={$x_1$}] (x1) at (-2, 2) {};
			\node[variablenode, label={$x_2$}] (x2) at (0, 2) {};
			\node[variablenode, label={$x_3$}] (x3) at (2, 2) {};

			\draw (c) -- (x1);
			\draw (c) -- (x2);
			\draw (c) -- (x3);
		\end{tikzpicture}

		\caption{Tanner graph representation of a single parity-check code}
		\label{fig:dec:tanner}
	\end{subfigure}%
	\hfill%
	\begin{subfigure}[c]{0.47\textwidth}
		\centering

		\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
			draw, circle, inner sep=0pt, minimum size=4pt]

		\tdplotsetmaincoords{60}{245}
		\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
			% Cube
			\draw[dashed] (0, 0, 0) -- (2, 0, 0);
			\draw[dashed] (2, 0, 0) -- (2, 0, 2);
			\draw[] (2, 0, 2) -- (0, 0, 2);
			\draw[] (0, 0, 2) -- (0, 0, 0);

			\draw[] (0, 2, 0) -- (2, 2, 0);
			\draw[] (2, 2, 0) -- (2, 2, 2);
			\draw[] (2, 2, 2) -- (0, 2, 2);
			\draw[] (0, 2, 2) -- (0, 2, 0);

			\draw[] (0, 0, 0) -- (0, 2, 0);
			\draw[dashed] (2, 0, 0) -- (2, 2, 0);
			\draw[] (2, 0, 2) -- (2, 2, 2);
			\draw[] (0, 0, 2) -- (0, 2, 2);

			% Polytope Annotations
			\node[codeword] (c000) at (0, 0, 0) {};
			\node[codeword] (c101) at (2, 0, 2) {};
			\node[codeword] (c110) at (2, 2, 0) {};
			\node[codeword] (c011) at (0, 2, 2) {};

			\node[color=KITblue, right=0cm of c000] {$\left( 0, 0, 0 \right) $};
			\node[color=KITblue, above=0cm of c101] {$\left( 1, 0, 1 \right) $};
			\node[color=KITblue, left=0cm of c110] {$\left( 1, 1, 0 \right) $};
			\node[color=KITblue, left=-0.1cm of c011] {$\left( 0, 1, 1 \right) $};

			% x
			\node[color=KITgreen, fill=KITgreen,
				draw, circle, inner sep=0pt, minimum size=4pt] (f) at (0.9, 0.7, 1) {};
			\node[color=KITgreen, right=0cm of f] {$\boldsymbol{x}$};
		\end{tikzpicture}

		\caption{Spatial representation of a single parity-check code: the valid
		codewords are vertices of the hypercube ($n=3$)}
		\label{fig:dec:spacial}
	\end{subfigure}%
\end{figure}
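The spatial viewpoint suggests a naive baseline: enumerate the valid vertices of the hypercube and pick the one closest to the received point, which amounts to \ac{ML} decoding under \ac{AWGN}. A minimal sketch for the single parity-check code of the figure above; the function name and the BPSK mapping $x = 1 - 2c$ are illustrative assumptions, not taken from the text:

```python
import itertools
import numpy as np

# Brute-force ML decoding over the hypercube vertices (only feasible for tiny n).
# Single parity-check code with n = 3: valid codewords have even parity.
def ml_decode(y):
    codewords = [c for c in itertools.product([0, 1], repeat=3) if sum(c) % 2 == 0]
    # BPSK mapping c -> x = 1 - 2c places the codewords on the vertices {+1, -1}^3
    best = min(codewords, key=lambda c: np.sum((y - (1 - 2 * np.array(c))) ** 2))
    return np.array(best)
```

This exhaustive search is exponential in $n$; the optimization-based decoders in this chapter exist precisely to avoid it.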

@@ -120,6 +184,7 @@
representing the bipolar constraint, providing for a discrete solution of the
continuous optimization problem, and one term representing the parity
constraint, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
%
The equal probability assumption is made on $\mathcal{C}\left( \boldsymbol{H} \right) $.
The prior \ac{PDF} is then approximated using the code-constraint polynomial\todo{Italic?}:%
%
\begin{align}
@@ -186,14 +251,14 @@
immediately approximated by a gradient-descent step:%
	\argmin_{\boldsymbol{t} \in \mathbb{R}^n}
	\left( \gamma h\left( \boldsymbol{t} \right) +
	\frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\
	&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{x} \right),
	\hspace{5mm} \gamma \text{ small}
.\end{align*}%
%
The second step thus becomes%
%
\begin{align*}
	\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right),
	\hspace{5mm}\gamma > 0,\text{ small}
.\end{align*}
%
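The quality of the gradient-descent approximation of the proximal step can be checked numerically. A small sketch, assuming the scalar bipolar term $h(t) = \left( t^2 - 1 \right)^2$ as a stand-in penalty and a grid search in place of the exact proximal minimization; all names and values are illustrative:

```python
import numpy as np

# Bipolar penalty h(t) = (t^2 - 1)^2 and its derivative (illustrative stand-in)
h = lambda t: (t**2 - 1.0) ** 2
grad_h = lambda t: 4.0 * (t**2 - 1.0) * t

def prox(x, gamma):
    # argmin_t  gamma * h(t) + 0.5 * (t - x)^2, approximated by a fine grid search
    grid = np.linspace(-3.0, 3.0, 600001)
    return grid[np.argmin(gamma * h(grid) + 0.5 * (grid - x) ** 2)]

x, gamma = 0.5, 1e-3
exact = prox(x, gamma)            # proximal step (to grid accuracy)
approx = x - gamma * grad_h(x)    # gradient-descent approximation
```

For small $\gamma$ the two values agree closely; for large $\gamma$ the approximation degrades, which matches the remark below that $\gamma$ must be kept small.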
@@ -205,8 +270,7 @@
of the objective function small.
Otherwise, unwanted stationary points, including local minima, are introduced.
The authors say that in practice, the value of $\gamma$ should be adjusted
according to the decoding performance.

The iterative decoding process \todo{projection with $\eta$} resulting from this consideration is shown in
figure \ref{fig:prox:alg}.

\begin{figure}[H]
@@ -230,3 +294,40 @@
return $\boldsymbol{\hat{c}}$
	\label{fig:prox:alg}
\end{figure}

The components of the gradient of the code-constraint polynomial can be computed as follows:%
%
\begin{align*}
	\frac{\partial}{\partial x_k} h\left( \boldsymbol{x} \right) =
	4\left( x_k^2 - 1 \right) x_k + \frac{2}{x_k}
	\sum_{i\in \mathcal{B}\left( k \right) } \left(
	\left( \prod_{j\in\mathcal{A}\left( i \right)} x_j\right)^2
	- \prod_{j\in\mathcal{A}\left( i \right) }x_j \right)
.\end{align*}%
\todo{Only multiplication?}%
%
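The formula above can be transcribed directly. A sketch, assuming the parity-check matrix $\boldsymbol{H}$ is a dense 0/1 array, with $\mathcal{A}\left( i \right)$ the support of row $i$ and $\mathcal{B}\left( k \right)$ the support of column $k$; the helper name grad_h is an assumption:

```python
import numpy as np

def grad_h(x, H):
    # P[i] = prod_{j in A(i)} x_j, the product over the variables of check i
    P = np.array([np.prod(x[row.astype(bool)]) for row in H])
    # bipolar term: 4 * (x_k^2 - 1) * x_k
    grad = 4.0 * (x**2 - 1.0) * x
    for k in range(len(x)):
        checks = H[:, k].astype(bool)  # B(k): checks involving variable k
        # parity term: (2 / x_k) * sum_i (P_i^2 - P_i); undefined at x_k = 0
        grad[k] += (2.0 / x[k]) * np.sum(P[checks] ** 2 - P[checks])
    return grad
```

At a valid bipolar codeword (all $x_k \in \{\pm 1\}$ with even parity per check) both terms vanish, so the gradient is zero, as expected of a minimizer of $h$.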
In the case of \ac{AWGN}, the likelihood
$f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)$
is%
%
\begin{align*}
	f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
	= \frac{1}{\left( 2\pi\sigma^2 \right)^{n / 2}}
	e^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 }{2\sigma^2}}
.\end{align*}
%
Thus, the gradient of the negative log-likelihood becomes%
\footnote{For the minimization, constants can be disregarded. For this reason,
it suffices to consider only the proportionality instead of the equality.}%
%
\begin{align*}
	\nabla L \left( \boldsymbol{y} \mid \boldsymbol{x} \right)
	&\propto \nabla \lVert \boldsymbol{y} - \boldsymbol{x} \rVert^2\\
	&\propto \boldsymbol{x} - \boldsymbol{y}
.\end{align*}%
%
The resulting iterative decoding process under the assumption of \ac{AWGN} is
described by%
%
\begin{align*}
	\boldsymbol{r} &\leftarrow \boldsymbol{s} - \omega\left( \boldsymbol{s}-\boldsymbol{y} \right)\\
	\boldsymbol{s} &\leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right)
.\end{align*}
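The two update steps can be combined into a plain loop. A sketch under assumed parameter values ($\omega$, $\gamma$, and the iteration count are illustrative, not from the text); the inner helper transcribes the code-constraint gradient given earlier:

```python
import numpy as np

def proximal_decode(y, H, omega=0.1, gamma=0.05, iters=100):
    def grad_h(x):
        # gradient of the code-constraint polynomial (undefined where x_k = 0)
        P = np.array([np.prod(x[row.astype(bool)]) for row in H])
        g = 4.0 * (x**2 - 1.0) * x
        for k in range(len(x)):
            c = H[:, k].astype(bool)
            g[k] += (2.0 / x[k]) * np.sum(P[c] ** 2 - P[c])
        return g

    s = y.astype(float).copy()
    for _ in range(iters):
        r = s - omega * (s - y)    # likelihood step: gradient of ||s - y||^2 under AWGN
        s = r - gamma * grad_h(r)  # code-constraint step
    return (s < 0).astype(int)     # hard decision back to bits via x = 1 - 2c
```

The hard decision at the end plays the role of the final quantization; the projection step $\eta$ mentioned in the todo above is not modeled in this sketch.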
@@ -33,6 +33,7 @@

\usetikzlibrary{external}
\usetikzlibrary{spy}
\usetikzlibrary{shapes.geometric}

\pgfplotsset{compat=newest}
\usepgfplotslibrary{colorbrewer}