\chapter{Decoding Techniques}%
\label{chapter:decoding_techniques}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Decoding using Optimization Methods}%
\label{sec:dec:Decoding using Optimization Methods}

%
% TODOs
%
\begin{itemize}
	\item General methodology
\end{itemize}

%
% Figure showing decoding space
%
\begin{figure}[H]
	\centering
	\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
	\tdplotsetmaincoords{60}{245}
	\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
		% Cube
		\draw[dashed] (0, 0, 0) -- (2, 0, 0);
		\draw[dashed] (2, 0, 0) -- (2, 0, 2);
		\draw[] (2, 0, 2) -- (0, 0, 2);
		\draw[] (0, 0, 2) -- (0, 0, 0);
		\draw[] (0, 2, 0) -- (2, 2, 0);
		\draw[] (2, 2, 0) -- (2, 2, 2);
		\draw[] (2, 2, 2) -- (0, 2, 2);
		\draw[] (0, 2, 2) -- (0, 2, 0);
		\draw[] (0, 0, 0) -- (0, 2, 0);
		\draw[dashed] (2, 0, 0) -- (2, 2, 0);
		\draw[] (2, 0, 2) -- (2, 2, 2);
		\draw[] (0, 0, 2) -- (0, 2, 2);
		% Polytope Annotations
		\node[codeword] (c000) at (0, 0, 0) {};% {$\left( 0, 0, 0 \right) $};
		\node[codeword] (c101) at (2, 0, 2) {};% {$\left( 1, 0, 1 \right) $};
		\node[codeword] (c110) at (2, 2, 0) {};% {$\left( 1, 1, 0 \right) $};
		\node[codeword] (c011) at (0, 2, 2) {};% {$\left( 0, 1, 1 \right) $};
		\node[color=KITblue, right=0cm of c000] {$\left( 0, 0, 0 \right) $};
		\node[color=KITblue, above=0cm of c101] {$\left( 1, 0, 1 \right) $};
		\node[color=KITblue, left=0cm of c110] {$\left( 1, 1, 0 \right) $};
		\node[color=KITblue, left=-0.1cm of c011] {$\left( 0, 1, 1 \right) $};
		% f
		\node[color=KITgreen, fill=KITgreen, draw, circle, inner sep=0pt, minimum size=4pt] (f) at (0.9, 0.7, 1) {};
		\node[color=KITgreen, right=0cm of f] {$\boldsymbol{f}$};
	\end{tikzpicture}
	\caption{Hypercube ($n=3$) and valid codewords of a single parity-check code}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{LP Decoding using ADMM}%
\label{sec:dec:LP Decoding using ADMM}

\begin{itemize}
	\item Equivalent \ac{ML} optimization problem
	\item \Ac{LP} relaxation
	\item \Ac{ADMM} as a solver
\end{itemize}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proximal Decoding}%
\label{sec:dec:Proximal Decoding}

Proximal decoding was proposed by Wadayama et al.\ \cite{proximal_paper}. With this decoding algorithm, the objective function is minimized using the proximal gradient method. In contrast to \ac{LP} decoding, the objective function is based on a non-convex optimization formulation of the \ac{MAP} decoding problem.

In order to derive the objective function, the authors reformulate the \ac{MAP} decoding problem:%
%
\begin{align}
	\hat{\boldsymbol{x}} = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{X} \mid \boldsymbol{Y}} \left( \boldsymbol{x} \mid \boldsymbol{y} \right) = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{Y} \mid \boldsymbol{X}} \left( \boldsymbol{y} \mid \boldsymbol{x} \right) f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)%
	\label{eq:prox:vanilla_MAP}
\end{align}%
%
The likelihood $f_{\boldsymbol{Y} \mid \boldsymbol{X}} \left( \boldsymbol{y} \mid \boldsymbol{x} \right)$ is usually a known function determined by the channel model.
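For example, if the codeword is assumed to be transmitted in its bipolar representation over an AWGN channel with noise variance $\sigma^2$ (this channel model serves purely as an illustration here), the likelihood is a Gaussian density, and the corresponding negative log-likelihood and its gradient, which are used in the following, read%
%
\begin{align*}
	L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = \frac{1}{2\sigma^2} \lVert \boldsymbol{y} - \boldsymbol{x} \rVert ^2 + \text{const.}, \hspace{5mm} \nabla L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = \frac{1}{\sigma^2} \left( \boldsymbol{x} - \boldsymbol{y} \right)
.\end{align*}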
In order to rewrite the prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$, the so-called \textit{code-constraint polynomial} is introduced:%
%
\begin{align}
	h\left( \boldsymbol{x} \right) = \sum_{j=1}^{n} \left( x_j^2-1 \right) ^2 + \sum_{i=1}^{m} \left[ \left( \prod_{j\in \mathcal{A}\left( i \right) } x_j \right) -1 \right] ^2%
	\label{eq:prox:ccp}
\end{align}%
%
The intention of this function is to penalize vectors far from a codeword and to favor those close to a codeword. To achieve this, the polynomial is composed of two parts: one term representing the bipolar constraint, which pushes the solution of the continuous optimization problem towards discrete values, and one term representing the parity constraint, which takes on the role of the parity-check matrix $\boldsymbol{H}$.
%
The prior \ac{PDF} is then approximated using the code-constraint polynomial\todo{Italic?}:%
%
\begin{align}
	f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) = \frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| } \sum_{\boldsymbol{c} \in \mathcal{C}\left( \boldsymbol{H} \right) } \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right) \approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }%
	\label{eq:prox:prior_pdf_approx}
\end{align}%
%
Here, $Z$ is a normalization constant. The authors justify this approximation by arguing that for $\gamma \rightarrow \infty$, the right-hand side approaches the left-hand side. In \ref{eq:prox:vanilla_MAP}, the prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) $ can then be replaced by the approximation \ref{eq:prox:prior_pdf_approx}, and the likelihood can be expressed in terms of the negative log-likelihood, $f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }$. Dropping the constant factors, which do not affect the maximization, yields%
%
\begin{align}
	\hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) } e^{-\gamma h\left( \boldsymbol{x} \right) } \nonumber \\
	&= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left( L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + \gamma h\left( \boldsymbol{x} \right) \right)%
	\label{eq:prox:approx_map_problem}
\end{align}%
%
Thus, with proximal decoding, the objective function $f\left( \boldsymbol{x} \right)$ to be minimized is%
%
\begin{align}
	f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + \gamma h\left( \boldsymbol{x} \right)%
	\label{eq:prox:objective_function}
.\end{align}\todo{Dot after equations?}

For the solution of the approximate \ac{MAP} decoding problem, the two parts of \ref{eq:prox:approx_map_problem} are considered separately from one another: the minimization of the objective function occurs in an alternating manner, switching between the minimization of the negative log-likelihood $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled code-constraint polynomial $\gamma h\left( \boldsymbol{x} \right) $. Two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are introduced, describing the result of each of the two steps.
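Both of the resulting update steps rely on gradient information. While $\nabla L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ depends on the channel model (cf.\ the AWGN example above), the gradient of the code-constraint polynomial follows directly from \ref{eq:prox:ccp} by componentwise differentiation. Writing $\mathcal{B}\left( j \right) $ for the set of checks in which variable $j$ participates (this notation is introduced here only for this expression), one obtains%
%
\begin{align*}
	\frac{\partial h\left( \boldsymbol{x} \right) }{\partial x_j} = 4 x_j \left( x_j^2 - 1 \right) + 2 \sum_{i \in \mathcal{B}\left( j \right) } \left[ \left( \prod_{k \in \mathcal{A}\left( i \right) } x_k \right) - 1 \right] \prod_{k \in \mathcal{A}\left( i \right) \setminus \left\{ j \right\} } x_k
.\end{align*}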
The first step, minimizing the negative log-likelihood using gradient descent, yields%
%
\begin{align*}
	\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla L\left( \boldsymbol{y} \mid \boldsymbol{s} \right), \hspace{5mm}\omega > 0
.\end{align*}%
%
For the second step, minimizing the scaled code-constraint polynomial using the proximal gradient method, the proximal operator of $\gamma h\left( \boldsymbol{x} \right) $ has to be computed; it is immediately approximated by a gradient-descent step:%
%
\begin{align*}
	\text{prox}_{\gamma h} \left( \boldsymbol{x} \right) &\equiv \argmin_{\boldsymbol{t} \in \mathbb{R}^n} \left( \gamma h\left( \boldsymbol{t} \right) + \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert ^2 \right)\\
	&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{x} \right), \hspace{5mm} \gamma \text{ small}
.\end{align*}%
%
The second step thus becomes%
%
\begin{align*}
	\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right), \hspace{5mm}\gamma > 0,\text{ small}
.\end{align*}
%
While the approximation of the prior \ac{PDF} made in \ref{eq:prox:prior_pdf_approx} theoretically becomes better with larger $\gamma$, the constraint that $\gamma$ be small is important, as it keeps the effect of $h\left( \boldsymbol{x} \right) $ on the landscape of the objective function small. Otherwise, unwanted stationary points, including local minima, are introduced. The authors state that, in practice, the value of $\gamma$ should be adjusted according to the decoding performance. The iterative decoding process resulting from this consideration is shown in Figure~\ref{fig:prox:alg}.

\begin{figure}[H]
	\centering
	\begin{genericAlgorithm}[caption={}, label={}]
$\boldsymbol{s} \leftarrow \boldsymbol{0}$
for $K$ iterations do
	$\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla L \left( \boldsymbol{y} \mid \boldsymbol{s} \right) $
	$\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) $
	$\hat{\boldsymbol{x}} \leftarrow \text{sign}\left( \boldsymbol{s} \right) $
	$\hat{\boldsymbol{c}} \leftarrow \frac{1}{2} \left( \boldsymbol{1} - \hat{\boldsymbol{x}} \right) $
	if $\boldsymbol{H}\hat{\boldsymbol{c}} = \boldsymbol{0}$ then
		return $\hat{\boldsymbol{c}}$
	end if
end for
return $\hat{\boldsymbol{c}}$
	\end{genericAlgorithm}
	\caption{Proximal decoding algorithm}
	\label{fig:prox:alg}
\end{figure}
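To make the individual steps more tangible, the listing in Figure~\ref{fig:prox:alg_numpy} sketches one possible realization of this iteration in Python using NumPy. It is meant only as an illustration, not as part of the original algorithm description: it assumes the AWGN gradient $\nabla L\left( \boldsymbol{y} \mid \boldsymbol{s} \right) = \left( \boldsymbol{s} - \boldsymbol{y} \right) / \sigma^2$ from the example above, and all identifiers (e.g., \texttt{grad\_h}, \texttt{proximal\_decode}) are chosen freely here.

\begin{figure}[H]
	\centering
	\begin{genericAlgorithm}[caption={}, label={}]
import numpy as np

def grad_h(H, x):
    # Gradient of the code-constraint polynomial h(x)
    g = 4.0 * x * (x ** 2 - 1.0)               # bipolar-constraint term
    for row in H:                              # one parity-check term per row of H
        idx = np.flatnonzero(row)              # index set A(i)
        prod = np.prod(x[idx])
        for pos, j in enumerate(idx):          # leave-one-out products
            others = np.prod(np.delete(x[idx], pos))
            g[j] += 2.0 * (prod - 1.0) * others
    return g

def proximal_decode(H, y, sigma2, gamma=0.05, omega=0.1, iterations=100):
    # Illustrative proximal decoding loop in the bipolar domain (AWGN assumption)
    s = np.zeros_like(y)
    for _ in range(iterations):
        r = s - omega * (s - y) / sigma2       # gradient step on L(y | s)
        s = r - gamma * grad_h(H, r)           # gradient step on gamma * h(r)
        x_hat = np.where(s >= 0, 1.0, -1.0)    # hard decision (sign)
        c_hat = ((1 - x_hat) / 2).astype(int)  # bipolar -> binary, x = (-1)^c
        if not np.any(H @ c_hat % 2):          # syndrome check
            break
    return c_hat
	\end{genericAlgorithm}
	\caption{Exemplary NumPy sketch of the proximal decoding iteration (illustration only)}
	\label{fig:prox:alg_numpy}
\end{figure}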