\chapter{Decoding Techniques}%
\label{chapter:decoding_techniques}

In this chapter, the decoding techniques examined in this work are detailed. First, an overview of the general methodology of using optimization methods for channel decoding is given. Afterwards, the specific decoding techniques themselves are explained.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Decoding using Optimization Methods}%
\label{sec:dec:Decoding using Optimization Methods}
%
% General methodology
%
The general idea behind using optimization methods for channel decoding is to reformulate the decoding problem as an optimization problem. This new formulation can then be solved with one of the many available optimization algorithms. Generally, the original decoding problem considered is either the \ac{MAP} or the \ac{ML} decoding problem:%
%
\begin{align*}
	\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}} f_{\boldsymbol{C} \mid \boldsymbol{Y}} \left( \boldsymbol{c} \mid \boldsymbol{y} \right)\\
	\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}} f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
.\end{align*}%
%
Since $f_{\boldsymbol{C} \mid \boldsymbol{Y}} \left( \boldsymbol{c} \mid \boldsymbol{y} \right) \propto f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right) f_{\boldsymbol{C}} \left( \boldsymbol{c} \right) $, the two problems yield the same decision whenever the a priori distribution of the codewords is uniform. Depending on the formulation, the optimization variable is either the codeword $\boldsymbol{c}$ itself or a continuous surrogate $\boldsymbol{x}$, such as its bipolar representation. The goal is to arrive at a formulation in which a certain objective function $f$ has to be minimized under certain constraints:%
%
\begin{align*}
	\text{minimize }\hspace{2mm} & f\left( \boldsymbol{x} \right)\\
	\text{subject to }\hspace{2mm} & \ldots
.\end{align*}
In contrast to the established message-passing decoding algorithms, the viewpoint then changes from observing the decoding process in its Tanner graph representation (as shown in figure \ref{fig:dec:tanner}) to a spatial representation, where the codewords are some of the vertices of a hypercube and the goal is to find the point $\boldsymbol{x}$ which minimizes the objective function $f$ (as shown in figure \ref{fig:dec:spacial}).
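To make the spatial viewpoint concrete, the following minimal sketch performs brute-force \ac{ML} decoding of the single parity-check code from figure \ref{fig:dec:spacial}. The BPSK mapping $0 \mapsto +1$, $1 \mapsto -1$ and an \ac{AWGN} channel are assumptions made purely for illustration; under them, maximizing the likelihood is equivalent to finding the codeword closest to the received vector $\boldsymbol{y}$ in Euclidean distance.
%
\begin{lstlisting}[language=Python]
import numpy as np

# All codewords of the (3, 2) single parity-check code,
# i.e. the marked vertices of the hypercube in the figure.
C = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 0]])

def ml_decode(y):
    """Brute-force ML decoding over an AWGN channel (illustrative sketch)."""
    x = 1 - 2 * C                      # BPSK: 0 -> +1, 1 -> -1
    d = np.sum((y - x) ** 2, axis=1)   # squared Euclidean distances
    return C[np.argmin(d)]             # closest codeword maximizes the likelihood

print(ml_decode(np.array([0.9, -0.2, -1.1])))  # -> [0 1 1]
\end{lstlisting}
%
Such an enumeration is exponential in the code dimension, which is precisely why the optimization formulations discussed in the following sections are of interest.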
%
% Figure showing decoding space
%
\begin{figure}[H]
	\centering
	\begin{subfigure}[c]{0.47\textwidth}
		\centering
		\tikzstyle{checknode} = [color=KITblue, fill=KITblue, draw, regular polygon,regular polygon sides=4, inner sep=0pt, minimum size=12pt]
		\tikzstyle{variablenode} = [color=KITgreen, fill=KITgreen, draw, circle, inner sep=0pt, minimum size=10pt]
		\begin{tikzpicture}[scale=1, transform shape]
			\node[checknode, label={[below, label distance=-0.4cm, align=center] $c$\\$\left( x_1 + x_2 + x_3 = 0 \right) $}] (c) at (0, 0) {};
			\node[variablenode, label={$x_1$}] (x1) at (-2, 2) {};
			\node[variablenode, label={$x_2$}] (x2) at (0, 2) {};
			\node[variablenode, label={$x_3$}] (x3) at (2, 2) {};
			\draw (c) -- (x1);
			\draw (c) -- (x2);
			\draw (c) -- (x3);
		\end{tikzpicture}
		\caption{Tanner graph representation of a single parity-check code}
		\label{fig:dec:tanner}
	\end{subfigure}%
	\hfill%
	\begin{subfigure}[c]{0.47\textwidth}
		\centering
		\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
		\tdplotsetmaincoords{60}{245}
		\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
			% Cube
			\draw[dashed] (0, 0, 0) -- (2, 0, 0);
			\draw[dashed] (2, 0, 0) -- (2, 0, 2);
			\draw[] (2, 0, 2) -- (0, 0, 2);
			\draw[] (0, 0, 2) -- (0, 0, 0);
			\draw[] (0, 2, 0) -- (2, 2, 0);
			\draw[] (2, 2, 0) -- (2, 2, 2);
			\draw[] (2, 2, 2) -- (0, 2, 2);
			\draw[] (0, 2, 2) -- (0, 2, 0);
			\draw[] (0, 0, 0) -- (0, 2, 0);
			\draw[dashed] (2, 0, 0) -- (2, 2, 0);
			\draw[] (2, 0, 2) -- (2, 2, 2);
			\draw[] (0, 0, 2) -- (0, 2, 2);
			% Polytope Annotations
			\node[codeword] (c000) at (0, 0, 0) {};
			\node[codeword] (c101) at (2, 0, 2) {};
			\node[codeword] (c110) at (2, 2, 0) {};
			\node[codeword] (c011) at (0, 2, 2) {};
			\node[color=KITblue, right=0cm of c000] {$\left( 0, 0, 0 \right) $};
			\node[color=KITblue, above=0cm of c101] {$\left( 1, 0, 1 \right) $};
			\node[color=KITblue, left=0cm of c110] {$\left( 1, 1, 0 \right) $};
			\node[color=KITblue, left=-0.1cm of c011] {$\left( 0, 1, 1 \right) $};
			% x
			\node[color=KITgreen, fill=KITgreen, draw, circle, inner sep=0pt, minimum size=4pt] (f) at (0.9, 0.7, 1) {};
			\node[color=KITgreen, right=0cm of f] {$\boldsymbol{x}$};
		\end{tikzpicture}
		\caption{Spatial representation of a single parity-check code}
		\label{fig:dec:spacial}
	\end{subfigure}%
	\caption{Different representations of the decoding problem}
\end{figure}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{LP Decoding using ADMM}%
\label{sec:dec:LP Decoding using ADMM}

\Ac{LP} decoding is an approach introduced by Feldman et al.~\cite{feldman_paper}. They reframe the decoding problem as an \textit{integer linear program} and subsequently present two \textit{linear program} formulations: an exact formulation of \ac{ML} decoding and a relaxed approximation with a more manageable representation. To solve the resulting linear program, various optimization methods can be used. The one examined in this work is \ac{ADMM}, which lends itself to this problem because it decomposes the linear program into small subproblems that can be solved efficiently and in parallel. Feldman et al.
begin by looking at the \ac{ML} decoding problem%
\footnote{They assume that all codewords are equally likely to be transmitted, making the \ac{ML} and \ac{MAP} decoding problems essentially equivalent.}%
%
\begin{align*}
	\hat{\boldsymbol{c}} = \argmax_{\boldsymbol{c} \in \mathcal{C}} f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
.\end{align*}
%
They observe that maximizing the likelihood $f_{\boldsymbol{Y} \mid \boldsymbol{C}}\left( \boldsymbol{y} \mid \boldsymbol{c} \right)$ is equivalent to minimizing the negative log-likelihood $-\ln f_{\boldsymbol{Y} \mid \boldsymbol{C}}\left( \boldsymbol{y} \mid \boldsymbol{c} \right) $. For a memoryless channel, the likelihood factorizes over the individual code bits, so the negative log-likelihood decomposes into a sum of per-bit terms. Because each $c_i$ is binary, each of these terms can be written as $-\ln f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( Y_i = y_i \mid C_i = c_i \right) = \gamma_i c_i - \ln f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( Y_i = y_i \mid C_i = 0 \right) $, where the second term does not depend on $\boldsymbol{c}$ and can therefore be dropped. Based on this, they propose their cost function%
\footnote{In this context, \textit{cost function} and \textit{objective function} have the same meaning.} for the \ac{LP} decoding problem:%
%
\begin{align*}
	\sum_{i=1}^{n} \gamma_i c_i, \hspace{5mm} \gamma_i = \ln\left( \frac{f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( Y_i = y_i \mid C_i = 0 \right) } {f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( Y_i = y_i \mid C_i = 1 \right) } \right)
.\end{align*}
%
With this cost function, the exact integer linear program formulation of \ac{ML} decoding is the following:%
%
\begin{align*}
	\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
	\text{subject to }\hspace{2mm} &\boldsymbol{c} \in \mathcal{C}
.\end{align*}%
%
As solving integer linear programs is generally NP-hard, this decoding problem has to be approximated by one with looser constraints. A technique called \textit{relaxation} is applied, essentially modifying the constraints in order to broaden the considered domain (e.g. by lifting the integer requirement). First, the authors present an equivalent \ac{LP} formulation of exact \ac{ML} decoding, redefining the constraints in terms of the \textit{codeword polytope}
%
\begin{align*}
	\text{poly}\left( \mathcal{C} \right) = \left\{ \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c} \text{ : } \lambda_{\boldsymbol{c}} \ge 0, \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
,\end{align*}
%
which represents the \textit{convex hull} of all possible codewords, i.e. the set of convex combinations of all codewords (visualized in figure \ref{fig:dec:poly}). However, since the number of constraints needed to characterize this codeword polytope is exponential in the code length, this formulation is relaxed further.
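Before turning to the relaxation, it is worth making the cost function concrete. For a binary-input \ac{AWGN} channel with the (assumed) BPSK mapping $0 \mapsto +1$, $1 \mapsto -1$ and noise variance $\sigma^2$, the log-likelihood ratio simplifies to $\gamma_i = 2 y_i / \sigma^2$, so the cost vector can be computed directly from the channel output, as in the following sketch:
%
\begin{lstlisting}[language=Python]
import numpy as np

def llr_cost(y, sigma2):
    """Cost vector gamma for LP decoding over a BI-AWGN channel (sketch).

    Assumes the BPSK mapping 0 -> +1, 1 -> -1, under which
    gamma_i = ln(f(y_i | 0) / f(y_i | 1)) = 2 * y_i / sigma^2.
    """
    return 2.0 * np.asarray(y, dtype=float) / sigma2
\end{lstlisting}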
%
% Codeword polytope visualization figure
%
\begin{figure}[H]
	\centering
	%
	% Left side - codeword polytope
	%
	\begin{subfigure}[c]{0.45\textwidth}
		\centering
		\begin{subfigure}{\textwidth}
			\centering
			\begin{align*}
				\boldsymbol{H} &= \begin{bmatrix} 1 & 1 & 1\\ 0 & 1 & 1 \end{bmatrix}\\[1em]
				\mathcal{C} &= \left\{ \begin{bmatrix} 0\\ 0\\ 0 \end{bmatrix}, \begin{bmatrix} 0\\ 1\\ 1 \end{bmatrix} \right\}
			\end{align*}
			\caption{Definition of the visualized code}
		\end{subfigure}
		\\[7em]
		\begin{subfigure}{\textwidth}
			\centering
			\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
			\tdplotsetmaincoords{60}{25}
			\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
				% Cube
				\coordinate (p000) at (0, 0, 0);
				\coordinate (p001) at (0, 0, 2);
				\coordinate (p010) at (0, 2, 0);
				\coordinate (p011) at (0, 2, 2);
				\coordinate (p100) at (2, 0, 0);
				\coordinate (p101) at (2, 0, 2);
				\coordinate (p110) at (2, 2, 0);
				\coordinate (p111) at (2, 2, 2);
				\draw[] (p000) -- (p100);
				\draw[] (p100) -- (p101);
				\draw[] (p101) -- (p001);
				\draw[] (p001) -- (p000);
				\draw[dashed] (p010) -- (p110);
				\draw[] (p110) -- (p111);
				\draw[] (p111) -- (p011);
				\draw[dashed] (p011) -- (p010);
				\draw[dashed] (p000) -- (p010);
				\draw[] (p100) -- (p110);
				\draw[] (p101) -- (p111);
				\draw[] (p001) -- (p011);
				% Polytope Vertices
				\node[codeword] (c000) at (p000) {};
				\node[codeword] (c011) at (p011) {};
				% Polytope Edges
				\draw[line width=1pt, color=KITblue] (c000) -- (c011);
				% Polytope Annotations
				\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
				\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
			\end{tikzpicture}
			\caption{Codeword polytope}
		\end{subfigure}
	\end{subfigure}
	\hfill%
	%
	% Right side - relaxed polytope
	%
	\begin{subfigure}[c]{0.45\textwidth}
		\centering
		\begin{subfigure}{\textwidth}
			\centering
			\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
			\tdplotsetmaincoords{60}{25}
			\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
				% Cube
				\coordinate (p000) at (0, 0, 0);
				\coordinate (p001) at (0, 0, 2);
				\coordinate (p010) at (0, 2, 0);
				\coordinate (p011) at (0, 2, 2);
				\coordinate (p100) at (2, 0, 0);
				\coordinate (p101) at (2, 0, 2);
				\coordinate (p110) at (2, 2, 0);
				\coordinate (p111) at (2, 2, 2);
				\draw[] (p000) -- (p100);
				\draw[] (p100) -- (p101);
				\draw[] (p101) -- (p001);
				\draw[] (p001) -- (p000);
				\draw[dashed] (p010) -- (p110);
				\draw[] (p110) -- (p111);
				\draw[] (p111) -- (p011);
				\draw[dashed] (p011) -- (p010);
				\draw[dashed] (p000) -- (p010);
				\draw[] (p100) -- (p110);
				\draw[] (p101) -- (p111);
				\draw[] (p001) -- (p011);
				% Polytope Vertices
				\node[codeword] (c000) at (p000) {};
				\node[codeword] (c101) at (p101) {};
				\node[codeword] (c110) at (p110) {};
				\node[codeword] (c011) at (p011) {};
				% Polytope Edges
				\draw[line width=1pt, color=KITblue] (c000) -- (c101);
				\draw[line width=1pt, color=KITblue] (c000) -- (c110);
				\draw[line width=1pt, color=KITblue] (c000) -- (c011);
				\draw[line width=1pt, color=KITblue] (c101) -- (c110);
				\draw[line width=1pt, color=KITblue] (c101) -- (c011);
				\draw[line width=1pt, color=KITblue] (c011) -- (c110);
				% Polytope Annotations
				\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
				\node[color=KITblue, right=0.17cm of c101] {$\left( 1, 0, 1 \right) $};
				\node[color=KITblue, right=0cm of c110] {$\left( 1, 1, 0 \right) $};
				\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
			\end{tikzpicture}
			\caption{Local codeword polytope of parity-check $\begin{bmatrix} 1 & 1 & 1 \end{bmatrix}$}
		\end{subfigure}
		\\[1em]
		\begin{subfigure}{\textwidth}
			\centering
			\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
			\tdplotsetmaincoords{60}{25}
			\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
				% Cube
				\coordinate (p000) at (0, 0, 0);
				\coordinate (p001) at (0, 0, 2);
				\coordinate (p010) at (0, 2, 0);
				\coordinate (p011) at (0, 2, 2);
				\coordinate (p100) at (2, 0, 0);
				\coordinate (p101) at (2, 0, 2);
				\coordinate (p110) at (2, 2, 0);
				\coordinate (p111) at (2, 2, 2);
				\draw[] (p000) -- (p100);
				\draw[] (p100) -- (p101);
				\draw[] (p101) -- (p001);
				\draw[] (p001) -- (p000);
				\draw[dashed] (p010) -- (p110);
				\draw[] (p110) -- (p111);
				\draw[] (p111) -- (p011);
				\draw[dashed] (p011) -- (p010);
				\draw[dashed] (p000) -- (p010);
				\draw[] (p100) -- (p110);
				\draw[] (p101) -- (p111);
				\draw[] (p001) -- (p011);
				% Polytope Vertices
				\node[codeword] (c000) at (p000) {};
				\node[codeword] (c011) at (p011) {};
				\node[codeword] (c100) at (p100) {};
				\node[codeword] (c111) at (p111) {};
				% Polytope Edges
				\draw[line width=1pt, color=KITblue] (c000) -- (c011);
				\draw[line width=1pt, color=KITblue] (c000) -- (c100);
				\draw[line width=1pt, color=KITblue] (c100) -- (c111);
				\draw[line width=1pt, color=KITblue] (c111) -- (c011);
				% Polytope Annotations
				\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
				\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
				\node[color=KITblue, below=0cm of c100] {$\left( 1, 0, 0 \right) $};
				\node[color=KITblue, above=0cm of c111] {$\left( 1, 1, 1 \right) $};
			\end{tikzpicture}
			\caption{Local codeword polytope of parity-check $\begin{bmatrix} 0 & 1 & 1 \end{bmatrix}$}
		\end{subfigure}\\[1em]
		\begin{subfigure}{\textwidth}
			\centering
			\tikzstyle{codeword} = [color=KITblue, fill=KITblue, draw, circle, inner sep=0pt, minimum size=4pt]
			\tikzstyle{pseudocodeword} = [color=KITred, fill=KITred, draw, circle, inner sep=0pt, minimum size=4pt]
			\tdplotsetmaincoords{60}{25}
			\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
				% Cube
				\coordinate (p000) at (0, 0, 0);
				\coordinate (p001) at (0, 0, 2);
				\coordinate (p010) at (0, 2, 0);
				\coordinate (p011) at (0, 2, 2);
				\coordinate (p100) at (2, 0, 0);
				\coordinate (p101) at (2, 0, 2);
				\coordinate (p110) at (2, 2, 0);
				\coordinate (p111) at (2, 2, 2);
				\draw[] (p000) -- (p100);
				\draw[] (p100) -- (p101);
				\draw[] (p101) -- (p001);
				\draw[] (p001) -- (p000);
				\draw[dashed] (p010) -- (p110);
				\draw[] (p110) -- (p111);
				\draw[] (p111) -- (p011);
				\draw[dashed] (p011) -- (p010);
				\draw[dashed] (p000) -- (p010);
				\draw[] (p100) -- (p110);
				\draw[] (p101) -- (p111);
				\draw[] (p001) -- (p011);
				% Polytope Vertices
				\node[codeword] (c000) at (p000) {};
				\node[codeword] (c011) at (p011) {};
				\node[pseudocodeword] (cpseudo) at (2, 1, 1) {};
				% Polytope Edges
				\draw[line width=1pt, color=KITblue] (c000) -- (c011);
				\draw[line width=1pt, color=KITred] (cpseudo) -- (c000);
				\draw[line width=1pt, color=KITred] (cpseudo) -- (c011);
				% Polytope Annotations
				\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
				\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
				\node[color=KITred, right=0.03cm of cpseudo] {$\left( 1, \frac{1}{2}, \frac{1}{2} \right) $};
			\end{tikzpicture}
			\caption{Relaxed codeword polytope}
		\end{subfigure}
	\end{subfigure}
	\caption{Visualization of the codeword polytope and the relaxed codeword polytope for an example code}
	\label{fig:dec:poly}
\end{figure}
In this further relaxation, the codeword polytope is replaced by the intersection of the \textit{local codeword polytopes} of the individual parity checks, i.e. the convex hulls of the configurations satisfying one parity check each (visualized in figure \ref{fig:dec:poly}). The resulting relaxed polytope contains all codewords as vertices, but may additionally contain fractional vertices, so-called \textit{pseudocodewords}, such as the point $\left( 1, \frac{1}{2}, \frac{1}{2} \right) = \frac{1}{2}\left( 1, 0, 1 \right) + \frac{1}{2}\left( 1, 1, 0 \right) $ in figure \ref{fig:dec:poly}, which lies in every local codeword polytope without being a convex combination of codewords. Minimizing the cost function over this relaxed polytope yields a linear program, which in this work is solved using \ac{ADMM}.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proximal Decoding}%
\label{sec:dec:Proximal Decoding}

Proximal decoding was proposed by Wadayama et al.~\cite{proximal_paper}. With this decoding algorithm, the objective function is minimized using the proximal gradient method. In contrast to \ac{LP} decoding, the objective function is based on a non-convex optimization formulation of the \ac{MAP} decoding problem.

In order to derive the objective function, the authors reformulate the \ac{MAP} decoding problem:%
%
\begin{align}
	\hat{\boldsymbol{x}} = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{X} \mid \boldsymbol{Y}} \left( \boldsymbol{x} \mid \boldsymbol{y} \right) = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{Y} \mid \boldsymbol{X}} \left( \boldsymbol{y} \mid \boldsymbol{x} \right) f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)%
	\label{eq:prox:vanilla_MAP}
\end{align}%
%
Here, $\boldsymbol{x}$ denotes the bipolar representation of a codeword. The likelihood is usually a known function determined by the channel model. In order to rewrite the prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$, the so-called \textit{code-constraint polynomial} is introduced:%
%
\begin{align}
	h\left( \boldsymbol{x} \right) = \sum_{j=1}^{n} \left( x_j^2-1 \right) ^2 + \sum_{i=1}^{m} \left[ \left( \prod_{j\in \mathcal{A}\left( i \right) } x_j \right) -1 \right] ^2%
	\label{eq:prox:ccp}
\end{align}%
%
The intention of this function is to provide a way to penalize vectors far from a codeword and favor those close to a codeword. In order to achieve this, the polynomial is composed of two parts: one term representing the bipolar constraint, pushing the continuous optimization toward the discrete values $\pm 1$, and one term representing the parity constraint, taking over the role of the parity-check matrix $\boldsymbol{H}$; in the bipolar representation, parity check $i$ is satisfied exactly when $\prod_{j \in \mathcal{A}\left( i \right) } x_j = 1$.

All codewords in $\mathcal{C}\left( \boldsymbol{H} \right) $ are assumed to be equally likely. The prior \ac{PDF} is then approximated using the code-constraint polynomial:%
%
\begin{align}
	f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) = \frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| } \sum_{\boldsymbol{c} \in \mathcal{C}\left( \boldsymbol{H} \right) } \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right) \approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }%
	\label{eq:prox:prior_pdf_approx}
\end{align}%
%
The authors justify this approximation by arguing that for $\gamma \rightarrow \infty$, the right-hand side approaches the left-hand side.
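The code-constraint polynomial of equation \ref{eq:prox:ccp} translates directly into code. The following sketch assumes a binary parity-check matrix \texttt{H} of shape $m \times n$ given as a NumPy array, whose $i$-th row encodes the index set $\mathcal{A}\left( i \right) $:
%
\begin{lstlisting}[language=Python]
import numpy as np

def code_constraint(x, H):
    """Code-constraint polynomial h(x) (illustrative sketch).

    x: real-valued vector of length n (bipolar domain),
    H: binary parity-check matrix of shape (m, n).
    """
    bipolar = np.sum((x ** 2 - 1.0) ** 2)   # penalizes entries far from +-1
    prods = np.array([np.prod(x[row == 1]) for row in H])
    parity = np.sum((prods - 1.0) ** 2)     # penalizes violated parity checks
    return bipolar + parity
\end{lstlisting}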
In equation \ref{eq:prox:vanilla_MAP}, the prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) $ can then be replaced by the approximation from equation \ref{eq:prox:prior_pdf_approx}, and the likelihood can be rewritten using the negative log-likelihood $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left( f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right) \right) $:%
%
\begin{align}
	\hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) } e^{-\gamma h\left( \boldsymbol{x} \right) } \nonumber \\
	&= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left( L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + \gamma h\left( \boldsymbol{x} \right) \right)%
	\label{eq:prox:approx_map_problem}
.\end{align}%
%
Thus, with proximal decoding, the objective function $f\left( \boldsymbol{x} \right)$ to be minimized is%
%
\begin{align}
	f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + \gamma h\left( \boldsymbol{x} \right)%
	\label{eq:prox:objective_function}
.\end{align}

For the solution of the approximate \ac{MAP} decoding problem, the two parts of equation \ref{eq:prox:approx_map_problem} are considered separately: the minimization of the objective function occurs in an alternating manner, switching between the minimization of the negative log-likelihood $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled code-constraint polynomial $\gamma h\left( \boldsymbol{x} \right) $. Two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are introduced, describing the result of each of the two steps. The first step, minimizing the negative log-likelihood using gradient descent, yields%
%
\begin{align*}
	\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla L\left( \boldsymbol{y} \mid \boldsymbol{s} \right), \hspace{5mm}\omega > 0
.\end{align*}%
%
For the second step, minimizing the scaled code-constraint polynomial using the proximal gradient method, the proximal operator of $\gamma h\left( \boldsymbol{x} \right) $ has to be computed; it is immediately approximated by a gradient-descent step:%
%
\begin{align*}
	\text{prox}_{\gamma h} \left( \boldsymbol{x} \right) &\equiv \argmin_{\boldsymbol{t} \in \mathbb{R}^n} \left( \gamma h\left( \boldsymbol{t} \right) + \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\
	&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{x} \right), \hspace{5mm} \gamma \text{ small}
.\end{align*}%
%
The second step thus becomes%
%
\begin{align*}
	\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right), \hspace{5mm}\gamma > 0,\text{ small}
.\end{align*}
%
While the approximation of the prior \ac{PDF} made in equation \ref{eq:prox:prior_pdf_approx} theoretically becomes better with larger $\gamma$, the constraint that $\gamma$ be small is important, as it keeps the effect of $h\left( \boldsymbol{x} \right) $ on the landscape of the objective function small. Otherwise, unwanted stationary points, including local minima, are introduced. The authors state that in practice, the value of $\gamma$ should be adjusted according to the decoding performance. The iterative decoding process \todo{projection with $\eta$} resulting from these considerations is shown in figure \ref{fig:prox:alg}.
\begin{figure}[H]
	\centering
	\begin{genericAlgorithm}[caption={}, label={}]
$\boldsymbol{s} \leftarrow \boldsymbol{0}$
for $K$ iterations do
    $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla L \left( \boldsymbol{y} \mid \boldsymbol{s} \right) $
    $\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) $
    $\hat{\boldsymbol{x}} \leftarrow \text{sign}\left( \boldsymbol{s} \right) $
    $\hat{\boldsymbol{c}} \leftarrow \frac{1}{2}\left( \boldsymbol{1} - \hat{\boldsymbol{x}} \right) $
    if $\boldsymbol{H}\hat{\boldsymbol{c}} = \boldsymbol{0}$ do
        return $\hat{\boldsymbol{c}}$
    end if
end for
return $\hat{\boldsymbol{c}}$
	\end{genericAlgorithm}
	\caption{Proximal decoding algorithm}
	\label{fig:prox:alg}
\end{figure}

The components of the gradient of the code-constraint polynomial can be computed as follows, where $\mathcal{B}\left( k \right) $ denotes the set of parity checks involving variable $k$:%
%
\begin{align*}
	\frac{\partial}{\partial x_k} h\left( \boldsymbol{x} \right) = 4\left( x_k^2 - 1 \right) x_k + \frac{2}{x_k} \sum_{i\in \mathcal{B}\left( k \right) } \left( \left( \prod_{j\in\mathcal{A}\left( i \right)} x_j\right)^2 - \prod_{j\in\mathcal{A}\left( i \right) }x_j \right)
.\end{align*}%
%
This form holds for $x_k \neq 0$; in an implementation, the division by $x_k$ can be avoided by computing the product over $\mathcal{A}\left( i \right) \setminus \left\{ k \right\} $ directly, so that only multiplications are required. In the case of \ac{AWGN}, the likelihood $f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)$ is%
%
\begin{align*}
	f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = \frac{1}{\left( 2\pi\sigma^2 \right) ^{n/2}}e^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 }{2\sigma^2}}
.\end{align*}
%
Thus, the gradient of the negative log-likelihood becomes%
\footnote{For the minimization, positive constant factors can be disregarded. For this reason, it suffices to consider only the proportionality instead of the equality.}%
%
\begin{align*}
	\nabla L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) &\propto \nabla_{\boldsymbol{x}} \lVert \boldsymbol{y} - \boldsymbol{x} \rVert^2\\
	&\propto \boldsymbol{x} - \boldsymbol{y}
.\end{align*}%
%
The resulting iterative decoding process under the assumption of \ac{AWGN} is described by%
%
\begin{align*}
	\boldsymbol{r} &\leftarrow \boldsymbol{s} - \omega\left( \boldsymbol{s}-\boldsymbol{y} \right)\\
	\boldsymbol{s} &\leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right)
.\end{align*}
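Putting the pieces together, the following end-to-end sketch implements the iterative process of figure \ref{fig:prox:alg} for the \ac{AWGN} case. The step sizes \texttt{omega} and \texttt{gamma} and the iteration count \texttt{K} are illustrative assumptions, and the gradient of $h$ avoids the division by $x_k$ as discussed above:
%
\begin{lstlisting}[language=Python]
import numpy as np

def grad_h(x, H):
    """Gradient of the code-constraint polynomial (sketch)."""
    g = 4.0 * (x ** 2 - 1.0) * x               # bipolar term
    for row in H:                              # parity term, check by check
        idx = np.flatnonzero(row)
        p = np.prod(x[idx])
        for k in idx:
            # product over A(i) \ {k}, avoiding the division by x_k
            g[k] += 2.0 * (p - 1.0) * np.prod(x[idx[idx != k]])
    return g

def proximal_decode(y, H, omega=0.05, gamma=0.05, K=200):
    """Proximal decoding over an AWGN channel (illustrative sketch)."""
    s = np.zeros_like(y, dtype=float)
    for _ in range(K):
        r = s - omega * (s - y)                 # gradient step on L
        s = r - gamma * grad_h(r, H)            # approximate proximal step on gamma*h
        x_hat = np.sign(s)
        c_hat = ((1 - x_hat) // 2).astype(int)  # bipolar-to-binary mapping
        if not np.any(H @ c_hat % 2):           # syndrome check H*c = 0 (mod 2)
            break                               # valid codeword found
    return c_hat
\end{lstlisting}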