% ba-thesis/latex/thesis/chapters/decoding_techniques.tex

\chapter{Decoding Techniques}%
\label{chapter:decoding_techniques}

In this chapter, the decoding techniques examined in this work are detailed.
First, an overview of the general methodology of using optimization methods
for channel decoding is given.
Then, the field of \ac{LP} decoding and an \ac{ADMM}-based \ac{LP} decoding
algorithm are introduced.
Finally, the \textit{proximal decoding} algorithm is presented.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Decoding using Optimization Methods}%
\label{sec:dec:Decoding using Optimization Methods}

%
% General methodology
%

The general idea behind using optimization methods for channel decoding
is to reformulate the decoding problem as an optimization problem.
This new formulation can then be solved with one of the many
available optimization algorithms.

Generally, the original decoding problem considered is either the \ac{MAP} or
the \ac{ML} decoding problem:%
%
\begin{align}
    \hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
    p_{\boldsymbol{C} \mid \boldsymbol{Y}} \left(\boldsymbol{c} \mid \boldsymbol{y}
        \right) \label{eq:dec:map}\\
    \hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
    f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
        \right) \label{eq:dec:ml}
.\end{align}%
%
The goal is to arrive at a formulation in which a certain objective function
$g : \mathbb{R}^n \rightarrow \mathbb{R} $ must be minimized under certain constraints:%
%
\begin{align*}
    \text{minimize}\hspace{2mm}   &g\left( \tilde{\boldsymbol{c}} \right)\\
    \text{subject to}\hspace{2mm} &\tilde{\boldsymbol{c}} \in D
,\end{align*}%
%
where $D \subseteq \mathbb{R}^n$ is the domain of values attainable for $\tilde{\boldsymbol{c}}$
and represents the constraints.

In contrast to the established message-passing decoding algorithms,
the perspective then changes from observing the decoding process in its
Tanner graph representation with \acp{VN} and \acp{CN} (as shown in figure \ref{fig:dec:tanner})
to a spatial representation (figure \ref{fig:dec:spatial}),
where the codewords are some of the vertices of a hypercube.
The goal is to find the point $\tilde{\boldsymbol{c}}$,
which minimizes the objective function $g$.

%
% Figure showing decoding space
%

\begin{figure}[H]
    \centering

    \begin{subfigure}[c]{0.47\textwidth}
        \centering

        \tikzstyle{checknode} = [color=KITblue, fill=KITblue,
                                draw, regular polygon,regular polygon sides=4,
                                inner sep=0pt, minimum size=12pt]
        \tikzstyle{variablenode} = [color=KITgreen, fill=KITgreen,
                                draw, circle, inner sep=0pt, minimum size=10pt]

        \begin{tikzpicture}[scale=1, transform shape]
            \node[checknode,
                  label={[below, label distance=-0.4cm, align=center]
                  CN\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
                (cn) at (0, 0) {};
            \node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_1 \right)$}]
                (c1) at (-2, 2) {};
            \node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_2 \right)$}]
                (c2) at (0, 2) {};
            \node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_3 \right)$}]
                (c3) at (2, 2) {};

            \draw (cn) -- (c1);
            \draw (cn) -- (c2);
            \draw (cn) -- (c3);
        \end{tikzpicture}

        \caption{Tanner graph representation of a single parity-check code}
        \label{fig:dec:tanner}
    \end{subfigure}%
    \hfill%
    \begin{subfigure}[c]{0.47\textwidth}
        \centering

        \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                draw, circle, inner sep=0pt, minimum size=4pt]

        \tdplotsetmaincoords{60}{25}
        \begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
            % Cube

            \coordinate (p000) at (0, 0, 0);
            \coordinate (p001) at (0, 0, 2);
            \coordinate (p010) at (0, 2, 0);
            \coordinate (p011) at (0, 2, 2);
            \coordinate (p100) at (2, 0, 0);
            \coordinate (p101) at (2, 0, 2);
            \coordinate (p110) at (2, 2, 0);
            \coordinate (p111) at (2, 2, 2);

            \draw[] (p000) -- (p100);
            \draw[] (p100) -- (p101);
            \draw[] (p101) -- (p001);
            \draw[] (p001) -- (p000);

            \draw[dashed] (p010) -- (p110);
            \draw[]       (p110) -- (p111);
            \draw[]       (p111) -- (p011);
            \draw[dashed] (p011) -- (p010);

            \draw[dashed] (p000) -- (p010);
            \draw[]       (p100) -- (p110);
            \draw[]       (p101) -- (p111);
            \draw[]       (p001) -- (p011);

            % Polytope Vertices

            \node[codeword] (c000) at (p000) {};
            \node[codeword] (c101) at (p101) {};
            \node[codeword] (c110) at (p110) {};
            \node[codeword] (c011) at (p011) {};

            % Polytope Edges

%            \draw[line width=1pt, color=KITblue] (c000) -- (c101);
%            \draw[line width=1pt, color=KITblue] (c000) -- (c110);
%            \draw[line width=1pt, color=KITblue] (c000) -- (c011);
%
%            \draw[line width=1pt, color=KITblue] (c101) -- (c110);
%            \draw[line width=1pt, color=KITblue] (c101) -- (c011);
%
%            \draw[line width=1pt, color=KITblue] (c011) -- (c110);

            % Polytope Annotations

            \node[color=KITblue, below=0cm of c000]    {$\left( 0, 0, 0 \right) $};
            \node[color=KITblue, right=0.17cm of c101] {$\left( 1, 0, 1 \right) $};
            \node[color=KITblue, right=0cm of c110]    {$\left( 1, 1, 0 \right) $};
            \node[color=KITblue, above=0cm of c011]    {$\left( 0, 1, 1 \right) $};

            % c

            \node[color=KITgreen, fill=KITgreen,
                  draw, circle, inner sep=0pt, minimum size=4pt] (c) at (0.9, 0.7, 1) {};
            \node[color=KITgreen, right=0cm of c] {$\tilde{\boldsymbol{c}}$};
        \end{tikzpicture}

        \caption{Spatial representation of a single parity-check code}
        \label{fig:dec:spatial}
    \end{subfigure}%

    \caption{Different representations of the decoding problem}
\end{figure}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{LP Decoding}%
\label{sec:dec:LP Decoding}

\Ac{LP} decoding is a subject area introduced by Feldman et al.~\cite{feldman_paper}.
They reframe the decoding problem as an
\textit{integer linear program} and subsequently present two relaxations into
\textit{linear programs}: one that is an exact formulation of \ac{ML}
decoding, and one that is an approximation with a more manageable
representation.
To solve the resulting linear program, various optimization methods can be
used.
\todo{Citation needed}

They begin by looking at the \ac{ML} decoding problem%
\footnote{They assume that all codewords are equally likely to be transmitted,
making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
%
\begin{align}
    \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmax_{\boldsymbol{c} \in \mathcal{C}}
        f_{\boldsymbol{Y} \mid \boldsymbol{C}}
            \left( \boldsymbol{y} \mid \boldsymbol{c} \right)%
    \label{eq:lp:ml}
.\end{align}%
%
Assuming a memoryless channel, equation (\ref{eq:lp:ml}) can be rewritten in terms
of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
%
\begin{align*}
    \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
        \sum_{i=1}^{n} \gamma_i c_i,%
    \hspace{5mm} \gamma_i = \ln\left(
        \frac{f_{Y_i \mid C_i} \left( y_i  \mid c_i = 0 \right) }
        {f_{Y_i \mid C_i} \left( y_i \mid c_i = 1 \right) } \right)
.\end{align*}
%
The authors propose the following cost function%
\footnote{In this context, \textit{cost function} and \textit{objective function}
have the same meaning.}
for the \ac{LP} decoding problem:%
%
\begin{align*}
    g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
        = \boldsymbol{\gamma}^\text{T}\boldsymbol{c}
.\end{align*}
%
With this cost function, the exact integer linear program formulation of \ac{ML}
decoding becomes the following:%
%
\begin{align*}
    \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{c} \\
    \text{subject to }\hspace{2mm} &\boldsymbol{c} \in \mathcal{C}
.\end{align*}%
%
%\todo{$\boldsymbol{c}$ or some other variable name? e.g. $\boldsymbol{c}^{*}$.
%Especially for the continuous variable in LP decoding}

As solving integer linear programs is generally NP-hard, this decoding problem
has to be approximated by a problem with looser constraints.
To this end, a technique called \textit{relaxation} is applied:
the constraints are loosened, thereby broadening the considered domain
(e.g., by lifting the integer requirement).
First, the authors present an equivalent \ac{LP} formulation of exact \ac{ML}
decoding, redefining the constraints in terms of the \textit{codeword polytope}
%
\begin{align*}
    \text{poly}\left( \mathcal{C} \right) = \left\{
        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} \boldsymbol{c}
            \text{ : } \alpha_{\boldsymbol{c}} \ge 0,
        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} = 1 \right\}
,\end{align*} %
%
which represents the \textit{convex hull} of all possible codewords,
i.e., the set of all convex combinations of the codewords.
This corresponds to simply lifting the integer requirement.
However, since the number of constraints needed to characterize the codeword
polytope is exponential in the code length, this formulation is relaxed further.
By observing that each check node defines its own local single parity-check
code, and thus its own \textit{local codeword polytope},
the \textit{relaxed codeword polytope} $\overline{Q}$ is defined as the intersection of all
local codeword polytopes.
This consideration leads to constraints that can be described as follows
\cite[Sec. II, A]{efficient_lp_dec_admm}:%
%
\begin{align*}
    \boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}
    \hspace{5mm}\forall j\in \mathcal{J}
,\end{align*}%
%
where $\mathcal{P}_{d_j}$ is the \textit{check polytope}, the convex hull of all
binary vectors of length $d_j$ with even parity%
\footnote{Essentially, $\mathcal{P}_{d_j}$ is the set of vectors that satisfy
parity-check $j$, but extended to the continuous domain.}
and $\boldsymbol{T}_j$ is the \textit{transfer matrix}, which selects the
neighboring variable nodes
of check node $j$ (i.e., the relevant components of $\tilde{\boldsymbol{c}}$ for parity-check $j$).
For example, if the $j$th row of the parity-check matrix
$\boldsymbol{H}$ was $\boldsymbol{h}_j =
\begin{bmatrix} 0 & 1 & 0 & 1 & 0 & 1 & 0 \end{bmatrix}$,
the transfer matrix would be \cite[Sec. II, A]{efficient_lp_dec_admm}
%
\begin{align*}
\boldsymbol{T}_j =
\begin{bmatrix}
    0 & 1 & 0 & 0 & 0 & 0 & 0 \\
    0 & 0 & 0 & 1 & 0 & 0 & 0 \\
    0 & 0 & 0 & 0 & 0 & 1 & 0 \\
\end{bmatrix}
.\end{align*}%
%

In figure \ref{fig:dec:poly}, the two relaxations are compared for an
exemplary code, which is described by the generator and parity-check matrices%
%
\begin{align}
\boldsymbol{G} =
    \begin{bmatrix}
        0 & 1 & 1
    \end{bmatrix} \label{eq:lp:example_code_def_gen} \\[1em]
\boldsymbol{H} =
    \begin{bmatrix}
        1 & 1 & 1\\
        0 & 1 & 1
    \end{bmatrix} \label{eq:lp:example_code_def_par}
\end{align}%
%
and has only two possible codewords:
%
\begin{align*}
\mathcal{C} = \left\{ \begin{bmatrix} 0 & 0 & 0 \end{bmatrix},
    \begin{bmatrix} 0 & 1 & 1 \end{bmatrix}   \right\}
.\end{align*}
%
Figure \ref{fig:dec:poly:exact_ilp} shows the domain of exact \ac{ML} decoding.
The first relaxation, onto the codeword polytope $\text{poly}\left( \mathcal{C} \right) $,
is shown in figure \ref{fig:dec:poly:exact};
this expresses the constraints for the equivalent linear program to exact \ac{ML} decoding.
$\text{poly}\left( \mathcal{C} \right) $ is further relaxed onto the relaxed codeword polytope
$\overline{Q}$, shown in figure \ref{fig:dec:poly:relaxed}.
Figure \ref{fig:dec:poly:local} shows how $\overline{Q}$ is formed by intersecting the
local codeword polytopes of each check node.
%
%
%
% Codeword polytope visualization figure
%
%
\begin{figure}[H]
    \centering

    %
    % Left side - codeword polytope
    %

    \begin{subfigure}[b]{0.35\textwidth}
        \centering

        \begin{subfigure}{\textwidth}
            \centering

            \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                draw, circle, inner sep=0pt, minimum size=4pt]

            \tdplotsetmaincoords{60}{25}
            \begin{tikzpicture}[scale=0.9, transform shape, tdplot_main_coords]
                % Cube

                \coordinate (p000) at (0, 0, 0);
                \coordinate (p001) at (0, 0, 2);
                \coordinate (p010) at (0, 2, 0);
                \coordinate (p011) at (0, 2, 2);
                \coordinate (p100) at (2, 0, 0);
                \coordinate (p101) at (2, 0, 2);
                \coordinate (p110) at (2, 2, 0);
                \coordinate (p111) at (2, 2, 2);

                \draw[] (p000) -- (p100);
                \draw[] (p100) -- (p101);
                \draw[] (p101) -- (p001);
                \draw[] (p001) -- (p000);

                \draw[dashed] (p010) -- (p110);
                \draw[]       (p110) -- (p111);
                \draw[]       (p111) -- (p011);
                \draw[dashed] (p011) -- (p010);

                \draw[dashed] (p000) -- (p010);
                \draw[]       (p100) -- (p110);
                \draw[]       (p101) -- (p111);
                \draw[]       (p001) -- (p011);

                % Polytope Vertices

                \node[codeword] (c000) at (p000) {};
                \node[codeword] (c011) at (p011) {};

                % Polytope Annotations

                \node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
                \node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
            \end{tikzpicture}

            \caption{Set of all codewords $\mathcal{C}$}
            \label{fig:dec:poly:exact_ilp}
        \end{subfigure}\\[1em]
        \begin{subfigure}{\textwidth}
            \centering

            \begin{tikzpicture}
                \node (relaxation) at (0, 0) {Relaxation};

                \draw (0, 0.61) -- (relaxation);
                \draw[->] (relaxation) -- (0, -0.7);
            \end{tikzpicture}

            \vspace{4mm}

            \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                draw, circle, inner sep=0pt, minimum size=4pt]

            \tdplotsetmaincoords{60}{25}
            \begin{tikzpicture}[scale=0.9, transform shape, tdplot_main_coords]
                % Cube

                \coordinate (p000) at (0, 0, 0);
                \coordinate (p001) at (0, 0, 2);
                \coordinate (p010) at (0, 2, 0);
                \coordinate (p011) at (0, 2, 2);
                \coordinate (p100) at (2, 0, 0);
                \coordinate (p101) at (2, 0, 2);
                \coordinate (p110) at (2, 2, 0);
                \coordinate (p111) at (2, 2, 2);

                \draw[] (p000) -- (p100);
                \draw[] (p100) -- (p101);
                \draw[] (p101) -- (p001);
                \draw[] (p001) -- (p000);

                \draw[dashed] (p010) -- (p110);
                \draw[]       (p110) -- (p111);
                \draw[]       (p111) -- (p011);
                \draw[dashed] (p011) -- (p010);

                \draw[dashed] (p000) -- (p010);
                \draw[]       (p100) -- (p110);
                \draw[]       (p101) -- (p111);
                \draw[]       (p001) -- (p011);

                % Polytope Vertices

                \node[codeword] (c000) at (p000) {};
                \node[codeword] (c011) at (p011) {};

                % Polytope Edges

                \draw[line width=1pt, color=KITblue] (c000) -- (c011);

                % Polytope Annotations

                \node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
                \node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
            \end{tikzpicture}

            \caption{Codeword polytope $\text{poly}\left( \mathcal{C} \right) $}
            \label{fig:dec:poly:exact}
        \end{subfigure}
    \end{subfigure} \hfill%
%
    %
    % Right side - relaxed polytope
    %
%
    \begin{subfigure}[b]{0.55\textwidth}
        \centering

        \begin{subfigure}{\textwidth}
            \centering

            \begin{minipage}{0.5\textwidth}
                \centering

                \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                    draw, circle, inner sep=0pt, minimum size=4pt]

                \tdplotsetmaincoords{60}{25}
                \begin{tikzpicture}[scale=0.9, transform shape, tdplot_main_coords]
                    % Cube

                    \coordinate (p000) at (0, 0, 0);
                    \coordinate (p001) at (0, 0, 2);
                    \coordinate (p010) at (0, 2, 0);
                    \coordinate (p011) at (0, 2, 2);
                    \coordinate (p100) at (2, 0, 0);
                    \coordinate (p101) at (2, 0, 2);
                    \coordinate (p110) at (2, 2, 0);
                    \coordinate (p111) at (2, 2, 2);

                    \draw[] (p000) -- (p100);
                    \draw[] (p100) -- (p101);
                    \draw[] (p101) -- (p001);
                    \draw[] (p001) -- (p000);

                    \draw[dashed] (p010) -- (p110);
                    \draw[]       (p110) -- (p111);
                    \draw[]       (p111) -- (p011);
                    \draw[dashed] (p011) -- (p010);

                    \draw[dashed] (p000) -- (p010);
                    \draw[]       (p100) -- (p110);
                    \draw[]       (p101) -- (p111);
                    \draw[]       (p001) -- (p011);

                    % Polytope Vertices

                    \node[codeword] (c000) at (p000) {};
                    \node[codeword] (c101) at (p101) {};
                    \node[codeword] (c110) at (p110) {};
                    \node[codeword] (c011) at (p011) {};

                    % Polytope Edges & Faces

                    \draw[line width=1pt, color=KITblue] (c000) -- (c101);
                    \draw[line width=1pt, color=KITblue] (c000) -- (c110);
                    \draw[line width=1pt, color=KITblue] (c000) -- (c011);

                    \draw[line width=1pt, color=KITblue] (c101) -- (c110);
                    \draw[line width=1pt, color=KITblue] (c101) -- (c011);

                    \draw[line width=1pt, color=KITblue] (c011) -- (c110);

                    \fill[KITblue, opacity=0.15] (p000) -- (p101) -- (p011) -- cycle;
                    \fill[KITblue, opacity=0.15] (p000) -- (p110) -- (p101) -- cycle;
                    \fill[KITblue, opacity=0.15] (p110) -- (p011) -- (p101) -- cycle;

                    % Polytope Annotations

                    \node[color=KITblue, below=0cm of c000]    {$\left( 0, 0, 0 \right) $};
                    \node[color=KITblue, right=0.17cm of c101] {$\left( 1, 0, 1 \right) $};
                    \node[color=KITblue, right=0cm of c110]    {$\left( 1, 1, 0 \right) $};
                    \node[color=KITblue, above=0cm of c011]    {$\left( 0, 1, 1 \right) $};
                \end{tikzpicture}
            \end{minipage}%
            \begin{minipage}{0.5\textwidth}
                \centering

                \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                    draw, circle, inner sep=0pt, minimum size=4pt]

                \tdplotsetmaincoords{60}{25}
                \begin{tikzpicture}[scale=0.9, transform shape, tdplot_main_coords]
                    % Cube

                    \coordinate (p000) at (0, 0, 0);
                    \coordinate (p001) at (0, 0, 2);
                    \coordinate (p010) at (0, 2, 0);
                    \coordinate (p011) at (0, 2, 2);
                    \coordinate (p100) at (2, 0, 0);
                    \coordinate (p101) at (2, 0, 2);
                    \coordinate (p110) at (2, 2, 0);
                    \coordinate (p111) at (2, 2, 2);

                    \draw[] (p000) -- (p100);
                    \draw[] (p100) -- (p101);
                    \draw[] (p101) -- (p001);
                    \draw[] (p001) -- (p000);

                    \draw[dashed] (p010) -- (p110);
                    \draw[]       (p110) -- (p111);
                    \draw[]       (p111) -- (p011);
                    \draw[dashed] (p011) -- (p010);

                    \draw[dashed] (p000) -- (p010);
                    \draw[]       (p100) -- (p110);
                    \draw[]       (p101) -- (p111);
                    \draw[]       (p001) -- (p011);

                    % Polytope Vertices

                    \node[codeword] (c000) at (p000) {};
                    \node[codeword] (c011) at (p011) {};
                    \node[codeword] (c100) at (p100) {};
                    \node[codeword] (c111) at (p111) {};

                    % Polytope Edges & Faces

                    \draw[line width=1pt, color=KITblue] (c000) -- (c011);
                    \draw[line width=1pt, color=KITblue] (c000) -- (c100);
                    \draw[line width=1pt, color=KITblue] (c100) -- (c111);
                    \draw[line width=1pt, color=KITblue] (c111) -- (c011);

                    \fill[KITblue, opacity=0.2] (p000) -- (p100) -- (p111) -- (p011) -- cycle;

                    % Polytope Annotations

                    \node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
                    \node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
                    \node[color=KITblue, below=0cm of c100] {$\left( 1, 0, 0 \right) $};
                    \node[color=KITblue, above=0cm of c111] {$\left( 1, 1, 1 \right) $};
                \end{tikzpicture}
            \end{minipage}

            \begin{tikzpicture}
                \node[color=KITblue, align=center] at (-2,0)
                    {$j=1$\\ $\left( c_1 + c_2+ c_3 = 0 \right) $};
                \node[color=KITblue, align=center] at (2,0)
                    {$j=2$\\ $\left(c_2 + c_3 = 0\right)$};
            \end{tikzpicture}

            \caption{Local codeword polytopes of the check nodes}
            \label{fig:dec:poly:local}
        \end{subfigure}\\[1em]
        \begin{subfigure}{\textwidth}
            \centering

            \begin{tikzpicture}
                \draw[densely dashed] (-2, 0) -- (2, 0);
                \draw[densely dashed] (-2, 0.5) -- (-2, 0);
                \draw[densely dashed] (2, 0.5) -- (2, 0);

                \node (intersection) at (0, -0.5) {Intersection};

                \draw[densely dashed] (0, 0) -- (intersection);
                \draw[densely dashed, ->] (intersection) -- (0, -1);
            \end{tikzpicture}

            \vspace{2mm}

            \tikzstyle{codeword} = [color=KITblue, fill=KITblue,
                                draw, circle, inner sep=0pt, minimum size=4pt]
            \tikzstyle{pseudocodeword} = [color=KITred, fill=KITred,
                                draw, circle, inner sep=0pt, minimum size=4pt]

            \tdplotsetmaincoords{60}{25}
            \begin{tikzpicture}[scale=0.9, transform shape, tdplot_main_coords]
                % Cube

                \coordinate (p000) at (0, 0, 0);
                \coordinate (p001) at (0, 0, 2);
                \coordinate (p010) at (0, 2, 0);
                \coordinate (p011) at (0, 2, 2);
                \coordinate (p100) at (2, 0, 0);
                \coordinate (p101) at (2, 0, 2);
                \coordinate (p110) at (2, 2, 0);
                \coordinate (p111) at (2, 2, 2);

                \draw[] (p000) -- (p100);
                \draw[] (p100) -- (p101);
                \draw[] (p101) -- (p001);
                \draw[] (p001) -- (p000);

                \draw[dashed] (p010) -- (p110);
                \draw[]       (p110) -- (p111);
                \draw[]       (p111) -- (p011);
                \draw[dashed] (p011) -- (p010);

                \draw[dashed] (p000) -- (p010);
                \draw[]       (p100) -- (p110);
                \draw[]       (p101) -- (p111);
                \draw[]       (p001) -- (p011);

                % Polytope Vertices

                \node[codeword] (c000) at (p000) {};
                \node[codeword] (c011) at (p011) {};
                \node[pseudocodeword] (cpseudo) at (2, 1, 1) {};

                % Polytope Edges & Faces

                \draw[line width=1pt, color=KITblue] (c000) -- (c011);
                \draw[line width=1pt, color=KITred] (cpseudo) -- (c000);
                \draw[line width=1pt, color=KITred] (cpseudo) -- (c011);

                \fill[KITred, opacity=0.2] (p000) -- (p011) -- (2,1,1) -- cycle;

                % Polytope Annotations

                \node[color=KITblue, below=0cm of c000]   {$\left( 0, 0, 0 \right) $};
                \node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
                \node[color=KITred, right=0.03cm of cpseudo]
                    {$\left( 1, \frac{1}{2}, \frac{1}{2} \right) $};
            \end{tikzpicture}

            \caption{Relaxed codeword polytope $\overline{Q}$}
            \label{fig:dec:poly:relaxed}
        \end{subfigure}
    \end{subfigure}

    \vspace*{-2.5cm}
    \hspace*{-0.1\textwidth}
    \begin{tikzpicture}
        \draw[->] (0,0) -- (2.5, 0);
        \node[above] at (1.25, 0) {Relaxation};

        % Dummy node to make tikzpicture slightly larger
        \node[below] at (1.25, 0) {};
    \end{tikzpicture}
    \vspace{2.5cm}

    \caption{Visualization of the codeword polytope and the relaxed codeword
        polytope of the code described by equations (\ref{eq:lp:example_code_def_gen})
        and (\ref{eq:lp:example_code_def_par})}
    \label{fig:dec:poly}
\end{figure}%
%
\noindent It can be seen that the relaxed codeword polytope $\overline{Q}$ introduces
vertices with fractional values;
these represent erroneous non-codeword solutions to the linear program and
correspond to the so-called \textit{pseudo-codewords} introduced in
\cite{feldman_paper}.
However, since for \ac{LDPC} codes the number of constraints needed to describe
$\overline{Q}$ scales linearly with $n$ instead of
exponentially, it is a lot more tractable for practical applications.

The resulting formulation of the relaxed optimization problem becomes:%
%
\begin{align}
    \begin{aligned}
        \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}} \\
        \text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}
        \hspace{5mm}\forall j\in\mathcal{J}.
    \end{aligned} \label{eq:lp:relaxed_formulation}
\end{align}%
\todo{Space before $\forall$?}


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{LP Decoding using ADMM}%
\label{sec:dec:LP Decoding using ADMM}

The \ac{LP} decoding formulation in section \ref{sec:dec:LP Decoding}
is a very general one that can be solved with a number of different optimization methods.
In this work \ac{ADMM} is examined, as its distributed nature allows for a very efficient
implementation.
\ac{LP} decoding using \ac{ADMM} can be regarded as a message-passing
algorithm with separate variable- and check-node update steps;
the resulting algorithm has a striking similarity to \ac{BP} and its computational
complexity has been demonstrated to compare favorably to \ac{BP}
\cite{original_admm,efficient_lp_dec_admm}.

The \ac{LP} decoding problem in (\ref{eq:lp:relaxed_formulation}) can be
slightly rewritten using the auxiliary variables
$\boldsymbol{z}_{[1:m]}$:%
%
\begin{align}
    \begin{aligned}
        \begin{array}{r}
            \text{minimize }
        \end{array}\hspace{0.5mm}   & \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}} \\
        \begin{array}{r}
            \text{subject to }\\
            \phantom{te}
        \end{array}\hspace{0.5mm}        & \setlength{\arraycolsep}{1.4pt}
                                         \begin{array}{rl}
                                              \boldsymbol{T}_j\tilde{\boldsymbol{c}}
                                                    &= \boldsymbol{z}_j\\
                                              \boldsymbol{z}_j
                                                    &\in \mathcal{P}_{d_j}
                                          \end{array}
                                          \hspace{5mm} \forall j\in\mathcal{J}.
    \end{aligned}
    \label{eq:lp:admm_reformulated}
\end{align}
%
In this form, the problem almost fits the \ac{ADMM} template described in section
\ref{sec:theo:Optimization Methods}, except for the fact that there are multiple equality
constraints $\boldsymbol{T}_j \tilde{\boldsymbol{c}} = \boldsymbol{z}_j$ and the
additional constraints $\boldsymbol{z}_j \in \mathcal{P}_{d_j} \, \forall\, j\in\mathcal{J}$.
\todo{$\forall$ in text?}
The multiple constraints can be addressed by introducing additional terms in the
augmented Lagrangian:%
%
\begin{align*}
    \mathcal{L}_{\mu}\left( \tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]},
        \boldsymbol{\lambda}_{[1:m]} \right)
    = \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}}
        + \sum_{j\in\mathcal{J}} \boldsymbol{\lambda}^\text{T}_j
        \left( \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \right)
        + \frac{\mu}{2}\sum_{j\in\mathcal{J}}
            \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert^2_2
.\end{align*}%
%
The additional constraints remain in the dual optimization problem:%
%
\begin{align*}
    \text{maximize } \min_{\substack{\tilde{\boldsymbol{c}} \\
            \boldsymbol{z}_j \in \mathcal{P}_{d_j}\,\forall\,j\in\mathcal{J}}}
    \mathcal{L}_{\mu}\left( \tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]},
        \boldsymbol{\lambda}_{[1:m]} \right)
.\end{align*}%
%
The steps to solve the dual problem then become:
%
\begin{alignat*}{3}
    \tilde{\boldsymbol{c}} &\leftarrow \argmin_{\tilde{\boldsymbol{c}}} \mathcal{L}_{\mu} \left(
        \tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]}, \boldsymbol{\lambda}_{[1:m]} \right) \\
    \boldsymbol{z}_j &\leftarrow \argmin_{\boldsymbol{z}_j \in \mathcal{P}_{d_j}}
        \mathcal{L}_{\mu} \left(
            \tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]}, \boldsymbol{\lambda}_{[1:m]} \right)
        \hspace{3mm} &&\forall j\in\mathcal{J} \\
    \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
        + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
        - \boldsymbol{z}_j \right)
        \hspace{3mm} &&\forall j\in\mathcal{J}
.\end{alignat*}
%
Luckily, the additional constraints only affect the $\boldsymbol{z}_j$-update steps.
Furthermore, the $\boldsymbol{z}_j$-update steps can be shown to be equivalent to projections
onto the check polytopes $\mathcal{P}_{d_j}$
and the $\tilde{\boldsymbol{c}}$-update can be computed analytically%
%
\footnote{In the $\tilde{c}_i$-update rule, the term
$\left( \boldsymbol{z}_j \right)_i$ is a slight abuse of notation, as
$\boldsymbol{z}_j$ has fewer components than there are variable nodes $i$.
What is actually meant is the component of $\boldsymbol{z}_j$ that is associated
with the variable node $i$, i.e., $\left( \boldsymbol{T}_j^\text{T}\boldsymbol{z}_j\right)_i$.
The same is true for $\left( \boldsymbol{\lambda}_j \right)_i$.}
%
\cite[Sec. III. B.]{original_admm}:%
%
\begin{alignat*}{3}
    \tilde{c}_i &\leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{z}_j \right)_i
            - \frac{1}{\mu} \left( \boldsymbol{\lambda}_j \right)_i \Big)
        - \frac{\gamma_i}{\mu} \right)
    \hspace{3mm} && \forall i\in\mathcal{I} \\
    \boldsymbol{z}_j &\leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \frac{\boldsymbol{\lambda}_j}{\mu} \right)
    \hspace{3mm} && \forall j\in\mathcal{J} \\
    \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
        + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
        - \boldsymbol{z}_j \right)
    \hspace{3mm} && \forall j\in\mathcal{J}
.\end{alignat*}
%
It should be noted that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
as they are independent of one another.
The same is true for the updates of the individual components of $\tilde{\boldsymbol{c}}$.
This representation can be slightly simplified by substituting
$\boldsymbol{\lambda}_j = \mu \cdot \boldsymbol{u}_j \,\forall\,j\in\mathcal{J}$:%
%
\begin{alignat*}{3}
    \tilde{c}_i &\leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{z}_j \right)_i
            - \left( \boldsymbol{u}_j \right)_i \Big)
        - \frac{\gamma_i}{\mu} \right)
    \hspace{3mm} && \forall i\in\mathcal{I} \\
    \boldsymbol{z}_j &\leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{u}_j \right)
    \hspace{3mm} && \forall j\in\mathcal{J} \\
    \boldsymbol{u}_j &\leftarrow \boldsymbol{u}_j
        + \boldsymbol{T}_j\tilde{\boldsymbol{c}}
        - \boldsymbol{z}_j
    \hspace{3mm} && \forall j\in\mathcal{J}
.\end{alignat*}
%


The reason \ac{ADMM} is able to perform so well is the relocation of the constraints
$\boldsymbol{T}_j\tilde{\boldsymbol{c}}\in\mathcal{P}_{d_j}\,\forall\, j\in\mathcal{J}$
into the objective function itself.
The minimization of the new objective function can then take place simultaneously
with respect to all $\boldsymbol{z}_j, j\in\mathcal{J}$.
Effectively, all of the $\left|\mathcal{J}\right|$ parity constraints are
able to be handled at the same time.
This can also be understood by interpreting the decoding process as a message-passing
algorithm \cite[Sec. III. D.]{original_admm}, \cite[Sec. II. B.]{efficient_lp_dec_admm},
as is shown in figure \ref{fig:lp:message_passing}.%
\todo{Explicitly specify sections?}%
%
\begin{figure}[H]
    \centering

    \begin{genericAlgorithm}[caption={}, label={},
        basicstyle=\fontsize{11}{16}\selectfont
        ]
Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]}$ and $\boldsymbol{u}_{[1:m]}$
while $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{pri}}$ or $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{z}^\prime_j - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{dual}}$ do
    for $j$ in $\mathcal{J}$ do
        $\boldsymbol{z}_j \leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
            \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{u}_j \right)$
        $\boldsymbol{u}_j \leftarrow \boldsymbol{u}_j
            + \boldsymbol{T}_j\tilde{\boldsymbol{c}}
            - \boldsymbol{z}_j$
    end for
    for $i$ in $\mathcal{I}$ do
        $\tilde{c}_i \leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
            \sum_{j\in N_v\left( i \right) } \Big(
                \left( \boldsymbol{z}_j \right)_i - \left( \boldsymbol{u}_j
                \right)_i
            \Big) - \frac{\gamma_i}{\mu} \right)$
    end for
end while
    \end{genericAlgorithm}

    \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message passing algorithm%
        \protect\footnotemark{}}
    \label{fig:lp:message_passing}
\end{figure}%
%
\footnotetext{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$
are additional parameters
defining the tolerances for the stopping criteria of the algorithm.
The variable $\boldsymbol{z}_j^\prime$ denotes the value of
$\boldsymbol{z}_j$ in the previous iteration.}%
%
\noindent The $\boldsymbol{z}_j$- and $\boldsymbol{u}_j$-updates can be understood as
a check-node update step (lines $3$-$6$) and the $\tilde{c}_i$-updates can be understood as
a variable-node update step (lines $7$-$9$ in figure \ref{fig:lp:message_passing}).
The updates for each variable- and check-node can be performed in parallel.
With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM}
is able to achieve similar computational complexity to \ac{BP}.

The main computational effort in solving the linear program then amounts to
computing the projection operation $\Pi_{\mathcal{P}_{d_j}} \left( \cdot \right) $
onto each check polytope. Various different methods to perform this projection
have been proposed (e.g., in \cite{original_admm}, \cite{efficient_lp_dec_admm},
\cite{lautern}).
The method chosen here is the one presented in \cite{lautern}.


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proximal Decoding}%
\label{sec:dec:Proximal Decoding}

Proximal decoding was proposed by Wadayama et al.\ as a novel formulation of
optimization-based decoding \cite{proximal_paper}.
With this algorithm, minimization is performed using the proximal gradient
method.
In contrast to \ac{LP} decoding, the objective function is based on a
non-convex optimization formulation of the \ac{MAP} decoding problem.

In order to derive the objective function, the authors begin with the
\ac{MAP} decoding rule, expressed as a continuous maximization problem%
\footnote{The expansion of the domain to be continuous doesn't constitute a
material difference in the meaning of the rule.
The only change is that what previously were \acp{PMF} now have to be expressed
in terms of \acp{PDF}.}
over $\tilde{\boldsymbol{x}}$:%
%
\begin{align}
    \hat{\boldsymbol{x}} = \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
        f_{\tilde{\boldsymbol{X}} \mid \boldsymbol{Y}}
        \left( \tilde{\boldsymbol{x}} \mid \boldsymbol{y} \right)
        = \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}} f_{\boldsymbol{Y}
            \mid \tilde{\boldsymbol{X}}}
        \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
        f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)%
    \label{eq:prox:vanilla_MAP}
.\end{align}%
%
The likelihood $f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) $ is a known function
determined by the channel model.
The prior \ac{PDF} $f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)$ is also
known, as the equal probability assumption is made on
$\mathcal{C}$.
However, since the considered domain is continuous,
the prior \ac{PDF} cannot be ignored as a constant during the maximization
as is often done, and has a rather unwieldy representation:%
%
\begin{align}
    f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right) =
        \frac{1}{\left| \mathcal{C} \right| }
            \sum_{\boldsymbol{c} \in \mathcal{C} }
                \delta\big( \tilde{\boldsymbol{x}} - \left( -1 \right) ^{\boldsymbol{c}}\big)
    \label{eq:prox:prior_pdf}
.\end{align}%
%
In order to rewrite the prior \ac{PDF}
$f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)$,
the so-called \textit{code-constraint polynomial} is introduced as:%
%
\begin{align*}
    h\left( \tilde{\boldsymbol{x}} \right) =
        \underbrace{\sum_{i=1}^{n} \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
        + \underbrace{\sum_{j=1}^{m} \left[
            \left( \prod_{i\in N_c \left( j \right) } \tilde{x}_i \right)
        -1 \right] ^2}_{\text{Parity constraint}}%
.\end{align*}%
%
The intention of this function is to provide a way to penalize vectors far
from a codeword and favor those close to one.
In order to achieve this, the polynomial is composed of two parts: one term
representing the bipolar constraint, providing for a discrete solution of the
continuous optimization problem, and one term representing the parity
constraints, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
The prior \ac{PDF} is then approximated using the code-constraint polynomial as:%
%
\begin{align}
    f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)
    \approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) }%
    \label{eq:prox:prior_pdf_approx}
.\end{align}%
%
The authors justify this approximation by arguing that, for
$\gamma \rightarrow \infty$, the approximation in equation
(\ref{eq:prox:prior_pdf_approx}) approaches the original function in equation
(\ref{eq:prox:prior_pdf}).
This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP})
and the likelihood can be rewritten using the negative log-likelihood
$L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
        f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left(
        \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right) $:%
%
\begin{align*}
    \hat{\boldsymbol{x}} &= \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
            \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
            \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
        &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \big(
            L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
            + \gamma h\left( \tilde{\boldsymbol{x}} \right)
            \big)%
.\end{align*}%
%
Thus, with proximal decoding, the objective function
$g\left( \tilde{\boldsymbol{x}} \right)$ considered is%
%
\begin{align}
    g\left( \tilde{\boldsymbol{x}} \right) = L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}}
    \right)
        + \gamma h\left( \tilde{\boldsymbol{x}} \right)%
    \label{eq:prox:objective_function}
\end{align}%
%
and the decoding problem is reformulated to%
%
\begin{align*}
    \text{minimize}\hspace{2mm}   &L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
        + \gamma h\left( \tilde{\boldsymbol{x}} \right)\\
    \text{subject to}\hspace{2mm} &\tilde{\boldsymbol{x}} \in \mathbb{R}^n
.\end{align*}
%

For the solution of the approximate \ac{MAP} decoding problem, the two parts
of equation (\ref{eq:prox:objective_function}) are considered separately:
the minimization of the objective function occurs in an alternating
fashion, switching between the negative log-likelihood
$L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled
code-constraint polynomial $\gamma h\left( \boldsymbol{x} \right) $.
Two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are introduced,
describing the result of each of the two steps.
The first step, minimizing the negative log-likelihood, is performed using gradient
descent:%
%
\begin{align}
    \boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla
        L\left( \boldsymbol{y} \mid \boldsymbol{s} \right),
    \hspace{5mm}\omega > 0
    \label{eq:prox:step_log_likelihood}
.\end{align}%
%
For the second step, minimizing the scaled code-constraint polynomial, the
proximal gradient method is used and the \textit{proximal operator} of
$\gamma h\left( \boldsymbol{x} \right) $ has to be computed.
It is then immediately approximated with gradient descent:%
%
\begin{align*}
    \text{prox}_{\gamma h} \left( \tilde{\boldsymbol{x}} \right) &\equiv
        \argmin_{\boldsymbol{t} \in \mathbb{R}^n}
            \left( \gamma h\left( \boldsymbol{t} \right) +
                \frac{1}{2} \lVert \boldsymbol{t} - \tilde{\boldsymbol{x}} \rVert^2 \right)\\
        &\approx \tilde{\boldsymbol{x}} - \gamma \nabla h \left( \tilde{\boldsymbol{x}} \right),
    \hspace{5mm} \gamma > 0, \text{ small}
.\end{align*}%
%
The second step thus becomes%
%
\begin{align*}
    \boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right),
    \hspace{5mm}\gamma > 0,\text{ small}
.\end{align*}
%
While the approximation of the prior \ac{PDF} made in equation (\ref{eq:prox:prior_pdf_approx})
theoretically becomes better
with larger $\gamma$, the constraint that $\gamma$ be small is important,
as it keeps the effect of $h\left( \boldsymbol{x} \right) $ on the landscape
of the objective function small.
Otherwise, unwanted stationary points, including local minima, are introduced.
The authors say that ``in practice, the value of $\gamma$ should be adjusted
according to the decoding performance'' \cite[Sec. 3.1]{proximal_paper}.

%The components of the gradient of the code-constraint polynomial can be computed as follows:%
%%
%\begin{align*}
%    \frac{\partial}{\partial x_k} h\left( \boldsymbol{x} \right) =
%        4\left( x_k^2 - 1 \right) x_k + \frac{2}{x_k}
%            \sum_{i\in \mathcal{B}\left( k \right) } \left(
%                \left( \prod_{j\in\mathcal{A}\left( i \right)} x_j\right)^2
%                - \prod_{j\in\mathcal{A}\left( i \right) }x_j \right)
%.\end{align*}%
%\todo{Only multiplication?}%
%\todo{$x_k$: $k$ or some other indexing variable?}%
%%
In the case of \ac{AWGN}, the likelihood
$f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
    \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)$
is%
%
\begin{align*}
    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}
        \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
        = \frac{1}{\left( 2\pi\sigma^2 \right)^{n/2}}\mathrm{e}^{
            -\frac{\lVert \boldsymbol{y}-\tilde{\boldsymbol{x}}
        \rVert^2 }
    {2\sigma^2}}
.\end{align*}
%
Thus, the gradient of the negative log-likelihood becomes%
\footnote{For the minimization, constants can be disregarded. For this reason,
it suffices to consider only proportionality instead of equality.}%
%
\begin{align*}
    \nabla L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
    &\propto \nabla \lVert \boldsymbol{y} - \tilde{\boldsymbol{x}} \rVert^2\\
    &\propto \tilde{\boldsymbol{x}} - \boldsymbol{y}
,\end{align*}%
%
allowing equation (\ref{eq:prox:step_log_likelihood}) to be rewritten as%
%
\begin{align*}
    \boldsymbol{r} \leftarrow \boldsymbol{s}
        - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right)
.\end{align*}
%

One thing to consider during the actual decoding process is that the gradient
of the code-constraint polynomial can take on extremely large values.
To avoid numerical instability, an additional step is added, where all
components of the current estimate are clipped to $\left[-\eta, \eta \right]$,
where $\eta$ is a positive constant slightly larger than one:%
%
\begin{align*}
    \boldsymbol{s} \leftarrow \Pi_{\eta} \left( \boldsymbol{r}
        - \gamma \nabla h\left( \boldsymbol{r} \right)  \right)
,\end{align*}
%
$\Pi_{\eta}\left( \cdot \right) $ expressing the projection onto
$\left[ -\eta, \eta \right]^n$.

The iterative decoding process resulting from these considerations is shown in
figure \ref{fig:prox:alg}.

\begin{figure}[H]
    \centering

    \begin{genericAlgorithm}[caption={}, label={}]
$\boldsymbol{s} \leftarrow \boldsymbol{0}$
for $K$ iterations do
    $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
    $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
    $\boldsymbol{\hat{c}} \leftarrow \frac{1}{2} \left( \boldsymbol{1} - \text{sign}\left( \boldsymbol{s} \right) \right) $
    if $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ do
        return $\boldsymbol{\hat{c}}$
    end if
end for
return $\boldsymbol{\hat{c}}$
    \end{genericAlgorithm}


    \caption{Proximal decoding algorithm for an \ac{AWGN} channel}
    \label{fig:prox:alg}
\end{figure}