829 lines
32 KiB
TeX
829 lines
32 KiB
TeX
\chapter{Decoding Techniques}%
|
|
\label{chapter:decoding_techniques}
|
|
|
|
In this chapter, the decoding techniques examined in this work are detailed.
|
|
First, an overview of the general methodology of using optimization methods
|
|
for channel decoding is given. Afterwards, the specific decoding techniques
|
|
themselves are explained.
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{Decoding using Optimization Methods}%
|
|
\label{sec:dec:Decoding using Optimization Methods}
|
|
|
|
%
|
|
% General methodology
|
|
%
|
|
|
|
The general idea behind using optimization methods for channel decoding
|
|
is to reformulate the decoding problem as an optimization problem.
|
|
This new formulation can then be solved with one of the many
|
|
available optimization algorithms.
|
|
|
|
Generally, the original decoding problem considered is either the \ac{MAP} or
|
|
the \ac{ML} decoding problem:%
|
|
%
|
|
\begin{align*}
|
|
\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
|
|
f_{\boldsymbol{C} \mid \boldsymbol{Y}} \left( \boldsymbol{c} \mid \boldsymbol{y} \right)\\
|
|
\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
|
|
f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
|
|
.\end{align*}%
|
|
%
|
|
The goal is to arrive at a formulation, where a certain objective function
|
|
$f$ must be minimized under certain constraints:%
|
|
%
|
|
\begin{align*}
|
|
\text{minimize}\hspace{2mm} &f\left( \boldsymbol{c} \right)\\
|
|
\text{subject to}\hspace{2mm} &\boldsymbol{c} \in D
|
|
,\end{align*}%
|
|
%
|
|
where $D$ is the domain of values attainable for $\boldsymbol{c}$ and represents the
|
|
constraints.
|
|
|
|
In contrast to the established message-passing decoding algorithms,
|
|
the viewpoint then changes from observing the decoding process in its
|
|
Tanner graph representation (as shown in figure \ref{fig:dec:tanner})
|
|
to a spatial representation (figure \ref{fig:dec:spatial}),
|
|
where the codewords are some of the vertices of a hypercube.
|
|
The goal is to find that point $\boldsymbol{c}$,
|
|
which minimizes the objective function $f$.
|
|
|
|
%
|
|
% Figure showing decoding space
|
|
%
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\begin{subfigure}[c]{0.47\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{checknode} = [color=KITblue, fill=KITblue,
|
|
draw, regular polygon,regular polygon sides=4,
|
|
inner sep=0pt, minimum size=12pt]
|
|
\tikzstyle{variablenode} = [color=KITgreen, fill=KITgreen,
|
|
draw, circle, inner sep=0pt, minimum size=10pt]
|
|
|
|
\begin{tikzpicture}[scale=1, transform shape]
|
|
\node[checknode,
|
|
label={[below, label distance=-0.4cm, align=center]
|
|
$c$\\$\left( x_1 + x_2 + x_3 = 0 \right) $}]
|
|
(c) at (0, 0) {};
|
|
\node[variablenode, label={$x_1$}] (x1) at (-2, 2) {};
|
|
\node[variablenode, label={$x_2$}] (x2) at (0, 2) {};
|
|
\node[variablenode, label={$x_3$}] (x3) at (2, 2) {};
|
|
|
|
\draw (c) -- (x1);
|
|
\draw (c) -- (x2);
|
|
\draw (c) -- (x3);
|
|
\end{tikzpicture}
|
|
|
|
\caption{Tanner graph representation of a single parity-check code}
|
|
\label{fig:dec:tanner}
|
|
\end{subfigure}%
|
|
\hfill%
|
|
\begin{subfigure}[c]{0.47\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
|
|
\tdplotsetmaincoords{60}{25}
|
|
\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
|
|
% Cube
|
|
|
|
\coordinate (p000) at (0, 0, 0);
|
|
\coordinate (p001) at (0, 0, 2);
|
|
\coordinate (p010) at (0, 2, 0);
|
|
\coordinate (p011) at (0, 2, 2);
|
|
\coordinate (p100) at (2, 0, 0);
|
|
\coordinate (p101) at (2, 0, 2);
|
|
\coordinate (p110) at (2, 2, 0);
|
|
\coordinate (p111) at (2, 2, 2);
|
|
|
|
\draw[] (p000) -- (p100);
|
|
\draw[] (p100) -- (p101);
|
|
\draw[] (p101) -- (p001);
|
|
\draw[] (p001) -- (p000);
|
|
|
|
\draw[dashed] (p010) -- (p110);
|
|
\draw[] (p110) -- (p111);
|
|
\draw[] (p111) -- (p011);
|
|
\draw[dashed] (p011) -- (p010);
|
|
|
|
\draw[dashed] (p000) -- (p010);
|
|
\draw[] (p100) -- (p110);
|
|
\draw[] (p101) -- (p111);
|
|
\draw[] (p001) -- (p011);
|
|
|
|
% Polytope Vertices
|
|
|
|
\node[codeword] (c000) at (p000) {};
|
|
\node[codeword] (c101) at (p101) {};
|
|
\node[codeword] (c110) at (p110) {};
|
|
\node[codeword] (c011) at (p011) {};
|
|
|
|
% Polytope Edges
|
|
|
|
% \draw[line width=1pt, color=KITblue] (c000) -- (c101);
|
|
% \draw[line width=1pt, color=KITblue] (c000) -- (c110);
|
|
% \draw[line width=1pt, color=KITblue] (c000) -- (c011);
|
|
%
|
|
% \draw[line width=1pt, color=KITblue] (c101) -- (c110);
|
|
% \draw[line width=1pt, color=KITblue] (c101) -- (c011);
|
|
%
|
|
% \draw[line width=1pt, color=KITblue] (c011) -- (c110);
|
|
|
|
% Polytope Annotations
|
|
|
|
\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
|
|
\node[color=KITblue, right=0.17cm of c101] {$\left( 1, 0, 1 \right) $};
|
|
\node[color=KITblue, right=0cm of c110] {$\left( 1, 1, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
|
|
|
|
% c
|
|
|
|
\node[color=KITgreen, fill=KITgreen,
|
|
draw, circle, inner sep=0pt, minimum size=4pt] (c) at (0.9, 0.7, 1) {};
|
|
\node[color=KITgreen, right=0cm of c] {$\boldsymbol{c}$};
|
|
\end{tikzpicture}
|
|
|
|
\caption{Spatial representation of a single parity-check code}
|
|
\label{fig:dec:spatial}
|
|
\end{subfigure}%
|
|
|
|
\caption{Different representations of the decoding problem}
|
|
\end{figure}
|
|
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{LP Decoding}%
|
|
\label{sec:dec:LP Decoding}
|
|
|
|
\Ac{LP} decoding is a subject area introduced by Feldman et al.
|
|
\cite{feldman_paper}. They reframe the decoding problem as an
|
|
\textit{integer linear program} and subsequently present two relaxations into
|
|
\textit{linear programs}, one representing a formulation of exact \ac{ML}
|
|
decoding and one, which is an approximation with a more manageable
|
|
representation.
|
|
To solve the resulting linear program, various optimization methods can be
|
|
used.
|
|
|
|
Feldman et al. begin by looking at the \ac{ML} decoding problem%
|
|
\footnote{They assume that all codewords are equally likely to be transmitted,
|
|
making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
|
|
%
|
|
\begin{align}
|
|
\hat{\boldsymbol{c}} = \argmax_{\boldsymbol{c} \in \mathcal{C}}
|
|
f_{\boldsymbol{Y} \mid \boldsymbol{C}}
|
|
\left( \boldsymbol{y} \mid \boldsymbol{c} \right)%
|
|
\label{eq:lp:ml}
|
|
.\end{align}%
|
|
%
|
|
Assuming a memoryless channel, equation \ref{eq:lp:ml} can be rewritten in terms
|
|
of the \acp{LLR} $\gamma_i$ \cite[Sec 2.5]{feldman_thesis}:%
|
|
%
|
|
\begin{align*}
|
|
\hat{\boldsymbol{c}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
|
|
\sum_{i=1}^{n} \gamma_i c_i,%
|
|
\hspace{5mm} \gamma_i = \ln\left(
|
|
\frac{f_{\boldsymbol{Y} \mid \boldsymbol{C}}
\left( Y_i = y_i \mid C_i = 0 \right) }
{f_{\boldsymbol{Y} \mid \boldsymbol{C}}
\left( Y_i = y_i \mid C_i = 1 \right) } \right)
|
|
.\end{align*}
|
|
%
|
|
The authors propose the following cost function%
|
|
\footnote{In this context, \textit{cost function} and \textit{objective function}
|
|
have the same meaning.}
|
|
for the \ac{LP} decoding problem:%
|
|
%
|
|
\begin{align*}
|
|
\sum_{i=1}^{n} \gamma_i c_i
|
|
.\end{align*}
|
|
%
|
|
With this cost function, the exact integer linear program formulation of \ac{ML}
|
|
decoding is the following:%
|
|
%
|
|
\begin{align*}
|
|
\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
|
|
\text{subject to }\hspace{2mm} &\boldsymbol{c} \in \mathcal{C}
|
|
.\end{align*}%
|
|
%
|
|
\todo{$\boldsymbol{c}$ or some other variable name? e.g. $\boldsymbol{c}^{*}$.
|
|
Especially for the continuous consideration in LP decoding}
|
|
|
|
As solving integer linear programs is generally NP-hard, this decoding problem
|
|
has to be approximated by one with looser constraints.
|
|
A technique called \textit{relaxation} is applied:
|
|
modifying the constraints in order to broaden the considered
|
|
domain (e.g. by lifting the integer requirement).
|
|
First, the authors present an equivalent \ac{LP} formulation of exact \ac{ML}
|
|
decoding, redefining the constraints in terms of the \textit{codeword polytope}
|
|
%
|
|
\begin{align*}
|
|
\text{poly}\left( \mathcal{C} \right) = \left\{
|
|
\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
|
|
\text{ : } \lambda_{\boldsymbol{c}} \ge 0,
|
|
\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
|
|
,\end{align*} %
|
|
%
|
|
which represents the \textit{convex hull} of all possible codewords,
|
|
i.e. the set of all convex combinations of the codewords.
|
|
However, since the number of constraints needed to characterize the codeword
|
|
polytope is exponential in the code length, this formulation is relaxed further.
|
|
By observing that each check node defines its own local single parity-check
|
|
code, and thus its own \textit{local codeword polytope},
|
|
the \textit{relaxed codeword polytope} $\overline{Q}$ is defined as the intersection of all
|
|
local codeword polytopes.
|
|
This consideration leads to constraints that can be described as follows
|
|
\cite[Sec. II, A]{efficient_lp_dec_admm}:%
|
|
%
|
|
\begin{align*}
|
|
\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j}
|
|
\hspace{5mm}\forall j\in \mathcal{J}
|
|
,\end{align*}%
|
|
where $\boldsymbol{T}_j$ is the \textit{transfer matrix}, which selects the
|
|
neighboring variable nodes
|
|
of check node $j$%
|
|
\footnote{For example, if the $j$th row of the parity-check matrix
|
|
$\boldsymbol{H}$ was $\boldsymbol{h}_j =
|
|
\begin{bmatrix} 0 & 1 & 0 & 1 & 0 & 1 & 0 \end{bmatrix}$,
|
|
the transfer matrix would be $\boldsymbol{T}_j =
|
|
\begin{bmatrix}
|
|
0 & 1 & 0 & 0 & 0 & 0 & 0 \\
|
|
0 & 0 & 0 & 1 & 0 & 0 & 0 \\
|
|
0 & 0 & 0 & 0 & 0 & 1 & 0 \\
|
|
\end{bmatrix} $ (example taken from \cite[Sec. II, A]{efficient_lp_dec_admm})}
|
|
(i.e. the relevant components of $\boldsymbol{c}$ for parity-check $j$)
|
|
and $\mathcal{P}_{d}$ is the \textit{check polytope}, the convex hull of all
|
|
binary vectors of length $d$ with even parity%
|
|
\footnote{Essentially $\mathcal{P}_{d_j}$ is the set of vectors that satisfy
|
|
parity-check $j$, but extended to continuous domain.}%
|
|
.
|
|
|
|
In figure \ref{fig:dec:poly}, the two relaxations are compared for an
|
|
example code.
|
|
Figure \ref{fig:dec:poly:exact} shows the codeword polytope
|
|
$\text{poly}\left( \mathcal{C} \right) $, i.e. the constraints for the
|
|
equivalent linear program to exact \ac{ML} decoding - only valid codewords are
|
|
feasible solutions.
|
|
Figures \ref{fig:dec:poly:local1} and \ref{fig:dec:poly:local2} show the local
|
|
codeword polytopes of each check node.
|
|
Their intersection, the relaxed codeword polytope $\overline{Q}$, is shown in
|
|
figure \ref{fig:dec:poly:relaxed}.
|
|
It can be seen that the relaxed codeword polytope $\overline{Q}$ introduces
|
|
vertices with fractional values;
|
|
these represent erroneous non-codeword solutions to the linear program and
|
|
correspond to the so-called \textit{pseudocodewords} introduced in
|
|
\cite{feldman_paper}.
|
|
However, since for \ac{LDPC} codes $\overline{Q}$ scales linearly with $n$ instead of
|
|
exponentially, it is a lot more tractable for practical applications.
|
|
|
|
The resulting formulation of the relaxed optimization problem is the following:%
|
|
%
|
|
\begin{align*}
|
|
\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
|
|
\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j},
|
|
\hspace{5mm}j\in\mathcal{J}
|
|
.\end{align*}%
|
|
%
|
|
%
|
|
%
|
|
% Codeword polytope visualization figure
|
|
%
|
|
%
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
%
|
|
% Left side - codeword polytope
|
|
%
|
|
|
|
\begin{subfigure}[c]{0.45\textwidth}
|
|
\centering
|
|
|
|
\begin{subfigure}{\textwidth}
|
|
\centering
|
|
|
|
\begin{align*}
|
|
\boldsymbol{H} &=
|
|
\begin{bmatrix}
|
|
1 & 1 & 1\\
|
|
0 & 1 & 1
|
|
\end{bmatrix}\\[1em]
|
|
\mathcal{C} &= \left\{
|
|
\begin{bmatrix}
|
|
0\\
|
|
0\\
|
|
0
|
|
\end{bmatrix},
|
|
\begin{bmatrix}
|
|
0\\
|
|
1\\
|
|
1
|
|
\end{bmatrix}
|
|
\right\}
|
|
\end{align*}
|
|
|
|
\caption{Definition of the visualized code}
|
|
\label{fig:dec:poly:code_def}
|
|
\end{subfigure} \\[7em]
|
|
\begin{subfigure}{\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
|
|
\tdplotsetmaincoords{60}{25}
|
|
\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
|
|
% Cube
|
|
|
|
\coordinate (p000) at (0, 0, 0);
|
|
\coordinate (p001) at (0, 0, 2);
|
|
\coordinate (p010) at (0, 2, 0);
|
|
\coordinate (p011) at (0, 2, 2);
|
|
\coordinate (p100) at (2, 0, 0);
|
|
\coordinate (p101) at (2, 0, 2);
|
|
\coordinate (p110) at (2, 2, 0);
|
|
\coordinate (p111) at (2, 2, 2);
|
|
|
|
\draw[] (p000) -- (p100);
|
|
\draw[] (p100) -- (p101);
|
|
\draw[] (p101) -- (p001);
|
|
\draw[] (p001) -- (p000);
|
|
|
|
\draw[dashed] (p010) -- (p110);
|
|
\draw[] (p110) -- (p111);
|
|
\draw[] (p111) -- (p011);
|
|
\draw[dashed] (p011) -- (p010);
|
|
|
|
\draw[dashed] (p000) -- (p010);
|
|
\draw[] (p100) -- (p110);
|
|
\draw[] (p101) -- (p111);
|
|
\draw[] (p001) -- (p011);
|
|
|
|
% Polytope Vertices
|
|
|
|
\node[codeword] (c000) at (p000) {};
|
|
\node[codeword] (c011) at (p011) {};
|
|
|
|
% Polytope Edges
|
|
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c011);
|
|
|
|
% Polytope Annotations
|
|
|
|
\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
|
|
\end{tikzpicture}
|
|
|
|
\caption{Codeword polytope $\text{poly}\left( \mathcal{C} \right) $}
|
|
\label{fig:dec:poly:exact}
|
|
\end{subfigure}
|
|
\end{subfigure} \hfill%
|
|
%
|
|
%
|
|
% Right side - relaxed polytope
|
|
%
|
|
%
|
|
\begin{subfigure}[c]{0.45\textwidth}
|
|
\centering
|
|
|
|
\begin{subfigure}{\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
|
|
\tdplotsetmaincoords{60}{25}
|
|
\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
|
|
% Cube
|
|
|
|
\coordinate (p000) at (0, 0, 0);
|
|
\coordinate (p001) at (0, 0, 2);
|
|
\coordinate (p010) at (0, 2, 0);
|
|
\coordinate (p011) at (0, 2, 2);
|
|
\coordinate (p100) at (2, 0, 0);
|
|
\coordinate (p101) at (2, 0, 2);
|
|
\coordinate (p110) at (2, 2, 0);
|
|
\coordinate (p111) at (2, 2, 2);
|
|
|
|
\draw[] (p000) -- (p100);
|
|
\draw[] (p100) -- (p101);
|
|
\draw[] (p101) -- (p001);
|
|
\draw[] (p001) -- (p000);
|
|
|
|
\draw[dashed] (p010) -- (p110);
|
|
\draw[] (p110) -- (p111);
|
|
\draw[] (p111) -- (p011);
|
|
\draw[dashed] (p011) -- (p010);
|
|
|
|
\draw[dashed] (p000) -- (p010);
|
|
\draw[] (p100) -- (p110);
|
|
\draw[] (p101) -- (p111);
|
|
\draw[] (p001) -- (p011);
|
|
|
|
% Polytope Vertices
|
|
|
|
\node[codeword] (c000) at (p000) {};
|
|
\node[codeword] (c101) at (p101) {};
|
|
\node[codeword] (c110) at (p110) {};
|
|
\node[codeword] (c011) at (p011) {};
|
|
|
|
% Polytope Edges
|
|
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c101);
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c110);
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c011);
|
|
|
|
\draw[line width=1pt, color=KITblue] (c101) -- (c110);
|
|
\draw[line width=1pt, color=KITblue] (c101) -- (c011);
|
|
|
|
\draw[line width=1pt, color=KITblue] (c011) -- (c110);
|
|
|
|
% Polytope Annotations
|
|
|
|
\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
|
|
\node[color=KITblue, right=0.17cm of c101] {$\left( 1, 0, 1 \right) $};
|
|
\node[color=KITblue, right=0cm of c110] {$\left( 1, 1, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
|
|
\end{tikzpicture}
|
|
|
|
\caption{Local codeword polytope of check node\\ $j=1$
|
|
$\left( c_1 + c_2 + c_3 = 0 \right)$}
|
|
\label{fig:dec:poly:local1}
|
|
\end{subfigure} \\[1em]
|
|
\begin{subfigure}{\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
|
|
\tdplotsetmaincoords{60}{25}
|
|
\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
|
|
% Cube
|
|
|
|
\coordinate (p000) at (0, 0, 0);
|
|
\coordinate (p001) at (0, 0, 2);
|
|
\coordinate (p010) at (0, 2, 0);
|
|
\coordinate (p011) at (0, 2, 2);
|
|
\coordinate (p100) at (2, 0, 0);
|
|
\coordinate (p101) at (2, 0, 2);
|
|
\coordinate (p110) at (2, 2, 0);
|
|
\coordinate (p111) at (2, 2, 2);
|
|
|
|
\draw[] (p000) -- (p100);
|
|
\draw[] (p100) -- (p101);
|
|
\draw[] (p101) -- (p001);
|
|
\draw[] (p001) -- (p000);
|
|
|
|
\draw[dashed] (p010) -- (p110);
|
|
\draw[] (p110) -- (p111);
|
|
\draw[] (p111) -- (p011);
|
|
\draw[dashed] (p011) -- (p010);
|
|
|
|
\draw[dashed] (p000) -- (p010);
|
|
\draw[] (p100) -- (p110);
|
|
\draw[] (p101) -- (p111);
|
|
\draw[] (p001) -- (p011);
|
|
|
|
% Polytope Vertices
|
|
|
|
\node[codeword] (c000) at (p000) {};
|
|
\node[codeword] (c011) at (p011) {};
|
|
\node[codeword] (c100) at (p100) {};
|
|
\node[codeword] (c111) at (p111) {};
|
|
|
|
% Polytope Edges
|
|
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c011);
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c100);
|
|
\draw[line width=1pt, color=KITblue] (c100) -- (c111);
|
|
\draw[line width=1pt, color=KITblue] (c111) -- (c011);
|
|
|
|
% Polytope Annotations
|
|
|
|
\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
|
|
\node[color=KITblue, below=0cm of c100] {$\left( 1, 0, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c111] {$\left( 1, 1, 1 \right) $};
|
|
\end{tikzpicture}
|
|
|
|
\caption{Local codeword polytope of check node\\ $j=2$
|
|
$\left( c_2 + c_3 = 0\right)$}
|
|
\label{fig:dec:poly:local2}
|
|
\end{subfigure}\\[1em]
|
|
\begin{subfigure}{\textwidth}
|
|
\centering
|
|
|
|
\tikzstyle{codeword} = [color=KITblue, fill=KITblue,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
\tikzstyle{pseudocodeword} = [color=KITred, fill=KITred,
|
|
draw, circle, inner sep=0pt, minimum size=4pt]
|
|
|
|
\tdplotsetmaincoords{60}{25}
|
|
\begin{tikzpicture}[scale=1, transform shape, tdplot_main_coords]
|
|
% Cube
|
|
|
|
\coordinate (p000) at (0, 0, 0);
|
|
\coordinate (p001) at (0, 0, 2);
|
|
\coordinate (p010) at (0, 2, 0);
|
|
\coordinate (p011) at (0, 2, 2);
|
|
\coordinate (p100) at (2, 0, 0);
|
|
\coordinate (p101) at (2, 0, 2);
|
|
\coordinate (p110) at (2, 2, 0);
|
|
\coordinate (p111) at (2, 2, 2);
|
|
|
|
\draw[] (p000) -- (p100);
|
|
\draw[] (p100) -- (p101);
|
|
\draw[] (p101) -- (p001);
|
|
\draw[] (p001) -- (p000);
|
|
|
|
\draw[dashed] (p010) -- (p110);
|
|
\draw[] (p110) -- (p111);
|
|
\draw[] (p111) -- (p011);
|
|
\draw[dashed] (p011) -- (p010);
|
|
|
|
\draw[dashed] (p000) -- (p010);
|
|
\draw[] (p100) -- (p110);
|
|
\draw[] (p101) -- (p111);
|
|
\draw[] (p001) -- (p011);
|
|
|
|
% Polytope Vertices
|
|
|
|
\node[codeword] (c000) at (p000) {};
|
|
\node[codeword] (c011) at (p011) {};
|
|
\node[pseudocodeword] (cpseudo) at (2, 1, 1) {};
|
|
|
|
% Polytope Edges
|
|
|
|
\draw[line width=1pt, color=KITblue] (c000) -- (c011);
|
|
\draw[line width=1pt, color=KITred] (cpseudo) -- (c000);
|
|
\draw[line width=1pt, color=KITred] (cpseudo) -- (c011);
|
|
|
|
% Polytope Annotations
|
|
|
|
\node[color=KITblue, below=0cm of c000] {$\left( 0, 0, 0 \right) $};
|
|
\node[color=KITblue, above=0cm of c011] {$\left( 0, 1, 1 \right) $};
|
|
\node[color=KITred, right=0.03cm of cpseudo]
|
|
{$\left( 1, \frac{1}{2}, \frac{1}{2} \right) $};
|
|
\end{tikzpicture}
|
|
|
|
\caption{Relaxed codeword polytope $\overline{Q}$}
|
|
\label{fig:dec:poly:relaxed}
|
|
\end{subfigure}
|
|
\end{subfigure}
|
|
|
|
\caption{Visualization of the codeword polytope and the relaxed codeword
|
|
polytope}
|
|
\label{fig:dec:poly}
|
|
\end{figure}%
|
|
%
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{LP Decoding using ADMM}%
|
|
\label{sec:dec:LP Decoding using ADMM}
|
|
|
|
\begin{itemize}
|
|
\item Why ADMM?
|
|
\item Adaptive linear programming?
|
|
\item How ADMM is adapted to LP decoding
|
|
\end{itemize}
|
|
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
\section{Proximal Decoding}%
|
|
\label{sec:dec:Proximal Decoding}
|
|
|
|
Proximal decoding was proposed by Wadayama et al.\ as a novel formulation of
|
|
optimization-based decoding \cite{proximal_paper}.
|
|
With this algorithm, minimization is performed using the proximal gradient
|
|
method.
|
|
In contrast to \ac{LP} decoding, the objective function is based on a
|
|
non-convex optimization formulation of the \ac{MAP} decoding problem.
|
|
|
|
In order to derive the objective function, the authors begin with the
|
|
\ac{MAP} decoding rule, expressed as a continuous maximization problem over
|
|
$\boldsymbol{x}$:%
|
|
%
|
|
\begin{align}
|
|
\hat{\boldsymbol{x}} = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
|
|
f_{\boldsymbol{X} \mid \boldsymbol{Y}}
|
|
\left( \boldsymbol{x} \mid \boldsymbol{y} \right)
|
|
= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{Y} \mid \boldsymbol{X}}
|
|
\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)%
|
|
\label{eq:prox:vanilla_MAP}
|
|
.\end{align}%
|
|
%
|
|
The likelihood $f_{\boldsymbol{Y} \mid \boldsymbol{X}}
|
|
\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ is a known function
|
|
determined by the channel model.
|
|
The prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$ is also
|
|
known, since the equal probability assumption is made on
|
|
$\mathcal{C}\left( \boldsymbol{H} \right)$.
|
|
However, because in this case the considered domain is continuous,
|
|
the prior \ac{PDF} cannot be ignored as a constant during the minimization
|
|
as is often done, and has a rather unwieldy representation:%
|
|
%
|
|
\begin{align}
|
|
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) =
|
|
\frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| }
|
|
\sum_{\boldsymbol{c} \in \mathcal{C}\left( \boldsymbol{H} \right) }
|
|
\delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right)
|
|
\label{eq:prox:prior_pdf}
|
|
.\end{align}%
|
|
%
|
|
In order to rewrite the prior \ac{PDF}
|
|
$f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$,
|
|
the so-called \textit{code-constraint polynomial} is introduced:%
|
|
%
|
|
\begin{align*}
|
|
h\left( \boldsymbol{x} \right) =
|
|
\underbrace{\sum_{j=1}^{n} \left( x_j^2-1 \right) ^2}_{\text{Bipolar constraint}}
|
|
+ \underbrace{\sum_{i=1}^{m} \left[
|
|
\left( \prod_{j\in \mathcal{A}
|
|
\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity Constraint}}%
|
|
.\end{align*}%
|
|
%
|
|
The intention of this function is to provide a way to penalize vectors far
|
|
from a codeword and favor those close to one.
|
|
In order to achieve this, the polynomial is composed of two parts: one term
|
|
representing the bipolar constraint, providing for a discrete solution of the
|
|
continuous optimization problem, and one term representing the parity
|
|
constraint, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
|
|
The prior \ac{PDF} is then approximated using the code-constraint polynomial:%
|
|
%
|
|
\begin{align}
|
|
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)
|
|
\approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }%
|
|
\label{eq:prox:prior_pdf_approx}
|
|
.\end{align}%
|
|
%
|
|
The authors justify this approximation by arguing that for
|
|
$\gamma \rightarrow \infty$, the approximation in equation
|
|
\ref{eq:prox:prior_pdf_approx} approaches the original function in equation
|
|
\ref{eq:prox:prior_pdf}.
|
|
This approximation can then be plugged into equation \ref{eq:prox:vanilla_MAP}
|
|
and the likelihood can be rewritten using the negative log-likelihood
|
|
$L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
|
|
f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left(
|
|
\boldsymbol{y} \mid \boldsymbol{x} \right) \right) $:%
|
|
%
|
|
\begin{align*}
|
|
\hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
|
|
e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
|
|
e^{-\gamma h\left( \boldsymbol{x} \right) } \\
|
|
&= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
|
|
L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
+ \gamma h\left( \boldsymbol{x} \right)
|
|
\right)%
|
|
.\end{align*}%
|
|
%
|
|
Thus, with proximal decoding, the objective function
|
|
$f\left( \boldsymbol{x} \right)$ considered is%
|
|
%
|
|
\begin{align}
|
|
f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
+ \gamma h\left( \boldsymbol{x} \right)%
|
|
\label{eq:prox:objective_function}
|
|
\end{align}%
|
|
%
|
|
and the decoding problem is reformulated to%
|
|
%
|
|
\begin{align*}
|
|
\text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
+ \gamma h\left( \boldsymbol{x} \right)\\
|
|
\text{subject to}\hspace{2mm} &\boldsymbol{x} \in \mathbb{R}^n
|
|
.\end{align*}
|
|
%
|
|
|
|
For the solution of the approximate \ac{MAP} decoding problem, the two parts
|
|
of equation \ref{eq:prox:objective_function} are considered separately:
|
|
the minimization of the objective function occurs in an alternating
|
|
fashion, switching between the negative log-likelihood
|
|
$L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled
|
|
code-constraint polynomial $\gamma h\left( \boldsymbol{x} \right) $.
|
|
Two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are introduced,
|
|
describing the result of each of the two steps.
|
|
The first step, minimizing the negative log-likelihood, is performed using gradient
|
|
descent:%
|
|
%
|
|
\begin{align}
|
|
\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla
|
|
L\left( \boldsymbol{y} \mid \boldsymbol{s} \right),
|
|
\hspace{5mm}\omega > 0
|
|
\label{eq:prox:step_log_likelihood}
|
|
.\end{align}%
|
|
%
|
|
For the second step, minimizing the scaled code-constraint polynomial, the
|
|
proximal gradient method is used and the \textit{proximal operator} of
|
|
$\gamma h\left( \boldsymbol{x} \right) $ has to be computed.
|
|
It is then immediately approximated with gradient-descent:%
|
|
%
|
|
\begin{align*}
|
|
\text{prox}_{\gamma h} \left( \boldsymbol{x} \right) &\equiv
\argmin_{\boldsymbol{t} \in \mathbb{R}^n}
\left( \gamma h\left( \boldsymbol{t} \right) +
\frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\
&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{x} \right),
\hspace{5mm} \gamma \text{ small}
|
|
.\end{align*}%
|
|
%
|
|
The second step thus becomes%
|
|
%
|
|
\begin{align*}
|
|
\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right),
|
|
\hspace{5mm}\gamma > 0,\text{ small}
|
|
.\end{align*}
|
|
%
|
|
While the approximation of the prior \ac{PDF} made in equation \ref{eq:prox:prior_pdf_approx}
|
|
theoretically becomes better
|
|
with larger $\gamma$, the constraint that $\gamma$ be small is important,
|
|
as it keeps the effect of $h\left( \boldsymbol{x} \right) $ on the landscape
|
|
of the objective function small.
|
|
Otherwise, unwanted stationary points, including local minima, are introduced.
|
|
The authors say that in practice, the value of $\gamma$ should be adjusted
|
|
according to the decoding performance.
|
|
|
|
%The components of the gradient of the code-constraint polynomial can be computed as follows:%
|
|
%%
|
|
%\begin{align*}
|
|
% \frac{\partial}{\partial x_k} h\left( \boldsymbol{x} \right) =
|
|
% 4\left( x_k^2 - 1 \right) x_k + \frac{2}{x_k}
|
|
% \sum_{i\in \mathcal{B}\left( k \right) } \left(
|
|
% \left( \prod_{j\in\mathcal{A}\left( i \right)} x_j\right)^2
|
|
% - \prod_{j\in\mathcal{A}\left( i \right) }x_j \right)
|
|
%.\end{align*}%
|
|
%\todo{Only multiplication?}%
|
|
%\todo{$x_k$: $k$ or some other indexing variable?}%
|
|
%%
|
|
In the case of \ac{AWGN}, the likelihood
|
|
$f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)$
|
|
is%
|
|
%
|
|
\begin{align*}
|
|
f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
= \frac{1}{\left( 2\pi\sigma^2 \right)^{n/2}}e^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 }{2\sigma^2}}
|
|
.\end{align*}
|
|
%
|
|
Thus, the gradient of the negative log-likelihood becomes%
|
|
\footnote{For the minimization, constants can be disregarded. For this reason,
|
|
it suffices to consider only proportionality instead of equality.}%
|
|
%
|
|
\begin{align*}
|
|
\nabla L \left( \boldsymbol{y} \mid \boldsymbol{x} \right)
|
|
&\propto \nabla \lVert \boldsymbol{y} - \boldsymbol{x} \rVert^2\\
|
|
&\propto \boldsymbol{x} - \boldsymbol{y}
|
|
,\end{align*}%
|
|
%
|
|
allowing equation \ref{eq:prox:step_log_likelihood} to be rewritten as%
|
|
%
|
|
\begin{align*}
|
|
\boldsymbol{r} \leftarrow \boldsymbol{s}
|
|
- \omega \left( \boldsymbol{s} - \boldsymbol{y} \right)
|
|
.\end{align*}
|
|
%
|
|
|
|
One thing to consider during the actual decoding process is that the gradient
|
|
of the code-constraint polynomial can take on extremely large values.
|
|
To avoid numerical instability, an additional step is added, where all
|
|
components of the current estimate are clipped to $\left[-\eta, \eta \right]$,
|
|
where $\eta$ is a positive constant slightly larger than one:%
|
|
%
|
|
\begin{align*}
|
|
\boldsymbol{s} \leftarrow \Pi_{\eta} \left( \boldsymbol{r}
|
|
- \gamma \nabla h\left( \boldsymbol{r} \right) \right)
|
|
,\end{align*}
|
|
%
|
|
with $\Pi_{\eta}\left( \cdot \right) $ denoting the projection onto
|
|
$\left[ -\eta, \eta \right]^n$.
|
|
|
|
The iterative decoding process resulting from these considerations is shown in
|
|
figure \ref{fig:prox:alg}.
|
|
|
|
\begin{figure}[H]
|
|
\centering
|
|
|
|
\begin{genericAlgorithm}[caption={}, label={}]
|
|
$\boldsymbol{s} \leftarrow \boldsymbol{0}$
|
|
for $K$ iterations do
|
|
$\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
|
|
$\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
|
|
$\boldsymbol{\hat{c}} \leftarrow \frac{1}{2}\left( \boldsymbol{1} - \text{sign}\left( \boldsymbol{s} \right) \right) $
|
|
if $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ do
|
|
return $\boldsymbol{\hat{c}}$
|
|
end if
|
|
end for
|
|
return $\boldsymbol{\hat{c}}$
|
|
\end{genericAlgorithm}
|
|
|
|
|
|
\caption{Proximal decoding algorithm for an \ac{AWGN} channel}
|
|
\label{fig:prox:alg}
|
|
\end{figure}
|