\chapter{Comparison of Proximal Decoding and \acs{LP} Decoding using \acs{ADMM}}%
\label{chapter:comparison}
In this chapter, proximal decoding and \ac{LP} decoding using \ac{ADMM} are compared.
First, the two algorithms are contrasted on a theoretical basis.
Subsequently, their respective simulation results are examined, and the
differences are interpreted in light of their theoretical structure.
%some similarities between the proximal decoding algorithm
%and \ac{LP} decoding using \ac{ADMM} are be pointed out.
%The two algorithms are compared and their different computational and decoding
%performance is interpreted on the basis of their theoretical structure.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Theoretical Comparison}%
\label{sec:comp:theo}
\ac{ADMM} and the proximal gradient method can both be expressed in terms of
proximal operators \cite[Sec. 4.4]{proximal_algorithms}.
When \ac{ADMM} is used specifically to solve the \ac{LP} decoding problem,
however, such a formulation is not directly possible, since multiple
constraints have to be handled simultaneously.
In spite of that, the two algorithms still show some striking similarities.
To see the first of these similarities, the \ac{LP} decoding problem in
equation (\ref{eq:lp:relaxed_formulation}) can be slightly rewritten using the
\textit{indicator functions} $g_j : \mathbb{R}^{d_j} \rightarrow
\left\{ 0, +\infty \right\} \hspace{1mm}, j\in\mathcal{J}$ for the polytopes
$\mathcal{P}_{d_j}, \hspace{1mm} j\in\mathcal{J}$, defined as%
%
\begin{align*}
g_j\left( \boldsymbol{t} \right) := \begin{cases}
0, & \boldsymbol{t} \in \mathcal{P}_{d_j} \\
+\infty, & \boldsymbol{t} \not\in \mathcal{P}_{d_j}
\end{cases}
,\end{align*}
%
by moving the constraints into the objective function, as shown in figure
\ref{fig:ana:theo_comp_alg:admm}.
Both algorithms follow an iterative approach consisting of two
alternating steps.
The objective functions of both problems are similar in that they
both comprise two parts: one associated with the likelihood that a given
codeword was sent and one associated with the constraints the decoding process
is subject to.
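A property that links the two formulations and is used repeatedly in the
following is that the proximal operator of an indicator function is simply the
Euclidean projection onto the corresponding set, so that for the functions
$g_j$ defined above
%
\begin{align*}
\textbf{prox}_{g_j}\left( \boldsymbol{v} \right)
    = \argmin_{\boldsymbol{t}\in\mathbb{R}^{d_j}}\left(
    g_j\left( \boldsymbol{t} \right)
    + \frac{1}{2}\left\Vert \boldsymbol{t} - \boldsymbol{v} \right\Vert_2^2
    \right)
    = \Pi_{\mathcal{P}_{d_j}}\left( \boldsymbol{v} \right)
.\end{align*}
%
This is why the $\boldsymbol{z}$-update of \ac{ADMM} reduces to the projection
$\Pi_{\mathcal{P}_{d_j}}$ appearing in figure
\ref{fig:comp:message_passing:admm}.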
%
\begin{figure}[h]
\centering
\begin{subfigure}{0.42\textwidth}
\centering
\begin{align*}
\text{minimize}\hspace{2mm} & \underbrace{L\left( \boldsymbol{y} \mid
\tilde{\boldsymbol{x}} \right)}_{\text{Likelihood}}
+ \underbrace{\gamma h\left( \tilde{\boldsymbol{x}} \right)}
_{\text{Constraints}} \\
\text{subject to}\hspace{2mm} &\tilde{\boldsymbol{x}} \in \mathbb{R}^n
\end{align*}
\begin{genericAlgorithm}[caption={}, label={},
basicstyle=\fontsize{10}{18}\selectfont
]
Initialize $\boldsymbol{r}, \boldsymbol{s}, \omega, \gamma$
while stopping criterion unfulfilled do
$\boldsymbol{r} \leftarrow \boldsymbol{r}
+ \omega \nabla L\left( \boldsymbol{y} \mid \boldsymbol{s} \right) $
$\boldsymbol{s} \leftarrow
\textbf{prox}_{\scaleto{\gamma h}{7.5pt}}\left( \boldsymbol{r} \right) $|\Suppressnumber|
|\Reactivatenumber|
end while
return $\boldsymbol{s}$
\end{genericAlgorithm}
\caption{Proximal decoding}
\label{fig:ana:theo_comp_alg:prox}
\end{subfigure}\hfill%
\begin{subfigure}{0.55\textwidth}
\centering
\begin{align*}
\text{minimize}\hspace{5mm} &
\underbrace{\boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}}}
_{\text{Likelihood}}
+ \underbrace{\sum\nolimits_{j\in\mathcal{J}} g_j\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}} \right) }
_{\text{Constraints}} \\
\text{subject to}\hspace{5mm} &
\tilde{\boldsymbol{c}} \in \left[ 0, 1 \right]^n
\end{align*}
\begin{genericAlgorithm}[caption={}, label={},
basicstyle=\fontsize{10}{18}\selectfont
]
Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}, \boldsymbol{u}, \boldsymbol{\gamma}, \rho$
while stopping criterion unfulfilled do
$\tilde{\boldsymbol{c}} \leftarrow \argmin_{\tilde{\boldsymbol{c}}}
\left( \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}}
+ \frac{\rho}{2}\sum_{j\in\mathcal{J}} \left\Vert
\boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j
+ \boldsymbol{u}_j \right\Vert_2^2 \right)$
$\boldsymbol{z}_j \leftarrow \textbf{prox}_{g_j}
\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
+ \boldsymbol{u}_j \right), \hspace{5mm}\forall j\in\mathcal{J}$
$\boldsymbol{u}_j \leftarrow \boldsymbol{u}_j
 + \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j, \hspace{9.5mm}\forall j\in\mathcal{J}$
end while
return $\tilde{\boldsymbol{c}}$
\end{genericAlgorithm}
\caption{\ac{LP} decoding using \ac{ADMM}}
\label{fig:ana:theo_comp_alg:admm}
\end{subfigure}%
\caption{Comparison of the proximal gradient method and \ac{ADMM}}
\label{fig:ana:theo_comp_alg}
\end{figure}%
%
Their major difference is that while with proximal decoding the constraints
are regarded in a global context, considering all parity checks at the same
time, with \ac{ADMM} each parity check is
considered separately and in a more local context (line 4 in both algorithms).
This difference means that while with proximal decoding the alternating
minimization of the two parts of the objective function inevitably leads to
oscillatory behavior (as explained in section
\ref{subsec:prox:conv_properties}), this is not the case with \ac{ADMM}, which
partly explains the disparate decoding performance of the two methods.
Furthermore, while with proximal decoding the step considering the constraints
is realized using gradient descent, and therefore only approximately,
with \ac{ADMM} it reduces to a number of projections onto the parity polytopes
$\mathcal{P}_{d_j}$, which always provide exact results.
The contrasting treatment of the constraints (global and approximate with
proximal decoding as opposed to local and exact with \ac{LP} decoding using
\ac{ADMM}) also leads to different prospects when the decoding process gets
stuck in a local minimum.
With proximal decoding this occurs due to the approximate nature of the
calculation, whereas with \ac{LP} decoding it occurs due to the approximate
formulation of the constraints, independent of the optimization method
itself.
The resulting advantage of \ac{LP} decoding is that it can be detected when
the algorithm gets stuck: the returned solution then corresponds to a
pseudocodeword, some components of which are fractional.
Moreover, when a valid codeword is returned, it is also the \ac{ML} codeword.
This means that additional redundant parity-checks can be added successively
until the codeword returned is valid and thus the \ac{ML} solution is found
\cite[Sec. IV.]{alp}.
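The check that such a scheme relies on is straightforward: the decoder output
is inspected for clearly fractional components and, if none are found, rounded
and tested against all parity checks. The following minimal sketch illustrates
this; it is not part of the implementations evaluated later, and all names and
the tolerance are chosen purely for illustration.
%
\begin{lstlisting}[language=Python]
import numpy as np

def classify_output(x, H, tol=1e-5):
    """Classify a decoder output as valid codeword or pseudocodeword.

    x:   decoder output in [0, 1]^n
    H:   binary parity-check matrix of shape (m, n)
    tol: tolerance for deciding whether a component is integral
    """
    x = np.asarray(x, dtype=float)
    # A pseudocodeword is revealed by clearly fractional components.
    if np.any(np.minimum(x, 1.0 - x) > tol):
        return None, "pseudocodeword (fractional components)"
    # Otherwise round to a binary vector and test all parity checks.
    c = np.round(x).astype(int)
    if np.all(H @ c % 2 == 0):
        return c, "valid codeword"
    return None, "invalid codeword"
\end{lstlisting}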
In terms of time complexity, the two decoding algorithms are comparable.
Each of the operations required for proximal decoding can be performed
in linear time for \ac{LDPC} codes (see section \ref{subsec:prox:comp_perf}).
The same is true for the $\tilde{\boldsymbol{c}}$- and $\boldsymbol{u}$-update
steps of \ac{LP} decoding using \ac{ADMM}, while
the projection step has a worst-case time complexity of
$\mathcal{O}\left( n^2 \right)$ and an average complexity of
$\mathcal{O}\left( n \right)$ (see section TODO, \cite[Sec. VIII.]{lautern}).
Both algorithms can be understood as message-passing algorithms: \ac{LP}
decoding using \ac{ADMM} similarly to \cite[Sec. III. D.]{original_admm}
or \cite[Sec. II. B.]{efficient_lp_dec_admm}, and proximal decoding by
starting from algorithm \ref{alg:prox},
substituting the gradient of the code-constraint polynomial and separating
it into two parts.
The algorithms in their message-passing form are depicted in figure
\ref{fig:comp:message_passing}.
$M_{j\to i}$ denotes a message transmitted from \ac{CN} $j$ to \ac{VN} $i$,
and $M_{j\to}$ signifies the special case where a \ac{CN} transmits the same
message to all of its neighboring \acp{VN}.
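The componentwise $\tilde{\boldsymbol{c}}$-update appearing in figure
\ref{fig:comp:message_passing:admm} follows directly from the quadratic
$\tilde{\boldsymbol{c}}$-update of \ac{ADMM}: setting the gradient of its
objective to zero yields
%
\begin{align*}
\boldsymbol{\gamma} + \rho \sum\nolimits_{j\in\mathcal{J}}
\boldsymbol{T}_j^\text{T}\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
- \boldsymbol{z}_j + \boldsymbol{u}_j \right) = \boldsymbol{0}
,\end{align*}
%
and since $\sum_{j\in\mathcal{J}} \boldsymbol{T}_j^\text{T}\boldsymbol{T}_j$
is a diagonal matrix containing the variable node degrees $d_i$, solving for
the $i$-th component gives
%
\begin{align*}
\tilde{c}_i = \frac{1}{d_i}\left( \sum\nolimits_{j\in N_v\left( i \right)}
\left( \left( z_j \right)_i - \left( u_j \right)_i \right)
- \frac{\gamma_i}{\rho} \right)
,\end{align*}
%
which is precisely the $\tilde{c}_i$-update of figure
\ref{fig:comp:message_passing:admm} with the messages
$M_{j\to i} = \left( z_j \right)_i - \left( u_j \right)_i$.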
%
\begin{figure}[h]
\centering
\begin{subfigure}{0.48\textwidth}
\centering
\begin{genericAlgorithm}[caption={}, label={},
% basicstyle=\fontsize{10}{16}\selectfont
]
Initialize $\boldsymbol{r}, \boldsymbol{s}, \omega, \gamma$
while stopping criterion unfulfilled do
for j in $\mathcal{J}$ do
$p_j \leftarrow \prod_{i\in N_c\left( j \right) } r_i $
$M_{j\to} \leftarrow p_j^2 - p_j$|\Suppressnumber|
|\vspace{0.22mm}\Reactivatenumber|
end for
for i in $\mathcal{I}$ do
$s_i \leftarrow s_i + \gamma \left[ 4\left( s_i^2 - 1 \right)s_i
\phantom{\frac{4}{s_i}}\right.$|\Suppressnumber|
|\Reactivatenumber|$\left.+ \frac{4}{s_i}\sum_{j\in N_v\left( i \right) }
M_{j\to} \right] $
$r_i \leftarrow r_i + \omega \left( s_i - y_i \right)$
end for
end while
return $\boldsymbol{s}$
\end{genericAlgorithm}
\caption{Proximal decoding}
\label{fig:comp:message_passing:proximal}
\end{subfigure}%
\hfill
\begin{subfigure}{0.48\textwidth}
\centering
\begin{genericAlgorithm}[caption={}, label={},
% basicstyle=\fontsize{10}{16}\selectfont
]
Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}, \boldsymbol{u}, \boldsymbol{\gamma}, \rho$
while stopping criterion unfulfilled do
for j in $\mathcal{J}$ do
$\boldsymbol{z}_j \leftarrow \Pi_{P_{d_j}}\left(
\boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{u}_j\right)$
$\boldsymbol{u}_j \leftarrow \boldsymbol{u}_j + \boldsymbol{T}_j\tilde{\boldsymbol{c}}
- \boldsymbol{z}_j$
$M_{j\to i} \leftarrow \left( z_j \right)_i - \left( u_j \right)_i,
\hspace{3mm} \forall i \in N_c\left( j \right) $
end for
for i in $\mathcal{I}$ do
$\tilde{c}_i \leftarrow \frac{1}{d_i}
\left(\sum_{j\in N_v\left( i \right) } M_{j\to i}
- \frac{\gamma_i}{\rho} \right)$|\Suppressnumber|
|\vspace{7mm}\Reactivatenumber|
end for
end while
return $\tilde{\boldsymbol{c}}$
\end{genericAlgorithm}
\caption{\ac{LP} decoding using \ac{ADMM}}
\label{fig:comp:message_passing:admm}
\end{subfigure}%
\caption{The proximal gradient method and \ac{LP} decoding using \ac{ADMM}
as message passing algorithms}
\label{fig:comp:message_passing}
\end{figure}%
%
It is evident that while the two algorithms are very similar in their general
structure, with \ac{LP} decoding using \ac{ADMM} a separate message has to be
computed for each neighboring \ac{VN} of a check node (line 6 in figure
\ref{fig:comp:message_passing:admm}), whereas
with proximal decoding each check node transmits the same message to all of
its neighboring \acp{VN}
(line 5 of figure \ref{fig:comp:message_passing:proximal}).
This means that while both algorithms have an average time complexity of
$\mathcal{O}\left( n \right)$, more arithmetic operations are required in the
\ac{ADMM} case.
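To make the per-iteration work concrete, the following sketch transcribes one
iteration of the message-passing form of proximal decoding from figure
\ref{fig:comp:message_passing:proximal}; the data layout (adjacency lists
\texttt{N\_c} and \texttt{N\_v}) and all names are illustrative and do not
correspond to the implementations discussed in the next section.
%
\begin{lstlisting}[language=Python]
import numpy as np

def proximal_iteration(r, s, y, N_c, N_v, omega, gamma):
    """One iteration of proximal decoding in message-passing form.

    r, s, y: length-n vectors (search point, estimate, channel output)
    N_c[j]:  list of variable nodes adjacent to check node j
    N_v[i]:  list of check nodes adjacent to variable node i
    """
    # Check-node step: every check node j sends the same scalar message.
    M = np.empty(len(N_c))
    for j, vns in enumerate(N_c):
        p_j = np.prod(r[vns])
        M[j] = p_j**2 - p_j
    # Variable-node step: update of the estimate using the code-constraint
    # part, followed by the update of r using the likelihood gradient.
    for i, cns in enumerate(N_v):
        s[i] = s[i] + gamma * (4.0 * (s[i]**2 - 1.0) * s[i]
                               + 4.0 / s[i] * np.sum(M[cns]))
        r[i] = r[i] + omega * (s[i] - y[i])
    return r, s
\end{lstlisting}
%
An analogous sketch of the \ac{ADMM} variant would replace the single scalar
message per check node by one projection onto $\mathcal{P}_{d_j}$ and one
message per adjacent \ac{VN}, which is where the additional arithmetic effort
arises.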
In conclusion, the two algorithms have a very similar structure, where the
parts of the objective function relating to the likelihood and to the
constraints are minimized in an alternating fashion.
With proximal decoding this minimization is performed for all constraints at once
in an approximate manner, while with \ac{LP} decoding using \ac{ADMM} it is
performed for each constraint individually and with exact results.
In terms of time complexity, both algorithms are, on average, linear with
respect to $n$, although for \ac{LP} decoding using \ac{ADMM} significantly
more arithmetic operations are necessary in each iteration.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comparison of Simulation Results}%
\label{sec:comp:res}
\begin{itemize}
    \item A comparison of actual implementations is always debatable, since
        it is difficult to separate differences in algorithm performance from
        differences in implementation quality.
    \item No large difference in computational performance is observed: the
        parallelism of \ac{ADMM} cannot come to fruition, as both decoders are
        run on the same number of cores and the available parallelism is spent
        on decoding multiple codewords simultaneously.
    \item Nonetheless, in real-time applications, or applications whose focus
        is not the bulk decoding of large amounts of data, \ac{ADMM} has an
        advantage, since the decoding of a single codeword is completed faster.
    \item Where \ac{ADMM} is faster than proximal decoding, this can be
        attributed to its parallelism.
    \item Where proximal decoding is faster than \ac{ADMM}, possible
        explanations are a larger number of iterations before \ac{ADMM}
        converges and the larger number of values that have to be computed
        per iteration.
\end{itemize}
\begin{figure}[H]
\centering
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_963965.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
%{res/hybrid/2d_ber_fer_dfr_963965.csv};
{res/admm/ber_2d_963965.csv};
\addplot[PineGreen, line width=1pt, mark=triangle]
table [col sep=comma, x=SNR, y=FER,]
{res/generic/fer_ml_9633965.csv};
\end{axis}
\end{tikzpicture}
\caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=96, k=48$
\cite[\text{96.3.965}]{mackay_enc}}
\end{subfigure}%
\hfill%
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_bch_31_26.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_bch_31_26.csv};
\addplot[PineGreen, line width=1pt, mark=triangle*]
table [x=SNR, y=FER, col sep=comma,
discard if gt={SNR}{5.5},
discard if lt={SNR}{1},
]
{res/generic/fer_ml_bch_31_26.csv};
\end{axis}
\end{tikzpicture}
\caption{BCH code with $n=31, k=26$}
\end{subfigure}%
\vspace{3mm}
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{5.5}]
{res/proximal/2d_ber_fer_dfr_20433484.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma,
discard if not={mu}{3.0},
discard if gt={SNR}{5.5}]
{res/admm/ber_2d_20433484.csv};
\addplot[PineGreen, line width=1pt, mark=triangle, solid]
table [col sep=comma, x=SNR, y=FER,
discard if gt={SNR}{5.5}]
{res/generic/fer_ml_20433484.csv};
\end{axis}
\end{tikzpicture}
\caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=204, k=102$
\cite[\text{204.33.484}]{mackay_enc}}
\end{subfigure}%
\hfill%
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_20455187.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_20455187.csv};
\end{axis}
\end{tikzpicture}
\caption{$\left( 5, 10 \right)$-regular \ac{LDPC} code with $n=204, k=102$
\cite[\text{204.55.187}]{mackay_enc}}
\end{subfigure}%
\vspace{3mm}
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_40833844.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_40833844.csv};
\end{axis}
\end{tikzpicture}
\caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=408, k=204$
    \cite[\text{408.33.844}]{mackay_enc}}
\end{subfigure}%
\hfill%
\begin{subfigure}[t]{0.48\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
ymax=1.5, ymin=8e-5,
width=\textwidth,
height=0.75\textwidth,
]
\addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_pegreg252x504.csv};
\addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_pegreg252x504.csv};
\end{axis}
\end{tikzpicture}
\caption{\ac{LDPC} code (progressive edge growth construction) with $n=504, k=252$
\cite[\text{PEGReg252x504}]{mackay_enc}}
\end{subfigure}%
\vspace{5mm}
\begin{subfigure}[t]{\textwidth}
\centering
\begin{tikzpicture}
\begin{axis}[hide axis,
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend columns=1,
legend style={draw=white!15!black}]
\addlegendimage{RedOrange, line width=1pt, mark=*, solid}
\addlegendentry{Proximal decoding}
\addlegendimage{NavyBlue, line width=1pt, mark=triangle, densely dashed}
\addlegendentry{\acs{LP} decoding using \acs{ADMM}}
\addlegendimage{PineGreen, line width=1pt, mark=triangle*, solid}
\addlegendentry{\acs{ML} decoding}
\end{axis}
\end{tikzpicture}
\end{subfigure}
\caption{Comparison of decoding performance between proximal decoding and \ac{LP} decoding
using \ac{ADMM}}
\label{fig:comp:prox_admm_dec}
\end{figure}
\begin{figure}[h]
\centering
\begin{tikzpicture}
\begin{axis}[grid=both,
xlabel={$n$}, ylabel={Time per frame (s)},
width=0.6\textwidth,
height=0.45\textwidth,
legend style={at={(0.5,-0.42)},anchor=south},
legend cell align={left},]
\addplot[RedOrange, only marks, mark=*]
table [col sep=comma, x=n, y=spf]
{res/proximal/fps_vs_n.csv};
\addlegendentry{Proximal decoding}
\addplot[PineGreen, only marks, mark=triangle*]
table [col sep=comma, x=n, y=spf]
{res/admm/fps_vs_n.csv};
\addlegendentry{\acs{LP} decoding using \acs{ADMM}}
\end{axis}
\end{tikzpicture}
\caption{Timing requirements of the proximal decoding and \ac{LP} decoding
    using \ac{ADMM} implementations%
\protect\footnotemark{}}
\label{fig:comp:time}
\end{figure}%
%
\footnotetext{asdf}
%