Compare commits

..

1 Commits

Author SHA1 Message Date
96099b7fd2 Added compiled presentation 2023-04-20 11:09:13 +02:00
23 changed files with 702 additions and 2228 deletions

View File

@@ -2,14 +2,14 @@
This repository contains resources related to the Bachelor-Thesis
`Application of Optimization Algorithms for Channel Decoding`
(the related software has its own [repo](https://gitlab.kit.edu/uhhxt/ba_sw))
(the related software has its own [repo](https://git.scc.kit.edu/ba_duo/ba_sw))
## Access compiled documents
Each document has a corresponding tag, where an already compiled pdf document can be found
(See for example
[this](https://gitlab.kit.edu/uhhxt/ba_thesis/-/tree/prox_07_12_22/latex/presentations/02_12_2022)
[this](https://git.scc.kit.edu/ba_duo/ba_thesis/-/tree/prox_07_12_22/latex/presentations/02_12_2022)
tag, for one of the presentations under `latex/presentations`)

Binary file not shown.

View File

@@ -45,6 +45,15 @@
% eprint = {https://doi.org/10.1080/24725854.2018.1550692}
}
@online{mackay_enc,
author = {MacKay, David J.C.},
title = {Encyclopedia of Sparse Graph Codes},
date = {2023-01},
url = {http://www.inference.org.uk/mackay/codes/data.html}
}
@article{proximal_algorithms,
author = {Parikh, Neal and Boyd, Stephen},
title = {Proximal Algorithms},
@@ -210,25 +219,11 @@
doi={10.1109/TIT.1962.1057683}
}
@online{lautern_channelcodes,
author = "Helmling, Michael and Scholl, Stefan and Gensheimer, Florian and Dietz, Tobias and Kraft, Kira and Ruzika, Stefan and Wehn, Norbert",
title = "{D}atabase of {C}hannel {C}odes and {ML} {S}imulation {R}esults",
howpublished = "\url{www.uni-kl.de/channel-codes}",
url={https://www.uni-kl.de/channel-codes},
date = {2023-04}
}
@online{mackay_enc,
author = {MacKay, David J.C.},
title = {Encyclopedia of Sparse Graph Codes},
date = {2023-04},
url = {http://www.inference.org.uk/mackay/codes/data.html}
}
@article{adam,
title={Adam: A method for stochastic optimization},
author={Kingma, Diederik P and Ba, Jimmy},
journal={arXiv preprint arXiv:1412.6980},
year={2014},
doi={10.48550/arXiv.1412.6980}
}

View File

@ -1,18 +0,0 @@
\chapter*{Acknowledgements}
I would like to thank Prof. Dr.-Ing. Laurent Schmalen for granting me the
opportunity to write my bachelor's thesis at the Communications Engineering Lab,
as well as all other members of the institute for their help and many productive
discussions, and for creating a very pleasant environment to do research in.
I am very grateful to Dr.-Ing. Holger Jäkel
for kindly providing me with his knowledge and many suggestions,
and for his constructive criticism during the preparation of this work.
Special thanks also to Mai Anh Vu for her invaluable feedback and support
during the entire undertaking that is this thesis.
Finally, I would like to thank my family, who have enabled me to pursue my
studies in a field I thoroughly enjoy and who have supported me completely
throughout my journey.

View File

@@ -508,7 +508,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,
@@ -549,7 +549,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,
@@ -593,7 +593,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,
@@ -647,7 +647,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,
@@ -692,7 +692,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,
@@ -735,7 +735,7 @@ $\gamma \in \left\{ 0.01, 0.05, 0.15 \right\}$.
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER},
xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log,
legend columns=1,
legend pos=outer north east,

View File

@@ -2,9 +2,14 @@
\label{chapter:comparison}
In this chapter, proximal decoding and \ac{LP} decoding using \ac{ADMM} are compared.
First, the two algorithms are studied on a theoretical basis.
Subsequently, their respective simulation results are examined, and their
differences are interpreted based on their theoretical structure.
%some similarities between the proximal decoding algorithm
%and \ac{LP} decoding using \ac{ADMM} are be pointed out.
%The two algorithms are compared and their different computational and decoding
%performance is interpreted on the basis of their theoretical structure.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -13,11 +18,12 @@ differences are interpreted based on their theoretical structure.
\ac{ADMM} and the proximal gradient method can both be expressed in terms of
proximal operators \cite[Sec. 4.4]{proximal_algorithms}.
Additionally, the two algorithms show some striking similarities with
regard to their general structure and the way in which the minimization of the
respective objective functions is accomplished.
The \ac{LP} decoding problem in
equation (\ref{eq:lp:relaxed_formulation}) can be slightly rewritten using the
\textit{indicator functions} $g_j : \mathbb{R}^{d_j} \rightarrow
\left\{ 0, +\infty \right\} \hspace{1mm}, j\in\mathcal{J}$ for the polytopes
@@ -32,13 +38,12 @@ $\mathcal{P}_{d_j}, \hspace{1mm} j\in\mathcal{J}$, defined as%
%
by moving the constraints into the objective function, as shown in figure
\ref{fig:ana:theo_comp_alg:admm}.
The objective functions of the two problems are similar in that they
both comprise two parts: one associated to the likelihood that a given
codeword was sent, arising from the channel model, and one associated
to the constraints the decoding process is subjected to, arising from the
code used.
Both algorithms are composed of an iterative approach consisting of two
alternating steps, each minimizing one part of the objective function.
%
\begin{figure}[h]
@@ -109,7 +114,7 @@ return $\tilde{\boldsymbol{c}}$
\end{subfigure}%
\caption{Comparison of proximal decoding and \ac{LP} decoding using \ac{ADMM}}
\label{fig:ana:theo_comp_alg}
\end{figure}%
%
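Spelled out, the rewriting of the \ac{LP} decoding problem sketched above takes roughly the following shape, which makes the split into a likelihood part and a constraint part explicit; the selection operator $\boldsymbol{T}_j$ (picking the components involved in check $j$) and the variable names are placeholders chosen for this summary and may differ from the thesis' own notation in equation (\ref{eq:lp:relaxed_formulation}):

\begin{align*}
\underset{\tilde{\boldsymbol{c}}}{\text{minimize}}\hspace{5mm}
\underbrace{\boldsymbol{\gamma}^\text{T} \tilde{\boldsymbol{c}}}_{\text{likelihood part}}
+ \underbrace{\sum_{j\in\mathcal{J}} g_j\left( \boldsymbol{T}_j \tilde{\boldsymbol{c}} \right)}_{\text{constraint part}}
\end{align*}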
@@ -118,11 +123,15 @@ Their major difference is that while with proximal decoding the constraints
are regarded in a global context, considering all parity checks at the same
time, with \ac{ADMM} each parity check is
considered separately and in a more local context (line 4 in both algorithms).
This difference means that while with proximal decoding the alternating
minimization of the two parts of the objective function inevitably leads to
oscillatory behavior (as explained in section
\ref{subsec:prox:conv_properties}), this is not the case with \ac{ADMM}, which
partly explains the disparate decoding performance of the two methods.
Furthermore, while with proximal decoding the step considering the constraints
is realized using gradient descent - amounting to an approximation -
with \ac{ADMM} it reduces to a number of projections onto the parity polytopes
$\mathcal{P}_{d_j}, \hspace{1mm} j\in\mathcal{J}$, which always provide exact
results.
The contrasting treatment of the constraints (global and approximate with
proximal decoding as opposed to local and exact with \ac{LP} decoding using
@@ -133,27 +142,34 @@ calculation, whereas with \ac{LP} decoding it occurs due to the approximate
formulation of the constraints - independent of the optimization method
itself.
The advantage which arises because of this when employing \ac{LP} decoding is
the \ac{ML} certificate property: when a valid codeword is returned, it is
also the \ac{ML} codeword.
Additionally, it can be detected when the algorithm gets stuck, since it then
returns a solution corresponding to a pseudocodeword, the components of which
are fractional.
This means that additional redundant parity-checks can be added successively
until the codeword returned is valid and thus the \ac{ML} solution is found
\cite[Sec. IV.]{alp}.
In terms of time complexity, the two decoding algorithms are comparable.
Each of the operations required for proximal decoding can be performed
in $\mathcal{O}\left( n \right) $ time for \ac{LDPC} codes (see section
\ref{subsec:prox:comp_perf}).
For \ac{LP} decoding using \ac{ADMM} (see section \ref{subsec:admm:comp_perf}),
the $\tilde{\boldsymbol{c}}$- and $\boldsymbol{u}$-update steps likewise run in
linear time, while the projection step has a worst-case time complexity of
$\mathcal{O}\left( n^2 \right)$ and an average complexity of
$\mathcal{O}\left( n \right)$ \cite[Sec. VIII.]{lautern}.
Additionally, both algorithms can be understood as message-passing algorithms,
\ac{LP} decoding using \ac{ADMM} similarly to
\cite[Sec. III. D.]{original_admm} and
\cite[Sec. II. B.]{efficient_lp_dec_admm}, and proximal decoding by starting
with algorithm \ref{alg:prox}, substituting for the gradient of the
code-constraint polynomial and separating the $\boldsymbol{s}$ update into two parts.
The algorithms in their message-passing form are depicted in figure
\ref{fig:comp:message_passing}.
$M_{j\to i}$ denotes a message transmitted from \ac{CN} j to \ac{VN} i.
%
\begin{figure}[h]
\centering
@@ -168,14 +184,14 @@ Initialize $\boldsymbol{r}, \boldsymbol{s}, \omega, \gamma$
while stopping criterion unfulfilled do
for j in $\mathcal{J}$ do
$p_j \leftarrow \prod_{i\in N_c\left( j \right) } r_i $
$M_{j\to i} \leftarrow p_j^2 - p_j$|\Suppressnumber|
|\vspace{0.22mm}\Reactivatenumber|
end for
for i in $\mathcal{I}$ do
$s_i\leftarrow \Pi_\eta \left( s_i + \gamma \left( 4\left( s_i^2 - 1 \right)s_i
\phantom{\frac{4}{s_i}}\right.\right.$|\Suppressnumber|
|\Reactivatenumber|$\left.\left.+ \frac{4}{s_i}\sum_{j\in
N_v\left( i \right) } M_{j\to i} \right)\right) $
$r_i \leftarrow r_i + \omega \left( s_i - y_i \right)$
end for
end while
@@ -216,14 +232,20 @@ return $\tilde{\boldsymbol{c}}$
\end{subfigure}%
\caption{Proximal decoding and \ac{LP} decoding using \ac{ADMM}
as message passing algorithms}
\label{fig:comp:message_passing}
\end{figure}%
%
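To make the listing concrete, a minimal Python transcription of the proximal-decoding side of figure \ref{fig:comp:message_passing} could look as follows; numpy, the neighbourhood lists $N_c(j)$ and $N_v(i)$ as index arrays, the initialization of $\boldsymbol{r}$ and $\boldsymbol{s}$ from $\boldsymbol{y}$, the fixed iteration count standing in for the stopping criterion, and the final hard decision are assumptions made for this sketch, not taken from the thesis.

import numpy as np

def proximal_decode(y, Nc, Nv, gamma=0.05, omega=0.05, eta=1.5, K=200):
    # Transcription of the proximal-decoding listing; Nc[j] holds the variable
    # nodes of check j, Nv[i] the checks attached to variable node i.
    r = np.asarray(y, dtype=float).copy()
    s = np.asarray(y, dtype=float).copy()
    for _ in range(K):
        # check-node step: the message p_j^2 - p_j is identical for every
        # variable node attached to check j
        M = np.empty(len(Nc))
        for j, vns in enumerate(Nc):
            p = np.prod(r[vns])
            M[j] = p**2 - p
        # variable-node step: gradient-like update on s, clipped to [-eta, eta]
        # (the projection Pi_eta), followed by the r update
        for i, cns in enumerate(Nv):
            grad = 4.0 * (s[i]**2 - 1.0) * s[i] + (4.0 / s[i]) * M[cns].sum()
            s[i] = np.clip(s[i] + gamma * grad, -eta, eta)
            r[i] = r[i] + omega * (s[i] - y[i])
    return (s < 0).astype(int)   # bit decision, assuming the mapping 0 -> +1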
This message passing structure means that both algorithms can be implemented
very efficiently, as the update steps can be performed in parallel for all
\acp{CN} and for all \acp{VN}, respectively.
While the two algorithms are very similar in their general
structure, with \ac{LP} decoding using \ac{ADMM} multiple messages have to be
computed for each check node (line 6 in figure
\ref{fig:comp:message_passing:admm}), whereas
with proximal decoding the same message is transmitted to all \acp{VN}
(line 5 of figure \ref{fig:comp:message_passing:proximal}).
This means that while both algorithms have an average time complexity of
$\mathcal{O}\left( n \right)$, more arithmetic operations are required in the
\ac{ADMM} case.
In conclusion, the two algorithms have a very similar structure, where the
parts of the objective function relating to the likelihood and to the
@@ -231,8 +253,9 @@ constraints are minimized in an alternating fashion.
With proximal decoding this minimization is performed for all constraints at once
in an approximative manner, while with \ac{LP} decoding using \ac{ADMM} it is
performed for each constraint individually and with exact results.
In terms of time complexity, both algorithms are, on average, linear with
respect to $n$ and are heavily parallelizable, although for \ac{LP} decoding
using \ac{ADMM} significantly more arithmetic operations are necessary in each
iteration.
@@ -240,100 +263,24 @@ respect to $n$ and are heavily parallelizable.
\section{Comparison of Simulation Results}%
\label{sec:comp:res}
\begin{itemize}
\item The comparison of actual implementations is always debatable /
contentious, since it is difficult to separate differences in
algorithm performance from differences in implementation
\item No large difference in computational performance $\rightarrow$
Parallelism cannot come to fruition as decoding is performed on the
same number of cores for both algorithms (Multiple decodings in parallel)
\item Nonetheless, in realtime applications / applications where the focus
is not the mass decoding of raw data, \ac{ADMM} has advantages, since
the decoding of a single codeword is performed faster
\item \ac{ADMM} faster than proximal decoding $\rightarrow$
Parallelism
\item Proximal decoding faster than \ac{ADMM} $\rightarrow$ dafuq
(larger number of iterations before convergence? More values to compute for ADMM?)
\end{itemize}
The decoding performance of the two algorithms is compared in figure
\ref{fig:comp:prox_admm_dec} in form of the \ac{FER}.
Shown as well is the performance of the improved proximal decoding
algorithm presented in section \ref{sec:prox:Improved Implementation}.
The \ac{FER} resulting from decoding using \ac{BP} and,
wherever available, the \ac{FER} of \ac{ML} decoding, taken from
\cite{lautern_channelcodes}, are plotted as a reference.
The parameters chosen for the proximal and improved proximal decoders are
$\gamma=0.05$, $\omega=0.05$, $K=200$, $\eta = 1.5$ and $N=12$.
The parameters chosen for \ac{LP} decoding using \ac{ADMM} are $\mu = 5$,
$\rho = 1$, $K=200$, $\epsilon_\text{pri} = 10^{-5}$ and
$\epsilon_\text{dual} = 10^{-5}$.
For all codes considered within the scope of this work, \ac{LP} decoding using
\ac{ADMM} consistently outperforms both proximal decoding and the improved
version, reaching very similar performance to \ac{BP}.
The decoding gain heavily depends on the code, evidently becoming greater for
codes with larger $n$ and reaching values of up to $\SI{2}{dB}$.
These simulation results can be interpreted with regard to the theoretical
structure of the decoding methods, as analyzed in section \ref{sec:comp:theo}.
The worse performance of proximal decoding is somewhat surprising, considering
the global treatment of the constraints in contrast to the local treatment
in the case of \ac{LP} decoding using \ac{ADMM}.
It may be explained, however, in the context of the nature of the
calculations performed in each case.
With proximal decoding, the calculations are approximate, leading
to the constraints never being quite satisfied.
With \ac{LP} decoding using \ac{ADMM},
the constraints are fulfilled for each parity check individually after each
iteration of the decoding process.
A further contributing factor might be the structure of the optimization
process, as the alternating minimization with respect to the same variable
leads to oscillatory behavior, as explained in section
\ref{subsec:prox:conv_properties}.
It should be noted that while in this thesis proximal decoding was
examined with respect to its performance in \ac{AWGN} channels, in
\cite{proximal_paper} it is presented as a method applicable to non-trivial
channel models such as \ac{LDPC}-coded massive \ac{MIMO} channels, perhaps
broadening its usefulness beyond what is shown here.
The timing requirements of the decoding algorithms are visualized in figure
\ref{fig:comp:time}.
The datapoints have been generated by evaluating the metadata from \ac{FER}
and \ac{BER} simulations and using the parameters mentioned earlier when
discussing the decoding performance.
The codes considered are the same as in sections \ref{subsec:prox:comp_perf}
and \ref{subsec:admm:comp_perf}.
While the \ac{ADMM} implementation seems to be faster than the proximal
decoding and improved proximal decoding implementations, inferring some
general behavior is difficult in this case.
This is because of the comparison of actual implementations, making the
results dependent on factors such as the degree of optimization of each of the
implementations.
Nevertheless, the run time of both the proximal decoding and the \ac{LP}
decoding using \ac{ADMM} implementations is similar, and both are
reasonably performant, owing to the parallelizable structure of the
algorithms.
\begin{figure}[h]
\centering
\begin{tikzpicture}
\begin{axis}[grid=both,
xlabel={$n$}, ylabel={Time per frame (ms)},
width=0.6\textwidth,
height=0.45\textwidth,
legend style={at={(0.5,-0.52)},anchor=south},
legend cell align={left},]
\addplot[RedOrange, only marks, mark=square*]
table [col sep=comma, x=n, y=spf,
y expr=\thisrow{spf} * 1000]
{res/proximal/fps_vs_n.csv};
\addlegendentry{Proximal decoding}
\addplot[Gray, only marks, mark=*]
table [col sep=comma, x=n, y=spf,
y expr=\thisrow{spf} * 1000]
{res/hybrid/fps_vs_n.csv};
\addlegendentry{Improved proximal decoding ($N=12$)}
\addplot[NavyBlue, only marks, mark=triangle*]
table [col sep=comma, x=n, y=spf,
y expr=\thisrow{spf} * 1000]
{res/admm/fps_vs_n.csv};
\addlegendentry{\acs{LP} decoding using \acs{ADMM}}
\end{axis}
\end{tikzpicture}
\caption{Comparison of the timing requirements of the different decoder implementations}
\label{fig:comp:time}
\end{figure}%
%
\begin{figure}[h]
\centering
\begin{subfigure}[t]{0.48\textwidth}
@ -342,7 +289,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -352,19 +299,13 @@ algorithms.
\addplot[RedOrange, line width=1pt, mark=*, solid] \addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}] table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_963965.csv}; {res/proximal/2d_ber_fer_dfr_963965.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/hybrid/2d_ber_fer_dfr_963965.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
%{res/hybrid/2d_ber_fer_dfr_963965.csv}; %{res/hybrid/2d_ber_fer_dfr_963965.csv};
{res/admm/ber_2d_963965.csv}; {res/admm/ber_2d_963965.csv};
\addplot[Black, line width=1pt, mark=*] \addplot[PineGreen, line width=1pt, mark=triangle]
table [col sep=comma, x=SNR, y=FER,] table [col sep=comma, x=SNR, y=FER,]
{res/generic/fer_ml_9633965.csv}; {res/generic/fer_ml_9633965.csv};
\addplot [RoyalPurple, mark=*, line width=1pt]
table [x=SNR, y=FER, col sep=comma]
{res/generic/bp_963965.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@ -378,7 +319,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -388,20 +329,15 @@ algorithms.
\addplot[RedOrange, line width=1pt, mark=*, solid] \addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}] table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_bch_31_26.csv}; {res/proximal/2d_ber_fer_dfr_bch_31_26.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/hybrid/2d_ber_fer_dfr_bch_31_26.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_bch_31_26.csv}; {res/admm/ber_2d_bch_31_26.csv};
\addplot[Black, line width=1pt, mark=*] \addplot[PineGreen, line width=1pt, mark=triangle*]
table [x=SNR, y=FER, col sep=comma, table [x=SNR, y=FER, col sep=comma,
discard if gt={SNR}{5.5}, discard if gt={SNR}{5.5},
discard if lt={SNR}{1},] discard if lt={SNR}{1},
]
{res/generic/fer_ml_bch_31_26.csv}; {res/generic/fer_ml_bch_31_26.csv};
\addplot [RoyalPurple, mark=*, line width=1pt]
table [x=SNR, y=FER, col sep=comma]
{res/generic/bp_bch_31_26.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@ -416,7 +352,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -428,22 +364,15 @@ algorithms.
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{5.5}] discard if gt={SNR}{5.5}]
{res/proximal/2d_ber_fer_dfr_20433484.csv}; {res/proximal/2d_ber_fer_dfr_20433484.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05},
discard if gt={SNR}{5.5}]
{res/hybrid/2d_ber_fer_dfr_20433484.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, table [x=SNR, y=FER, col sep=comma,
discard if not={mu}{3.0}, discard if not={mu}{3.0},
discard if gt={SNR}{5.5}] discard if gt={SNR}{5.5}]
{res/admm/ber_2d_20433484.csv}; {res/admm/ber_2d_20433484.csv};
\addplot[Black, line width=1pt, mark=*] \addplot[PineGreen, line width=1pt, mark=triangle, solid]
table [col sep=comma, x=SNR, y=FER, table [col sep=comma, x=SNR, y=FER,
discard if gt={SNR}{5.5}] discard if gt={SNR}{5.5}]
{res/generic/fer_ml_20433484.csv}; {res/generic/fer_ml_20433484.csv};
\addplot [RoyalPurple, mark=*, line width=1pt]
table [x=SNR, y=FER, col sep=comma]
{res/generic/bp_20433484.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@ -457,7 +386,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -467,16 +396,9 @@ algorithms.
\addplot[RedOrange, line width=1pt, mark=*, solid] \addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}] table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_20455187.csv}; {res/proximal/2d_ber_fer_dfr_20455187.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/hybrid/2d_ber_fer_dfr_20455187.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_20455187.csv}; {res/admm/ber_2d_20455187.csv};
\addplot [RoyalPurple, mark=*, line width=1pt,
discard if gt={SNR}{5}]
table [x=SNR, y=FER, col sep=comma]
{res/generic/bp_20455187.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@ -492,7 +414,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -502,16 +424,9 @@ algorithms.
\addplot[RedOrange, line width=1pt, mark=*, solid] \addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}] table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_40833844.csv}; {res/proximal/2d_ber_fer_dfr_40833844.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/hybrid/2d_ber_fer_dfr_40833844.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_40833844.csv}; {res/admm/ber_2d_40833844.csv};
\addplot [RoyalPurple, mark=*, line width=1pt,
discard if gt={SNR}{3}]
table [x=SNR, y=FER, col sep=comma]
{res/generic/bp_40833844.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@ -525,7 +440,7 @@ algorithms.
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, xlabel={$E_b / N_0$}, ylabel={FER},
ymode=log, ymode=log,
ymax=1.5, ymin=8e-5, ymax=1.5, ymin=8e-5,
width=\textwidth, width=\textwidth,
@ -535,16 +450,9 @@ algorithms.
\addplot[RedOrange, line width=1pt, mark=*, solid] \addplot[RedOrange, line width=1pt, mark=*, solid]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}] table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/proximal/2d_ber_fer_dfr_pegreg252x504.csv}; {res/proximal/2d_ber_fer_dfr_pegreg252x504.csv};
\addplot[RedOrange, line width=1pt, mark=triangle, densely dashed] \addplot[NavyBlue, line width=1pt, mark=triangle, densely dashed]
table [x=SNR, y=FER, col sep=comma, discard if not={gamma}{0.05}]
{res/hybrid/2d_ber_fer_dfr_pegreg252x504.csv};
\addplot[Turquoise, line width=1pt, mark=*]
table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}]
{res/admm/ber_2d_pegreg252x504.csv}; {res/admm/ber_2d_pegreg252x504.csv};
\addplot [RoyalPurple, mark=*, line width=1pt]
table [x=SNR, y=FER, col sep=comma,
discard if gt={SNR}{3}]
{res/generic/bp_pegreg252x504.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@@ -562,28 +470,50 @@ algorithms.
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend columns=1,
legend cell align={left},
legend style={draw=white!15!black}]
\addlegendimage{RedOrange, line width=1pt, mark=*, solid}
\addlegendentry{Proximal decoding}
\addlegendimage{RedOrange, line width=1pt, mark=triangle, densely dashed}
\addlegendentry{Improved proximal decoding}
\addlegendimage{Turquoise, line width=1pt, mark=*}
\addlegendentry{\acs{LP} decoding using \acs{ADMM}}
\addlegendimage{RoyalPurple, line width=1pt, mark=*, solid}
\addlegendentry{\acs{BP} (200 iterations)}
\addlegendimage{Black, line width=1pt, mark=*, solid}
\addlegendentry{\acs{ML} decoding}
\end{axis}
\end{tikzpicture}
\end{subfigure}
\caption{Comparison of the decoding performance of the different decoder
implementations for various codes}
\label{fig:comp:prox_admm_dec}
\end{figure}
\begin{figure}[h]
\centering
\begin{tikzpicture}
\begin{axis}[grid=both,
xlabel={$n$}, ylabel={Time per frame (s)},
width=0.6\textwidth,
height=0.45\textwidth,
legend style={at={(0.5,-0.42)},anchor=south},
legend cell align={left},]
\addplot[RedOrange, only marks, mark=*]
table [col sep=comma, x=n, y=spf]
{res/proximal/fps_vs_n.csv};
\addlegendentry{Proximal decoding}
\addplot[PineGreen, only marks, mark=triangle*]
table [col sep=comma, x=n, y=spf]
{res/admm/fps_vs_n.csv};
\addlegendentry{\acs{LP} decoding using \acs{ADMM}}
\end{axis}
\end{tikzpicture}
\caption{Timing requirements of the proximal decoding implementation%
\protect\footnotemark{}}
\label{fig:comp:time}
\end{figure}%
%
\footnotetext{asdf}
%

View File

@ -1,61 +1,8 @@
\chapter{Conclusion}%
\label{chapter:conclusion}
\begin{itemize}
\item Summary of results
\item Future work
\end{itemize}
\chapter{Conclusion and Outlook}%
\label{chapter:conclusion}
In the context of this thesis, two decoding algorithms were considered:
proximal decoding and \ac{LP} decoding using \ac{ADMM}.
The two algorithms were first analyzed individually, before comparing them
based on simulation results as well as on their theoretical structure.
For proximal decoding, the effect of each parameter on the behavior of the
decoder was examined, leading to an approach to optimally choose the value
of each parameter.
The convergence properties of the algorithm were investigated in the context
of the relatively high decoding failure rate, to derive an approach to correct
possibly wrong components of the estimate.
Based on this approach, an improvement of proximal decoding was suggested,
leading to a decoding gain of up to $\SI{1}{dB}$, depending on the code and
the parameters considered.
For \ac{LP} decoding using \ac{ADMM}, the circumstances brought about by the
\ac{LP} relaxation were first explored.
The decomposable nature arising from the relocation of the constraints into
the objective function itself was recognized as the major driver in enabling
an efficient implementation of the decoding algorithm.
Based on simulation results, general guidelines for choosing each parameter
were derived.
The decoding performance, in form of the \ac{FER}, of the algorithm was
analyzed, observing that \ac{LP} decoding using \ac{ADMM} nearly reaches that
of \ac{BP}, staying within approximately $\SI{0.5}{dB}$ depending on the code
in question.
Finally, strong parallels were discovered with regard to the theoretical
structure of the two algorithms, both in the constitution of their respective
objective functions as well as in the iterative approaches used to minimize them.
One difference noted was the approximate nature of the minimization in the
case of proximal decoding, leading to the constraints never being truly
satisfied.
In conjunction with the alternating minimization with respect to the same
variable, leading to oscillatory behavior, this was identified as
a possible cause of its comparatively worse decoding performance.
Furthermore, both algorithms were expressed as message passing algorithms,
illustrating their similar computational performance.
While the modified proximal decoding algorithm presented in section
\ref{sec:prox:Improved Implementation} shows some promising results, further
investigation is required to determine how different choices of parameters
affect the decoding performance.
Additionally, a more mathematically rigorous foundation for determining the
potentially wrong components of the estimate is desirable.
A different method to improve proximal decoding might be to use
moment-based optimization techniques such as \textit{Adam} \cite{adam}
to try to mitigate the effect of local minima introduced in the objective
function as well as the adversarial structure of the minimization when employing
proximal decoding.
Another area benefiting from future work is the expansion of the \ac{ADMM}
based \ac{LP} decoder into a decoder approximating \ac{ML} performance,
using \textit{adaptive \ac{LP} decoding}.
With this method, the successive addition of redundant parity checks is used
to mitigate the decoder becoming stuck in erroneous solutions introduced due to
the relaxation of the constraints of the \ac{LP} decoding problem \cite{alp}.
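Regarding the moment-based techniques mentioned above: the core Adam update \cite{adam} reduces to a few lines; the following is a generic sketch of the published update rule with placeholder hyperparameters, not an implementation belonging to this thesis.

import numpy as np

def adam_step(x, grad, m, v, t, lr=1e-3, beta1=0.9, beta2=0.999, eps=1e-8):
    # One Adam iteration: exponential moving averages of the gradient (m)
    # and of its element-wise square (v), with bias correction, t = 1, 2, ...
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * grad**2
    m_hat = m / (1 - beta1**t)
    v_hat = v / (1 - beta2**t)
    x = x - lr * m_hat / (np.sqrt(v_hat) + eps)
    return x, m, v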

View File

@ -33,6 +33,13 @@ examined with respect to its performance in \ac{AWGN} channels, in
channel models such as \ac{LDPC}-coded massive \ac{MIMO} channels, perhaps
broadening its usefulness beyond what is shown here.
While the modified proximal decoding algorithm presented in section
\ref{sec:prox:Improved Implementation} shows some promising results, further
investigation is required to determine how different choices of parameters
affect the decoding performance.
Additionally, a more mathematically rigorous foundation for determining the
potentially wrong components of the estimate is desirable.
Another interesting approach might be the combination of proximal and \ac{LP}
decoding.
Performing an initial number of iterations using proximal decoding to obtain

View File

@ -1,51 +1,16 @@
\chapter{Introduction}%
\label{chapter:introduction}
Channel coding using binary linear codes is a way of enhancing the reliability
of data by detecting and correcting any errors that may occur during
its transmission or storage.
One class of binary linear codes, \ac{LDPC} codes, has become especially
popular due to being able to reach arbitrarily small probabilities of error
at code rates up to the capacity of the channel \cite[Sec. II.B.]{mackay_rediscovery},
while retaining a structure that allows for very efficient decoding.
While the established decoders for \ac{LDPC} codes, such as \ac{BP} and the
\textit{min-sum algorithm}, offer good decoding performance, they are suboptimal
in most cases and exhibit an \textit{error floor} for high \acp{SNR}
\cite[Sec. 15.3]{ryan_lin_2009}, making them unsuitable for applications
with extreme reliability requirements.
\begin{itemize}
\item Problem definition
\item Motivation
\begin{itemize}
\item Error floor when decoding with BP (seems to not be present with LP decoding
\cite[Sec. I]{original_admm})
\item Strong theoretical guarantees that allow for better and better approximations
of ML decoding \cite[Sec. I]{original_admm}
\end{itemize}
\item Results summary
\end{itemize}
Optimization based decoding algorithms are an entirely different way of approaching
the decoding problem.
The first introduction of optimization techniques as a way of decoding binary
linear codes was conducted in Feldman's 2003 Ph.D. thesis and a subsequent paper,
establishing the field of \ac{LP} decoding \cite{feldman_thesis}, \cite{feldman_paper}.
There, the \ac{ML} decoding problem is approximated by a \textit{linear program}, i.e.,
a linear, convex optimization problem, which can subsequently be solved using
several different algorithms \cite{alp}, \cite{interior_point},
\cite{original_admm}, \cite{pdd}.
More recently, novel approaches such as \textit{proximal decoding} have been
introduced. Proximal decoding is based on a non-convex optimization formulation
of the \ac{MAP} decoding problem \cite{proximal_paper}.
The motivation behind applying optimization methods to channel decoding is to
utilize existing techniques in the broad field of optimization theory, as well
as to find new decoding methods not suffering from the same disadvantages as
existing message passing based approaches or exhibiting other desirable properties.
\Ac{LP} decoding, for example, comes with strong theoretical guarantees
allowing it to be used as a way of closely approximating \ac{ML} decoding
\cite[Sec. I]{original_admm},
and proximal decoding is applicable to non-trivial channel models such
as \ac{LDPC}-coded massive \ac{MIMO} channels \cite{proximal_paper}.
This thesis aims to further the analysis of optimization based decoding
algorithms as well as to verify and complement the considerations present in
the existing literature.
Specifically, the proximal decoding algorithm and \ac{LP} decoding using
the \ac{ADMM} \cite{original_admm} are explored within the context of
\ac{BPSK} modulated \ac{AWGN} channels.
Implementations of both decoding methods are produced, and based on simulation
results from those implementations the algorithms are examined and compared.
Approaches to determine the optimal value of each parameter are derived and
the computational and decoding performance of the algorithms is examined.
An improvement on proximal decoding is suggested, achieving up to 1 dB of gain,
depending on the parameters chosen and the code considered.

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -1,13 +1,13 @@
\chapter{Theoretical Background}%
\label{chapter:theoretical_background}
In this chapter, the theoretical background necessary to understand the
decoding algorithms examined in this work is given.
First, the notation used is clarified.
The physical layer is detailed - the used modulation scheme and channel model.
A short introduction to channel coding with binary linear codes and especially
\ac{LDPC} codes is given.
The established methods of decoding \ac{LDPC} codes are briefly explained.
Lastly, the general process of decoding using optimization techniques is described
and an overview of the utilized optimization methods is given.
@@ -31,7 +31,7 @@ Additionally, a shorthand notation will be used, denoting a set of indices as%
\hspace{5mm} m < n, \hspace{2mm} m,n\in\mathbb{Z}
.\end{align*}
%
In order to designate element-wise operations, in particular the \textit{Hadamard product}
and the \textit{Hadamard power}, the operator $\circ$ will be used:%
%
\begin{alignat*}{3}
@@ -45,7 +45,7 @@ and the \textit{Hadamard power}, the operator $\circ$ will be used:%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Channel Model and Modulation}
\label{sec:theo:Preliminaries: Channel Model and Modulation}
In order to transmit a bit-word $\boldsymbol{c} \in \mathbb{F}_2^n$ of length
@@ -82,7 +82,7 @@ conducting this process, whereby \textit{data words} are mapped onto longer
\textit{codewords}, which carry redundant information.
\Ac{LDPC} codes have become especially popular, since they are able to
reach arbitrarily small probabilities of error at code rates up to the capacity
of the channel \cite[Sec. II.B.]{mackay_rediscovery}, while having a structure
that allows for very efficient decoding.
The lengths of the data words and codewords are denoted by $k\in\mathbb{N}$
@@ -97,7 +97,7 @@ the number of parity-checks:%
\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
.\end{align*}
%
A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codeword
$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
%
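As a small illustration of how the two matrices interact (numpy assumed; the toy matrices below are a generic systematic (7,4) construction and not necessarily the ones used in the thesis):

import numpy as np

# Illustrative systematic (7,4) code: G = [I | P], H = [P^T | I]
G = np.array([[1,0,0,0,1,1,0],
              [0,1,0,0,1,0,1],
              [0,0,1,0,0,1,1],
              [0,0,0,1,1,1,1]], dtype=int)
H = np.array([[1,1,0,1,1,0,0],
              [1,0,1,1,0,1,0],
              [0,1,1,1,0,0,1]], dtype=int)

u = np.array([1, 0, 1, 1])        # data word in F_2^k
c = u @ G % 2                     # encoding: c = uG over F_2
assert np.all(H @ c % 2 == 0)     # a valid codeword satisfies H c^T = 0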
@@ -179,9 +179,9 @@ codewords:
&= \argmax_{c\in\mathcal{C}} \frac{f_{\boldsymbol{Y} \mid \boldsymbol{C}}
\left( \boldsymbol{y} \mid \boldsymbol{c} \right) p_{\boldsymbol{C}}
\left( \boldsymbol{c} \right)}{f_{\boldsymbol{Y}}\left( \boldsymbol{y} \right) } \\
&= \argmax_{c\in\mathcal{C}} f_{\boldsymbol{Y} \mid \boldsymbol{C}}
\left( \boldsymbol{y} \mid \boldsymbol{c} \right) p_{\boldsymbol{C}}
\left( \boldsymbol{c} \right) \\
&= \argmax_{c\in\mathcal{C}}f_{\boldsymbol{Y} \mid \boldsymbol{C}}
\left( \boldsymbol{y} \mid \boldsymbol{c} \right)
.\end{align*}
@@ -204,7 +204,7 @@ Each row of $\boldsymbol{H}$, which represents one parity-check, is viewed as a
Each component of the codeword $\boldsymbol{c}$ is interpreted as a \ac{VN}.
The relationship between \acp{CN} and \acp{VN} can then be plotted by noting
which components of $\boldsymbol{c}$ are considered for which parity-check.
Figure \ref{fig:theo:tanner_graph} shows the Tanner graph for the
(7,4) Hamming code, which has the following parity-check matrix
\cite[Example 5.7.]{ryan_lin_2009}:%
%
@@ -263,7 +263,7 @@ Figure \ref{fig:theo:tanner_graph} shows the Tanner graph for the
\draw (cn3) -- (c7);
\end{tikzpicture}
\caption{Tanner graph for the (7,4) Hamming code}
\label{fig:theo:tanner_graph}
\end{figure}%
%
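Since later listings index the Tanner graph through the neighbourhood sets $N_c(j)$ and $N_v(i)$, here is a minimal sketch of how these sets can be read off a parity-check matrix such as the toy $\boldsymbol{H}$ above (numpy assumed; the names are illustrative):

import numpy as np

def tanner_neighbourhoods(H):
    # N_c[j]: indices of the variable nodes participating in parity-check j
    # N_v[i]: indices of the check nodes that involve variable node i
    Nc = [np.flatnonzero(row) for row in H]
    Nv = [np.flatnonzero(col) for col in H.T]
    return Nc, Nv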
@@ -285,16 +285,15 @@ Message passing algorithms are based on the notion of passing messages between
\acp{CN} and \acp{VN}.
\Ac{BP} is one such algorithm that is commonly used to decode \ac{LDPC} codes.
It aims to compute the posterior probabilities
$p_{C_i \mid \boldsymbol{Y}}\left(c_i = 1 | \boldsymbol{y} \right),\hspace{2mm} i\in\mathcal{I}$,
see \cite[Sec. III.]{mackay_rediscovery} and use them to calculate the estimate
$\hat{\boldsymbol{c}}$.
For cycle-free graphs this goal is reached after a finite
number of steps and \ac{BP} is equivalent to \ac{MAP} decoding.
When the graph contains cycles, however, \ac{BP} only approximates the \ac{MAP} probabilities
and is sub-optimal.
This leads to generally worse performance than \ac{MAP} decoding for practical codes.
Additionally, an \textit{error floor} appears for very high \acp{SNR}, making
the use of \ac{BP} impractical for applications where a very low error rate is
desired \cite[Sec. 15.3]{ryan_lin_2009}.
Another popular decoding method for \ac{LDPC} codes is the
\textit{min-sum algorithm}.
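For reference, the check-node rule that gives the min-sum algorithm its name is, in its generic textbook form (with $L_{i\to j}$ the incoming variable-to-check log-likelihood ratios; this is the standard rule, not quoted from the thesis):

\begin{align*}
L_{j\to i} = \left( \prod_{i' \in N_c\left( j \right) \setminus \left\{ i \right\}}
\operatorname{sign}\left( L_{i'\to j} \right) \right)
\cdot \min_{i' \in N_c\left( j \right) \setminus \left\{ i \right\}}
\left| L_{i'\to j} \right|
\end{align*}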
@@ -342,7 +341,7 @@ In contrast to the established message-passing decoding algorithms,
the perspective then changes from observing the decoding process in its
Tanner graph representation with \acp{VN} and \acp{CN} (as shown in figure \ref{fig:dec:tanner})
to a spatial representation (figure \ref{fig:dec:spatial}),
where the codewords are some of the vertices of a hypercube.
The goal is to find the point $\tilde{\boldsymbol{c}}$,
which minimizes the objective function $g$.
@@ -458,38 +457,29 @@ which minimizes the objective function $g$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{A Short Introduction to the Proximal Gradient Method and ADMM}
\label{sec:theo:Optimization Methods}
In this section, the general ideas behind the optimization methods used in
this work are outlined.
The application of these optimization methods to channel decoding
will be discussed in later chapters.
Two methods are introduced, the \textit{proximal gradient method} and
\ac{ADMM}.
\textit{Proximal algorithms} are algorithms for solving convex optimization
problems that rely on the use of \textit{proximal operators}.
The proximal operator $\textbf{prox}_{\lambda f} : \mathbb{R}^n \rightarrow \mathbb{R}^n$
of a function $f:\mathbb{R}^n \rightarrow \mathbb{R}$ is defined by
\cite[Sec. 1.1]{proximal_algorithms}%
%
\begin{align*}
\textbf{prox}_{\lambda f}\left( \boldsymbol{v} \right)
= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
f\left( \boldsymbol{x} \right) + \frac{1}{2\lambda}\lVert \boldsymbol{x}
- \boldsymbol{v} \rVert_2^2 \right)
.\end{align*}
%
This operator computes a point that is a compromise between minimizing $f$
and staying in the proximity of $\boldsymbol{v}$.
The parameter $\lambda$ determines how heavily each term is weighed.
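A concrete instance (chosen here only for illustration, not an operator used in the thesis): for $f\left( \boldsymbol{x} \right) = \lVert \boldsymbol{x} \rVert_1$ the proximal operator has the well-known closed form of component-wise soft-thresholding, e.g. in Python:

import numpy as np

def prox_l1(v, lam):
    # prox_{lam*||.||_1}(v): soft-thresholding, pulling every component of v
    # towards zero by lam -- a compromise between minimizing ||x||_1 and
    # staying close to v.
    return np.sign(v) * np.maximum(np.abs(v) - lam, 0.0)

print(prox_l1(np.array([1.5, -0.3, 0.8]), lam=0.5))   # ~ [1.0, -0.0, 0.3]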
The \textit{proximal gradient method} is an iterative optimization method
utilizing proximal operators, used to solve problems of the form%
%
\begin{align*}
\underset{\boldsymbol{x} \in \mathbb{R}^n}{\text{minimize}}\hspace{5mm}
f\left( \boldsymbol{x} \right) + g\left( \boldsymbol{x} \right)
\end{align*}
%
that consists of two steps: minimizing $f$ with gradient descent
@@ -502,14 +492,14 @@ and minimizing $g$ using the proximal operator
,\end{align*}
%
Since $g$ is minimized with the proximal operator and is thus not required
to be differentiable, it can be used to encode the constraints of the optimization problem
(e.g., in the form of an \textit{indicator function}, as mentioned in
\cite[Sec. 1.2]{proximal_algorithms}).
\ac{ADMM} is another optimization method.
In this thesis it will be used to solve a \textit{linear program}, which
is a special type of convex optimization problem in which the objective function
is linear and the constraints consist of linear equalities and inequalities.
Generally, any linear program can be expressed in \textit{standard form}%
\footnote{The inequality $\boldsymbol{x} \ge \boldsymbol{0}$ is to be
interpreted componentwise.}
@ -517,53 +507,38 @@ interpreted componentwise.}
%
\begin{alignat}{3}
	\begin{alignedat}{3}
		\underset{\boldsymbol{x}\in\mathbb{R}^n}{\text{minimize }}\hspace{2mm}
		&& \boldsymbol{\gamma}^\text{T} \boldsymbol{x} \\
		\text{subject to }\hspace{2mm} && \boldsymbol{A}\boldsymbol{x} & = \boldsymbol{b} \\
		&& \boldsymbol{x} & \ge \boldsymbol{0},
	\end{alignedat}
	\label{eq:theo:admm_standard}
\end{alignat}%
%
where $\boldsymbol{x}, \boldsymbol{\gamma} \in \mathbb{R}^n$, $\boldsymbol{b} \in \mathbb{R}^m$
and $\boldsymbol{A}\in\mathbb{R}^{m \times n}$.
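Such a standard-form problem can be handed directly to a generic solver.
The following small sketch, assuming SciPy is installed and using data chosen only
for illustration (it is not related to the decoding problem), solves one instance of
(\ref{eq:theo:admm_standard}):
%
\begin{verbatim}
import numpy as np
from scipy.optimize import linprog

# minimize gamma^T x  subject to  A x = b,  x >= 0
gamma = np.array([1.0, 2.0, 0.0])
A = np.array([[1.0, 1.0, 1.0]])
b = np.array([1.0])

# bounds=(0, None) encodes the componentwise constraint x >= 0
result = linprog(c=gamma, A_eq=A, b_eq=b, bounds=(0, None))
print(result.x)  # optimal point, here [0, 0, 1]
\end{verbatim}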
A technique called \textit{Lagrangian relaxation} can then be applied
\cite[Sec. 11.4]{intro_to_lin_opt_book}.
First, some of the constraints are moved into the objective function itself
and weights $\boldsymbol{\lambda}$ are introduced. A new, relaxed problem
is formulated as
%
\begin{align}
	\begin{aligned}
		\underset{\boldsymbol{x}\in\mathbb{R}^n}{\text{minimize }}\hspace{2mm}
		& \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
		+ \boldsymbol{\lambda}^\text{T}\left(
		\boldsymbol{A}\boldsymbol{x} - \boldsymbol{b}\right) \\
		\text{subject to }\hspace{2mm} & \boldsymbol{x} \ge \boldsymbol{0},
	\end{aligned}
	\label{eq:theo:admm_relaxed}
\end{align}%
%
the new objective function being the \textit{Lagrangian}%
\footnote{
Depending on what literature is consulted, the definition of the Lagrangian differs
in the order of $\boldsymbol{A}\boldsymbol{x}$ and $\boldsymbol{b}$.
As will subsequently be seen, however, the only property of the Lagrangian having
any bearing on the optimization process is that minimizing it gives a lower bound
on the optimal objective of the original problem.
This property is satisfied regardless of the order of the terms; the order
chosen here is the one used in the \ac{LP} decoding literature making use of
\ac{ADMM}.
}%
%
\begin{align*}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
	= \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
	+ \boldsymbol{\lambda}^\text{T}\left(
	\boldsymbol{A}\boldsymbol{x} - \boldsymbol{b}\right)
.\end{align*}%
%
This problem is not directly equivalent to the original one, as the
solution now depends on the choice of the \textit{Lagrange multipliers}
$\boldsymbol{\lambda}$.
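Nevertheless, for any choice of $\boldsymbol{\lambda}$ the relaxed problem provides a
lower bound on the optimal objective of the original one: for every $\boldsymbol{x}$
that is feasible for (\ref{eq:theo:admm_standard}), the constraint term vanishes,
%
\begin{align*}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
	= \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
	+ \boldsymbol{\lambda}^\text{T}\underbrace{\left(
	\boldsymbol{A}\boldsymbol{x} - \boldsymbol{b}\right)}_{=\,\boldsymbol{0}}
	= \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
,\end{align*}
%
so the minimum of the Lagrangian over $\boldsymbol{x} \ge \boldsymbol{0}$ can never
exceed the optimal objective of (\ref{eq:theo:admm_standard}) (\textit{weak duality}).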
@ -587,12 +562,12 @@ Furthermore, for uniquely solvable linear programs \textit{strong duality}
always holds \cite[Theorem 4.4]{intro_to_lin_opt_book}.
This means that the tightest lower bound is not merely a bound:
it actually attains the optimal value itself.
In other words, with the optimal choice of $\boldsymbol{\lambda}$,
the optimal objectives of the problems (\ref{eq:theo:admm_relaxed})
and (\ref{eq:theo:admm_standard}) have the same value, i.e.,
%
\begin{align*}
	\max_{\boldsymbol{\lambda}\in\mathbb{R}^m} \, \min_{\boldsymbol{x} \ge \boldsymbol{0}}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
	= \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x}
	= \boldsymbol{b}}}
@ -602,7 +577,7 @@ and (\ref{eq:theo:admm_standard}) have the same value, i.e.,
Thus, we can define the \textit{dual problem} as the search for the tightest lower bound:%
%
\begin{align}
	\underset{\boldsymbol{\lambda}\in\mathbb{R}^m}{\text{maximize }}\hspace{2mm}
	& \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}
	\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
	\label{eq:theo:dual}
@ -625,7 +600,7 @@ using equation (\ref{eq:theo:admm_obtain_primal}); then, update $\boldsymbol{\la
using gradient descent \cite[Sec. 2.1]{distr_opt_book}:%
%
\begin{align*}
	\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}\left(
	\boldsymbol{x}, \boldsymbol{\lambda} \right) \\
	\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda}
	+ \alpha\left( \boldsymbol{A}\boldsymbol{x} - \boldsymbol{b} \right),
@ -633,12 +608,12 @@ using gradient descent \cite[Sec. 2.1]{distr_opt_book}:%
.\end{align*}
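The structure of this dual ascent iteration can be made concrete with a small
numerical sketch. The example below only assumes NumPy and is not related to the
decoding problem; it applies dual ascent to the equality-constrained quadratic
problem of minimizing $\frac{1}{2}\lVert \boldsymbol{x} - \boldsymbol{v} \rVert_2^2$
subject to $\boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}$, for which the inner
minimization has a closed form (the non-negativity constraint is omitted for brevity):
%
\begin{verbatim}
import numpy as np

A = np.array([[1.0, 1.0, 0.0],
              [0.0, 1.0, 1.0]])
b = np.array([1.0, 2.0])
v = np.array([0.5, -0.3, 1.0])

lam = np.zeros(2)   # Lagrange multipliers
alpha = 0.2         # step size of the gradient ascent on the dual
for _ in range(300):
    x = v - A.T @ lam                 # x-update: argmin_x L(x, lam)
    lam = lam + alpha * (A @ x - b)   # lambda-update: gradient step

print(np.linalg.norm(A @ x - b))      # constraint residual, close to zero
\end{verbatim}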
%
The algorithm can be improved by observing that when the objective function
$g: \mathbb{R}^n \rightarrow \mathbb{R}$ is separable into a sum of
$N \in \mathbb{N}$ sub-functions
$g_i: \mathbb{R}^{n_i} \rightarrow \mathbb{R}$,
i.e., $g\left( \boldsymbol{x} \right) = \sum_{i=1}^{N} g_i
\left( \boldsymbol{x}_i \right)$,
where $\boldsymbol{x}_i\in\mathbb{R}^{n_i},\hspace{1mm} i\in [1:N]$ are subvectors of
$\boldsymbol{x}$, the Lagrangian is as well:
%
\begin{align*}
@ -649,18 +624,18 @@ $\boldsymbol{x}$, the Lagrangian is as well:
\begin{align*}
	\mathcal{L}\left( \left( \boldsymbol{x}_i \right)_{i=1}^N, \boldsymbol{\lambda} \right)
	= \sum_{i=1}^{N} g_i\left( \boldsymbol{x}_i \right)
	+ \boldsymbol{\lambda}^\text{T} \left(
	\sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i - \boldsymbol{b}\right)
.\end{align*}%
%
The matrices $\boldsymbol{A}_i \in \mathbb{R}^{m \times n_i}, \hspace{1mm} i \in [1:N]$
form a partition of $\boldsymbol{A}$, corresponding to
$\boldsymbol{A} = \begin{bmatrix}
	\boldsymbol{A}_1 &
	\ldots &
	\boldsymbol{A}_N
\end{bmatrix}$.
The minimization of each term can happen in parallel, in a distributed
fashion \cite[Sec. 2.2]{distr_opt_book}.
In each minimization step, only one subvector $\boldsymbol{x}_i$ of
$\boldsymbol{x}$ is considered, regarding all other subvectors as being
@ -668,7 +643,7 @@ constant.
This modified version of dual ascent is called \textit{dual decomposition}:
%
\begin{align*}
	\boldsymbol{x}_i &\leftarrow \argmin_{\boldsymbol{x}_i \ge \boldsymbol{0}}\mathcal{L}\left(
	\left( \boldsymbol{x}_i \right)_{i=1}^N, \boldsymbol{\lambda}\right)
	\hspace{5mm} \forall i \in [1:N]\\
	\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda}
@ -682,15 +657,14 @@ This modified version of dual ascent is called \textit{dual decomposition}:
It only differs in the use of an \textit{augmented Lagrangian}
$\mathcal{L}_\mu\left( \left( \boldsymbol{x}_i \right)_{i=1}^N, \boldsymbol{\lambda} \right)$
in order to strengthen the convergence properties.
The augmented Lagrangian extends the classical one by an additional penalty term
with penalty parameter $\mu$:
%
\begin{align*}
	\mathcal{L}_\mu \left( \left( \boldsymbol{x}_i \right)_{i=1}^N, \boldsymbol{\lambda} \right)
	= \underbrace{\sum_{i=1}^{N} g_i\left( \boldsymbol{x}_i \right)
	+ \boldsymbol{\lambda}^\text{T}\left(\sum_{i=1}^{N}
	\boldsymbol{A}_i\boldsymbol{x}_i - \boldsymbol{b}\right)}
	_{\text{Classical Lagrangian}}
	+ \underbrace{\frac{\mu}{2}\left\Vert \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i
	- \boldsymbol{b} \right\Vert_2^2}_{\text{Penalty term}},
	\hspace{5mm} \mu > 0
@ -700,20 +674,21 @@ The steps to solve the problem are the same as with dual decomposition, with the
condition that the step size be $\mu$:%
%
\begin{align*}
	\boldsymbol{x}_i &\leftarrow \argmin_{\boldsymbol{x}_i \ge \boldsymbol{0}}\mathcal{L}_\mu\left(
	\left( \boldsymbol{x}_i \right)_{i=1}^N, \boldsymbol{\lambda}\right)
	\hspace{5mm} \forall i \in [1:N]\\
	\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda}
	+ \mu\left( \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i
	- \boldsymbol{b} \right),
	\hspace{5mm} \mu > 0
.\end{align*}
%
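To illustrate the role of the penalty term, the following minimal sketch applies the
augmented-Lagrangian update with a single block ($N = 1$), i.e., the method of
multipliers, to the same toy problem as in the dual ascent sketch above
(minimizing $\frac{1}{2}\lVert \boldsymbol{x} - \boldsymbol{v} \rVert_2^2$ subject to
$\boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}$).
It assumes NumPy, omits the non-negativity constraint and the splitting into
subvectors, and is not the \ac{ADMM}-based decoder used later in this thesis:
%
\begin{verbatim}
import numpy as np

A = np.array([[1.0, 1.0, 0.0],
              [0.0, 1.0, 1.0]])
b = np.array([1.0, 2.0])
v = np.array([0.5, -0.3, 1.0])

mu = 2.0            # penalty parameter, also used as the step size
lam = np.zeros(2)
I = np.eye(3)
for _ in range(50):
    # x-update: setting the gradient of L_mu to zero gives
    # (I + mu * A^T A) x = v - A^T lam + mu * A^T b
    x = np.linalg.solve(I + mu * A.T @ A, v - A.T @ lam + mu * A.T @ b)
    # lambda-update with step size mu
    lam = lam + mu * (A @ x - b)

print(A @ x - b)    # constraint residual, close to zero
\end{verbatim}
Compared with the plain dual ascent sketch, the quadratic penalty makes the iteration
far less sensitive to the choice of the step size.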
In subsequent chapters, the decoding problem will be reformulated as an
optimization problem using two different methodologies.
In chapter \ref{chapter:proximal_decoding}, a non-convex optimization approach
is chosen and addressed using the proximal gradient method.
In chapter \ref{chapter:lp_dec_using_admm}, an \ac{LP}-based optimization problem is
formulated and solved using \ac{ADMM}.

View File

@ -1,9 +0,0 @@
SNR,BER,FER,DFR,num_iterations
1.0,0.06409994553376906,0.7013888888888888,0.7013888888888888,144.0
1.5,0.03594771241830065,0.45495495495495497,0.47297297297297297,222.0
2.0,0.014664163537755528,0.2148936170212766,0.2297872340425532,470.0
2.5,0.004731522238525039,0.07634164777021919,0.08163265306122448,1323.0
3.0,0.000911436423803915,0.016994783779236074,0.01749957933703517,5943.0
3.5,0.00011736135227863285,0.002537369677176234,0.002587614621278734,39805.0
4.0,1.0686274509803922e-05,0.00024,0.00023,100000.0
4.5,4.411764705882353e-07,1e-05,1e-05,100000.0

View File

@ -1,10 +0,0 @@
{
"duration": 46.20855645602569,
"name": "ber_20433484",
"platform": "Linux-6.2.10-arch1-1-x86_64-with-glibc2.37",
"K": 200,
"epsilon_pri": 1e-05,
"epsilon_dual": 1e-05,
"max_frame_errors": 100,
"end_time": "2023-04-22 19:15:37.252176"
}

View File

@ -1,31 +0,0 @@
SNR,epsilon,BER,FER,DFR,num_iterations,k_avg
1.0,1e-06,0.294328431372549,0.722,0.0,1000.0,0.278
1.0,1e-05,0.294328431372549,0.722,0.0,1000.0,0.278
1.0,0.0001,0.3059313725490196,0.745,0.0,1000.0,0.254
1.0,0.001,0.3059558823529412,0.748,0.0,1000.0,0.254
1.0,0.01,0.30637254901960786,0.786,0.0,1000.0,0.254
1.0,0.1,0.31590686274509805,0.9,0.0,1000.0,0.925
2.0,1e-06,0.11647058823529412,0.298,0.0,1000.0,0.702
2.0,1e-05,0.11647058823529412,0.298,0.0,1000.0,0.702
2.0,0.0001,0.11647058823529412,0.298,0.0,1000.0,0.702
2.0,0.001,0.11652450980392157,0.306,0.0,1000.0,0.702
2.0,0.01,0.09901470588235294,0.297,0.0,1000.0,0.75
2.0,0.1,0.10512745098039215,0.543,0.0,1000.0,0.954
3.0,1e-06,0.004563725490196078,0.012,0.0,1000.0,0.988
3.0,1e-05,0.004563725490196078,0.012,0.0,1000.0,0.988
3.0,0.0001,0.005730392156862745,0.015,0.0,1000.0,0.985
3.0,0.001,0.005730392156862745,0.015,0.0,1000.0,0.985
3.0,0.01,0.007200980392156863,0.037,0.0,1000.0,0.981
3.0,0.1,0.009151960784313726,0.208,0.0,1000.0,0.995
4.0,1e-06,0.0,0.0,0.0,1000.0,1.0
4.0,1e-05,0.0,0.0,0.0,1000.0,1.0
4.0,0.0001,0.0,0.0,0.0,1000.0,1.0
4.0,0.001,0.0002598039215686275,0.001,0.0,1000.0,0.999
4.0,0.01,4.901960784313725e-05,0.01,0.0,1000.0,1.0
4.0,0.1,0.0006862745098039216,0.103,0.0,1000.0,1.0
5.0,1e-06,0.0,0.0,0.0,1000.0,1.0
5.0,1e-05,0.0,0.0,0.0,1000.0,1.0
5.0,0.0001,0.0,0.0,0.0,1000.0,1.0
5.0,0.001,0.0,0.0,0.0,1000.0,1.0
5.0,0.01,9.803921568627451e-06,0.002,0.0,1000.0,1.0
5.0,0.1,0.0005245098039215687,0.097,0.0,1000.0,1.0

View File

@ -1,10 +0,0 @@
{
"duration": 15.407989402010571,
"name": "rho_kavg_20433484",
"platform": "Linux-6.2.10-arch1-1-x86_64-with-glibc2.37",
"K": 200,
"rho": 1,
"mu": 5,
"max_frame_errors": 100000,
"end_time": "2023-04-23 06:39:23.561294"
}

View File

@ -1,9 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0315398614535635,0.598802395209581,334
1.5,0.0172476397966594,0.352733686067019,567
2,0.00668670591018522,0.14194464158978,1409
2.5,0.00168951075575861,0.0388349514563107,5150
3,0.000328745799468201,0.00800288103717338,24991
3.5,4.21796065456326e-05,0.00109195935727272,183157
4,4.95098039215686e-06,0.000134,500000
4.5,2.94117647058824e-07,1.2e-05,500000

View File

@ -1,10 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0592497868712702,0.966183574879227,207
1.5,0.0465686274509804,0.854700854700855,234
2,0.0326898561282098,0.619195046439629,323
2.5,0.0171613765211425,0.368324125230203,543
3,0.00553787541776455,0.116346713205352,1719
3.5,0.00134027952469441,0.0275900124155056,7249
4,0.000166480738027721,0.0034201480924124,58477
4.5,1.19607843137255e-05,0.000252,500000
5,9.50980392156863e-07,2e-05,500000

View File

@ -1,6 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0303507766743061,0.649350649350649,308
1.5,0.0122803195352215,0.296296296296296,675
2,0.00284899516991547,0.0733137829912024,2728
2.5,0.000348582879119279,0.00916968502131952,21811
3,2.52493009763634e-05,0.000760274154860243,263063

View File

@ -1,9 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0352267331433998,0.56980056980057,351
1.5,0.0214331413947537,0.383877159309021,521
2,0.0130737396538751,0.225733634311512,886
2.5,0.00488312520012808,0.0960614793467819,2082
3,0.00203045475576707,0.0396589331746976,5043
3.5,0.000513233833401836,0.010275380189067,19464
4,0.000107190497363908,0.0025408117893667,78715
4.5,3.2074433522095e-05,0.00092605883251763,215969

View File

@ -1,11 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0584002878042931,0.743494423791822,269
1.5,0.0458084740620892,0.626959247648903,319
2,0.0318872821653689,0.459770114942529,435
2.5,0.0248431459534825,0.392927308447937,509
3,0.0158453558113146,0.251256281407035,796
3.5,0.0115615186982586,0.176991150442478,1130
4,0.00708558642550811,0.115606936416185,1730
4.5,0.00389714705036542,0.0614817091915155,3253
5,0.00221548053104734,0.0345125107851596,5795
5.5,0.00106888328072207,0.0172131852999398,11619

View File

@ -1,6 +0,0 @@
SNR,BER,FER,num_iterations
1,0.0294665331073098,0.647249190938511,309
1.5,0.0104905437352246,0.265957446808511,752
2,0.0021089358705197,0.05616399887672,3561
2.5,0.000172515567971134,0.00498840196543037,40093
3,6.3531746031746e-06,0.0002,500000

View File

@ -9,12 +9,12 @@
\thesisTitle{Application of Optimization Algorithms for Channel Decoding}
\thesisType{Bachelor's Thesis}
\thesisAuthor{Andreas Tsouchlos}
\thesisAdvisor{Prof. Dr.-Ing. Laurent Schmalen}
%\thesisHeadOfInstitute{Prof. Dr.-Ing. Laurent Schmalen}
\thesisSupervisor{Dr.-Ing. Holger Jäkel}
\thesisStartDate{24.10.2022}
\thesisEndDate{24.04.2023}
\thesisSignatureDate{24.04.2023} % TODO: Signature date
\thesisLanguage{english}
\setlanguage
@ -35,7 +35,6 @@
\usetikzlibrary{spy}
\usetikzlibrary{shapes.geometric}
\usetikzlibrary{arrows.meta,arrows}
\tikzset{>=latex}
\pgfplotsset{compat=newest}
\usepgfplotslibrary{colorbrewer}
@ -210,7 +209,6 @@
%
% 6. Conclusion
\include{chapters/acknowledgements}
\tableofcontents
\cleardoublepage % make sure multipage TOCs are numbered correctly
@ -220,9 +218,9 @@
\include{chapters/proximal_decoding}
\include{chapters/lp_dec_using_admm}
\include{chapters/comparison}
% \include{chapters/discussion}
\include{chapters/conclusion}
% \include{chapters/appendix}
%\listoffigures