Compare commits

13 Commits

5 changed files with 265 additions and 177 deletions

View File

@@ -2,6 +2,13 @@ FROM alpine:3.19
RUN apk update && apk upgrade RUN apk update && apk upgrade
RUN apk add make texlive texmf-dist-pictures RUN apk add make texlive texmf-dist-pictures
RUN apk add texmf-dist-publishers RUN apk add texmf-dist-publishers texmf-dist-science texmf-dist-fontsextra texmf-dist-latexextra
RUN apk add texmf-dist-science RUN apk add biber texmf-dist-bibtexextra
RUN apk add texmf-dist-fontsextra
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home

View File

@@ -6,4 +6,16 @@ RUN pacman -Sy archlinux-keyring --noconfirm && pacman -Su --noconfirm
RUN pacman -Syu --noconfirm RUN pacman -Syu --noconfirm
RUN pacman -S make perl texlive texlive-binextra texlive-pictures --noconfirm RUN pacman -S make perl texlive texlive-binextra texlive-pictures --noconfirm
RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra --noconfirm RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra texlive-latexextra --noconfirm
RUN pacman -Syu biber texlive-bibtexextra --noconfirm
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home
# For some reason simply installing 'biber' does not set the path
ENV PATH="${PATH}:/usr/bin/vendor_perl"

View File

@@ -4,4 +4,13 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt update -y && apt upgrade -y RUN apt update -y && apt upgrade -y
RUN apt install make texlive latexmk texlive-pictures -y RUN apt install make texlive latexmk texlive-pictures -y
RUN apt install make texlive-publishers texlive-science texlive-fonts-extra -y RUN apt install texlive-publishers texlive-science texlive-fonts-extra texlive-latex-extra -y
RUN apt install biber texlive-bibtex-extra -y
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home

104
letter.bib Normal file
View File

@@ -0,0 +1,104 @@
@ARTICLE{ADMM,
author={Barman, Siddharth and Liu, Xishuo and Draper, Stark C. and Recht, Benjamin},
journal={IEEE Transactions on Information Theory},
title={Decomposition Methods for Large Scale LP Decoding},
year={2013},
volume={59},
number={12},
pages={7870-7886},
% doi={10.1109/TIT.2013.2281372}
}
@ARTICLE{feldman_paper,
author={Feldman, J. and Wainwright, M.J. and Karger, D.R.},
journal={IEEE Transactions on Information Theory},
title={Using linear programming to Decode Binary linear codes},
year={2005},
volume={51},
number={3},
pages={954-972},
% doi={10.1109/TIT.2004.842696}
}
@ARTICLE{ml_in_the_list,
author={Geiselhart, Marvin and Elkelesh, Ahmed and Ebada, Moustafa and Cammerer, Sebastian and Brink, Stephan ten},
journal={IEEE Transactions on Communications},
title={Automorphism Ensemble Decoding of ReedMuller Codes},
year={2021},
volume={69},
number={10},
pages={6424-6438},
% doi={10.1109/TCOMM.2021.3098798}
}
@ARTICLE{mackay99,
author={MacKay, D.J.C.},
journal={IEEE Transactions on Information Theory},
title={Good error-correcting codes based on very sparse matrices},
year={1999},
volume={45},
number={2},
pages={399-431},
% doi={10.1109/18.748992}
}
@online{mackay,
author = {MacKay, David J.C.},
title = {Encyclopedia of Sparse Graph Codes},
date = {2023-04},
url = {http://www.inference.org.uk/mackay/codes/data.html}
}
@article{proximal_algorithms,
title={Proximal algorithms},
author={Parikh, Neal and Boyd, Stephen and others},
journal={Foundations and trends{\textregistered} in Optimization},
volume={1},
number={3},
pages={127--239},
year={2014},
publisher={Now Publishers, Inc.}
}
@book{channel_codes_book,
place={Cambridge},
title={Channel Codes: Classical and Modern},
% DOI={10.1017/CBO9780511803253},
publisher={Cambridge University Press},
author={Ryan, William and Lin, Shu},
year={2009},
% url={https://d1.amobbs.com/bbs_upload782111/files_35/ourdev_604508GHLFR2.pdf}
}
@INPROCEEDINGS{adaptive_lp_decoding,
author={Taghavi, Mohammad H. and Siegel, Paul H.},
booktitle={2006 IEEE International Symposium on Information Theory},
title={Adaptive Linear Programming Decoding},
year={2006},
volume={},
number={},
pages={1374-1378},
% doi={10.1109/ISIT.2006.262071}
}
@INPROCEEDINGS{interior_point_decoding,
author={Vontobel, Pascal O.},
booktitle={2008 Information Theory and Applications Workshop},
title={Interior-point algorithms for linear-programming decoding},
year={2008},
volume={},
number={},
pages={433-437},
% doi={10.1109/ITA.2008.4601085}
}
@article{proximal_paper,
title={Proximal Decoding for {LDPC} Codes},
author={Tadashi Wadayama and Satoshi Takabe},
journal={IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences},
% volume={advpub},
% pages={2022TAP0002},
year={2022},
% doi={10.1587/transfun.2022TAP0002}
}

View File

@@ -7,6 +7,14 @@
\usepackage{algorithm} \usepackage{algorithm}
\usepackage{siunitx} \usepackage{siunitx}
\usepackage{dsfont} \usepackage{dsfont}
\usepackage{mleftright}
\usepackage{bbm}
\usepackage[
backend=biber,
style=ieee,
sorting=nty,
]{biblatex}
\usepackage{tikz} \usepackage{tikz}
\usetikzlibrary{spy, arrows.meta,arrows} \usetikzlibrary{spy, arrows.meta,arrows}
@@ -37,15 +45,28 @@
\input{lib/latex-common/common.tex} \input{lib/latex-common/common.tex}
\fi \fi
\addbibresource{letter.bib}
\AtBeginBibliography{\footnotesize}
\pgfplotsset{colorscheme/cel} \pgfplotsset{colorscheme/cel}
% TODO % TODO
\pgfplotsset{fancy marks/.style={}}
\newcommand{\figwidth}{\columnwidth} \newcommand{\figwidth}{\columnwidth}
\newcommand{\figheight}{0.75\columnwidth} \newcommand{\figheight}{0.75\columnwidth}
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
},
BERPlot/.style={
line width=1pt,
},
DFRPlot/.style={
only marks,
},
}
% %
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -57,7 +78,7 @@
\begin{document} \begin{document}
\title{A Note on Improving Proximal Decoding for Linear Block Codes} \title{List-based Proximal Decoding for Linear Block Codes}
\author{Andreas Tsouchlos, Holger Jäkel, and Laurent Schmalen\\ \author{Andreas Tsouchlos, Holger Jäkel, and Laurent Schmalen\\
Communications Engineering Lab (CEL), Karlsruhe Institute of Technology (KIT)\\ Communications Engineering Lab (CEL), Karlsruhe Institute of Technology (KIT)\\
@@ -80,20 +101,19 @@ Hertzstr. 16, 76187 Karlsruhe, Germany, Email: \texttt{\{first.last\}@kit.edu}}
\begin{abstract} \begin{abstract}
In this paper, the proximal decoding algorithm is considered within the In this paper, the proximal decoding algorithm is considered within the
context of \textit{additive white Gaussian noise} (AWGN) channels. context of \textit{additive white Gaussian noise} (AWGN) channels.
An analysis of the convergence behavior of the algorithm shows that it is an An analysis of the convergence behavior of the algorithm shows that
inherent property of proximal decoding to enter an proximal decoding inherently enters an oscillating behavior of the estimate
oscillating behavior of the estimate after a number of iterations. after a certain number of iterations.
Due to this oscillation, frame errors arising during decoding can often Due to this oscillation, frame errors arising during decoding can often
be attributed to only a few remaining wrongly decoded components. be attributed to only a few remaining wrongly decoded bits.
In this letter, an improvement of the proximal decoding algorithm is proposed
In this paper, an improvement of the algorithm is proposed by appending an by appending an additional step, in which these erroneous components are
additional step, in which these erroneous components are attempted to be attempted to be corrected.
corrected. We suggesst an empirical rule with which the components most likely needing
An empirical rule is suggested, with which the components most likely needing
correction can be determined. correction can be determined.
Using this insight and performing a subsequent ``ML-in-the-list'' decoding, Using this insight and performing a subsequent ``ML-in-the-list'' decoding,
a gain of up to approximately 1 dB is achieved compared to proximal decoding, a gain of up to 1 dB is achieved compared to conventional
depending on the parameters chosen and the code considered. proximal decoding, depending on the decoder parameters and the code.
\end{abstract} \end{abstract}
\begin{IEEEkeywords} \begin{IEEEkeywords}
@@ -116,7 +136,7 @@ the reliability of data by detecting and correcting any errors that may occur
during its transmission or storage. during its transmission or storage.
One class of binary linear codes, \textit{low-density parity-check} (LDPC) One class of binary linear codes, \textit{low-density parity-check} (LDPC)
codes, has become especially popular due to its ability to reach arbitrarily codes, has become especially popular due to its ability to reach arbitrarily
small probabilities of error at code rates up to the capacity of the channel small error probabilities at code rates up to the capacity of the channel
\cite{mackay99}, while retaining a structure that allows for very efficient \cite{mackay99}, while retaining a structure that allows for very efficient
decoding. decoding.
While the established decoders for LDPC codes, such as belief propagation (BP) While the established decoders for LDPC codes, such as belief propagation (BP)
@@ -129,37 +149,37 @@ Optimization based decoding algorithms are an entirely different way of
approaching the decoding problem. approaching the decoding problem.
A number of different such algorithms have been introduced. A number of different such algorithms have been introduced.
The field of \textit{linear programming} (LP) decoding \cite{feldman_paper}, The field of \textit{linear programming} (LP) decoding \cite{feldman_paper},
for example, represents one class of such algorithms, based on a reformulation for example, represents one class of such algorithms, based on a relaxation
of the \textit{maximum likelihood} (ML) decoding problem as a linear program. of the \textit{maximum likelihood} (ML) decoding problem as a linear program.
Many different optimization algorithms can be used to solve the resulting Many different optimization algorithms can be used to solve the resulting
problem \cite{interior_point_decoding, ADMM, adaptive_lp_decoding}. problem \cite{ADMM, adaptive_lp_decoding, interior_point_decoding}.
Recently, proximal decoding for LDPC codes was presented by Recently, proximal decoding for LDPC codes was presented by
Wadayama et al. \cite{proximal_paper}. Wadayama \textit{et al.} \cite{proximal_paper}.
It is a novel approach and relies on a non-convex optimization formulation Proximal decoding relies on a non-convex optimization formulation
of the \textit{maximum a posteriori} (MAP) decoding problem. of the \textit{maximum a posteriori} (MAP) decoding problem.
The aim of this work is to improve upon the performance of proximal decoding by The aim of this work is to improve upon the performance of proximal decoding by
first presenting an examination of the algorithm's behavior and then suggesting first presenting an examination of the algorithm's behavior and then suggesting
an approach to mitigate some of its flaws. an approach to mitigate some of its flaws.
This analysis is performed within the context of This analysis is performed for
\textit{additive white Gaussian noise} (AWGN) channels. \textit{additive white Gaussian noise} (AWGN) channels.
It is first observed that, while the algorithm initially moves the estimate in We first observe that the algorithm initially moves the estimate in
the right direction, in the final steps of the decoding process convergence to the right direction, however, in the final steps of the decoding process,
the correct codeword is often not achieved. convergence to the correct codeword is often not achieved.
Furthermore, it is suggested that the reason for this behavior is the nature Furthermore, we suggest that the reason for this behavior is the nature
of the decoding algorithm itself, comprising two separate gradient descent of the decoding algorithm itself, comprising two separate gradient descent
steps working adversarially. steps working adversarially.
A method to mitigate this effect is proposed by appending an additional step We propose a method mitigate this effect by appending an
to the decoding process. additional step to the decoding process.
In this additional step, the components of the estimate with the highest In this additional step, the components of the estimate with the highest
probability of being erroneous are identified. probability of being erroneous are identified.
New codewords are then generated, over which an ``ML-in-the-list'' New codewords are then generated, over which an ``ML-in-the-list''
\cite{ml_in_the_list} decoding is performed. \cite{ml_in_the_list} decoding is performed.
A process to conduct this identification is proposed in this paper. A process to conduct this identification is proposed in this paper.
Using the improved algorithm, a gain of up to Using the improved algorithm, a gain of up to
approximately 1 dB can be achieved compared to proximal decoding, depending on 1 dB can be achieved compared to conventional proximal decoding,
the parameters chosen and the code considered. depending on the decoder parameters and the code.
%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -182,31 +202,31 @@ number of parity-checks:
\end{align*} \end{align*}
% %
The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each correspond The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
to a parity check, i.e., row of $\boldsymbol{H}$. correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
to the components of a codeword being subjected to a parity check, i.e., to to the components of a codeword being subjected to a parity check, i.e.,
columns of $\boldsymbol{H}$. to the columns of $\boldsymbol{H}$.
The neighborhood of a parity check $j$, i.e., the set of indices of components The neighborhood of a parity check $j$, i.e., the set of indices of components
relevant for the according parity check, is denoted by relevant for the according parity check, is denoted by
$N_c(j) := \left\{i \mid i \in \mathcal{I}, \boldsymbol{H}_{j,i} = 1 \right\}, $\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
\hspace{2mm} j \in \mathcal{J}$. \hspace{2mm} j \in \mathcal{J}$.
In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
mapped onto a \textit{binary phase shift keying} (BPSK) symbol via mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
$\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with $\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
$ \boldsymbol{x} \in \left\{-1, 1\right\}^n$, which is then transmitted over an $ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
AWGN channel. AWGN channel.
The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
estimate of the transmitted codeword, denoted as estimate of the transmitted codeword, denoted as
$\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$. $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
A distinction is made between $\boldsymbol{x} \in \left\{-1, 1\right\}^n$ A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$, and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$,
the former denoting the BPSK symbol physically transmitted over the channel and the former denoting the BPSK symbol physically transmitted over the channel and
the latter being used as a variable during the optimization process. the latter being used as a variable during the optimization process.
The posterior probability of having transmitted $\boldsymbol{x}$ when receiving The posterior probability of having transmitted $\boldsymbol{x}$ when receiving
$\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF) $\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF)
$p_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$. $P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
Likewise, the likelihood of receiving $\boldsymbol{y}$ upon transmitting Likewise, the likelihood of receiving $\boldsymbol{y}$ upon transmitting
$\boldsymbol{x}$ is expressed as a \textit{probability density function} (PDF) $\boldsymbol{x}$ is expressed as a \textit{probability density function} (PDF)
$f_{\boldsymbol{Y}\mid\boldsymbol{X}}(\boldsymbol{y} \mid \boldsymbol{x})$. $f_{\boldsymbol{Y}\mid\boldsymbol{X}}(\boldsymbol{y} \mid \boldsymbol{x})$.
@@ -221,25 +241,24 @@ With proximal decoding, the proximal gradient method \cite{proximal_algorithms}
is used to solve a non-convex optimization formulation of the MAP decoding is used to solve a non-convex optimization formulation of the MAP decoding
problem. problem.
When making the equal probability assumption for all codewords, MAP and ML With the equal prior probability assumption for all codewords, MAP and ML
decoding are equivalent and, specifically for AWGN channels, correspond to a decoding are equivalent and, specifically for AWGN channels, correspond to a
nearest-neighbor decision. nearest-neighbor decision.
For this reason, decoding can be done using a figure of merit that describes For this reason, decoding can be carried out using a figure of merit that
the distance from a given vector to a codeword. describes the distance from a given vector to a codeword.
One such expression, formulated under the assumption of BPSK, is the One such expression, formulated under the assumption of BPSK, is the
\textit{code-constraint polynomial} \cite{proximal_paper} \textit{code-constraint polynomial} \cite{proximal_paper}
% %
\begin{align*} \begin{align*}
h\left( \tilde{\boldsymbol{x}} \right) = h( \tilde{\boldsymbol{x}} ) =
\underbrace{\sum_{i=1}^{n} \underbrace{\sum_{i=1}^{n}
\left( \tilde{x_i}^2-1 \right) ^2}_{\text{Bipolar constraint}} \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
+ \underbrace{\sum_{j=1}^{m} \left[ + \underbrace{\sum_{j=1}^{m} \left[
\left( \prod_{i\in N_c \left( j \right) } \tilde{x_i} \right) \left( \prod_{i\in \mathcal{N}_c \left( j \right) } \tilde{x}_i \right)
-1 \right] ^2}_{\text{Parity constraint}} -1 \right] ^2}_{\text{Parity constraint}}
.\end{align*}% .\end{align*}%
% %
Its intent is to penalize vectors far from a codeword and favor those close Its intent is to penalize vectors far from a codeword.
to one.
It comprises two terms: one representing the bipolar constraint It comprises two terms: one representing the bipolar constraint
and one representing the parity constraint, incorporating all of the and one representing the parity constraint, incorporating all of the
information regarding the code. information regarding the code.
@@ -247,18 +266,18 @@ information regarding the code.
The channel model can be considered using the negative log-likelihood The channel model can be considered using the negative log-likelihood
% %
\begin{align*} \begin{align*}
L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left( L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left( f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
\boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right) \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
.\end{align*} .\end{align*}
% %
The information about the channel and the code are consolidated in the objective The information about the channel and the code are consolidated in the objective
function \cite{proximal_paper} function \cite{proximal_paper}
% %
\begin{align*} \begin{align*}
g\left( \tilde{\boldsymbol{x}} \right) g \mleft( \tilde{\boldsymbol{x}} \mright)
= L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
+ \gamma h\left( \tilde{\boldsymbol{x}} \right), + \gamma h\mleft( \tilde{\boldsymbol{x}} \mright),
\hspace{5mm} \gamma > 0% \hspace{5mm} \gamma > 0%
.\end{align*} .\end{align*}
% %
@@ -270,17 +289,17 @@ introduced, describing the result of each of the two steps:
% %
\begin{alignat}{3} \begin{alignat}{3}
\boldsymbol{r} &\leftarrow \boldsymbol{s} \boldsymbol{r} &\leftarrow \boldsymbol{s}
- \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) - \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
\hspace{5mm }&&\omega > 0 \label{eq:r_update}\\ \hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
\boldsymbol{s} &\leftarrow \boldsymbol{r} \boldsymbol{s} &\leftarrow \boldsymbol{r}
- \gamma \nabla h\left( \boldsymbol{r} \right), - \gamma \nabla h\mleft( \boldsymbol{r} \mright),
\hspace{5mm} &&\gamma > 0 \label{eq:s_update} \hspace{5mm} &&\gamma > 0 \label{eq:s_update}
.\end{alignat} .\end{alignat}
% %
An equation for determining $\nabla h(\boldsymbol{r})$ is given in An equation for determining $\nabla h(\boldsymbol{r})$ is given in
\cite{proximal_paper}. \cite{proximal_paper}.
It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$ It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
really represent $\tilde{\boldsymbol{x}}$ during different represent $\tilde{\boldsymbol{x}}$ during different
stages of the decoding process. stages of the decoding process.
As the gradient of the code-constraint polynomial can attain very large values As the gradient of the code-constraint polynomial can attain very large values
@@ -290,10 +309,10 @@ $\left[-\eta, \eta\right]^n$ by a projection
$\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$ $\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$
is a positive constant slightly larger than one, e.g., $\eta = 1.5$. is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
The resulting decoding process as described in \cite{proximal_paper} is The resulting decoding process as described in \cite{proximal_paper} is
presented in algorithm \ref{alg:proximal_decoding}. presented in Algorithm \ref{alg:proximal_decoding}.
\begin{algorithm} \begin{algorithm}
\caption{Proximal decoding algorithm for an AWGN channel.} \caption{Proximal decoding algorithm for an AWGN channel \cite{proximal_paper}.}
\label{alg:proximal_decoding} \label{alg:proximal_decoding}
\begin{algorithmic} \begin{algorithmic}
@@ -301,7 +320,7 @@ presented in algorithm \ref{alg:proximal_decoding}.
\STATE \textbf{for} $K$ iterations \textbf{do} \STATE \textbf{for} $K$ iterations \textbf{do}
\STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $ \STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
\STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$ \STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
\STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathds{1} \left\{ \text{sign}\left( \boldsymbol{s} \right) = -1 \right\}$ \STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathbbm{1}_{\left\{ \boldsymbol{s} \preceq 0 \right\}}$
\STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do} \STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
\STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$ \STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
\STATE \hspace{5mm} \textbf{end if} \STATE \hspace{5mm} \textbf{end if}
@@ -316,14 +335,14 @@ presented in algorithm \ref{alg:proximal_decoding}.
\section{Improved algorithm} \section{Improved algorithm}
%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%
\subsection{Analysis of Convergence Behavior} \subsection{Analysis of the Convergence Behavior}
In figure \ref{fig:fer vs ber}, the \textit{frame error rate} (FER), In Fig. \ref{fig:fer vs ber}, the \textit{frame error rate} (FER),
\textit{bit error rate} (BER) and \textit{decoding failure rate} (DFR) of \textit{bit error rate} (BER) and \textit{decoding failure rate} (DFR) of
proximal decoding are shown for an LDPC code with $n=204$ and $k=102$ proximal decoding are shown for an LDPC code with $n=204$ and $k=102$
\cite[204.33.484]{mackay}. \cite[204.33.484]{mackay}.
A decoding failure is defined as a decoding operation, the result of which is A decoding failure is defined as a decoding operation returning an invalid
not a valid codeword, i.e., as non-convergence of the algorithm. codeword, i.e., as non-convergence of the algorithm.
The parameters chosen for this simulation are $\gamma=0.05, \omega=0.05, The parameters chosen for this simulation are $\gamma=0.05, \omega=0.05,
\eta=1.5$ and $K=200$. \eta=1.5$ and $K=200$.
They were determined to offer the best performance in a preliminary examination, They were determined to offer the best performance in a preliminary examination,
@@ -334,33 +353,15 @@ This means that most frame errors are not due to the algorithm converging
to the wrong codeword, but due to the algorithm not converging at all. to the wrong codeword, but due to the algorithm not converging at all.
As proximal decoding is an optimization-based decoding method, one possible As proximal decoding is an optimization-based decoding method, one possible
explanation for this effect might be that during the decoding process convergence explanation for this effect might be that during the decoding process, convergence
on the final codeword is often not achieved, although the estimate is moving in to the final codeword is often not achieved, although the estimate is moving into
the right general direction. the right direction.
This would suggest that most frame errors occur due to only a few incorrectly This would suggest that most frame errors occur due to only a few incorrectly
decoded bits.% decoded bits.%
% %
\begin{figure}[ht] \begin{figure}
\centering \centering
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
mark=triangle,
fancy marks
},
BERPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
DFRPlot/.style={
only marks,
mark=square*,
fancy marks,
}}
\begin{tikzpicture} \begin{tikzpicture}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
@@ -373,24 +374,24 @@ decoded bits.%
height=\figheight, height=\figheight,
legend pos = south west, legend pos = south west,
] ]
\addplot+[FERPlot, scol0] \addplot+[FERPlot, mark=o, mark options={solid}, scol1]
table [x=SNR, y=FER, col sep=comma, table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{9}] discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv}; {res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER} \addlegendentry{FER}
\addplot+[DFRPlot, scol2] \addplot+[BERPlot, mark=*, scol1]
table [x=SNR, y=DFR, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{DFR}
\addplot+[BERPlot, scol1]
table [x=SNR, y=BER, col sep=comma, table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}] discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv}; {res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER} \addlegendentry{BER}
\addplot+[DFRPlot, mark=square*, scol0]
table [x=SNR, y=DFR, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{DFR}
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
@@ -402,27 +403,28 @@ decoded bits.%
\label{fig:fer vs ber} \label{fig:fer vs ber}
\end{figure}% \end{figure}%
% %
An approach for lowering the FER might then be to append an ``ML-in-the-list'' An approach for lowering the FER might then be to append an ``ML-in-the-list''
\cite{ml_in_the_list} step to the decoding process shown in algorithm \cite{ml_in_the_list} step to the decoding process shown in Algorithm
\ref{alg:proximal_decoding}. \ref{alg:proximal_decoding}.
This step would consist of determining the $N \in \mathbb{N}$ most probably This step consists in determining the $N \in \mathbb{N}$ most probable
wrong bits, finding all variations of the current estimate with those bits erroneous bits, finding all variations of the current estimate with those bits
modified, and performing ML decoding on this list. modified, and performing ML decoding on this list.
This approach crucially relies on identifying the most probably wrong bits. This approach crucially relies on identifying the most probable erroneous bits.
Therefore, the convergence properties of proximal decoding are investigated. Therefore, the convergence properties of proximal decoding are investigated.
Considering equations (\ref{eq:s_update}) and (\ref{eq:r_update}), figure Considering (\ref{eq:s_update}) and (\ref{eq:r_update}), Fig.
\ref{fig:grad} shows the two gradients along which the minimization is \ref{fig:grad} shows the two gradients along which the minimization is
performed for a repetition code with $n=2$. performed for a repetition code with $n=2$.
It is apparent that a net movement will result as long as the two gradients It is apparent that a net movement will result as long as the two gradients
have a common component. have a common component.
As soon as this common component is exhausted, they will work in opposing As soon as this common component is exhausted, they will work in opposing
directions and an oscillation of the estimate will take place. directions resulting in an oscillation of the estimate.
This behavior matches the conjecture that the reason for the high DFR is a This behavior supports the conjecture that the reason for the high DFR is a
failure to converge to the correct codeword in the final steps of the failure to converge to the correct codeword in the final steps of the
optimization process.% optimization process.%
% %
\begin{figure}[h] \begin{figure}
\centering \centering
\begin{tikzpicture} \begin{tikzpicture}
@@ -521,16 +523,18 @@ optimization process.%
\label{fig:grad} \label{fig:grad}
\end{figure}% \end{figure}%
% %
In figure \ref{fig:prox:convergence_large_n}, only component
$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate is considered during a In Fig. \ref{fig:prox:convergence_large_n}, we consider only component
decoding operation for an LDPC code with $n=204$ and $k=102$. $\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate during a
decoding operation for the LDPC code used also for Fig. 1.
Two qualities may be observed. Two qualities may be observed.
First, the average values of the two gradients are equal, except for their sign, First, we observe the average absolute values of the two gradients are equal,
however, they have opposing signs,
leading to the aforementioned oscillation. leading to the aforementioned oscillation.
Second, the gradient of the code constraint polynomial itself starts to Second, the gradient of the code constraint polynomial itself starts to
oscillate after a certain number of iterations.% oscillate after a certain number of iterations.%
% %
\begin{figure}[ht] \begin{figure}
\centering \centering
\begin{tikzpicture} \begin{tikzpicture}
@@ -574,11 +578,11 @@ oscillate after a certain number of iterations.%
\end{figure}% \end{figure}%
%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%
\subsection{Improvement using ``ML-in-the-list'' step} \subsection{Improvement Using ``ML-in-the-List'' Step}
Considering the magnitude of oscillation of the gradient of the code constraint Considering the magnitude of the oscillation of the gradient of the code constraint
polynomial, some interesting behavior may be observed. polynomial, some interesting behavior may be observed.
Figure \ref{fig:p_error} shows the probability that a component of the estimate Fig. \ref{fig:p_error} shows the probability that a component of the estimate
is wrong, determined through a Monte Carlo simulation, when the components of is wrong, determined through a Monte Carlo simulation, when the components of
$\boldsymbol{c}$ are ordered from smallest to largest oscillation of $\boldsymbol{c}$ are ordered from smallest to largest oscillation of
$\left(\nabla h\right)_i$. $\left(\nabla h\right)_i$.
@@ -599,29 +603,29 @@ the probability that a given component was decoded incorrectly.%
ymode=log, ymode=log,
ymin=1e-9,ymax=1e-5, ymin=1e-9,ymax=1e-5,
xmin=0,xmax=200, xmin=0,xmax=200,
width=\figwidth, width=0.95\figwidth,
height=\figheight, height=\figheight,
] ]
\addplot+ [scol1, mark=none, line width=1] \addplot+ [scol0, mark=none, line width=1]
table [col sep=comma, y=p_error]{res/p_error.csv}; table [col sep=comma, y=p_error]{res/p_error.csv};
\end{axis} \end{axis}
\end{tikzpicture} \end{tikzpicture}
\caption{Probability that a component of the estimated codeword \caption{Probability that a component of the estimated codeword
$\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is wrong for a (3,6) regular $\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}. LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
The indices $i'$ are ordered such that the amplitude of oscillation of The indices $i'$ are ordered such that the amplitude of oscillation of
$\left(\nabla h\right)_{i'}$ increases with $i'$. $\left(\nabla h\right)_{i'}$ increases with $i'$.
Parameters used for simulation: $\gamma = 0.05, \omega = 0.05, Parameters used for the simulation: $\gamma = 0.05, \omega = 0.05,
\eta = 1.5, E_b/N_0 = \SI{4}{dB}$. \eta = 1.5, E_b/N_0 = \SI{4}{dB}$.
Simulated with $\SI{100000000}{}$ iterations.} Simulated with $\SI{100000000}{}$ iterations using the all-zeros codeword.}
\label{fig:p_error} \label{fig:p_error}
\end{figure} \end{figure}
The complete improved algorithm is depicted in algorithm \ref{alg:improved}. The complete improved algorithm is given in Algorithm \ref{alg:improved}.
First, the proximal decoding algorithm is applied. First, the proximal decoding algorithm is applied.
If a valid codeword has been reached, i.e., if the algorithm has converged, this If a valid codeword has been reached, i.e., if the algorithm has converged,
is the solution returned. we return this solution.
Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion
presented above. presented above.
Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$, Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$,
@@ -668,7 +672,7 @@ generated and an ``ML-in-the-list'' step is performed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Simulation Results \& Discussion} \section{Simulation Results \& Discussion}
Figure \ref{fig:results} shows the FER and BER resulting from applying Fig. \ref{fig:results} shows the FER and BER resulting from applying
proximal decoding as presented in \cite{proximal_paper} and the improved proximal decoding as presented in \cite{proximal_paper} and the improved
algorithm presented here when applied to a $\left( 3,6 \right)$-regular LDPC algorithm presented here when applied to a $\left( 3,6 \right)$-regular LDPC
code with $n=204$ and $k=102$ \cite[204.33.484]{mackay}. code with $n=204$ and $k=102$ \cite[204.33.484]{mackay}.
@@ -676,24 +680,10 @@ The parameters chosen for the simulation are
$\gamma = 0.05, \omega=0.05, \eta=1.5, K=200$. $\gamma = 0.05, \omega=0.05, \eta=1.5, K=200$.
Again, these parameters were chosen,% Again, these parameters were chosen,%
% %
\begin{figure}[ht] \begin{figure}
\centering \centering
\begin{tikzpicture} \begin{tikzpicture}
\pgfplotsset{
ProxPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
ImprPlot/.style={
line width=1pt,
mark=triangle,
densely dashed,
fancy marks,
},
}
\begin{axis}[ \begin{axis}[
grid=both, grid=both,
xlabel={$E_\text{b} / N_0$ (dB)}, xlabel={$E_\text{b} / N_0$ (dB)},
@@ -708,28 +698,28 @@ Again, these parameters were chosen,%
at={(0.5,-0.44)},anchor=south} at={(0.5,-0.44)},anchor=south}
] ]
\addplot+[ProxPlot, scol1] \addplot+[FERPlot, mark=o, mark options={solid}, scol1]
table [x=SNR, y=FER, col sep=comma, table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{9}] discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv}; {res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, prox. dec.}; \addlegendentry{FER, prox. dec.};
\addplot+[ProxPlot, scol2] \addplot+[BERPlot, mark=*, scol1]
table [x=SNR, y=BER, col sep=comma, table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}] discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv}; {res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, prox. dec.}; \addlegendentry{BER, prox. dec.};
\addplot+[ImprPlot, scol1] \addplot+[FERPlot, mark=triangle, mark options={solid}, scol2]
table [x=SNR, y=FER, col sep=comma, table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}] discard if gt={SNR}{7.5}]
{res/improved_ber_fer_dfr_20433484.csv}; {res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, improved}; \addlegendentry{FER, improved};
\addplot+[ImprPlot, scol2] \addplot+[BERPlot, mark=triangle*, scol2]
table [x=SNR, y=BER, col sep=comma, table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05}, discard if not={gamma}{0.05},
discard if gt={SNR}{6.5}] discard if gt={SNR}{6.5}]
@@ -761,7 +751,7 @@ The gain varies significantly
with the SNR (which is to be expected, since with higher SNR values the number with the SNR (which is to be expected, since with higher SNR values the number
of bit errors decreases, making the correction of those errors in the of bit errors decreases, making the correction of those errors in the
``ML-in-the-list'' step more likely). ``ML-in-the-list'' step more likely).
For an FER of $10^{-6}$ the gain is approximately $\SI{1}{dB}$. For an FER of $10^{-6}$, the gain is approximately $\SI{1}{dB}$.
Similar behavior can be observed with various other codes. Similar behavior can be observed with various other codes.
No immediate relationship between the code length and the gain was observed No immediate relationship between the code length and the gain was observed
during our examinations. during our examinations.
@@ -776,7 +766,7 @@ from only a few components of the estimate being wrong.
These few erroneous components can mostly be corrected by appending an These few erroneous components can mostly be corrected by appending an
additional step to the original algorithm that is only executed if the additional step to the original algorithm that is only executed if the
algorithm has not converged. algorithm has not converged.
A gain of up to $\sim\SI{1}{dB}$ can be observed, depending on the code, A gain of up to $\SI{1}{dB}$ can be observed, depending on the code,
the parameters considered, and the SNR. the parameters considered, and the SNR.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -796,41 +786,7 @@ Ministry of Education and Research (BMBF) within the project Open6GHub
% %
\begin{thebibliography}{1} \printbibliography
\bibliographystyle{IEEEtran}
\bibitem{ADMM}
S. Barman, X. Liu, S. C. Draper and B. Recht, ``Decomposition Methods for Large Scale LP Decoding,'' in IEEE Transactions on Information Theory, vol. 59, no. 12, pp. 7870-7886, Dec. 2013, doi: 10.1109/TIT.2013.2281372.
\bibitem{feldman_paper}
J. Feldman, M. J. Wainwright and D. R. Karger, ``Using linear programming to Decode Binary linear codes,'' in IEEE Transactions on Information Theory, vol. 51, no. 3, pp. 954-972, March 2005, doi: 10.1109/TIT.2004.842696.
\bibitem{ml_in_the_list}
M. Geiselhart, A. Elkelesh, M. Ebada, S. Cammerer and S. t. Brink, ``Automorphism Ensemble Decoding of ReedMuller Codes,'' in IEEE Transactions on Communications, vol. 69, no. 10, pp. 6424-6438, Oct. 2021, doi: 10.1109/TCOMM.2021.3098798.
\bibitem{mackay99}
D. J. C. MacKay, ``Good error-correcting codes based on very sparse matrices,'' in IEEE Transactions on Information Theory, vol. 45, no. 2, pp. 399-431, March 1999, doi: 10.1109/18.748992.
\bibitem{mackay}
D.J.C. MacKay, ``Encyclopedia of sparse graph codes [online],''
Available: http://www.inference.phy.cam.ac.uk/mackay/codes/data.html
\bibitem{proximal_algorithms}
N. Parikh and S. Boyd,``Proximal algorithms,'' Found. Trends Optim., vol. 1, no. 3, pp. 127239, Jan. 2014.
\bibitem{channel_codes_book}
W. Ryan and S. Lin, Channel Codes: Classical and Modern, Cambridge, Cambridge University Press, 2009, pp. 651-670.
\bibitem{adaptive_lp_decoding}
M. H. Taghavi and P. H. Siegel, ``Adaptive Linear Programming Decoding,'' 2006 IEEE International Symposium on Information Theory, Seattle, WA, USA, 2006, pp. 1374-1378, doi: 10.1109/ISIT.2006.262071.
\bibitem{interior_point_decoding}
P. O. Vontobel, ``Interior-point algorithms for linear-programming decoding,'' 2008 Information Theory and Applications Workshop, San Diego, CA, USA, 2008, pp. 433-437, doi: 10.1109/ITA.2008.4601085.
\bibitem{proximal_paper}
T. Wadayama and S. Takabe, ``Proximal decoding for ldpc codes'' IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences, vol. advpub, 2022TAP0002, 2022.
\end{thebibliography}
\end{document} \end{document}