Compare commits

15 Commits

5 changed files with 491 additions and 369 deletions

View File

@@ -2,6 +2,13 @@ FROM alpine:3.19
RUN apk update && apk upgrade
RUN apk add make texlive texmf-dist-pictures
RUN apk add texmf-dist-publishers
RUN apk add texmf-dist-science
RUN apk add texmf-dist-fontsextra
RUN apk add texmf-dist-publishers texmf-dist-science texmf-dist-fontsextra texmf-dist-latexextra
RUN apk add biber texmf-dist-bibtexextra
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home

View File

@@ -6,4 +6,16 @@ RUN pacman -Sy archlinux-keyring --noconfirm && pacman -Su --noconfirm
RUN pacman -Syu --noconfirm
RUN pacman -S make perl texlive texlive-binextra texlive-pictures --noconfirm
RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra --noconfirm
RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra texlive-latexextra --noconfirm
RUN pacman -Syu biber texlive-bibtexextra --noconfirm
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home
# For some reason simply installing 'biber' does not set the path
ENV PATH="${PATH}:/usr/bin/vendor_perl"

View File

@@ -4,4 +4,13 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt update -y && apt upgrade -y
RUN apt install make texlive latexmk texlive-pictures -y
RUN apt install make texlive-publishers texlive-science texlive-fonts-extra -y
RUN apt install texlive-publishers texlive-science texlive-fonts-extra texlive-latex-extra -y
RUN apt install biber texlive-bibtex-extra -y
# The 'bbm' package insists on generating stuff in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home

104
letter.bib Normal file
View File

@@ -0,0 +1,104 @@
@ARTICLE{ADMM,
author={Barman, Siddharth and Liu, Xishuo and Draper, Stark C. and Recht, Benjamin},
journal={IEEE Transactions on Information Theory},
title={Decomposition Methods for Large Scale LP Decoding},
year={2013},
volume={59},
number={12},
pages={7870-7886},
% doi={10.1109/TIT.2013.2281372}
}
@ARTICLE{feldman_paper,
author={Feldman, J. and Wainwright, M.J. and Karger, D.R.},
journal={IEEE Transactions on Information Theory},
title={Using linear programming to decode binary linear codes},
year={2005},
volume={51},
number={3},
pages={954-972},
% doi={10.1109/TIT.2004.842696}
}
@ARTICLE{ml_in_the_list,
author={Geiselhart, Marvin and Elkelesh, Ahmed and Ebada, Moustafa and Cammerer, Sebastian and Brink, Stephan ten},
journal={IEEE Transactions on Communications},
title={Automorphism Ensemble Decoding of {Reed--Muller} Codes},
year={2021},
volume={69},
number={10},
pages={6424-6438},
% doi={10.1109/TCOMM.2021.3098798}
}
@ARTICLE{mackay99,
author={MacKay, D.J.C.},
journal={IEEE Transactions on Information Theory},
title={Good error-correcting codes based on very sparse matrices},
year={1999},
volume={45},
number={2},
pages={399-431},
% doi={10.1109/18.748992}
}
@online{mackay,
author = {MacKay, David J.C.},
title = {Encyclopedia of Sparse Graph Codes},
date = {2023-04},
url = {http://www.inference.org.uk/mackay/codes/data.html}
}
@article{proximal_algorithms,
title={Proximal algorithms},
author={Parikh, Neal and Boyd, Stephen and others},
journal={Foundations and trends{\textregistered} in Optimization},
volume={1},
number={3},
pages={127--239},
year={2014},
publisher={Now Publishers, Inc.}
}
@book{channel_codes_book,
place={Cambridge},
title={Channel Codes: Classical and Modern},
% DOI={10.1017/CBO9780511803253},
publisher={Cambridge University Press},
author={Ryan, William and Lin, Shu},
year={2009},
% url={https://d1.amobbs.com/bbs_upload782111/files_35/ourdev_604508GHLFR2.pdf}
}
@INPROCEEDINGS{adaptive_lp_decoding,
author={Taghavi, Mohammad H. and Siegel, Paul H.},
booktitle={2006 IEEE International Symposium on Information Theory},
title={Adaptive Linear Programming Decoding},
year={2006},
volume={},
number={},
pages={1374-1378},
% doi={10.1109/ISIT.2006.262071}
}
@INPROCEEDINGS{interior_point_decoding,
author={Vontobel, Pascal O.},
booktitle={2008 Information Theory and Applications Workshop},
title={Interior-point algorithms for linear-programming decoding},
year={2008},
volume={},
number={},
pages={433-437},
% doi={10.1109/ITA.2008.4601085}
}
@article{proximal_paper,
title={Proximal Decoding for {LDPC} Codes},
author={Tadashi Wadayama and Satoshi Takabe},
journal={IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences},
% volume={advpub},
% pages={2022TAP0002},
year={2022},
% doi={10.1587/transfun.2022TAP0002}
}

View File

@@ -7,6 +7,14 @@
\usepackage{algorithm}
\usepackage{siunitx}
\usepackage{dsfont}
\usepackage{mleftright}
\usepackage{bbm}
\usepackage[
backend=biber,
style=ieee,
sorting=nty,
]{biblatex}
\usepackage{tikz}
\usetikzlibrary{spy, arrows.meta,arrows}
@@ -18,10 +26,6 @@
\hyphenation{op-tical net-works semi-conduc-tor IEEE-Xplore}
\newif\ifoverleaf
%\overleaftrue
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Inputs & Global Options
@@ -29,6 +33,18 @@
%
\newif\ifoverleaf
%\overleaftrue % When enabled, this option allows the document to be compiled
% on overleaf:
% - common.tex is sourced from a different directory
% - TikZ Externalization is disabled
% - Figures are included from pre-built PDFs
%
% Figures
%
\ifoverleaf
\input{common.tex}
\else
@@ -37,15 +53,31 @@
\input{lib/latex-common/common.tex}
\fi
\pgfplotsset{colorscheme/cel}
% TODO
\pgfplotsset{fancy marks/.style={}}
\newcommand{\figwidth}{\columnwidth}
\newcommand{\figheight}{0.75\columnwidth}
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
},
BERPlot/.style={
line width=1pt,
},
DFRPlot/.style={
only marks,
},
}
%
% Bibliography
%
\addbibresource{letter.bib}
\AtBeginBibliography{\footnotesize}
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -57,7 +89,7 @@
\begin{document}
\title{A Note on Improving Proximal Decoding for Linear Block Codes}
\title{List-based Proximal Decoding for Linear Block Codes}
\author{Andreas Tsouchlos, Holger Jäkel, and Laurent Schmalen\\
Communications Engineering Lab (CEL), Karlsruhe Institute of Technology (KIT)\\
@@ -80,20 +112,19 @@ Hertzstr. 16, 76187 Karlsruhe, Germany, Email: \texttt{\{first.last\}@kit.edu}}
\begin{abstract}
In this paper, the proximal decoding algorithm is considered within the
context of \textit{additive white Gaussian noise} (AWGN) channels.
An analysis of the convergence behavior of the algorithm shows that it is an
inherent property of proximal decoding to enter an
oscillating behavior of the estimate after a number of iterations.
An analysis of the convergence behavior of the algorithm shows that
proximal decoding inherently enters an oscillating behavior of the estimate
after a certain number of iterations.
Due to this oscillation, frame errors arising during decoding can often
be attributed to only a few remaining wrongly decoded components.
In this paper, an improvement of the algorithm is proposed by appending an
additional step, in which these erroneous components are attempted to be
corrected.
An empirical rule is suggested, with which the components most likely needing
be attributed to only a few remaining wrongly decoded bits.
In this letter, an improvement of the proximal decoding algorithm is proposed
by appending an additional step, in which these erroneous components are
attempted to be corrected.
We suggest an empirical rule with which the components most likely needing
correction can be determined.
Using this insight and performing a subsequent ``ML-in-the-list'' decoding,
a gain of up to approximately 1 dB is achieved compared to proximal decoding,
depending on the parameters chosen and the code considered.
a gain of up to 1 dB is achieved compared to conventional
proximal decoding, depending on the decoder parameters and the code.
\end{abstract}
\begin{IEEEkeywords}
@@ -116,7 +147,7 @@ the reliability of data by detecting and correcting any errors that may occur
during its transmission or storage.
One class of binary linear codes, \textit{low-density parity-check} (LDPC)
codes, has become especially popular due to its ability to reach arbitrarily
small probabilities of error at code rates up to the capacity of the channel
small error probabilities at code rates up to the capacity of the channel
\cite{mackay99}, while retaining a structure that allows for very efficient
decoding.
While the established decoders for LDPC codes, such as belief propagation (BP)
@@ -129,37 +160,37 @@ Optimization based decoding algorithms are an entirely different way of
approaching the decoding problem.
A number of different such algorithms have been introduced.
The field of \textit{linear programming} (LP) decoding \cite{feldman_paper},
for example, represents one class of such algorithms, based on a reformulation
for example, represents one class of such algorithms, based on a relaxation
of the \textit{maximum likelihood} (ML) decoding problem as a linear program.
Many different optimization algorithms can be used to solve the resulting
problem \cite{interior_point_decoding, ADMM, adaptive_lp_decoding}.
problem \cite{ADMM, adaptive_lp_decoding, interior_point_decoding}.
Recently, proximal decoding for LDPC codes was presented by
Wadayama et al. \cite{proximal_paper}.
It is a novel approach and relies on a non-convex optimization formulation
Wadayama and Takabe \cite{proximal_paper}.
Proximal decoding relies on a non-convex optimization formulation
of the \textit{maximum a posteriori} (MAP) decoding problem.
The aim of this work is to improve upon the performance of proximal decoding by
first presenting an examination of the algorithm's behavior and then suggesting
an approach to mitigate some of its flaws.
This analysis is performed within the context of
This analysis is performed for
\textit{additive white Gaussian noise} (AWGN) channels.
It is first observed that, while the algorithm initially moves the estimate in
the right direction, in the final steps of the decoding process convergence to
the correct codeword is often not achieved.
Furthermore, it is suggested that the reason for this behavior is the nature
We first observe that the algorithm initially moves the estimate in
the right direction; however, in the final steps of the decoding process,
convergence to the correct codeword is often not achieved.
Furthermore, we suggest that the reason for this behavior is the nature
of the decoding algorithm itself, comprising two separate gradient descent
steps working adversarially.
A method to mitigate this effect is proposed by appending an additional step
to the decoding process.
We propose a method to mitigate this effect by appending an
additional step to the decoding process.
In this additional step, the components of the estimate with the highest
probability of being erroneous are identified.
New codewords are then generated, over which an ``ML-in-the-list''
\cite{ml_in_the_list} decoding is performed.
A process to conduct this identification is proposed in this paper.
Using the improved algorithm, a gain of up to
approximately 1 dB can be achieved compared to proximal decoding, depending on
the parameters chosen and the code considered.
1 dB can be achieved compared to conventional proximal decoding,
depending on the decoder parameters and the code.
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -182,31 +213,31 @@ number of parity-checks:
\end{align*}
%
The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each correspond
to a parity check, i.e., row of $\boldsymbol{H}$.
The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
to the components of a codeword being subjected to a parity check, i.e., to
columns of $\boldsymbol{H}$.
to the components of a codeword being subjected to a parity check, i.e.,
to the columns of $\boldsymbol{H}$.
The neighborhood of a parity check $j$, i.e., the set of indices of components
relevant for the according parity check, is denoted by
$N_c(j) := \left\{i \mid i \in \mathcal{I}, \boldsymbol{H}_{j,i} = 1 \right\},
$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
\hspace{2mm} j \in \mathcal{J}$.
In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
$\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
$ \boldsymbol{x} \in \left\{-1, 1\right\}^n$, which is then transmitted over an
$ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
AWGN channel.
The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
estimate of the transmitted codeword, denoted as
$\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
A distinction is made between $\boldsymbol{x} \in \left\{-1, 1\right\}^n$
A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$,
the former denoting the BPSK symbol physically transmitted over the channel and
the latter being used as a variable during the optimization process.
The posterior probability of having transmitted $\boldsymbol{x}$ when receiving
$\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF)
$p_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
$P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
Likewise, the likelihood of receiving $\boldsymbol{y}$ upon transmitting
$\boldsymbol{x}$ is expressed as a \textit{probability density function} (PDF)
$f_{\boldsymbol{Y}\mid\boldsymbol{X}}(\boldsymbol{y} \mid \boldsymbol{x})$.
@@ -221,25 +252,24 @@ With proximal decoding, the proximal gradient method \cite{proximal_algorithms}
is used to solve a non-convex optimization formulation of the MAP decoding
problem.
When making the equal probability assumption for all codewords, MAP and ML
With the equal prior probability assumption for all codewords, MAP and ML
decoding are equivalent and, specifically for AWGN channels, correspond to a
nearest-neighbor decision.
For this reason, decoding can be done using a figure of merit that describes
the distance from a given vector to a codeword.
For this reason, decoding can be carried out using a figure of merit that
describes the distance from a given vector to a codeword.
One such expression, formulated under the assumption of BPSK, is the
\textit{code-constraint polynomial} \cite{proximal_paper}
%
\begin{align*}
h\left( \tilde{\boldsymbol{x}} \right) =
h( \tilde{\boldsymbol{x}} ) =
\underbrace{\sum_{i=1}^{n}
\left( \tilde{x_i}^2-1 \right) ^2}_{\text{Bipolar constraint}}
\left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
+ \underbrace{\sum_{j=1}^{m} \left[
\left( \prod_{i\in N_c \left( j \right) } \tilde{x_i} \right)
\left( \prod_{i\in \mathcal{N}_c \left( j \right) } \tilde{x}_i \right)
-1 \right] ^2}_{\text{Parity constraint}}
.\end{align*}%
%
Its intent is to penalize vectors far from a codeword and favor those close
to one.
Its intent is to penalize vectors far from a codeword.
It comprises two terms: one representing the bipolar constraint
and one representing the parity constraint, incorporating all of the
information regarding the code.
@@ -247,18 +277,18 @@ information regarding the code.
The channel model can be considered using the negative log-likelihood
%
\begin{align*}
L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left(
\boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right)
L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
\boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
.\end{align*}
%
The information about the channel and the code are consolidated in the objective
function \cite{proximal_paper}
%
\begin{align*}
g\left( \tilde{\boldsymbol{x}} \right)
= L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+ \gamma h\left( \tilde{\boldsymbol{x}} \right),
g \mleft( \tilde{\boldsymbol{x}} \mright)
= L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
+ \gamma h\mleft( \tilde{\boldsymbol{x}} \mright),
\hspace{5mm} \gamma > 0%
.\end{align*}
%
@@ -270,17 +300,17 @@ introduced, describing the result of each of the two steps:
%
\begin{alignat}{3}
\boldsymbol{r} &\leftarrow \boldsymbol{s}
- \omega \left( \boldsymbol{s} - \boldsymbol{y} \right)
- \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
\hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
\boldsymbol{s} &\leftarrow \boldsymbol{r}
- \gamma \nabla h\left( \boldsymbol{r} \right),
- \gamma \nabla h\mleft( \boldsymbol{r} \mright),
\hspace{5mm} &&\gamma > 0 \label{eq:s_update}
.\end{alignat}
%
An equation for determining $\nabla h(\boldsymbol{r})$ is given in
\cite{proximal_paper}.
It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
really represent $\tilde{\boldsymbol{x}}$ during different
represent $\tilde{\boldsymbol{x}}$ during different
stages of the decoding process.
As the gradient of the code-constraint polynomial can attain very large values
@@ -290,10 +320,10 @@ $\left[-\eta, \eta\right]^n$ by a projection
$\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$
is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
The resulting decoding process as described in \cite{proximal_paper} is
presented in algorithm \ref{alg:proximal_decoding}.
presented in Algorithm \ref{alg:proximal_decoding}.
\begin{algorithm}
\caption{Proximal decoding algorithm for an AWGN channel.}
\caption{Proximal decoding algorithm for an AWGN channel \cite{proximal_paper}.}
\label{alg:proximal_decoding}
\begin{algorithmic}
@@ -301,7 +331,7 @@ presented in algorithm \ref{alg:proximal_decoding}.
\STATE \textbf{for} $K$ iterations \textbf{do}
\STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
\STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
\STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathds{1} \left\{ \text{sign}\left( \boldsymbol{s} \right) = -1 \right\}$
\STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathbbm{1}_{\left\{ \boldsymbol{s} \preceq 0 \right\}}$
\STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
\STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
\STATE \hspace{5mm} \textbf{end if}
@@ -316,14 +346,14 @@ presented in algorithm \ref{alg:proximal_decoding}.
\section{Improved algorithm}
%%%%%%%%%%%%%%%%%%%%%
\subsection{Analysis of Convergence Behavior}
\subsection{Analysis of the Convergence Behavior}
In figure \ref{fig:fer vs ber}, the \textit{frame error rate} (FER),
In Fig. \ref{fig:fer vs ber}, the \textit{frame error rate} (FER),
\textit{bit error rate} (BER) and \textit{decoding failure rate} (DFR) of
proximal decoding are shown for an LDPC code with $n=204$ and $k=102$
\cite[204.33.484]{mackay}.
A decoding failure is defined as a decoding operation, the result of which is
not a valid codeword, i.e., as non-convergence of the algorithm.
A decoding failure is defined as a decoding operation returning an invalid
codeword, i.e., as non-convergence of the algorithm.
The parameters chosen for this simulation are $\gamma=0.05, \omega=0.05,
\eta=1.5$ and $K=200$.
They were determined to offer the best performance in a preliminary examination,
@@ -334,65 +364,52 @@ This means that most frame errors are not due to the algorithm converging
to the wrong codeword, but due to the algorithm not converging at all.
As proximal decoding is an optimization-based decoding method, one possible
explanation for this effect might be that during the decoding process convergence
on the final codeword is often not achieved, although the estimate is moving in
the right general direction.
explanation for this effect might be that during the decoding process, convergence
to the final codeword is often not achieved, although the estimate is moving in
the right direction.
This would suggest that most frame errors occur due to only a few incorrectly
decoded bits.%
%
\begin{figure}[ht]
\begin{figure}
\centering
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
mark=triangle,
fancy marks
},
BERPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
DFRPlot/.style={
only marks,
mark=square*,
fancy marks,
}}
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_\text{b} / N_0$ (dB)}, ylabel={},
ymode=log,
xmin=1, xmax=8,
ymax=1, ymin=1e-6,
% ytick={1e-0, 1e-2, 1e-4, 1e-6},
width=\figwidth,
height=\figheight,
legend pos = south west,
]
\addplot+[FERPlot, scol0]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER}
\addplot+[DFRPlot, scol2]
table [x=SNR, y=DFR, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{DFR}
\addplot+[BERPlot, scol1]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER}
\end{axis}
\end{tikzpicture}
\ifoverleaf
\includegraphics{figs/letter-figure0.pdf}
\else
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_\text{b} / N_0$ (dB)}, ylabel={},
ymode=log,
xmin=1, xmax=8,
ymax=1, ymin=1e-6,
% ytick={1e-0, 1e-2, 1e-4, 1e-6},
width=\figwidth,
height=\figheight,
legend pos = south west,
]
\addplot+[FERPlot, mark=o, mark options={solid}, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER}
\addplot+[BERPlot, mark=*, scol1]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER}
\addplot+[DFRPlot, mark=square*, scol0]
table [x=SNR, y=DFR, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{DFR}
\end{axis}
\end{tikzpicture}
\fi
\caption{FER, DFR, and BER for a $\left( 3, 6 \right)$-regular LDPC code with
$n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
@@ -402,116 +419,124 @@ decoded bits.%
\label{fig:fer vs ber}
\end{figure}%
%
An approach for lowering the FER might then be to append an ``ML-in-the-list''
\cite{ml_in_the_list} step to the decoding process shown in algorithm
\cite{ml_in_the_list} step to the decoding process shown in Algorithm
\ref{alg:proximal_decoding}.
This step would consist of determining the $N \in \mathbb{N}$ most probably
wrong bits, finding all variations of the current estimate with those bits
This step consists in determining the $N \in \mathbb{N}$ most probable
erroneous bits, finding all variations of the current estimate with those bits
modified, and performing ML decoding on this list.
This approach crucially relies on identifying the most probably wrong bits.
This approach crucially relies on identifying the most probable erroneous bits.
Therefore, the convergence properties of proximal decoding are investigated.
Considering equations (\ref{eq:s_update}) and (\ref{eq:r_update}), figure
Considering (\ref{eq:s_update}) and (\ref{eq:r_update}), Fig.
\ref{fig:grad} shows the two gradients along which the minimization is
performed for a repetition code with $n=2$.
It is apparent that a net movement will result as long as the two gradients
have a common component.
As soon as this common component is exhausted, they will work in opposing
directions and an oscillation of the estimate will take place.
This behavior matches the conjecture that the reason for the high DFR is a
directions resulting in an oscillation of the estimate.
This behavior supports the conjecture that the reason for the high DFR is a
failure to converge to the correct codeword in the final steps of the
optimization process.%
%
\begin{figure}[h]
\begin{figure}
\centering
\begin{tikzpicture}
\begin{axis}[xmin = -1.25, xmax=1.25,
ymin = -1.25, ymax=1.25,
xlabel={$\tilde{x}_1$},
ylabel={$\tilde{x}_2$},
y label style={at={(axis description cs:-0.06,0.5)},anchor=south},
width=\figwidth,
height=\figheight,
grid=major, grid style={dotted},
view={0}{90}]
\addplot3[point meta=\thisrow{grad_norm},
point meta min=1,
point meta max=2.5,
quiver={u=\thisrow{grad_0},
v=\thisrow{grad_1},
scale arrows=.05,
every arrow/.append style={%
line width=.3
+\pgfplotspointmetatransformed/1000,
-{Latex[length=0pt 5,width=0pt 3]}
},
},
quiver/colored = {mapped color},
-stealth,
]
table[col sep=comma] {res/2d_grad_L.csv};
\end{axis}
\begin{axis}[hide axis,
width=\figwidth,
height=\figheight,
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend style={draw=white!15!black,
legend cell align=left,
empty legend,
at={(0.9775,0.97)},anchor=north east}]
\addlegendimage{mark=none}
\addlegendentry{
$\nabla L\left(\boldsymbol{y}
\mid \tilde{\boldsymbol{x}}\right)$
};
\end{axis}
\end{tikzpicture}
\ifoverleaf
\includegraphics{figs/letter-figure1.pdf}
\else
\begin{tikzpicture}
\begin{axis}[xmin = -1.25, xmax=1.25,
ymin = -1.25, ymax=1.25,
xlabel={$\tilde{x}_1$},
ylabel={$\tilde{x}_2$},
y label style={at={(axis description cs:-0.06,0.5)},anchor=south},
width=\figwidth,
height=\figheight,
grid=major, grid style={dotted},
view={0}{90}]
\addplot3[point meta=\thisrow{grad_norm},
point meta min=1,
point meta max=2.5,
quiver={u=\thisrow{grad_0},
v=\thisrow{grad_1},
scale arrows=.05,
every arrow/.append style={%
line width=.3
+\pgfplotspointmetatransformed/1000,
-{Latex[length=0pt 5,width=0pt 3]}
},
},
quiver/colored = {mapped color},
-stealth,
]
table[col sep=comma] {res/2d_grad_L.csv};
\end{axis}
\begin{axis}[hide axis,
width=\figwidth,
height=\figheight,
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend style={draw=white!15!black,
legend cell align=left,
empty legend,
at={(0.9775,0.97)},anchor=north east}]
\addlegendimage{mark=none}
\addlegendentry{
$\nabla L\left(\boldsymbol{y}
\mid \tilde{\boldsymbol{x}}\right)$
};
\end{axis}
\end{tikzpicture}
\fi
\vspace{3mm}
\begin{tikzpicture}
\begin{axis}[xmin = -1.25, xmax=1.25,
ymin = -1.25, ymax=1.25,
width=\figwidth,
height=\figheight,
xlabel={$\tilde{x}_1$},
ylabel={$\tilde{x}_2$},
y label style={at={(axis description cs:-0.06,0.5)},anchor=south},
grid=major, grid style={dotted},
view={0}{90}]
\addplot3[point meta=\thisrow{grad_norm},
point meta min=1,
point meta max=7,
quiver={u=\thisrow{grad_0},
v=\thisrow{grad_1},
scale arrows=.03,
every arrow/.append style={%
line width=.5
+\pgfplotspointmetatransformed/1000,
-{Latex[length=0pt 5,width=0pt 3]}
},
},
quiver/colored = {mapped color},
-stealth,
]
table[col sep=comma] {res/2d_grad_h.csv};
\end{axis}
\begin{axis}[hide axis,
width=\figwidth,
height=\figheight,
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend style={draw=white!15!black,
legend cell align=left,
empty legend,
at={(0.9775,0.97)},anchor=north east}]
\addlegendimage{mark=none}
\addlegendentry{$\nabla h\left(\tilde{\boldsymbol{x}}\right)$};
\end{axis}
\end{tikzpicture}
\ifoverleaf
\includegraphics{figs/letter-figure2.pdf}
\else
\begin{tikzpicture}
\begin{axis}[xmin = -1.25, xmax=1.25,
ymin = -1.25, ymax=1.25,
width=\figwidth,
height=\figheight,
xlabel={$\tilde{x}_1$},
ylabel={$\tilde{x}_2$},
y label style={at={(axis description cs:-0.06,0.5)},anchor=south},
grid=major, grid style={dotted},
view={0}{90}]
\addplot3[point meta=\thisrow{grad_norm},
point meta min=1,
point meta max=7,
quiver={u=\thisrow{grad_0},
v=\thisrow{grad_1},
scale arrows=.03,
every arrow/.append style={%
line width=.5
+\pgfplotspointmetatransformed/1000,
-{Latex[length=0pt 5,width=0pt 3]}
},
},
quiver/colored = {mapped color},
-stealth,
]
table[col sep=comma] {res/2d_grad_h.csv};
\end{axis}
\begin{axis}[hide axis,
width=\figwidth,
height=\figheight,
xmin=10, xmax=50,
ymin=0, ymax=0.4,
legend style={draw=white!15!black,
legend cell align=left,
empty legend,
at={(0.9775,0.97)},anchor=north east}]
\addlegendimage{mark=none}
\addlegendentry{$\nabla h\left(\tilde{\boldsymbol{x}}\right)$};
\end{axis}
\end{tikzpicture}
\fi
\caption{Gradients
$\nabla L\left(\boldsymbol{y} \mid \tilde{\boldsymbol{x}}\right)$
and $\nabla h \left( \tilde{\boldsymbol{x}} \right)$ for a repetition
@@ -521,48 +546,54 @@ optimization process.%
\label{fig:grad}
\end{figure}%
%
In figure \ref{fig:prox:convergence_large_n}, only component
$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate is considered during a
decoding operation for an LDPC code with $n=204$ and $k=102$.
In Fig. \ref{fig:prox:convergence_large_n}, we consider only component
$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate during a
decoding operation for the LDPC code also used for Fig. \ref{fig:fer vs ber}.
Two qualities may be observed.
First, the average values of the two gradients are equal, except for their sign,
First, we observe that the average absolute values of the two gradients are
equal; however, they have opposing signs,
leading to the aforementioned oscillation.
Second, the gradient of the code constraint polynomial itself starts to
oscillate after a certain number of iterations.%
%
\begin{figure}[ht]
\begin{figure}
\centering
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={Iterations},
width=\figwidth,
height=\figheight,
xtick={0, 100, ..., 400},
xticklabels={0, 50, ..., 200},
xmin=0, xmax=300,
ymin=-4, ymax=2,
ytick={-4,-3,...,2},
legend pos = south east,
]
\addplot+ [mark=none, line width=1]
table [col sep=comma, x=k, y=comb_r_s_0,
discard if gt={k}{300}]
{res/extreme_components_20433484_combined.csv};
\addplot+ [mark=none, line width=1,
discard if gt={k}{300}]
table [col sep=comma, x=k, y=grad_L_0]
{res/extreme_components_20433484_combined.csv};
\addplot+ [mark=none, line width=1]
table [col sep=comma, x=k, y=grad_h_0,
discard if gt={k}{300}]
{res/extreme_components_20433484_combined.csv};
\addlegendentry{$\left(\tilde{\boldsymbol{x}}\right)_1$}
\addlegendentry{$\left(\nabla L\right)_1$}
\addlegendentry{$\left(\nabla h\right)_1$}
\end{axis}
\end{tikzpicture}
\ifoverleaf
\includegraphics{figs/letter-figure3.pdf}
\else
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={Iterations},
width=\figwidth,
height=\figheight,
xtick={0, 100, ..., 400},
xticklabels={0, 50, ..., 200},
xmin=0, xmax=300,
ymin=-4, ymax=2,
ytick={-4,-3,...,2},
legend pos = south east,
]
\addplot+ [mark=none, line width=1]
table [col sep=comma, x=k, y=comb_r_s_0,
discard if gt={k}{300}]
{res/extreme_components_20433484_combined.csv};
\addplot+ [mark=none, line width=1,
discard if gt={k}{300}]
table [col sep=comma, x=k, y=grad_L_0]
{res/extreme_components_20433484_combined.csv};
\addplot+ [mark=none, line width=1]
table [col sep=comma, x=k, y=grad_h_0,
discard if gt={k}{300}]
{res/extreme_components_20433484_combined.csv};
\addlegendentry{$\left(\tilde{\boldsymbol{x}}\right)_1$}
\addlegendentry{$\left(\nabla L\right)_1$}
\addlegendentry{$\left(\nabla h\right)_1$}
\end{axis}
\end{tikzpicture}
\fi
\caption{Visualization of component $\left(\tilde{\boldsymbol{x}}\right)_1$
for a decoding operation for a (3,6) regular LDPC code with
@@ -574,11 +605,11 @@ oscillate after a certain number of iterations.%
\end{figure}%
%%%%%%%%%%%%%%%%%%%%%
\subsection{Improvement using ``ML-in-the-list'' step}
\subsection{Improvement Using ``ML-in-the-List'' Step}
Considering the magnitude of oscillation of the gradient of the code constraint
Considering the magnitude of the oscillation of the gradient of the code constraint
polynomial, some interesting behavior may be observed.
Figure \ref{fig:p_error} shows the probability that a component of the estimate
Fig. \ref{fig:p_error} shows the probability that a component of the estimate
is wrong, determined through a Monte Carlo simulation, when the components of
$\boldsymbol{c}$ are ordered from smallest to largest oscillation of
$\left(\nabla h\right)_i$.
@@ -591,37 +622,41 @@ the probability that a given component was decoded incorrectly.%
\begin{figure}[H]
\centering
\begin{tikzpicture}
    % Error probability per component on a logarithmic y-axis. Only the
    % y column (p_error) is selected; x is left at the table default.
    \begin{axis}[
            width=\figwidth,
            height=\figheight,
            grid=both,
            xlabel={$i'$},
            ylabel={$P(\hat{c}_{i'} \ne c_{i'})$},
            ymode=log,
            xmin=0, xmax=200,
            ymin=1e-9, ymax=1e-5,
        ]
        \addplot+ [scol1, mark=none, line width=1]
            table [col sep=comma, y=p_error] {res/p_error.csv};
    \end{axis}
\end{tikzpicture}
\ifoverleaf
\includegraphics{figs/letter-figure4.pdf}
\else
\begin{tikzpicture}
    % Error probability per component on a logarithmic y-axis. Only the
    % y column (p_error) is selected; x is left at the table default.
    \begin{axis}[
            width=0.95\figwidth,
            height=\figheight,
            grid=both,
            xlabel={$i'$},
            ylabel={$P(\hat{c}_{i'} \ne c_{i'})$},
            ymode=log,
            xmin=0, xmax=200,
            ymin=1e-9, ymax=1e-5,
        ]
        \addplot+ [scol0, mark=none, line width=1]
            table [col sep=comma, y=p_error] {res/p_error.csv};
    \end{axis}
\end{tikzpicture}
\fi
\caption{Probability that a component of the estimated codeword
$\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is wrong for a (3,6) regular
$\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
The indices $i'$ are ordered such that the amplitude of oscillation of
$\left(\nabla h\right)_{i'}$ increases with $i'$.
Parameters used for simulation: $\gamma = 0.05, \omega = 0.05,
Parameters used for the simulation: $\gamma = 0.05, \omega = 0.05,
\eta = 1.5, E_\text{b}/N_0 = \SI{4}{dB}$.
Simulated with \num{100000000} iterations.}
Simulated with \num{100000000} iterations using the all-zeros codeword.}
\label{fig:p_error}
\end{figure}
The complete improved algorithm is depicted in algorithm \ref{alg:improved}.
The complete improved algorithm is given in Algorithm \ref{alg:improved}.
First, the proximal decoding algorithm is applied.
If a valid codeword has been reached, i.e., if the algorithm has converged, this
is the solution returned.
If a valid codeword has been reached, i.e., if the algorithm has converged,
we return this solution.
Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion
presented above.
Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$,
@@ -668,7 +703,7 @@ generated and an ``ML-in-the-list'' step is performed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Simulation Results \& Discussion}
Figure \ref{fig:results} shows the FER and BER resulting from applying
Fig.~\ref{fig:results} shows the FER and BER resulting from applying
proximal decoding as presented in \cite{proximal_paper} and the improved
algorithm presented here when applied to a $\left( 3,6 \right)$-regular LDPC
code with $n=204$ and $k=102$ \cite[204.33.484]{mackay}.
@@ -676,67 +711,57 @@ The parameters chosen for the simulation are
$\gamma = 0.05, \omega=0.05, \eta=1.5, K=200$.
Again, these parameters were chosen,%
%
\begin{figure}[ht]
\begin{figure}
\centering
\begin{tikzpicture}
% Named plot styles reused by the \addplot commands of this figure.
% NOTE(review): 'fancy marks' is not defined in this excerpt; it is assumed
% to be a key declared in the preamble.
\pgfplotsset{
% ProxPlot: 1pt line with filled-circle marks; used below for the curves
% whose legend entries read "prox. dec.".
ProxPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
% ImprPlot: 1pt densely dashed line with triangle marks; used below for the
% curves whose legend entries read "improved".
ImprPlot/.style={
line width=1pt,
mark=triangle,
densely dashed,
fancy marks,
},
}
\ifoverleaf
\includegraphics{figs/letter-figure5.pdf}
\else
\begin{tikzpicture}
\begin{axis}[
grid=both,
xlabel={$E_\text{b} / N_0$ (dB)},
ymode=log,
xmin=1, xmax=8,
ymax=1, ymin=1e-6,
width=\figwidth,
height=\figheight,
legend columns=2,
legend style={draw=white!15!black,
legend cell align=left,
at={(0.5,-0.44)},anchor=south}
]
\begin{axis}[
grid=both,
xlabel={$E_\text{b} / N_0$ (dB)},
ymode=log,
xmin=1, xmax=8,
ymax=1, ymin=1e-6,
width=\figwidth,
height=\figheight,
legend columns=2,
legend style={draw=white!15!black,
legend cell align=left,
at={(0.5,-0.44)},anchor=south}
]
\addplot+[FERPlot, mark=o, mark options={solid}, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, prox. dec.};
\addplot+[ProxPlot, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, prox. dec.};
\addplot+[BERPlot, mark=*, scol1]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, prox. dec.};
\addplot+[ProxPlot, scol2]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, prox. dec.};
\addplot+[FERPlot, mark=triangle, mark options={solid}, scol2]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, improved};
\addplot+[ImprPlot, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, improved};
\addplot+[ImprPlot, scol2]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{6.5}]
{res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, improved};
\end{axis}
\end{tikzpicture}
\addplot+[BERPlot, mark=triangle*, scol2]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{6.5}]
{res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, improved};
\end{axis}
\end{tikzpicture}
\fi
\caption{FER and BER of proximal decoding \cite{proximal_paper} and the
improved algorithm for a $\left( 3, 6 \right)$-regular LDPC code with
@@ -761,7 +786,7 @@ The gain varies significantly
with the SNR (which is to be expected, since with higher SNR values the number
of bit errors decreases, making the correction of those errors in the
``ML-in-the-list'' step more likely).
For an FER of $10^{-6}$ the gain is approximately $\SI{1}{dB}$.
For an FER of $10^{-6}$, the gain is approximately $\SI{1}{dB}$.
Similar behavior can be observed with various other codes.
No immediate relationship between the code length and the gain was observed
during our examinations.
@@ -776,7 +801,7 @@ from only a few components of the estimate being wrong.
These few erroneous components can mostly be corrected by appending an
additional step to the original algorithm that is only executed if the
algorithm has not converged.
A gain of up to $\sim\SI{1}{dB}$ can be observed, depending on the code,
A gain of up to $\SI{1}{dB}$ can be observed, depending on the code,
the parameters considered, and the SNR.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -796,41 +821,6 @@ Ministry of Education and Research (BMBF) within the project Open6GHub
%
\begin{thebibliography}{1}
\bibliographystyle{IEEEtran}
\bibitem{ADMM}
S. Barman, X. Liu, S. C. Draper and B. Recht, ``Decomposition Methods for Large Scale LP Decoding,'' in IEEE Transactions on Information Theory, vol. 59, no. 12, pp. 7870-7886, Dec. 2013, doi: 10.1109/TIT.2013.2281372.
\bibitem{feldman_paper}
J. Feldman, M. J. Wainwright and D. R. Karger, ``Using linear programming to decode binary linear codes,'' in IEEE Transactions on Information Theory, vol. 51, no. 3, pp. 954-972, March 2005, doi: 10.1109/TIT.2004.842696.
\bibitem{ml_in_the_list}
M. Geiselhart, A. Elkelesh, M. Ebada, S. Cammerer and S. ten Brink, ``Automorphism Ensemble Decoding of Reed--Muller Codes,'' in IEEE Transactions on Communications, vol. 69, no. 10, pp. 6424-6438, Oct. 2021, doi: 10.1109/TCOMM.2021.3098798.
\bibitem{mackay99}
D. J. C. MacKay, ``Good error-correcting codes based on very sparse matrices,'' in IEEE Transactions on Information Theory, vol. 45, no. 2, pp. 399-431, March 1999, doi: 10.1109/18.748992.
\bibitem{mackay}
D. J. C. MacKay, ``Encyclopedia of sparse graph codes [online],''
Available: http://www.inference.phy.cam.ac.uk/mackay/codes/data.html
\bibitem{proximal_algorithms}
N. Parikh and S. Boyd, ``Proximal algorithms,'' Found. Trends Optim., vol. 1, no. 3, pp. 127--239, Jan. 2014.
\bibitem{channel_codes_book}
W. Ryan and S. Lin, Channel Codes: Classical and Modern, Cambridge, Cambridge University Press, 2009, pp. 651-670.
\bibitem{adaptive_lp_decoding}
M. H. Taghavi and P. H. Siegel, ``Adaptive Linear Programming Decoding,'' 2006 IEEE International Symposium on Information Theory, Seattle, WA, USA, 2006, pp. 1374-1378, doi: 10.1109/ISIT.2006.262071.
\bibitem{interior_point_decoding}
P. O. Vontobel, ``Interior-point algorithms for linear-programming decoding,'' 2008 Information Theory and Applications Workshop, San Diego, CA, USA, 2008, pp. 433-437, doi: 10.1109/ITA.2008.4601085.
\bibitem{proximal_paper}
T. Wadayama and S. Takabe, ``Proximal decoding for LDPC codes,'' IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences, vol. advpub, 2022TAP0002, 2022.
\end{thebibliography}
\printbibliography
\end{document}