Compare commits

1 Commit

Author SHA1 Message Date
81d8d5b36a Add generic localSettings.yaml as reference 2024-01-07 20:11:22 +01:00
6 changed files with 206 additions and 265 deletions


@@ -2,13 +2,6 @@ FROM alpine:3.19
RUN apk update && apk upgrade
RUN apk add make texlive texmf-dist-pictures
RUN apk add texmf-dist-publishers texmf-dist-science texmf-dist-fontsextra texmf-dist-latexextra
RUN apk add biber texmf-dist-bibtexextra
# The 'bbm' package insists on generating font files in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home
RUN apk add texmf-dist-publishers
RUN apk add texmf-dist-science
RUN apk add texmf-dist-fontsextra


@@ -6,16 +6,4 @@ RUN pacman -Sy archlinux-keyring --noconfirm && pacman -Su --noconfirm
RUN pacman -Syu --noconfirm
RUN pacman -S make perl texlive texlive-binextra texlive-pictures --noconfirm
RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra texlive-latexextra --noconfirm
RUN pacman -Syu biber texlive-bibtexextra --noconfirm
# The 'bbm' package insists on generating font files in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home
# For some reason simply installing 'biber' does not set the path
ENV PATH="${PATH}:/usr/bin/vendor_perl"
RUN pacman -S texlive-publishers texlive-mathscience texlive-fontsextra --noconfirm


@@ -4,13 +4,4 @@ ARG DEBIAN_FRONTEND=noninteractive
RUN apt update -y && apt upgrade -y
RUN apt install make texlive latexmk texlive-pictures -y
RUN apt install texlive-publishers texlive-science texlive-fonts-extra texlive-latex-extra -y
RUN apt install biber texlive-bibtex-extra -y
# The 'bbm' package insists on generating font files in the home directory. In
# order to guarantee access to the home directory no matter the user the
# docker container is run with, create a temporary one anyone can write to
RUN mkdir /tmp/home
RUN chmod -R 777 /tmp/home
ENV HOME=/tmp/home
RUN apt install make texlive-publishers texlive-science texlive-fonts-extra -y


@@ -1,104 +0,0 @@
@ARTICLE{ADMM,
author={Barman, Siddharth and Liu, Xishuo and Draper, Stark C. and Recht, Benjamin},
journal={IEEE Transactions on Information Theory},
title={Decomposition Methods for Large Scale LP Decoding},
year={2013},
volume={59},
number={12},
pages={7870-7886},
% doi={10.1109/TIT.2013.2281372}
}
@ARTICLE{feldman_paper,
author={Feldman, J. and Wainwright, M.J. and Karger, D.R.},
journal={IEEE Transactions on Information Theory},
title={Using linear programming to decode binary linear codes},
year={2005},
volume={51},
number={3},
pages={954-972},
% doi={10.1109/TIT.2004.842696}
}
@ARTICLE{ml_in_the_list,
author={Geiselhart, Marvin and Elkelesh, Ahmed and Ebada, Moustafa and Cammerer, Sebastian and Brink, Stephan ten},
journal={IEEE Transactions on Communications},
title={Automorphism Ensemble Decoding of Reed--Muller Codes},
year={2021},
volume={69},
number={10},
pages={6424-6438},
% doi={10.1109/TCOMM.2021.3098798}
}
@ARTICLE{mackay99,
author={MacKay, D.J.C.},
journal={IEEE Transactions on Information Theory},
title={Good error-correcting codes based on very sparse matrices},
year={1999},
volume={45},
number={2},
pages={399-431},
% doi={10.1109/18.748992}
}
@online{mackay,
author = {MacKay, David J.C.},
title = {Encyclopedia of Sparse Graph Codes},
date = {2023-04},
url = {http://www.inference.org.uk/mackay/codes/data.html}
}
@article{proximal_algorithms,
title={Proximal algorithms},
author={Parikh, Neal and Boyd, Stephen and others},
journal={Foundations and trends{\textregistered} in Optimization},
volume={1},
number={3},
pages={127--239},
year={2014},
publisher={Now Publishers, Inc.}
}
@book{channel_codes_book,
place={Cambridge},
title={Channel Codes: Classical and Modern},
% DOI={10.1017/CBO9780511803253},
publisher={Cambridge University Press},
author={Ryan, William and Lin, Shu},
year={2009},
% url={https://d1.amobbs.com/bbs_upload782111/files_35/ourdev_604508GHLFR2.pdf}
}
@INPROCEEDINGS{adaptive_lp_decoding,
author={Taghavi, Mohammad H. and Siegel, Paul H.},
booktitle={2006 IEEE International Symposium on Information Theory},
title={Adaptive Linear Programming Decoding},
year={2006},
volume={},
number={},
pages={1374-1378},
% doi={10.1109/ISIT.2006.262071}
}
@INPROCEEDINGS{interior_point_decoding,
author={Vontobel, Pascal O.},
booktitle={2008 Information Theory and Applications Workshop},
title={Interior-point algorithms for linear-programming decoding},
year={2008},
volume={},
number={},
pages={433-437},
% doi={10.1109/ITA.2008.4601085}
}
@article{proximal_paper,
title={Proximal Decoding for {LDPC} Codes},
author={Tadashi Wadayama and Satoshi Takabe},
journal={IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences},
% volume={advpub},
% pages={2022TAP0002},
year={2022},
% doi={10.1587/transfun.2022TAP0002}
}


@@ -7,14 +7,6 @@
\usepackage{algorithm}
\usepackage{siunitx}
\usepackage{dsfont}
\usepackage{mleftright}
\usepackage{bbm}
\usepackage[
backend=biber,
style=ieee,
sorting=nty,
]{biblatex}
\usepackage{tikz}
\usetikzlibrary{spy, arrows.meta,arrows}
@@ -45,28 +37,15 @@
\input{lib/latex-common/common.tex}
\fi
\addbibresource{letter.bib}
\AtBeginBibliography{\footnotesize}
\pgfplotsset{colorscheme/cel}
% TODO
\pgfplotsset{fancy marks/.style={}}
\newcommand{\figwidth}{\columnwidth}
\newcommand{\figheight}{0.75\columnwidth}
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
},
BERPlot/.style={
line width=1pt,
},
DFRPlot/.style={
only marks,
},
}
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -78,7 +57,7 @@
\begin{document}
\title{List-based Proximal Decoding for Linear Block Codes}
\title{A Note on Improving Proximal Decoding for Linear Block Codes}
\author{Andreas Tsouchlos, Holger Jäkel, and Laurent Schmalen\\
Communications Engineering Lab (CEL), Karlsruhe Institute of Technology (KIT)\\
@@ -101,19 +80,20 @@ Hertzstr. 16, 76187 Karlsruhe, Germany, Email: \texttt{\{first.last\}@kit.edu}}
\begin{abstract}
In this paper, the proximal decoding algorithm is considered within the
context of \textit{additive white Gaussian noise} (AWGN) channels.
An analysis of the convergence behavior of the algorithm shows that
proximal decoding inherently enters an oscillating behavior of the estimate
after a certain number of iterations.
An analysis of the convergence behavior of the algorithm shows that it is an
inherent property of proximal decoding to enter an
oscillating behavior of the estimate after a number of iterations.
Due to this oscillation, frame errors arising during decoding can often
be attributed to only a few remaining wrongly decoded bits.
In this letter, an improvement of the proximal decoding algorithm is proposed
by appending an additional step, in which these erroneous components are
attempted to be corrected.
We suggest an empirical rule with which the components most likely needing
be attributed to only a few remaining wrongly decoded components.
In this paper, an improvement of the algorithm is proposed by appending an
additional step, in which these erroneous components are attempted to be
corrected.
An empirical rule is suggested, with which the components most likely needing
correction can be determined.
Using this insight and performing a subsequent ``ML-in-the-list'' decoding,
a gain of up to 1 dB is achieved compared to conventional
proximal decoding, depending on the decoder parameters and the code.
a gain of up to approximately 1 dB is achieved compared to proximal decoding,
depending on the parameters chosen and the code considered.
\end{abstract}
\begin{IEEEkeywords}
@@ -136,7 +116,7 @@ the reliability of data by detecting and correcting any errors that may occur
during its transmission or storage.
One class of binary linear codes, \textit{low-density parity-check} (LDPC)
codes, has become especially popular due to its ability to reach arbitrarily
small error probabilities at code rates up to the capacity of the channel
small probabilities of error at code rates up to the capacity of the channel
\cite{mackay99}, while retaining a structure that allows for very efficient
decoding.
While the established decoders for LDPC codes, such as belief propagation (BP)
@@ -149,37 +129,37 @@ Optimization based decoding algorithms are an entirely different way of
approaching the decoding problem.
A number of different such algorithms have been introduced.
The field of \textit{linear programming} (LP) decoding \cite{feldman_paper},
for example, represents one class of such algorithms, based on a relaxation
for example, represents one class of such algorithms, based on a reformulation
of the \textit{maximum likelihood} (ML) decoding problem as a linear program.
Many different optimization algorithms can be used to solve the resulting
problem \cite{ADMM, adaptive_lp_decoding, interior_point_decoding}.
problem \cite{interior_point_decoding, ADMM, adaptive_lp_decoding}.
Recently, proximal decoding for LDPC codes was presented by
Wadayama \textit{et al.} \cite{proximal_paper}.
Proximal decoding relies on a non-convex optimization formulation
Wadayama et al. \cite{proximal_paper}.
It is a novel approach and relies on a non-convex optimization formulation
of the \textit{maximum a posteriori} (MAP) decoding problem.
The aim of this work is to improve upon the performance of proximal decoding by
first presenting an examination of the algorithm's behavior and then suggesting
an approach to mitigate some of its flaws.
This analysis is performed for
This analysis is performed within the context of
\textit{additive white Gaussian noise} (AWGN) channels.
We first observe that the algorithm initially moves the estimate in
the right direction, however, in the final steps of the decoding process,
convergence to the correct codeword is often not achieved.
Furthermore, we suggest that the reason for this behavior is the nature
It is first observed that, while the algorithm initially moves the estimate in
the right direction, in the final steps of the decoding process convergence to
the correct codeword is often not achieved.
Furthermore, it is suggested that the reason for this behavior is the nature
of the decoding algorithm itself, comprising two separate gradient descent
steps working adversarially.
We propose a method to mitigate this effect by appending an
additional step to the decoding process.
A method to mitigate this effect is proposed by appending an additional step
to the decoding process.
In this additional step, the components of the estimate with the highest
probability of being erroneous are identified.
New codewords are then generated, over which an ``ML-in-the-list''
\cite{ml_in_the_list} decoding is performed.
A process to conduct this identification is proposed in this paper.
Using the improved algorithm, a gain of up to
1 dB can be achieved compared to conventional proximal decoding,
depending on the decoder parameters and the code.
approximately 1 dB can be achieved compared to proximal decoding, depending on
the parameters chosen and the code considered.
%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -202,31 +182,31 @@ number of parity-checks:
\end{align*}
%
The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each correspond
to a parity check, i.e., row of $\boldsymbol{H}$.
The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
to the components of a codeword being subjected to a parity check, i.e.,
to the columns of $\boldsymbol{H}$.
to the components of a codeword being subjected to a parity check, i.e., to
columns of $\boldsymbol{H}$.
The neighborhood of a parity check $j$, i.e., the set of indices of components
relevant for the according parity check, is denoted by
$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
$N_c(j) := \left\{i \mid i \in \mathcal{I}, \boldsymbol{H}_{j,i} = 1 \right\},
\hspace{2mm} j \in \mathcal{J}$.
In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
$\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
$ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
$ \boldsymbol{x} \in \left\{-1, 1\right\}^n$, which is then transmitted over an
AWGN channel.
The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
estimate of the transmitted codeword, denoted as
$\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
A distinction is made between $\boldsymbol{x} \in \left\{-1, 1\right\}^n$
and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$,
the former denoting the BPSK symbol physically transmitted over the channel and
the latter being used as a variable during the optimization process.
The posterior probability of having transmitted $\boldsymbol{x}$ when receiving
$\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF)
$P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
$p_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
Likewise, the likelihood of receiving $\boldsymbol{y}$ upon transmitting
$\boldsymbol{x}$ is expressed as a \textit{probability density function} (PDF)
$f_{\boldsymbol{Y}\mid\boldsymbol{X}}(\boldsymbol{y} \mid \boldsymbol{x})$.
@@ -241,24 +221,25 @@ With proximal decoding, the proximal gradient method \cite{proximal_algorithms}
is used to solve a non-convex optimization formulation of the MAP decoding
problem.
With the equal prior probability assumption for all codewords, MAP and ML
When making the equal probability assumption for all codewords, MAP and ML
decoding are equivalent and, specifically for AWGN channels, correspond to a
nearest-neighbor decision.
For this reason, decoding can be carried out using a figure of merit that
describes the distance from a given vector to a codeword.
For this reason, decoding can be done using a figure of merit that describes
the distance from a given vector to a codeword.
One such expression, formulated under the assumption of BPSK, is the
\textit{code-constraint polynomial} \cite{proximal_paper}
%
\begin{align*}
h( \tilde{\boldsymbol{x}} ) =
h\left( \tilde{\boldsymbol{x}} \right) =
\underbrace{\sum_{i=1}^{n}
\left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
\left( \tilde{x_i}^2-1 \right) ^2}_{\text{Bipolar constraint}}
+ \underbrace{\sum_{j=1}^{m} \left[
\left( \prod_{i\in \mathcal{N}_c \left( j \right) } \tilde{x}_i \right)
\left( \prod_{i\in N_c \left( j \right) } \tilde{x_i} \right)
-1 \right] ^2}_{\text{Parity constraint}}
.\end{align*}%
%
Its intent is to penalize vectors far from a codeword.
Its intent is to penalize vectors far from a codeword and favor those close
to one.
It comprises two terms: one representing the bipolar constraint
and one representing the parity constraint, incorporating all of the
information regarding the code.
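For illustration, a minimal NumPy sketch of the code-constraint polynomial is given below; this is our own restatement, with the parity-check matrix represented as a 0/1 array H and all names chosen for exposition.

import numpy as np

def h(x_tilde: np.ndarray, H: np.ndarray) -> float:
    # Bipolar constraint: penalizes components away from +/-1.
    bipolar = np.sum((x_tilde ** 2 - 1.0) ** 2)
    # Parity constraint: the product over each neighborhood N_c(j)
    # must equal +1, i.e., an even number of -1 components per check.
    parity = sum((np.prod(x_tilde[row == 1]) - 1.0) ** 2 for row in H)
    return float(bipolar + parity)

H = np.array([[1, 1, 0], [0, 1, 1]])     # toy parity-check matrix
print(h(np.array([1.0, 1.0, 1.0]), H))   # 0.0: BPSK image of the zero codeword
print(h(np.array([0.9, -0.2, 1.1]), H))  # > 0: far from any codeword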
@@ -266,18 +247,18 @@ information regarding the code.
The channel model can be considered using the negative log-likelihood
%
\begin{align*}
L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
\boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left(
\boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right)
.\end{align*}
%
The information about the channel and the code is consolidated in the objective
function \cite{proximal_paper}
%
\begin{align*}
g \mleft( \tilde{\boldsymbol{x}} \mright)
= L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
+ \gamma h\mleft( \tilde{\boldsymbol{x}} \mright),
g\left( \tilde{\boldsymbol{x}} \right)
= L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+ \gamma h\left( \tilde{\boldsymbol{x}} \right),
\hspace{5mm} \gamma > 0%
.\end{align*}
%
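For the AWGN channel considered here, the likelihood term is, up to an additive constant, a scaled squared Euclidean distance; restating it explicitly,
%
\begin{align*}
L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
= \frac{1}{2\sigma^2}
\left\lVert \boldsymbol{y} - \tilde{\boldsymbol{x}} \right\rVert_2^2
+ \mathrm{const}
,\end{align*}
%
so minimizing $g$ trades off proximity to the received vector against proximity to the code, with $\gamma$ controlling the balance.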
@@ -289,17 +270,17 @@ introduced, describing the result of each of the two steps:
%
\begin{alignat}{3}
\boldsymbol{r} &\leftarrow \boldsymbol{s}
- \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
- \omega \left( \boldsymbol{s} - \boldsymbol{y} \right)
\hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
\boldsymbol{s} &\leftarrow \boldsymbol{r}
- \gamma \nabla h\mleft( \boldsymbol{r} \mright),
- \gamma \nabla h\left( \boldsymbol{r} \right),
\hspace{5mm} &&\gamma > 0 \label{eq:s_update}
.\end{alignat}
%
An equation for determining $\nabla h(\boldsymbol{r})$ is given in
\cite{proximal_paper}.
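Differentiating $h$ term by term yields the componentwise expression
%
\begin{align*}
\left( \nabla h\left( \boldsymbol{r} \right) \right)_k
= 4 r_k \left( r_k^2 - 1 \right)
+ 2 \sum_{j :\, k \in \mathcal{N}_c(j)} \left[
\left( \prod_{i \in \mathcal{N}_c(j)} r_i \right) - 1 \right]
\prod_{i \in \mathcal{N}_c(j) \setminus \left\{ k \right\}} r_i
,\end{align*}
%
a restatement consistent with the definition of $h$ above; the notation in \cite{proximal_paper} may differ.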
It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
represent $\tilde{\boldsymbol{x}}$ during different
really represent $\tilde{\boldsymbol{x}}$ during different
stages of the decoding process.
As the gradient of the code-constraint polynomial can attain very large values
@@ -309,10 +290,10 @@ $\left[-\eta, \eta\right]^n$ by a projection
$\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$
is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
The resulting decoding process as described in \cite{proximal_paper} is
presented in Algorithm \ref{alg:proximal_decoding}.
presented in algorithm \ref{alg:proximal_decoding}.
\begin{algorithm}
\caption{Proximal decoding algorithm for an AWGN channel \cite{proximal_paper}.}
\caption{Proximal decoding algorithm for an AWGN channel.}
\label{alg:proximal_decoding}
\begin{algorithmic}
@@ -320,7 +301,7 @@ presented in Algorithm \ref{alg:proximal_decoding}.
\STATE \textbf{for} $K$ iterations \textbf{do}
\STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
\STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
\STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathbbm{1}_{\left\{ \boldsymbol{s} \preceq 0 \right\}}$
\STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathds{1} \left\{ \text{sign}\left( \boldsymbol{s} \right) = -1 \right\}$
\STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
\STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
\STATE \hspace{5mm} \textbf{end if}
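To make the iteration concrete, a self-contained NumPy sketch of the decoding loop is given below; grad_h implements the componentwise gradient restated earlier, and all names are ours rather than from \cite{proximal_paper}.

import numpy as np

def grad_h(r: np.ndarray, H: np.ndarray) -> np.ndarray:
    # Gradient of the code-constraint polynomial h at r.
    g = 4.0 * r * (r ** 2 - 1.0)             # bipolar-constraint term
    for row in H:                            # parity-constraint terms
        idx = np.flatnonzero(row)
        full = np.prod(r[idx])
        for k in idx:
            rest = np.prod(r[idx[idx != k]])
            g[k] += 2.0 * (full - 1.0) * rest
    return g

def proximal_decode(y, H, gamma=0.05, omega=0.05, eta=1.5, K=200):
    s = np.asarray(y, dtype=float)
    for _ in range(K):
        r = s - omega * (s - y)                            # pull toward y
        s = np.clip(r - gamma * grad_h(r, H), -eta, eta)   # code step + projection
        c_hat = (s < 0).astype(int)                        # hard decision on signs
        if not np.any((H @ c_hat) % 2):                    # syndrome check, mod 2
            return c_hat                                   # converged: valid codeword
    return c_hat                                           # decoding failure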
@@ -335,14 +316,14 @@ presented in Algorithm \ref{alg:proximal_decoding}.
\section{Improved algorithm}
%%%%%%%%%%%%%%%%%%%%%
\subsection{Analysis of the Convergence Behavior}
\subsection{Analysis of Convergence Behavior}
In Fig. \ref{fig:fer vs ber}, the \textit{frame error rate} (FER),
In figure \ref{fig:fer vs ber}, the \textit{frame error rate} (FER),
\textit{bit error rate} (BER) and \textit{decoding failure rate} (DFR) of
proximal decoding are shown for an LDPC code with $n=204$ and $k=102$
\cite[204.33.484]{mackay}.
A decoding failure is defined as a decoding operation returning an invalid
codeword, i.e., as non-convergence of the algorithm.
A decoding failure is defined as a decoding operation, the result of which is
not a valid codeword, i.e., as non-convergence of the algorithm.
The parameters chosen for this simulation are $\gamma=0.05, \omega=0.05,
\eta=1.5$ and $K=200$.
They were determined to offer the best performance in a preliminary examination,
@@ -353,15 +334,33 @@ This means that most frame errors are not due to the algorithm converging
to the wrong codeword, but due to the algorithm not converging at all.
As proximal decoding is an optimization-based decoding method, one possible
explanation for this effect might be that during the decoding process, convergence
to the final codeword is often not achieved, although the estimate is moving into
the right direction.
explanation for this effect might be that during the decoding process convergence
on the final codeword is often not achieved, although the estimate is moving in
the right general direction.
This would suggest that most frame errors occur due to only a few incorrectly
decoded bits.%
%
\begin{figure}
\begin{figure}[ht]
\centering
\pgfplotsset{
FERPlot/.style={
line width=1pt,
densely dashed,
mark=triangle,
fancy marks
},
BERPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
DFRPlot/.style={
only marks,
mark=square*,
fancy marks,
}}
\begin{tikzpicture}
\begin{axis}[
grid=both,
@@ -374,24 +373,24 @@ decoded bits.%
height=\figheight,
legend pos = south west,
]
\addplot+[FERPlot, mark=o, mark options={solid}, scol1]
\addplot+[FERPlot, scol0]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER}
\addplot+[BERPlot, mark=*, scol1]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER}
\addplot+[DFRPlot, mark=square*, scol0]
\addplot+[DFRPlot, scol2]
table [x=SNR, y=DFR, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{DFR}
\addplot+[BERPlot, scol1]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER}
\end{axis}
\end{tikzpicture}
@@ -403,28 +402,27 @@ decoded bits.%
\label{fig:fer vs ber}
\end{figure}%
%
An approach for lowering the FER might then be to append an ``ML-in-the-list''
\cite{ml_in_the_list} step to the decoding process shown in Algorithm
\cite{ml_in_the_list} step to the decoding process shown in algorithm
\ref{alg:proximal_decoding}.
This step consists in determining the $N \in \mathbb{N}$ most probable
erroneous bits, finding all variations of the current estimate with those bits
This step would consist of determining the $N \in \mathbb{N}$ most probably
wrong bits, finding all variations of the current estimate with those bits
modified, and performing ML decoding on this list.
This approach crucially relies on identifying the most probable erroneous bits.
This approach crucially relies on identifying the most probably wrong bits.
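As an illustration of what such an appended step could look like (one plausible reading of the list construction described above; names and helper structure are ours), all $2^N$ modifications of the suspect positions are enumerated and the most likely valid codeword is kept:

import itertools
import numpy as np

def ml_in_the_list(c_hat, suspects, y, H):
    # Flip every subset of the N suspect positions; among the resulting
    # valid codewords, return the one whose BPSK image is closest to y
    # (for an AWGN channel this is the ML choice within the list).
    best, best_dist = None, np.inf
    for flips in itertools.product((0, 1), repeat=len(suspects)):
        cand = c_hat.copy()
        cand[list(suspects)] ^= np.array(flips)
        if np.any((H @ cand) % 2):            # not a valid codeword: skip
            continue
        dist = np.sum((y - (1 - 2 * cand)) ** 2)
        if dist < best_dist:
            best, best_dist = cand, dist
    return best                               # None if the list holds no codeword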
Therefore, the convergence properties of proximal decoding are investigated.
Considering (\ref{eq:s_update}) and (\ref{eq:r_update}), Fig.
Considering equations (\ref{eq:s_update}) and (\ref{eq:r_update}), figure
\ref{fig:grad} shows the two gradients along which the minimization is
performed for a repetition code with $n=2$.
It is apparent that a net movement will result as long as the two gradients
have a common component.
As soon as this common component is exhausted, they will work in opposing
directions resulting in an oscillation of the estimate.
This behavior supports the conjecture that the reason for the high DFR is a
directions and an oscillation of the estimate will take place.
This behavior matches the conjecture that the reason for the high DFR is a
failure to converge to the correct codeword in the final steps of the
optimization process.%
%
\begin{figure}
\begin{figure}[h]
\centering
\begin{tikzpicture}
@@ -523,18 +521,16 @@ optimization process.%
\label{fig:grad}
\end{figure}%
%
In Fig. \ref{fig:prox:convergence_large_n}, we consider only component
$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate during a
decoding operation for the LDPC code used also for Fig. 1.
In figure \ref{fig:prox:convergence_large_n}, only component
$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate is considered during a
decoding operation for an LDPC code with $n=204$ and $k=102$.
Two properties may be observed.
First, we observe the average absolute values of the two gradients are equal,
however, they have opposing signs,
First, the average values of the two gradients are equal, except for their sign,
leading to the aforementioned oscillation.
Second, the gradient of the code constraint polynomial itself starts to
oscillate after a certain number of iterations.%
%
\begin{figure}
\begin{figure}[ht]
\centering
\begin{tikzpicture}
@@ -578,11 +574,11 @@ oscillate after a certain number of iterations.%
\end{figure}%
%%%%%%%%%%%%%%%%%%%%%
\subsection{Improvement Using ``ML-in-the-List'' Step}
\subsection{Improvement using ``ML-in-the-list'' step}
Considering the magnitude of the oscillation of the gradient of the code constraint
Considering the magnitude of oscillation of the gradient of the code constraint
polynomial, some interesting behavior may be observed.
Fig. \ref{fig:p_error} shows the probability that a component of the estimate
Figure \ref{fig:p_error} shows the probability that a component of the estimate
is wrong, determined through a Monte Carlo simulation, when the components of
$\boldsymbol{c}$ are ordered from smallest to largest oscillation of
$\left(\nabla h\right)_i$.
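This criterion translates directly into a selection rule; a sketch (our own, assuming the last $T$ gradients are recorded during decoding and that larger amplitude indicates a less reliable component, as Fig. \ref{fig:p_error} suggests):

import numpy as np

def select_suspects(grad_history: np.ndarray, N: int) -> np.ndarray:
    # grad_history has shape (T, n): grad h over the last T iterations.
    # Rank components by peak-to-peak oscillation amplitude and return
    # the indices of the N components oscillating most strongly.
    amplitude = grad_history.max(axis=0) - grad_history.min(axis=0)
    return np.argsort(amplitude)[-N:]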
@@ -603,29 +599,29 @@ the probability that a given component was decoded incorrectly.%
ymode=log,
ymin=1e-9,ymax=1e-5,
xmin=0,xmax=200,
width=0.95\figwidth,
width=\figwidth,
height=\figheight,
]
\addplot+ [scol0, mark=none, line width=1]
\addplot+ [scol1, mark=none, line width=1]
table [col sep=comma, y=p_error]{res/p_error.csv};
\end{axis}
\end{tikzpicture}
\caption{Probability that a component of the estimated codeword
$\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
$\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is wrong for a (3,6) regular
LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
The indices $i'$ are ordered such that the amplitude of oscillation of
$\left(\nabla h\right)_{i'}$ increases with $i'$.
Parameters used for the simulation: $\gamma = 0.05, \omega = 0.05,
Parameters used for simulation: $\gamma = 0.05, \omega = 0.05,
\eta = 1.5, E_b/N_0 = \SI{4}{dB}$.
Simulated with $\SI{100000000}{}$ iterations using the all-zeros codeword.}
Simulated with $\SI{100000000}{}$ iterations.}
\label{fig:p_error}
\end{figure}
The complete improved algorithm is given in Algorithm \ref{alg:improved}.
The complete improved algorithm is depicted in algorithm \ref{alg:improved}.
First, the proximal decoding algorithm is applied.
If a valid codeword has been reached, i.e., if the algorithm has converged,
we return this solution.
If a valid codeword has been reached, i.e., if the algorithm has converged, this
is the solution returned.
Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion
presented above.
Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$,
@@ -672,7 +668,7 @@ generated and an ``ML-in-the-list'' step is performed.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Simulation Results \& Discussion}
Fig. \ref{fig:results} shows the FER and BER resulting from applying
Figure \ref{fig:results} shows the FER and BER resulting from applying
proximal decoding as presented in \cite{proximal_paper} and the improved
algorithm presented here when applied to a $\left( 3,6 \right)$-regular LDPC
code with $n=204$ and $k=102$ \cite[204.33.484]{mackay}.
@@ -680,10 +676,24 @@ The parameters chosen for the simulation are
$\gamma = 0.05, \omega=0.05, \eta=1.5, K=200$.
Again, these parameters were chosen,%
%
\begin{figure}
\begin{figure}[ht]
\centering
\begin{tikzpicture}
\pgfplotsset{
ProxPlot/.style={
line width=1pt,
mark=*,
fancy marks,
},
ImprPlot/.style={
line width=1pt,
mark=triangle,
densely dashed,
fancy marks,
},
}
\begin{axis}[
grid=both,
xlabel={$E_\text{b} / N_0$ (dB)},
@@ -698,28 +708,28 @@ Again, these parameters were chosen,%
at={(0.5,-0.44)},anchor=south}
]
\addplot+[FERPlot, mark=o, mark options={solid}, scol1]
\addplot+[ProxPlot, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{9}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, prox. dec.};
\addplot+[BERPlot, mark=*, scol1]
\addplot+[ProxPlot, scol2]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/proximal_ber_fer_dfr_20433484.csv};
\addlegendentry{BER, prox. dec.};
\addplot+[FERPlot, mark=triangle, mark options={solid}, scol2]
\addplot+[ImprPlot, scol1]
table [x=SNR, y=FER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{7.5}]
{res/improved_ber_fer_dfr_20433484.csv};
\addlegendentry{FER, improved};
\addplot+[BERPlot, mark=triangle*, scol2]
\addplot+[ImprPlot, scol2]
table [x=SNR, y=BER, col sep=comma,
discard if not={gamma}{0.05},
discard if gt={SNR}{6.5}]
@@ -751,7 +761,7 @@ The gain varies significantly
with the SNR (which is to be expected, since with higher SNR values the number
of bit errors decreases, making the correction of those errors in the
``ML-in-the-list'' step more likely).
For an FER of $10^{-6}$, the gain is approximately $\SI{1}{dB}$.
For an FER of $10^{-6}$ the gain is approximately $\SI{1}{dB}$.
Similar behavior can be observed with various other codes.
No immediate relationship between the code length and the gain was observed
during our examinations.
@@ -766,7 +776,7 @@ from only a few components of the estimate being wrong.
These few erroneous components can mostly be corrected by appending an
additional step to the original algorithm that is only executed if the
algorithm has not converged.
A gain of up to $\SI{1}{dB}$ can be observed, depending on the code,
A gain of up to $\sim\SI{1}{dB}$ can be observed, depending on the code,
the parameters considered, and the SNR.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -786,7 +796,41 @@ Ministry of Education and Research (BMBF) within the project Open6GHub
%
\printbibliography
\begin{thebibliography}{12}
\bibliographystyle{IEEEtran}
\bibitem{ADMM}
S. Barman, X. Liu, S. C. Draper and B. Recht, ``Decomposition Methods for Large Scale LP Decoding,'' in IEEE Transactions on Information Theory, vol. 59, no. 12, pp. 7870-7886, Dec. 2013, doi: 10.1109/TIT.2013.2281372.
\bibitem{feldman_paper}
J. Feldman, M. J. Wainwright and D. R. Karger, ``Using linear programming to decode binary linear codes,'' in IEEE Transactions on Information Theory, vol. 51, no. 3, pp. 954-972, March 2005, doi: 10.1109/TIT.2004.842696.
\bibitem{ml_in_the_list}
M. Geiselhart, A. Elkelesh, M. Ebada, S. Cammerer and S. t. Brink, ``Automorphism Ensemble Decoding of Reed--Muller Codes,'' in IEEE Transactions on Communications, vol. 69, no. 10, pp. 6424-6438, Oct. 2021, doi: 10.1109/TCOMM.2021.3098798.
\bibitem{mackay99}
D. J. C. MacKay, ``Good error-correcting codes based on very sparse matrices,'' in IEEE Transactions on Information Theory, vol. 45, no. 2, pp. 399-431, March 1999, doi: 10.1109/18.748992.
\bibitem{mackay}
D.J.C. MacKay, ``Encyclopedia of sparse graph codes [online],''
Available: http://www.inference.phy.cam.ac.uk/mackay/codes/data.html
\bibitem{proximal_algorithms}
N. Parikh and S. Boyd, ``Proximal algorithms,'' Found. Trends Optim., vol. 1, no. 3, pp. 127--239, Jan. 2014.
\bibitem{channel_codes_book}
W. Ryan and S. Lin, Channel Codes: Classical and Modern, Cambridge, Cambridge University Press, 2009, pp. 651-670.
\bibitem{adaptive_lp_decoding}
M. H. Taghavi and P. H. Siegel, ``Adaptive Linear Programming Decoding,'' 2006 IEEE International Symposium on Information Theory, Seattle, WA, USA, 2006, pp. 1374-1378, doi: 10.1109/ISIT.2006.262071.
\bibitem{interior_point_decoding}
P. O. Vontobel, ``Interior-point algorithms for linear-programming decoding,'' 2008 Information Theory and Applications Workshop, San Diego, CA, USA, 2008, pp. 433-437, doi: 10.1109/ITA.2008.4601085.
\bibitem{proximal_paper}
T. Wadayama and S. Takabe, ``Proximal decoding for LDPC codes,'' IEICE Transactions on Fundamentals of Electronics, Communications and Computer Sciences, vol. advpub, 2022TAP0002, 2022.
\end{thebibliography}
\end{document}

localSettings.yaml Normal file

@@ -0,0 +1,29 @@
indentPreamble: 1
defaultIndent: ' '
maxNumberOfBackUps: 9
modifyLineBreaks:
preserveBlankLines: 1
condenseMultipleBlankLinesInto: 0
oneSentencePerLine:
manipulateSentences: 1
removeSentenceLineBreaks: 0
sentencesFollow:
par: 1
blankLine: 1
fullStop: 1
exclamationMark: 1
questionMark: 1
rightBrace: 1
commentOnPreviousLine: 1
other: 0
sentencesBeginWith:
A-Z: 1
a-z: 0
other: 0
sentencesEndWith:
basicFullStop: 0
betterFullStop: 1
exclamationMark: 1
questionMark: 1
other: '(?:\.\)(?!\h*[a-z]))|(?:(?<!(?:(?:e\.g)|(?:i\.e)|(?:etc))))\.(?!(?:[a-z]|[A-Z]|\-|\,|\.|[0-9]))'
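These are latexindent settings: the modifyLineBreaks and oneSentencePerLine options above reflow the LaTeX sources to one sentence per line while preserving blank lines. Assuming a standard latexindent installation, the file is picked up with, e.g., latexindent -l localSettings.yaml -m letter.tex, where the -m switch enables the modifyLineBreaks options.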