From 90ee3107751e6158515197537d25184beb56fe44 Mon Sep 17 00:00:00 2001 From: Andreas Tsouchlos Date: Mon, 24 Apr 2023 18:35:53 +0200 Subject: [PATCH] Almost done with corrections --- latex/thesis/bibliography.bib | 9 + latex/thesis/chapters/comparison.tex | 54 ++-- latex/thesis/chapters/conclusion.tex | 22 +- latex/thesis/chapters/introduction.tex | 25 +- latex/thesis/chapters/lp_dec_using_admm.tex | 278 +++++++++++++----- latex/thesis/chapters/proximal_decoding.tex | 71 ++--- .../chapters/theoretical_background.tex | 20 +- latex/thesis/thesis.tex | 2 +- 8 files changed, 312 insertions(+), 169 deletions(-) diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib index 151134c..bcfddd6 100644 --- a/latex/thesis/bibliography.bib +++ b/latex/thesis/bibliography.bib @@ -223,3 +223,12 @@ date = {2023-04}, url = {http://www.inference.org.uk/mackay/codes/data.html} } + +@article{adam, + title={Adam: A method for stochastic optimization}, + author={Kingma, Diederik P and Ba, Jimmy}, + journal={arXiv preprint arXiv:1412.6980}, + year={2014}, + doi={10.48550/arXiv.1412.6980} +} + diff --git a/latex/thesis/chapters/comparison.tex b/latex/thesis/chapters/comparison.tex index 8c66876..bcab4ed 100644 --- a/latex/thesis/chapters/comparison.tex +++ b/latex/thesis/chapters/comparison.tex @@ -3,7 +3,7 @@ In this chapter, proximal decoding and \ac{LP} Decoding using \ac{ADMM} are compared. First, the two algorithms are studied on a theoretical basis. -Subsequently, their respective simulation results are examined and their +Subsequently, their respective simulation results are examined, and their differences are interpreted based on their theoretical structure. @@ -32,13 +32,13 @@ $\mathcal{P}_{d_j}, \hspace{1mm} j\in\mathcal{J}$, defined as% % by moving the constraints into the objective function, as shown in figure \ref{fig:ana:theo_comp_alg:admm}. -Both algorithms are composed of an iterative approach consisting of two -alternating steps. The objective functions of the two problems are similar in that they both comprise two parts: one associated to the likelihood that a given -codeword was sent, stemming from the channel model, and one associated -to the constraints the decoding process is subjected to, stemming from the +codeword was sent, arising from the channel model, and one associated +to the constraints the decoding process is subjected to, arising from the code used. +Both algorithms are composed of an iterative approach consisting of two +alternating steps, each minimizing one part of the objective function. % \begin{figure}[h] @@ -139,7 +139,7 @@ This means that additional redundant parity-checks can be added successively until the codeword returned is valid and thus the \ac{ML} solution is found \cite[Sec. IV.]{alp}. -In terms of time complexity the two decoding algorithms are comparable. +In terms of time complexity, the two decoding algorithms are comparable. Each of the operations required for proximal decoding can be performed in $\mathcal{O}\left( n \right) $ time for \ac{LDPC} codes (see section \ref{subsec:prox:comp_perf}). 
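To make the $\mathcal{O}\left( n \right)$ argument concrete, the following minimal Python sketch (illustrative only, not the thesis implementation; the sparse matrix, the message values and all names are stand-ins) accumulates the per-variable sums $\sum_{j\in N_v\left( i \right)} M_{j\to i}$ that appear in the message-passing form of proximal decoding in a single pass over the edges of the Tanner graph. For a regular \ac{LDPC} code with variable node degree $d_v$ the number of edges is $n d_v$, so the cost of this step grows linearly in $n$.

```python
import numpy as np
from scipy.sparse import random as sparse_random

# Stand-in dimensions, loosely modeled on a (3,6)-regular LDPC code of length n = 204.
n, m, d_v = 204, 102, 3
H = sparse_random(m, n, density=d_v / m, format="csr")  # sparse stand-in for a parity-check matrix
H.data[:] = 1.0

# One entry per edge (j, i) of the Tanner graph; for a regular code the
# number of edges is n * d_v, so this accumulation is O(n).
rows, cols = H.nonzero()
M_edge = np.random.randn(rows.size)        # stand-in check-to-variable messages M_{j -> i}
neigh_sum = np.zeros(n)
np.add.at(neigh_sum, cols, M_edge)         # neigh_sum[i] = sum over j in N_v(i) of M_{j -> i}
```

The remaining updates of the listed algorithms act componentwise on vectors of length $n$, so they share the same linear scaling.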
@@ -172,10 +172,10 @@ while stopping critierion unfulfilled do |\vspace{0.22mm}\Reactivatenumber| end for for i in $\mathcal{I}$ do - $s_i \leftarrow s_i + \gamma \left[ 4\left( s_i^2 - 1 \right)s_i - \phantom{\frac{4}{s_i}}\right.$|\Suppressnumber| - |\Reactivatenumber|$\left.+ \frac{4}{s_i}\sum_{j\in N_v\left( i \right) } - M_{j\to i} \right] $ + $s_i\leftarrow \Pi_\eta \left( s_i + \gamma \left( 4\left( s_i^2 - 1 \right)s_i + \phantom{\frac{4}{s_i}}\right.\right.$|\Suppressnumber| + |\Reactivatenumber|$\left.\left.+ \frac{4}{s_i}\sum_{j\in + N_v\left( i \right) } M_{j\to i} \right)\right) $ $r_i \leftarrow r_i + \omega \left( s_i - y_i \right)$ end for end while @@ -232,7 +232,7 @@ With proximal decoding this minimization is performed for all constraints at onc in an approximative manner, while with \ac{LP} decoding using \ac{ADMM} it is performed for each constraint individually and with exact results. In terms of time complexity, both algorithms are linear with -respect to $n$ and are heavily parallelisable. +respect to $n$ and are heavily parallelizable. @@ -241,18 +241,18 @@ respect to $n$ and are heavily parallelisable. \label{sec:comp:res} The decoding performance of the two algorithms is compared in figure -\ref{fig:comp:prox_admm_dec} in the form of the \ac{FER}. +\ref{fig:comp:prox_admm_dec} in form of the \ac{FER}. Shown as well is the performance of the improved proximal decoding algorithm presented in section \ref{sec:prox:Improved Implementation}. The \ac{FER} resulting from decoding using \ac{BP} and, -wherever available, the \ac{FER} of \ac{ML} decoding taken from -\cite{lautern_channelcodes} are plotted as a reference. +wherever available, the \ac{FER} of \ac{ML} decoding, taken from +\cite{lautern_channelcodes}, are plotted as a reference. The parameters chosen for the proximal and improved proximal decoders are $\gamma=0.05$, $\omega=0.05$, $K=200$, $\eta = 1.5$ and $N=12$. The parameters chosen for \ac{LP} decoding using \ac{ADMM} are $\mu = 5$, $\rho = 1$, $K=200$, $\epsilon_\text{pri} = 10^{-5}$ and $\epsilon_\text{dual} = 10^{-5}$. -For all codes considered in the scope of this work, \ac{LP} decoding using +For all codes considered within the scope of this work, \ac{LP} decoding using \ac{ADMM} consistently outperforms both proximal decoding and the improved version, reaching very similar performance to \ac{BP}. The decoding gain heavily depends on the code, evidently becoming greater for @@ -268,8 +268,12 @@ calculations performed in each case. With proximal decoding, the calculations are approximate, leading to the constraints never being quite satisfied. With \ac{LP} decoding using \ac{ADMM}, -the constraints are fulfilled for each parity check individualy after each +the constraints are fulfilled for each parity check individually after each iteration of the decoding process. +A further contributing factor might be the structure of the optimization +process, as the alternating minimization with respect to the same variable +leads to oscillatory behavior, as explained in section +\ref{subsec:prox:conv_properties}. It should be noted that while in this thesis proximal decoding was examined with respect to its performance in \ac{AWGN} channels, in \cite{proximal_paper} it is presented as a method applicable to non-trivial @@ -279,19 +283,19 @@ broadening its usefulness beyond what is shown here. The timing requirements of the decoding algorithms are visualized in figure \ref{fig:comp:time}. 
The datapoints have been generated by evaluating the metadata from \ac{FER} -and \ac{BER} simulations using the parameters mentioned earlier when +and \ac{BER} simulations and using the parameters mentioned earlier when discussing the decoding performance. The codes considered are the same as in sections \ref{subsec:prox:comp_perf} and \ref{subsec:admm:comp_perf}. -While the \ac{ADMM} implementation seems to be faster the the proximal -decoding and improved proximal decoding implementations, infering some +While the \ac{ADMM} implementation seems to be faster than the proximal +decoding and improved proximal decoding implementations, inferring some general behavior is difficult in this case. This is because of the comparison of actual implementations, making the results dependent on factors such as the grade of optimization of each of the implementations. Nevertheless, the run time of both the proximal decoding and the \ac{LP} -decoding using \ac{ADMM} implementations is similar and both are -reasonably performant, owing to the parallelisable structure of the +decoding using \ac{ADMM} implementations is similar, and both are +reasonably performant, owing to the parallelizable structure of the algorithms. % \begin{figure}[h] @@ -328,8 +332,6 @@ algorithms. \label{fig:comp:time} \end{figure}% % -\footnotetext{asdf} -% \begin{figure}[h] \centering @@ -572,7 +574,7 @@ algorithms. \addlegendentry{\acs{LP} decoding using \acs{ADMM}} \addlegendimage{RoyalPurple, line width=1pt, mark=*, solid} - \addlegendentry{\acs{BP} (20 iterations)} + \addlegendentry{\acs{BP} (200 iterations)} \addlegendimage{Black, line width=1pt, mark=*, solid} \addlegendentry{\acs{ML} decoding} @@ -580,8 +582,8 @@ algorithms. \end{tikzpicture} \end{subfigure} - \caption{Comparison of decoding performance between proximal decoding and \ac{LP} decoding - using \ac{ADMM}} + \caption{Comparison of decoding performance of the different decoder + implementations for various codes} \label{fig:comp:prox_admm_dec} \end{figure} diff --git a/latex/thesis/chapters/conclusion.tex b/latex/thesis/chapters/conclusion.tex index bee0444..c036f2d 100644 --- a/latex/thesis/chapters/conclusion.tex +++ b/latex/thesis/chapters/conclusion.tex @@ -4,15 +4,15 @@ In the context of this thesis, two decoding algorithms were considered: proximal decoding and \ac{LP} decoding using \ac{ADMM}. The two algorithms were first analyzed individually, before comparing them -based on simulation results as well as their theoretical structure. +based on simulation results as well as on their theoretical structure. For proximal decoding, the effect of each parameter on the behavior of the -decoder was examined, leading to an approach to choosing the value of each -of the parameters. +decoder was examined, leading to an approach to optimally choose the value +of each parameter. The convergence properties of the algorithm were investigated in the context of the relatively high decoding failure rate, to derive an approach to correct -possible wrong components of the estimate. -Based on this approach, an improvement over proximal decoding was suggested, +possibly wrong components of the estimate. +Based on this approach, an improvement of proximal decoding was suggested, leading to a decoding gain of up to $\SI{1}{dB}$, depending on the code and the parameters considered. 
@@ -22,7 +22,7 @@ The decomposable nature arising from the relocation of the constraints into the objective function itself was recognized as the major driver in enabling an efficient implementation of the decoding algorithm. Based on simulation results, general guidelines for choosing each parameter -were again derived. +were derived. The decoding performance, in form of the \ac{FER}, of the algorithm was analyzed, observing that \ac{LP} decoding using \ac{ADMM} nearly reaches that of \ac{BP}, staying within approximately $\SI{0.5}{dB}$ depending on the code @@ -30,7 +30,7 @@ in question. Finally, strong parallels were discovered with regard to the theoretical structure of the two algorithms, both in the constitution of their respective -objective functions as in the iterative approaches used to minimize them. +objective functions as well as in the iterative approaches used to minimize them. One difference noted was the approximate nature of the minimization in the case of proximal decoding, leading to the constraints never being truly satisfied. @@ -38,7 +38,7 @@ In conjunction with the alternating minimization with respect to the same variable, leading to oscillatory behavior, this was identified as a possible cause of its comparatively worse decoding performance. Furthermore, both algorithms were expressed as message passing algorithms, -justifying their similar computational performance. +illustrating their similar computational performance. While the modified proximal decoding algorithm presented in section \ref{sec:prox:Improved Implementation} shows some promising results, further @@ -46,6 +46,12 @@ investigation is required to determine how different choices of parameters affect the decoding performance. Additionally, a more mathematically rigorous foundation for determining the potentially wrong components of the estimate is desirable. +A different method to improve proximal decoding might be to use +moment-based optimization techniques such as \textit{Adam} \cite{adam} +to try to mitigate the effect of local minima introduced in the objective +function as well as the adversarial structure of the minimization when employing +proximal decoding. + Another area benefiting from future work is the expansion of the \ac{ADMM} based \ac{LP} decoder into a decoder approximating \ac{ML} performance, using \textit{adaptive \ac{LP} decoding}. diff --git a/latex/thesis/chapters/introduction.tex b/latex/thesis/chapters/introduction.tex index 2aa2681..6693c1d 100644 --- a/latex/thesis/chapters/introduction.tex +++ b/latex/thesis/chapters/introduction.tex @@ -9,15 +9,17 @@ popular due to being able to reach arbitrarily small probabilities of error at code rates up to the capacity of the channel \cite[Sec. II.B.]{mackay_rediscovery}, while retaining a structure that allows for very efficient decoding. While the established decoders for \ac{LDPC} codes, such as \ac{BP} and the -\textit{min-sum algorithm}, offer reasonable decoding performance, they are suboptimal -in most cases and exhibit an \textit{error floor} for high \acp{SNR}, -making them unsuitable for applications with extreme reliability requirements. +\textit{min-sum algorithm}, offer good decoding performance, they are suboptimal +in most cases and exhibit an \textit{error floor} for high \acp{SNR} +\cite[Sec. 15.3]{ryan_lin_2009}, making them unsuitable for applications +with extreme reliability requirements. + Optimization based decoding algorithms are an entirely different way of approaching the decoding problem. 
-The initial introduction of optimization techniques as a way of decoding binary -linear codes was conducted in Feldman's 2003 Ph.D. thesis and subsequent paper, +The first introduction of optimization techniques as a way of decoding binary +linear codes was conducted in Feldman's 2003 Ph.D. thesis and a subsequent paper, establishing the field of \ac{LP} decoding \cite{feldman_thesis}, \cite{feldman_paper}. -There, the \ac{ML} decoding problem is approximated by a \textit{linear program}, +There, the \ac{ML} decoding problem is approximated by a \textit{linear program}, i.e., a linear, convex optimization problem, which can subsequently be solved using several different algorithms \cite{alp}, \cite{interior_point}, \cite{original_admm}, \cite{pdd}. @@ -27,8 +29,8 @@ of the \ac{MAP} decoding problem \cite{proximal_paper}. The motivation behind applying optimization methods to channel decoding is to utilize existing techniques in the broad field of optimization theory, as well -as find new decoding methods not suffering from the same disadvantages as -existing message passing based approaches, or exhibiting other desirable properties. +as to find new decoding methods not suffering from the same disadvantages as +existing message passing based approaches or exhibiting other desirable properties. \Ac{LP} decoding, for example, comes with strong theoretical guarantees allowing it to be used as a way of closely approximating \ac{ML} decoding \cite[Sec. I]{original_admm}, @@ -36,11 +38,14 @@ and proximal decoding is applicable to non-trivial channel models such as \ac{LDPC}-coded massive \ac{MIMO} channels \cite{proximal_paper}. This thesis aims to further the analysis of optimization based decoding -algorithms as well as verify and complement the considerations present in +algorithms as well as to verify and complement the considerations present in the existing literature. Specifically, the proximal decoding algorithm and \ac{LP} decoding using the \ac{ADMM} \cite{original_admm} are explored within the context of \ac{BPSK} modulated \ac{AWGN} channels. Implementations of both decoding methods are produced, and based on simulation results from those implementations the algorithms are examined and compared. - +Approaches to determine the optimal value of each parameter are derived and +the computational and decoding performance of the algorithms is examined. +An improvement on proximal decoding is suggested, achieving up to 1 dB of gain, +depending on the parameters chosen and the code considered. diff --git a/latex/thesis/chapters/lp_dec_using_admm.tex b/latex/thesis/chapters/lp_dec_using_admm.tex index 2368ac3..f85880b 100644 --- a/latex/thesis/chapters/lp_dec_using_admm.tex +++ b/latex/thesis/chapters/lp_dec_using_admm.tex @@ -5,14 +5,12 @@ This chapter is concerned with \ac{LP} decoding - the reformulation of the decoding problem as a linear program. More specifically, the \ac{LP} decoding problem is solved using \ac{ADMM}. First, the general field of \ac{LP} decoding is introduced. -The application of \ac{ADMM} to the decoding problem is explained. -Some notable implementation details are mentioned. +The application of \ac{ADMM} to the decoding problem is explained and some +notable implementation details are mentioned. Finally, the behavior of the algorithm is examined based on simulation results. 
- - %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{LP Decoding}% \label{sec:lp:LP Decoding} @@ -547,7 +545,7 @@ parity-checks until a valid result is returned \cite[Sec. IV.]{alp}. %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Decoding Algorithm}% +\section{Decoding Algorithm and Implementation}% \label{sec:lp:Decoding Algorithm} The \ac{LP} decoding formulation in section \ref{sec:lp:LP Decoding} @@ -689,7 +687,6 @@ handled at the same time. This can also be understood by interpreting the decoding process as a message-passing algorithm \cite[Sec. III. D.]{original_admm}, \cite[Sec. II. B.]{efficient_lp_dec_admm}, depicted in algorithm \ref{alg:admm}. -\todo{How are the variables being initialized?} \begin{genericAlgorithm}[caption={\ac{LP} decoding using \ac{ADMM} interpreted as a message passing algorithm\protect\footnotemark{}}, label={alg:admm}, @@ -735,7 +732,7 @@ before the $\boldsymbol{z}_j$ and $\boldsymbol{u}_j$ update steps (lines 4 and subsequently replacing $\boldsymbol{T}_j \tilde{\boldsymbol{c}}$ with the computed value in the two updates \cite[Sec. 3.4.3]{distr_opt_book}. -The main computational effort in solving the linear program then amounts to +The main computational effort in solving the linear program amounts to computing the projection operation $\Pi_{\mathcal{P}_{d_j}} \left( \cdot \right) $ onto each check polytope. Various different methods to perform this projection have been proposed (e.g., in \cite{original_admm}, \cite{efficient_lp_dec_admm}, @@ -743,14 +740,14 @@ have been proposed (e.g., in \cite{original_admm}, \cite{efficient_lp_dec_admm}, The method chosen here is the one presented in \cite{original_admm}. -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Implementation Details}% -\label{sec:lp:Implementation Details} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\section{Implementation Details}% +%\label{sec:lp:Implementation Details} The development process used to implement this decoding algorithm was the same as outlined in section -\ref{sec:prox:Implementation Details} for proximal decoding. -At first, an initial version was implemented in Python, before repeating the +\ref{sec:prox:Decoding Algorithm} for proximal decoding. +First, an initial version was implemented in Python, before repeating the process using C++ to achieve higher performance. Again, the performance can be increased by reframing the operations in such a way that the computation can take place primarily with element-wise @@ -788,9 +785,13 @@ expression to be rewritten as% .\end{align*} % Defining% +\footnote{ + In this case $d_1, \ldots, d_n$ refer to the degree of the variable nodes, + i.e., $d_i,\hspace{1mm}i\in\mathcal{I}$. 
+} % \begin{align*} - \boldsymbol{D} := \begin{bmatrix} + \boldsymbol{d} := \begin{bmatrix} d_1 \\ \vdots \\ d_n @@ -800,13 +801,12 @@ Defining% \hspace{5mm}% \boldsymbol{s} := \sum_{j\in\mathcal{J}} \boldsymbol{T}_j^\text{T} \left( \boldsymbol{z}_j - \boldsymbol{u}_j \right) -\end{align*}% -\todo{Rename $\boldsymbol{D}$}% +,\end{align*}% % the $\tilde{\boldsymbol{c}}$ update can then be rewritten as% % \begin{align*} - \tilde{\boldsymbol{c}} \leftarrow \boldsymbol{D}^{\circ \left(-1\right)} \circ + \tilde{\boldsymbol{c}} \leftarrow \boldsymbol{d}^{\circ \left(-1\right)} \circ \left( \boldsymbol{s} - \frac{1}{\mu}\boldsymbol{\gamma} \right) .\end{align*} % @@ -831,7 +831,7 @@ while $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} \left( \boldsymbol{z}_j - \boldsymbol{u}_j \right) $ end for for $i$ in $\mathcal{I}$ do - $\tilde{\boldsymbol{c}} \leftarrow \boldsymbol{D}^{\circ \left( -1\right)} \circ + $\tilde{\boldsymbol{c}} \leftarrow \boldsymbol{d}^{\circ \left( -1\right)} \circ \left( \boldsymbol{s} - \frac{1}{\mu}\boldsymbol{\gamma} \right) $ end for end while @@ -852,6 +852,12 @@ Subsequently, the decoding performance is observed and compared to that of Finally, the computational performance of the implementation and time complexity of the algorithm are studied. +As was the case in chapter \ref{chapter:proximal_decoding} for proximal decoding, +the following simulation results are based on Monte Carlo simulations +and the BER and FER curves have been generated by producing at least 100 +frame errors for each data point, except in cases where this is explicitly +specified otherwise. + \subsection{Choice of Parameters} The first two parameters to be investigated are the penalty parameter $\mu$ @@ -865,8 +871,8 @@ The code chosen for this examination is a (3,6) regular \ac{LDPC} code with $n=204$ and $k=102$ \cite[\text{204.33.484}]{mackay_enc}. When varying $\mu$, $\rho$ is set to 1 and when varying $\rho$, $\mu$ is set to 5. -$K$ is set to 200 and $\epsilon_\text{dual}$ and $\epsilon_\text{pri}$ to -$10^{-5}$. +The maximum number of iterations $K$ is set to 200 and +$\epsilon_\text{dual}$ and $\epsilon_\text{pri}$ to $10^{-5}$. The behavior that can be observed is very similar to that of the parameter $\gamma$ in proximal decoding, analyzed in section \ref{sec:prox:Analysis and Simulation Results}. @@ -1004,7 +1010,7 @@ run time of the decoding process. \label{fig:admm:mu_rho_iterations} \end{figure}% % -The same behavior can be observed when looking at a number of different codes, +The same behavior can be observed when looking at various different codes, as shown in figure \ref{fig:admm:mu_rho_multiple}. % \begin{figure}[h] @@ -1205,22 +1211,22 @@ as shown in figure \ref{fig:admm:mu_rho_multiple}. \label{fig:admm:mu_rho_multiple} \end{figure} -To get an estimate for the parameter $K$, the average error during decoding -can be used. +To get an estimate for the maximum number of iterations $K$ necessary, +the average error during decoding can be used. This is shown in figure \ref{fig:admm:avg_error} as an average of $\SI{100000}{}$ decodings. $\mu$ is set to 5 and $\rho$ is set to $1$ and the rest of the parameters are -again chosen as $K=200, \epsilon_\text{pri}=10^{-5}$ and $ \epsilon_\text{dual}=10^{-5}$. -Similarly to the results in section -\ref{sec:prox:Analysis and Simulation Results}, a dip is visible around the -$20$ iteration mark. 
-This is due to the fact that as the number of iterations increases +again chosen as $\epsilon_\text{pri}=10^{-5}$ and +$\epsilon_\text{dual}=10^{-5}$. +Similarly to the results in section \ref{subsec:prox:choice}, a dip is +visible around the $20$ iteration mark. +This is due to the fact that as the number of iterations increases, more and more decodings converge, leaving only the mistaken ones to be averaged. The point at which the wrong decodings start to become dominant and the decoding performance does not increase any longer is largely independent of -the \ac{SNR}, allowing the value of $K$ to be chosen without considering the -\ac{SNR}. +the \ac{SNR}, allowing the maximum number of iterations to be chosen without +considering the \ac{SNR}. \begin{figure}[h] \centering @@ -1275,7 +1281,8 @@ These are both set to the same value $\epsilon$. The effect of their value on the decoding performance is visualized in figure \ref{fig:admm:epsilon}. All parameters except $\epsilon_\text{pri}$ and $\epsilon_\text{dual}$ are -kept constant, with $K=200$, $\mu=5$, $\rho=1$ and $E_b / N_0 = \SI{4}{dB}$. +kept constant, with $\mu=5$, $\rho=1$ and $E_b / N_0 = \SI{4}{dB}$ and +performing a maximum of 200 iterations. A lower value for the tolerance initially leads to a dramatic decrease in the \ac{FER}, this effect fading as the tolerance becomes increasingly lower. @@ -1309,9 +1316,9 @@ In conclusion, the parameters $\mu$ and $\rho$ should be chosen comparatively small and large, respectively, to reduce the average runtime of the decoding process, while keeping them within a certain range as to not compromise the decoding performance. -The maximum number of iterations $K$ performed can be chosen independantly +The maximum number of iterations performed can be chosen independently of the \ac{SNR}. -Finally, relatively small values should be given to the parameters +Finally, small values should be given to the parameters $\epsilon_{\text{pri}}$ and $\epsilon_{\text{dual}}$ to achieve the lowest possible error rate. @@ -1328,7 +1335,7 @@ the same as in \cite{original_admm}. The two \ac{FER} curves are practically identical. Also shown is the curve resulting from \ac{BP} decoding, performing 1000 iterations. -The two algorithms perform relatively similarly, coming within $\SI{0.5}{dB}$ +The two algorithms perform relatively similarly, staying within $\SI{0.5}{dB}$ of one another. \begin{figure}[h] @@ -1367,73 +1374,189 @@ of one another. \label{fig:admm:results} \end{figure}% % -In figure \ref{fig:admm:ber_fer}, the \ac{BER} and \ac{FER} for \ac{LP} decoding -using\ac{ADMM} and \ac{BP} are shown for a (3, 6) regular \ac{LDPC} code with -$n=204$. -To ensure comparability, in both cases the number of iterations was set to +In figure \ref{fig:admm:bp_multiple}, \ac{FER} curves for \ac{LP} decoding +using \ac{ADMM} and \ac{BP} are shown for various codes. +To ensure comparability, in all cases the number of iterations was set to $K=200$. The values of the other parameters were chosen as $\mu = 5$, $\rho = 1$, $\epsilon = 10^{-5}$ and $\epsilon=10^{-5}$. -Comparing figures \ref{fig:admm:results} and \ref{fig:admm:ber_fer} it is -apparent that the difference in decoding performance depends on the code being +Comparing the simulation results for the different codes, it is apparent that +the difference in decoding performance depends on the code being considered. -More simulation results are presented in figure \ref{fig:comp:prox_admm_dec} -in section \ref{sec:comp:res}. 
- +For all codes considered here, however, the performance of \ac{LP} decoding +using \ac{ADMM} comes close to that of \ac{BP}, again staying withing +approximately $\SI{0.5}{dB}$. \begin{figure}[h] \centering - - \begin{subfigure}[c]{0.48\textwidth} + + \begin{subfigure}[t]{0.48\textwidth} \centering \begin{tikzpicture} \begin{axis}[ grid=both, - xlabel={$\mu$}, ylabel={\acs{BER}}, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, ymode=log, + ymax=1.5, ymin=8e-5, width=\textwidth, height=0.75\textwidth, - ymax=1.5, ymin=3e-7, ] + \addplot[Turquoise, line width=1pt, mark=*] - table [col sep=comma, x=SNR, y=BER, - discard if not={mu}{5.0}, - discard if gt={SNR}{4.5}] - {res/admm/ber_2d_20433484.csv}; - \addplot[RoyalPurple, line width=1pt, mark=*] - table [col sep=comma, x=SNR, y=BER, - discard if gt={SNR}{4.5}] - {/home/andreas/bp_20433484.csv}; + table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] + %{res/hybrid/2d_ber_fer_dfr_963965.csv}; + {res/admm/ber_2d_963965.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt] + table [x=SNR, y=FER, col sep=comma] + {res/generic/bp_963965.csv}; \end{axis} \end{tikzpicture} + + \caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=96, k=48$ + \cite[\text{96.3.965}]{mackay_enc}} \end{subfigure}% \hfill% - \begin{subfigure}[c]{0.48\textwidth} + \begin{subfigure}[t]{0.48\textwidth} \centering \begin{tikzpicture} \begin{axis}[ grid=both, - xlabel={$\rho$}, ylabel={\acs{FER}}, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, ymode=log, + ymax=1.5, ymin=8e-5, width=\textwidth, height=0.75\textwidth, - ymax=1.5, ymin=3e-7, ] + \addplot[Turquoise, line width=1pt, mark=*] - table [col sep=comma, x=SNR, y=FER, - discard if not={mu}{5.0}, - discard if gt={SNR}{4.5}] - {res/admm/ber_2d_20433484.csv}; - \addplot[RoyalPurple, line width=1pt, mark=*] - table [col sep=comma, x=SNR, y=FER, - discard if gt={SNR}{4.5}] - {/home/andreas/bp_20433484.csv}; + table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] + {res/admm/ber_2d_bch_31_26.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt] + table [x=SNR, y=FER, col sep=comma] + {res/generic/bp_bch_31_26.csv}; \end{axis} \end{tikzpicture} + + \caption{BCH code with $n=31, k=26$} + \end{subfigure}% + + \vspace{3mm} + + \begin{subfigure}[t]{0.48\textwidth} + \centering + + \begin{tikzpicture} + \begin{axis}[ + grid=both, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, + ymode=log, + ymax=1.5, ymin=8e-5, + width=\textwidth, + height=0.75\textwidth, + ] + + \addplot[Turquoise, line width=1pt, mark=*] + table [x=SNR, y=FER, col sep=comma, + discard if not={mu}{3.0}, + discard if gt={SNR}{5.5}] + {res/admm/ber_2d_20433484.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt] + table [x=SNR, y=FER, col sep=comma] + {res/generic/bp_20433484.csv}; + \end{axis} + \end{tikzpicture} + + \caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=204, k=102$ + \cite[\text{204.33.484}]{mackay_enc}} + \end{subfigure}% + \hfill% + \begin{subfigure}[t]{0.48\textwidth} + \centering + + \begin{tikzpicture} + \begin{axis}[ + grid=both, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, + ymode=log, + ymax=1.5, ymin=8e-5, + width=\textwidth, + height=0.75\textwidth, + ] + + \addplot[Turquoise, line width=1pt, mark=*] + table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] + {res/admm/ber_2d_20455187.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt, + discard if gt={SNR}{5}] + table [x=SNR, y=FER, col sep=comma] + {res/generic/bp_20455187.csv}; + \end{axis} + \end{tikzpicture} + + \caption{$\left( 5, 10 \right)$-regular 
\ac{LDPC} code with $n=204, k=102$ + \cite[\text{204.55.187}]{mackay_enc}} \end{subfigure}% + \vspace{3mm} + + \begin{subfigure}[t]{0.48\textwidth} + \centering + + \begin{tikzpicture} + \begin{axis}[ + grid=both, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, + ymode=log, + ymax=1.5, ymin=8e-5, + width=\textwidth, + height=0.75\textwidth, + ] + + \addplot[Turquoise, line width=1pt, mark=*] + table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] + {res/admm/ber_2d_40833844.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt, + discard if gt={SNR}{3}] + table [x=SNR, y=FER, col sep=comma] + {res/generic/bp_40833844.csv}; + \end{axis} + \end{tikzpicture} + + \caption{$\left( 3, 6 \right)$-regular \ac{LDPC} code with $n=204, k=102$ + \cite[\text{204.33.484}]{mackay_enc}} + \end{subfigure}% + \hfill% + \begin{subfigure}[t]{0.48\textwidth} + \centering + + \begin{tikzpicture} + \begin{axis}[ + grid=both, + xlabel={$E_b / N_0$ (dB)}, ylabel={FER}, + ymode=log, + ymax=1.5, ymin=8e-5, + width=\textwidth, + height=0.75\textwidth, + ] + + \addplot[Turquoise, line width=1pt, mark=*] + table [x=SNR, y=FER, col sep=comma, discard if not={mu}{3.0}] + {res/admm/ber_2d_pegreg252x504.csv}; + \addplot [RoyalPurple, mark=*, line width=1pt] + table [x=SNR, y=FER, col sep=comma, + discard if gt={SNR}{3}] + {res/generic/bp_pegreg252x504.csv}; + \end{axis} + \end{tikzpicture} + + \caption{LDPC code (progressive edge growth construction) with $n=504, k=252$ + \cite[\text{PEGReg252x504}]{mackay_enc}} + \end{subfigure}% + + \vspace{5mm} + \begin{subfigure}[t]{\textwidth} \centering @@ -1441,26 +1564,23 @@ in section \ref{sec:comp:res}. \begin{axis}[hide axis, xmin=10, xmax=50, ymin=0, ymax=0.4, - legend columns=3, - legend style={draw=white!15!black,legend cell align=left}] - + legend columns=1, + legend cell align={left}, + legend style={draw=white!15!black}] + \addlegendimage{Turquoise, line width=1pt, mark=*} \addlegendentry{\acs{LP} decoding using \acs{ADMM}} - \addlegendimage{RoyalPurple, line width=1pt, mark=*} - \addlegendentry{BP (200 iterations)} + + \addlegendimage{RoyalPurple, line width=1pt, mark=*, solid} + \addlegendentry{\acs{BP} (200 iterations)} \end{axis} \end{tikzpicture} \end{subfigure} - \caption{Comparison of the decoding performance of \acs{LP} decoding using - \acs{ADMM} and \acs{BP}. (3,6) regular \ac{LDPC} code with $n = 204$, $k = 102$ - \cite[\text{204.33.484}]{mackay_enc}} - \label{fig:admm:ber_fer} -\end{figure}% - -In summary, the decoding performance of \ac{LP} decoding using \ac{ADMM} comes -close to that of \ac{BP}, their difference staying in the range of -approximately $\SI{0.5}{dB}$, depending on the code in question. + \caption{Comparison of the decoding performance of \ac{LP} decoding using \ac{ADMM} + and \ac{BP} for various codes} + \label{fig:admm:bp_multiple} +\end{figure} \subsection{Computational Performance} \label{subsec:admm:comp_perf} diff --git a/latex/thesis/chapters/proximal_decoding.tex b/latex/thesis/chapters/proximal_decoding.tex index 4b1458a..1f3d009 100644 --- a/latex/thesis/chapters/proximal_decoding.tex +++ b/latex/thesis/chapters/proximal_decoding.tex @@ -1,10 +1,10 @@ -\chapter{Proximal Decoding}% +\chapter{Proximal Decoding and Implementation}% \label{chapter:proximal_decoding} In this chapter, the proximal decoding algorithm is examined. -First, the algorithm itself is described. -Then, some interesting ideas concerning the implementation are presented. 
-Simulation results are shown, based on which the behavior of the +First, the algorithm itself is described and some useful considerations +concerning the implementation are presented. +Simulation results are shown, based on which behavior of the algorithm is investigated for different codes and parameters. Finally, an improvement on proximal decoding is proposed. @@ -60,7 +60,7 @@ as is often done, and has a rather unwieldy representation:% % In order to rewrite the prior \ac{PDF} $f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right)$, -the so-called \textit{code-constraint polynomial} is introduced as:% +the so-called \textit{code-constraint polynomial} is introduced as% % \begin{align*} h\left( \tilde{\boldsymbol{x}} \right) = @@ -92,7 +92,7 @@ This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP}) and the likelihood can be rewritten using the negative log-likelihood $L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left( f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left( - \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right) $:% + \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right) $ as% % \begin{align*} \hat{\boldsymbol{x}} &= \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}} @@ -122,7 +122,7 @@ and the decoding problem is reformulated to% .\end{align*} % -For the solution of the approximate \ac{MAP} decoding problem, using the +For the solution of the approximate \ac{MAP} decoding problem using the proximal gradient method, the two parts of equation (\ref{eq:prox:objective_function}) are considered separately: the minimization of the objective function occurs in an alternating @@ -168,8 +168,8 @@ with larger $\gamma$, the constraint that $\gamma$ be small is important, as it keeps the effect of $h\left( \tilde{\boldsymbol{x}} \right) $ on the landscape of the objective function small. Otherwise, unwanted stationary points, including local minima, are introduced. -The authors say that ``in practice, the value of $\gamma$ should be adjusted -according to the decoding performance.'' \cite[Sec. 3.1]{proximal_paper}. +The authors say that ``[\ldots] in practice, the value of $\gamma$ should be adjusted +according to the decoding performance'' \cite[Sec. 3.1]{proximal_paper}. %The components of the gradient of the code-constraint polynomial can be computed as follows:% %% @@ -232,7 +232,8 @@ $\left[ -\eta, \eta \right]^n$. The iterative decoding process resulting from these considerations is depicted in algorithm \ref{alg:prox}. -\begin{genericAlgorithm}[caption={Proximal decoding algorithm for an \ac{AWGN} channel}, +\begin{genericAlgorithm}[caption={Proximal decoding algorithm for an \ac{AWGN} channel. + Based on Algorithm 1 in \protect\cite{proximal_paper}}, label={alg:prox}] $\boldsymbol{s} \leftarrow \boldsymbol{0}$ for $K$ iterations do @@ -247,9 +248,9 @@ return $\boldsymbol{\hat{c}}$ \end{genericAlgorithm} -%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Implementation Details}% -\label{sec:prox:Implementation Details} +%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +%\section{Implementation Details}% +%\label{sec:prox:Implementation Details} The algorithm was first implemented in Python because of the fast development process and straightforward debugging ability. @@ -324,8 +325,8 @@ $[-\eta, \eta]$ individually. In this section, the general behavior of the proximal decoding algorithm is analyzed. 
-The impact of the parameters $\gamma$, as well as $\omega$, $K$ and $\eta$ is -examined. +The impact of the parameters $\gamma$, as well as $\omega$, the maximum +number of iterations $K$ and $\eta$ is examined. The decoding performance is assessed based on the \ac{BER} and the \ac{FER} as well as the \textit{decoding failure rate} - the rate at which the algorithm produces results that are not valid codewords. @@ -340,14 +341,14 @@ simulations. The \ac{BER} and \ac{FER} curves in particular have been generated by producing at least 100 frame errors for each data point, unless otherwise stated. -\todo{Same text about monte carlo simulations and frame errors for admm} \subsection{Choice of Parameters} +\label{subsec:prox:choice} First, the effect of the parameter $\gamma$ is investigated. Figure \ref{fig:prox:results} shows a comparison of the decoding performance -of the proximal decoding algorithm as presented by Wadayama et al. in +of the proximal decoding algorithm as presented by Wadayama et al. \cite{proximal_paper} and the implementation realized for this work. \noindent The \ac{BER} curves for three different choices of $\gamma$ are shown, as well as the curve resulting from decoding @@ -498,7 +499,7 @@ are chosen for the decoding. The \ac{SNR} is kept constant at $\SI{4}{dB}$. The \ac{BER} exhibits similar behavior in its dependency on $\omega$ and on $\gamma$: it is minimized when keeping the value within certain -bounds, without displaying a single clear optimum. +bounds, without displaying a single distinct optimum. It is noteworthy that the decoder seems to achieve the best performance for similar values of the two step sizes. Again, this consideration applies to a multitude of different codes, as @@ -1074,7 +1075,7 @@ are minimized in an alternating manner by use of their gradients. \end{figure}% % While the initial net movement is generally directed in the right direction -owing to the gradient of the negative log-likelihood, the final oscillation +owing to the gradient of the negative log-likelihood, the resulting oscillation may well take place in a segment of space not corresponding to a valid codeword, leading to the aforementioned non-convergence of the algorithm. This also partly explains the difference in decoding performance when looking @@ -1085,7 +1086,7 @@ The higher the \ac{SNR}, the more likely the gradient of the negative log-likelihood is to point to a valid codeword. The common component of the two gradients then pulls the estimate closer to a valid codeword before the oscillation takes place. -This explains why the decoding performance is so much better for higher +This explains why the decoding performance is significantly better for higher \acp{SNR}. Looking at figure \ref{fig:prox:gradients:h} it also becomes apparent why the @@ -1201,7 +1202,7 @@ $\SI{2.80}{GHz}$ and utilizing all cores. \end{axis} \end{tikzpicture} - \caption{Timing requirements of the proximal decoding imlementation} + \caption{Timing requirements of the proximal decoding implementation} \label{fig:prox:time_comp} \end{figure}% % @@ -1212,14 +1213,14 @@ $\SI{2.80}{GHz}$ and utilizing all cores. \label{sec:prox:Improved Implementation} As mentioned earlier, frame errors seem to mainly stem from decoding failures. 
-Coupled with the fact that the \ac{BER} indicates so much better +Coupled with the fact that the \ac{BER} indicates significantly better performance than the \ac{FER}, this leads to the assumption that only a small number of components of the estimated vector may be responsible for an invalid result. If it was possible to limit the number of possibly wrong components of the estimate to a small subset, an \ac{ML}-decoding step could be performed on -a limited number of possible results (``ML-in-the-List'' as it will -subsequently be called) to improve the decoding performance. +a limited number of possible results (``ML-in-the-list'', as it is called) +to improve the decoding performance. This concept is pursued in this section. First, a guideline must be found with which to evaluate the probability that @@ -1274,11 +1275,11 @@ The datapoints are taken from a single decoding operation. Using this observation as a rule to determine the $N\in\mathbb{N}$ most probably wrong bits, all variations of the estimate with those bits modified can be generated. -An \ac{ML}-in-the-List step can then be performed to determine the +An \ac{ML}-in-the-list step can then be performed to determine the most likely candidate. This process is outlined in algorithm \ref{alg:prox:improved}. Its only difference to algorithm \ref{alg:prox} is that instead of returning -the last estimate when no valid result is reached, an ML-in-the-List step is +the last estimate when no valid result is reached, an ML-in-the-list step is performed. \begin{genericAlgorithm}[caption={Improved proximal decoding algorithm}, @@ -1293,12 +1294,12 @@ for $K$ iterations do end if end for $\textcolor{KITblue}{\text{Find }N\text{ most probably wrong bits}}$ -$\textcolor{KITblue}{\text{Generate variations } \boldsymbol{\tilde{c}}_l,\hspace{1mm} - l\in \mathbb{N}\text{ of } \boldsymbol{\hat{c}}\text{ with the }N\text{ bits modified}}$ -$\textcolor{KITblue}{\text{Compute }d_H\left( \boldsymbol{ \tilde{c}}_l, - \boldsymbol{\hat{c}} \right) \text{ for all valid codewords } \boldsymbol{\tilde{c}}_l}$ -$\textcolor{KITblue}{\text{Output }\boldsymbol{\tilde{c}}_l\text{ with lowest } - d_H\left( \boldsymbol{ \tilde{c}}_l, \boldsymbol{\hat{c}} \right)}$ +$\textcolor{KITblue}{\text{Generate variations } \hat{\boldsymbol{c}}_l,\hspace{1mm} + l\in \mathbb{N}\text{ of } \hat{\boldsymbol{c}}\text{ with the }N\text{ bits modified}}$ +$\textcolor{KITblue}{\text{Compute }d_H\left( \hat{\boldsymbol{c}}_l, + \hat{\boldsymbol{c}} \right) \text{ for all valid codewords } \hat{\boldsymbol{c}}_l}$ +$\textcolor{KITblue}{\text{Output }\hat{\boldsymbol{c}}_l\text{ with lowest } + d_H\left( \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \right)}$ \end{genericAlgorithm} %\todo{Not hamming distance, correlation} @@ -1316,7 +1317,7 @@ datapoints at $\SI{6}{dB}$, $\SI{6.5}{dB}$ and $\SI{7}{dB}$ are The gain seems to depend on the value of $\gamma$, as well as becoming more pronounced for higher \ac{SNR} values. This is to be expected, since with higher \ac{SNR} values the number of bit -errors decreases, making the correction of those errors in the ML-in-the-List +errors decreases, making the correction of those errors in the ML-in-the-list step more likely. In figure \ref{fig:prox:improved:comp} the decoding performance between proximal decoding and the improved algorithm is compared for a number @@ -1461,7 +1462,7 @@ In some cases, a gain of up to $\SI{1}{dB}$ or higher can be achieved. 
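For illustration, the following Python sketch mirrors the ML-in-the-list step of the improved decoding algorithm listed above: it enumerates all $2^N$ modifications of the $N$ bits flagged as most probably wrong and, among the candidates satisfying all parity checks, returns the one closest to the original estimate. It is a simplified stand-in rather than the thesis code; `c_hat` (the hard-decision estimate as a 0/1 integer array), `suspect_idx` (the $N$ flagged positions) and a dense binary parity-check matrix `H` are assumed inputs.

```python
import itertools
import numpy as np

def ml_in_the_list(c_hat, suspect_idx, H):
    """Hypothetical sketch: flip every subset of the N suspect bits and,
    among the candidates with H c = 0 (mod 2), return the one with the
    smallest Hamming distance to the original estimate c_hat."""
    best, best_dist = None, np.inf
    for flips in itertools.product((0, 1), repeat=len(suspect_idx)):
        cand = c_hat.copy()
        cand[list(suspect_idx)] ^= np.asarray(flips, dtype=cand.dtype)
        if np.any((H @ cand) % 2):              # parity check fails: not a valid codeword
            continue
        dist = int(np.count_nonzero(cand != c_hat))
        if dist < best_dist:
            best, best_dist = cand, dist
    return best if best is not None else c_hat  # fall back to the original estimate
```

With $N=12$, as used in the simulations, at most $2^{12} = 4096$ candidates need to be checked per failed decoding.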
Interestingly, the improved algorithm does not have much different time complexity than proximal decoding. -This is the case, because the ML-in-the-List step is only performed when the +This is the case, because the ML-in-the-list step is only performed when the proximal decoding algorithm produces an invalid result, which in absolute terms happens relatively infrequently. This is illustrated in figure \ref{fig:prox:time_complexity_comp}, where the @@ -1504,7 +1505,7 @@ theoretical considerations. % In conclusion, the decoding performance of proximal decoding can be improved -by appending an ML-in-the-List step when the algorithm does not produce a +by appending an ML-in-the-list step when the algorithm does not produce a valid result. The gain can in some cases be as high as $\SI{1}{dB}$ and is achievable with negligible computational performance penalty. diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex index 63358eb..2c16e37 100644 --- a/latex/thesis/chapters/theoretical_background.tex +++ b/latex/thesis/chapters/theoretical_background.tex @@ -1,13 +1,13 @@ \chapter{Theoretical Background}% \label{chapter:theoretical_background} -In this chapter, the theoretical background necessary to understand this -work is given. +In this chapter, the theoretical background necessary to understand the +decoding algorithms examined in this work is given. First, the notation used is clarified. The physical layer is detailed - the used modulation scheme and channel model. A short introduction to channel coding with binary linear codes and especially \ac{LDPC} codes is given. -The established methods of decoding LPDC codes are briefly explained. +The established methods of decoding \ac{LDPC} codes are briefly explained. Lastly, the general process of decoding using optimization techniques is described and an overview of the utilized optimization methods is given. @@ -31,7 +31,7 @@ Additionally, a shorthand notation will be used, denoting a set of indices as% \hspace{5mm} m < n, \hspace{2mm} m,n\in\mathbb{Z} .\end{align*} % -In order to designate elemen-twise operations, in particular the \textit{Hadamard product} +In order to designate element-wise operations, in particular the \textit{Hadamard product} and the \textit{Hadamard power}, the operator $\circ$ will be used:% % \begin{alignat*}{3} @@ -45,7 +45,7 @@ and the \textit{Hadamard power}, the operator $\circ$ will be used:% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\section{Preliminaries: Channel Model and Modulation} +\section{Channel Model and Modulation} \label{sec:theo:Preliminaries: Channel Model and Modulation} In order to transmit a bit-word $\boldsymbol{c} \in \mathbb{F}_2^n$ of length @@ -82,7 +82,7 @@ conducting this process, whereby \textit{data words} are mapped onto longer \textit{codewords}, which carry redundant information. \Ac{LDPC} codes have become especially popular, since they are able to reach arbitrarily small probabilities of error at code rates up to the capacity -of the channel \cite[Sec. II.B.]{mackay_rediscovery} while having a structure +of the channel \cite[Sec. II.B.]{mackay_rediscovery}, while having a structure that allows for very efficient decoding. 
The lengths of the data words and codewords are denoted by $k\in\mathbb{N}$ @@ -97,7 +97,7 @@ the number of parity-checks:% \boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\} .\end{align*} % -A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codword +A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix} $\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:% % @@ -527,8 +527,8 @@ interpreted componentwise.} % where $\boldsymbol{x}, \boldsymbol{\gamma} \in \mathbb{R}^n$, $\boldsymbol{b} \in \mathbb{R}^m$ and $\boldsymbol{A}\in\mathbb{R}^{m \times n}$. -A technique called \textit{Lagrangian relaxation} \cite[Sec. 11.4]{intro_to_lin_opt_book} -can then be applied. +A technique called \textit{Lagrangian relaxation} can then be applied +\cite[Sec. 11.4]{intro_to_lin_opt_book}. First, some of the constraints are moved into the objective function itself and weights $\boldsymbol{\lambda}$ are introduced. A new, relaxed problem is formulated as @@ -660,7 +660,7 @@ $\boldsymbol{A} = \begin{bmatrix} \ldots & \boldsymbol{A}_N \end{bmatrix}$. -The minimization of each term can then happen in parallel, in a distributed +The minimization of each term can happen in parallel, in a distributed fashion \cite[Sec. 2.2]{distr_opt_book}. In each minimization step, only one subvector $\boldsymbol{x}_i$ of $\boldsymbol{x}$ is considered, regarding all other subvectors as being diff --git a/latex/thesis/thesis.tex b/latex/thesis/thesis.tex index f2741bd..ee23e1b 100644 --- a/latex/thesis/thesis.tex +++ b/latex/thesis/thesis.tex @@ -6,7 +6,7 @@ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -\thesisTitle{Application o Optimization Algorithms for Channel Decoding} +\thesisTitle{Application of Optimization Algorithms for Channel Decoding} \thesisType{Bachelor's Thesis} \thesisAuthor{Andreas Tsouchlos} \thesisAdvisor{Prof. Dr.-Ing. Laurent Schmalen}