diff --git a/letter.tex b/letter.tex
index a651373..f90a4f4 100644
--- a/letter.tex
+++ b/letter.tex
@@ -9,6 +9,7 @@
 \usepackage{dsfont}
 \usepackage{mleftright}
 \usepackage{bbm}
+\usepackage{bm}
 \usepackage[
 backend=biber,
 style=ieee,
@@ -201,43 +202,43 @@ codewords, the lengths of which are denoted by
 $k \in \mathbb{N}$ and $n \in \mathbb{N}$, respectively, with $k \le n$.
 The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
 linear code can be represented using the parity-check matrix
-$\boldsymbol{H} \in \mathbb{F}_2^{m \times n} $, where $m$ represents the
+$\bm{H} \in \mathbb{F}_2^{m \times n}$, where $m$ represents the
 number of parity-checks:
 %
 \begin{align*}
-    \mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
-    \boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+    \mathcal{C} := \left\{ \bm{c} \in \mathbb{F}_2^n :
+    \bm{H}\bm{c}^\text{T} = \bm{0} \right\}
 \end{align*}
 %
 The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
-correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
+correspond to a parity check, i.e., a row of $\bm{H}$.
 The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
 to the components of a codeword being subjected to a parity check, i.e.,
-to the columns of $\boldsymbol{H}$.
+to the columns of $\bm{H}$.
 The neighborhood of a parity check $j$, i.e., the set of indices of
-components relevant for the according parity check, is denoted by
-$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
+components relevant for the corresponding parity check, is denoted by
+$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \bm{H}\negthinspace_{j,i} = 1 \right\},
 \hspace{2mm} j \in \mathcal{J}$.
-In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
-mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
-$\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
-$ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
+In order to transmit a codeword $\bm{c} \in \mathbb{F}_2^n$, it is
+mapped onto a vector of \textit{binary phase shift keying} (BPSK) symbols via
+$\bm{x} = \bm{1} - 2\bm{c}$, with
+$\bm{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
 AWGN channel.
-The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
+The received vector $\bm{y} \in \mathbb{R}^n$ is decoded to obtain an
 estimate of the transmitted codeword, denoted as
-$\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
-A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
-and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$,
-the former denoting the BPSK symbol physically transmitted over the channel
+$\hat{\bm{c}} \in \mathbb{F}_2^n$.
+A distinction is made between $\bm{x} \in \left\{\pm 1\right\}^n$
+and $\tilde{\bm{x}} \in \mathbb{R}^n$,
+the former denoting the BPSK symbols physically transmitted over the channel
 and the latter being used as a variable during the optimization process.
-The posterior probability of having transmitted $\boldsymbol{x}$ when receiving
-$\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF)
-$P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
+The posterior probability of having transmitted $\bm{x}$ when receiving
+$\bm{y}$ is expressed as a \textit{probability mass function} (PMF)
+$P_{\bm{X}\mid\bm{Y}}(\bm{x} \mid \bm{y})$.
+Likewise, the likelihood of receiving $\bm{y}$ upon transmitting
+$\bm{x}$ is expressed as a \textit{probability density function} (PDF)
+$f_{\bm{Y}\mid\bm{X}}(\bm{y} \mid \bm{x})$.

 %%%%%%%%%%%%%%%%%%%%%
@@ -258,7 +259,7 @@
 One such expression, formulated under the assumption of BPSK, is the
 \textit{code-constraint polynomial} \cite{proximal_paper}
 %
 \begin{align*}
-    h( \tilde{\boldsymbol{x}} ) =
+    h( \tilde{\bm{x}} ) =
     \underbrace{\sum_{i=1}^{n} \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
     + \underbrace{\sum_{j=1}^{m} \left[
@@ -274,45 +275,45 @@
 information regarding the code.
 The channel model can be considered using the negative log-likelihood
 %
 \begin{align*}
-    L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
-    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
-    \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
+    L \mleft( \bm{y} \mid \tilde{\bm{x}} \mright) = -\ln\mleft(
+    f_{\bm{Y} \mid \tilde{\bm{X}}} \mleft(
+    \bm{y} \mid \tilde{\bm{x}} \mright) \mright)
 .\end{align*}
 %
-The information about the channel and the code are consolidated in the
+The information about the channel and the code is consolidated in the
 objective function \cite{proximal_paper}
 %
 \begin{align*}
-    g \mleft( \tilde{\boldsymbol{x}} \mright)
-    = L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
-    + \gamma h\mleft( \tilde{\boldsymbol{x}} \mright),
+    g \mleft( \tilde{\bm{x}} \mright)
+    = L \mleft( \bm{y} \mid \tilde{\bm{x}} \mright)
+    + \gamma h\mleft( \tilde{\bm{x}} \mright),
     \hspace{5mm} \gamma > 0%
 .\end{align*}
 %
-The objective function is minimized using the proximal gradient method, which
-amounts to iteratively performing two gradient-descent steps
-\cite{proximal_paper} with the given objective function and considering AWGN
-channels.
-To this end, two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are
+The objective function is minimized using the proximal gradient method, which,
+for this objective function and AWGN channels, amounts to iteratively
+performing two gradient-descent steps \cite{proximal_paper}.
+To this end, two helper variables, $\bm{r}$ and $\bm{s}$, are
 introduced, describing the result of each of the two steps:
 %
 \begin{alignat}{3}
-    \boldsymbol{r} &\leftarrow \boldsymbol{s}
-    - \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
+    \bm{r} &\leftarrow \bm{s}
+    - \omega \mleft( \bm{s} - \bm{y} \mright)
     \hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
-    \boldsymbol{s} &\leftarrow \boldsymbol{r}
-    - \gamma \nabla h\mleft( \boldsymbol{r} \mright),
+    \bm{s} &\leftarrow \bm{r}
+    - \gamma \nabla h\mleft( \bm{r} \mright),
     \hspace{5mm} &&\gamma > 0 \label{eq:s_update}
 .\end{alignat}
 %
-An equation for determining $\nabla h(\boldsymbol{r})$ is given in
+An equation for determining $\nabla h(\bm{r})$ is given in
 \cite{proximal_paper}.
-It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
-represent $\tilde{\boldsymbol{x}}$ during different
+It should be noted that the variables $\bm{r}$ and $\bm{s}$
+represent $\tilde{\bm{x}}$ during different
 stages of the decoding process.
 As the gradient of the code-constraint polynomial can attain very large values
 in some cases, an additional step is introduced to ensure numerical stability:
-every current estimate $\boldsymbol{s}$ is projected onto
+each intermediate estimate $\bm{s}$ is projected onto
 $\left[-\eta, \eta\right]^n$ by a projection
 $\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$
 is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
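As an aside, the two update steps \eqref{eq:r_update} and \eqref{eq:s_update} together with the projection $\Pi_\eta$ are straightforward to prototype. Below is a minimal NumPy sketch; the parity-check matrix, the received vector, and all parameter values are toy placeholders, and since the parity term of $h$ is truncated in the hunk above, `grad_h` assumes the product form $\sum_j \left( \prod_{i \in \mathcal{N}_c(j)} \tilde{x}_i - 1 \right)^2$ for it -- the exact expression is the one given in \cite{proximal_paper}.

```python
import numpy as np

# Toy parity-check matrix; H, y, and all parameters below are
# placeholders, not the setup simulated in the letter.
H = np.array([[1, 1, 0],
              [0, 1, 1]])
checks = [np.flatnonzero(row) for row in H]  # neighborhoods N_c(j)

def grad_h(x, checks):
    # Bipolar constraint: d/dx_i of sum_i (x_i^2 - 1)^2 is 4 x_i (x_i^2 - 1).
    g = 4.0 * x * (x**2 - 1.0)
    # Parity term (assumed product form, see lead-in): for each check j,
    # d/dx_i of (prod_{N_c(j)} x - 1)^2 is 2 (prod - 1) * prod_{N_c(j)\{i}} x.
    for idx in checks:
        sub = x[idx]
        p = np.prod(sub)
        for k, i in enumerate(idx):
            g[i] += 2.0 * (p - 1.0) * np.prod(np.delete(sub, k))
    return g

def proximal_decode(y, H, checks, K=200, omega=0.05, gamma=0.05, eta=1.5):
    s = np.zeros_like(y)
    for _ in range(K):
        r = s - omega * (s - y)                 # Eq. (r_update): step on L
        s = np.clip(r - gamma * grad_h(r, checks), -eta, eta)  # Eq. (s_update) + projection
        c_hat = (s <= 0).astype(int)            # hard decision, c_hat = 1_{s <= 0}
        if not ((H @ c_hat) % 2).any():         # parity check over F_2
            break                               # valid codeword reached
    return c_hat

print(proximal_decode(np.array([0.9, -0.2, 1.1]), H, checks))
```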
@@ -324,16 +325,16 @@
 presented in Algorithm \ref{alg:proximal_decoding}.
     \label{alg:proximal_decoding}
     \begin{algorithmic}
-    \STATE $\boldsymbol{s} \leftarrow \boldsymbol{0}$
+    \STATE $\bm{s} \leftarrow \bm{0}$
     \STATE \textbf{for} $K$ iterations \textbf{do}
-    \STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
-    \STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
-    \STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathbbm{1}_{\left\{ \boldsymbol{s} \le 0 \right\}}$
-    \STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
-    \STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \hspace{5mm} $\bm{r} \leftarrow \bm{s} - \omega \left( \bm{s} - \bm{y} \right)$
+    \STATE \hspace{5mm} $\bm{s} \leftarrow \Pi_\eta \left(\bm{r} - \gamma \nabla h\left( \bm{r} \right) \right)$
+    \STATE \hspace{5mm} $\hat{\bm{c}} \leftarrow \mathbbm{1}_{\left\{ \bm{s} \le 0 \right\}}$
+    \STATE \hspace{5mm} \textbf{if} $\bm{H}\hat{\bm{c}}^\text{T} = \bm{0}$ \textbf{do}
+    \STATE \hspace{10mm} \textbf{return} $\hat{\bm{c}}$
     \STATE \hspace{5mm} \textbf{end if}
     \STATE \textbf{end for}
-    \STATE \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \textbf{return} $\hat{\bm{c}}$
     \end{algorithmic}
 \end{algorithm}
@@ -481,8 +482,8 @@ optimization process.%
             at={(0.9775,0.97)},anchor=north east}]
             \addlegendimage{mark=none}
             \addlegendentry{
-                $\nabla L\left(\boldsymbol{y}
-                \mid \tilde{\boldsymbol{x}}\right)$
+                $\nabla L\left(\bm{y}
+                \mid \tilde{\bm{x}}\right)$
             };
         \end{axis}
     \end{tikzpicture}
@@ -530,22 +531,22 @@ optimization process.%
             empty legend,
             at={(0.9775,0.97)},anchor=north east}]
             \addlegendimage{mark=none}
-            \addlegendentry{$\nabla h\left(\tilde{\boldsymbol{x}}\right)$};
+            \addlegendentry{$\nabla h\left(\tilde{\bm{x}}\right)$};
         \end{axis}
     \end{tikzpicture}
     \fi
     \caption{Gradients
-        $\nabla L\left(\boldsymbol{y} \mid \tilde{\boldsymbol{x}}\right)$
-        and $\nabla h \left( \tilde{\boldsymbol{x}} \right)$ for a repetition
+        $\nabla L\left(\bm{y} \mid \tilde{\bm{x}}\right)$
+        and $\nabla h \left( \tilde{\bm{x}} \right)$ for a repetition
         code with $n=2$.
-        Shown for $\boldsymbol{y} = \begin{bmatrix} -0.5 & 0.8 \end{bmatrix}$.
+        Shown for $\bm{y} = \begin{bmatrix} -0.5 & 0.8 \end{bmatrix}$.
     }
     \label{fig:grad}
 \end{figure}%
 %
 In Fig. \ref{fig:prox:convergence_large_n}, we consider only component
-$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate during a
-decoding operation for the LDPC code used also for Fig. 1.
-Two qualities may be observed.
-First, we observe the average absolute values of the two gradients are equal,
+$\left(\tilde{\bm{x}}\right)_1$ of the estimate during a
+decoding operation for the LDPC code also used for Fig. 1.
+Two properties may be observed.
+First, the average absolute values of the two gradients are equal,
@@ -585,14 +586,14 @@ oscillate after a certain number of iterations.%
             table [col sep=comma, x=k, y=grad_h_0,
                 discard if gt={k}{300}]
                 {res/extreme_components_20433484_combined.csv};
-            \addlegendentry{$\left(\tilde{\boldsymbol{x}}\right)_1$}
+            \addlegendentry{$\left(\tilde{\bm{x}}\right)_1$}
             \addlegendentry{$\left(\nabla L\right)_1$}
             \addlegendentry{$\left(\nabla h\right)_1$}
         \end{axis}
     \end{tikzpicture}
     \fi
-    \caption{Visualization of component $\left(\tilde{\boldsymbol{x}}\right)_1$
-        for a decoding operation for a (3,6) regular LDPC code with
+    \caption{Visualization of component $\left(\tilde{\bm{x}}\right)_1$
+        during a decoding operation for a (3,6) regular LDPC code with
         $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
         Parameters used for simulation: $\gamma = 0.05, \omega = 0.05,
@@ -608,7 +609,7 @@ Considering the magnitude of the oscillation of the gradient of the code constra
 polynomial, some interesting behavior may be observed.
 Fig. \ref{fig:p_error} shows the probability that a component of the estimate
 is wrong, determined through a Monte Carlo simulation, when the components of
-$\boldsymbol{c}$ are ordered from smallest to largest oscillation of
+$\bm{c}$ are ordered from smallest to largest oscillation of
 $\left(\nabla h\right)_i$.

 The lower the magnitude of the oscillation, the higher the probability that the
@@ -640,7 +641,7 @@ the probability that a given component was decoded incorrectly.%
     \fi
     \caption{Probability that a component of the estimated codeword
-        $\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
+        $\hat{\bm{c}} \in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
         LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
         The indices $i'$ are ordered such that the amplitude of oscillation of
         $\left(\nabla h\right)_{i'}$ increases with $i'$.
@@ -656,7 +657,7 @@ If a valid codeword has been reached, i.e., if the algorithm has converged, we
 return this solution.
 Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion
 presented above.
-Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$,
-all variations of words with the selected components modified are then
-generated and an ``ML-in-the-list'' step is performed.
+Beginning with the most recent estimate $\hat{\bm{c}} \in \mathbb{F}_2^n$,
+all $2^N$ variations of this word with the selected components modified are
+then generated and an ``ML-in-the-list'' step is performed.
@@ -665,13 +666,13 @@
     \label{alg:ml-in-the-list}
     \begin{algorithmic}
-    \STATE Find valid codewords under $\left(\hat{\boldsymbol{c}}_{l}\right)_{1=1}^{2^N}$
+    \STATE Find valid codewords among $\left(\hat{\bm{c}}_{l}\right)_{l=1}^{2^N}$
     \STATE \textbf{if} no valid codewords exist
-    \STATE \hspace{5mm} Compute $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$ for all variations $\boldsymbol{c}_l$
+    \STATE \hspace{5mm} Compute $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ for all variations $\hat{\bm{c}}_l$
     \STATE \textbf{else}
-    \STATE \hspace{5mm} Compute $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$ for valid codewords
+    \STATE \hspace{5mm} Compute $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ for the valid codewords
     \STATE \textbf{end if}
-    \STATE \textbf{return} $\hat{\boldsymbol{c}}_l$ with highest $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$
+    \STATE \textbf{return} $\hat{\bm{c}}_l$ with the highest $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$
     \end{algorithmic}
 \end{algorithm}%
 %
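The bit-selection criterion and the list step can be sketched in the same spirit as the snippet above. Two points are assumptions of this sketch rather than statements of the letter: the ``magnitude of the oscillation'' of $\left(\nabla h\right)_i$ is measured as max minus min over a trailing window of iterations, and the inner product $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ is taken between the bipolar images $\bm{1} - 2\bm{c}$, so that it counts agreements minus disagreements.

```python
import itertools
import numpy as np

def select_bits(grad_trace, N):
    # Selection criterion from the letter: components whose (grad h)_i
    # oscillates with the SMALLEST amplitude are the most likely to be
    # wrong.  grad_trace holds the last T gradients, shape (T, n);
    # amplitude as max - min over that window is our assumption.
    amplitude = grad_trace.max(axis=0) - grad_trace.min(axis=0)
    return np.argsort(amplitude)[:N]        # N smallest-amplitude indices

def ml_in_the_list(c_hat, idx, H):
    # Generate all 2^N variations of c_hat on the selected indices.
    candidates = []
    for flips in itertools.product((0, 1), repeat=len(idx)):
        c = c_hat.copy()
        c[idx] ^= np.array(flips)
        candidates.append(c)
    # Prefer valid codewords; otherwise fall back to all variations.
    valid = [c for c in candidates if not ((H @ c) % 2).any()]
    pool = valid if valid else candidates
    # Score against the estimate in the bipolar domain (assumption, see above).
    x_hat = 1 - 2 * c_hat
    return max(pool, key=lambda c: np.dot(1 - 2 * c, x_hat))
```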
@@ -681,18 +682,18 @@
     \label{alg:improved}
     \begin{algorithmic}
-    \STATE $\boldsymbol{s} \leftarrow \boldsymbol{0}$
+    \STATE $\bm{s} \leftarrow \bm{0}$
     \STATE \textbf{for} $K$ iterations \textbf{do}
-    \STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
-    \STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
-    \STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathds{1} \left\{ \text{sign}\left( \boldsymbol{s} \right) = -1 \right\}$
-    \STATE \hspace{10mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
-    \STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \hspace{5mm} $\bm{r} \leftarrow \bm{s} - \omega \left( \bm{s} - \bm{y} \right)$
+    \STATE \hspace{5mm} $\bm{s} \leftarrow \Pi_\eta \left(\bm{r} - \gamma \nabla h\left( \bm{r} \right) \right)$
+    \STATE \hspace{5mm} $\hat{\bm{c}} \leftarrow \mathbbm{1}_{\left\{ \bm{s} \le 0 \right\}}$
+    \STATE \hspace{5mm} \textbf{if} $\bm{H}\hat{\bm{c}}^\text{T} = \bm{0}$ \textbf{do}
+    \STATE \hspace{10mm} \textbf{return} $\hat{\bm{c}}$
     \STATE \hspace{5mm} \textbf{end if}
     \STATE \textbf{end for}
-    \STATE $\textcolor{KITblue}{\text{Estimate $N$ wrong bit indices $\mathcal{I} = \{i_1,\ldots,i_N\}$}}$
-    \STATE $\textcolor{KITblue}{\text{Generate candidate list $\left(\hat{\boldsymbol{c}}_{l}\right)_{l=1}^{2^N}$ by varying bits in $\mathcal{I}$}}$\vspace{1mm}
-    \STATE $\textcolor{KITblue}{\textbf{return ml\textunderscore in\textunderscore the\textunderscore list}\left(\left(\hat{\boldsymbol{c}}_l\right)_{1=1}^{2^N}\right)}$
+    \STATE $\textcolor{KITblue}{\text{Estimate $N$ wrong bit indices $\mathcal{I}_N = \{i_1,\ldots,i_N\}$}}$
+    \STATE $\textcolor{KITblue}{\text{Generate candidate list $\left(\hat{\bm{c}}_{l}\right)_{l=1}^{2^N}$ by varying bits in $\mathcal{I}_N$}}$\vspace{1mm}
+    \STATE $\textcolor{KITblue}{\textbf{return ml\textunderscore in\textunderscore the\textunderscore list}\left(\left(\hat{\bm{c}}_l\right)_{l=1}^{2^N}\right)}$
     \end{algorithmic}
 \end{algorithm}
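Combining the pieces, the decoder of Algorithm \ref{alg:improved} could then be prototyped as the proximal loop followed by the two highlighted steps. The window of 50 iterations kept for the oscillation measurement and all parameter defaults are arbitrary choices of this sketch; `grad_h`, `select_bits`, and `ml_in_the_list` are the toy helpers from the previous snippets.

```python
def improved_decode(y, H, checks, K=200, N=4,
                    omega=0.05, gamma=0.05, eta=1.5, window=50):
    s = np.zeros_like(y)
    trace = []                                  # history of grad h for select_bits
    for _ in range(K):
        r = s - omega * (s - y)
        g = grad_h(r, checks)
        trace.append(g)
        s = np.clip(r - gamma * g, -eta, eta)
        c_hat = (s <= 0).astype(int)
        if not ((H @ c_hat) % 2).any():
            return c_hat                        # converged to a valid codeword
    # No valid codeword after K iterations: estimate N suspect bits and run
    # the ML-in-the-list step on the 2^N candidate words.
    idx = select_bits(np.array(trace[-window:]), N)
    return ml_in_the_list(c_hat, idx, H)
```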