From 07107cb63e4c97c3411d9b222c1b36aa28020cd6 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Sun, 7 Jan 2024 21:52:43 +0100
Subject: [PATCH] Correct Preliminaries

---
 letter.tex | 56 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/letter.tex b/letter.tex
index 8481aa7..8d88d8d 100644
--- a/letter.tex
+++ b/letter.tex
@@ -7,6 +7,7 @@
 \usepackage{algorithm}
 \usepackage{siunitx}
 \usepackage{dsfont}
+\usepackage{mleftright}
 \usepackage{tikz}
 \usetikzlibrary{spy, arrows.meta,arrows}
 
@@ -192,31 +193,31 @@ number of parity-checks:
 \end{align*}
 %
-The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each correspond
-to a parity check, i.e., row of $\boldsymbol{H}$.
+The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
+correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
 The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
-to the components of a codeword being subjected to a parity check, i.e., to
-columns of $\boldsymbol{H}$.
+to the components of a codeword being subjected to a parity check, i.e.,
+to the columns of $\boldsymbol{H}$.
 The neighborhood of a parity check $j$, i.e., the set of indices of components
-relevant for the according parity check, is denoted by
-$N_c(j) := \left\{i \mid i \in \mathcal{I}, \boldsymbol{H}_{j,i} = 1 \right\},
+relevant for the corresponding parity check, is denoted by
+$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
 \hspace{2mm} j \in \mathcal{J}$.
 In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
 mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
 $\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
-$ \boldsymbol{x} \in \left\{-1, 1\right\}^n$, which is then transmitted over an
+$ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
 AWGN channel.
 The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
 estimate of the transmitted codeword, denoted as
 $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
-A distinction is made between $\boldsymbol{x} \in \left\{-1, 1\right\}^n$
+A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
 and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$, the former denoting the BPSK
 symbol physically transmitted over the channel and the latter being used as a
 variable during the optimization process.
 The posterior probability of having transmitted $\boldsymbol{x}$ when
 receiving $\boldsymbol{y}$ is expressed as a \textit{probability mass
 function} (PMF)
-$p_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
+$P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
 Likewise, the likelihood of receiving $\boldsymbol{y}$ upon transmitting
 $\boldsymbol{x}$ is expressed as a \textit{probability density function}
 (PDF)
 $f_{\boldsymbol{Y}\mid\boldsymbol{X}}(\boldsymbol{y} \mid \boldsymbol{x})$.
@@ -231,25 +232,24 @@ With proximal decoding, the proximal gradient method
 \cite{proximal_algorithms}
 is used to solve a non-convex optimization formulation of the MAP decoding
 problem.
-When making the equal probability assumption for all codewords, MAP and ML
+With the equal prior probability assumption for all codewords, MAP and ML
 decoding are equivalent and, specifically for AWGN channels, correspond to a
 nearest-neighbor decision.
-For this reason, decoding can be done using a figure of merit that describes
-the distance from a given vector to a codeword.
+For this reason, decoding can be carried out using a figure of merit that
+describes the distance from a given vector to a codeword.
 One such expression, formulated under the assumption of BPSK, is the
 \textit{code-constraint polynomial} \cite{proximal_paper}
 %
 \begin{align*}
-    h\left( \tilde{\boldsymbol{x}} \right) =
+    h \mleft( \tilde{\boldsymbol{x}} \mright) =
     \underbrace{\sum_{i=1}^{n}
-    \left( \tilde{x_i}^2-1 \right) ^2}_{\text{Bipolar constraint}}
+    \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
     + \underbrace{\sum_{j=1}^{m} \left[
-    \left( \prod_{i\in N_c \left( j \right) } \tilde{x_i} \right)
+    \left( \prod_{i\in \mathcal{N}_c \left( j \right) } \tilde{x}_i \right)
     -1 \right] ^2}_{\text{Parity constraint}}
 .\end{align*}%
 %
-Its intent is to penalize vectors far from a codeword and favor those close
-to one.
+Its intent is to penalize vectors far from a codeword.
 It comprises two terms: one representing the bipolar constraint and one
 representing the parity constraint, incorporating all of the
 information regarding the code.
@@ -257,18 +257,18 @@ information regarding the code.
 The channel model can be considered using the negative log-likelihood
 %
 \begin{align*}
-L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
-    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}}\left(
-    \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) \right)
+    L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
+    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
+    \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
 .\end{align*}
 %
-The information about the channel and the code are consolidated in the
+The information about the channel and the code is consolidated in the
 objective function \cite{proximal_paper}
 %
 \begin{align*}
-    g\left( \tilde{\boldsymbol{x}} \right)
-    = L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
-    + \gamma h\left( \tilde{\boldsymbol{x}} \right),
+    g \mleft( \tilde{\boldsymbol{x}} \mright)
+    = L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
+    + \gamma h \mleft( \tilde{\boldsymbol{x}} \mright),
     \hspace{5mm} \gamma > 0%
 .\end{align*}
 %
@@ -280,17 +280,17 @@ introduced, describing the result of each of the two steps:
 %
 \begin{alignat}{3}
     \boldsymbol{r} &\leftarrow \boldsymbol{s}
-    - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right)
-    \hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
+    - \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
+    \hspace{5mm} &&\omega > 0 \label{eq:r_update}\\
     \boldsymbol{s} &\leftarrow \boldsymbol{r}
-    - \gamma \nabla h\left( \boldsymbol{r} \right),
+    - \gamma \nabla h \mleft( \boldsymbol{r} \mright),
     \hspace{5mm} &&\gamma > 0 \label{eq:s_update}
 .\end{alignat}
 %
 An equation for determining $\nabla h(\boldsymbol{r})$ is given in
 \cite{proximal_paper}.
 It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
-really represent $\tilde{\boldsymbol{x}}$ during different
+represent $\tilde{\boldsymbol{x}}$ during different
 stages of the decoding process.
 
 As the gradient of the code-constraint polynomial can attain very large values
@@ -300,7 +300,7 @@
 $\left[-\eta, \eta\right]^n$ by a projection
 $\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where
 $\eta$ is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
 The resulting decoding process as described in \cite{proximal_paper} is
-presented in algorithm \ref{alg:proximal_decoding}.
+presented in Algorithm \ref{alg:proximal_decoding}.
 \begin{algorithm}
 \caption{Proximal decoding algorithm for an AWGN channel.}
@@ -395,7 +395,7 @@ decoded bits.%
 \end{figure}%
 %
 An approach for lowering the FER might then be to append an ``ML-in-the-list''
-\cite{ml_in_the_list} step to the decoding process shown in algorithm
+\cite{ml_in_the_list} step to the decoding process shown in Algorithm
 \ref{alg:proximal_decoding}.
-This step would consist of determining the $N \in \mathbb{N}$ most probably
-wrong bits, finding all variations of the current estimate with those bits
+This step would consist of determining the $N \in \mathbb{N}$ bits most
+likely to be wrong, finding all variations of the current estimate with those bits
@@ -610,7 +610,7 @@ the probability that a given component was decoded incorrectly.%
 \label{fig:p_error}
 \end{figure}
 
-The complete improved algorithm is depicted in algorithm \ref{alg:improved}.
+The complete improved algorithm is depicted in Algorithm \ref{alg:improved}.
 First, the proximal decoding algorithm is applied.
 If a valid codeword has been reached, i.e., if the algorithm has
-converged, this is the solution returned.
+converged, it is returned as the solution.
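
For reference, the two-step iteration in the hunks above (Eqs. \ref{eq:r_update} and \ref{eq:s_update}, followed by the projection $\Pi_\eta$) can be sketched in a few lines of Python. This is a minimal illustration of the procedure as described, not the implementation from \cite{proximal_paper}: the closed-form gradient, the parameter values, the stopping rule, and the identifiers (grad_h, proximal_decode, checks) are assumptions, and applying the clipping to the iterate is a guess, since the sentence around the @@ -300 hunk is truncated by the diff.

import numpy as np

def grad_h(r, checks):
    """Gradient of the code-constraint polynomial h at r.

    checks[j] holds the index set N_c(j) of the variable nodes taking part
    in parity check j; h(x) is the sum of the bipolar term (x_i^2 - 1)^2
    and the parity term (prod_{i in N_c(j)} x_i - 1)^2.
    """
    g = 4.0 * r * (r ** 2 - 1.0)            # derivative of the bipolar term
    for idx in checks:
        p = np.prod(r[idx])
        # d/dr_i (p - 1)^2 = 2 (p - 1) p / r_i; simplification assuming r_i != 0
        g[idx] += 2.0 * (p - 1.0) * p / r[idx]
    return g

def proximal_decode(y, checks, gamma=0.05, omega=0.05, eta=1.5, max_iter=200):
    """Alternate the two gradient steps until every parity check holds."""
    s = y.copy()
    for _ in range(max_iter):
        r = s - omega * (s - y)             # step on the AWGN negative log-likelihood
        s = r - gamma * grad_h(r, checks)   # step on gamma * h
        s = np.clip(s, -eta, eta)           # projection Pi_eta onto [-eta, eta]^n
        c_hat = (s < 0).astype(int)         # hard decision, inverting x = 1 - 2c
        if all(c_hat[idx].sum() % 2 == 0 for idx in checks):
            break                           # valid codeword reached
    return c_hat

For a parity-check matrix given as a 0/1 NumPy array H, the neighborhoods $\mathcal{N}_c(j)$ used above would be obtained as checks = [np.flatnonzero(row) for row in H].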
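
The ``ML-in-the-list'' step referenced in the @@ -395 hunk admits a similarly compact sketch: flip every combination of the $N$ bits most likely to be wrong and keep the valid codeword closest to $\boldsymbol{y}$, which for the AWGN channel with equal priors is the ML choice within the list. Since the hunk truncates the description, the reliability measure (here the magnitude of the decoder output) and the selection by Euclidean distance are assumptions.

from itertools import product

import numpy as np

def ml_in_the_list(c_hat, y, reliability, checks, N=4):
    """Search all 2^N flips of the N least reliable bits of c_hat and
    return the valid codeword whose BPSK image is nearest to y.

    reliability[i] could be, e.g., |s_i| from the proximal decoder;
    small values mark the bits most likely to be wrong.
    """
    suspects = np.argsort(reliability)[:N]          # N least reliable positions
    best, best_dist = c_hat, np.inf
    for flips in product((0, 1), repeat=N):
        cand = c_hat.copy()
        cand[suspects] ^= np.array(flips)           # one variation of the estimate
        if any(cand[idx].sum() % 2 for idx in checks):
            continue                                # parity violated: not a codeword
        dist = np.sum((y - (1 - 2 * cand)) ** 2)    # distance of the BPSK image to y
        if dist < best_dist:
            best, best_dist = cand, dist
    return best                                     # falls back to c_hat if no flip is valid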