From fdc1ad5df8e7a7a05f09cc7ff7bceceb5157d6c9 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Tue, 28 Mar 2023 00:14:22 +0200
Subject: [PATCH] Finished introduction except proximal

---
 latex/thesis/bibliography.bib                 | 29 +++
 latex/thesis/chapters/decoding_techniques.tex |  2 -
 .../chapters/theoretical_background.tex       | 165 ++++++++----------
 3 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib
index 6e41c7e..e82bba0 100644
--- a/latex/thesis/bibliography.bib
+++ b/latex/thesis/bibliography.bib
@@ -181,3 +181,32 @@
 	doi={10.1109/LCOMM.2019.2911277}
 }
 
+@article{mackay_rediscovery,
+	author={MacKay, D. J. C.},
+	journal={IEEE Transactions on Information Theory},
+	title={Good error-correcting codes based on very sparse matrices},
+	year={1999},
+	volume={45},
+	number={2},
+	pages={399--431},
+	doi={10.1109/18.748992}
+}
+
+@misc{principles_of_dig_comm,
+	author={Forney, G.},
+	year={2003},
+	month={01},
+	howpublished={MIT OpenCourseWare},
+	title={6.451 Principles of Digital Communication II, Spring 2003}
+}
+
+@book{ryan_lin_2009,
+	place={Cambridge},
+	title={Channel Codes: Classical and Modern},
+%	DOI={10.1017/CBO9780511803253},
+	publisher={Cambridge University Press},
+	author={Ryan, William and Lin, Shu},
+	year={2009},
+	url={https://d1.amobbs.com/bbs_upload782111/files_35/ourdev_604508GHLFR2.pdf}
+}
+
diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index 62cb4f3..bc773f1 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -870,8 +870,6 @@ $\boldsymbol{z}_j$ in the previous iteration.}%
 a check-node update step (lines $3$-$6$) and the $\tilde{c}_i$-updates can be
 understood as a variable-node update step (lines $7$-$9$ in figure
 \ref{fig:lp:message_passing}). The updates for each variable- and check-node
 can be performed in parallel.
-With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM}
-is able to achieve similar computational complexity to \ac{BP}.
 The main computational effort in solving the linear program then amounts to
 computing the projection operation $\Pi_{\mathcal{P}_{d_j}} \left( \cdot \right) $
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index d57cfcc..954427a 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -14,27 +14,6 @@ Lastly, the optimization methods utilized are described.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{General Remarks on Notation}
 \label{sec:theo:Notation}
-%
-\todo{Explain bold font $\to$ vector/matrix?}%
-\todo{Explain random variables upper- and lower case and PDFs and PMFs?}%
-%
-%Matrices and vectors will be depicted in bold font, matrices with upper-case
-%and vectors with lower-case letters. For example:%
-%%
-%\begin{align*}
-%	\boldsymbol{H}\boldsymbol{c} &= \boldsymbol{0}
-%.\end{align*}
-%%
-%In order to be able to distinguish between random variables and their realizations,
-%random variables will be represented by upper-case and realizations by lower-case
-%letters. \Acp{PDF} and \acp{PMF} will be denoted by $f$ and $p$, respectively:%
-%%
-%\begin{align*}
-%	f_{Y} &\left( y \right) := \frac{d}{dy} P\left( Y \le y \right) \\
-%	p_{C} &\left( c \right) := P\left( Y = y \right)
-%,\end{align*}
-%%
-%where $P\left( . \right)$ is the probability function.
 
 Wherever the domain of a variable is expanded, this will be indicated with a tilde.
 For example:%
 %
 \begin{align*}
 	c \in \mathbb{F}_2 &\to \tilde{c} \in \left[ 0, 1 \right]
 .\end{align*}
 %
-Additionally a shorthand notation is used to denote series of indices and series
+Additionally, a shorthand notation will be used to denote sequences of indices and sequences
 of indexed variables:%
 %
 \begin{align*}
@@ -68,16 +47,64 @@ This is known as modulation. The modulation scheme chosen here is \ac{BPSK}:%
 .\end{align*}
 %
 The symbol that reaches the receiver, $\boldsymbol{y}$, is distorted by the channel.
-The channel model used here is \ac{AWGN}:%
+This distortion is described by the channel model, which here is chosen to be \ac{AWGN}:%
 %
 \begin{align*}
 	\boldsymbol{y} = \boldsymbol{x} + \boldsymbol{z},
-	\hspace{5mm} \boldsymbol{z}_i \in \mathcal{N}\left( 0, \frac{\sigma^2}{2} \right),
+	\hspace{5mm} z_i \sim \mathcal{N}\left( 0, \frac{\sigma^2}{2} \right),
 	\hspace{2mm} i \in \left[ 1:n \right]
 .\end{align*}
 %
-This process is visualized in figure \ref{fig:theo:channel_overview}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Channel Coding with LDPC Codes}
+\label{sec:theo:Channel Coding with LDPC Codes}
+
+Channel coding describes the process of adding redundancy to information
+transmitted over a channel in order to detect and correct any errors
+that may occur during the transmission.
+Encoding the information using \textit{binary linear codes} is one way of
+conducting this process, whereby \textit{data words} are mapped onto longer
+\textit{codewords}, which carry redundant information.
+\Ac{LDPC} codes have become especially popular, since they are able to
+reach arbitrarily small probabilities of error at code rates up to the capacity
+of the channel \cite[Sec. II.B.]{mackay_rediscovery} and their structure allows
+for very efficient decoding.
+
+The lengths of the data words and codewords are denoted by $k$ and $n$,
+respectively.
+The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
+linear code can be represented using the \textit{parity-check matrix}
+$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
+the number of parity-checks:%
+%
+\begin{align*}
+	\mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
+	\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+.\end{align*}
+%
+A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codeword
+$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
+$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
+%
+\begin{align*}
+	\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
+.\end{align*}
+%
+A small toy example illustrating these definitions is given below.
+
+After a data word has been encoded, the resulting codeword is transmitted
+over a channel as described in section
+\ref{sec:theo:Preliminaries: Channel Model and Modulation}.
+The received signal $\boldsymbol{y}$ is then decoded to obtain
+an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
+Finally, the encoding procedure is reversed and an estimate of the originally
+sent data word, $\hat{\boldsymbol{u}}$, is obtained.
+The methods examined in this work are all based on \textit{soft-decision} decoding,
+i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
+is made by a demodulator.
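+
+As a brief illustration of these definitions, consider the
+$\left( n, k \right) = \left( 3, 1 \right)$ repetition code (a toy example
+introduced only at this point, not a code used elsewhere in this work) with
+%
+\begin{align*}
+	\boldsymbol{H} = \begin{bmatrix}
+		1 & 1 & 0 \\
+		0 & 1 & 1
+	\end{bmatrix},
+	\hspace{5mm}
+	\boldsymbol{G} = \begin{bmatrix}
+		1 & 1 & 1
+	\end{bmatrix}
+.\end{align*}
+%
+Encoding the data word $\boldsymbol{u} = \left( 1 \right)$ yields the codeword
+$\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G} = \left( 1, 1, 1 \right)$, and
+indeed $\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0}$ holds, since
+each of the $m = 2$ parity-checks sums two equal bits over $\mathbb{F}_2$.
+The code consists of only two codewords,
+$\mathcal{C} = \left\{ \left( 0, 0, 0 \right), \left( 1, 1, 1 \right) \right\}$.
+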
+The process of transmitting and decoding a codeword is visualized in
+figure \ref{fig:theo:channel_overview}.%
+%
 
 \begin{figure}[H]
 	\centering
@@ -113,61 +140,13 @@ This process is visualized in figure \ref{fig:theo:channel_overview}.
 
 	\node[below=0.25cm of z] (text) {Channel};
 	\end{tikzpicture}
-	\caption{Overview of channel and modulation}
+	\caption{Overview of channel model and modulation}
 	\label{fig:theo:channel_overview}
 \end{figure}
 
+\todo{$\boldsymbol{z}$ is used to denote both the noise and the auxiliary variable for ADMM}
 \todo{Mapper $\to$ Modulator?}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Channel Coding with LDPC Codes}
-\label{sec:theo:Channel Coding with LDPC Codes}
-
-Channel coding describes the process of adding redundancy to information
-transmitted over a channel in order to detect and correct any errors
-that may occur during the transmission.
-Encoding the information using \textit{binary linear codes} is one way of
-conducting this process, whereby \textit{data words} are mapped onto longer
-\textit{codewords}, which carry redundant information.
-It can be shown that as the length of the encoded data words becomes greater,
-the theoretically achievable error-correcting capabilities of the code become
-better, asymptotically approaching the capacity of the channel.
-\todo{Citation needed}
-For this reason, \ac{LDPC} codes have become especially popular, given their
-low memory requirements even for very large codes.
-
-The lengths of the data words and codewords are denoted by $k$ and $n$,
-respectively.
-The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
-linear code can be represented using the \textit{parity-check matrix}
-$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
-the number of parity-checks:%
-%
-\begin{align*}
-	\mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
-	\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
-.\end{align*}
-%
-A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codword
-$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
-$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
-%
-\begin{align*}
-	\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
-.\end{align*}
-%
-
-After obtaining a codeword from a data word, it is transmitted over a channel
-as described in section \ref{sec:theo:Preliminaries: Channel Model and Modulation}.
-The received signal $\boldsymbol{y}$ is then decoded to obtain
-an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
-Finally, the encoding procedure is reversed and an estimate of the originally
-sent data word, $\hat{\boldsymbol{u}}$, is obtained.
-The methods examined in this work are all based on \textit{soft-decision} decoding,
-i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
-is made by a demodulator.
-
 The decoding process itself is generally based either on the \ac{MAP} or the \ac{ML}
 criterion:%
 %
@@ -184,8 +163,8 @@ criterion:%
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Decoding LDPC Codes using Belief Propagation}
-\label{sec:theo:Decoding LDPC Codes using Belief Propagation}
+\section{Tanner Graphs and Belief Propagation}
+\label{sec:theo:Tanner Graphs and Belief Propagation}
 
 It is often helpful to visualize codes graphically.
 This is especially true for \ac{LDPC} codes, as the established decoding
@@ -200,7 +179,8 @@ Each component of the codeword $\boldsymbol{c}$ is interpreted as a \ac{VN}.
 The relationship between \acp{CN} and \acp{VN} can then be plotted by noting
 which components of $\boldsymbol{c}$ are considered for which parity-check.
 Figure \ref{fig:theo:tanner_graph} shows the Tanner graph for the
-(7,4)-Hamming-code, which has the following parity-check matrix:%
+(7,4) Hamming code, which has the following parity-check matrix
+\cite[Example 5.7.]{ryan_lin_2009}:%
 %
 \begin{align*}
 	\boldsymbol{H} = \begin{bmatrix}
@@ -261,8 +241,8 @@ Figure \ref{fig:theo:tanner_graph} shows the tanner graph for the
 		\label{fig:theo:tanner_graph}
 \end{figure}%
 %
-\noindent \acp{CN} and \acp{VN}, and by extention the elements of $\boldsymbol{H}$, are
-indexed with the variables $j$ and $i$.
+\noindent \acp{CN} and \acp{VN}, and by extension the rows and columns of
+$\boldsymbol{H}$, are indexed with the variables $j$ and $i$, respectively.
 The sets of all \acp{CN} and all \acp{VN} are denoted by $\mathcal{J} := \left[ 1:m \right]$
 and $\mathcal{I} := \left[ 1:n \right]$, respectively.
 The \textit{neighbourhood} of the $j$th \ac{CN}, i.e., the set of all adjacent \acp{VN},
@@ -275,24 +255,17 @@ $N_v\left( 3 \right) = \left\{ 1, 2 \right\}$.
 
 Message passing algorithms are based on the notion of passing messages between
 \acp{CN} and \acp{VN}.
 \Ac{BP} is one such algorithm that is commonly used to decode \ac{LDPC} codes.
-It is based on the observation that each \ac{CN} defines a single
-parity-check code and each \ac{VN} defines a repetition code.
-The messages transmitted between the nodes correspond to the \acp{LLR}:%
-%
-\begin{align*}
-	L_{i\to j} = \ldots
-.\end{align*}
-%
-A number of iterations are performed, passing messages between \acp{CN} and \acp{VN}
-in alternating fashion.
-The bits at each \ac{VN} are then decoded based on the final values.
-
-\ac{BP} can be shown to be equivalent to \ac{ML} decoding when the Tanner graph
-is a tree, but is sub-optimal when the graph contains cycles.
-This leads to generally worse performance than \ac{ML} decoding across all \acp{SNR}.
+It aims to compute the posterior probabilities
+$p_{C_i \mid \boldsymbol{Y}}\left( c_i = 1 \mid \boldsymbol{y} \right),\hspace{2mm} i\in\mathcal{I}$
+\cite[Sec. III.]{mackay_rediscovery} and use them to calculate the estimate $\hat{\boldsymbol{c}}$.
+For cycle-free graphs this goal is reached after a finite
+number of steps and \ac{BP} is thus equivalent to bit-wise \ac{MAP} decoding.
+When the graph contains cycles, however, \ac{BP} only approximates the probabilities
+and is sub-optimal.
+This leads to generally worse performance than \ac{MAP} decoding for practical codes.
 Additionally, an \textit{error floor} appears for very high \acp{SNR}, making
 the use of \ac{BP} impractical for applications where a very low \ac{BER} is
-desired.
+desired \cite[Sec. 15.3]{ryan_lin_2009}.
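+
+To sketch the mechanics of the message updates, the \ac{LLR}-domain rules are
+reproduced here (this is the standard \textit{sum-product} formulation of
+\ac{BP}, see, e.g., \cite{ryan_lin_2009}; the message notation $L_{i \to j}$,
+$L_{j \to i}$ and the channel \ac{LLR} $L_i$ are introduced for this sketch
+only). Each \ac{VN} $i$ sends to a neighbouring \ac{CN} $j$ the message
+%
+\begin{align*}
+	L_{i \to j} = L_i + \sum_{j' \in N_v\left( i \right) \setminus \left\{ j \right\}} L_{j' \to i}
+,\end{align*}
+%
+and each \ac{CN} $j$ sends to a neighbouring \ac{VN} $i$ the message
+%
+\begin{align*}
+	L_{j \to i} = 2\tanh^{-1}\left( \prod_{i' \in N_c\left( j \right) \setminus \left\{ i \right\}}
+	\tanh\left( \frac{L_{i' \to j}}{2} \right) \right)
+.\end{align*}
+%
+For the \ac{BPSK}/\ac{AWGN} model of section
+\ref{sec:theo:Preliminaries: Channel Model and Modulation}, the channel
+\ac{LLR} is $L_i = \frac{4 y_i}{\sigma^2}$, assuming the mapping
+$0 \to +1$, $1 \to -1$.
+After a fixed number of iterations, or once all parity-checks are satisfied,
+each bit $\hat{c}_i$ is decided by the sign of the accumulated \ac{LLR} at
+the $i$th \ac{VN}.
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%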