From fdc1ad5df8e7a7a05f09cc7ff7bceceb5157d6c9 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Tue, 28 Mar 2023 00:14:22 +0200
Subject: [PATCH] Finished introduction except proximal

---
 latex/thesis/bibliography.bib                 | 29 +++
 latex/thesis/chapters/decoding_techniques.tex |  2 -
 .../chapters/theoretical_background.tex       | 165 ++++++++----------
 3 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib
index 6e41c7e..e82bba0 100644
--- a/latex/thesis/bibliography.bib
+++ b/latex/thesis/bibliography.bib
@@ -181,3 +181,32 @@
 	doi={10.1109/LCOMM.2019.2911277}
 }
 
+@article{mackay_rediscovery,
+	author={MacKay, D. J. C.},
+	journal={IEEE Transactions on Information Theory},
+	title={Good error-correcting codes based on very sparse matrices},
+	year={1999},
+	volume={45},
+	number={2},
+	pages={399--431},
+	doi={10.1109/18.748992}
+}
+
+@misc{principles_of_dig_comm,
+	author={Forney, G.},
+	year={2003},
+	month={01},
+	howpublished={MIT OpenCourseWare},
+	title={6.451 Principles of Digital Communication II, Spring 2003}
+}
+
+@book{ryan_lin_2009,
+	place={Cambridge},
+	title={Channel Codes: Classical and Modern},
+%	DOI={10.1017/CBO9780511803253},
+	publisher={Cambridge University Press},
+	author={Ryan, William and Lin, Shu},
+	year={2009},
+	url={https://d1.amobbs.com/bbs_upload782111/files_35/ourdev_604508GHLFR2.pdf}
+}
+
diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index 62cb4f3..bc773f1 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -870,8 +870,6 @@ $\boldsymbol{z}_j$ in the previous iteration.}%
 a check-node update step (lines $3$-$6$) and the $\tilde{c}_i$-updates can be
 understood as a variable-node update step (lines $7$-$9$ in figure
 \ref{fig:lp:message_passing}). The updates for each variable- and check-node
 can be performed in parallel.
-With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM}
-is able to achieve similar computational complexity to \ac{BP}.
 The main computational effort in solving the linear program then amounts to
 computing the projection operation $\Pi_{\mathcal{P}_{d_j}} \left( \cdot \right) $
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index d57cfcc..954427a 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -14,27 +14,6 @@ Lastly, the optimization methods utilized are described.
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{General Remarks on Notation}
 \label{sec:theo:Notation}
-%
-\todo{Explain bold font $\to$ vector/matrix?}%
-\todo{Explain random variables upper- and lower case and PDFs and PMFs?}%
-%
-%Matrices and vectors will be depicted in bold font, matrices with upper-case
-%and vectors with lower-case letters. For example:%
-%%
-%\begin{align*}
-%	\boldsymbol{H}\boldsymbol{c} &= \boldsymbol{0}
-%.\end{align*}
-%%
-%In order to be able to distinguish between random variables and their realizations,
-%random variables will be represented by upper-case and realizations by lower-case
-%letters. \Acp{PDF} and \acp{PMF} will be denoted by $f$ and $p$, respectively:%
-%%
-%\begin{align*}
-%	f_{Y} &\left( y \right) := \frac{d}{dy} P\left( Y \le y \right) \\
-%	p_{C} &\left( c \right) := P\left( Y = y \right)
-%,\end{align*}
-%%
-%where $P\left( . \right)$ is the probability function.
 
 Wherever the domain of a variable is expanded, this will be indicated with a tilde.
 For example:%
 %
 \begin{align*}
 	c \in \mathbb{F}_2 &\to \tilde{c} \in \left[ 0, 1 \right]
 .\end{align*}
 %
-Additionally a shorthand notation is used to denote series of indices and series
+Additionally, a shorthand notation will be used to denote sequences of indices and sequences
 of indexed variables:%
 %
 \begin{align*}
@@ -68,16 +47,64 @@ This is known as modulation. The modulation scheme chosen here is \ac{BPSK}:%
 .\end{align*}
 %
 The symbol that reaches the receiver, $\boldsymbol{y}$, is distorted by the channel.
-The channel model used here is \ac{AWGN}:%
+This distortion is described by the channel model, which here is chosen to be \ac{AWGN}:%
 %
 \begin{align*}
 	\boldsymbol{y} = \boldsymbol{x} + \boldsymbol{z},
-	\hspace{5mm} \boldsymbol{z}_i \in \mathcal{N}\left( 0, \frac{\sigma^2}{2} \right),
+	\hspace{5mm} z_i \sim \mathcal{N}\left( 0, \frac{\sigma^2}{2} \right),
 	\hspace{2mm} i \in \left[ 1:n \right]
 .\end{align*}
 %
-This process is visualized in figure \ref{fig:theo:channel_overview}.
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Channel Coding with LDPC Codes}
+\label{sec:theo:Channel Coding with LDPC Codes}
+
+Channel coding describes the process of adding redundancy to information
+transmitted over a channel in order to detect and correct any errors
+that may occur during the transmission.
+Encoding the information using \textit{binary linear codes} is one way of
+conducting this process, whereby \textit{data words} are mapped onto longer
+\textit{codewords}, which carry redundant information.
+\Ac{LDPC} codes have become especially popular, since they are able to
+reach arbitrarily small probabilities of error at code rates up to the capacity
+of the channel \cite[Sec. II.B.]{mackay_rediscovery} and their structure allows
+for very efficient decoding.
+
+The lengths of the data words and codewords are denoted by $k$ and $n$,
+respectively.
+The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
+linear code can be represented using the \textit{parity-check matrix}
+$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
+the number of parity-checks:%
+%
+\begin{align*}
+	\mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
+	\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+.\end{align*}
+%
+A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codeword
+$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
+$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
+%
+\begin{align*}
+	\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
+.\end{align*}
+%
+A small toy example illustrating these definitions is given below.
+
+After a data word has been encoded, the resulting codeword is transmitted
+over a channel as described in section
+\ref{sec:theo:Preliminaries: Channel Model and Modulation}.
+The received signal $\boldsymbol{y}$ is then decoded to obtain
+an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
+Finally, the encoding procedure is reversed and an estimate of the originally
+sent data word, $\hat{\boldsymbol{u}}$, is obtained.
+The methods examined in this work are all based on \textit{soft-decision} decoding,
+i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
+is made by a demodulator.
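+
+As a brief illustration of these definitions, consider the
+$\left( n, k \right) = \left( 3, 1 \right)$ repetition code (a toy example
+introduced only at this point, not a code used elsewhere in this work) with
+%
+\begin{align*}
+	\boldsymbol{H} = \begin{bmatrix}
+		1 & 1 & 0 \\
+		0 & 1 & 1
+	\end{bmatrix},
+	\hspace{5mm}
+	\boldsymbol{G} = \begin{bmatrix}
+		1 & 1 & 1
+	\end{bmatrix}
+.\end{align*}
+%
+Encoding the data word $\boldsymbol{u} = \left( 1 \right)$ yields the codeword
+$\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G} = \left( 1, 1, 1 \right)$, and
+indeed $\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0}$ holds, since
+each of the $m = 2$ parity-checks sums two equal bits over $\mathbb{F}_2$.
+The code consists of only two codewords,
+$\mathcal{C} = \left\{ \left( 0, 0, 0 \right), \left( 1, 1, 1 \right) \right\}$.
+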
+The process of transmitting and decoding a codeword is visualized in
+figure \ref{fig:theo:channel_overview}.%
+%
 
 \begin{figure}[H]
 	\centering
@@ -113,61 +140,13 @@ This process is visualized in figure \ref{fig:theo:channel_overview}.
 
 	\node[below=0.25cm of z] (text) {Channel};
 	\end{tikzpicture}
-	\caption{Overview of channel and modulation}
+	\caption{Overview of channel model and modulation}
 	\label{fig:theo:channel_overview}
 \end{figure}
 
+\todo{$\boldsymbol{z}$ is used to denote both the noise and the auxiliary variable for ADMM}
 \todo{Mapper $\to$ Modulator?}
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Channel Coding with LDPC Codes}
-\label{sec:theo:Channel Coding with LDPC Codes}
-
-Channel coding describes the process of adding redundancy to information
-transmitted over a channel in order to detect and correct any errors
-that may occur during the transmission.
-Encoding the information using \textit{binary linear codes} is one way of
-conducting this process, whereby \textit{data words} are mapped onto longer
-\textit{codewords}, which carry redundant information.
-It can be shown that as the length of the encoded data words becomes greater,
-the theoretically achievable error-correcting capabilities of the code become
-better, asymptotically approaching the capacity of the channel.
-\todo{Citation needed}
-For this reason, \ac{LDPC} codes have become especially popular, given their
-low memory requirements even for very large codes.
-
-The lengths of the data words and codewords are denoted by $k$ and $n$,
-respectively.
-The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
-linear code can be represented using the \textit{parity-check matrix}
-$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
-the number of parity-checks:%
-%
-\begin{align*}
-	\mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
-	\boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
-.\end{align*}
-%
-A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codword
-$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
-$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
-%
-\begin{align*}
-	\boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
-.\end{align*}
-%
-
-After obtaining a codeword from a data word, it is transmitted over a channel
-as described in section \ref{sec:theo:Preliminaries: Channel Model and Modulation}.
-The received signal $\boldsymbol{y}$ is then decoded to obtain
-an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
-Finally, the encoding procedure is reversed and an estimate of the originally
-sent data word, $\hat{\boldsymbol{u}}$, is obtained.
-The methods examined in this work are all based on \textit{soft-decision} decoding,
-i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
-is made by a demodulator.
-
 The decoding process itself is generally based either on the \ac{MAP} or the \ac{ML}
 criterion:%
 %
@@ -184,8 +163,8 @@ criterion:%
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Decoding LDPC Codes using Belief Propagation}
-\label{sec:theo:Decoding LDPC Codes using Belief Propagation}
+\section{Tanner Graphs and Belief Propagation}
+\label{sec:theo:Tanner Graphs and Belief Propagation}
 
 It is often helpful to visualize codes graphically.
 This is especially true for \ac{LDPC} codes, as the established decoding
@@ -200,7 +179,8 @@ Each component of the codeword $\boldsymbol{c}$ is interpreted as a \ac{VN}.
 The relationship between \acp{CN} and \acp{VN} can then be plotted by noting
 which components of $\boldsymbol{c}$ are considered for which parity-check.
 Figure \ref{fig:theo:tanner_graph} shows the Tanner graph for the
-(7,4)-Hamming-code, which has the following parity-check matrix:%
+(7,4) Hamming code, which has the following parity-check matrix
+\cite[Example 5.7.]{ryan_lin_2009}:%
 %
 \begin{align*}
 	\boldsymbol{H} = \begin{bmatrix}
@@ -261,8 +241,8 @@ Figure \ref{fig:theo:tanner_graph} shows the tanner graph for the
 		\label{fig:theo:tanner_graph}
 \end{figure}%
 %
-\noindent \acp{CN} and \acp{VN}, and by extention the elements of $\boldsymbol{H}$, are
-indexed with the variables $j$ and $i$.
+\noindent \acp{CN} and \acp{VN}, and by extension the rows and columns of
+$\boldsymbol{H}$, are indexed with the variables $j$ and $i$, respectively.
 The sets of all \acp{CN} and all \acp{VN} are denoted by $\mathcal{J} := \left[ 1:m \right]$
 and $\mathcal{I} := \left[ 1:n \right]$, respectively.
 The \textit{neighbourhood} of the $j$th \ac{CN}, i.e., the set of all adjacent \acp{VN},
@@ -275,24 +255,17 @@ $N_v\left( 3 \right) = \left\{ 1, 2 \right\}$.
 
 Message passing algorithms are based on the notion of passing messages between
 \acp{CN} and \acp{VN}.
 \Ac{BP} is one such algorithm that is commonly used to decode \ac{LDPC} codes.
-It is based on the observation that each \ac{CN} defines a single
-parity-check code and each \ac{VN} defines a repetition code.
-The messages transmitted between the nodes correspond to the \acp{LLR}:%
-%
-\begin{align*}
-	L_{i\to j} = \ldots
-.\end{align*}
-%
-A number of iterations are performed, passing messages between \acp{CN} and \acp{VN}
-in alternating fashion.
-The bits at each \ac{VN} are then decoded based on the final values.
-
-\ac{BP} can be shown to be equivalent to \ac{ML} decoding when the Tanner graph
-is a tree, but is sub-optimal when the graph contains cycles.
-This leads to generally worse performance than \ac{ML} decoding across all \acp{SNR}.
+It aims to compute the posterior probabilities
+$p_{C_i \mid \boldsymbol{Y}}\left( c_i = 1 \mid \boldsymbol{y} \right),\hspace{2mm} i\in\mathcal{I}$
+\cite[Sec. III.]{mackay_rediscovery} and use them to calculate the estimate $\hat{\boldsymbol{c}}$.
+For cycle-free graphs this goal is reached after a finite
+number of steps and \ac{BP} is thus equivalent to bit-wise \ac{MAP} decoding.
+When the graph contains cycles, however, \ac{BP} only approximates the probabilities
+and is sub-optimal.
+This leads to generally worse performance than \ac{MAP} decoding for practical codes.
 Additionally, an \textit{error floor} appears for very high \acp{SNR}, making
 the use of \ac{BP} impractical for applications where a very low \ac{BER} is
-desired.
+desired \cite[Sec. 15.3]{ryan_lin_2009}.
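+
+To sketch the mechanics of the message updates, the \ac{LLR}-domain rules are
+reproduced here (this is the standard \textit{sum-product} formulation of
+\ac{BP}, see, e.g., \cite{ryan_lin_2009}; the message notation $L_{i \to j}$,
+$L_{j \to i}$ and the channel \ac{LLR} $L_i$ are introduced for this sketch
+only). Each \ac{VN} $i$ sends to a neighbouring \ac{CN} $j$ the message
+%
+\begin{align*}
+	L_{i \to j} = L_i + \sum_{j' \in N_v\left( i \right) \setminus \left\{ j \right\}} L_{j' \to i}
+,\end{align*}
+%
+and each \ac{CN} $j$ sends to a neighbouring \ac{VN} $i$ the message
+%
+\begin{align*}
+	L_{j \to i} = 2\tanh^{-1}\left( \prod_{i' \in N_c\left( j \right) \setminus \left\{ i \right\}}
+	\tanh\left( \frac{L_{i' \to j}}{2} \right) \right)
+.\end{align*}
+%
+For the \ac{BPSK}/\ac{AWGN} model of section
+\ref{sec:theo:Preliminaries: Channel Model and Modulation}, the channel
+\ac{LLR} is $L_i = \frac{4 y_i}{\sigma^2}$, assuming the mapping
+$0 \to +1$, $1 \to -1$.
+After a fixed number of iterations, or once all parity-checks are satisfied,
+each bit $\hat{c}_i$ is decided by the sign of the accumulated \ac{LLR} at
+the $i$th \ac{VN}.
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%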