From b84b9651ebfb2d83d55b30f64c16622138cc13e8 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Mon, 27 Mar 2023 11:38:03 +0200
Subject: [PATCH] Reworked notation section; Added channel model and modulation
 section; Added decoding using BP section

---
 latex/thesis/abbreviations.tex                |   9 +
 latex/thesis/chapters/decoding_techniques.tex |   2 +-
 .../chapters/theoretical_background.tex       | 289 +++++++++++++-----
 3 files changed, 222 insertions(+), 78 deletions(-)

diff --git a/latex/thesis/abbreviations.tex b/latex/thesis/abbreviations.tex
index 7a15983..55e240f 100644
--- a/latex/thesis/abbreviations.tex
+++ b/latex/thesis/abbreviations.tex
@@ -119,6 +119,15 @@
     long = probability mass function
 }
 
+%
+% S
+%
+
+\DeclareAcronym{SNR}{
+    short = SNR,
+    long = signal-to-noise ratio
+}
+
 %
 % V
 %
diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index 803414c..62cb4f3 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -72,7 +72,7 @@ which minimizes the objective function $g$.
     \begin{tikzpicture}[scale=1, transform shape]
         \node[checknode,
               label={[below, label distance=-0.4cm, align=center]
-                     CN\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
+                     \acs{CN}\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
               (cn) at (0, 0) {};
         \node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_1 \right)$}]
               (c1) at (-2, 2) {};
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index 63a1272..d57cfcc 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -12,93 +12,78 @@ Lastly, the optimization methods utilized are described.
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Notation}
+\section{General Remarks on Notation}
 \label{sec:theo:Notation}
-
-%
-% TODOs
-%
+\todo{Explain bold font $\to$ vector/matrix?}%
+\todo{Explain random variables upper- and lower case and PDFs and PMFs?}%
+%
+%Matrices and vectors will be depicted in bold font, matrices with upper-case
+%and vectors with lower-case letters. For example:%
+%%
+%\begin{align*}
+%    \boldsymbol{H}\boldsymbol{c} &= \boldsymbol{0}
+%.\end{align*}
+%%
+%In order to be able to distinguish between random variables and their realizations,
+%random variables will be represented by upper-case and realizations by lower-case
+%letters. \Acp{PDF} and \acp{PMF} will be denoted by $f$ and $p$, respectively:%
+%%
+%\begin{align*}
+%    f_{Y} &\left( y \right) := \frac{d}{dy} P\left( Y \le y \right) \\
+%    p_{C} &\left( c \right) := P\left( C = c \right)
+%,\end{align*}
+%%
+%where $P\left( . \right)$ is the probability function.
 
-\begin{itemize}
-    \item General remarks on notation (matrices, \ldots)
-    \item Probabilistic quantities (random variables, \acp{PDF}, pdfs vs pmfs vs cdfs, \ldots)
-\end{itemize}
+Wherever the domain of a variable is expanded, this will be indicated with a tilde.
+For example:%
+%
+\begin{align*}
+    x \in \left\{ -1, 1 \right\} &\to \tilde{x} \in \mathbb{R}\\
+    c \in \mathbb{F}_2 &\to \tilde{c} \in \left[ 0, 1 \right]
+.\end{align*}
+%
+Additionally, a shorthand notation is used to denote series of indices and series
+of indexed variables:%
+%
+\begin{align*}
+    \left[ m:n \right] &:= \left\{ m, m+1, \ldots, n-1, n \right\} \\
+    x_{\left[ m:n \right] } &:= \left\{ x_m, x_{m+1}, \ldots, x_{n-1}, x_n \right\}
+.\end{align*}
+%
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Preliminaries: Channel Model and Modulation}
 \label{sec:theo:Preliminaries: Channel Model and Modulation}
 
-%
-% TODOs
-%
-
-\begin{itemize}
-    \item \Ac{AWGN}
-    \item \Ac{BPSK}
-\end{itemize}
-
-%
-% Figure showing notation for entire coding / decoding process
-%
-
-\tikzstyle{box} = [rectangle, minimum width=1.5cm, minimum height=0.7cm,
-                   rounded corners=0.1cm, text centered, draw=black, fill=KITgreen!80]
-
-\todo{Note about $\tilde{\boldsymbol{c}}$ (and maybe $\tilde{\boldsymbol{x}}$?)}
-
-
-%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-\section{Channel Coding with LDPC Codes}
-\label{sec:theo:Channel Coding with LDPC Codes}
-
-Channel coding describes the process of adding redundancy to information
-transmitted over a channel in order to detect and correct any errors
-that may occur during the transmission.
-Encoding the information using \textit{binary linear codes} is one way of
-conducting this process, whereby \textit{data words} are mapped onto longer
-\textit{codewords}, which carry redundant information.
-It can be shown that as the length of the encoded data words becomes greater,
-the theoretically achievable error-correcting capabilities of the code become
-better, asymptotically approaching the capacity of the channel.
-For this reason, \ac{LDPC} codes have become especially popular, given their
-low memory requirements even for very large codes.
-
-The lengths of the data words and codewords are denoted by $k$ and $n$,
-respectively.
-The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
-linear code can be represented using the \textit{parity-check matrix}
-$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
-the number of parity-checks:%
+In order to transmit a bit-word $\boldsymbol{c}$ of length $n$ over a channel,
+it has to be mapped onto a sequence of symbols $\boldsymbol{x}$ that can be
+physically transmitted.
+This is known as modulation. The modulation scheme chosen here is \ac{BPSK}:%
 %
 \begin{align*}
-    \mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
-                   \boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+    \boldsymbol{x} = \left( -1 \right)^{\boldsymbol{c}}
 .\end{align*}
 %
-A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codword
-$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
-$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
+The signal that reaches the receiver, $\boldsymbol{y}$, is distorted by the channel.
+The channel model used here is \ac{AWGN}:%
 %
 \begin{align*}
-    \boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
+    \boldsymbol{y} = \boldsymbol{x} + \boldsymbol{z},
+    \hspace{5mm} \boldsymbol{z}_i \sim \mathcal{N}\left( 0, \frac{\sigma^2}{2} \right),
+    \hspace{2mm} i \in \left[ 1:n \right]
 .\end{align*}
 %
+This process is visualized in figure \ref{fig:theo:channel_overview}.
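+As a brief illustration, the following sketch simulates this transmission
+chain for a single word; it is a minimal example, and the concrete values of
+$n$ and $\sigma^2$ are chosen purely for illustration:%
+%
+\begin{verbatim}
+import numpy as np
+
+rng = np.random.default_rng(seed=0)
+
+n = 7                        # word length (illustrative value)
+sigma2 = 0.5                 # assumed noise variance sigma^2
+
+c = rng.integers(0, 2, n)    # bit-word c in F_2^n
+x = (-1.0) ** c              # BPSK mapping: 0 -> +1, 1 -> -1
+z = rng.normal(0.0, np.sqrt(sigma2 / 2), n)   # AWGN with variance sigma^2/2
+y = x + z                    # distorted signal at the receiver
+\end{verbatim}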
-After obtaining a codeword from a data word, it is transmitted over a channel,
-as shown in figure \ref{fig:theo:channel_overview}.
-Using the selected modulation scheme, $\boldsymbol{c}$ is mapped onto
-$\boldsymbol{x}$.
-The channel distorts $\boldsymbol{x}$ into $\boldsymbol{y}$, which is what
-reaches the receiver.
-The received signal $\boldsymbol{y}$ is then decoded at the receiver to obtain
-an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
-Finally, the encoding procedure is reversed and an estimate for the originally
-sent data word is obtained.
-
-\begin{figure}[htpb]
+\begin{figure}[H]
     \centering
+    \tikzstyle{box} = [rectangle, minimum width=1.5cm, minimum height=0.7cm,
+                       rounded corners=0.1cm, text centered, draw=black, fill=KITgreen!80]
+
     \begin{tikzpicture}[scale=1, transform shape]
         \node (c) {$\boldsymbol{c}$};
         \node[box, right=0.5cm of c] (bpskmap) {Mapper};
@@ -128,36 +113,186 @@ sent data word is obtained.
         \node[below=0.25cm of z] (text) {Channel};
     \end{tikzpicture}
 
-    \caption{Overview of codeword transmission}
+    \caption{Overview of channel and modulation}
     \label{fig:theo:channel_overview}
 \end{figure}
 
+\todo{Mapper $\to$ Modulator?}
+
+
+%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+\section{Channel Coding with LDPC Codes}
+\label{sec:theo:Channel Coding with LDPC Codes}
+
+Channel coding describes the process of adding redundancy to information
+transmitted over a channel in order to detect and correct any errors
+that may occur during the transmission.
+Encoding the information using \textit{binary linear codes} is one way of
+conducting this process, whereby \textit{data words} are mapped onto longer
+\textit{codewords}, which carry redundant information.
+It can be shown that as the codeword length grows, the theoretically
+achievable error-correcting performance improves, asymptotically approaching
+the capacity of the channel.
+\todo{Citation needed}
+For this reason, \ac{LDPC} codes have become especially popular: their sparse
+parity-check matrices keep memory requirements low even for very long codes.
+
+The lengths of the data words and codewords are denoted by $k$ and $n$,
+respectively.
+The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
+linear code can be represented using the \textit{parity-check matrix}
+$\boldsymbol{H} \in \mathbb{F}_2^{m\times n}$, where $m$ represents
+the number of parity-checks:%
+%
+\begin{align*}
+    \mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
+                   \boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+.\end{align*}
+%
+A data word $\boldsymbol{u} \in \mathbb{F}_2^k$ can be mapped onto a codeword
+$\boldsymbol{c} \in \mathbb{F}_2^n$ using the \textit{generator matrix}
+$\boldsymbol{G} \in \mathbb{F}_2^{k\times n}$:%
+%
+\begin{align*}
+    \boldsymbol{c} = \boldsymbol{u}\boldsymbol{G}
+.\end{align*}
+%
+
+After obtaining a codeword from a data word, it is transmitted over a channel
+as described in section \ref{sec:theo:Preliminaries: Channel Model and Modulation}.
+The received signal $\boldsymbol{y}$ is then decoded to obtain
+an estimate of the transmitted codeword, $\hat{\boldsymbol{c}}$.
+Finally, the encoding procedure is reversed and an estimate of the originally
+sent data word, $\hat{\boldsymbol{u}}$, is obtained.
+The methods examined in this work are all based on \textit{soft-decision} decoding,
+i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
+is made by a demodulator.
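+To make the roles of $\boldsymbol{H}$ and $\boldsymbol{G}$ concrete, the
+following sketch checks codeword membership and performs the encoding step
+over $\mathbb{F}_2$; it is purely illustrative, with hypothetical helper
+names, and assumes $\boldsymbol{H}$ and $\boldsymbol{G}$ are given as binary
+arrays:%
+%
+\begin{verbatim}
+import numpy as np
+
+def is_codeword(H, c):
+    """Check the parity-check condition H c^T = 0 over F_2."""
+    return not np.any((H @ c) % 2)
+
+def encode(G, u):
+    """Map a data word u onto a codeword c = u G over F_2."""
+    return (u @ G) % 2
+\end{verbatim}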
 The decoding process itself is generally based either on the \ac{MAP} or the
 \ac{ML} criterion:%
 %
 \begin{align*}
     \hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
         p_{\boldsymbol{C} \mid \boldsymbol{Y}} \left(\boldsymbol{c} \mid \boldsymbol{y}
-        \right)
+        \right) \\
     \hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
         f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
 .\end{align*}%
 %
-The methods examined in this work are all based on \textit{soft-decision} decoding,
-i.e., $\boldsymbol{y}$ is considered to be in $\mathbb{R}^n$ and no preliminary decision
-is made by a demodulator.
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Decoding LDPC Codes using Belief Propagation}
 \label{sec:theo:Decoding LDPC Codes using Belief Propagation}
 
-\begin{itemize}
-    \item Introduction to message passing
-    \item Overview of \ac{BP} algorithm
-    \item \Ac{LDPC} codes (especially $i$, $j$, parity check matrix $\boldsymbol{H}$, $N\left( j \right) $ \& $N\left( i \right) $, etc.)
-\end{itemize}
+It is often helpful to visualize codes graphically.
+This is especially true for \ac{LDPC} codes, as the established decoding
+algorithms are \textit{message passing algorithms}, which are inherently
+graph-based.
+
+Binary linear codes with a parity-check matrix $\boldsymbol{H}$ can be
+visualized using a \textit{Tanner} or \textit{factor graph}:
+Each row of $\boldsymbol{H}$, which represents one parity-check, is viewed as a
+\ac{CN}.
+Each component of the codeword $\boldsymbol{c}$ is interpreted as a \ac{VN}.
+The relationship between \acp{CN} and \acp{VN} can then be drawn as edges:
+a \ac{CN} is connected to a \ac{VN} whenever the corresponding component of
+$\boldsymbol{c}$ takes part in the corresponding parity-check, i.e., whenever
+the corresponding entry of $\boldsymbol{H}$ equals one.
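+In code, the edges of such a graph can thus be read directly off the non-zero
+entries of $\boldsymbol{H}$; a small sketch (the function name is
+hypothetical, and indices are shifted to be 1-based to match the notation of
+this chapter):%
+%
+\begin{verbatim}
+import numpy as np
+
+def tanner_edges(H):
+    """Return all edges (j, i): CN j and VN i are connected
+    whenever H[j, i] = 1 (returned as 1-based indices)."""
+    return [(j + 1, i + 1) for j, i in zip(*np.nonzero(H))]
+\end{verbatim}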
+Figure \ref{fig:theo:tanner_graph} shows the Tanner graph for the
+(7,4)-Hamming-code, which has the following parity-check matrix:%
+%
+\begin{align*}
+    \boldsymbol{H} = \begin{bmatrix}
+        1 & 0 & 1 & 0 & 1 & 0 & 1 \\
+        0 & 1 & 1 & 0 & 0 & 1 & 1 \\
+        0 & 0 & 0 & 1 & 1 & 1 & 1
+    \end{bmatrix}
+.\end{align*}
+%
+%
+\begin{figure}[H]
+    \centering
+
+    \tikzstyle{checknode} = [color=KITblue, fill=KITblue,
+                             draw, regular polygon,regular polygon sides=4,
+                             inner sep=0pt, minimum size=12pt]
+    \tikzstyle{variablenode} = [color=KITgreen, fill=KITgreen,
+                                draw, circle, inner sep=0pt, minimum size=10pt]
+
+    \begin{tikzpicture}[scale=1, transform shape]
+        \node[checknode,
+              label={[below, label distance=-0.4cm, align=center]
+                     \acs{CN} 1\\$\left( c_1 + c_3 + c_5 + c_7 = 0 \right) $}]
+              (cn1) at (-4, -1) {};
+        \node[checknode,
+              label={[below, label distance=-0.4cm, align=center]
+                     \acs{CN} 2\\$\left( c_2 + c_3 + c_6 + c_7 = 0 \right) $}]
+              (cn2) at (0, -1) {};
+        \node[checknode,
+              label={[below, label distance=-0.4cm, align=center]
+                     \acs{CN} 3\\$\left( c_4 + c_5 + c_6 + c_7 = 0 \right) $}]
+              (cn3) at (4, -1) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 1\\$c_1$}] (c1) at (-4.5, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 2\\$c_2$}] (c2) at (-3, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 3\\$c_3$}] (c3) at (-1.5, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 4\\$c_4$}] (c4) at (0, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 5\\$c_5$}] (c5) at (1.5, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 6\\$c_6$}] (c6) at (3, 2) {};
+        \node[variablenode, label={[above, align=center] \acs{VN} 7\\$c_7$}] (c7) at (4.5, 2) {};
+
+        \draw (cn1) -- (c1);
+        \draw (cn1) -- (c3);
+        \draw (cn1) -- (c5);
+        \draw (cn1) -- (c7);
+
+        \draw (cn2) -- (c2);
+        \draw (cn2) -- (c3);
+        \draw (cn2) -- (c6);
+        \draw (cn2) -- (c7);
+
+        \draw (cn3) -- (c4);
+        \draw (cn3) -- (c5);
+        \draw (cn3) -- (c6);
+        \draw (cn3) -- (c7);
+    \end{tikzpicture}
+
+    \caption{Tanner graph for the (7,4)-Hamming-code}
+    \label{fig:theo:tanner_graph}
+\end{figure}%
+%
+\noindent \acp{CN} and \acp{VN}, and by extension the elements of $\boldsymbol{H}$, are
+indexed by the variables $j$ and $i$, respectively.
+The sets of all \acp{CN} and all \acp{VN} are denoted by
+$\mathcal{J} := \left[ 1:m \right]$ and $\mathcal{I} := \left[ 1:n \right]$, respectively.
+The \textit{neighbourhood} of the $j$th \ac{CN}, i.e., the set of all adjacent \acp{VN},
+is denoted by $N_c\left( j \right)$.
+The neighbourhood of the $i$th \ac{VN} is denoted by $N_v\left( i \right)$.
+For the code depicted in figure \ref{fig:theo:tanner_graph}, for example,
+$N_c\left( 1 \right) = \left\{ 1, 3, 5, 7 \right\}$ and
+$N_v\left( 3 \right) = \left\{ 1, 2 \right\}$.
+
+Message passing algorithms operate by iteratively exchanging messages between
+\acp{CN} and \acp{VN} along the edges of the graph.
+\Ac{BP} is one such algorithm that is commonly used to decode \ac{LDPC} codes.
+It is based on the observation that each \ac{CN} defines a single
+parity-check code and each \ac{VN} defines a repetition code.
+The messages passed between the nodes are \acp{LLR}.
+Starting from the channel \acp{LLR}
+$L_i := \ln \frac{f_{Y \mid C}\left( y_i \mid 0 \right)}{f_{Y \mid C}\left( y_i \mid 1 \right)}$,
+the \ac{VN}-to-\ac{CN} and \ac{CN}-to-\ac{VN} messages are computed as%
+%
+\begin{align*}
+    L_{i\to j} &= L_i + \sum_{j' \in N_v\left( i \right) \setminus \left\{ j \right\}} L_{j'\to i} \\
+    L_{j\to i} &= 2 \operatorname{artanh}\left( \prod_{i' \in N_c\left( j \right) \setminus \left\{ i \right\}} \tanh\left( \frac{L_{i'\to j}}{2} \right) \right)
+.\end{align*}
+%
+A number of iterations are performed, passing messages between \acp{CN} and \acp{VN}
+in alternating fashion.
+The bit at each \ac{VN} is then decided based on the sign of its final \ac{LLR}.
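+The update rules above can be implemented compactly; the following sketch
+(sum-product in the \ac{LLR} domain, with dense matrices for readability and
+a hypothetical function name) is illustrative only: a practical decoder would
+exploit the sparsity of $\boldsymbol{H}$ and terminate early once
+$\boldsymbol{H}\hat{\boldsymbol{c}}^\text{T} = \boldsymbol{0}$.%
+%
+\begin{verbatim}
+import numpy as np
+
+def bp_decode(H, L_ch, num_iters=10):
+    """Sum-product BP in the LLR domain (illustrative sketch).
+
+    H:     binary parity-check matrix of shape (m, n)
+    L_ch:  channel LLRs; for the BPSK/AWGN model above, L_ch = 4 * y / sigma^2
+    """
+    m, n = H.shape
+    mask = H.astype(bool)
+    L_c2v = np.zeros((m, n))      # CN-to-VN messages L_{j -> i}
+
+    for _ in range(num_iters):
+        # VN update: channel LLR plus all incoming messages except L_{j -> i}
+        total = L_ch[None, :] + L_c2v.sum(axis=0, keepdims=True) - L_c2v
+        L_v2c = np.where(mask, total, 0.0)
+
+        # CN update: 2 artanh of the product of tanh(L/2) over all
+        # neighbouring VNs except the target VN i
+        t = np.where(mask, np.tanh(L_v2c / 2.0), 1.0)
+        t = np.where(np.abs(t) < 1e-12, 1e-12, t)   # avoid division by zero
+        ext = t.prod(axis=1, keepdims=True) / t
+        ext = np.clip(ext, -1 + 1e-12, 1 - 1e-12)
+        L_c2v = np.where(mask, 2.0 * np.arctanh(ext), 0.0)
+
+    # A-posteriori LLRs and hard decision (LLR < 0 corresponds to bit 1)
+    L_app = L_ch + L_c2v.sum(axis=0)
+    return (L_app < 0).astype(int)
+\end{verbatim}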
+
+\ac{BP} can be shown to perform optimal bitwise \ac{MAP} decoding when the
+Tanner graph is a tree, but is sub-optimal when the graph contains cycles.
+In practice, this leads to generally worse performance than \ac{ML} decoding
+across all \acp{SNR}.
+Additionally, an \textit{error floor} appears for very high \acp{SNR}, making
+the use of \ac{BP} impractical for applications where a very low \ac{BER} is
+desired.
 
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -167,7 +302,7 @@ is made by a demodulator.
 TODO:
 \begin{itemize}
     \item Intro
-    \item Proximal Decoding
+    \item Proximal gradient method
 \end{itemize}
 
 \vspace{5mm}