\chapter{Theoretical Background}%
\label{chapter:theoretical_background}

In this chapter, the theoretical background necessary to understand this work is given. First, the notation used throughout is clarified. Next, the physical aspects are detailed, namely the modulation scheme and the channel model employed. A short introduction to channel coding with binary linear codes, and \ac{LDPC} codes in particular, is given, and the established methods of decoding \ac{LDPC} codes are briefly explained. Lastly, the optimization methods utilized are described.

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Notation}
\label{sec:theo:Notation}
%
% TODOs
%
\begin{itemize}
  \item General remarks on notation (matrices, \ldots)
  \item Probabilistic quantities (random variables, \acp{PDF}, pdfs vs pmfs vs cdfs, \ldots)
\end{itemize}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Preliminaries: Channel Model and Modulation}
\label{sec:theo:Preliminaries: Channel Model and Modulation}
%
% TODOs
%
\begin{itemize}
  \item \Ac{AWGN}
  \item \Ac{BPSK}
\end{itemize}
%
% Figure showing notation for entire coding / decoding process
%
\tikzstyle{box} = [rectangle, minimum width=1.5cm, minimum height=0.7cm, rounded corners=0.1cm, text centered, draw=black, fill=KITgreen!80]
\begin{figure}[htpb]
  \centering
  \begin{tikzpicture}[scale=1, transform shape]
    \node (in) {$\boldsymbol{c}$};
    \node[box, right=0.5cm of in] (bpskmap) {Mapper};
    \node[right=1.5cm of bpskmap, draw, circle, inner sep=0pt, minimum size=0.5cm] (add) {$+$};
    \node[below=0.5cm of add] (noise) {$\boldsymbol{z}$};
    \node[box, right=1.5cm of add] (decoder) {Decoder};
    \node[box, right=1.5cm of decoder] (demapper) {Demapper};
    \node[right=0.5cm of demapper] (out) {$\boldsymbol{\hat{c}}$};
    \node at ($(bpskmap.east)!0.5!(add.west) + (0,0.3cm)$) {$\boldsymbol{x}$};
    \node at ($(add.east)!0.5!(decoder.west) + (0,0.3cm)$) {$\boldsymbol{y}$};
    \node at ($(decoder.east)!0.5!(demapper.west) + (0,0.3cm)$) {$\boldsymbol{\hat{x}}$};
    \draw[->] (in) -- (bpskmap);
    \draw[->] (bpskmap) -- (add);
    \draw[->] (add) -- (decoder);
    \draw[->] (noise) -- (add);
    \draw[->] (decoder) -- (demapper);
    \draw[->] (demapper) -- (out);
  \end{tikzpicture}
  \caption{Overview of notation}
  \label{fig:notation}
\end{figure}
\todo{Note about $\tilde{\boldsymbol{c}}$ (and maybe $\tilde{\boldsymbol{x}}$?)}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Channel Coding with LDPC Codes}
\label{sec:theo:Channel Coding with LDPC Codes}
\begin{itemize}
  \item Introduction
  \item Binary linear codes
  \item \Ac{LDPC} codes (especially $i$, $j$, parity check matrix $\boldsymbol{H}$, $N\left( j \right)$ \& $N\left( i \right)$, etc.)
\end{itemize}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Decoding LDPC Codes using Belief Propagation}
\label{sec:theo:Decoding LDPC Codes using Belief Propagation}
\begin{itemize}
  \item Introduction to message passing
  \item Overview of \ac{BP} algorithm
\end{itemize}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Optimization Methods}
\label{sec:theo:Optimization Methods}
TODO:
\begin{itemize}
  \item Intro
  \item Proximal Decoding
\end{itemize}
\vspace{5mm}

Generally, any linear program can be expressed in \textit{standard form}%
\footnote{The inequality $\boldsymbol{x} \ge \boldsymbol{0}$ is to be interpreted componentwise.}
\cite[Sec.~1.1]{intro_to_lin_opt_book}:%
%
\begin{align}
\begin{alignedat}{2}
\text{minimize }\hspace{2mm} && \boldsymbol{\gamma}^\text{T} \boldsymbol{x} & \\
\text{subject to }\hspace{2mm} && \boldsymbol{A}\boldsymbol{x} & = \boldsymbol{b} \\
&& \boldsymbol{x} & \ge \boldsymbol{0}.
\end{alignedat}
\label{eq:theo:admm_standard}
\end{align}%
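%
For illustration, consider the (arbitrarily chosen) problem of minimizing $x_1 + 2x_2$ subject to $x_1 + x_2 \ge 1$ and $x_1, x_2 \ge 0$. Introducing a slack variable $s \ge 0$ turns the inequality constraint into an equality, $x_1 + x_2 - s = 1$, and the problem takes the standard form (\ref{eq:theo:admm_standard}) with
%
\begin{align*}
\boldsymbol{x} = \begin{pmatrix} x_1 & x_2 & s \end{pmatrix}^\text{T}, \hspace{5mm}
\boldsymbol{\gamma} = \begin{pmatrix} 1 & 2 & 0 \end{pmatrix}^\text{T}, \hspace{5mm}
\boldsymbol{A} = \begin{pmatrix} 1 & 1 & -1 \end{pmatrix}, \hspace{5mm}
\boldsymbol{b} = 1
.\end{align*}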
%
A technique called \textit{Lagrangian relaxation} \cite[Sec.~11.4]{intro_to_lin_opt_book} can then be applied: the equality constraints are moved into the objective function itself, weighted by a newly introduced vector $\boldsymbol{\lambda}$. A new, relaxed problem is formulated:
%
\begin{align}
\begin{aligned}
\text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{x} + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b} - \boldsymbol{A}\boldsymbol{x} \right) \\
\text{subject to }\hspace{2mm} & \boldsymbol{x} \ge \boldsymbol{0},
\end{aligned}
\label{eq:theo:admm_relaxed}
\end{align}%
%
the new objective function being the \textit{Lagrangian}
%
\begin{align*}
\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right) = \boldsymbol{\gamma}^\text{T}\boldsymbol{x} + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b} - \boldsymbol{A}\boldsymbol{x} \right)
.\end{align*}%
%
This problem is not directly equivalent to the original one, as its solution now depends on the choice of the \textit{Lagrange multipliers} $\boldsymbol{\lambda}$. Importantly, however, for any choice of $\boldsymbol{\lambda}$, the minimum of the objective function (hereafter called the \textit{optimal objective}) of the relaxed problem (\ref{eq:theo:admm_relaxed}) is a lower bound for the optimal objective of the original problem (\ref{eq:theo:admm_standard}) \cite[Sec.~4.1]{intro_to_lin_opt_book}:%
%
\begin{align*}
\min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \phantom{a}}} \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right) \le \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}}} \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
.\end{align*}
%
Furthermore, for linear programs that admit an optimal solution, \textit{strong duality} always holds \cite[Theorem 4.4]{intro_to_lin_opt_book}: the tightest such lower bound actually attains the optimal objective itself. In other words, with the optimal choice of $\boldsymbol{\lambda}$, the optimal objectives of the problems (\ref{eq:theo:admm_relaxed}) and (\ref{eq:theo:admm_standard}) have the same value:
%
\begin{align*}
\max_{\boldsymbol{\lambda}} \, \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right) = \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}}} \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
.\end{align*}
%
Thus, we can define the \textit{dual problem} as the search for the tightest lower bound:%
%
\begin{align}
\text{maximize }\hspace{2mm} & \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L} \left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
\label{eq:theo:dual}
.\end{align}
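%
As a quick numerical sanity check (not taken from the cited references; the problem data are chosen arbitrarily), the following Python sketch solves a small instance of problem (\ref{eq:theo:admm_standard}) and of its dual with \texttt{scipy.optimize.linprog} and confirms that the two optimal objectives coincide:
\begin{verbatim}
import numpy as np
from scipy.optimize import linprog

# Arbitrary toy data: minimize gamma^T x  s.t.  A x = b,  x >= 0.
gamma = np.array([2.0, 3.0, 1.0])
A = np.array([[1.0, 1.0, 1.0]])
b = np.array([1.0])

# Primal problem; linprog bounds the variables to x >= 0 by default.
primal = linprog(c=gamma, A_eq=A, b_eq=b)

# Dual problem: min_{x >= 0} L(x, lam) stays finite only if
# gamma - A^T lam >= 0, in which case it equals lam^T b. The dual is
# therefore the LP "maximize b^T lam s.t. A^T lam <= gamma", written
# below as a minimization of -b^T lam; lam itself is unconstrained.
dual = linprog(c=-b, A_ub=A.T, b_ub=gamma, bounds=[(None, None)])

print("primal optimal objective:", primal.fun)  # 1.0
print("dual optimal objective:  ", -dual.fun)   # 1.0, as strong duality predicts
\end{verbatim}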
%
The solution $\boldsymbol{x}_{\text{opt}}$ to problem (\ref{eq:theo:admm_standard}) can be recovered from the solution $\boldsymbol{\lambda}_{\text{opt}}$ to problem (\ref{eq:theo:dual}) by computing \cite[Sec.~2.1]{admm_distr_stats}%
%
\begin{align}
\boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda}_{\text{opt}} \right)
\label{eq:theo:admm_obtain_primal}
.\end{align}
%
The dual problem can then be solved iteratively using \textit{dual ascent}: starting with an initial estimate of $\boldsymbol{\lambda}$, calculate an estimate for $\boldsymbol{x}$ using equation (\ref{eq:theo:admm_obtain_primal}); then, update $\boldsymbol{\lambda}$ using gradient ascent \cite[Sec.~2.1]{admm_distr_stats}:%
%
\begin{align*}
\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right) \\
\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda} + \alpha\left( \boldsymbol{b} - \boldsymbol{A}\boldsymbol{x} \right), \hspace{5mm} \alpha > 0
.\end{align*}
%
The algorithm can be improved by observing that when the objective function is separable in $\boldsymbol{x}$, the Lagrangian is separable as well:
%
\begin{align*}
\text{minimize }\hspace{5mm} & \sum_{i=1}^{N} g_i\left( \boldsymbol{x}_i \right) \\
\text{subject to }\hspace{5mm} & \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i = \boldsymbol{b},
\end{align*}
with the corresponding Lagrangian
\begin{align*}
\mathcal{L}\left( \boldsymbol{x}_{[1:N]}, \boldsymbol{\lambda} \right) = \sum_{i=1}^{N} g_i\left( \boldsymbol{x}_i \right) + \boldsymbol{\lambda}^\text{T} \left( \boldsymbol{b} - \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i \right)
.\end{align*}%
%
The minimization of each term can then happen in parallel, in a distributed fashion \cite[Sec.~2.2]{admm_distr_stats}. This modified version of dual ascent is called \textit{dual decomposition}:
%
\begin{align*}
\boldsymbol{x}_i &\leftarrow \argmin_{\boldsymbol{x}_i}\mathcal{L}\left( \boldsymbol{x}_{[1:N]}, \boldsymbol{\lambda}\right) \hspace{5mm} \forall i \in [1:N]\\
\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda} + \alpha\left( \boldsymbol{b} - \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i \right), \hspace{5mm} \alpha > 0
.\end{align*}
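%
The following minimal NumPy sketch illustrates dual decomposition on a toy instance. It assumes (purely for illustration) quadratic terms $g_i\left( \boldsymbol{x}_i \right) = \frac{1}{2}\lVert \boldsymbol{x}_i - \boldsymbol{v}_i \rVert_2^2$ with arbitrary $\boldsymbol{v}_i$, so that the blockwise minimizations have closed-form solutions:
\begin{verbatim}
import numpy as np

# Arbitrary toy data: N = 2 blocks, m coupling constraints.
rng = np.random.default_rng(0)
m, n, N = 3, 4, 2
A = [rng.standard_normal((m, n)) for _ in range(N)]
v = [rng.standard_normal(n) for _ in range(N)]
b = rng.standard_normal(m)

# Safe step size: the dual function is a concave quadratic with
# curvature sum_i A_i A_i^T, so alpha < 2 / lambda_max guarantees
# convergence of the gradient ascent.
alpha = 1.0 / np.linalg.eigvalsh(sum(Ai @ Ai.T for Ai in A)).max()

lam = np.zeros(m)
for _ in range(5000):
    # Blockwise minimization of the Lagrangian: for the quadratic
    # g_i(x_i) = 0.5 * ||x_i - v_i||^2 assumed here, the minimizer of
    # g_i(x_i) - lam^T A_i x_i is x_i = v_i + A_i^T lam. Each block is
    # independent of the others and could be computed in parallel.
    x = [v[i] + A[i].T @ lam for i in range(N)]
    # Gradient-ascent step on the dual function.
    lam = lam + alpha * (b - sum(A[i] @ x[i] for i in range(N)))

print("constraint residual:",
      np.linalg.norm(sum(A[i] @ x[i] for i in range(N)) - b))
\end{verbatim}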
%
The \ac{ADMM} works in the same way as dual decomposition; it differs only in the use of an \textit{augmented Lagrangian} $\mathcal{L}_\mu\left( \boldsymbol{x}_{[1:N]}, \boldsymbol{\lambda} \right)$, which makes the convergence behavior more robust. The augmented Lagrangian extends the ordinary one by an additional penalty term, scaled by the penalty parameter $\mu$:
%
\begin{align*}
\mathcal{L}_\mu \left( \boldsymbol{x}_{[1:N]}, \boldsymbol{\lambda} \right) = \underbrace{\sum_{i=1}^{N} g_i\left( \boldsymbol{x}_i \right) + \boldsymbol{\lambda}^\text{T}\left( \boldsymbol{b} - \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i \right)}_{\text{Ordinary Lagrangian}} + \underbrace{\frac{\mu}{2}\left\lVert \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i - \boldsymbol{b} \right\rVert_2^2}_{\text{Penalty term}}, \hspace{5mm} \mu > 0
.\end{align*}
%
The steps to solve the problem are the same as with dual decomposition, with two differences: the step size is set to the penalty parameter $\mu$, and, since the penalty term couples the blocks $\boldsymbol{x}_i$, the blockwise minimizations are carried out sequentially, in an alternating fashion (hence the name), rather than in parallel:%
%
\begin{align*}
\boldsymbol{x}_i &\leftarrow \argmin_{\boldsymbol{x}_i}\mathcal{L}_\mu\left( \boldsymbol{x}_{[1:N]}, \boldsymbol{\lambda}\right) \hspace{5mm} \text{sequentially for } i = 1, \ldots, N\\
\boldsymbol{\lambda} &\leftarrow \boldsymbol{\lambda} + \mu\left( \boldsymbol{b} - \sum_{i=1}^{N} \boldsymbol{A}_i\boldsymbol{x}_i \right)
.\end{align*}
%
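Continuing the toy instance from the dual decomposition sketch above (again with quadratic $g_i$ assumed purely for illustration, and $N = 2$ blocks), the \ac{ADMM} updates can be sketched as follows; each $\boldsymbol{x}_i$-update is now a small linear system because of the quadratic penalty term:
\begin{verbatim}
import numpy as np

# Same arbitrary toy data as before: g_i(x_i) = 0.5 * ||x_i - v_i||^2.
rng = np.random.default_rng(0)
m, n = 3, 4
A1, A2 = rng.standard_normal((m, n)), rng.standard_normal((m, n))
v1, v2 = rng.standard_normal(n), rng.standard_normal(n)
b = rng.standard_normal(m)
mu = 1.0  # penalty parameter, doubling as the dual step size

x1, x2 = np.zeros(n), np.zeros(n)
lam = np.zeros(m)
for _ in range(200):
    # x1-update: minimize L_mu over x1 with x2 and lam held fixed;
    # the objective is quadratic, so zeroing its gradient gives
    # (I + mu A1^T A1) x1 = v1 + A1^T lam + mu A1^T (b - A2 x2).
    x1 = np.linalg.solve(np.eye(n) + mu * A1.T @ A1,
                         v1 + A1.T @ lam + mu * A1.T @ (b - A2 @ x2))
    # x2-update: same, but already using the new x1 (alternating order).
    x2 = np.linalg.solve(np.eye(n) + mu * A2.T @ A2,
                         v2 + A2.T @ lam + mu * A2.T @ (b - A1 @ x1))
    # Dual update with step size mu.
    lam = lam + mu * (b - A1 @ x1 - A2 @ x2)

print("constraint residual:", np.linalg.norm(A1 @ x1 + A2 @ x2 - b))
\end{verbatim}
For such small quadratic problems the optimum could of course be computed directly; the sketch merely illustrates the mechanics of the alternating updates.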