First round of corrections

This commit is contained in:
Andreas Tsouchlos 2023-03-07 19:48:28 +01:00
parent d36cd83cf6
commit af45d0ce19
3 changed files with 92 additions and 63 deletions

View File

@ -31,6 +31,15 @@
long = binary phase-shift keying
}
%
% C
%
\DeclareAcronym{CN}{
short = CN,
long = check node
}
%
% F
%
@ -87,3 +96,13 @@
short = PDF,
long = probability density function
}
%
% V
%
\DeclareAcronym{VN}{
short = VN,
long = variable node
}

View File

@ -26,30 +26,32 @@ Generally, the original decoding problem considered is either the \ac{MAP} or
the \ac{ML} decoding problem:%
%
\begin{align*}
\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
P \left(\boldsymbol{C} = \boldsymbol{c} \mid \boldsymbol{Y} = \boldsymbol{y}
\right)\\
\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
\right)
.\end{align*}%
%
The goal is to arrive at a formulation in which a certain objective function
$g \left( \cdot \right) $ must be minimized under certain constraints:%
%
\begin{align*}
\text{minimize}\hspace{2mm} &g\left( \tilde{\boldsymbol{c}} \right)\\
\text{subject to}\hspace{2mm} &\tilde{\boldsymbol{c}} \in D
,\end{align*}%
%
where $D \subseteq \mathbb{R}^n$ is the domain of values attainable for $\tilde{\boldsymbol{c}}$
and represents the constraints.
In contrast to the established message-passing decoding algorithms,
the perspective then changes from observing the decoding process in its
Tanner graph representation with \acp{VN} and \acp{CN} (as shown in figure \ref{fig:dec:tanner})
to a spatial representation (figure \ref{fig:dec:spatial}),
where the codewords form a subset of the vertices of a hypercube.
The goal is to find the point $\tilde{\boldsymbol{c}}$,
which minimizes the objective function $g\left( \cdot \right) $.
%
% Figure showing decoding space
@ -70,15 +72,18 @@ which minimizes the objective function $f$.
\begin{tikzpicture}[scale=1, transform shape]
\node[checknode,
label={[below, label distance=-0.4cm, align=center]
CN\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
(cn) at (0, 0) {};
\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_1 \right)$}]
(c1) at (-2, 2) {};
\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_2 \right)$}]
(c2) at (0, 2) {};
\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_3 \right)$}]
(c3) at (2, 2) {};
\draw (cn) -- (c1);
\draw (cn) -- (c2);
\draw (cn) -- (c3);
\end{tikzpicture}
\caption{Tanner graph representation of a single parity-check code}
@ -148,7 +153,7 @@ which minimizes the objective function $f$.
\node[color=KITgreen, fill=KITgreen,
draw, circle, inner sep=0pt, minimum size=4pt] (c) at (0.9, 0.7, 1) {};
\node[color=KITgreen, right=0cm of c] {$\tilde{\boldsymbol{c}}$};
\end{tikzpicture}
\caption{Spatial representation of a single parity-check code}
@ -157,6 +162,7 @@ which minimizes the objective function $f$.
\caption{Different representations of the decoding problem}
\end{figure}
\todo{Rename $c$ to e.g. $h$ or remove it completely?}
@ -184,17 +190,15 @@ making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
\label{eq:lp:ml}
.\end{align}%
%
Assuming a memoryless channel, equation (\ref{eq:lp:ml}) can be rewritten in terms
of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
%
\begin{align*}
\hat{\boldsymbol{c}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
\sum_{i=1}^{n} \gamma_i c_i,%
\hspace{5mm} \gamma_i = \ln\left(
\frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
{f_{Y_i | C_i} \left( y_i \mid C_i = 1 \right) } \right)
.\end{align*}
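For a concrete sense of the channel term, assume for the moment a \ac{BPSK} mapping
$c_i \mapsto \left( -1 \right) ^{c_i}$ and an additive white Gaussian noise channel with
noise variance $\sigma^2$ (neither of which is prescribed by the general formulation above);
the normalization constants of the two conditional densities then cancel and the \ac{LLR}
reduces to a scaled channel output:%
%
\begin{align*}
\gamma_i
= \frac{\left( y_i + 1 \right) ^2 - \left( y_i - 1 \right) ^2}{2\sigma^2}
= \frac{2 y_i}{\sigma^2}
.\end{align*}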
%
The authors propose the following cost function%
@ -203,7 +207,7 @@ have the same meaning.}
for the \ac{LP} decoding problem:%
%
\begin{align*}
g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
.\end{align*}
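Without any constraints, this cost function would decouple over the individual bits,
and its minimizer over $\left\{ 0,1 \right\} ^{n}$ would simply be the bit-wise hard decision%
%
\begin{align*}
\hat{c}_i = \begin{cases}
0, & \gamma_i \ge 0\\
1, & \gamma_i < 0
\end{cases}
,\end{align*}%
%
so it is the constraints introduced in the following that couple the bits and enforce the
code structure.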
%
With this cost function, the exact integer linear program formulation of \ac{ML}
@ -227,13 +231,13 @@ decoding, redefining the constraints in terms of the \text{codeword polytope}
%
\begin{align*}
\text{poly}\left( \mathcal{C} \right) = \left\{
\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
\text{ : } \lambda_{\boldsymbol{c}} \ge 0,
\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
,\end{align*} %
%
which represents the \textit{convex hull} of all possible codewords,
i.e., the set of all convex combinations of the codewords.
However, since the number of constraints needed to characterize the codeword
polytope is exponential in the code length, this formulation is relaxed further.
By observing that each check node defines its own local single parity-check
@ -244,12 +248,14 @@ This consideration leads to constraints, that can be described as follows
\cite[Sec. II, A]{efficient_lp_dec_admm}:%
%
\begin{align*}
\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}
\hspace{5mm}\forall j\in \mathcal{J}
,\end{align*}%
\todo{Explicitly state that the first relaxation is essentially just lifting the integer
requirement}%
where $\boldsymbol{T}_j$ is the \textit{transfer matrix}, which selects the
neighboring variable nodes
of check node $j$%
\footnote{For example, if the $j$th row of the parity-check matrix
$\boldsymbol{H}$ was $\boldsymbol{h}_j =
\begin{bmatrix} 0 & 1 & 0 & 1 & 0 & 1 & 0 \end{bmatrix}$,
@ -259,17 +265,17 @@ the transfer matrix would be $\boldsymbol{T}_j =
0 & 0 & 0 & 1 & 0 & 0 & 0 \\
0 & 0 & 0 & 0 & 0 & 1 & 0 \\
\end{bmatrix} $ (example taken from \cite[Sec. II, A]{efficient_lp_dec_admm}).}
(i.e., the relevant components of $\boldsymbol{c}$ for parity-check $j$)
and $\mathcal{P}_{d_j}$ is the \textit{check polytope}, the convex hull of all
binary vectors of length $d_j$ with even parity%
\footnote{Essentially $\mathcal{P}_{d_j}$ is the set of vectors that satisfy
parity-check $j$, but extended to the continuous domain.}%
.
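As a small illustration (a generic example, not one taken from the cited works), consider a
parity-check of degree three: writing $\text{conv}\left\{ \cdot \right\} $ for the convex hull,
the corresponding check polytope is%
%
\begin{align*}
\mathcal{P}_{3} = \text{conv}\left\{
\left( 0,0,0 \right) , \left( 1,1,0 \right) ,
\left( 1,0,1 \right) , \left( 0,1,1 \right)
\right\}
,\end{align*}%
%
which also contains fractional points such as
$\left( \frac{1}{2}, \frac{1}{2}, \frac{1}{2} \right) $; such points satisfy the relaxed
constraint although they are not binary vectors of even parity.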
In figure \ref{fig:dec:poly}, the two relaxations are compared for an
example code.
Figure \ref{fig:dec:poly:exact} shows the codeword polytope
$\text{poly}\left( \mathcal{C} \right) $, i.e., the feasible set of the linear program
that is equivalent to exact \ac{ML} decoding; its vertices are exactly the valid
codewords.
Figure \ref{fig:dec:poly:local} shows the local codeword polytope of each check
@ -577,16 +583,16 @@ figure \ref{fig:dec:poly:relaxed}.%
It can be seen that the relaxed codeword polytope $\overline{Q}$ introduces
vertices with fractional values;
these represent erroneous non-codeword solutions to the linear program and
correspond to the so-called \textit{pseudo-codewords} introduced in
\cite{feldman_paper}.
However, since for \ac{LDPC} codes the number of constraints describing $\overline{Q}$
grows linearly with $n$ instead of exponentially, it is far more tractable for practical
applications.
The resulting formulation of the relaxed optimization problem becomes:%
%
\begin{align*}
\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\
\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j},
\hspace{5mm}j\in\mathcal{J}
.\end{align*}%
@ -633,28 +639,28 @@ determined by the channel model.
The prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$ is also
known, since an equal probability assumption is made on
$\mathcal{C}\left( \boldsymbol{H} \right)$.
However, since the considered domain is continuous,
the prior \ac{PDF} cannot be ignored as a constant during the minimization
as is often done, and has a rather unwieldy representation:%
%
\begin{align}
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) =
\frac{1}{\left| \mathcal{C} \right| }
\sum_{\boldsymbol{c} \in \mathcal{C} }
\delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right)
\label{eq:prox:prior_pdf}
.\end{align}%
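For the length-three single parity-check code from figure \ref{fig:dec:tanner}, for
instance, this mixture consists of four Dirac impulses located at the bipolar images of
the codewords:%
%
\begin{align*}
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) = \frac{1}{4} \left(
\delta\left( \boldsymbol{x} - \left( 1,1,1 \right) \right)
+ \delta\left( \boldsymbol{x} - \left( -1,-1,1 \right) \right)
+ \delta\left( \boldsymbol{x} - \left( -1,1,-1 \right) \right)
+ \delta\left( \boldsymbol{x} - \left( 1,-1,-1 \right) \right)
\right)
.\end{align*}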
%
In order to rewrite the prior \ac{PDF}
$f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$,
the so-called \textit{code-constraint polynomial} is introduced as:%
%
\begin{align*}
h\left( \boldsymbol{x} \right) =
\underbrace{\sum_{j=1}^{n} \left( x_j^2-1 \right) ^2}_{\text{Bipolar constraint}}
+ \underbrace{\sum_{i=1}^{m} \left[
\left( \prod_{j\in \mathcal{A}
\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity constraint}}%
.\end{align*}%
%
The intention of this function is to provide a way to penalize vectors far
@ -662,20 +668,20 @@ from a codeword and favor those close to one.
In order to achieve this, the polynomial is composed of two parts: one term
representing the bipolar constraint, providing for a discrete solution of the
continuous optimization problem, and one term representing the parity
constraints, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
The prior \ac{PDF} is then approximated using the code-constraint polynomial as:%
%
\begin{align}
f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)
\approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) }%
\label{eq:prox:prior_pdf_approx}
.\end{align}%
%
The authors justify this approximation by arguing that for
$\gamma \rightarrow \infty$, the approximation in equation
(\ref{eq:prox:prior_pdf_approx}) approaches the original function in equation
(\ref{eq:prox:prior_pdf}).
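This is easy to see in one dimension: for a single bit with no parity-checks (an
illustrative special case, not taken from the referenced paper), the code-constraint
polynomial reduces to $h\left( x \right) = \left( x^2 - 1 \right) ^2$ and%
%
\begin{align*}
\mathrm{e}^{-\gamma h\left( x \right) } = \mathrm{e}^{-\gamma \left( x^2 - 1 \right) ^2}
,\end{align*}%
%
which equals one at $x = \pm 1$ and vanishes for every other $x$ as
$\gamma \rightarrow \infty$, so that the normalized approximation concentrates its
probability mass on the bipolar values, mirroring the Dirac impulses in equation
(\ref{eq:prox:prior_pdf}).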
This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP})
and the likelihood can be rewritten using the negative log-likelihood
$L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left(
@ -683,8 +689,8 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
%
\begin{align*}
\hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
\mathrm{e}^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) } \\
&= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
+ \gamma h\left( \boldsymbol{x} \right)
@ -692,10 +698,10 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
.\end{align*}%
%
Thus, with proximal decoding, the objective function
$g\left( \boldsymbol{x} \right)$ considered is%
%
\begin{align}
g\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
+ \gamma h\left( \boldsymbol{x} \right)%
\label{eq:prox:objective_function}
\end{align}%
@ -703,14 +709,14 @@ $f\left( \boldsymbol{x} \right)$ considered is%
and the decoding problem is reformulated as%
%
\begin{align*}
\text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
+ \gamma h\left( \boldsymbol{x} \right)\\
\text{subject to}\hspace{2mm} &\boldsymbol{x} \in \mathbb{R}^n
.\end{align*}
%
For the solution of the approximate \ac{MAP} decoding problem, the two parts
of equation (\ref{eq:prox:objective_function}) are considered separately:
the minimization of the objective function occurs in an alternating
fashion, switching between the negative log-likelihood
$L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled
@ -737,8 +743,8 @@ It is then immediately approximated with gradient-descent:%
\argmin_{\boldsymbol{t} \in \mathbb{R}^n}
\left( \gamma h\left( \boldsymbol{t} \right) +
\frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{r} \rVert^2 \right)\\
&\approx \boldsymbol{r} - \gamma \nabla h \left( \boldsymbol{r} \right),
\hspace{5mm} \gamma > 0, \text{ small}
.\end{align*}%
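For reference, the gradient used in this step can be stated componentwise by direct
differentiation of the code-constraint polynomial (the referenced paper may present it
in a different but equivalent form):%
%
\begin{align*}
\left[ \nabla h\left( \boldsymbol{x} \right) \right] _{k} =
4 x_k \left( x_k^2 - 1 \right)
+ 2 \sum_{i \text{ : } k \in \mathcal{A}\left( i \right) }
\left[ \left( \prod_{j\in \mathcal{A}\left( i \right) } x_j \right) - 1 \right]
\prod_{j\in \mathcal{A}\left( i \right) \setminus \left\{ k \right\} } x_j
.\end{align*}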
%
The second step thus becomes%
@ -775,7 +781,9 @@ is%
%
\begin{align*}
f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
= \frac{1}{\left( 2\pi\sigma^2 \right) ^{n/2}}\mathrm{e}^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x}
\rVert^2 }
{2\sigma^2}}
.\end{align*}
%
Thus, the gradient of the negative log-likelihood becomes%

View File

@ -73,6 +73,8 @@ Lastly, the optimization methods utilized are described.
\label{fig:notation}
\end{figure}
\todo{Note about $\tilde{\boldsymbol{c}}$ (and maybe $\tilde{\boldsymbol{x}}$?)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Channel Coding with LDPC Codes}