First round of corrections
parent d36cd83cf6
commit af45d0ce19
@@ -31,6 +31,15 @@
 long = binary phase-shift keying
 }
 
+%
+% C
+%
+
+\DeclareAcronym{CN}{
+short = CN,
+long = check node
+}
+
 %
 % F
 %
@@ -87,3 +96,13 @@
 short = PDF,
 long = probability density function
 }
+
+%
+% V
+%
+
+\DeclareAcronym{VN}{
+short = VN,
+long = variable node
+}
+
@@ -26,30 +26,32 @@ Generally, the original decoding problem considered is either the \ac{MAP} or
 the \ac{ML} decoding problem:%
 %
 \begin{align*}
-\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{c \in \mathcal{C}}
-f_{\boldsymbol{C} \mid \boldsymbol{Y}} \left( \boldsymbol{c} \mid \boldsymbol{y} \right)\\
-\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{c \in \mathcal{C}}
-f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c} \right)
+\hat{\boldsymbol{c}}_{\text{\ac{MAP}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
+P \left(\boldsymbol{C} = \boldsymbol{c} \mid \boldsymbol{Y} = \boldsymbol{y}
+\right)\\
+\hat{\boldsymbol{c}}_{\text{\ac{ML}}} &= \argmax_{\boldsymbol{c} \in \mathcal{C}}
+f_{\boldsymbol{Y} \mid \boldsymbol{C}} \left( \boldsymbol{y} \mid \boldsymbol{c}
+\right)
 .\end{align*}%
 %
 The goal is to arrive at a formulation where a certain objective function
-$f$ must be minimized under certain constraints:%
+$g \left( \cdot \right) $ must be minimized under certain constraints:%
 %
 \begin{align*}
-\text{minimize}\hspace{2mm} &f\left( \boldsymbol{c} \right)\\
-\text{subject to}\hspace{2mm} &\boldsymbol{c} \in D
+\text{minimize}\hspace{2mm} &g\left( \tilde{\boldsymbol{c}} \right)\\
+\text{subject to}\hspace{2mm} &\tilde{\boldsymbol{c}} \in D
 ,\end{align*}%
 %
-where $D$ is the domain of values attainable for $\boldsymbol{c}$ and represents the
-constraints.
+where $D \subseteq \mathbb{R}^n$ is the domain of values attainable for $\tilde{\boldsymbol{c}}$
+and represents the constraints.
 
 In contrast to the established message-passing decoding algorithms,
-the viewpoint then changes from observing the decoding process in its
-tanner graph representation (as shown in figure \ref{fig:dec:tanner})
+the perspective then changes from observing the decoding process in its
+Tanner graph representation with \acp{VN} and \acp{CN} (as shown in figure \ref{fig:dec:tanner})
 to a spatial representation (figure \ref{fig:dec:spatial}),
 where the codewords are some of the vertices of a hypercube.
-The goal is to find the point $\boldsymbol{c}$,
-which minimizes the objective function $f$.
+The goal is to find the point $\tilde{\boldsymbol{c}}$,
+which minimizes the objective function $g\left( \cdot \right) $.
 
 %
 % Figure showing decoding space
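To keep the corrected formulation concrete, here is a minimal numpy sketch of the brute-force rule above (not part of the commit). The [7, 4] parity-check matrix, the noise level, and the BPSK-over-AWGN assumption ($\boldsymbol{x} = \left( -1 \right)^{\boldsymbol{c}}$, under which maximizing $f_{\boldsymbol{Y} \mid \boldsymbol{C}}$ reduces to a nearest-codeword search) are all illustrative assumptions.

    import itertools
    import numpy as np

    # Hypothetical [7, 4] parity-check matrix; any small binary H works here.
    H = np.array([[1, 1, 0, 1, 1, 0, 0],
                  [1, 0, 1, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 0, 1]])

    def codewords(H):
        """Enumerate C = {c in {0,1}^n : Hc = 0 (mod 2)} exhaustively."""
        n = H.shape[1]
        return [np.array(c) for c in itertools.product((0, 1), repeat=n)
                if not ((H @ np.array(c)) % 2).any()]

    def ml_decode(y, H):
        """Brute-force ML: under BPSK + AWGN, argmax_c f(y | c) is the
        codeword whose bipolar image (-1)^c lies closest to y."""
        return min(codewords(H), key=lambda c: np.sum((y - (-1.0) ** c) ** 2))

    rng = np.random.default_rng(1)
    c_true = codewords(H)[5]
    y = (-1.0) ** c_true + 0.5 * rng.standard_normal(H.shape[1])
    print(c_true, ml_decode(y, H))

The exhaustive search over $2^k$ codewords is exactly what the relaxations discussed below are meant to avoid.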
@@ -70,15 +72,18 @@ which minimizes the objective function $f$.
 \begin{tikzpicture}[scale=1, transform shape]
 \node[checknode,
 label={[below, label distance=-0.4cm, align=center]
-$c$\\$\left( x_1 + x_2 + x_3 = 0 \right) $}]
-(c) at (0, 0) {};
-\node[variablenode, label={$x_1$}] (x1) at (-2, 2) {};
-\node[variablenode, label={$x_2$}] (x2) at (0, 2) {};
-\node[variablenode, label={$x_3$}] (x3) at (2, 2) {};
+CN\\$\left( c_1 + c_2 + c_3 = 0 \right) $}]
+(cn) at (0, 0) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_1 \right)$}]
+(c1) at (-2, 2) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_2 \right)$}]
+(c2) at (0, 2) {};
+\node[variablenode, label={[above, align=center] \acs{VN}\\$\left( c_3 \right)$}]
+(c3) at (2, 2) {};
 
-\draw (c) -- (x1);
-\draw (c) -- (x2);
-\draw (c) -- (x3);
+\draw (cn) -- (c1);
+\draw (cn) -- (c2);
+\draw (cn) -- (c3);
 \end{tikzpicture}
 
 \caption{Tanner graph representation of a single parity-check code}
@@ -148,7 +153,7 @@ which minimizes the objective function $f$.
 
 \node[color=KITgreen, fill=KITgreen,
 draw, circle, inner sep=0pt, minimum size=4pt] (c) at (0.9, 0.7, 1) {};
-\node[color=KITgreen, right=0cm of c] {$\boldsymbol{c}$};
+\node[color=KITgreen, right=0cm of c] {$\tilde{\boldsymbol{c}}$};
 \end{tikzpicture}
 
 \caption{Spatial representation of a single parity-check code}
@@ -157,6 +162,7 @@ which minimizes the objective function $f$.
 
 \caption{Different representations of the decoding problem}
 \end{figure}
+\todo{Rename $c$ to e.g. $h$ or remove it completely?}
 
 
 
@@ -184,17 +190,15 @@ making the \ac{ML} and \ac{MAP} decoding problems equivalent.}%
 \label{eq:lp:ml}
 .\end{align}%
 %
-Assuming a memoryless channel, \ref{eq:lp:ml} can be rewritten in terms
+Assuming a memoryless channel, equation (\ref{eq:lp:ml}) can be rewritten in terms
 of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
 %
 \begin{align*}
 \hat{\boldsymbol{c}} = \argmin_{\boldsymbol{c}\in\mathcal{C}}
-\sum_{i=1}^{n} \gamma_i y_i,%
+\sum_{i=1}^{n} \gamma_i c_i,%
 \hspace{5mm} \gamma_i = \ln\left(
-\frac{f_{\boldsymbol{Y} | \boldsymbol{C}}
-\left( Y_i = y_i \mid C_i = 0 \right) }
-{f_{\boldsymbol{Y} | \boldsymbol{C}}
-\left( Y_i = y_i | C_i = 1 \right) } \right)
+\frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
+{f_{Y_i | C_i} \left( y_i \mid C_i = 1 \right) } \right)
 .\end{align*}
 %
 The authors propose the following cost function%
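As a numerical cross-check of the LLR form: for BPSK over AWGN the LLR collapses to the closed form $\gamma_i = 2 y_i / \sigma^2$; this closed form is an assumption of the sketch below, not a statement from the hunk above.

    import itertools
    import numpy as np

    def llr_awgn(y, sigma2):
        """gamma_i = ln f(y_i | C_i = 0) - ln f(y_i | C_i = 1); for BPSK
        x_i = (-1)^{c_i} over AWGN this reduces to 2 y_i / sigma^2."""
        return 2.0 * y / sigma2

    def ml_decode_llr(y, H, sigma2):
        """Linear-cost form of ML decoding: argmin_c sum_i gamma_i c_i."""
        gamma = llr_awgn(y, sigma2)
        cws = (np.array(c)
               for c in itertools.product((0, 1), repeat=H.shape[1])
               if not ((H @ np.array(c)) % 2).any())
        return min(cws, key=lambda c: float(gamma @ c))

Both this decoder and the likelihood form return the same codeword, since the linear cost differs from the log-likelihood only by terms independent of $\boldsymbol{c}$.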
@@ -203,7 +207,7 @@ have the same meaning.}
 for the \ac{LP} decoding problem:%
 %
 \begin{align*}
-\sum_{i=1}^{n} \gamma_i c_i
+g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
 .\end{align*}
 %
 With this cost function, the exact integer linear program formulation of \ac{ML}
@@ -227,13 +231,13 @@ decoding, redefining the constraints in terms of the \text{codeword polytope}
 %
 \begin{align*}
 \text{poly}\left( \mathcal{C} \right) = \left\{
-\sum_{c \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
+\sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
 \text{ : } \lambda_{\boldsymbol{c}} \ge 0,
 \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
 ,\end{align*} %
 %
 which represents the \textit{convex hull} of all possible codewords,
-i.e. the convex set of linear combinations of all codewords.
+i.e., the set of all convex combinations of the codewords.
 However, since the number of constraints needed to characterize the codeword
 polytope is exponential in the code length, this formulation is relaxed further.
 By observing that each check node defines its own local single parity-check
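The convex-hull definition can be probed directly: any weights $\lambda_{\boldsymbol{c}} \ge 0$ summing to one yield a point of $\text{poly}\left( \mathcal{C} \right)$. A small sketch with a hypothetical two-check code:

    import itertools
    import numpy as np

    H = np.array([[1, 1, 1, 0],
                  [0, 1, 1, 1]])                 # hypothetical example code
    C = [np.array(c) for c in itertools.product((0, 1), repeat=4)
         if not ((H @ np.array(c)) % 2).any()]

    rng = np.random.default_rng(0)
    lam = rng.dirichlet(np.ones(len(C)))         # lambda_c >= 0, sum = 1
    point = sum(l * c for l, c in zip(lam, C))   # an element of poly(C)
    print(point)                                 # fractional in general

Since a linear program attains its optimum at a vertex, optimizing over this exact polytope recovers a codeword; the difficulty is the exponential number of constraints noted above.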
@@ -244,12 +248,14 @@ This consideration leads to constraints, that can be described as follows
 \cite[Sec. II, A]{efficient_lp_dec_admm}:%
 %
 \begin{align*}
-\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j}
+\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}
 \hspace{5mm}\forall j\in \mathcal{J}
 ,\end{align*}%
+\todo{Explicitly state that the first relaxation is essentially just lifting the integer
+requirement}%
 where $\boldsymbol{T}_j$ is the \textit{transfer matrix}, which selects the
 neighboring variable nodes
-of check node $j$%
+of check node $j$
 \footnote{For example, if the $j$th row of the parity-check matrix
 $\boldsymbol{H}$ was $\boldsymbol{h}_j =
 \begin{bmatrix} 0 & 1 & 0 & 1 & 0 & 1 & 0 \end{bmatrix}$,
@@ -259,17 +265,17 @@ the transfer matrix would be $\boldsymbol{T}_j =
 0 & 0 & 0 & 1 & 0 & 0 & 0 \\
 0 & 0 & 0 & 0 & 0 & 1 & 0 \\
 \end{bmatrix} $ (example taken from \cite[Sec. II, A]{efficient_lp_dec_admm}).}
-(i.e. the relevant components of $\boldsymbol{c}$ for parity-check $j$)
-and $\mathcal{P}_{d}$ is the \textit{check polytope}, the convex hull of all
-binary vectors of length $d$ with even parity%
+(i.e., the relevant components of $\boldsymbol{c}$ for parity-check $j$)
+and $\mathcal{P}_{d_j}$ is the \textit{check polytope}, the convex hull of all
+binary vectors of length $d_j$ with even parity%
 \footnote{Essentially $\mathcal{P}_{d_j}$ is the set of vectors that satisfy
 parity-check $j$, but extended to the continuous domain.}%
 .
 
 In figure \ref{fig:dec:poly}, the two relaxations are compared for an
-example code.
+exemplary code.
 Figure \ref{fig:dec:poly:exact} shows the codeword polytope
-$\text{poly}\left( \mathcal{C} \right) $, i.e. the constraints for the
+$\text{poly}\left( \mathcal{C} \right) $, i.e., the constraints for the
 equivalent linear program to exact \ac{ML} decoding - only valid codewords are
 feasible solutions.
 Figure \ref{fig:dec:poly:local} shows the local codeword polytope of each check
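The transfer matrix in the footnote is mechanical to construct; this short sketch reproduces the quoted example:

    import numpy as np

    def transfer_matrix(h_row):
        """T_j has one row per participating variable node; each row
        selects one component of c entering parity-check j."""
        idx = np.flatnonzero(h_row)
        T = np.zeros((idx.size, h_row.size), dtype=int)
        T[np.arange(idx.size), idx] = 1
        return T

    h_j = np.array([0, 1, 0, 1, 0, 1, 0])   # the row quoted in the footnote
    print(transfer_matrix(h_j))             # rows select c_2, c_4 and c_6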
@@ -577,16 +583,16 @@ figure \ref{fig:dec:poly:relaxed}.%
 It can be seen that the relaxed codeword polytope $\overline{Q}$ introduces
 vertices with fractional values;
 these represent erroneous non-codeword solutions to the linear program and
-correspond to the so-called \textit{pseudocodewords} introduced in
+correspond to the so-called \textit{pseudo-codewords} introduced in
 \cite{feldman_paper}.
 However, since for \ac{LDPC} codes $\overline{Q}$ scales linearly with $n$ instead of
 exponentially, it is a lot more tractable for practical applications.
 
-The resulting formulation of the relaxed optimization problem is the following:%
+The resulting formulation of the relaxed optimization problem becomes:%
 %
 \begin{align*}
-\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
-\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \boldsymbol{c} \in \mathcal{P}_{d_j},
+\text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\
+\text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j},
 \hspace{5mm}j\in\mathcal{J}
 .\end{align*}%
 
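Small instances of this relaxed problem can be handed directly to a stock LP solver. The sketch below assumes the odd-subset ("forbidden set") inequalities from \cite{feldman_paper} as the explicit description of $\mathcal{P}_{d_j}$, which the hunk above does not spell out; $\boldsymbol{H}$ and the LLR vector are hypothetical.

    import itertools
    import numpy as np
    from scipy.optimize import linprog

    def lp_decode(gamma, H):
        r"""Relaxed LP decoding: minimize gamma^T c over 0 <= c_i <= 1,
        subject to sum_{i in S} c_i - sum_{i in N(j)\S} c_i <= |S| - 1
        for every check j and every odd-sized subset S of N(j)."""
        m, n = H.shape
        A, b = [], []
        for j in range(m):
            nbrs = np.flatnonzero(H[j])
            for r in range(1, nbrs.size + 1, 2):        # odd |S| only
                for S in itertools.combinations(nbrs, r):
                    row = np.zeros(n)
                    row[list(S)] = 1.0
                    row[np.setdiff1d(nbrs, S)] = -1.0
                    A.append(row)
                    b.append(len(S) - 1.0)
        res = linprog(gamma, A_ub=np.array(A), b_ub=np.array(b),
                      bounds=[(0.0, 1.0)] * n)
        return res.x

    H = np.array([[1, 1, 0, 1, 1, 0, 0],
                  [1, 0, 1, 1, 0, 1, 0],
                  [0, 1, 1, 1, 0, 0, 1]])
    gamma = np.array([0.9, -1.1, 0.3, 1.2, 0.7, -0.2, 0.8])
    print(lp_decode(gamma, H))

An integral solution comes with Feldman's ML certificate; fractional coordinates indicate one of the pseudo-codeword vertices discussed above.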
@@ -633,28 +639,28 @@ determined by the channel model.
 The prior \ac{PDF} $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$ is also
 known, since the equal probability assumption is made on
 $\mathcal{C}\left( \boldsymbol{H} \right)$.
-However, because the considered domain is continuous,
+However, since the considered domain is continuous,
 the prior \ac{PDF} cannot be ignored as a constant during the minimization
 as is often done, and has a rather unwieldy representation:%
 %
 \begin{align}
 f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) =
-\frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| }
-\sum_{c \in \mathcal{C}\left( \boldsymbol{H} \right) }
+\frac{1}{\left| \mathcal{C} \right| }
+\sum_{\boldsymbol{c} \in \mathcal{C} }
 \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right)
 \label{eq:prox:prior_pdf}
 .\end{align}%
 %
 In order to rewrite the prior \ac{PDF}
 $f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$,
-the so-called \textit{code-constraint polynomial} is introduced:%
+the so-called \textit{code-constraint polynomial} is introduced as:%
 %
 \begin{align*}
 h\left( \boldsymbol{x} \right) =
 \underbrace{\sum_{j=1}^{n} \left( x_j^2-1 \right) ^2}_{\text{Bipolar constraint}}
 + \underbrace{\sum_{i=1}^{m} \left[
 \left( \prod_{j\in \mathcal{A}
-\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity Constraint}}%
+\left( i \right) } x_j \right) -1 \right] ^2}_{\text{Parity constraint}}%
 .\end{align*}%
 %
 The intention of this function is to provide a way to penalize vectors far
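The code-constraint polynomial translates directly into numpy, reading $\mathcal{A}\left( i \right)$ off as the support of the $i$th row of $\boldsymbol{H}$ ($\boldsymbol{x}$ is a vector in the bipolar domain; the small $\boldsymbol{H}$ is a hypothetical example):

    import numpy as np

    def h(x, H):
        """Code-constraint polynomial: the bipolar term drives each x_j
        toward {-1, +1}; the parity term drives prod_{j in A(i)} x_j
        toward +1, i.e. even parity in the bipolar domain."""
        bipolar = np.sum((x ** 2 - 1.0) ** 2)
        parity = sum((np.prod(x[np.flatnonzero(H[i])]) - 1.0) ** 2
                     for i in range(H.shape[0]))
        return bipolar + parity

    H = np.array([[1, 1, 1, 0], [0, 1, 1, 1]])     # hypothetical example
    print(h((-1.0) ** np.array([1, 1, 0, 1]), H))  # codeword image -> 0.0

h vanishes exactly on the bipolar images of codewords and grows with the distance from them, which is the penalization the following paragraph describes.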
@@ -662,20 +668,20 @@ from a codeword and favor those close to one.
 In order to achieve this, the polynomial is composed of two parts: one term
 representing the bipolar constraint, providing for a discrete solution of the
 continuous optimization problem, and one term representing the parity
-constraint, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
-The prior \ac{PDF} is then approximated using the code-constraint polynomial:%
+constraints, accommodating the role of the parity-check matrix $\boldsymbol{H}$.
+The prior \ac{PDF} is then approximated using the code-constraint polynomial as:%
 %
 \begin{align}
 f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)
-\approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }%
+\approx \frac{1}{Z}\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) }%
 \label{eq:prox:prior_pdf_approx}
 .\end{align}%
 %
 The authors justify this approximation by arguing that for
 $\gamma \rightarrow \infty$, the approximation in equation
-\ref{eq:prox:prior_pdf_approx} approaches the original function in equation
-\ref{eq:prox:prior_pdf}.
-This approximation can then be plugged into equation \ref{eq:prox:vanilla_MAP}
+(\ref{eq:prox:prior_pdf_approx}) approaches the original function in equation
+(\ref{eq:prox:prior_pdf}).
+This approximation can then be plugged into equation (\ref{eq:prox:vanilla_MAP})
 and the likelihood can be rewritten using the negative log-likelihood
 $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left(
@@ -683,8 +689,8 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 %
 \begin{align*}
 \hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}}
-e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
-e^{-\gamma h\left( \boldsymbol{x} \right) } \\
+\mathrm{e}^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }
+\mathrm{e}^{-\gamma h\left( \boldsymbol{x} \right) } \\
 &= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left(
 L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)
@@ -692,10 +698,10 @@ $L \left( \boldsymbol{y} \mid \boldsymbol{x} \right) = -\ln\left(
 .\end{align*}%
 %
 Thus, with proximal decoding, the objective function
-$f\left( \boldsymbol{x} \right)$ considered is%
+$g\left( \boldsymbol{x} \right)$ considered is%
 %
 \begin{align}
-f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{x} \mid \boldsymbol{y} \right)
+g\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)%
 \label{eq:prox:objective_function}
 \end{align}%
@@ -703,14 +709,14 @@ $f\left( \boldsymbol{x} \right)$ considered is%
 and the decoding problem is reformulated to%
 %
 \begin{align*}
-\text{minimize}\hspace{2mm} &L\left( \boldsymbol{x} \mid \boldsymbol{y} \right)
+\text{minimize}\hspace{2mm} &L\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
 + \gamma h\left( \boldsymbol{x} \right)\\
 \text{subject to}\hspace{2mm} &\boldsymbol{x} \in \mathbb{R}^n
 .\end{align*}
 %
 
 For the solution of the approximate \ac{MAP} decoding problem, the two parts
-of \ref{eq:prox:objective_function} are considered separately:
+of equation (\ref{eq:prox:objective_function}) are considered separately:
 the minimization of the objective function occurs in an alternating
 fashion, switching between the negative log-likelihood
 $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled
@@ -737,8 +743,8 @@ It is then immediately approximated with gradient-descent:%
 \argmin_{\boldsymbol{t} \in \mathbb{R}^n}
 \left( \gamma h\left( \boldsymbol{t} \right) +
 \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\
-&\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{r} \right),
-\hspace{5mm} \gamma \text{ small}
+&\approx \boldsymbol{r} - \gamma \nabla h \left( \boldsymbol{r} \right),
+\hspace{5mm} \gamma > 0, \text{ small}
 .\end{align*}%
 %
 The second step thus becomes%
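A sketch of this gradient step: $\nabla h$ follows from the chain rule, with a leave-one-out product for the parity term; the default step size and the small-$\gamma$ regime are assumptions, as in the excerpt.

    import numpy as np

    def grad_h(x, H):
        """Gradient of the code-constraint polynomial h."""
        g = 4.0 * x * (x ** 2 - 1.0)               # bipolar term
        for i in range(H.shape[0]):
            idx = np.flatnonzero(H[i])
            p = np.prod(x[idx])
            for j in idx:                          # parity term, chain rule
                others = np.prod(x[np.setdiff1d(idx, j)])
                g[j] += 2.0 * (p - 1.0) * others
        return g

    def prox_step(r, H, gamma=0.05):
        """One gradient step approximating the proximal operator:
        prox_{gamma h}(r) ~= r - gamma * grad_h(r) for small gamma > 0."""
        return r - gamma * grad_h(r, H)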
@@ -775,7 +781,9 @@ is%
 %
 \begin{align*}
 f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right)
-= \frac{1}{\sqrt{2\pi\sigma^2}}e^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 }{\sigma^2}}
+= \frac{1}{\sqrt{2\pi\sigma^2}}\mathrm{e}^{-\frac{\lVert \boldsymbol{y}-\boldsymbol{x}
+\rVert^2 }
+{2\sigma^2}}
 .\end{align*}
 %
 Thus, the gradient of the negative log-likelihood becomes%
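The gradient this last sentence leads into follows from the Gaussian \ac{PDF} above: up to additive constants, $L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) = \lVert \boldsymbol{y}-\boldsymbol{x} \rVert^2 / \left( 2\sigma^2 \right)$, so $\nabla_{\boldsymbol{x}} L = \left( \boldsymbol{x}-\boldsymbol{y} \right) / \sigma^2$ (presumably what the cut-off formula states). A toy end-to-end loop, reusing grad_h from the sketch above; the step sizes and iteration count are assumptions.

    import numpy as np

    def grad_L_awgn(x, y, sigma2):
        """Gradient of L(y|x) = ||y - x||^2 / (2 sigma^2) + const."""
        return (x - y) / sigma2

    def proximal_decode(y, H, sigma2, gamma=0.05, iters=200):
        """Alternating gradient steps on L(y|x) and on gamma * h(x),
        with grad_h as defined in the previous sketch."""
        x = y.astype(float).copy()
        for _ in range(iters):
            r = x - gamma * grad_L_awgn(x, y, sigma2)  # likelihood step
            x = r - gamma * grad_h(r, H)               # code-constraint step
        return (x < 0).astype(int)                     # bipolar -> bits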
@@ -73,6 +73,8 @@ Lastly, the optimization methods utilized are described.
 \label{fig:notation}
 \end{figure}
 
+\todo{Note about $\tilde{\boldsymbol{c}}$ (and maybe $\tilde{\boldsymbol{x}}$?)}
+
 
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 \section{Channel Coding with LDPC Codes}