From 05be3d21b65df7b7deda0c86310fb874ca7f5202 Mon Sep 17 00:00:00 2001 From: Andreas Tsouchlos Date: Tue, 14 Feb 2023 15:34:39 +0100 Subject: [PATCH] First draft of proximal decoding background --- latex/thesis/chapters/decoding_techniques.tex | 157 +++++++++++++++++- 1 file changed, 150 insertions(+), 7 deletions(-) diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex index 241bc79..1ae4ed3 100644 --- a/latex/thesis/chapters/decoding_techniques.tex +++ b/latex/thesis/chapters/decoding_techniques.tex @@ -72,9 +72,9 @@ \label{sec:dec:LP Decoding using ADMM} \begin{itemize} - \item Equivalent ML optimization problem - \item LP relaxation - \item ADMM as a solver + \item Equivalent \ac{ML} optimization problem + \item \Ac{LP} relaxation + \item \Ac{ADMM} as a solver \end{itemize} @@ -82,8 +82,151 @@ \section{Proximal Decoding}% \label{sec:dec:Proximal Decoding} -\begin{itemize} - \item Formulation of optimization problem - \item Proximal gradient method as a solver -\end{itemize} +Proximal decoding was proposed by Wadayama et al.~\cite{proximal_paper}. +With this decoding algorithm, the objective function is minimized using +the proximal gradient method. +In contrast to \ac{LP} decoding, the objective function is based on a +non-convex optimization formulation of the \ac{MAP} decoding problem. + +In order to derive the objective function, the authors reformulate the +\ac{MAP} decoding problem:% +% +\begin{align} + \hat{\boldsymbol{x}} = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} + f_{\boldsymbol{X} \mid \boldsymbol{Y}} + \left( \boldsymbol{x} \mid \boldsymbol{y} \right) + = \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} f_{\boldsymbol{Y} \mid \boldsymbol{X}} + \left( \boldsymbol{y} \mid \boldsymbol{x} \right) + f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)% + \label{eq:prox:vanilla_MAP} +\end{align}% +% +The likelihood is usually a known function determined by the channel model. 
+In order to rewrite the prior \ac{PDF} +$f_{\boldsymbol{X}}\left( \boldsymbol{x} \right)$, +the so-called \textit{code-constraint polynomial} is introduced:% +% +\begin{align} + h\left( \boldsymbol{x} \right) = \sum_{j=1}^{n} \left( x_j^2-1 \right) ^2 + + \sum_{i=1}^{m} \left[ + \left( \prod_{j\in \mathcal{A}\left( i \right) } x_j \right) -1 \right] ^2% + \label{eq:prox:ccp} +\end{align}% +% +The intention of this function is to provide a way to penalize vectors far +from a codeword and favor those close to a codeword. +In order to achieve this, the polynomial is composed of two parts: one term +representing the bipolar constraint, providing for a discrete solution of the +continuous optimization problem, and one term representing the parity +constraint, accommodating the role of the parity-check matrix $\boldsymbol{H}$. +% +The prior \ac{PDF} is then approximated using the code-constraint polynomial\todo{Italic?}:% +% +\begin{align} + f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) = + \frac{1}{\left| \mathcal{C}\left( \boldsymbol{H} \right) \right| } + \sum_{\boldsymbol{c} \in \mathcal{C}\left( \boldsymbol{H} \right) } + \delta\left( \boldsymbol{x} - \left( -1 \right) ^{\boldsymbol{c}}\right) + \approx \frac{1}{Z}e^{-\gamma h\left( \boldsymbol{x} \right) }% + \label{eq:prox:prior_pdf_approx} +\end{align}% +% +The authors justify this approximation by arguing that for +$\gamma \rightarrow \infty$, the right-hand side approaches the left-hand +side. 
In~\eqref{eq:prox:vanilla_MAP} the prior \ac{PDF} +$f_{\boldsymbol{X}}\left( \boldsymbol{x} \right) $ can then be replaced by +its approximation from~\eqref{eq:prox:prior_pdf_approx} and the likelihood can be rewritten using +the negative log-likelihood +$f_{\boldsymbol{Y} \mid \boldsymbol{X}}\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + = e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) }$:% +% +\begin{align} + \hat{\boldsymbol{x}} &= \argmax_{\boldsymbol{x} \in \mathbb{R}^{n}} + e^{- L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) } + e^{-\gamma h\left( \boldsymbol{x} \right) } \nonumber \\ + &= \argmin_{\boldsymbol{x} \in \mathbb{R}^n} \left( + L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + + \gamma h\left( \boldsymbol{x} \right) + \right)% + \label{eq:prox:approx_map_problem} +\end{align}% +% +Thus, with proximal decoding, the objective function +$f\left( \boldsymbol{x} \right)$ to be minimized is% +% +\begin{align} + f\left( \boldsymbol{x} \right) = L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) + + \gamma h\left( \boldsymbol{x} \right)% + \label{eq:prox:objective_function} +.\end{align}\todo{Dot after equations?} + +For the solution of the approximate \ac{MAP} decoding problem, the two parts +of~\eqref{eq:prox:approx_map_problem} are considered separately from one +another: the minimization of the objective function occurs in an alternating +manner, switching between the minimization of the negative log-likelihood +$L\left( \boldsymbol{y} \mid \boldsymbol{x} \right) $ and the scaled +code-constraint polynomial $\gamma h\left( \boldsymbol{x} \right) $. +Two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are introduced, +describing the result of each of the two steps. 
+The first step, minimizing the negative log-likelihood using gradient descent, yields% +% +\begin{align*} + \boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla + L\left( \boldsymbol{y} \mid \boldsymbol{s} \right), + \hspace{5mm}\omega > 0 +.\end{align*}% +% +For the second step, minimizing the scaled code-constraint polynomial using +the proximal gradient method, the proximal operator of +$\gamma h\left( \boldsymbol{x} \right) $ has to be computed and is +immediately approximated by a gradient-descent step:% +% +\begin{align*} + \text{prox}_{\gamma h} \left( \boldsymbol{x} \right) &\equiv + \argmin_{\boldsymbol{t} \in \mathbb{R}^n} + \left( \gamma h\left( \boldsymbol{t} \right) + + \frac{1}{2} \lVert \boldsymbol{t} - \boldsymbol{x} \rVert^2 \right)\\ + &\approx \boldsymbol{x} - \gamma \nabla h \left( \boldsymbol{x} \right), + \hspace{5mm} \gamma \text{ small} +.\end{align*}% +% +The second step thus becomes% +% +\begin{align*} + \boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right), + \hspace{5mm}\gamma > 0,\text{ small} +.\end{align*} +% +While the approximation of the prior \ac{PDF} made in~\eqref{eq:prox:prior_pdf_approx} +theoretically becomes better +with larger $\gamma$, the constraint that $\gamma$ be small is important, +as it keeps the effect of $h\left( \boldsymbol{x} \right) $ on the landscape +of the objective function small. +Otherwise, unwanted stationary points, including local minima, are introduced. +The authors say that in practice, the value of $\gamma$ should be adjusted +according to the decoding performance. + +The iterative decoding process resulting from this consideration is shown in +figure~\ref{fig:prox:alg}. 
+ +\begin{figure}[H] + \centering + + \begin{genericAlgorithm}[caption={}, label={}] +$\boldsymbol{s} \leftarrow \boldsymbol{0}$ +for $K$ iterations do + $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \nabla L \left( \boldsymbol{y} \mid \boldsymbol{s} \right) $ + $\boldsymbol{s} \leftarrow \boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) $ + $\boldsymbol{\hat{x}} \leftarrow \text{sign}\left( \boldsymbol{s} \right), \quad \boldsymbol{\hat{c}} \leftarrow \frac{1}{2}\left( \boldsymbol{1} - \boldsymbol{\hat{x}} \right) $ + if $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ do + return $\boldsymbol{\hat{c}}$ + end if +end for +return $\boldsymbol{\hat{c}}$ + \end{genericAlgorithm} + + + \caption{Proximal decoding algorithm} + \label{fig:prox:alg} +\end{figure}