diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex index 95bc066..b00db2e 100644 --- a/latex/thesis/chapters/decoding_techniques.tex +++ b/latex/thesis/chapters/decoding_techniques.tex @@ -676,29 +676,168 @@ exponentially, it is a lot more tractable for practical applications. The resulting formulation of the relaxed optimization problem becomes:% % -\begin{align*} - \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\ - \text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j}, - \hspace{5mm}j\in\mathcal{J} -.\end{align*}% +\begin{align} + \begin{aligned} + \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\ + \text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}} \in \mathcal{P}_{d_j} + \hspace{5mm}\forall j\in\mathcal{J}. + \end{aligned} \label{eq:lp:relaxed_formulation} +\end{align}% +\todo{Rewrite sum as dot product} +\todo{Space before $\forall$?} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \section{LP Decoding using ADMM}% \label{sec:dec:LP Decoding using ADMM} -\begin{itemize} - \item Why ADMM? - \begin{itemize} - \item Distributed nature, making it a competitor to BP - (which can also be implemented in a distributed manner) - \cite[Sec. I]{original_admm} - \item Computational performance similar to BP has been demnonstrated - \cite[Sec. I]{original_admm} - \end{itemize} - \item Adaptive linear programming? - \item How ADMM is adapted to LP decoding -\end{itemize} +The \ac{LP} decoding formulation in section \ref{sec:dec:Decoding using Optimization Methods} +is a very general one that can be solved with a number of different optimization methods. +In this work \ac{ADMM} is examined, as its distributed nature allows for a very efficient +implementation. 
+\ac{LP} decoding using \ac{ADMM} can be regarded as a message
+passing algorithm with two separate update steps that can be performed
+simultaneously;
+the resulting algorithm has a striking similarity to \ac{BP} and its computational
+complexity has been demonstrated to compare favorably to \ac{BP} \cite{original_admm},
+\cite{efficient_lp_dec_admm}.
+
+The \ac{LP} decoding problem in (\ref{eq:lp:relaxed_formulation}) can be
+slightly rewritten using the auxiliary variables
+$\boldsymbol{z}_{1:m}$:%
+%
+\begin{align}
+  \begin{aligned}
+    \begin{array}{r}
+      \text{minimize }
+    \end{array}\hspace{0.5mm} & \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}} \\
+    \begin{array}{r}
+      \text{subject to }\\
+      \phantom{te}
+    \end{array}\hspace{0.5mm} & \setlength{\arraycolsep}{1.4pt}
+    \begin{array}{rl}
+      \boldsymbol{T}_j\tilde{\boldsymbol{c}}
+      &= \boldsymbol{z}_j\\
+      \boldsymbol{z}_j
+      &\in \mathcal{P}_{d_j}
+    \end{array}
+    \hspace{5mm} \forall j\in\mathcal{J}.
+  \end{aligned}
+  \label{eq:lp:admm_reformulated}
+\end{align}
+%
+In this form, the problem almost fits the \ac{ADMM} template described in section
+\ref{sec:theo:Optimization Methods}, except for the fact that there are multiple equality
+constraints $\boldsymbol{T}_j \tilde{\boldsymbol{c}} = \boldsymbol{z}_j$ and the
+additional constraints $\boldsymbol{z}_j \in \mathcal{P}_{d_j} \, \forall\, j\in\mathcal{J}$. 
+\todo{$\forall$ in text?}
+The multiple constraints can be addressed by introducing additional terms in the
+augmented Lagrangian:%
+%
+\begin{align*}
+  \mathcal{L}_{\mu}\left( \tilde{\boldsymbol{c}}, \boldsymbol{z}_{1:m},
+  \boldsymbol{\lambda}_{1:m} \right)
+  = \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}}
+  + \sum_{j\in\mathcal{J}} \boldsymbol{\lambda}^\text{T}_j
+  \left( \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \right)
+  + \frac{\mu}{2}\sum_{j\in\mathcal{J}}
+  \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert^2_2
+.\end{align*}%
+%
+The additional constraints remain in the dual optimization problem:%
+%
+\begin{align*}
+  \text{maximize } \min_{\substack{\tilde{\boldsymbol{c}} \\
+  \boldsymbol{z}_j \in \mathcal{P}_{d_j}}}
+  \mathcal{L}_{\mu}\left( \tilde{\boldsymbol{c}}, \boldsymbol{z}_{1:m},
+  \boldsymbol{\lambda}_{1:m} \right)
+.\end{align*}%
+%
+The steps to solve the dual problem then become:
+%
+\begin{alignat*}{3}
+  \tilde{\boldsymbol{c}} &\leftarrow \argmin_{\tilde{\boldsymbol{c}}} \mathcal{L}_{\mu} \left(
+  \tilde{\boldsymbol{c}}, \boldsymbol{z}_{1:m}, \boldsymbol{\lambda}_{1:m} \right) \\
+  \boldsymbol{z}_j &\leftarrow \argmin_{\boldsymbol{z}_j \in \mathcal{P}_{d_j}}
+  \mathcal{L}_{\mu} \left(
+  \tilde{\boldsymbol{c}}, \boldsymbol{z}_{1:m}, \boldsymbol{\lambda}_{1:m} \right)
+  \hspace{3mm} &&\forall j\in\mathcal{J} \\
+  \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
+  + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
+  - \boldsymbol{z}_j \right)
+  \hspace{3mm} &&\forall j\in\mathcal{J}
+.\end{alignat*}
+%
+Luckily, the additional constraints only affect the $\boldsymbol{z}_j$-update steps.
+Furthermore, the $\boldsymbol{z}_j$-update steps can be shown to be equivalent to projections
+onto the check polytopes $\mathcal{P}_{d_j}$ \cite[Sec. III. B.]{original_admm}
+and the $\tilde{\boldsymbol{c}}$-update can be computed analytically \cite[Sec. 
III.]{lautern}:%
+%
+\begin{alignat*}{3}
+  \tilde{c}_i &\leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
+  \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{\lambda}_j \right)_i
+  - \left( \boldsymbol{z}_j \right)_i \Big) - \frac{\gamma_i}{\mu} \right)
+  \hspace{3mm} && \forall i\in\mathcal{I} \\
+  \boldsymbol{z}_j &\leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
+  \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{\lambda}_j \right)
+  \hspace{3mm} && \forall j\in\mathcal{J} \\
+  \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
+  + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
+  - \boldsymbol{z}_j \right)
+  \hspace{3mm} && \forall j\in\mathcal{J}
+.\end{alignat*}
+%
+One thing to note is that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
+as they are independent of one another.
+The same is true for the updates of the individual components of $\tilde{\boldsymbol{c}}$.
+
+The reason \ac{ADMM} is able to perform so well is due to the relocation of the constraints
+$\boldsymbol{T}_j\tilde{\boldsymbol{c}}\in\mathcal{P}_{d_j}\,\forall\, j\in\mathcal{J}$
+into the objective function itself.
+The minimization of the new objective function can then take place simultaneously
+with respect to all $\boldsymbol{z}_j, j\in\mathcal{J}$.
+Effectively, all of the $\left|\mathcal{J}\right|$ parity constraints are
+able to be handled at the same time.
+This can also be understood by interpreting the decoding process as a message-passing
+algorithm \cite[Sec. III. D.]{original_admm}, \cite[Sec. II. B.]{efficient_lp_dec_admm},
+as is shown in figure \ref{fig:lp:message_passing}
+\footnote{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$ are additional parameters
+defining the tolerances for the stopping criteria of the algorithm. 
+$\boldsymbol{z}_j^\prime$ denotes the value of $\boldsymbol{z}_j$ in the previous iteration.}%
+\todo{Move footnote to figure caption}%
+.%
+\todo{Explicitly specify sections?}%
+%
+\begin{figure}[H]
+  \centering
+
+  \begin{genericAlgorithm}[caption={}, label={}]
+Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}_{1:m}$ and $\boldsymbol{\lambda}_{1:m}$
+while $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{pri}}$ or $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{z}^\prime_j - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{dual}}$
+    Perform check update
+    ...
+    Perform variable update
+    ...
+  \end{genericAlgorithm}
+
+  \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message passing algorithm}
+  \label{fig:lp:message_passing}
+\end{figure}%
+%
+\noindent The $\tilde{c}_i$-updates can be interpreted as a variable-node update step,
+and the $\boldsymbol{z}_j$- and $\boldsymbol{\lambda}_j$-updates can be interpreted as
+a check-node update step.
+The updates for each variable- and check-node can be performed in parallel.
+With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM}
+is able to achieve similar computational complexity to \ac{BP}.
+
+The main computational effort in solving the linear program then amounts to
+computing the projection operation $\Pi_{\mathcal{P}_{d_j}} \left( \cdot \right) $
+onto each check polytope. Various different methods to perform this projection
+have been proposed (e.g., in \cite{original_admm}, \cite{efficient_lp_dec_admm},
+\cite{lautern}).
+The method chosen here is the one presented in \cite{lautern}. 
+ %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -746,7 +885,7 @@ as is often done, and has a rather unwieldy representation:% f_{\tilde{\boldsymbol{X}}}\left( \tilde{\boldsymbol{x}} \right) = \frac{1}{\left| \mathcal{C} \right| } \sum_{\boldsymbol{c} \in \mathcal{C} } - \delta\left( \tilde{\boldsymbol{x}} - \left( -1 \right) ^{\boldsymbol{c}}\right) + \delta\big( \tilde{\boldsymbol{x}} - \left( -1 \right) ^{\boldsymbol{c}}\big) \label{eq:prox:prior_pdf} .\end{align}% % @@ -758,7 +897,7 @@ the so-called \textit{code-constraint polynomial} is introduced as:% h\left( \tilde{\boldsymbol{x}} \right) = \underbrace{\sum_{i=1}^{n} \left( \tilde{x_i}^2-1 \right) ^2}_{\text{Bipolar constraint}} + \underbrace{\sum_{j=1}^{m} \left[ - \left( \prod_{i\in N \left( j \right) } \tilde{x_i} \right) + \left( \prod_{i\in N_c \left( j \right) } \tilde{x_i} \right) -1 \right] ^2}_{\text{Parity constraint}}% .\end{align*}% % @@ -795,6 +934,10 @@ $L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left( + \gamma h\left( \tilde{\boldsymbol{x}} \right) \right)% .\end{align*}% +\todo{\textbackslash left($\cdot$ \textbackslash right)\\ +$\rightarrow$\\ +\textbackslash big( $\cdot$ \textbackslash big)\\ +?}% % Thus, with proximal decoding, the objective function $g\left( \tilde{\boldsymbol{x}} \right)$ considered is% @@ -847,6 +990,7 @@ It is then immediately approximated with gradient-descent:% \hspace{5mm} \gamma > 0, \text{ small} .\end{align*}% % +\todo{explicitly state $\nabla h$?} The second step thus becomes% % \begin{align*}