From ad4dfc632571828d8cd094bb86cff50086842b72 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Wed, 22 Mar 2023 14:07:54 +0100
Subject: [PATCH] Fixed most TODOs in decoding techniques section

---
 latex/thesis/chapters/decoding_techniques.tex | 99 +++++++++----------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index fe8f553..1536a99 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -196,12 +196,10 @@ of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
     \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmin_{\boldsymbol{c}\in\mathcal{C}} \sum_{i=1}^{n} \gamma_i c_i,%
     \hspace{5mm} \gamma_i = \ln\left(
-        \frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
-        {f_{Y_i | C_i} \left( y_i \mid C_i = 1 \right) } \right)
+        \frac{f_{Y_i | C_i} \left( y_i \mid c_i = 0 \right) }
+        {f_{Y_i | C_i} \left( y_i \mid c_i = 1 \right) } \right)
 .\end{align*}
 %
-\todo{$C_i$ or $c_i$?}%
-%
 The authors propose the following cost function%
 \footnote{In this context, \textit{cost function} and \textit{objective
 function} have the same meaning.}
@@ -209,15 +207,14 @@ for the \ac{LP} decoding problem:%
 %
 \begin{align*}
     g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
+    = \boldsymbol{\gamma}^\text{T}\boldsymbol{c}
 .\end{align*}
 %
-\todo{Write as dot product}
-%
 With this cost function, the exact integer linear program formulation of \ac{ML}
-decoding is the following:%
+decoding becomes the following:%
 %
 \begin{align*}
-    \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
+    \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{c} \\
     \text{subject to }\hspace{2mm} &\boldsymbol{c} \in \mathcal{C}
 .\end{align*}%
 %
@@ -234,13 +231,11 @@ decoding, redefining the constraints in terms of the \text{codeword polytope}
 %
 \begin{align*}
     \text{poly}\left( \mathcal{C} \right) = \left\{
-        \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
-        \text{ : } \lambda_{\boldsymbol{c}} \ge 0,
-        \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
+        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} \boldsymbol{c}
+        \text{ : } \alpha_{\boldsymbol{c}} \ge 0,
+        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} = 1 \right\}
 ,\end{align*}
 %
-%
-\todo{$\lambda$ might be confusing here}%
-%
 which represents the \textit{convex hull} of all possible codewords, i.e.,
 the set of all convex combinations of codewords.
 This corresponds to simply relaxing the integrality requirement.
@@ -685,12 +680,11 @@ The resulting formulation of the relaxed optimization problem becomes:%
 %
 \begin{align}
     \begin{aligned}
-        \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\
+        \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}} \\
         \text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}}
         \in \mathcal{P}_{d_j} \hspace{5mm}\forall j\in\mathcal{J}.
     \end{aligned}
     \label{eq:lp:relaxed_formulation}
 \end{align}%
-\todo{Rewrite sum as dot product}
 \todo{Space before $\forall$?}
@@ -703,8 +697,7 @@ is a very general one that can be solved with a number of different optimization
 In this work \ac{ADMM} is examined, as its distributed nature allows for a
 very efficient implementation.
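The relaxed program in \eqref{eq:lp:relaxed_formulation} can be tried out directly with a generic LP solver, which makes the construction easy to sanity-check. The following is a minimal sketch and not part of the patch: the toy parity-check matrix, the LLR values and the use of scipy.optimize.linprog are illustrative assumptions, with each check polytope $\mathcal{P}_{d_j}$ written out as the odd-subset parity inequalities of \cite{feldman_thesis} intersected with the unit hypercube.

import itertools

import numpy as np
from scipy.optimize import linprog

# Toy parity-check matrix and LLR vector (both assumptions for illustration).
H = np.array([[1, 1, 0, 1, 0, 0],
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
llr = np.array([-1.2, 0.4, 0.9, -0.3, 1.1, -0.7])

# Each check polytope is described by the parity inequalities
#   sum_{i in S} c_i - sum_{i in N(j)\S} c_i <= |S| - 1   for all odd-sized S,
# plus the box constraints 0 <= c_i <= 1 supplied via `bounds`.
A_ub, b_ub = [], []
for row in H:
    nbrs = np.flatnonzero(row)
    for size in range(1, len(nbrs) + 1, 2):          # odd subset sizes only
        for S in itertools.combinations(nbrs, size):
            a = np.zeros(H.shape[1])
            a[nbrs] = -1.0                           # -1 on N(j) \ S ...
            a[list(S)] = 1.0                         # ... overwritten to +1 on S
            A_ub.append(a)
            b_ub.append(len(S) - 1)

# Minimize llr^T c subject to the relaxed constraints.
res = linprog(llr, A_ub=np.array(A_ub), b_ub=np.array(b_ub), bounds=(0, 1))
print(res.x)   # fractional entries indicate a pseudocodeword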
 \ac{LP} decoding using \ac{ADMM} can be regarded as a message
-passing algorithm with two separate update steps that can be performed
-simulatneously;
+passing algorithm with separate variable- and check-node update steps;
 the resulting algorithm has a striking similarity to \ac{BP} and its
 computational complexity has been demonstrated to compare favorably to \ac{BP}
 \cite{original_admm}, \cite{efficient_lp_dec_admm}.
@@ -778,15 +771,25 @@ The steps to solve the dual problem then become:
 Luckily, the additional constraints only affect the $\boldsymbol{z}_j$-update steps.
 Furthermore, the $\boldsymbol{z}_j$-update steps can be shown to be equivalent to
 projections onto the check polytopes $\mathcal{P}_{d_j}$ \cite[Sec. III. B.]{original_admm}
-and the $\tilde{\boldsymbol{c}}$-update can be computed analytically \cite[Sec. III.]{lautern}:%
+and the $\tilde{\boldsymbol{c}}$-update can be computed analytically%
+%
+\footnote{In the $\tilde{c}_i$-update rule, the term
+$\left( \boldsymbol{z}_j \right)_i$ is a slight abuse of notation, as
+$\boldsymbol{z}_j$ has fewer components than there are variable nodes $i$.
+What is actually meant is the component of $\boldsymbol{z}_j$ that is associated
+with the variable node $i$, i.e., $\left( \boldsymbol{T}_j^\text{T}\boldsymbol{z}_j\right)_i$.
+The same is true for $\left( \boldsymbol{\lambda}_j \right)_i$.}
+%
+\cite[Sec. III.]{lautern}:%
 %
 \begin{alignat*}{3}
     \tilde{c}_i &\leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
-        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{\lambda}_j \right)_i
-        - \left( \boldsymbol{z}_j \right)_i \Big) - \frac{\gamma_i}{\mu} \right)
+        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{z}_j \right)_i
+        - \frac{1}{\mu} \left( \boldsymbol{\lambda}_j \right)_i \Big)
+        - \frac{\gamma_i}{\mu} \right)
     \hspace{3mm} && \forall i\in\mathcal{I} \\
     \boldsymbol{z}_j &\leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
-        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{\lambda}_j \right)
+        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \frac{\boldsymbol{\lambda}_j}{\mu} \right)
     \hspace{3mm} && \forall j\in\mathcal{J} \\
     \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
     + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
@@ -794,9 +797,7 @@ and the $\tilde{\boldsymbol{c}}$-update can be computed analytically \cite[Sec.
     - \boldsymbol{z}_j \right)
     \hspace{3mm} && \forall j\in\mathcal{J}
 .\end{alignat*}
 %
-\todo{$\tilde{c}_i$-update With or without projection onto $\left[ 0, 1 \right] ^n$?}
-%
-One thing to note is that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
+It should be noted that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
 as they are independent of one another.
 The same is true for the updates of the individual components of $\tilde{\boldsymbol{c}}$.
@@ -809,13 +810,7 @@ Effectively, all of the $\left|\mathcal{J}\right|$ parity constraints are able
 to be handled at the same time.
 This can also be understood by interpreting the decoding process as a
 message-passing algorithm \cite[Sec. III. D.]{original_admm}, \cite[Sec. II. B.]{efficient_lp_dec_admm},
-as is shown in figure \ref{fig:lp:message_passing}
-\footnote{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$ are additional parameters
-defining the tolerances for the stopping criteria of the algorithm.
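To make the three update rules concrete, here is a small self-contained sketch. The penalty parameter $\mu$, the toy code and the brute-force projection onto $\mathcal{P}_{d_j}$ (optimizing the convex-combination weights of the even-weight vertices, which is only viable for small check degrees) are assumptions for illustration; in practice one would use an efficient projection algorithm such as those developed in \cite{original_admm}, \cite{efficient_lp_dec_admm}. Matching the update rules above, $\tilde{c}_i$ is not clipped to $\left[ 0, 1 \right]$.

import itertools

import numpy as np
from scipy.optimize import minimize

def proj_parity_polytope(v):
    # Brute-force projection onto PP_d = conv{even-weight binary vectors}:
    # minimize ||V^T a - v||^2 over the probability simplex of weights a.
    # Viable only for small d; stands in for the efficient projections of
    # the cited works.
    d = len(v)
    V = np.array([u for u in itertools.product((0, 1), repeat=d)
                  if sum(u) % 2 == 0], dtype=float)
    res = minimize(lambda a: np.sum((V.T @ a - v) ** 2),
                   np.full(len(V), 1.0 / len(V)),
                   bounds=[(0.0, 1.0)] * len(V),
                   constraints=[{"type": "eq", "fun": lambda a: np.sum(a) - 1.0}])
    return V.T @ res.x

H = np.array([[1, 1, 0, 1, 0, 0],                    # toy code (assumption)
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
llr = np.array([-1.2, 0.4, 0.9, -0.3, 1.1, -0.7])    # LLRs (assumption)
mu = 3.0                                             # penalty parameter (assumption)

checks = [np.flatnonzero(row) for row in H]          # neighbourhood of check j
c = np.full(H.shape[1], 0.5)                         # c-tilde
z = [np.full(len(nj), 0.5) for nj in checks]
lam = [np.zeros(len(nj)) for nj in checks]

for _ in range(50):
    for j, nj in enumerate(checks):                  # check-node updates
        z[j] = proj_parity_polytope(c[nj] + lam[j] / mu)
        lam[j] = lam[j] + mu * (c[nj] - z[j])
    for i in range(H.shape[1]):                      # variable-node updates
        acc, deg = 0.0, 0
        for j, nj in enumerate(checks):
            pos = np.flatnonzero(nj == i)
            if pos.size:                             # check j involves bit i
                acc += z[j][pos[0]] - lam[j][pos[0]] / mu
                deg += 1
        c[i] = (acc - llr[i] / mu) / deg

print(np.round(c, 3))                                # thresholding at 0.5 gives bits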
-The variable $\boldsymbol{z}_j^\prime$ denotes the value of
-$\boldsymbol{z}_j$ in the previous iteration.}%
-\todo{Move footnote to figure caption}%
-.%
+as is shown in figure \ref{fig:lp:message_passing}.%
 \todo{Explicitly specify sections?}%
 %
 \begin{figure}[H]
@@ -828,26 +823,35 @@ Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]}$ and $\boldsymbol{\la
         while $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{pri}}$ or $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{z}^\prime_j - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{dual}}$ do
             for all $j$ in $\mathcal{J}$ do
                 $\boldsymbol{z}_j \leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
-                    \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{\lambda}_j \right)$
-                $\boldsymbol{\lambda}_j \leftarrow \boldsymbol{\lambda}_j +
+                    \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \frac{\boldsymbol{\lambda}_j}{\mu} \right)$
+                $\boldsymbol{\lambda}_j \leftarrow \boldsymbol{\lambda}_j + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \right)$
             end for
             for all $i$ in $\mathcal{I}$ do
                 $\tilde{c}_i \leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
-                    \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{\lambda}_j \right)_i
-                    - \left( \boldsymbol{z}_j \right)_i \Big) - \frac{\gamma_i}{\mu} \right)$
+                    \sum_{j\in N_v\left( i \right) } \Big(
+                    \left( \boldsymbol{z}_j \right)_i - \frac{1}{\mu} \left( \boldsymbol{\lambda}_j
+                    \right)_i
+                    \Big) - \frac{\gamma_i}{\mu} \right)$
             end for
         end while
     \end{genericAlgorithm}
-    \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message passing algorithm}
+    \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message-passing algorithm%
+    \protect\footnotemark{}}
     \label{fig:lp:message_passing}
 \end{figure}%
 %
+\footnotetext{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$
+are additional parameters
+defining the tolerances for the stopping criteria of the algorithm.
+The variable $\boldsymbol{z}_j^\prime$ denotes the value of
+$\boldsymbol{z}_j$ in the previous iteration.}%
+%
-\noindent The $\tilde{c}_i$-updates can be understood as a variable-node update step,
-and the $\boldsymbol{z}_j$- and $\boldsymbol{\lambda}_j$-updates can be understood as
-a check-node update step.
+\noindent The $\boldsymbol{z}_j$- and $\boldsymbol{\lambda}_j$-updates can be understood as
+a check-node update step (lines $3$--$6$) and the $\tilde{c}_i$-updates can be understood as
+a variable-node update step (lines $7$--$9$ in figure \ref{fig:lp:message_passing}).
 The updates for each variable- and check-node can be performed in parallel.
 With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM} is able
 to achieve similar computational complexity to \ac{BP}.
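The two sums in the while-condition of the figure are the primal and dual residuals of ADMM. Continuing the sketch above (the tolerance values are arbitrary illustrative choices), the stopping rule reads:

import numpy as np

def residuals(c, z, z_prev, checks):
    # primal residual: sum_j || T_j c - z_j ||_2
    primal = sum(np.linalg.norm(c[nj] - z[j]) for j, nj in enumerate(checks))
    # dual residual: sum_j || z'_j - z_j ||_2, with z'_j from the last iteration
    dual = sum(np.linalg.norm(zp - zj) for zp, zj in zip(z_prev, z))
    return primal, dual

eps_pri, eps_dual = 1e-5, 1e-5       # tolerances (illustrative choices)
# Inside the iteration loop of the previous sketch one would keep
#     z_prev = [zj.copy() for zj in z]
# before the check-node updates and stop once
#     primal < eps_pri and dual < eps_dual.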
@@ -948,17 +952,13 @@ $L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
 %
 \begin{align*}
     \hat{\boldsymbol{x}} &= \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
-        \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
-        \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
-    &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \left(
-        L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
-        + \gamma h\left( \tilde{\boldsymbol{x}} \right)
-    \right)%
+        \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
+        \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
+    &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \big(
+        L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+        + \gamma h\left( \tilde{\boldsymbol{x}} \right)
+    \big)%
 .\end{align*}%
-\todo{\textbackslash left($\cdot$ \textbackslash right)\\
-$\rightarrow$\\
-\textbackslash big( $\cdot$ \textbackslash big)\\
-?}%
 %
 Thus, with proximal decoding, the objective function
 $g\left( \tilde{\boldsymbol{x}} \right)$ considered is%
@@ -1011,7 +1011,6 @@ It is then immediately approximated with gradient-descent:%
     \hspace{5mm} \gamma > 0, \text{ small}
 .\end{align*}%
 %
-\todo{explicitly state $\nabla h$?}
 The second step thus becomes%
 %
 \begin{align*}
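The gradient-descent approximation of this first step can be sketched numerically. Since the excerpt does not spell out the concrete $L$ and $h$ of the thesis, the sketch assumes an AWGN negative log-likelihood and a smooth stand-in penalty $h\left( \tilde{\boldsymbol{x}} \right) = \sum_{j} \big( 1 - \prod_{i} \tilde{x}_i \big)^2$ (product over the bits of check $j$, bipolar convention); all parameter values, the step size and the iteration count are made-up placeholders.

import numpy as np

H = np.array([[1, 1, 0, 1, 0, 0],                    # toy code (assumption)
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
checks = [np.flatnonzero(row) for row in H]
sigma2 = 0.5                                         # noise variance (assumption)
y = np.array([0.9, -1.1, 1.2, 0.8, -0.7, 1.0])       # received values (assumption)

def grad_L(x):
    # gradient of L(y|x) = ||y - x||^2 / (2 sigma2), the AWGN negative
    # log-likelihood under the bipolar convention
    return (x - y) / sigma2

def grad_h(x):
    # gradient of the stand-in penalty h(x) = sum_j (1 - prod_{i in N(j)} x_i)^2,
    # which vanishes exactly when x in {-1, +1}^n satisfies every parity check
    g = np.zeros_like(x)
    for nj in checks:
        p = np.prod(x[nj])
        for k, i in enumerate(nj):
            g[i] += -2.0 * (1.0 - p) * np.prod(np.delete(x[nj], k))
    return g

gamma_pen, step = 0.5, 0.05                          # penalty weight / step size (assumptions)
x = y.copy()
for _ in range(200):                                 # gradient descent on L + gamma_pen * h
    x = x - step * (grad_L(x) + gamma_pen * grad_h(x))
print(np.sign(x))                                    # hard decisions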