From ad4dfc632571828d8cd094bb86cff50086842b72 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Wed, 22 Mar 2023 14:07:54 +0100
Subject: [PATCH] Fixed most TODOs in decoding techniques section

---
 latex/thesis/chapters/decoding_techniques.tex | 99 +++++++++----------
 1 file changed, 49 insertions(+), 50 deletions(-)

diff --git a/latex/thesis/chapters/decoding_techniques.tex b/latex/thesis/chapters/decoding_techniques.tex
index fe8f553..1536a99 100644
--- a/latex/thesis/chapters/decoding_techniques.tex
+++ b/latex/thesis/chapters/decoding_techniques.tex
@@ -196,12 +196,10 @@ of the \acp{LLR} $\gamma_i$ \cite[Sec. 2.5]{feldman_thesis}:%
     \hat{\boldsymbol{c}}_{\text{\ac{ML}}} = \argmin_{\boldsymbol{c}\in\mathcal{C}} \sum_{i=1}^{n} \gamma_i c_i,%
     \hspace{5mm} \gamma_i = \ln\left(
-        \frac{f_{Y_i | C_i} \left( y_i \mid C_i = 0 \right) }
-        {f_{Y_i | C_i} \left( y_i \mid C_i = 1 \right) } \right)
+        \frac{f_{Y_i | C_i} \left( y_i \mid c_i = 0 \right) }
+        {f_{Y_i | C_i} \left( y_i \mid c_i = 1 \right) } \right)
 .\end{align*}
 %
-\todo{$C_i$ or $c_i$?}%
-%
 The authors propose the following cost function%
 \footnote{In this context, \textit{cost function} and \textit{objective
 function} have the same meaning.}
@@ -209,15 +207,14 @@ for the \ac{LP} decoding problem:%
 %
 \begin{align*}
     g\left( \boldsymbol{c} \right) = \sum_{i=1}^{n} \gamma_i c_i
+    = \boldsymbol{\gamma}^\text{T}\boldsymbol{c}
 .\end{align*}
 %
-\todo{Write as dot product}
-%
 With this cost function, the exact integer linear program formulation of \ac{ML}
-decoding is the following:%
+decoding becomes the following:%
 %
 \begin{align*}
-    \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i c_i \\
+    \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{c} \\
     \text{subject to }\hspace{2mm} &\boldsymbol{c} \in \mathcal{C}
 .\end{align*}%
 %
@@ -234,13 +231,11 @@ decoding, redefining the constraints in terms of the \text{codeword polytope}
 %
 \begin{align*}
     \text{poly}\left( \mathcal{C} \right) = \left\{
-        \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} \boldsymbol{c}
-        \text{ : } \lambda_{\boldsymbol{c}} \ge 0,
-        \sum_{\boldsymbol{c} \in \mathcal{C}} \lambda_{\boldsymbol{c}} = 1 \right\}
+        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} \boldsymbol{c}
+        \text{ : } \alpha_{\boldsymbol{c}} \ge 0,
+        \sum_{\boldsymbol{c} \in \mathcal{C}} \alpha_{\boldsymbol{c}} = 1 \right\}
 ,\end{align*}
 %
-%
-\todo{$\lambda$ might be confusing here}%
-%
 which represents the \textit{convex hull} of all possible codewords, i.e.,
 the set of all convex combinations of codewords.
 This corresponds to simply relaxing the integrality requirement.
@@ -685,12 +680,11 @@ The resulting formulation of the relaxed optimization problem becomes:%
 %
 \begin{align}
     \begin{aligned}
-        \text{minimize }\hspace{2mm} &\sum_{i=1}^{n} \gamma_i \tilde{c}_i \\
+        \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\tilde{\boldsymbol{c}} \\
         \text{subject to }\hspace{2mm} &\boldsymbol{T}_j \tilde{\boldsymbol{c}}
         \in \mathcal{P}_{d_j} \hspace{5mm}\forall j\in\mathcal{J}.
     \end{aligned}
     \label{eq:lp:relaxed_formulation}
 \end{align}%
-\todo{Rewrite sum as dot product}
 \todo{Space before $\forall$?}
@@ -703,8 +697,7 @@ is a very general one that can be solved with a number of different optimization
 In this work \ac{ADMM} is examined, as its distributed nature allows for a
 very efficient implementation.
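The relaxed program in \eqref{eq:lp:relaxed_formulation} can be tried out directly with a generic LP solver, which makes the construction easy to sanity-check. The following is a minimal sketch and not part of the patch: the toy parity-check matrix, the LLR values and the use of scipy.optimize.linprog are illustrative assumptions, with each check polytope $\mathcal{P}_{d_j}$ written out as the odd-subset parity inequalities of \cite{feldman_thesis} intersected with the unit hypercube.

import itertools

import numpy as np
from scipy.optimize import linprog

# Toy parity-check matrix and LLR vector (both assumptions for illustration).
H = np.array([[1, 1, 0, 1, 0, 0],
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
llr = np.array([-1.2, 0.4, 0.9, -0.3, 1.1, -0.7])

# Each check polytope is described by the parity inequalities
#   sum_{i in S} c_i - sum_{i in N(j)\S} c_i <= |S| - 1   for all odd-sized S,
# plus the box constraints 0 <= c_i <= 1 supplied via `bounds`.
A_ub, b_ub = [], []
for row in H:
    nbrs = np.flatnonzero(row)
    for size in range(1, len(nbrs) + 1, 2):          # odd subset sizes only
        for S in itertools.combinations(nbrs, size):
            a = np.zeros(H.shape[1])
            a[nbrs] = -1.0                           # -1 on N(j) \ S ...
            a[list(S)] = 1.0                         # ... overwritten to +1 on S
            A_ub.append(a)
            b_ub.append(len(S) - 1)

# Minimize llr^T c subject to the relaxed constraints.
res = linprog(llr, A_ub=np.array(A_ub), b_ub=np.array(b_ub), bounds=(0, 1))
print(res.x)   # fractional entries indicate a pseudocodeword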
 \ac{LP} decoding using \ac{ADMM} can be regarded as a message
-passing algorithm with two separate update steps that can be performed
-simulatneously;
+passing algorithm with separate variable- and check-node update steps;
 the resulting algorithm has a striking similarity to \ac{BP} and its
 computational complexity has been demonstrated to compare favorably to \ac{BP}
 \cite{original_admm}, \cite{efficient_lp_dec_admm}.
@@ -778,15 +771,25 @@ The steps to solve the dual problem then become:
 Luckily, the additional constraints only affect the $\boldsymbol{z}_j$-update steps.
 Furthermore, the $\boldsymbol{z}_j$-update steps can be shown to be equivalent to
 projections onto the check polytopes $\mathcal{P}_{d_j}$ \cite[Sec. III. B.]{original_admm}
-and the $\tilde{\boldsymbol{c}}$-update can be computed analytically \cite[Sec. III.]{lautern}:%
+and the $\tilde{\boldsymbol{c}}$-update can be computed analytically%
+%
+\footnote{In the $\tilde{c}_i$-update rule, the term
+$\left( \boldsymbol{z}_j \right)_i$ is a slight abuse of notation, as
+$\boldsymbol{z}_j$ has fewer components than there are variable nodes $i$.
+What is actually meant is the component of $\boldsymbol{z}_j$ that is associated
+with the variable node $i$, i.e., $\left( \boldsymbol{T}_j^\text{T}\boldsymbol{z}_j\right)_i$.
+The same is true for $\left( \boldsymbol{\lambda}_j \right)_i$.}
+%
+\cite[Sec. III.]{lautern}:%
 %
 \begin{alignat*}{3}
     \tilde{c}_i &\leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
-        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{\lambda}_j \right)_i
-        - \left( \boldsymbol{z}_j \right)_i \Big) - \frac{\gamma_i}{\mu} \right)
+        \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{z}_j \right)_i
+        - \frac{1}{\mu} \left( \boldsymbol{\lambda}_j \right)_i \Big)
+        - \frac{\gamma_i}{\mu} \right)
     \hspace{3mm} && \forall i\in\mathcal{I} \\
     \boldsymbol{z}_j &\leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
-        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{\lambda}_j \right)
+        \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \frac{\boldsymbol{\lambda}_j}{\mu} \right)
     \hspace{3mm} && \forall j\in\mathcal{J} \\
     \boldsymbol{\lambda}_j &\leftarrow \boldsymbol{\lambda}_j
     + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}}
@@ -794,9 +797,7 @@ and the $\tilde{\boldsymbol{c}}$-update can be computed analytically \cite[Sec.
     - \boldsymbol{z}_j \right)
     \hspace{3mm} && \forall j\in\mathcal{J}
 .\end{alignat*}
 %
-\todo{$\tilde{c}_i$-update With or without projection onto $\left[ 0, 1 \right] ^n$?}
-%
-One thing to note is that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
+It should be noted that all of the $\boldsymbol{z}_j$-updates can be computed simultaneously,
 as they are independent of one another.
 The same is true for the updates of the individual components of $\tilde{\boldsymbol{c}}$.
@@ -809,13 +810,7 @@ Effectively, all of the $\left|\mathcal{J}\right|$ parity constraints are able
 to be handled at the same time.
 This can also be understood by interpreting the decoding process as a
 message-passing algorithm \cite[Sec. III. D.]{original_admm}, \cite[Sec. II. B.]{efficient_lp_dec_admm},
-as is shown in figure \ref{fig:lp:message_passing}
-\footnote{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$ are additional parameters
-defining the tolerances for the stopping criteria of the algorithm.
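To make the three update rules concrete, here is a small self-contained sketch. The penalty parameter $\mu$, the toy code and the brute-force projection onto $\mathcal{P}_{d_j}$ (optimizing the convex-combination weights of the even-weight vertices, which is only viable for small check degrees) are assumptions for illustration; in practice one would use an efficient projection algorithm such as those developed in \cite{original_admm}, \cite{efficient_lp_dec_admm}. Matching the update rules above, $\tilde{c}_i$ is not clipped to $\left[ 0, 1 \right]$.

import itertools

import numpy as np
from scipy.optimize import minimize

def proj_parity_polytope(v):
    # Brute-force projection onto PP_d = conv{even-weight binary vectors}:
    # minimize ||V^T a - v||^2 over the probability simplex of weights a.
    # Viable only for small d; stands in for the efficient projections of
    # the cited works.
    d = len(v)
    V = np.array([u for u in itertools.product((0, 1), repeat=d)
                  if sum(u) % 2 == 0], dtype=float)
    res = minimize(lambda a: np.sum((V.T @ a - v) ** 2),
                   np.full(len(V), 1.0 / len(V)),
                   bounds=[(0.0, 1.0)] * len(V),
                   constraints=[{"type": "eq", "fun": lambda a: np.sum(a) - 1.0}])
    return V.T @ res.x

H = np.array([[1, 1, 0, 1, 0, 0],                    # toy code (assumption)
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
llr = np.array([-1.2, 0.4, 0.9, -0.3, 1.1, -0.7])    # LLRs (assumption)
mu = 3.0                                             # penalty parameter (assumption)

checks = [np.flatnonzero(row) for row in H]          # neighbourhood of check j
c = np.full(H.shape[1], 0.5)                         # c-tilde
z = [np.full(len(nj), 0.5) for nj in checks]
lam = [np.zeros(len(nj)) for nj in checks]

for _ in range(50):
    for j, nj in enumerate(checks):                  # check-node updates
        z[j] = proj_parity_polytope(c[nj] + lam[j] / mu)
        lam[j] = lam[j] + mu * (c[nj] - z[j])
    for i in range(H.shape[1]):                      # variable-node updates
        acc, deg = 0.0, 0
        for j, nj in enumerate(checks):
            pos = np.flatnonzero(nj == i)
            if pos.size:                             # check j involves bit i
                acc += z[j][pos[0]] - lam[j][pos[0]] / mu
                deg += 1
        c[i] = (acc - llr[i] / mu) / deg

print(np.round(c, 3))                                # thresholding at 0.5 gives bits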
-The variable $\boldsymbol{z}_j^\prime$ denotes the value of
-$\boldsymbol{z}_j$ in the previous iteration.}%
-\todo{Move footnote to figure caption}%
-.%
+as is shown in figure \ref{fig:lp:message_passing}.%
 \todo{Explicitly specify sections?}%
 %
 \begin{figure}[H]
@@ -828,26 +823,35 @@ Initialize $\tilde{\boldsymbol{c}}, \boldsymbol{z}_{[1:m]}$ and $\boldsymbol{\la
         while $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{pri}}$ or $\sum_{j\in\mathcal{J}} \lVert \boldsymbol{z}^\prime_j - \boldsymbol{z}_j \rVert_2 \ge \epsilon_{\text{dual}}$ do
             for all $j$ in $\mathcal{J}$ do
                 $\boldsymbol{z}_j \leftarrow \Pi_{\mathcal{P}_{d_j}}\left(
-                    \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \boldsymbol{\lambda}_j \right)$
-                $\boldsymbol{\lambda}_j \leftarrow \boldsymbol{\lambda}_j +
+                    \boldsymbol{T}_j\tilde{\boldsymbol{c}} + \frac{\boldsymbol{\lambda}_j}{\mu} \right)$
+                $\boldsymbol{\lambda}_j \leftarrow \boldsymbol{\lambda}_j + \mu\left( \boldsymbol{T}_j\tilde{\boldsymbol{c}} - \boldsymbol{z}_j \right)$
             end for
             for all $i$ in $\mathcal{I}$ do
                 $\tilde{c}_i \leftarrow \frac{1}{\left| N_v\left( i \right) \right|} \left(
-                    \sum_{j\in N_v\left( i \right) } \Big( \left( \boldsymbol{\lambda}_j \right)_i
-                    - \left( \boldsymbol{z}_j \right)_i \Big) - \frac{\gamma_i}{\mu} \right)$
+                    \sum_{j\in N_v\left( i \right) } \Big(
+                    \left( \boldsymbol{z}_j \right)_i - \frac{1}{\mu} \left( \boldsymbol{\lambda}_j
+                    \right)_i
+                    \Big) - \frac{\gamma_i}{\mu} \right)$
             end for
         end while
     \end{genericAlgorithm}
-    \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message passing algorithm}
+    \caption{\ac{LP} decoding using \ac{ADMM} interpreted as a message-passing algorithm%
+    \protect\footnotemark{}}
     \label{fig:lp:message_passing}
 \end{figure}%
 %
+\footnotetext{$\epsilon_{\text{pri}} > 0$ and $\epsilon_{\text{dual}} > 0$
+are additional parameters
+defining the tolerances for the stopping criteria of the algorithm.
+The variable $\boldsymbol{z}_j^\prime$ denotes the value of
+$\boldsymbol{z}_j$ in the previous iteration.}%
+%
-\noindent The $\tilde{c}_i$-updates can be understood as a variable-node update step,
-and the $\boldsymbol{z}_j$- and $\boldsymbol{\lambda}_j$-updates can be understood as
-a check-node update step.
+\noindent The $\boldsymbol{z}_j$- and $\boldsymbol{\lambda}_j$-updates can be understood as
+a check-node update step (lines $3$--$6$) and the $\tilde{c}_i$-updates can be understood as
+a variable-node update step (lines $7$--$9$ in figure \ref{fig:lp:message_passing}).
 The updates for each variable- and check-node can be performed in parallel.
 With this interpretation it becomes clear why \ac{LP} decoding using \ac{ADMM} is able
 to achieve similar computational complexity to \ac{BP}.
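The two sums in the while-condition of the figure are the primal and dual residuals of ADMM. Continuing the sketch above (the tolerance values are arbitrary illustrative choices), the stopping rule reads:

import numpy as np

def residuals(c, z, z_prev, checks):
    # primal residual: sum_j || T_j c - z_j ||_2
    primal = sum(np.linalg.norm(c[nj] - z[j]) for j, nj in enumerate(checks))
    # dual residual: sum_j || z'_j - z_j ||_2, with z'_j from the last iteration
    dual = sum(np.linalg.norm(zp - zj) for zp, zj in zip(z_prev, z))
    return primal, dual

eps_pri, eps_dual = 1e-5, 1e-5       # tolerances (illustrative choices)
# Inside the iteration loop of the previous sketch one would keep
#     z_prev = [zj.copy() for zj in z]
# before the check-node updates and stop once
#     primal < eps_pri and dual < eps_dual.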
@@ -948,17 +952,13 @@ $L \left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) = -\ln\left(
 %
 \begin{align*}
     \hat{\boldsymbol{x}} &= \argmax_{\tilde{\boldsymbol{x}} \in \mathbb{R}^{n}}
-        \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
-        \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
-    &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \left(
-        L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
-        + \gamma h\left( \tilde{\boldsymbol{x}} \right)
-    \right)%
+        \mathrm{e}^{- L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right) }
+        \mathrm{e}^{-\gamma h\left( \tilde{\boldsymbol{x}} \right) } \\
+    &= \argmin_{\tilde{\boldsymbol{x}} \in \mathbb{R}^n} \big(
+        L\left( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \right)
+        + \gamma h\left( \tilde{\boldsymbol{x}} \right)
+    \big)%
 .\end{align*}%
-\todo{\textbackslash left($\cdot$ \textbackslash right)\\
-$\rightarrow$\\
-\textbackslash big( $\cdot$ \textbackslash big)\\
-?}%
 %
 Thus, with proximal decoding, the objective function
 $g\left( \tilde{\boldsymbol{x}} \right)$ considered is%
@@ -1011,7 +1011,6 @@ It is then immediately approximated with gradient-descent:%
     \hspace{5mm} \gamma > 0, \text{ small}
 .\end{align*}%
 %
-\todo{explicitly state $\nabla h$?}
 The second step thus becomes%
 %
 \begin{align*}
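The gradient-descent approximation of this first step can be sketched numerically. Since the excerpt does not spell out the concrete $L$ and $h$ of the thesis, the sketch assumes an AWGN negative log-likelihood and a smooth stand-in penalty $h\left( \tilde{\boldsymbol{x}} \right) = \sum_{j} \big( 1 - \prod_{i} \tilde{x}_i \big)^2$ (product over the bits of check $j$, bipolar convention); all parameter values, the step size and the iteration count are made-up placeholders.

import numpy as np

H = np.array([[1, 1, 0, 1, 0, 0],                    # toy code (assumption)
              [0, 1, 1, 0, 1, 0],
              [1, 0, 1, 0, 0, 1]])
checks = [np.flatnonzero(row) for row in H]
sigma2 = 0.5                                         # noise variance (assumption)
y = np.array([0.9, -1.1, 1.2, 0.8, -0.7, 1.0])       # received values (assumption)

def grad_L(x):
    # gradient of L(y|x) = ||y - x||^2 / (2 sigma2), the AWGN negative
    # log-likelihood under the bipolar convention
    return (x - y) / sigma2

def grad_h(x):
    # gradient of the stand-in penalty h(x) = sum_j (1 - prod_{i in N(j)} x_i)^2,
    # which vanishes exactly when x in {-1, +1}^n satisfies every parity check
    g = np.zeros_like(x)
    for nj in checks:
        p = np.prod(x[nj])
        for k, i in enumerate(nj):
            g[i] += -2.0 * (1.0 - p) * np.prod(np.delete(x[nj], k))
    return g

gamma_pen, step = 0.5, 0.05                          # penalty weight / step size (assumptions)
x = y.copy()
for _ in range(200):                                 # gradient descent on L + gamma_pen * h
    x = x - step * (grad_L(x) + gamma_pen * grad_h(x))
print(np.sign(x))                                    # hard decisions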