From 8eb206256da35a829ed408f1154d11acd39daee9 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Thu, 13 Apr 2023 01:37:46 +0200
Subject: [PATCH] Added discussion and fixed bibliography

---
 latex/thesis/bibliography.bib               | 34 ++++++++--------
 latex/thesis/chapters/discussion.tex        | 39 +++++++++++++++++--
 latex/thesis/chapters/lp_dec_using_admm.tex |  2 +
 .../chapters/theoretical_background.tex     |  6 +--
 4 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib
index 44cee94..d9a0de3 100644
--- a/latex/thesis/bibliography.bib
+++ b/latex/thesis/bibliography.bib
@@ -6,6 +6,7 @@
     year = {2003},
     url = {https://dspace.mit.edu/handle/1721.1/42831},
 }
+
 @article{proximal_paper,
     title={Proximal Decoding for LDPC Codes},
     author={Tadashi Wadayama and Satoshi Takabe},
@@ -51,6 +52,8 @@
     url = {http://www.inference.org.uk/mackay/codes/data.html}
 }
 
+
+
 @article{proximal_algorithms,
     author = {Parikh, Neal and Boyd, Stephen},
     title = {Proximal Algorithms},
@@ -61,8 +64,9 @@
     volume = {1},
     number = {3},
     issn = {2167-3888},
-    url = {https://doi.org/10.1561/2400000003},
-    doi = {10.1561/2400000003},
+%   url = {https://doi.org/10.1561/2400000003},
+    url = {https://ieeexplore.ieee.org/document/8187362},
+%   doi = {10.1561/2400000003},
     journal = {Found. Trends Optim.},
     month = {1},
     pages = {127–239},
@@ -77,15 +81,16 @@
     institution = {KIT},
 }
 
-@book{distr_opt_book,
-    author = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
-    title = {Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
-    year = {2011},
-    volume = {},
-    number = {},
-    pages = {},
-    doi = {},
-    url = {https://ieeexplore.ieee.org/document/8186925},
+@article{distr_opt_book,
+    title = {Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
+    author = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
+    journal = {Foundations and Trends in Machine Learning},
+    volume = {3},
+    number = {1},
+    pages = {1--122},
+    year = {2011},
+    publisher = {Now Publishers, Inc.},
+    url = {https://ieeexplore.ieee.org/document/8186925}
 }
 
 @INPROCEEDINGS{efficient_lp_dec_admm,
@@ -141,13 +146,6 @@
     isbn={978-1-886529-19-9}
 }
 
-@BOOK{admm_distr_stats,
-    author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
-    booktitle={Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
-    year={2011},
-    url={https://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf}
-}
-
 @INPROCEEDINGS{alp,
     author={Taghavi, Mohammad H. and Siegel, Paul H.},
     booktitle={2006 IEEE International Symposium on Information Theory},
diff --git a/latex/thesis/chapters/discussion.tex b/latex/thesis/chapters/discussion.tex
index 15b3fca..7506f19 100644
--- a/latex/thesis/chapters/discussion.tex
+++ b/latex/thesis/chapters/discussion.tex
@@ -1,8 +1,39 @@
 \chapter{Discussion}%
 \label{chapter:discussion}
 
-\begin{itemize}
-    \item Proximal decoding improvement limitations
-\end{itemize}
-% - Improvement pitfalls
+While the modified proximal decoding algorithm presented in section
+\ref{sec:prox:Improved Implementation} shows promising results, further
+investigation is required to determine how different parameter choices
+affect the decoding performance.
+Additionally, a more mathematically rigorous foundation for identifying
+the potentially erroneous components of the estimate is desirable.
+
+As mentioned in section \ref{subsec:prox:conv_properties}, the alternating
+minimization of the two gradients in the proximal decoding algorithm leads
+to an oscillation after a number of iterations.
+One approach to alleviating this problem might be to use \ac{ADMM} instead
+of the proximal gradient method to solve the optimization problem:
+due to the introduction of the dual variable, the minimization of each
+part of the objective function would no longer take place with respect to
+the exact same variable.
+Additionally, ``\ac{ADMM} will converge even when the x- and z-minimization
+steps are not carried out exactly [\ldots]''
+\cite[Sec. 3.4.4]{distr_opt_book}, which is advantageous, as the
+constraints are never truly satisfied, not even after the minimization
+step dealing with the constraint part of the objective function.
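+For reference, for a problem split as
+$\min_{\boldsymbol{x}, \boldsymbol{z}} f(\boldsymbol{x}) + g(\boldsymbol{z})$
+subject to $\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z} =
+\boldsymbol{c}$, the generic \ac{ADMM} iterations in scaled form are
+\cite{distr_opt_book}%
+%
+\begin{align*}
+	\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x}} \left(
+		f(\boldsymbol{x}) + \frac{\rho}{2} \left\|
+		\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z}
+		- \boldsymbol{c} + \boldsymbol{u} \right\|_2^2 \right) \\
+	\boldsymbol{z} &\leftarrow \argmin_{\boldsymbol{z}} \left(
+		g(\boldsymbol{z}) + \frac{\rho}{2} \left\|
+		\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z}
+		- \boldsymbol{c} + \boldsymbol{u} \right\|_2^2 \right) \\
+	\boldsymbol{u} &\leftarrow \boldsymbol{u} + \boldsymbol{A}\boldsymbol{x}
+		+ \boldsymbol{B}\boldsymbol{z} - \boldsymbol{c},
+\end{align*}%
+%
+where $\boldsymbol{u}$ denotes the scaled dual variable and $\rho > 0$ a
+penalty parameter; a suitable splitting of the proximal decoding objective
+into $f$ and $g$ would still have to be chosen.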
+Despite this, an initial examination by Yanxia Lu in
+\cite[Sec. 4.2.4]{yanxia_lu_thesis} shows only limited success.
+
+Another interesting approach might be the combination of proximal and
+\ac{LP} decoding.
+Performing an initial number of iterations using proximal decoding to
+obtain a rough first estimate and subsequently applying \ac{LP} decoding
+with only the violated constraints may be a way to achieve a shorter
+running time, owing to the low complexity of proximal decoding.
+This could be useful, for example, to mitigate the slow convergence of
+\ac{ADMM} \cite[Sec. 3.2.2]{distr_opt_book}.
+Subsequently introducing additional parity checks might be a way of
+combining the best properties of proximal decoding, \ac{LP} decoding using
+\ac{ADMM}, and \textit{adaptive \ac{LP} decoding} \cite{alp}, yielding a
+decoder that approximates \ac{ML} performance relatively efficiently.
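+As a rough sketch (suitable iteration counts and stopping criteria would
+have to be determined experimentally), such a combined decoder might
+proceed as follows:%
+%
+\begin{enumerate}
+	\item Run a small, fixed number of proximal decoding iterations to
+		obtain a rough estimate $\tilde{\boldsymbol{x}}$.
+	\item Determine the parity checks violated by $\tilde{\boldsymbol{x}}$.
+	\item Solve the \ac{LP} decoding problem using \ac{ADMM}, initialized
+		with $\tilde{\boldsymbol{x}}$ and subject only to the violated
+		constraints.
+	\item As in adaptive \ac{LP} decoding \cite{alp}, add constraints for
+		any parity checks violated by the new solution and repeat the
+		previous step until no violations remain.
+\end{enumerate}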
diff --git a/latex/thesis/chapters/lp_dec_using_admm.tex b/latex/thesis/chapters/lp_dec_using_admm.tex
index 6613a8c..29e55ee 100644
--- a/latex/thesis/chapters/lp_dec_using_admm.tex
+++ b/latex/thesis/chapters/lp_dec_using_admm.tex
@@ -527,6 +527,8 @@ The resulting formulation of the relaxed optimization problem becomes%
 \end{aligned}
 \label{eq:lp:relaxed_formulation}
 \end{align}%
+\todo{Mention ML certificate property}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \section{Decoding Algorithm}%
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index 75801a5..d6748e8 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -570,7 +570,7 @@ Thus, we can define the \textit{dual problem} as the search for the tightest low
 %
 and recover the solution $\boldsymbol{x}_{\text{opt}}$ to problem
 (\ref{eq:theo:admm_standard}) from the solution $\boldsymbol{\lambda}_\text{opt}$ to problem (\ref{eq:theo:dual})
-by computing \cite[Sec. 2.1]{admm_distr_stats}%
+by computing \cite[Sec. 2.1]{distr_opt_book}%
 %
 \begin{align}
 	\boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x} \ge \boldsymbol{0}}
@@ -582,7 +582,7 @@ The dual problem can then be solved iteratively using \textit{dual ascent}:
 starting with an initial estimate for $\boldsymbol{\lambda}$, calculate an
 estimate for $\boldsymbol{x}$ using equation
 (\ref{eq:theo:admm_obtain_primal}); then, update $\boldsymbol{\lambda}$
-using gradient descent \cite[Sec. 2.1]{admm_distr_stats}:%
+using gradient descent \cite[Sec. 2.1]{distr_opt_book}:%
 %
 \begin{align*}
 	\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x}} \mathcal{L}\left(
@@ -621,7 +621,7 @@ $\boldsymbol{A} = \begin{bmatrix}
 \boldsymbol{A}_N \end{bmatrix}$.
 The minimization of each term can then happen in parallel, in a distributed
-fashion \cite[Sec. 2.2]{admm_distr_stats}.
+fashion \cite[Sec. 2.2]{distr_opt_book}.
 In each minimization step, only one subvector $\boldsymbol{x}_i$ of
 $\boldsymbol{x}$ is considered, regarding all other subvectors as being
 constant.
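+This decomposition can be made explicit: writing the equality constraint
+as $\boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}$ and assuming a separable
+objective $f(\boldsymbol{x}) = \sum_{i=1}^{N} f_i(\boldsymbol{x}_i)$, the
+Lagrangian splits into $N$ independent terms
+\cite[Sec. 2.2]{distr_opt_book}%
+%
+\begin{align*}
+	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
+	= \sum_{i=1}^{N} \left( f_i(\boldsymbol{x}_i)
+	+ \boldsymbol{\lambda}^T \boldsymbol{A}_i \boldsymbol{x}_i \right)
+	- \boldsymbol{\lambda}^T \boldsymbol{b},
+\end{align*}%
+%
+so that, for fixed $\boldsymbol{\lambda}$, each term can be minimized over
+its subvector $\boldsymbol{x}_i$ independently of the others.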