From 8eb206256da35a829ed408f1154d11acd39daee9 Mon Sep 17 00:00:00 2001
From: Andreas Tsouchlos
Date: Thu, 13 Apr 2023 01:37:46 +0200
Subject: [PATCH] Added discussion and fixed bibliography

---
 latex/thesis/bibliography.bib               | 34 ++++++++--------
 latex/thesis/chapters/discussion.tex        | 39 +++++++++++++++++--
 latex/thesis/chapters/lp_dec_using_admm.tex |  2 +
 .../chapters/theoretical_background.tex     |  6 +--
 4 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib
index 44cee94..d9a0de3 100644
--- a/latex/thesis/bibliography.bib
+++ b/latex/thesis/bibliography.bib
@@ -6,6 +6,7 @@
     year = {2003},
     url = {https://dspace.mit.edu/handle/1721.1/42831},
 }
+
 @article{proximal_paper,
     title={Proximal Decoding for LDPC Codes},
     author={Tadashi Wadayama and Satoshi Takabe},
@@ -51,6 +52,8 @@
     url = {http://www.inference.org.uk/mackay/codes/data.html}
 }
 
+
+
 @article{proximal_algorithms,
     author = {Parikh, Neal and Boyd, Stephen},
     title = {Proximal Algorithms},
@@ -61,8 +64,9 @@
     volume = {1},
     number = {3},
     issn = {2167-3888},
-    url = {https://doi.org/10.1561/2400000003},
-    doi = {10.1561/2400000003},
+%   url = {https://doi.org/10.1561/2400000003},
+    url = {https://ieeexplore.ieee.org/document/8187362},
+%   doi = {10.1561/2400000003},
     journal = {Found. Trends Optim.},
     month = {1},
     pages = {127–239},
@@ -77,15 +81,16 @@
     institution = {KIT},
 }
 
-@book{distr_opt_book,
-    author = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
-    title = {Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
-    year = {2011},
-    volume = {},
-    number = {},
-    pages = {},
-    doi = {},
-    url = {https://ieeexplore.ieee.org/document/8186925},
+@article{distr_opt_book,
+    title = {Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
+    author = {Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
+    journal = {Foundations and Trends in Machine Learning},
+    volume = {3},
+    number = {1},
+    pages = {1--122},
+    year = {2011},
+    publisher = {Now Publishers, Inc.},
+    url = {https://ieeexplore.ieee.org/document/8186925}
 }
 
 @INPROCEEDINGS{efficient_lp_dec_admm,
@@ -141,13 +146,6 @@
     isbn={978-1-886529-19-9}
 }
 
-@BOOK{admm_distr_stats,
-    author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
-    booktitle={Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
-    year={2011},
-    url={https://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf}
-}
-
 @INPROCEEDINGS{alp,
     author={Taghavi, Mohammad H. and Siegel, Paul H.},
     booktitle={2006 IEEE International Symposium on Information Theory},
diff --git a/latex/thesis/chapters/discussion.tex b/latex/thesis/chapters/discussion.tex
index 15b3fca..7506f19 100644
--- a/latex/thesis/chapters/discussion.tex
+++ b/latex/thesis/chapters/discussion.tex
@@ -1,8 +1,39 @@
 \chapter{Discussion}%
 \label{chapter:discussion}
 
-\begin{itemize}
-    \item Proximal decoding improvement limitations
-\end{itemize}
-% - Improvement pitfalls
+While the modified proximal decoding algorithm presented in section
+\ref{sec:prox:Improved Implementation} shows promising results, further
+investigation is required to determine how different parameter choices
+affect the decoding performance.
+Additionally, a more mathematically rigorous foundation for identifying
+the potentially erroneous components of the estimate is desirable.
+
+As mentioned in section \ref{subsec:prox:conv_properties}, the alternating
+minimization of the two gradients in the proximal decoding algorithm leads
+to an oscillation after a number of iterations.
+One approach to alleviating this problem might be to use \ac{ADMM} instead
+of the proximal gradient method to solve the optimization problem:
+due to the introduction of the dual variable, the minimization of each
+part of the objective function would no longer take place with respect to
+the exact same variable.
+Additionally, ``\ac{ADMM} will converge even when the x- and z-minimization
+steps are not carried out exactly [\ldots]''
+\cite[Sec. 3.4.4]{distr_opt_book}, which is advantageous, as the
+constraints are never truly satisfied, not even after the minimization
+step dealing with the constraint part of the objective function.
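+For reference, for a problem split as
+$\min_{\boldsymbol{x}, \boldsymbol{z}} f(\boldsymbol{x}) + g(\boldsymbol{z})$
+subject to $\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z} =
+\boldsymbol{c}$, the generic \ac{ADMM} iterations in scaled form are
+\cite{distr_opt_book}%
+%
+\begin{align*}
+	\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x}} \left(
+		f(\boldsymbol{x}) + \frac{\rho}{2} \left\|
+		\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z}
+		- \boldsymbol{c} + \boldsymbol{u} \right\|_2^2 \right) \\
+	\boldsymbol{z} &\leftarrow \argmin_{\boldsymbol{z}} \left(
+		g(\boldsymbol{z}) + \frac{\rho}{2} \left\|
+		\boldsymbol{A}\boldsymbol{x} + \boldsymbol{B}\boldsymbol{z}
+		- \boldsymbol{c} + \boldsymbol{u} \right\|_2^2 \right) \\
+	\boldsymbol{u} &\leftarrow \boldsymbol{u} + \boldsymbol{A}\boldsymbol{x}
+		+ \boldsymbol{B}\boldsymbol{z} - \boldsymbol{c},
+\end{align*}%
+%
+where $\boldsymbol{u}$ denotes the scaled dual variable and $\rho > 0$ a
+penalty parameter; a suitable splitting of the proximal decoding objective
+into $f$ and $g$ would still have to be chosen.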
+Despite this, an initial examination by Yanxia Lu in
+\cite[Sec. 4.2.4]{yanxia_lu_thesis} shows only limited success.
+
+Another interesting approach might be the combination of proximal and
+\ac{LP} decoding.
+Performing an initial number of iterations using proximal decoding to
+obtain a rough first estimate and subsequently applying \ac{LP} decoding
+with only the violated constraints may be a way to achieve a shorter
+running time, owing to the low complexity of proximal decoding.
+This could be useful, for example, to mitigate the slow convergence of
+\ac{ADMM} \cite[Sec. 3.2.2]{distr_opt_book}.
+Subsequently introducing additional parity checks might be a way of
+combining the best properties of proximal decoding, \ac{LP} decoding using
+\ac{ADMM}, and \textit{adaptive \ac{LP} decoding} \cite{alp}, yielding a
+decoder that approximates \ac{ML} performance relatively efficiently.
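+As a rough sketch (suitable iteration counts and stopping criteria would
+have to be determined experimentally), such a combined decoder might
+proceed as follows:%
+%
+\begin{enumerate}
+	\item Run a small, fixed number of proximal decoding iterations to
+		obtain a rough estimate $\tilde{\boldsymbol{x}}$.
+	\item Determine the parity checks violated by $\tilde{\boldsymbol{x}}$.
+	\item Solve the \ac{LP} decoding problem using \ac{ADMM}, initialized
+		with $\tilde{\boldsymbol{x}}$ and subject only to the violated
+		constraints.
+	\item As in adaptive \ac{LP} decoding \cite{alp}, add constraints for
+		any parity checks violated by the new solution and repeat the
+		previous step until no violations remain.
+\end{enumerate}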
diff --git a/latex/thesis/chapters/lp_dec_using_admm.tex b/latex/thesis/chapters/lp_dec_using_admm.tex
index 6613a8c..29e55ee 100644
--- a/latex/thesis/chapters/lp_dec_using_admm.tex
+++ b/latex/thesis/chapters/lp_dec_using_admm.tex
@@ -527,6 +527,8 @@ The resulting formulation of the relaxed optimization problem becomes%
 \end{aligned}
 \label{eq:lp:relaxed_formulation}
 \end{align}%
+\todo{Mention ML certificate property}
+
 %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 \section{Decoding Algorithm}%
diff --git a/latex/thesis/chapters/theoretical_background.tex b/latex/thesis/chapters/theoretical_background.tex
index 75801a5..d6748e8 100644
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@@ -570,7 +570,7 @@ Thus, we can define the \textit{dual problem} as the search for the tightest low
 %
 and recover the solution $\boldsymbol{x}_{\text{opt}}$ to problem
 (\ref{eq:theo:admm_standard}) from the solution $\boldsymbol{\lambda}_\text{opt}$ to problem (\ref{eq:theo:dual})
-by computing \cite[Sec. 2.1]{admm_distr_stats}%
+by computing \cite[Sec. 2.1]{distr_opt_book}%
 %
 \begin{align}
 	\boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x} \ge \boldsymbol{0}}
@@ -582,7 +582,7 @@ The dual problem can then be solved iteratively using \textit{dual ascent}:
 starting with an initial estimate for $\boldsymbol{\lambda}$, calculate an
 estimate for $\boldsymbol{x}$ using equation
 (\ref{eq:theo:admm_obtain_primal}); then, update $\boldsymbol{\lambda}$
-using gradient descent \cite[Sec. 2.1]{admm_distr_stats}:%
+using gradient descent \cite[Sec. 2.1]{distr_opt_book}:%
 %
 \begin{align*}
 	\boldsymbol{x} &\leftarrow \argmin_{\boldsymbol{x}} \mathcal{L}\left(
@@ -621,7 +621,7 @@ $\boldsymbol{A} = \begin{bmatrix}
 \boldsymbol{A}_N \end{bmatrix}$.
 The minimization of each term can then happen in parallel, in a distributed
-fashion \cite[Sec. 2.2]{admm_distr_stats}.
+fashion \cite[Sec. 2.2]{distr_opt_book}.
 In each minimization step, only one subvector $\boldsymbol{x}_i$ of
 $\boldsymbol{x}$ is considered, regarding all other subvectors as being
 constant.
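+This decomposition can be made explicit: writing the equality constraint
+as $\boldsymbol{A}\boldsymbol{x} = \boldsymbol{b}$ and assuming a separable
+objective $f(\boldsymbol{x}) = \sum_{i=1}^{N} f_i(\boldsymbol{x}_i)$, the
+Lagrangian splits into $N$ independent terms
+\cite[Sec. 2.2]{distr_opt_book}%
+%
+\begin{align*}
+	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{\lambda} \right)
+	= \sum_{i=1}^{N} \left( f_i(\boldsymbol{x}_i)
+	+ \boldsymbol{\lambda}^T \boldsymbol{A}_i \boldsymbol{x}_i \right)
+	- \boldsymbol{\lambda}^T \boldsymbol{b},
+\end{align*}%
+%
+so that, for fixed $\boldsymbol{\lambda}$, each term can be minimized over
+its subvector $\boldsymbol{x}_i$ independently of the others.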