Added first part of explanation of ADMM

2023-03-16 17:19:49 +01:00 · 2023-03-16 17:19:49 +01:00 · 355d789cef
commit 355d789cef
parent 2f87165c0d
2 changed files with 109 additions and 4 deletions
--- a/latex/thesis/bibliography.bib
+++ b/latex/thesis/bibliography.bib
@ -132,3 +132,19 @@
  doi={10.1109/TIT.2013.2281372}
 }
@book{intro_to_lin_opt_book,
  title={Introduction to linear optimization},
  author={Bertsimas, Dimitris and Tsitsiklis, John N},
  volume={6},
  year={1997},
  publisher={Athena Scientific},
  address={Belmont, MA},
  isbn={978-1-886529-19-9}
 }
@BOOK{admm_distr_stats,
  author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
  title={Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
  year={2011},
  url={https://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf}
 }
--- a/latex/thesis/chapters/theoretical_background.tex
+++ b/latex/thesis/chapters/theoretical_background.tex
@ -101,8 +101,97 @@ Lastly, the optimization methods utilized are described.
 \section{Optimization Methods}
 \label{sec:theo:Optimization Methods}
-\begin{itemize}
+Generally, any linear program \todo{Acronym} can be expressed in \textit{standard form}%
-    \item \Ac{ADMM}
+\todo{Citation needed}%
-    \item Proximal gradient method
+\footnote{The inequality $\boldsymbol{x} \ge \boldsymbol{0}$ is to be
-\end{itemize}
+interpreted componentwise.}%
 :%
 %
 \begin{alignat}{3}
    \begin{alignedat}{3}
        \text{minimize }\hspace{2mm}   && \boldsymbol{\gamma}^\text{T} \boldsymbol{x}         \\
        \text{subject to }\hspace{2mm} && \boldsymbol{A}\boldsymbol{x}   & = \boldsymbol{b}   \\
                                       &&               \boldsymbol{x}   & \ge \boldsymbol{0}.
    \end{alignedat}
    \label{eq:theo:admm_standard}
 \end{alignat}%
 %
 A technique called \textit{Lagrangian relaxation} can then be applied---some of the
 constraints are moved into the objective function itself and the weights
 $\boldsymbol{\lambda}$ are introduced. A new, relaxed problem is formulated:
 %
 \begin{align}
    \begin{aligned}
        \text{minimize }\hspace{2mm}   & \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
            + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b}
                - \boldsymbol{A}\boldsymbol{x} \right)  \\
        \text{subject to }\hspace{2mm} & \boldsymbol{x} \ge \boldsymbol{0},
    \end{aligned}
    \label{eq:theo:admm_relaxed}
 \end{align}%
 %
 the new objective function being the \textit{Lagrangian}%
 %
 \begin{align*}
 \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right)
    = \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
        + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b}
            - \boldsymbol{A}\boldsymbol{x} \right)
 .\end{align*}%
 This problem is not directly equivalent to the original one, as the
 solution now depends on the choice of the \textit{Lagrange multipliers}
 $\boldsymbol{\lambda}$.
 Interestingly, for our particular class of problems,
 the optimal objective of the relaxed problem~\eqref{eq:theo:admm_relaxed} is a lower bound for
 the optimal objective of the original problem~\eqref{eq:theo:admm_standard}~\cite[Sec.~4.1]{intro_to_lin_opt_book}:%
 %
 \begin{align*}
    \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \phantom{a}}}
        \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda}
        \right)
    \le
    \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x}
            = \boldsymbol{b}}}
        \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
 .\end{align*}
 %
 Furthermore, for linear programs that have an optimal solution,
 \textit{strong duality} always holds.
 \todo{Citation needed}
 This means that the relaxed objective is not merely a lower bound;
 the tightest lower bound actually attains the optimal objective of the
 original problem:
 %
 \begin{align*}
    \max_{\boldsymbol{\lambda}} \, \min_{\boldsymbol{x} \ge \boldsymbol{0}}
        \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right) 
    = \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x}
            = \boldsymbol{b}}}
        \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
 .\end{align*}
 %
 In other words, with the optimal choice of $\boldsymbol{\lambda}$,
 the optimal objectives of the problems~\eqref{eq:theo:admm_standard}
 and~\eqref{eq:theo:admm_relaxed} have the same value.
 Thus, we can define the \textit{dual problem} as the search for the tightest lower bound:%
 %
 \begin{align}
    \text{maximize }\hspace{2mm} & \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}
        \left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right)
    \label{eq:theo:dual}
 ,\end{align}
 %
 and recover the optimal point $\boldsymbol{x}_{\text{opt}}$
 (the solution to problem~\eqref{eq:theo:admm_standard})
 from the dual optimal point $\boldsymbol{\lambda}_\text{opt}$
 (the solution to problem~\eqref{eq:theo:dual})
 by computing~\cite[Sec.~2.1]{admm_distr_stats}%
 %
 \begin{align}
    \boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x} \ge \boldsymbol{0}}
        \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b},
            \boldsymbol{\lambda}_{\text{opt}} \right)
    \label{eq:theo:admm_obtain_primal}
 .\end{align}