Added first part of explanation of ADMM
@ -132,3 +132,19 @@
  doi={10.1109/TIT.2013.2281372}
}

@book{intro_to_lin_opt_book,
  title={Introduction to Linear Optimization},
  author={Bertsimas, Dimitris and Tsitsiklis, John N.},
  volume={6},
  year={1997},
  publisher={Athena Scientific},
  address={Belmont, MA},
  isbn={978-1-886529-19-9}
}

@book{admm_distr_stats,
  author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan},
  title={Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers},
  year={2011},
  url={https://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf}
}

@ -101,8 +101,97 @@ Lastly, the optimization methods utilized are described.
\section{Optimization Methods}
\label{sec:theo:Optimization Methods}

\begin{itemize}
	\item \Ac{ADMM}
	\item Proximal gradient method
\end{itemize}
Generally, any linear program \todo{Acronym} can be expressed in \textit{standard form}%
\todo{Citation needed}%
\footnote{The inequality $\boldsymbol{x} \ge \boldsymbol{0}$ is to be
interpreted componentwise.}%
:%
%
\begin{alignat}{3}
	\begin{alignedat}{3}
		\text{minimize }\hspace{2mm} && \boldsymbol{\gamma}^\text{T} \boldsymbol{x} \\
		\text{subject to }\hspace{2mm} && \boldsymbol{A}\boldsymbol{x} & = \boldsymbol{b} \\
		&& \boldsymbol{x} & \ge \boldsymbol{0}.
	\end{alignedat}
	\label{eq:theo:admm_standard}
\end{alignat}%
%
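As a concrete illustration, a small LP in this standard form can be solved numerically. The sketch below uses SciPy's \texttt{linprog}; the particular $\boldsymbol{\gamma}$, $\boldsymbol{A}$, and $\boldsymbol{b}$ are an arbitrary toy instance chosen here for illustration, not values from the text:

```python
import numpy as np
from scipy.optimize import linprog

# Toy standard-form LP (illustrative values only):
# minimize gamma^T x  subject to  A x = b,  x >= 0.
gamma = np.array([1.0, 2.0])   # cost vector
A = np.array([[1.0, 1.0]])     # equality-constraint matrix
b = np.array([1.0])            # right-hand side

# bounds=(0, None) encodes the componentwise x >= 0 constraint.
res = linprog(c=gamma, A_eq=A, b_eq=b, bounds=[(0, None)] * 2)
print(res.x, res.fun)  # all weight goes to the cheaper variable: x = (1, 0)
```

Here the optimum is $\boldsymbol{x} = (1, 0)$ with objective value $1$, since the first variable has the lower cost.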
A technique called \textit{Lagrangian relaxation} can then be applied: some of the
constraints are moved into the objective function itself, weighted by newly
introduced multipliers $\boldsymbol{\lambda}$. A new, relaxed problem is formulated:
%
\begin{align}
	\begin{aligned}
		\text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
		+ \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b}
		- \boldsymbol{A}\boldsymbol{x} \right) \\
		\text{subject to }\hspace{2mm} & \boldsymbol{x} \ge \boldsymbol{0},
	\end{aligned}
	\label{eq:theo:admm_relaxed}
\end{align}%
%
the new objective function being the \textit{Lagrangian}%
%
\begin{align*}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right)
	= \boldsymbol{\gamma}^\text{T}\boldsymbol{x}
	+ \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b}
	- \boldsymbol{A}\boldsymbol{x} \right)
.\end{align*}%

This problem is not directly equivalent to the original one, as its
solution now depends on the choice of the \textit{Lagrange multipliers}
$\boldsymbol{\lambda}$.
Interestingly, for our particular class of problems,
the optimal objective of the relaxed problem (\ref{eq:theo:admm_relaxed}) is a lower bound for
the optimal objective of the original problem (\ref{eq:theo:admm_standard})
\cite[Sec. 4.1]{intro_to_lin_opt_book}:%
%
\begin{align*}
	\min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \phantom{a}}}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda}
	\right)
	\le
	\min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x}
	= \boldsymbol{b}}}
	\boldsymbol{\gamma}^\text{T}\boldsymbol{x}
.\end{align*}
%
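This lower-bound property can be checked numerically on a toy instance (values chosen purely for illustration). Since $\mathcal{L} = (\boldsymbol{\gamma} - \boldsymbol{A}^\text{T}\boldsymbol{\lambda})^\text{T}\boldsymbol{x} + \boldsymbol{\lambda}^\text{T}\boldsymbol{b}$, the inner minimum over $\boldsymbol{x} \ge \boldsymbol{0}$ equals $\boldsymbol{\lambda}^\text{T}\boldsymbol{b}$ whenever all reduced costs $\boldsymbol{\gamma} - \boldsymbol{A}^\text{T}\boldsymbol{\lambda}$ are nonnegative, and is unbounded below otherwise:

```python
import numpy as np

# Toy instance (illustrative): minimize gamma^T x s.t. A x = b, x >= 0.
gamma = np.array([1.0, 2.0])
A = np.array([[1.0, 1.0]])
b = np.array([1.0])
primal_opt = 1.0  # the optimum is attained at x = (1, 0)

def dual_value(lam):
    """min over x >= 0 of the Lagrangian: lam^T b if the reduced costs
    gamma - A^T lam are all nonnegative, else unbounded below."""
    reduced = gamma - A.T @ lam
    return lam @ b if np.all(reduced >= 0) else -np.inf

# Every choice of multiplier yields a lower bound (weak duality).
for lam in [np.array([0.0]), np.array([0.5]), np.array([1.0]), np.array([5.0])]:
    assert dual_value(lam) <= primal_opt
```

Note that $\lambda = 1$ already makes the bound tight here, hinting at the strong-duality result discussed next.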
Furthermore, for linear programs \textit{strong duality}
always holds.
\todo{Citation needed}
This means that the relaxed problem does not merely provide lower bounds:
the tightest such bound actually attains the optimal value itself:
%
\begin{align*}
	\max_{\boldsymbol{\lambda}} \, \min_{\boldsymbol{x} \ge \boldsymbol{0}}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right)
	= \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x}
	= \boldsymbol{b}}}
	\boldsymbol{\gamma}^\text{T}\boldsymbol{x}
.\end{align*}
%
In other words, with the optimal choice of $\boldsymbol{\lambda}$,
the optimal objectives of the problems (\ref{eq:theo:admm_standard})
and (\ref{eq:theo:admm_relaxed}) have the same value.
Thus, we can define the \textit{dual problem} as the search for the tightest lower bound:%
%
\begin{align}
	\text{maximize }\hspace{2mm} & \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L}
	\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right)
	\label{eq:theo:dual}
,\end{align}
%
and recover the optimal point $\boldsymbol{x}_{\text{opt}}$
(the solution to problem (\ref{eq:theo:admm_standard}))
from the dual optimal point $\boldsymbol{\lambda}_\text{opt}$
(the solution to problem (\ref{eq:theo:dual}))
by computing \cite[Sec. 2.1]{admm_distr_stats}%
%
\begin{align}
	\boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x} \ge \boldsymbol{0}}
	\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b},
	\boldsymbol{\lambda}_{\text{opt}} \right)
	\label{eq:theo:admm_obtain_primal}
.\end{align}

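Strong duality can be observed directly on a toy instance (values again chosen only for illustration). The sketch below maximizes the dual function over a crude one-dimensional grid, which is viable here only because $\boldsymbol{\lambda}$ is scalar, and checks that the maximum matches the primal optimum:

```python
import numpy as np

# Toy standard-form LP (illustrative): minimize gamma^T x s.t. A x = b, x >= 0.
gamma = np.array([1.0, 2.0])
A = np.array([[1.0, 1.0]])
b = np.array([1.0])
primal_opt = 1.0  # attained at x = (1, 0)

def dual_value(lam):
    # min over x >= 0 of the Lagrangian: lam^T b if all reduced
    # costs gamma - A^T lam are nonnegative, else unbounded below.
    reduced = gamma - A.T @ lam
    return lam @ b if np.all(reduced >= 0) else -np.inf

# Crude grid search for lambda_opt (the grid contains 1.0 exactly).
grid = [np.array([t]) for t in np.arange(-200, 201) / 100.0]
dual_opt = max(dual_value(lam) for lam in grid)
assert abs(dual_opt - primal_opt) < 1e-9  # strong duality: the bound is tight
```

Note that at $\lambda_{\text{opt}} = 1$ the minimizer of the Lagrangian is not unique in this instance (any $x_1 \ge 0$ with $x_2 = 0$ works), so recovering the primal point from the dual optimum needs care; this non-uniqueness is one motivation for the augmented-Lagrangian term used by \ac{ADMM}.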