diff --git a/latex/thesis/bibliography.bib b/latex/thesis/bibliography.bib index 6a859dc..e4effd3 100644 --- a/latex/thesis/bibliography.bib +++ b/latex/thesis/bibliography.bib @@ -132,3 +132,25 @@ doi={10.1109/TIT.2013.2281372} } +@book{intro_to_lin_opt_book, + title={Introduction to Linear Optimization}, + author={Bertsimas, Dimitris and Tsitsiklis, John N.}, + volume={6}, + year={1997}, + publisher={Athena Scientific}, + address={Belmont, MA}, + isbn={978-1-886529-19-9} +} + +@article{admm_distr_stats, + author={Boyd, Stephen and Parikh, Neal and Chu, Eric and Peleato, Borja and Eckstein, Jonathan}, + title={Distributed Optimization and Statistical Learning via the Alternating Direction Method of Multipliers}, + journal={Foundations and Trends in Machine Learning}, + volume={3}, + number={1}, + pages={1--122}, + year={2011}, + doi={10.1561/2200000016}, + url={https://web.stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf} +} + diff --git a/latex/thesis/chapters/theoretical_background.tex index 5b47ea1..2a3a2a6 100644 --- a/latex/thesis/chapters/theoretical_background.tex +++ b/latex/thesis/chapters/theoretical_background.tex @@ -101,8 +101,97 @@ Lastly, the optimization methods utilized are described. \section{Optimization Methods} \label{sec:theo:Optimization Methods} -\begin{itemize} - \item \Ac{ADMM} - \item Proximal gradient method -\end{itemize} +Generally, any linear program \todo{Acronym} can be expressed in \textit{standard form}% +\todo{Citation needed}% +\footnote{The inequality $\boldsymbol{x} \ge \boldsymbol{0}$ is to be +interpreted componentwise.}% +:% +% +\begin{alignat}{3} + \begin{alignedat}{3} + \text{minimize }\hspace{2mm} && \boldsymbol{\gamma}^\text{T} \boldsymbol{x} \\ + \text{subject to }\hspace{2mm} && \boldsymbol{A}\boldsymbol{x} & = \boldsymbol{b} \\ + && \boldsymbol{x} & \ge \boldsymbol{0}. 
+ \end{alignedat} + \label{eq:theo:admm_standard} +\end{alignat}% +% +A technique called \textit{Lagrangian relaxation} can then be applied --- some of the +constraints are moved into the objective function itself and the weights +$\boldsymbol{\lambda}$ are introduced. A new, relaxed problem is formulated: +% +\begin{align} + \begin{aligned} + \text{minimize }\hspace{2mm} & \boldsymbol{\gamma}^\text{T}\boldsymbol{x} + + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b} + - \boldsymbol{A}\boldsymbol{x} \right) \\ + \text{subject to }\hspace{2mm} & \boldsymbol{x} \ge \boldsymbol{0}, + \end{aligned} + \label{eq:theo:admm_relaxed} +\end{align}% +% +the new objective function being the \textit{Lagrangian}% +% +\begin{align*} +\mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right) + = \boldsymbol{\gamma}^\text{T}\boldsymbol{x} + + \boldsymbol{\lambda}^\text{T}\left(\boldsymbol{b} + - \boldsymbol{A}\boldsymbol{x} \right) +.\end{align*}% + +This problem is not directly equivalent to the original one, as the +solution now depends on the choice of the \textit{Lagrange multipliers} +$\boldsymbol{\lambda}$. +Interestingly, for our particular class of problems, +the optimal objective of the relaxed problem (\ref{eq:theo:admm_relaxed}) is a lower bound for +the optimal objective of the original problem (\ref{eq:theo:admm_standard}) +\cite[Sec. 4.1]{intro_to_lin_opt_book}:% +% +\begin{align*} + \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \phantom{a}}} + \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} + \right) + \le + \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x} + = \boldsymbol{b}}} + \boldsymbol{\gamma}^\text{T}\boldsymbol{x} +.\end{align*} +% +Furthermore, for linear programs \textit{strong duality} +always holds. 
+\todo{Citation needed} +This means that it is not only a lower bound: the tightest such +bound actually attains the optimal value itself: +% +\begin{align*} + \max_{\boldsymbol{\lambda}} \, \min_{\boldsymbol{x} \ge \boldsymbol{0}} + \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right) + = \min_{\substack{\boldsymbol{x} \ge \boldsymbol{0} \\ \boldsymbol{A}\boldsymbol{x} + = \boldsymbol{b}}} + \boldsymbol{\gamma}^\text{T}\boldsymbol{x} +.\end{align*} +% +In other words, with the optimal choice of $\boldsymbol{\lambda}$, +the optimal objectives of the problems (\ref{eq:theo:admm_standard}) +and (\ref{eq:theo:admm_relaxed}) have the same value. +Thus, we can define the \textit{dual problem} as the search for the tightest lower bound:% +% +\begin{align} + \text{maximize }\hspace{2mm} & \min_{\boldsymbol{x} \ge \boldsymbol{0}} \mathcal{L} + \left( \boldsymbol{x}, \boldsymbol{b}, \boldsymbol{\lambda} \right) + \label{eq:theo:dual} +,\end{align} +% +and recover the optimal point $\boldsymbol{x}_{\text{opt}}$ +(the solution to problem (\ref{eq:theo:admm_standard})) +from the dual optimal point $\boldsymbol{\lambda}_\text{opt}$ +(the solution to problem (\ref{eq:theo:dual})) +by computing \cite[Sec. 2.1]{admm_distr_stats}% +% +\begin{align} + \boldsymbol{x}_{\text{opt}} = \argmin_{\boldsymbol{x}} + \mathcal{L}\left( \boldsymbol{x}, \boldsymbol{b}, + \boldsymbol{\lambda}_{\text{opt}} \right) + \label{eq:theo:admm_obtain_primal} +.\end{align}