diff --git a/letter.tex b/letter.tex
index a651373..f90a4f4 100644
--- a/letter.tex
+++ b/letter.tex
@@ -9,6 +9,7 @@
 \usepackage{dsfont}
 \usepackage{mleftright}
 \usepackage{bbm}
+\usepackage{bm}
 \usepackage[
 backend=biber,
 style=ieee,
@@ -201,43 +202,43 @@ codewords, the lengths of which are denoted by
 $k \in \mathbb{N}$ and $n \in \mathbb{N}$, respectively, with $k \le n$.
 The set of codewords $\mathcal{C} \subset \mathbb{F}_2^n$ of a binary
 linear code can be represented using the parity-check matrix
-$\boldsymbol{H} \in \mathbb{F}_2^{m \times n} $, where $m$ represents the
+$\bm{H} \in \mathbb{F}_2^{m \times n}$, where $m$ represents the
 number of parity-checks:
 %
 \begin{align*}
-    \mathcal{C} := \left\{ \boldsymbol{c} \in \mathbb{F}_2^n :
-    \boldsymbol{H}\boldsymbol{c}^\text{T} = \boldsymbol{0} \right\}
+    \mathcal{C} := \left\{ \bm{c} \in \mathbb{F}_2^n :
+    \bm{H}\bm{c}^\text{T} = \bm{0} \right\}
 \end{align*}
 %
 The check nodes $j \in \mathcal{J}:=\left\{1, \ldots, m\right\}$ each
-correspond to a parity check, i.e., a row of $\boldsymbol{H}$.
+correspond to a parity check, i.e., a row of $\bm{H}$.
 The variable nodes $i \in \mathcal{I}:=\left\{1, \ldots, n\right\}$ correspond
 to the components of a codeword being subjected to a parity check, i.e.,
-to the columns of $\boldsymbol{H}$.
+to the columns of $\bm{H}$.
 The neighborhood of a parity check $j$, i.e., the set of indices of
-components relevant for the according parity check, is denoted by
-$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \boldsymbol{H}\negthinspace_{j,i} = 1 \right\},
+components relevant for the corresponding parity check, is denoted by
+$\mathcal{N}_c(j) := \left\{i \in \mathcal{I}: \bm{H}\negthinspace_{j,i} = 1 \right\},
 \hspace{2mm} j \in \mathcal{J}$.
-In order to transmit a codeword $\boldsymbol{c} \in \mathbb{F}_2^n$, it is
-mapped onto a \textit{binary phase shift keying} (BPSK) symbol via
-$\boldsymbol{x} = 1 - 2\boldsymbol{c}$, with
-$ \boldsymbol{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
+In order to transmit a codeword $\bm{c} \in \mathbb{F}_2^n$, it is
+mapped onto a vector of \textit{binary phase shift keying} (BPSK) symbols via
+$\bm{x} = \bm{1} - 2\bm{c}$, with
+$\bm{x} \in \left\{\pm 1\right\}^n$, which is then transmitted over an
 AWGN channel.
-The received vector $\boldsymbol{y} \in \mathbb{R}^n$ is decoded to obtain an
+The received vector $\bm{y} \in \mathbb{R}^n$ is decoded to obtain an
 estimate of the transmitted codeword, denoted as
-$\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$.
-A distinction is made between $\boldsymbol{x} \in \left\{\pm 1\right\}^n$
-and $\tilde{\boldsymbol{x}} \in \mathbb{R}^n$,
-the former denoting the BPSK symbol physically transmitted over the channel
+$\hat{\bm{c}} \in \mathbb{F}_2^n$.
+A distinction is made between $\bm{x} \in \left\{\pm 1\right\}^n$
+and $\tilde{\bm{x}} \in \mathbb{R}^n$,
+the former denoting the BPSK symbols physically transmitted over the channel
 and the latter being used as a variable during the optimization process.
-The posterior probability of having transmitted $\boldsymbol{x}$ when receiving
-$\boldsymbol{y}$ is expressed as a \textit{probability mass function} (PMF)
-$P_{\boldsymbol{X}\mid\boldsymbol{Y}}(\boldsymbol{x} \mid \boldsymbol{y})$.
+The posterior probability of having transmitted $\bm{x}$ when receiving
+$\bm{y}$ is expressed as a \textit{probability mass function} (PMF)
+$P_{\bm{X}\mid\bm{Y}}(\bm{x} \mid \bm{y})$.
+Likewise, the likelihood of receiving $\bm{y}$ upon transmitting
+$\bm{x}$ is expressed as a \textit{probability density function} (PDF)
+$f_{\bm{Y}\mid\bm{X}}(\bm{y} \mid \bm{x})$.

 %%%%%%%%%%%%%%%%%%%%%
@@ -258,7 +259,7 @@
 One such expression, formulated under the assumption of BPSK, is the
 \textit{code-constraint polynomial} \cite{proximal_paper}
 %
 \begin{align*}
-    h( \tilde{\boldsymbol{x}} ) =
+    h( \tilde{\bm{x}} ) =
     \underbrace{\sum_{i=1}^{n} \left( \tilde{x}_i^2-1 \right) ^2}_{\text{Bipolar constraint}}
     + \underbrace{\sum_{j=1}^{m} \left[
@@ -274,45 +275,45 @@
 information regarding the code.
 The channel model can be considered using the negative log-likelihood
 %
 \begin{align*}
-    L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) = -\ln\mleft(
-    f_{\boldsymbol{Y} \mid \tilde{\boldsymbol{X}}} \mleft(
-    \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright) \mright)
+    L \mleft( \bm{y} \mid \tilde{\bm{x}} \mright) = -\ln\mleft(
+    f_{\bm{Y} \mid \tilde{\bm{X}}} \mleft(
+    \bm{y} \mid \tilde{\bm{x}} \mright) \mright)
 .\end{align*}
 %
-The information about the channel and the code are consolidated in the
+The information about the channel and the code is consolidated in the
 objective function \cite{proximal_paper}
 %
 \begin{align*}
-    g \mleft( \tilde{\boldsymbol{x}} \mright)
-    = L \mleft( \boldsymbol{y} \mid \tilde{\boldsymbol{x}} \mright)
-    + \gamma h\mleft( \tilde{\boldsymbol{x}} \mright),
+    g \mleft( \tilde{\bm{x}} \mright)
+    = L \mleft( \bm{y} \mid \tilde{\bm{x}} \mright)
+    + \gamma h\mleft( \tilde{\bm{x}} \mright),
     \hspace{5mm} \gamma > 0%
 .\end{align*}
 %
-The objective function is minimized using the proximal gradient method, which
-amounts to iteratively performing two gradient-descent steps
-\cite{proximal_paper} with the given objective function and considering AWGN
-channels.
-To this end, two helper variables, $\boldsymbol{r}$ and $\boldsymbol{s}$, are
+The objective function is minimized using the proximal gradient method, which,
+for this objective function and AWGN channels, amounts to iteratively
+performing two gradient-descent steps \cite{proximal_paper}.
+To this end, two helper variables, $\bm{r}$ and $\bm{s}$, are
 introduced, describing the result of each of the two steps:
 %
 \begin{alignat}{3}
-    \boldsymbol{r} &\leftarrow \boldsymbol{s}
-    - \omega \mleft( \boldsymbol{s} - \boldsymbol{y} \mright)
+    \bm{r} &\leftarrow \bm{s}
+    - \omega \mleft( \bm{s} - \bm{y} \mright)
     \hspace{5mm }&&\omega > 0 \label{eq:r_update}\\
-    \boldsymbol{s} &\leftarrow \boldsymbol{r}
-    - \gamma \nabla h\mleft( \boldsymbol{r} \mright),
+    \bm{s} &\leftarrow \bm{r}
+    - \gamma \nabla h\mleft( \bm{r} \mright),
     \hspace{5mm} &&\gamma > 0 \label{eq:s_update}
 .\end{alignat}
 %
-An equation for determining $\nabla h(\boldsymbol{r})$ is given in
+An equation for determining $\nabla h(\bm{r})$ is given in
 \cite{proximal_paper}.
-It should be noted that the variables $\boldsymbol{r}$ and $\boldsymbol{s}$
-represent $\tilde{\boldsymbol{x}}$ during different
+It should be noted that the variables $\bm{r}$ and $\bm{s}$
+represent $\tilde{\bm{x}}$ during different
 stages of the decoding process.
 As the gradient of the code-constraint polynomial can attain very large values
 in some cases, an additional step is introduced to ensure numerical stability:
-every current estimate $\boldsymbol{s}$ is projected onto
+each intermediate estimate $\bm{s}$ is projected onto
 $\left[-\eta, \eta\right]^n$ by a projection
 $\Pi_\eta : \mathbb{R}^n \rightarrow \left[-\eta, \eta\right]^n$, where $\eta$
 is a positive constant slightly larger than one, e.g., $\eta = 1.5$.
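As an aside, the two update steps \eqref{eq:r_update} and \eqref{eq:s_update} together with the projection $\Pi_\eta$ are straightforward to prototype. Below is a minimal NumPy sketch; the parity-check matrix, the received vector, and all parameter values are toy placeholders, and since the parity term of $h$ is truncated in the hunk above, `grad_h` assumes the product form $\sum_j \left( \prod_{i \in \mathcal{N}_c(j)} \tilde{x}_i - 1 \right)^2$ for it -- the exact expression is the one given in \cite{proximal_paper}.

```python
import numpy as np

# Toy parity-check matrix; H, y, and all parameters below are
# placeholders, not the setup simulated in the letter.
H = np.array([[1, 1, 0],
              [0, 1, 1]])
checks = [np.flatnonzero(row) for row in H]  # neighborhoods N_c(j)

def grad_h(x, checks):
    # Bipolar constraint: d/dx_i of sum_i (x_i^2 - 1)^2 is 4 x_i (x_i^2 - 1).
    g = 4.0 * x * (x**2 - 1.0)
    # Parity term (assumed product form, see lead-in): for each check j,
    # d/dx_i of (prod_{N_c(j)} x - 1)^2 is 2 (prod - 1) * prod_{N_c(j)\{i}} x.
    for idx in checks:
        sub = x[idx]
        p = np.prod(sub)
        for k, i in enumerate(idx):
            g[i] += 2.0 * (p - 1.0) * np.prod(np.delete(sub, k))
    return g

def proximal_decode(y, H, checks, K=200, omega=0.05, gamma=0.05, eta=1.5):
    s = np.zeros_like(y)
    for _ in range(K):
        r = s - omega * (s - y)                 # Eq. (r_update): step on L
        s = np.clip(r - gamma * grad_h(r, checks), -eta, eta)  # Eq. (s_update) + projection
        c_hat = (s <= 0).astype(int)            # hard decision, c_hat = 1_{s <= 0}
        if not ((H @ c_hat) % 2).any():         # parity check over F_2
            break                               # valid codeword reached
    return c_hat

print(proximal_decode(np.array([0.9, -0.2, 1.1]), H, checks))
```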
@@ -324,16 +325,16 @@
 presented in Algorithm \ref{alg:proximal_decoding}.
     \label{alg:proximal_decoding}
     \begin{algorithmic}
-    \STATE $\boldsymbol{s} \leftarrow \boldsymbol{0}$
+    \STATE $\bm{s} \leftarrow \bm{0}$
     \STATE \textbf{for} $K$ iterations \textbf{do}
-    \STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
-    \STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
-    \STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathbbm{1}_{\left\{ \boldsymbol{s} \le 0 \right\}}$
-    \STATE \hspace{5mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
-    \STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \hspace{5mm} $\bm{r} \leftarrow \bm{s} - \omega \left( \bm{s} - \bm{y} \right)$
+    \STATE \hspace{5mm} $\bm{s} \leftarrow \Pi_\eta \left(\bm{r} - \gamma \nabla h\left( \bm{r} \right) \right)$
+    \STATE \hspace{5mm} $\hat{\bm{c}} \leftarrow \mathbbm{1}_{\left\{ \bm{s} \le 0 \right\}}$
+    \STATE \hspace{5mm} \textbf{if} $\bm{H}\hat{\bm{c}}^\text{T} = \bm{0}$ \textbf{do}
+    \STATE \hspace{10mm} \textbf{return} $\hat{\bm{c}}$
     \STATE \hspace{5mm} \textbf{end if}
     \STATE \textbf{end for}
-    \STATE \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \textbf{return} $\hat{\bm{c}}$
     \end{algorithmic}
 \end{algorithm}
@@ -481,8 +482,8 @@ optimization process.%
             at={(0.9775,0.97)},anchor=north east}]
             \addlegendimage{mark=none}
             \addlegendentry{
-                $\nabla L\left(\boldsymbol{y}
-                \mid \tilde{\boldsymbol{x}}\right)$
+                $\nabla L\left(\bm{y}
+                \mid \tilde{\bm{x}}\right)$
             };
         \end{axis}
     \end{tikzpicture}
@@ -530,22 +531,22 @@ optimization process.%
             empty legend,
             at={(0.9775,0.97)},anchor=north east}]
             \addlegendimage{mark=none}
-            \addlegendentry{$\nabla h\left(\tilde{\boldsymbol{x}}\right)$};
+            \addlegendentry{$\nabla h\left(\tilde{\bm{x}}\right)$};
         \end{axis}
     \end{tikzpicture}
     \fi
     \caption{Gradients
-        $\nabla L\left(\boldsymbol{y} \mid \tilde{\boldsymbol{x}}\right)$
-        and $\nabla h \left( \tilde{\boldsymbol{x}} \right)$ for a repetition
+        $\nabla L\left(\bm{y} \mid \tilde{\bm{x}}\right)$
+        and $\nabla h \left( \tilde{\bm{x}} \right)$ for a repetition
         code with $n=2$.
-        Shown for $\boldsymbol{y} = \begin{bmatrix} -0.5 & 0.8 \end{bmatrix}$.
+        Shown for $\bm{y} = \begin{bmatrix} -0.5 & 0.8 \end{bmatrix}$.
     }
     \label{fig:grad}
 \end{figure}%
 %
 In Fig. \ref{fig:prox:convergence_large_n}, we consider only component
-$\left(\tilde{\boldsymbol{x}}\right)_1$ of the estimate during a
-decoding operation for the LDPC code used also for Fig. 1.
-Two qualities may be observed.
-First, we observe the average absolute values of the two gradients are equal,
+$\left(\tilde{\bm{x}}\right)_1$ of the estimate during a
+decoding operation for the LDPC code also used for Fig. 1.
+Two properties may be observed.
+First, the average absolute values of the two gradients are equal,
@@ -585,14 +586,14 @@ oscillate after a certain number of iterations.%
             table [col sep=comma, x=k, y=grad_h_0,
                 discard if gt={k}{300}]
                 {res/extreme_components_20433484_combined.csv};
-            \addlegendentry{$\left(\tilde{\boldsymbol{x}}\right)_1$}
+            \addlegendentry{$\left(\tilde{\bm{x}}\right)_1$}
             \addlegendentry{$\left(\nabla L\right)_1$}
             \addlegendentry{$\left(\nabla h\right)_1$}
         \end{axis}
     \end{tikzpicture}
     \fi
-    \caption{Visualization of component $\left(\tilde{\boldsymbol{x}}\right)_1$
-        for a decoding operation for a (3,6) regular LDPC code with
+    \caption{Visualization of component $\left(\tilde{\bm{x}}\right)_1$
+        during a decoding operation for a (3,6) regular LDPC code with
         $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
         Parameters used for simulation: $\gamma = 0.05, \omega = 0.05,
@@ -608,7 +609,7 @@ Considering the magnitude of the oscillation of the gradient of the code constra
 polynomial, some interesting behavior may be observed.
 Fig. \ref{fig:p_error} shows the probability that a component of the estimate
 is wrong, determined through a Monte Carlo simulation, when the components of
-$\boldsymbol{c}$ are ordered from smallest to largest oscillation of
+$\bm{c}$ are ordered from smallest to largest oscillation of
 $\left(\nabla h\right)_i$.

 The lower the magnitude of the oscillation, the higher the probability that the
@@ -640,7 +641,7 @@ the probability that a given component was decoded incorrectly.%
     \fi
     \caption{Probability that a component of the estimated codeword
-        $\hat{\boldsymbol{c}}\in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
+        $\hat{\bm{c}} \in \mathbb{F}_2^n$ is erroneous for a (3,6) regular
         LDPC code with $n=204, k=102$ \cite[\text{204.33.484}]{mackay}.
         The indices $i'$ are ordered such that the amplitude of oscillation of
         $\left(\nabla h\right)_{i'}$ increases with $i'$.
@@ -656,7 +657,7 @@ If a valid codeword has been reached, i.e., if the algorithm has converged, we
 return this solution.
 Otherwise, $N \in \mathbb{N}$ components are selected based on the criterion
 presented above.
-Beginning with the recent estimate $\hat{\boldsymbol{c}} \in \mathbb{F}_2^n$,
-all variations of words with the selected components modified are then
-generated and an ``ML-in-the-list'' step is performed.
+Beginning with the most recent estimate $\hat{\bm{c}} \in \mathbb{F}_2^n$,
+all $2^N$ variations of this word with the selected components modified are
+then generated and an ``ML-in-the-list'' step is performed.
@@ -665,13 +666,13 @@
     \label{alg:ml-in-the-list}
     \begin{algorithmic}
-    \STATE Find valid codewords under $\left(\hat{\boldsymbol{c}}_{l}\right)_{1=1}^{2^N}$
+    \STATE Find valid codewords among $\left(\hat{\bm{c}}_{l}\right)_{l=1}^{2^N}$
     \STATE \textbf{if} no valid codewords exist
-    \STATE \hspace{5mm} Compute $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$ for all variations $\boldsymbol{c}_l$
+    \STATE \hspace{5mm} Compute $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ for all variations $\hat{\bm{c}}_l$
     \STATE \textbf{else}
-    \STATE \hspace{5mm} Compute $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$ for valid codewords
+    \STATE \hspace{5mm} Compute $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ for the valid codewords
     \STATE \textbf{end if}
-    \STATE \textbf{return} $\hat{\boldsymbol{c}}_l$ with highest $\langle \hat{\boldsymbol{c}}_l, \hat{\boldsymbol{c}} \rangle$
+    \STATE \textbf{return} $\hat{\bm{c}}_l$ with the highest $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$
     \end{algorithmic}
 \end{algorithm}%
 %
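The bit-selection criterion and the list step can be sketched in the same spirit as the snippet above. Two points are assumptions of this sketch rather than statements of the letter: the ``magnitude of the oscillation'' of $\left(\nabla h\right)_i$ is measured as max minus min over a trailing window of iterations, and the inner product $\langle \hat{\bm{c}}_l, \hat{\bm{c}} \rangle$ is taken between the bipolar images $\bm{1} - 2\bm{c}$, so that it counts agreements minus disagreements.

```python
import itertools
import numpy as np

def select_bits(grad_trace, N):
    # Selection criterion from the letter: components whose (grad h)_i
    # oscillates with the SMALLEST amplitude are the most likely to be
    # wrong.  grad_trace holds the last T gradients, shape (T, n);
    # amplitude as max - min over that window is our assumption.
    amplitude = grad_trace.max(axis=0) - grad_trace.min(axis=0)
    return np.argsort(amplitude)[:N]        # N smallest-amplitude indices

def ml_in_the_list(c_hat, idx, H):
    # Generate all 2^N variations of c_hat on the selected indices.
    candidates = []
    for flips in itertools.product((0, 1), repeat=len(idx)):
        c = c_hat.copy()
        c[idx] ^= np.array(flips)
        candidates.append(c)
    # Prefer valid codewords; otherwise fall back to all variations.
    valid = [c for c in candidates if not ((H @ c) % 2).any()]
    pool = valid if valid else candidates
    # Score against the estimate in the bipolar domain (assumption, see above).
    x_hat = 1 - 2 * c_hat
    return max(pool, key=lambda c: np.dot(1 - 2 * c, x_hat))
```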
@@ -681,18 +682,18 @@
     \label{alg:improved}
     \begin{algorithmic}
-    \STATE $\boldsymbol{s} \leftarrow \boldsymbol{0}$
+    \STATE $\bm{s} \leftarrow \bm{0}$
     \STATE \textbf{for} $K$ iterations \textbf{do}
-    \STATE \hspace{5mm} $\boldsymbol{r} \leftarrow \boldsymbol{s} - \omega \left( \boldsymbol{s} - \boldsymbol{y} \right) $
-    \STATE \hspace{5mm} $\boldsymbol{s} \leftarrow \Pi_\eta \left(\boldsymbol{r} - \gamma \nabla h\left( \boldsymbol{r} \right) \right)$
-    \STATE \hspace{5mm} $\boldsymbol{\hat{c}} \leftarrow \mathds{1} \left\{ \text{sign}\left( \boldsymbol{s} \right) = -1 \right\}$
-    \STATE \hspace{10mm} \textbf{if} $\boldsymbol{H}\boldsymbol{\hat{c}} = \boldsymbol{0}$ \textbf{do}
-    \STATE \hspace{10mm} \textbf{return} $\boldsymbol{\hat{c}}$
+    \STATE \hspace{5mm} $\bm{r} \leftarrow \bm{s} - \omega \left( \bm{s} - \bm{y} \right)$
+    \STATE \hspace{5mm} $\bm{s} \leftarrow \Pi_\eta \left(\bm{r} - \gamma \nabla h\left( \bm{r} \right) \right)$
+    \STATE \hspace{5mm} $\hat{\bm{c}} \leftarrow \mathbbm{1}_{\left\{ \bm{s} \le 0 \right\}}$
+    \STATE \hspace{5mm} \textbf{if} $\bm{H}\hat{\bm{c}}^\text{T} = \bm{0}$ \textbf{do}
+    \STATE \hspace{10mm} \textbf{return} $\hat{\bm{c}}$
     \STATE \hspace{5mm} \textbf{end if}
     \STATE \textbf{end for}
-    \STATE $\textcolor{KITblue}{\text{Estimate $N$ wrong bit indices $\mathcal{I} = \{i_1,\ldots,i_N\}$}}$
-    \STATE $\textcolor{KITblue}{\text{Generate candidate list $\left(\hat{\boldsymbol{c}}_{l}\right)_{l=1}^{2^N}$ by varying bits in $\mathcal{I}$}}$\vspace{1mm}
-    \STATE $\textcolor{KITblue}{\textbf{return ml\textunderscore in\textunderscore the\textunderscore list}\left(\left(\hat{\boldsymbol{c}}_l\right)_{1=1}^{2^N}\right)}$
+    \STATE $\textcolor{KITblue}{\text{Estimate $N$ wrong bit indices $\mathcal{I}_N = \{i_1,\ldots,i_N\}$}}$
+    \STATE $\textcolor{KITblue}{\text{Generate candidate list $\left(\hat{\bm{c}}_{l}\right)_{l=1}^{2^N}$ by varying bits in $\mathcal{I}_N$}}$\vspace{1mm}
+    \STATE $\textcolor{KITblue}{\textbf{return ml\textunderscore in\textunderscore the\textunderscore list}\left(\left(\hat{\bm{c}}_l\right)_{l=1}^{2^N}\right)}$
     \end{algorithmic}
 \end{algorithm}
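Combining the pieces, the decoder of Algorithm \ref{alg:improved} could then be prototyped as the proximal loop followed by the two highlighted steps. The window of 50 iterations kept for the oscillation measurement and all parameter defaults are arbitrary choices of this sketch; `grad_h`, `select_bits`, and `ml_in_the_list` are the toy helpers from the previous snippets.

```python
def improved_decode(y, H, checks, K=200, N=4,
                    omega=0.05, gamma=0.05, eta=1.5, window=50):
    s = np.zeros_like(y)
    trace = []                                  # history of grad h for select_bits
    for _ in range(K):
        r = s - omega * (s - y)
        g = grad_h(r, checks)
        trace.append(g)
        s = np.clip(r - gamma * g, -eta, eta)
        c_hat = (s <= 0).astype(int)
        if not ((H @ c_hat) % 2).any():
            return c_hat                        # converged to a valid codeword
    # No valid codeword after K iterations: estimate N suspect bits and run
    # the ML-in-the-list step on the 2^N candidate words.
    idx = select_bits(np.array(trace[-window:]), N)
    return ml_in_the_list(c_hat, idx, H)
```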