# ba-thesis/sw/utility/misc.py

import unicodedata
import re
import typing
import pandas as pd
import numpy as np


def slugify(value, allow_unicode=False):
    """
    Turn an arbitrary value into a slug.

    Adapted from Django's ``django.utils.text.slugify``
    (https://github.com/django/django/blob/master/django/utils/text.py).

    The value is converted to ``str`` and then processed as follows:

    * Unicode normalisation: NFKC if ``allow_unicode`` is true; otherwise
      NFKD followed by dropping every character that cannot be encoded
      as ASCII.
    * Lowercasing.
    * Removal of every character that is not a word character,
      whitespace or a hyphen.
    * Collapsing each run of whitespace and/or hyphens into one hyphen.
    * Stripping leading and trailing hyphens and underscores.

    :param value: Object to slugify (passed through ``str``).
    :param allow_unicode: Keep non-ASCII word characters if true.
    :return: The slug as a string.
    """
    text = str(value)

    if not allow_unicode:
        # Decompose accented characters so that the base letter survives
        # the lossy ASCII round trip below.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)

    lowered = text.lower()
    cleaned = re.sub(r'[^\w\s-]', '', lowered)
    dashed = re.sub(r'[-\s]+', '-', cleaned)
    return dashed.strip('-_')


def pgf_reformat_data_3d(results: typing.Mapping, x_param_name: str,
                         y_param_name: str,
                         z_param_names: typing.Sequence[str]) -> pd.DataFrame:
    """Reformat the results obtained from the GenericMultithreadedSimulator
    into a form usable by pgfplots.

    :param results: Results from GenericMultiThreadedSimulator: a mapping
    of the form {params1: results1, params2: results2, ...}, where
        paramsN is indexable by parameter name
            (paramsN[param_name] -> val),
        resultsN is indexable by result name
            (resultsN[result_name] -> val).
    NOTE(review): since paramsN is used as a mapping key it has to be
    hashable, so it cannot be a plain dict -- presumably the simulator
    produces some hashable mapping type; verify against the caller.
    :param x_param_name: Name of the parameter (looked up in paramsN)
    that becomes the x column.
    :param y_param_name: Name of the parameter (looked up in paramsN)
    that becomes the y column.
    :param z_param_names: Names of the results (looked up in resultsN);
    each one becomes its own z column.
    :return: pandas DataFrame, with rows sorted by x and then by y, of the
    following form:
        {x_param_name: [x1, x1, x1, ..., x2, x2, x2, ...],
        y_param_name: [y1, y2, y3, ..., y1, y2, y3, ...],
        z_param_name: [z11, z21, z31, ..., z12, z22, z32, ...]}
    All columns are stored as floats. The row index still refers to the
    iteration order of ``results`` (it is not reset after sorting).
    """
    n_points = len(results)

    # One float buffer per output column, one entry per parameter set
    x = np.zeros(n_points)
    y = np.zeros(n_points)
    zs = {name: np.zeros(n_points) for name in z_param_names}

    # Populate the buffers in the iteration order of the results mapping
    for i, (params, result) in enumerate(results.items()):
        x[i] = params[x_param_name]
        y[i] = params[y_param_name]
        for name, column in zs.items():
            column[i] = result[name]

    # Column order: x, y, then the z columns in the order given
    df = pd.DataFrame({x_param_name: x, y_param_name: y, **zs})

    return df.sort_values(by=[x_param_name, y_param_name])


def count_bit_errors(x: np.ndarray, x_hat: np.ndarray) -> int:
    """Count the number of different bits between two words.

    The words are compared element by element. Both arguments are passed
    through ``np.asarray`` first, so plain lists or tuples are compared
    element-wise as well instead of as whole objects.

    :param x: Reference word (array-like).
    :param x_hat: Word to compare against the reference (array-like,
    expected to have the same shape as ``x``).
    :return: Number of positions at which the two words differ, as a
    builtin int.
    """
    mismatches = np.asarray(x) != np.asarray(x_hat)
    return int(np.count_nonzero(mismatches))