ba-thesis/sw/utility/misc.py

73 lines
2.6 KiB
Python

import unicodedata
import re
import typing
import pandas as pd
import numpy as np
def slugify(value, allow_unicode=False):
    """Turn an arbitrary value into a slug.

    Adapted from Django's ``django/utils/text.py``
    (https://github.com/django/django/blob/master/django/utils/text.py).

    Processing steps, in order:

    1. ``value`` is converted with ``str()``.
    2. With ``allow_unicode`` false, the text is NFKD-normalized and every
       character that cannot be encoded as ASCII is dropped. Otherwise the
       text is NFKC-normalized and non-ASCII characters are kept.
    3. The text is lowercased and every character that is not a word
       character, whitespace, or a hyphen is removed.
    4. Each run of whitespace and/or hyphens becomes a single hyphen.
    5. Leading and trailing hyphens and underscores are stripped.

    :param value: Object to slugify.
    :param allow_unicode: Keep non-ASCII word characters if true.
    :return: The slug as a string (possibly empty).
    """
    text = str(value)
    if not allow_unicode:
        # Decompose accented characters so the base letter survives the
        # lossy ASCII round trip below.
        decomposed = unicodedata.normalize('NFKD', text)
        text = decomposed.encode('ascii', 'ignore').decode('ascii')
    else:
        text = unicodedata.normalize('NFKC', text)

    cleaned = re.sub(r'[^\w\s-]', '', text.lower())
    hyphenated = re.sub(r'[-\s]+', '-', cleaned)
    return hyphenated.strip('-_')
def pgf_reformat_data_3d(results: typing.Mapping, x_param_name: str,
                         y_param_name: str,
                         z_param_names: typing.Sequence[str]) -> pd.DataFrame:
    """Reformat simulation results into a long-format table for pgfplots.

    :param results: Mapping of the form
        {params1: results1, params2: results2, ...} (the docstring of the
        original code names GenericMultithreadedSimulator as the producer).
        Each paramsN must be usable as a mapping key (i.e. hashable) and
        support ``paramsN[name]`` lookup; each resultsN must support
        ``resultsN[name]`` lookup:
            paramsN = {param_name_1: val, param_name_2: val, ...}
            resultsN = {result_name_1: val, result_name_2: val, ...}
    :param x_param_name: Key looked up in every paramsN; becomes the first
        column of the returned frame.
    :param y_param_name: Key looked up in every paramsN; becomes the second
        column of the returned frame.
    :param z_param_names: Keys looked up in every resultsN; each one becomes
        a further column.
    :return: pandas DataFrame with one row per entry of ``results``, sorted
        by the x column and then the y column:
            {x_param_name: [x1, x1, x1, ..., x2, x2, x2, ...],
             y_param_name: [y1, y2, y3, ..., y1, y2, y3, ...],
             z_param_name: [z11, z21, z31, ..., z12, z22, z32, ...]}
        All values are stored in float arrays. The index is not reset, so
        it still refers to the iteration order of ``results``.
    :raises KeyError: If a parameter or result name is missing from a
        dict-like entry.
    """
    n_points = len(results)

    # Pre-allocate one float column per requested quantity.
    x = np.zeros(n_points)
    y = np.zeros(n_points)
    zs = {name: np.zeros(n_points) for name in z_param_names}

    # Fill row i from the i-th (params, result) pair.
    for i, (params, result) in enumerate(results.items()):
        x[i] = params[x_param_name]
        y[i] = params[y_param_name]
        for name in z_param_names:
            zs[name][i] = result[name]

    # Column order: x, y, then the z columns in the order they were given.
    df = pd.DataFrame({x_param_name: x, y_param_name: y, **zs})
    return df.sort_values(by=[x_param_name, y_param_name])
def count_bit_errors(x: np.ndarray, x_hat: np.ndarray) -> int:
    """Count the number of positions in which two words differ.

    Both inputs are converted with ``np.asarray`` before comparing, so plain
    Python sequences are compared element-wise as well (comparing two lists
    directly with ``!=`` would yield a single bool instead of one per
    element).

    :param x: First word (array-like).
    :param x_hat: Second word (array-like), compared element-wise with ``x``.
    :return: Number of differing elements as a built-in ``int``.
    """
    differing = np.asarray(x) != np.asarray(x_hat)
    return int(np.count_nonzero(differing))