123 lines
3.9 KiB
Python
123 lines
3.9 KiB
Python
import typing
|
|
import pandas as pd
|
|
from banking_breakdown import types
|
|
import json
|
|
import re
|
|
import numpy as np
|
|
|
|
|
|
def _escape_string(to_escape: str):
|
|
return to_escape.translate(str.maketrans({"&": r"\&"}))
|
|
|
|
|
|
def _compute_total_balance(df: pd.DataFrame) -> pd.DataFrame:
|
|
stripped_df = pd.DataFrame(
|
|
{'t': df["t"], 'value': df["balance"]})
|
|
|
|
stripped_df.index = stripped_df['t']
|
|
gb = stripped_df.groupby(pd.Grouper(freq='M'))
|
|
|
|
result = gb.tail(1)['value'].reset_index()
|
|
|
|
return result
|
|
|
|
|
|
def _compute_net_income(df: pd.DataFrame) -> pd.DataFrame:
|
|
df.index = df['t']
|
|
income_df = df.loc[df['value'] > 0]
|
|
expenses_df = df.loc[df['value'] < 0]
|
|
|
|
income_df = income_df.groupby(pd.Grouper(freq='M'))[
|
|
'value'].sum().reset_index().round(decimals=2)
|
|
expenses_df = expenses_df.groupby(pd.Grouper(freq='M'))[
|
|
'value'].sum().reset_index().round(decimals=2)
|
|
|
|
t = income_df['t']
|
|
income = income_df['value'].round(decimals=2)
|
|
expenses = expenses_df['value'].round(decimals=2)
|
|
net = (income + expenses).round(decimals=2)
|
|
|
|
result_df = pd.DataFrame(
|
|
{'t': t, 'income': income, 'expenses': expenses, 'net': net})
|
|
|
|
return result_df
|
|
|
|
|
|
def _compute_category_overview(df: pd.DataFrame) -> pd.DataFrame:
|
|
df = df.loc[df['value'] < 0]
|
|
df = df.drop('t', axis=1)
|
|
df = df.groupby(['category']).sum().reset_index()
|
|
values = (df['value'] / df['value'].sum() * 100).to_numpy()
|
|
values[-1] += 100 - np.sum(values)
|
|
values = np.round(values, decimals=1)
|
|
categories = [_escape_string(category) for category in df['category']]
|
|
|
|
category_overview_df = pd.DataFrame(
|
|
{"category": categories, "value": values})
|
|
|
|
category_overview_df = category_overview_df.sort_values('value',
|
|
ascending=False)
|
|
|
|
return category_overview_df
|
|
|
|
|
|
def _compute_expenses_by_category(complete_df: pd.DataFrame) -> pd.DataFrame:
|
|
complete_df = complete_df.loc[complete_df['value'] < 0].copy()
|
|
complete_df['value'] = -complete_df['value']
|
|
complete_df.index = complete_df['t']
|
|
|
|
complete_gb = complete_df.groupby(pd.Grouper(freq='M'))
|
|
|
|
categories = complete_df['category'].unique()
|
|
data_dict = {category: [] for category in categories}
|
|
|
|
for (month_date, month_df) in complete_gb:
|
|
month_df = month_df.drop('t', axis=1).reset_index().drop('t', axis=1)
|
|
|
|
category_df = month_df.groupby(['category']).sum().reset_index()
|
|
|
|
for _, row in category_df.iterrows():
|
|
data_dict[row['category']].append(row['value'])
|
|
|
|
non_listed = list(set(categories) - set(category_df['category']))
|
|
for category in non_listed:
|
|
data_dict[category].append(0)
|
|
|
|
result = pd.DataFrame(data_dict)
|
|
result = result.reindex(result.mean().sort_values(ascending=False).index,
|
|
axis=1)
|
|
result = result.round(decimals=2)
|
|
result['t'] = complete_gb.tail(1).drop('t', axis=1).reset_index()['t']
|
|
|
|
return result
|
|
|
|
|
|
def _compute_detailed_balance(df: pd.DataFrame) -> pd.DataFrame:
|
|
return pd.DataFrame({'t': df["t"],
|
|
'value': df["balance"]})
|
|
|
|
|
|
def parse_statement(filename: str) -> types.ReportData:
|
|
df = pd.read_csv(filename)
|
|
df["t"] = pd.to_datetime(df["t"], format='%Y-%m-%d')
|
|
|
|
category_overview_df = _compute_category_overview(df)
|
|
total_balance_df = _compute_total_balance(df)
|
|
net_income_df = _compute_net_income(df)
|
|
detailed_balance_df = _compute_detailed_balance(df)
|
|
expenses_by_category_df = _compute_expenses_by_category(df)
|
|
|
|
return types.ReportData(category_overview_df,
|
|
expenses_by_category_df,
|
|
net_income_df,
|
|
total_balance_df,
|
|
detailed_balance_df, )
|
|
|
|
|
|
def main():
|
|
report_data = parse_statement("../res/bank_statement_2023_categorized.csv")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|