import typing import pandas as pd from banking_breakdown import types import json import re import numpy as np def _escape_string(to_escape: str): return to_escape.translate(str.maketrans({"&": r"\&"})) def _compute_total_balance(df: pd.DataFrame) -> pd.DataFrame: stripped_df = pd.DataFrame( {'t': df["t"], 'value': df["balance"]}) stripped_df.index = stripped_df['t'] gb = stripped_df.groupby(pd.Grouper(freq='M')) result = gb.tail(1)['value'].reset_index() return result def _compute_net_income(df: pd.DataFrame) -> pd.DataFrame: df.index = df['t'] income_df = df.loc[df['value'] > 0] expenses_df = df.loc[df['value'] < 0] income_df = income_df.groupby(pd.Grouper(freq='M'))[ 'value'].sum().reset_index().round(decimals=2) expenses_df = expenses_df.groupby(pd.Grouper(freq='M'))[ 'value'].sum().reset_index().round(decimals=2) t = income_df['t'] income = income_df['value'].round(decimals=2) expenses = expenses_df['value'].round(decimals=2) net = (income + expenses).round(decimals=2) result_df = pd.DataFrame( {'t': t, 'income': income, 'expenses': expenses, 'net': net}) return result_df def _compute_category_overview(df: pd.DataFrame) -> pd.DataFrame: df = df.loc[df['value'] < 0] df = df.drop('t', axis=1) df = df.groupby(['category']).sum().reset_index() values = (df['value'] / df['value'].sum() * 100).to_numpy() values[-1] += 100 - np.sum(values) values = np.round(values, decimals=1) categories = [_escape_string(category) for category in df['category']] category_overview_df = pd.DataFrame( {"category": categories, "value": values}) category_overview_df = category_overview_df.sort_values('value', ascending=False) return category_overview_df def _compute_expenses_by_category(complete_df: pd.DataFrame) -> pd.DataFrame: complete_df = complete_df.loc[complete_df['value'] < 0].copy() complete_df['value'] = -complete_df['value'] complete_df.index = complete_df['t'] complete_gb = complete_df.groupby(pd.Grouper(freq='M')) categories = complete_df['category'].unique() data_dict = {category: [] for category in categories} for (month_date, month_df) in complete_gb: month_df = month_df.drop('t', axis=1).reset_index().drop('t', axis=1) category_df = month_df.groupby(['category']).sum().reset_index() for _, row in category_df.iterrows(): data_dict[row['category']].append(row['value']) non_listed = list(set(categories) - set(category_df['category'])) for category in non_listed: data_dict[category].append(0) result = pd.DataFrame(data_dict) result = result.reindex(result.mean().sort_values(ascending=False).index, axis=1) result = result.round(decimals=2) result['t'] = complete_gb.tail(1).drop('t', axis=1).reset_index()['t'] return result def _compute_detailed_balance(df: pd.DataFrame) -> pd.DataFrame: return pd.DataFrame({'t': df["t"], 'value': df["balance"]}) def parse_statement(filename: str) -> types.ReportData: df = pd.read_csv(filename) df["t"] = pd.to_datetime(df["t"], format='%Y-%m-%d') category_overview_df = _compute_category_overview(df) total_balance_df = _compute_total_balance(df) net_income_df = _compute_net_income(df) detailed_balance_df = _compute_detailed_balance(df) expenses_by_category_df = _compute_expenses_by_category(df) return types.ReportData(category_overview_df, expenses_by_category_df, net_income_df, total_balance_df, detailed_balance_df, ) def main(): report_data = parse_statement("../res/bank_statement_2023_categorized.csv") if __name__ == "__main__": main()