banking-breakdown/banking_breakdown/statement_parser.py

81 lines
2.3 KiB
Python

import typing
import pandas as pd
from banking_breakdown import types
import json
import re
import numpy as np
def _read_regex_dict(regex_file: str = "res/category_regexes.json"):
    """Load the category regex table from a JSON file.

    Args:
        regex_file: Path of the JSON file to read. The default is a relative
            path, so it is resolved against the current working directory.

    Returns:
        Whatever ``json.load`` produces for the file's top-level value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Decode explicitly as UTF-8 rather than with the platform's locale
    # codec, so non-ASCII characters in the patterns are read the same way
    # on every system.
    with open(regex_file, 'r', encoding="utf-8") as f:
        return json.load(f)
def _tag_with_category(df: pd.DataFrame) -> pd.DataFrame:
    """Placeholder for category tagging; hands *df* back unchanged.

    The regex table is loaded but not applied to the data yet. Any error
    raised while loading it propagates to the caller.
    """
    _category_regexes = _read_regex_dict()  # loaded, not used so far
    return df
def _compute_total_balance(df: pd.DataFrame) -> pd.DataFrame:
    """Return one balance value per calendar month.

    For every month that contains bookings, the last row of that month is
    kept and its "Saldo nach Buchung" value is reported.

    Args:
        df: Statement data with a datetime "Buchungstag" column and a
            "Saldo nach Buchung" column.

    Returns:
        A DataFrame with columns ``t`` (the booking date of the kept row,
        not the month end) and ``value`` (the balance on that row).
    """
    balances = pd.DataFrame(
        {'t': df["Buchungstag"], 'value': df["Saldo nach Buchung"]})
    # Group on the booking date; the 't' column is kept alongside the index.
    balances.index = balances['t']

    # Use the offset object rather than the 'M' string alias: the alias is
    # deprecated in recent pandas releases ('ME' replaces it), whereas
    # MonthEnd() means the same thing in old and new versions alike.
    per_month = balances.groupby(pd.Grouper(freq=pd.offsets.MonthEnd()))

    # NOTE(review): tail(1) picks the last row of each month in row order;
    # this presumably expects the statement to be in chronological order -
    # confirm against the bank's export format.
    return per_month.tail(1)['value'].reset_index()
def _compute_net_income(df: pd.DataFrame) -> pd.DataFrame:
    """Return the sum of all booked amounts per calendar month.

    Args:
        df: Statement data with a datetime "Buchungstag" column and a
            numeric "Betrag" column.

    Returns:
        A DataFrame with columns ``t`` (month-end timestamp of each bin) and
        ``value`` (sum of "Betrag" within that month). Months without any
        booking that lie between the first and last booking appear with a
        sum of 0.
    """
    amounts = pd.DataFrame({'t': df["Buchungstag"], 'value': df["Betrag"]})

    # Use the offset object rather than the 'M' string alias: the alias is
    # deprecated in recent pandas releases ('ME' replaces it), whereas
    # MonthEnd() means the same thing in old and new versions alike.
    monthly = amounts.resample(rule=pd.offsets.MonthEnd(), on="t")
    return monthly.sum().reset_index()
def _compute_category_overview(df: pd.DataFrame) -> pd.DataFrame:
    """Return a placeholder percentage breakdown over seven categories.

    The input frame is not read; labels and raw weights are hard-coded.
    The weights are converted to percentages rounded to one decimal, and
    the rounding remainder is added to the last entry so the column sums
    to 100.

    Returns:
        A DataFrame with columns ``category`` and ``value``.
    """
    labels = ["A", "B", "C", "D", "E", "F", "G"]
    raw_weights = np.array([10, 12, 53, 12, 90, 23, 32])

    percentages = np.round(raw_weights / raw_weights.sum() * 100, decimals=1)
    # Push whatever rounding left over onto the final category.
    remainder = 100 - np.sum(percentages)
    percentages[-1] += remainder

    return pd.DataFrame({"category": labels, "value": percentages})
def _compute_detailed_balance(df: pd.DataFrame) -> pd.DataFrame:
    """Return the balance after every single booking.

    Args:
        df: Statement data with "Buchungstag" and "Saldo nach Buchung"
            columns.

    Returns:
        A DataFrame with columns ``t`` (booking date) and ``value``
        (balance after that booking), one row per input row.
    """
    booking_days = df["Buchungstag"]
    balance_after = df["Saldo nach Buchung"]
    return pd.DataFrame({'t': booking_days, 'value': balance_after})
def parse_statement(filename: str) -> types.ReportData:
    """Read a statement CSV and derive all report tables from it.

    The file is read as semicolon-separated with a decimal comma, and the
    "Buchungstag" column is parsed as ``DD.MM.YYYY`` dates before the
    individual tables are computed.

    Args:
        filename: Path of the CSV statement to read.

    Returns:
        A ``types.ReportData`` built positionally from the category
        overview, net income, total balance and detailed balance tables,
        in that order.
    """
    statement = pd.read_csv(filename, delimiter=';', decimal=",")
    statement["Buchungstag"] = pd.to_datetime(
        statement["Buchungstag"], format='%d.%m.%Y')

    return types.ReportData(
        _compute_category_overview(statement),
        _compute_net_income(statement),
        _compute_total_balance(statement),
        _compute_detailed_balance(statement),
    )
def main():
    """Parse the bundled 2023 statement; the result is currently discarded."""
    statement_path = "../res/banking_statement_2023.csv"
    parse_statement(statement_path)
# Call main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()