# Source: banking-breakdown/banking_breakdown/statement_parser.py (100 lines, 3.2 KiB, Python)

import typing
import pandas as pd
from banking_breakdown import types
import json
import re
import numpy as np
# def _read_regex_dict(regex_file: str = "res/category_regexes.json"):
# with open(regex_file, 'r') as f:
# return json.load(f)
#
#
# def _tag_with_category(df: pd.DataFrame) -> pd.DataFrame:
# regex_dict = _read_regex_dict()
#
# return df
#
#
# def _compute_total_balance(df: pd.DataFrame) -> pd.DataFrame:
# stripped_df = pd.DataFrame(
# {'t': df["Valutadatum"], 'value': df["Saldo nach Buchung"]})
#
# stripped_df.index = stripped_df['t']
# gb = stripped_df.groupby(pd.Grouper(freq='M'))
#
# result = gb.tail(1)['value'].reset_index()
#
# return result
#
#
# def _compute_net_income(df: pd.DataFrame) -> pd.DataFrame:
# stripped_df = pd.DataFrame({'t': df["Valutadatum"], 'value': df["Betrag"]})
#
# stripped_df.index = stripped_df['t']
# gb = stripped_df.groupby(pd.Grouper(freq='M'))
#
# result = gb["value"].sum().reset_index()
# return result
#
#
# def _compute_category_overview(df: pd.DataFrame) -> pd.DataFrame:
# categories = ["Social life", "Other", "Food", "Hobbies",
# "Rent \\& Utilities", "Education", "Transportation"]
# values = np.array([10, 12, 53, 12, 90, 23, 32])
# values = values / values.sum() * 100
# values = np.round(values, decimals=1)
# values[-1] += 100 - np.sum(values)
#
# category_overview_df = pd.DataFrame(
# {"category": categories, "value": values})
#
# return category_overview_df
#
#
# def _compute_detailed_balance(df: pd.DataFrame) -> pd.DataFrame:
# return pd.DataFrame({'t': df["Valutadatum"],
# 'value': df["Saldo nach Buchung"]})
#
#
# def parse_statement(filename: str) -> types.ReportData:
# df = pd.read_csv(filename, delimiter=';', decimal=",")
# df["Valutadatum"] = pd.to_datetime(df["Valutadatum"], format='%d.%m.%Y')
#
# category_overview_df = _compute_category_overview(df)
# total_balance_df = _compute_total_balance(df)
# net_income_df = _compute_net_income(df)
# detailed_balance_df = _compute_detailed_balance(df)
#
# return types.ReportData(category_overview_df,
# net_income_df,
# total_balance_df,
# detailed_balance_df)
#
#
# def main():
# report_data = parse_statement("../res/bank_statement_2023.csv")
#
#
# if __name__ == "__main__":
# main()
def get_stripped_statement(filename: str) -> pd.DataFrame:
    """Read a semicolon-delimited bank statement CSV into a reduced table.

    The value date column ("Valutadatum", expected as ``DD.MM.YYYY``) is
    re-formatted as an ISO ``YYYY-MM-DD`` string. Every other column is
    copied exactly as ``pd.read_csv`` parsed it. No decimal separator is
    configured, so amounts written with a decimal comma (e.g. ``-12,34``)
    are kept as text rather than converted to floats.

    Args:
        filename: Path of the CSV export (anything ``pd.read_csv`` accepts).

    Returns:
        A DataFrame with the columns ``t``, ``other party``, ``value``,
        ``balance``, ``category`` (an empty string in every row),
        ``description`` and ``purpose``, one row per CSV record.

    Raises:
        KeyError: If one or more required source columns are missing.
        ValueError: If a value date does not match ``%d.%m.%Y``
            (raised by ``pd.to_datetime``).
    """
    required_columns = (
        "Valutadatum",
        "Name Zahlungsbeteiligter",
        "Betrag",
        "Saldo nach Buchung",
        "Buchungstext",
        "Verwendungszweck",
    )

    df = pd.read_csv(filename, delimiter=';')

    # Report every missing column at once instead of failing on whichever
    # one happens to be accessed first.
    missing = [col for col in required_columns if col not in df.columns]
    if missing:
        raise KeyError(
            f"bank statement is missing required column(s): {missing}")

    # Parse the German-style date, then emit it as an ISO date string.
    iso_dates = (pd.to_datetime(df["Valutadatum"], format='%d.%m.%Y')
                 .dt.strftime('%Y-%m-%d'))

    return pd.DataFrame({
        't': iso_dates,
        'other party': df["Name Zahlungsbeteiligter"],
        'value': df["Betrag"],
        'balance': df["Saldo nach Buchung"],
        # Placeholder column: one empty string per statement row.
        'category': [''] * len(df.index),
        'description': df["Buchungstext"],
        'purpose': df["Verwendungszweck"],
    })