import typing import pandas as pd from banking_breakdown import types import json import re import numpy as np # def _read_regex_dict(regex_file: str = "res/category_regexes.json"): # with open(regex_file, 'r') as f: # return json.load(f) # # # def _tag_with_category(df: pd.DataFrame) -> pd.DataFrame: # regex_dict = _read_regex_dict() # # return df # # # def _compute_total_balance(df: pd.DataFrame) -> pd.DataFrame: # stripped_df = pd.DataFrame( # {'t': df["Valutadatum"], 'value': df["Saldo nach Buchung"]}) # # stripped_df.index = stripped_df['t'] # gb = stripped_df.groupby(pd.Grouper(freq='M')) # # result = gb.tail(1)['value'].reset_index() # # return result # # # def _compute_net_income(df: pd.DataFrame) -> pd.DataFrame: # stripped_df = pd.DataFrame({'t': df["Valutadatum"], 'value': df["Betrag"]}) # # stripped_df.index = stripped_df['t'] # gb = stripped_df.groupby(pd.Grouper(freq='M')) # # result = gb["value"].sum().reset_index() # return result # # # def _compute_category_overview(df: pd.DataFrame) -> pd.DataFrame: # categories = ["Social life", "Other", "Food", "Hobbies", # "Rent \\& Utilities", "Education", "Transportation"] # values = np.array([10, 12, 53, 12, 90, 23, 32]) # values = values / values.sum() * 100 # values = np.round(values, decimals=1) # values[-1] += 100 - np.sum(values) # # category_overview_df = pd.DataFrame( # {"category": categories, "value": values}) # # return category_overview_df # # # def _compute_detailed_balance(df: pd.DataFrame) -> pd.DataFrame: # return pd.DataFrame({'t': df["Valutadatum"], # 'value': df["Saldo nach Buchung"]}) # # # def parse_statement(filename: str) -> types.ReportData: # df = pd.read_csv(filename, delimiter=';', decimal=",") # df["Valutadatum"] = pd.to_datetime(df["Valutadatum"], format='%d.%m.%Y') # # category_overview_df = _compute_category_overview(df) # total_balance_df = _compute_total_balance(df) # net_income_df = _compute_net_income(df) # detailed_balance_df = _compute_detailed_balance(df) # # return 
# types.ReportData(category_overview_df,
#                             net_income_df,
#                             total_balance_df,
#                             detailed_balance_df)
#
#
# def main():
#     report_data = parse_statement("../res/banking_statement_2023.csv")
#
#
# if __name__ == "__main__":
#     main()


def get_stripped_statement(filename: str) -> pd.DataFrame:
    """Load a banking statement CSV and return a reduced, renamed view of it.

    The file is read as a semicolon-separated CSV whose numbers use a comma
    as the decimal mark. The "Valutadatum" column is parsed as a date in
    ``DD.MM.YYYY`` form.

    Args:
        filename: Path of the CSV statement to read.

    Returns:
        A DataFrame with the columns ``t``, ``other party``, ``value``,
        ``balance afterwards``, ``description``, ``purpose`` (taken from the
        corresponding statement columns) and ``category``, which holds an
        empty string for every row.

    Raises:
        KeyError: If one of the expected statement columns is missing.
    """
    # Output column name -> column name in the statement file.
    column_sources = {
        't': "Valutadatum",
        'other party': "Name Zahlungsbeteiligter",
        'value': "Betrag",
        'balance afterwards': "Saldo nach Buchung",
        'description': "Buchungstext",
        'purpose': "Verwendungszweck",
    }

    raw = pd.read_csv(filename, sep=';', decimal=",")
    raw["Valutadatum"] = pd.to_datetime(raw["Valutadatum"],
                                        format='%d.%m.%Y')

    stripped = pd.DataFrame(
        {target: raw[source] for target, source in column_sources.items()})

    # Every row starts with an empty category string.
    stripped['category'] = [''] * len(stripped)
    return stripped