|
3 | 3 |
|
4 | 4 | Usage:
|
5 | 5 | hdfc-analytics account --statement-csv=<statement-csv> --categories-config=<categories-config-toml> --column-config=<column-mapping-toml> [--llm=<llm-name>] [--llm-host=<llm-host-url>]
|
| 6 | + hdfc-analytics cc --statement-dir=<statement-dir> --name=<name> --password=<password> --categories-config=<categories-config-toml> --column-config=<column-mapping-toml> [--llm=<llm-name>] [--llm-host=<llm-host-url>] |
6 | 7 |
|
7 | 8 | Options:
|
8 |
| - --statement-csv=<sattement-csv> Path to bank account / credit card statement csv. |
| 9 | + --statement-csv=<statement-csv> Path to bank account statement csv. |
| 10 | + --statement-dir=<statement-dir> Path to bank credit card statement directory. |
| 11 | + --name=<name> Your name as written on the statement (applicable for CC analytics only) |
| 12 | + --password=<password> Password to open CC statement pdf (format is: first 5 letters of your first name followed by DDMM of your DOB) |
9 | 13 | --categories-config=<categories-config-toml> Path to file with categories configs.
|
10 | 14 | --column-config=<column-mapping-toml> Path to file with column mapping configs.
|
11 | 15 | --llm=<llm-name> Flag to enable LLMs to tag transactions.
|
12 | 16 | --llm-host=<llm-host-url> LLM host. Applicable for Ollama or Huggingface served models.
|
13 | 17 | """
|
14 | 18 |
|
| 19 | +import glob |
| 20 | +import os |
15 | 21 | from typing import List
|
16 | 22 |
|
| 23 | +import hdfc_cc_parser |
17 | 24 | import pandas as pd
|
18 | 25 | import toml
|
19 | 26 | from docopt import docopt
|
@@ -49,12 +56,44 @@ def main():
|
49 | 56 |
|
50 | 57 | plot_df(categorized_df)
|
51 | 58 |
|
| 59 | + if args["cc"]: |
| 60 | + statement_dir = args["--statement-dir"] |
| 61 | + categories_config = args["--categories-config"] |
| 62 | + column_config = args["--column-config"] |
| 63 | + llm_host = args["--llm-host"] |
| 64 | + llm = args["--llm"] |
| 65 | + |
| 66 | + pdf_files = glob.glob(os.path.join(statement_dir, "*.PDF")) |
| 67 | + dfs = [] |
| 68 | + for pdf_file in pdf_files: |
| 69 | + output = hdfc_cc_parser.parse_cc_statement(pdf_file, args["--name"], args["--password"]) |
| 70 | + df = pd.DataFrame([row.split(',') for row in output.split('\n') if row], columns=['date', 'description', 'rp', 'amount']) |
| 71 | + dfs.append(df) |
| 72 | + |
| 73 | + # Concatenate all the DataFrames |
| 74 | + combined_df = pd.concat(dfs, ignore_index=True) |
| 75 | + |
| 76 | + # Load the mappings |
| 77 | + column_mappings = load_column_mappings(column_config, statement_type="cc") |
| 78 | + |
| 79 | + # Apply the column mappings |
| 80 | + combined_df = map_columns(combined_df, column_mappings) |
| 81 | + |
| 82 | + categorizer = StatementCategorizer(categories_config, llm_host, llm) |
| 83 | + |
| 84 | + # Categorize the DataFrame |
| 85 | + categorized_df = categorizer.categorize_dataframe(combined_df) |
| 86 | + |
| 87 | + plot_df(categorized_df, statement_type="cc") |
| 88 | + |
52 | 89 |
|
53 | 90 | # Load column mappings from a TOML file
|
54 |
| -def load_column_mappings(config_path: str) -> List[str]: |
| 91 | +def load_column_mappings( |
| 92 | + config_path: str, statement_type: str = "account" |
| 93 | +) -> List[str]: |
55 | 94 | with open(config_path, "r") as config_file:
|
56 | 95 | config = toml.load(config_file)
|
57 |
| - return config["default"] # Assuming there"s only one mapping set, "default" |
| 96 | + return config[statement_type] # Mapping set for the requested statement type, e.g. "account" (the default) or "cc"
58 | 97 |
|
59 | 98 |
|
60 | 99 | # Apply the column mappings to a DataFrame
|
|
0 commit comments