10
10
import pandas as pd
11
11
from fuzzywuzzy import fuzz
12
12
13
- from src .components .utils .kpi_mapping import KPI_MAPPING , KPI_CATEGORY
13
+ from src .components .utils .kpi_mapping import get_kpi_mapping_category
14
14
from .base_curator import BaseCurator
15
15
16
16
logger = logging .getLogger (__name__ )
@@ -50,7 +50,7 @@ def __init__(
50
50
self .company_to_exclude = company_to_exclude
51
51
random .seed (seed )
52
52
53
- def run (self , extraction_folder , annotation_excels , output_folder ):
53
+ def run (self , extraction_folder , annotation_excels , output_folder , kpi_df ):
54
54
"""Create ESG table dataset.
55
55
56
56
It saves all examples in a csv.
@@ -69,7 +69,7 @@ def run(self, extraction_folder, annotation_excels, output_folder):
69
69
70
70
examples_list = []
71
71
for excel_file in self .annotation_excels :
72
- examples_excel = self .process_single_annotation_file (excel_file )
72
+ examples_excel = self .process_single_annotation_file (excel_file , kpi_df )
73
73
examples_list .extend (examples_excel )
74
74
75
75
df_result = pd .DataFrame (examples_list ).reset_index (drop = True )
@@ -211,7 +211,7 @@ def __obtain_filename_to_strarr(self):
211
211
212
212
return filename_to_stringarr
213
213
214
- def __clean_annotation_file (self , df , annotation_filepath ):
214
+ def __clean_annotation_file (self , df , annotation_filepath , kpi_df ):
215
215
"""Clean annotation file.
216
216
217
217
Returns a clean dataframe after dropping all NaN rows,
@@ -260,6 +260,11 @@ def get_pdf_name_right(f):
260
260
261
261
df ["source_file" ] = df ["source_file" ].apply (get_pdf_name_right )
262
262
263
+ # get kpi mappings
264
+ kpi_dict = get_kpi_mapping_category (kpi_df )
265
+ KPI_MAPPING = kpi_dict ["KPI_MAPPING" ]
266
+ KPI_CATEGORY = kpi_dict ["KPI_CATEGORY" ]
267
+
263
268
# KPI mapping helper. It does not need to be a class method.
264
269
def map_kpi (r ):
265
270
try :
@@ -356,7 +361,7 @@ def __create_table_meta(self):
356
361
return meta_dict
357
362
358
363
def process_single_annotation_file (
359
- self , annotation_filepath , sheet_name = "data_ex_in_xls"
364
+ self , annotation_filepath , kpi_df , sheet_name = "data_ex_in_xls" ,
360
365
):
361
366
"""Create examples for a single excel file.
362
367
@@ -388,7 +393,7 @@ def process_single_annotation_file(
388
393
return [[]]
389
394
390
395
# clean dataframe
391
- df = self .__clean_annotation_file (df , annotation_filepath )
396
+ df = self .__clean_annotation_file (df , annotation_filepath , kpi_df )
392
397
393
398
# table_meta contains {pdf_name:{page: list of table csvs, ...}, ...}
394
399
table_meta = self .__create_table_meta ()
0 commit comments