47
47
from api .database import get_db , UploadedFile
48
48
from api .database import engine , Base
49
49
import chardet
50
+ import logging
51
+ import csv
52
+ from io import StringIO
50
53
51
54
# FastAPI application instance; the route decorators in this module register on it.
app = FastAPI()
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Global variable to store the last processed data
last_processed_data = dict.fromkeys(("unique_rows", "duplicate_rows"))
@@ -69,45 +73,82 @@ async def startup_event():
69
73
async def health():
    """Return the constant payload ``{"status": "ok"}``."""
    return dict(status="ok")
71
75
76
def detect_delimiter(sample_text: str) -> str:
    """Pick ``','`` or ``';'`` as the likelier delimiter of *sample_text*.

    The choice is a raw character count over the whole sample: the semicolon
    wins only when it occurs strictly more often than the comma, so ties
    (including a sample containing neither character) resolve to the comma.

    Args:
        sample_text: A slice of the decoded file to inspect.

    Returns:
        Either ``','`` or ``';'``.
    """
    if sample_text.count(';') > sample_text.count(','):
        return ';'
    return ','
72
81
@app.post("/upload_file")
async def upload_file(
    file: UploadFile = File(...),
    category: str = Form(...),
    db: Session = Depends(get_db),
):
    """Validate an uploaded CSV, normalise a semicolon layout, and store it.

    The encoding is guessed with ``chardet`` and must be on a fixed allow-list.
    The decoded text is parsed with pandas using the default comma separator.
    If that yields a single column and the first 1000 characters favour
    semicolons, the column is split on ``';'``, the first row becomes the
    header, and the re-serialised comma-separated CSV is stored. In every
    other case the uploaded bytes are stored unchanged.

    Args:
        file: The uploaded file; its ``filename`` is used for the duplicate
            check and in log/response messages.
        category: Category under which the file is stored; together with the
            filename it identifies an existing upload.
        db: Database session used for the duplicate lookup and the insert.

    Returns:
        ``{"message": ..., "id": ...}`` on success, otherwise a
        ``JSONResponse`` with status 400 (encoding not on the allow-list),
        409 (same filename already present in the category) or 500 (any other
        failure, after rolling back the session).
    """
    contents = await file.read()
    detection = chardet.detect(contents)
    encoding = detection["encoding"]
    encoding = encoding.lower() if encoding else None
    logger.info(f"Detected file encoding: '{encoding}'")

    supported_encodings = ('utf-8', 'utf-8-sig', 'iso-8859-1', 'windows-1252', 'ascii')
    try:
        # A failed detection leaves encoding as None, which is rejected here too.
        if encoding not in supported_encodings:
            logger.warning(f"Unsupported file encoding: {file.filename} (Detected: {encoding})")
            return JSONResponse(
                status_code=400,
                content={"message": f"Unsupported file encoding: {encoding}"},
            )

        # Decode once and reuse the text for both parsing and delimiter sniffing.
        text_data = contents.decode(encoding)
        df_raw = pd.read_csv(StringIO(text_data), header=None)

        # Default: the file already parsed into columns, so keep the upload
        # byte-for-byte. Previously this path referenced an undefined
        # variable and every multi-column CSV ended in a 500 response.
        stored_bytes = contents

        # If only one column exists, try splitting it
        if df_raw.shape[1] == 1:
            logger.warning(f"Only one column detected in {file.filename}. Attempting to split.")

            sample = text_data[:1000]  # Use first 1000 characters for detection
            detected_delim = detect_delimiter(sample)
            logger.warning(f"Detected delimiter: '{detected_delim}'")

            # Only re-split for a non-comma delimiter: pandas has already
            # parsed on commas, so splitting on ',' again would either do
            # nothing, tear apart quoted values that contain commas, or fail
            # outright on a numeric column (no ``.str`` accessor).
            if detected_delim != ',':
                # Split the single column into columns
                split_df = df_raw[0].str.split(detected_delim, expand=True)
                # Use first row as header
                split_df.columns = split_df.iloc[0].astype(str)
                # Drop the header row from data
                df = split_df.iloc[1:].reset_index(drop=True)
                # Convert DataFrame back to CSV, in the file's own encoding
                stored_bytes = df.to_csv(index=False).encode(encoding)

        # Check if a file with the same name and category already exists
        existing_file = (
            db.query(UploadedFile)
            .filter(UploadedFile.filename == file.filename, UploadedFile.category == category)
            .first()
        )

        if existing_file:
            return JSONResponse(
                status_code=409,  # Conflict
                content={
                    "message": f"'{file.filename}' already exists in category '{category}'.",
                    "id": existing_file.id,
                },
            )

        db_file = UploadedFile(filename=file.filename, content=stored_bytes, category=category)
        db.add(db_file)
        db.commit()
        db.refresh(db_file)

    except Exception as e:
        # Endpoint boundary: undo any partial transaction and report a 500.
        db.rollback()
        # logger.exception keeps the same message but also records the traceback.
        logger.exception(f"Error saving file: {file.filename}. Error: {e}")
        return JSONResponse(
            status_code=500,
            content={"message": "Internal server error", "details": str(e)},
        )

    logger.info(f"File uploaded successfully: {file.filename}")
    return {"message": "File uploaded successfully", "id": db_file.id}
112
153
113
154
0 commit comments