1
1
import os
2
- import csv
3
2
import json
4
3
import sys
4
+ import time
5
+ import random
6
+ from dataclasses import dataclass
7
+ from contextlib import contextmanager
8
+
9
+ import pandas as pd
5
10
from google .oauth2 import service_account
6
11
from googleapiclient .discovery import build
7
12
from googleapiclient .errors import HttpError
8
- import time
9
13
10
- SCOPES = ["https://www.googleapis.com/auth/spreadsheets" ]
11
- MAX_RETRIES = 3
12
- RETRY_DELAY = 5
14
@dataclass
class Config:
    """Settings shared by the credential setup and the Sheets upload."""

    # OAuth scopes passed to the service-account credentials.
    scopes: tuple = ("https://www.googleapis.com/auth/spreadsheets",)
    # Upper bound on upload attempts before the script gives up.
    max_retries: int = 3
    # A1-notation range that is cleared and then overwritten with the CSV.
    sheet_range: str = 'Sheet1'
    # Numeric sheet id used by the header formatting requests.
    # Assumes first sheet in the spreadsheet.
    sheet_id: int = 0


# Single module-level settings instance read by the functions below.
config = Config()
13
22
14
23
def get_env_var (var_name ):
15
24
value = os .environ .get (var_name )
@@ -23,15 +32,15 @@ def setup_credentials():
23
32
try :
24
33
creds_dict = json .loads (gcp_json )
25
34
return service_account .Credentials .from_service_account_info (
26
- creds_dict , scopes = SCOPES )
35
+ creds_dict , scopes = config . scopes )
27
36
except json .JSONDecodeError :
28
37
print ("Error: Invalid JSON in GCP_JSON environment variable" )
29
38
sys .exit (1 )
30
39
31
40
def read_csv(file_path):
    """Load a CSV file as a list of rows ready to send to the Sheets API.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of lists: the column names first, then one list per data
        row. Cells that pandas parsed as missing are returned as "".

    Exits the process with status 1 when the file does not exist.
    """
    # Only the read itself can raise FileNotFoundError, so keep the try small.
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: CSV file not found at {file_path} ")
        sys.exit(1)

    # pandas marks empty cells as NaN, which has no JSON representation and
    # would end up in the request body; send an empty string instead.
    # Casting to object first keeps the remaining values unchanged while
    # allowing a string fill value in numeric columns.
    cleaned = df.astype(object).where(df.notna(), "")
    return [cleaned.columns.tolist()] + cleaned.values.tolist()
@@ -43,52 +52,102 @@ def validate_data(data):
43
52
# Add more validation as needed
44
53
return True
45
54
55
@contextmanager
def get_sheets_service(creds):
    """Yield a Sheets v4 API client and close it when the block exits.

    Args:
        creds: Credentials object handed to ``build``.

    Yields:
        The service object returned by ``build("sheets", "v4", ...)``.
    """
    sheets_client = build("sheets", "v4", credentials=creds)
    try:
        yield sheets_client
    finally:
        # Runs on normal exit and on exceptions raised inside the with-body.
        sheets_client.close()
62
+
46
63
def _resolve_sheet_id(spreadsheet, sheet_range, default):
    """Return the numeric id of the sheet named in an A1-notation range.

    Falls back to ``default`` when the range names no sheet that appears in
    the spreadsheet metadata.
    """
    title = sheet_range.split('!', 1)[0]
    if len(title) >= 2 and title.startswith("'") and title.endswith("'"):
        # Quoted sheet titles escape embedded apostrophes by doubling them.
        title = title[1:-1].replace("''", "'")
    for sheet in spreadsheet.get('sheets', []):
        props = sheet.get('properties', {})
        if props.get('title') == title and 'sheetId' in props:
            return props['sheetId']
    return default


def _header_format_requests(sheet_id):
    """Build the batchUpdate requests that bold and freeze the first row."""
    return [
        {
            "repeatCell": {
                "range": {
                    "sheetId": sheet_id,
                    "startRowIndex": 0,
                    "endRowIndex": 1,
                },
                "cell": {
                    "userEnteredFormat": {
                        "textFormat": {
                            "bold": True,
                        },
                    },
                },
                "fields": "userEnteredFormat.textFormat.bold",
            },
        },
        {
            "updateSheetProperties": {
                "properties": {
                    "sheetId": sheet_id,
                    "gridProperties": {
                        "frozenRowCount": 1,
                    },
                },
                "fields": "gridProperties.frozenRowCount",
            },
        },
    ]


def upload_to_sheets(service, spreadsheet_id, data):
    """Replace the configured sheet range with ``data`` and style the header.

    Each attempt fetches the spreadsheet metadata, clears
    ``config.sheet_range``, writes ``data`` with RAW input, then bolds and
    freezes the first row. The sheet id for the formatting step is looked up
    from the fetched metadata by the sheet title in ``config.sheet_range``,
    so formatting lands on the same sheet the values were written to;
    ``config.sheet_id`` is used only when no matching sheet is found.

    Args:
        service: Sheets API service object.
        spreadsheet_id: Id of the target spreadsheet.
        data: List of rows (lists of cell values), header row first.

    Exits the process with status 1 on HTTP 403/404, or when the last of
    ``config.max_retries`` attempts fails with another HttpError.
    """
    body = {'values': data}

    for attempt in range(config.max_retries):
        try:
            spreadsheet = service.spreadsheets().get(spreadsheetId=spreadsheet_id).execute()
            print(f"Successfully accessed spreadsheet: {spreadsheet['properties']['title']} ")

            # Clear the sheet
            service.spreadsheets().values().clear(
                spreadsheetId=spreadsheet_id,
                range=config.sheet_range
            ).execute()

            # Update values
            result = service.spreadsheets().values().update(
                spreadsheetId=spreadsheet_id,
                range=config.sheet_range,
                valueInputOption='RAW',
                body=body
            ).execute()
            print(f"{result.get('updatedCells')} cells updated.")

            # Format header row as bold and freeze it, on the sheet that
            # was just written rather than on a fixed id.
            sheet_id = _resolve_sheet_id(
                spreadsheet, config.sheet_range, config.sheet_id)
            service.spreadsheets().batchUpdate(
                spreadsheetId=spreadsheet_id,
                body={"requests": _header_format_requests(sheet_id)}
            ).execute()

            print("Header row formatted as bold and frozen.")
            return
        except HttpError as err:
            status = err.resp.status
            # Permission and not-found errors will not fix themselves.
            if status in (403, 404):
                print(f"Error {status} : {err} ")
                print("Check spreadsheet ID and service account permissions.")
                sys.exit(1)
            if attempt >= config.max_retries - 1:
                print(f"Failed after {config.max_retries} attempts: {err} ")
                sys.exit(1)
            # Exponential backoff with up to one second of random jitter.
            wait_time = (2 ** attempt) + random.uniform(0, 1)
            print(f"Attempt {attempt + 1} failed. Retrying in {wait_time:.2f} seconds...")
            time.sleep(wait_time)
79
138
80
139
def main():
    """Read the merged CSV and push it to the configured Google Sheet.

    Exits with status 1 when ``validate_data`` rejects the CSV contents.
    """
    credentials = setup_credentials()
    target_id = get_env_var('GOOGLE_SHEETS_ID')

    print(f"Attempting to access spreadsheet with ID: {target_id} ")

    # NOTE(review): the path literal has a space before the slash; confirm
    # that this is the intended location of the merged CSV.
    rows = read_csv('output /merged.csv')
    if not validate_data(rows):
        sys.exit(1)

    # The service is built only after the CSV passed validation and is
    # closed when the with-block ends.
    with get_sheets_service(credentials) as sheets:
        upload_to_sheets(sheets, target_id, rows)


# Run the upload only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments