Skip to content

Commit 98923b1

Browse files
committed
m
1 parent e8a09b3 commit 98923b1

File tree

2 files changed

+33
-73
lines changed

2 files changed

+33
-73
lines changed

.github/workflows/scrape.yml

Lines changed: 10 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,64 +1,32 @@
1-
name: Scrape Job Data and Upload to Google Sheets
1+
name: Scrape and Upload to Google Sheets
22

33
on:
44
push:
5-
branches:
6-
- master
7-
pull_request_target:
5+
branches: [master]
6+
pull_request:
7+
branches: [master]
88
schedule:
99
- cron: "0 0 * * *"
10-
workflow_dispatch:
1110

1211
jobs:
1312
scrape-and-upload:
1413
runs-on: ubuntu-latest
15-
permissions:
16-
contents: write
1714
steps:
18-
- name: Checkout code
19-
uses: actions/checkout@v4
20-
with:
21-
token: ${{ secrets.MEONG }}
22-
repository: ${{ github.event.pull_request.head.repo.full_name }}
23-
ref: ${{ github.head_ref }}
24-
15+
- uses: actions/checkout@v3
2516
- name: Set up Python
2617
uses: actions/setup-python@v4
2718
with:
2819
python-version: "3.x"
29-
3020
- name: Install dependencies
3121
run: |
3222
python -m pip install --upgrade pip
33-
pip install scrapy google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
34-
35-
# - name: Run scrape.sh
36-
# run: bash scrape.sh
37-
38-
# - name: Check for CSV files
39-
# run: |
40-
# if ls output/*.csv 1> /dev/null 2>&1; then
41-
# echo "CSV files found."
42-
# else
43-
# echo "No CSV files found. Exiting."
44-
# exit 1
45-
# fi
46-
47-
# - name: Move CSV files to public directory
48-
# run: |
49-
# mkdir -p public
50-
# mv output/*.csv public/
51-
23+
pip install google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client
24+
- name: Run scraper
25+
run: |
26+
# Your scraping script here
27+
# This should create public/merged.csv
5228
- name: Upload to Google Sheets
5329
env:
5430
GCP_JSON: ${{ secrets.GCP_JSON }}
5531
GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
5632
run: python upload_to_sheets.py
57-
58-
- name: Commit and push changes
59-
run: |
60-
git config --local user.email "action@github.com"
61-
git config --local user.name "GitHub Action"
62-
git add public/*.csv
63-
git commit -m "[skip ci] Updated job data" || echo "No changes to commit"
64-
git push

upload_to_sheets.py

Lines changed: 23 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,31 @@
11
import os
22
import csv
33
import json
4-
import sys
54
from google.oauth2 import service_account
65
from googleapiclient.discovery import build
76
from googleapiclient.errors import HttpError
87

8+
# OAuth scopes requested when the service-account credentials are built from GCP_JSON in setup_credentials().
9+
SCOPES = ["https://www.googleapis.com/auth/spreadsheets"]
10+
911
def get_env_var(var_name):
1012
value = os.environ.get(var_name)
1113
if value is None:
12-
print(f"Error: {var_name} environment variable is not set")
13-
sys.exit(1)
14+
raise ValueError(f"{var_name} environment variable is not set")
1415
return value
1516

1617
def setup_credentials():
17-
creds_json = get_env_var('GCP_JSON') # Changed from GCP_SA_KEY to GCP_JSON
18-
try:
19-
creds_dict = json.loads(creds_json)
20-
return service_account.Credentials.from_service_account_info(
21-
creds_dict,
22-
scopes=['https://www.googleapis.com/auth/spreadsheets']
23-
)
24-
except json.JSONDecodeError:
25-
print("Error: Invalid JSON in GCP_JSON") # Changed error message
26-
sys.exit(1)
18+
gcp_json = get_env_var('GCP_JSON')
19+
creds_dict = json.loads(gcp_json)
20+
return service_account.Credentials.from_service_account_info(
21+
creds_dict, scopes=SCOPES)
2722

2823
def read_csv(file_path):
29-
try:
30-
with open(file_path, 'r') as file:
31-
return list(csv.reader(file))
32-
except FileNotFoundError:
33-
print(f"Error: CSV file not found at {file_path}")
34-
sys.exit(1)
24+
with open(file_path, 'r') as file:
25+
return list(csv.reader(file))
3526

3627
def upload_to_sheets(service, spreadsheet_id, data):
37-
sheet_range = 'Sheet1'
28+
sheet_range = 'Sheet1' # Update this if you want to use a different sheet name
3829
body = {'values': data}
3930

4031
try:
@@ -52,19 +43,20 @@ def upload_to_sheets(service, spreadsheet_id, data):
5243
body=body
5344
).execute()
5445
print(f"{result.get('updatedCells')} cells updated.")
55-
except HttpError as e:
56-
print(f"HTTP error occurred: {e}")
57-
if e.resp.status == 404:
58-
print("Error 404: Make sure the Google Sheets ID is correct and the service account has access to the sheet.")
59-
sys.exit(1)
46+
except HttpError as err:
47+
print(f"An error occurred: {err}")
48+
raise
6049

6150
def main():
62-
creds = setup_credentials()
63-
service = build('sheets', 'v4', credentials=creds)
64-
spreadsheet_id = get_env_var('GOOGLE_SHEETS_ID')
65-
66-
csv_content = read_csv('public/merged.csv')
67-
upload_to_sheets(service, spreadsheet_id, csv_content)
51+
try:
52+
creds = setup_credentials()
53+
service = build("sheets", "v4", credentials=creds)
54+
spreadsheet_id = get_env_var('GOOGLE_SHEETS_ID')
55+
56+
csv_content = read_csv('public/merged.csv')
57+
upload_to_sheets(service, spreadsheet_id, csv_content)
58+
except HttpError as err:
59+
print(err)
6860

6961
if __name__ == "__main__":
7062
main()

0 commit comments

Comments
 (0)