-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandle_data.py
104 lines (85 loc) · 3.88 KB
/
handle_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
from utilities import *
from google.oauth2.service_account import Credentials
from googleapiclient.discovery import build
import pandas as pd
SPREADSHEET_ID = "1wknOyTVfeMZDUpIN8EG9DjYU8IgfsZXUN9aNJGnE8q0"
credentials_info = get_credentials("Google-Sheets-API-Key")
# Authenticate with Google Sheets API
SCOPES = ['https://www.googleapis.com/auth/spreadsheets']
credentials = Credentials.from_service_account_info(credentials_info, scopes=SCOPES)
service = build('sheets', 'v4', credentials=credentials)
# Function to fetch data from Google Sheets
def fetch_google_sheet_data():
result = service.spreadsheets().values().batchGet(
spreadsheetId=SPREADSHEET_ID,
ranges=[f"Sheet1!A:A", f"Sheet1!B:B", f"Sheet1!E:E"]
).execute()
col_names = []
data = []
for column in result["valueRanges"]:
values = column["values"]
col_names.append(values[0][0])
data.append(value[0] for value in values[1:])
df = pd.DataFrame(zip(*data), columns=col_names) # Use first row as column names
return df
def push_to_spreadsheet(all_data, new_data, latest_dates):
# Function to append data to the Google Sheet
def append_to_google_sheet(values):
body = {
'values': values
}
result = service.spreadsheets().values().append(
spreadsheetId=SPREADSHEET_ID,
range='Sheet1', # Specify the sheet name
valueInputOption='RAW', # Use 'RAW' or 'USER_ENTERED'
insertDataOption='INSERT_ROWS', # Insert new rows
body=body
).execute()
print(f"{result.get('updates').get('updatedRows')} rows appended.")
def color_new_rows(start_row, end_row):
"""
Colors the newly added rows in light green.
"""
requests = [
{
"repeatCell": {
"range": {
"sheetId": 990599841,
"startRowIndex": start_row,
"endRowIndex": end_row + 1 # Make it inclusive
},
"cell": {
"userEnteredFormat": {
"backgroundColor": {"red": 1.0, "green": 0.6, "blue": 0.0}
}
},
"fields": "userEnteredFormat.backgroundColor"
}
}
]
# Send the batchUpdate request to format the rows
service.spreadsheets().batchUpdate(
spreadsheetId=SPREADSHEET_ID, body={"requests": requests}
).execute()
print(f"Rows {start_row} to {end_row} color-coded.")
# Replace NaN values with empty strings to prevent unintended NaNs
new_data.fillna("", inplace=True)
# Convert 'date_of_incident' to datetime, forcing errors='coerce' to handle bad data
new_data["date_of_incident"] = pd.to_datetime(new_data["date_of_incident"], errors="coerce")
# Filter new_data to keep only rows with dates **after** the latest recorded date in that category
filtered_new_data = new_data[new_data.apply(
lambda row: row["date_of_incident"] > latest_dates.get(row["primary_category"], pd.NaT),
axis=1
)]
# Add tracking number
all_data["tracking_number"] = pd.to_numeric(all_data["tracking_number"], errors="coerce")
max_tracking_number = all_data["tracking_number"].max()
filtered_new_data["tracking_number"] = range(max_tracking_number + 1, max_tracking_number + 1 + len(filtered_new_data))
filtered_new_data["date_of_incident"] = filtered_new_data["date_of_incident"].dt.strftime('%Y-%m-%d')
if len(filtered_new_data != 0):
# Append data to Google Sheet
append_to_google_sheet(filtered_new_data.fillna("").values.tolist())
print("Data Succesfully Pushed to Spreadsheet")
color_new_rows(len(all_data) + 1, len(all_data) + len(filtered_new_data))
else:
print("No new data to push")