-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcarBrandScrap.py
186 lines (145 loc) · 5.4 KB
/
carBrandScrap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time
import mysql.connector
#51823SK
# Function to connect to the database
def connect_db(database_name):
return mysql.connector.connect(
host='localhost', # e.g., 'localhost'
user='root',
password='',
database=database_name
)
def get_number_by_car_model(car_model):
db = connect_db("vcdb")
cursor = db.cursor()
# SQL query using two schemas
# query = """
# SELECT schema2.some_table.some_number
# FROM schema1.cars
# JOIN schema2.some_table ON schema1.cars.country = schema2.some_table.country
# WHERE schema1.cars.model = %s
#"""
query = """
SELECT MakeID
FROM vcdb.make WHERE vcdb.make.MakeName = %s
"""
cursor.execute(query, (car_model,))
result = cursor.fetchone()
db.close()
if result:
return result[0]
else:
print(car_model + " failed to retrieve model ID")
return None
def get_number_by_region(region_name):
db = connect_db("autoapp")
cursor = db.cursor()
query = """
SELECT regionid
FROM autoapp.region WHERE autoapp.region.regionabbr = %s OR autoapp.region.regionname = %s
"""
cursor.execute(query, (region_name, region_name,))
result = cursor.fetchone()
db.close()
if result:
return result[0]
elif region_name == "South Korea":
return 6 #South Korea is Korea in database
else:
return 4 #assume the country is "European"
# Configure Selenium (choose your browser, here we use Chrome)
def setup_driver():
driver = webdriver.Chrome()
return driver
def search_website(query, driver):
# Example website - you can change this to your target website
driver.get("https://www.car.info/en-se/brands")
time.sleep(2)
# Find search input and type the query
search_box = driver.find_element(By.ID, "brand_search")
search_box.send_keys(query)
search_box.send_keys(Keys.RETURN)
# Wait for page to load
time.sleep(1)
try:
partLink= driver.find_element(By.CLASS_NAME, "brand_name")
partLink.click()
time.sleep(1)
except:
print("Empty results: " + query)
return None
# Go to the tab with buyer's guide
#button = driver.find_element(By.XPATH, "//*[contains(text(), 'Buyers Guide')]")
#button = driver.find_element(By.CSS_SELECTOR, ".x-tab .x-unselectable .x-box-item .x-tab-default .x-top .x-tab-top .x-tab-default-top .x-tab-over")
#button = driver.find_element(By.XPATH, "//*[contains(@class, 'x-tab') and contains(@class, 'x-unselectable') and contains(@class, 'x-box-item')and contains(@class, 'x-tab-default')and contains(@class, 'x-top')and contains(@class, 'x-tab-top')and contains(@class, 'x-tab-default-top')and contains(@class, 'x-tab-over')]")
#button.click()
#time.sleep(1)
# Parse the HTML of the page
soup = BeautifulSoup(driver.page_source, 'html.parser')
return soup
def extract_data(query, soup):
# Find the table or the rows inside the grid
if (soup != None):
cells = soup.find_all(class_='ast-i')
country = cells[2].text.strip()
regionID = get_number_by_region(country)
makeID = get_number_by_car_model(query)
db = connect_db("backpack")
cursor = db.cursor()
try:
# Insert data into the table
insert_query = """
INSERT INTO backpack.make_to_region
VALUES (%s, %s);
"""
cursor.execute(insert_query, (makeID, regionID))
# Commit the transaction
db.commit()
# Check if the insert was successful
if cursor.rowcount > 0:
print("Data inserted successfully")
else:
print("Insert failed")
except Exception as e:
db.rollback() # Rollback in case of error
print(f"An error occurred: {e}")
finally:
cursor.close() # Close the cursor
db.close() # Close the database connection
# Main function to run everything
def main():
'''
query = input("Enter your search query: ")
driver = setup_driver()
# Perform search and extract data
soup = search_website(query, driver)
data = extract_data(query, soup)
# Save to Excel
#name = input("Enter excel name: ")
name = query + "info"+"single"
save_to_excel(data, name)
'''
print("Enter your search queries (press Enter on an empty line to finish):")
queries = []
while True:
query = input()
if query.strip() == "":
break
queries.append(query.strip())
if not queries:
print("No queries entered.")
return
driver = setup_driver()
for query in queries:
# Perform search and extract data
soup = search_website(query, driver)
extract_data(query, soup)
driver.quit()
main()