-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
125 lines (98 loc) · 5.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import urllib.request, time
import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import WebDriverException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions
from selenium.webdriver.support.ui import WebDriverWait
from tkinter import Tk, messagebox
from pathlib import Path
from utils import *
from urllib.parse import unquote_plus
from bs4 import BeautifulSoup
# Spreadsheet column names, in the order the parsed columns are collected.
_COLUMN_NAMES = (
    'Processo',
    'Classe',
    'Assunto',
    'Magistrado',
    'Comarca',
    'Foro',
    'Vara',
    'Data de Disponibilização',
)

# Chrome binary used for the second start-up attempt. Raw string: the
# backslashes are path separators, not escape sequences.
_FALLBACK_CHROME_BINARY = r'D:\Program Files (x86)\Google\Chrome\Application\chrome.exe'

_TIMEOUT_MESSAGE = 'O site demorou demais para responder, tente novamente.'


def _show_dialog(show, title, message):
    """Show a tkinter message box via *show* and always destroy the hidden root."""
    root = Tk()
    root.withdraw()
    try:
        show(title, message)
    finally:
        root.destroy()


def _open_browser(driver_path, url):
    """Start headless Chrome and load *url*, retrying once with a fixed binary path.

    If the first attempt raises ``WebDriverException`` the driver is started
    again with ``_FALLBACK_CHROME_BINARY`` as the browser binary. Any driver
    created by a failed attempt is shut down before the error propagates or
    the retry begins, so no browser process is left behind.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--window-size=1920,1080')

    # NOTE(review): the driver path is passed positionally, as the original
    # code did; recent Selenium releases may expect a Service object instead
    # -- confirm against the installed version.
    driver = None
    try:
        driver = webdriver.Chrome(driver_path, options=options)
        driver.get(url)
        return driver
    except WebDriverException:
        if driver is not None:
            driver.quit()

    options.binary_location = _FALLBACK_CHROME_BINARY
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    driver = webdriver.Chrome(driver_path, options=options)
    try:
        driver.get(url)
    except Exception:
        driver.quit()
        raise
    return driver


def _run_search(driver, search_term, class_term, start_date, end_date, timeout):
    """Fill in the search form, submit it and return the raw result page bytes.

    Raises ``TimeoutException`` when the class tree or the result container
    does not appear within *timeout* seconds.
    """
    driver.find_element(By.ID, 'iddadosConsulta.pesquisaLivre').send_keys(search_term)
    driver.find_element(By.ID, 'iddadosConsulta.dtInicio').send_keys(start_date)
    driver.find_element(By.ID, 'iddadosConsulta.dtFim').send_keys(end_date)
    submit_button = driver.find_element(By.ID, 'pbSubmit')
    driver.find_element(By.ID, 'botaoProcurar_classe').click()

    WebDriverWait(driver, timeout).until(
        expected_conditions.presence_of_element_located((By.CLASS_NAME, 'treeView'))
    )
    # The three elements are located before the filter is applied, in the
    # same order as the original script.
    filter_box = driver.find_element(By.ID, 'classe_treeSelectFilter')
    filter_button = driver.find_element(By.ID, 'filtroButton')
    class_checkbox = driver.find_element(By.ID, 'classe_tree_node_8554')
    filter_box.send_keys(class_term)
    filter_button.click()
    class_checkbox.click()
    driver.find_element(
        By.XPATH,
        '//*[@id="classe_treeSelectContainer"]/div[3]/table/tbody/tr/td/input[1]',
    ).click()
    submit_button.click()

    WebDriverWait(driver, timeout).until(
        expected_conditions.presence_of_element_located((By.ID, 'divDadosResultado'))
    )
    # The result page is downloaded again with urllib from the URL the
    # browser ended up on, rather than read from the browser itself.
    request = urllib.request.Request(driver.current_url, headers={'User-Agent': 'Chrome'})
    with urllib.request.urlopen(request) as response:
        return response.read()


def _parse_results(page):
    """Parse the result HTML into one list per entry of ``_COLUMN_NAMES``."""
    soup = BeautifulSoup(page, 'lxml')
    columns = [[] for _ in _COLUMN_NAMES]
    for result in soup.find_all('tr', class_='fundocinza1'):
        # Column 0 holds the process identifier; the remaining cells of a
        # result fill columns 1, 2, ... in document order.
        field_index = 1
        for row in result.find_all('tr', class_='fonte'):
            for header_cell in row.find_all('td', attrs={'align': 'left', 'colspan': '2'}):
                for process in row.find_all('span', class_='fonteNegrito'):
                    columns[0].append(tag_cleanup(process))
                # Remove the header cell so the generic left-aligned cell
                # scan below does not pick it up as a field.
                header_cell.replace_with('')
            for cell in row.find_all('td', attrs={'align': 'left'}):
                # Drop the <strong> elements before extracting the text
                # (presumably the field labels -- verify against the page).
                for label in cell.find_all('strong'):
                    label.replace_with('')
                # [1:] discards the first character of the cleaned text
                # (presumably a leading separator -- verify with tag_cleanup).
                columns[field_index].append(tag_cleanup(cell)[1:])
                field_index += 1
    return columns


def main():
    """Run the search configured through ``GUI()`` and export the results.

    The values returned by ``GUI()`` drive a headless Chrome session that
    fills in and submits the search form. The result page is parsed into a
    table that is written to ``processos.csv`` and/or ``processos.xlsx``
    depending on the flags chosen in the GUI, and a message box reports the
    elapsed time. If Selenium times out during the search, an error message
    box is shown instead and nothing is written.

    Raises:
        ValueError: from pandas, if a parsed column has fewer entries than
            the process column.
    """
    driver_path = Path(__file__).parent / 'chromedriver.exe'
    (url, search_term, class_term, start_date, end_date,
     timeout, want_csv, want_xlsx) = GUI()
    start_time = time.time()

    driver = _open_browser(driver_path, url)
    try:
        page = _run_search(driver, search_term, class_term, start_date, end_date, timeout)
    except TimeoutException:
        page = None
    finally:
        # Always release the browser, including on unexpected errors.
        driver.quit()

    if page is None:
        _show_dialog(messagebox.showerror, 'Timeout', _TIMEOUT_MESSAGE)
        return

    columns = _parse_results(page)
    # One row per collected process; surplus entries in the other columns
    # are ignored, as the original row-by-row loop did.
    row_count = len(columns[0])
    frame = pd.DataFrame(
        {name: values[:row_count] for name, values in zip(_COLUMN_NAMES, columns)}
    )
    if want_csv:
        frame.to_csv('processos.csv', encoding='latin1')
    if want_xlsx:
        frame.to_excel('processos.xlsx')

    elapsed = round(time.time() - start_time, 3)
    _show_dialog(
        messagebox.showinfo,
        'Finalizado',
        f'Planilha(s) gerada(s).\nO programa levou {elapsed}s para rodar.',
    )
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()