-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrossword.py
147 lines (127 loc) · 5.23 KB
/
crossword.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import os
import pickle
import subprocess
from getpass import getpass

import requests
import yaml
from bs4 import BeautifulSoup
class CrosswordFetcher:
    '''
    Downloads puzzle PDFs from the Times puzzle club using stored cookies.

    The two authentication cookies are kept in CONFIG_FILE (prompted for and
    written on first use) and the HTTP session's cookie jar is persisted in
    SESSION_FILE. Both paths are relative to the current working directory.
    '''

    SESSION_FILE = '.sessionfile'
    CONFIG_FILE = '.timescrosswords.yaml'

    def __init__(self):
        '''
        Loads the stored cookies, then builds the HTTP session from them.
        '''
        self.load_credentials()
        self.load_session()

    def load_credentials(self):
        '''
        Sets self.cookies from CONFIG_FILE, creating that file if it cannot be opened.

        When the file is missing the two cookie values are prompted for with
        getpass, written to CONFIG_FILE and the file is made owner-read-only.
        '''
        try:
            with open(self.CONFIG_FILE, 'r') as configfile:
                # NOTE(review): FullLoader can build more than plain data types.
                # The file is written by this tool, but yaml.safe_load would be
                # sufficient for a flat mapping of two strings.
                self.cookies = yaml.load(configfile, Loader=yaml.FullLoader)
        except IOError:
            # No config file - create one
            # Warning - the cookie values are stored unencrypted.
            self.cookies = {
                'acs_tnl': getpass('acs_tnl Cookie: '),
                'sacs_tnl': getpass('sacs_tnl Cookie: '),
            }
            with open(self.CONFIG_FILE, 'w') as configfile:
                yaml.dump({
                    'acs_tnl': self.cookies['acs_tnl'],
                    'sacs_tnl': self.cookies['sacs_tnl'],
                }, configfile, default_flow_style=False)
            # Octal, not decimal: decimal 400 is 0o620, which would leave the
            # file group-writable instead of owner-read-only.
            os.chmod(self.CONFIG_FILE, 0o400)

    def load_session(self):
        '''
        Creates self.session and fills its cookie jar.

        Cookies saved by a previous run are restored from SESSION_FILE when it
        exists; the configured cookies from self.cookies are then applied on
        top, whether or not a saved session was found.
        '''
        self.session = requests.session()
        try:
            with open(self.SESSION_FILE, 'rb') as f:
                # NOTE(review): unpickling runs arbitrary code if the file has
                # been tampered with; it is only trusted because save_session
                # writes it locally.
                self.session.cookies.update(pickle.load(f))
        except (IOError, EOFError):
            pass  # No saved session yet (or an empty file): start from an empty jar
        # Applied outside the try so that a missing session file (the normal
        # first-run case) does not leave the session unauthenticated.
        self.session.cookies.update(self.cookies)

    def save_session(self):
        '''
        Pickles the session's cookie jar to SESSION_FILE.
        '''
        with open(self.SESSION_FILE, 'wb') as f:
            pickle.dump(self.session.cookies, f)

    def get_query_params(self, url):
        '''
        Splits the query string of a URL into a dict. Values are not URL-decoded.

        :param url: the URL to inspect
        :return: a dict of parameter name to value; empty when the URL has no
                 query string. A parameter without '=' maps to ''.
        '''
        query = url.partition('?')[2]
        params = {}
        for pair in query.split('&'):
            if not pair:
                continue  # No query string at all, or a stray '&'
            key, _, value = pair.partition('=')
            params[key] = value
        return params

    def __del__(self):
        # __init__ may have raised before the session was created; there is
        # nothing to persist in that case.
        if getattr(self, 'session', None) is None:
            return
        self.save_session()

    def get_crosswords(self, crossword_type, start, end, to_print=False):
        '''
        Downloads every crossword of the given type in the given date range.

        :param crossword_type: value placed in the filter[puzzle_type] query parameter
        :param start: start of the publish-date range, inserted into the URL as given
        :param end: end of the publish-date range, inserted into the URL as given
        :param to_print: when true, each downloaded file is also sent to the printer
        '''
        search_page = self.construct_url(crossword_type, start, end)
        pdfs = self.get_crosswords_from_url(search_page)
        print('Downloading {count} crosswords'.format(count=len(pdfs)))
        for pdf in pdfs:
            filename = self.download_and_save(pdf)
            if to_print:
                self.print_crossword(filename)

    def get_crosswords_from_url(self, url):
        '''
        Collects the printable-puzzle links from a search page and its 'Show more' pages.

        :param url: the search results page to start from
        :return: a list of the href values of all print links found
        :raises Exception: when a page request does not succeed
        '''
        links = []
        visited = set()
        # Follow the 'Show more' chain iteratively so a long result list cannot
        # exhaust the recursion limit, and stop if a page links back to one
        # already fetched.
        while url is not None and url not in visited:
            visited.add(url)
            r = self.session.get(url)
            if not r.ok:
                raise Exception("Couldn't get crosswords from", url, r)
            b = BeautifulSoup(r.text, features='html.parser')
            # Get all the printable links
            print_links = b.find_all('p', {'class': 'PuzzleItem-secondary-link PuzzleItem--print-link'})
            page_links = [p.contents[0]['href'] for p in print_links]
            print('Found {count} crosswords'.format(count=len(page_links)))
            if not page_links:
                break  # An empty page ends the search even if it offers more
            links.extend(page_links)
            # None when the page has no 'Show more' link, which ends the loop
            url = next(
                (a['href'] for a in b.find_all('a', {'class': 'Item-cta Link--primary'})
                 if a.text == 'Show more'),
                None)
        return links

    def download_and_save(self, url):
        '''
        Downloads and saves a PDF from the given URL to the current working directory
        :param url: the URL (hopefully a PDF) to download
        :return: the filename of the saved file
        :raises Exception: when the download request does not succeed
        '''
        resp = self.session.get(url)
        if not resp.ok:
            # Avoid writing an error page to disk under a .pdf name
            raise Exception("Couldn't download", url, resp)
        print('Saving ' + url)
        filename = url.split('/')[-1] + '.pdf'
        with open(filename, 'wb') as fd:
            fd.write(resp.content)
        return filename

    def construct_url(self, crossword_type, start, end):
        '''
        Builds the puzzle-list search URL for a puzzle type and publish-date range.

        :param crossword_type: value for filter[puzzle_type]
        :param start: value for filter[publish_at][from]
        :param end: value for filter[publish_at][to]
        :return: the search URL; the values are inserted without any escaping
        '''
        return 'https://www.thetimes.co.uk/puzzleclub/crosswordclub/puzzles-list?search=&filter[puzzle_type]={type}&filter[publish_at][from]={start}&filter[publish_at][to]={end}' \
            .format(type=crossword_type, start=start, end=end)

    def print_crossword(self, filename):
        '''
        Prints a file (to the printer, not stdout). Very rudimentary, assumes the default printer
        :param filename: the file to print
        :raises subprocess.CalledProcessError: when lp exits with a non-zero status
        '''
        print('Printing ' + filename)
        # Run lp as a child process: os.execv needs a full path and an argument
        # list, and would replace this process instead of returning to the
        # download loop.
        subprocess.check_call(['lp', filename])
# Puzzle-type identifiers offered to the user, mapped to the names shown in
# the menu. The chosen key is passed to CrosswordFetcher.get_crosswords as the
# crossword type.
crossword_types = {
    "6": "Sunday Times Concise",
    "5": "Times Concise",
    "7": "Times Concise Jumbo",
    "1": "Quick Cryptic",
    "3": "Sunday Times Cryptic",
    "2": "Times Cryptic",
    "4": "Times Cryptic Jumbo",
    "9": "General Knowledge Jumbo",
    "8": "Mephisto",
    "12": "Monthly Club Special",
    "10": "O Tempora! (Latin)",
    "11": "The Listener",
    "SPECIALIST": "Specialist",
    "CRYPTIC": "Cryptic",
}


def main():
    '''
    Prompts for a crossword type and a date range, then downloads the matching puzzles.
    '''
    # raw_input only exists on Python 2; on Python 3 the equivalent is input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Todo: Parse CLI args
    crossword_type = None
    # Keep showing the menu until one of the listed keys is entered
    while crossword_type not in crossword_types:  # get type from flags
        print("Crossword types: ")
        for key, label in crossword_types.items():
            print(key + ": " + label)
        crossword_type = read_line("Which Crossword Type? ")
    start = read_line("Start Date (dd/mm/yyyy) ")  # get start date from flags
    end = read_line("End Date (dd/mm/yyyy) ")  # get end date from flags
    to_print = False  # get this from user input
    CrosswordFetcher().get_crosswords(crossword_type, start, end, to_print=to_print)


if __name__ == '__main__':
    main()