-
Notifications
You must be signed in to change notification settings - Fork 0
/
plugin.py
87 lines (73 loc) · 2.98 KB
/
plugin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# coding: utf-8
###
# Copyleft (ↄ) 2009, Štěpán Němec
# All rites reversed.
###
import codecs
import re
import urllib
from BeautifulSoup import BeautifulSoup
import supybot.conf as conf
import supybot.utils as utils
from supybot.commands import *
import supybot.callbacks as callbacks
def strlist(brs):
    """Return a list with the `.string` attribute of every item in `brs`.

    `brs` is any iterable of objects exposing a `.string` attribute (the
    plugin passes in the anchor tags found by BeautifulSoup).  Values are
    collected as-is and in order, so an element whose `.string` is None
    contributes None to the result.
    """
    return [tag.string for tag in brs]
# Pattern used as the `href` filter when looking up <a> tags in the scraped
# page: it requires the text 'lang' with at least one non-whitespace character
# on each side of it.
_langRe = re.compile(r'\S+lang\S+')
def _lang_link(node):
    """Return the first <a> under `node` whose href matches `_langRe`.

    Returns None when `node` itself is None or when no such link exists.
    """
    if node is None:
        return None
    return node.find('a', {'href': _langRe})


def _utf8_string(tag):
    """Return `tag.string` encoded as UTF-8, or None if there is no text.

    Both a missing tag (None) and a tag whose `.string` is None yield None,
    so that callers can skip the entry instead of crashing.
    """
    text = tag.string if tag is not None else None
    if text is None:
        return None
    return codecs.encode(text, 'utf-8', 'replace')


def _word_entries(soup):
    """Return the 'word: translation, ...' entries from the `words` table."""
    table = soup.find('table', {'id': 'words'})
    if table is None:
        return []
    entries = []
    for row in table.findAll('tr'):
        orig = _utf8_string(_lang_link(row.find('td', {'class': 'word'})))
        translated = row.find('td', {'class': 'translated'})
        # Rows lacking either cell (or the link text) carry no usable
        # word/translation pair; skip them rather than fail the whole lookup.
        if orig is None or translated is None:
            continue
        trls = [s for s in strlist(translated.findAll('a', {'href': _langRe}))
                if s is not None]
        entries.append(orig + ': ' + ', '.join(trls).encode('utf-8',
                                                              'replace'))
    return entries


def _collocation_entries(soup):
    """Return the 'phrase: translation' entries from the `collocations` div."""
    box = soup.find('div', {'id': 'collocations'})
    listing = box.find('dl') if box is not None else None
    if listing is None:
        return []
    entries = []
    # <dt> holds the original phrase, the <dd> at the same position its
    # translation; zip() pairs them up and drops any unmatched trailing item.
    for phrase, meaning in zip(listing.findAll('dt'), listing.findAll('dd')):
        orig = _utf8_string(_lang_link(phrase))
        trl = _utf8_string(_lang_link(meaning))
        if orig is None or trl is None:
            continue
        entries.append(orig + ': ' + trl)
    return entries


class SeznamSlov(callbacks.Plugin):
    """Provides a `seznamslov' command interfacing [screen scraping :-(] to the
    Czech translating dictionary at <http://slovnik.seznam.cz>."""
    threaded = True
    # Accepted language pairs: Czech combined with one of en/de/fr/it/es, in
    # either direction.
    _isLang = re.compile(r'^(?:en|de|fr|it|es)_cz$|^cz_(?:en|de|fr|it|es)$')

    def seznamslov(self, irc, msg, args, term, lang):
        """<term> [<lang>]

        Searches the online dictionary at slovnik.seznam.cz.
        Optional second argument in the format 'from_to' specifies the
        translation languages. Valid values are: en_cz (the default), de_cz,
        fr_cz, it_cz, es_cz and the reversed equivalents cz_en etc.
        """
        # `lang` arrives either as a plain string (the 'en_cz' default) or as
        # an object exposing .group() -- presumably the regex match produced
        # by the 'matches' converter in the wrap() spec below.
        if not isinstance(lang, basestring):
            lang = lang.group(0)
        # Round-trip through unicode so that invalid UTF-8 in the term is
        # replaced before it is URL-quoted.
        query = urllib.quote_plus(
            term.decode('utf-8', 'replace').encode('utf-8', 'replace'))
        url = 'http://slovnik.seznam.cz/?q=%s&lang=%s' % (query, lang)
        soup = BeautifulSoup(utils.web.getUrl(url))
        entries = _word_entries(soup) + _collocation_entries(soup)
        # msg.args[0] is handed to registryValue as the second argument,
        # presumably to select a per-channel setting -- confirm.
        if self.registryValue('showUrl', msg.args[0]):
            suffix = ' (<' + url + '>)'
        else:
            suffix = ''
        if entries:
            irc.reply(' --- '.join(entries) + suffix)
        else:
            irc.reply('Nothing found.' + suffix)
    seznamslov = wrap(seznamslov, ['something', additional(('matches', _isLang,
        'Invalid language specification.'), 'en_cz')])
# Module-level alias for the plugin class; presumably the name the Supybot
# plugin loader looks up when loading this module -- confirm against the
# framework.
Class = SeznamSlov
# vim:set shiftwidth=4 softtabstop=4 expandtab textwidth=79: