From 482161e98b254c4d67a81def8c726dad7bf03af8 Mon Sep 17 00:00:00 2001 From: Steve Vaught Date: Tue, 19 Apr 2011 00:50:58 -0400 Subject: [PATCH 1/5] Added error checking for empty parameters --- modules/calc.py | 4 ++++ modules/dict.py | 2 ++ modules/search.py | 2 ++ modules/validate.py | 2 ++ modules/wiktionary.py | 2 ++ 5 files changed, 12 insertions(+) diff --git a/modules/calc.py b/modules/calc.py index 88ac814..5a72e10 100755 --- a/modules/calc.py +++ b/modules/calc.py @@ -69,6 +69,8 @@ calc.example = '.calc 5 + 3' def c(phenny, input): """Google calculator.""" + if not input.group(2): + return phenny.reply("Nothing to calculate.") q = input.group(2).encode('utf-8') q = q.replace('\xcf\x95', 'phi') # utf-8 U+03D5 q = q.replace('\xcf\x80', 'pi') # utf-8 U+03C0 @@ -99,6 +101,8 @@ def py(phenny, input): py.commands = ['py'] def wa(phenny, input): + if not input.group(2): + return phenny.reply("No search term.") query = input.group(2).encode('utf-8') uri = 'http://tumbolia.appspot.com/wa/' answer = web.get(uri + web.urllib.quote(query)) diff --git a/modules/dict.py b/modules/dict.py index 125f686..8f13e99 100755 --- a/modules/dict.py +++ b/modules/dict.py @@ -22,6 +22,8 @@ r_info = re.compile( ) def dict(phenny, input): + if not input.group(2): + return phenny.reply("Nothing to define.") word = input.group(2) word = urllib.quote(word.encode('utf-8')) diff --git a/modules/search.py b/modules/search.py index 1067531..d83a47e 100755 --- a/modules/search.py +++ b/modules/search.py @@ -69,6 +69,8 @@ r_query = re.compile( ) def gcs(phenny, input): + if not input.group(2): + return phenny.reply("Nothing to compare.") queries = r_query.findall(input.group(2)) if len(queries) > 6: return phenny.reply('Sorry, can only compare up to six things.') diff --git a/modules/validate.py b/modules/validate.py index 185623f..85815d1 100755 --- a/modules/validate.py +++ b/modules/validate.py @@ -11,6 +11,8 @@ import web def val(phenny, input): """Check a webpage using the W3C Markup 
Validator.""" + if not input.group(2): + return phenny.reply("Nothing to validate.") uri = input.group(2) if not uri.startswith('http://'): uri = 'http://' + uri diff --git a/modules/wiktionary.py b/modules/wiktionary.py index c8f665e..4a5f407 100755 --- a/modules/wiktionary.py +++ b/modules/wiktionary.py @@ -72,6 +72,8 @@ def format(word, definitions, number=2): return result.strip(' .,') def w(phenny, input): + if not input.group(2): + return phenny.reply("Nothing to define.") word = input.group(2) etymology, definitions = wiktionary(word) if not definitions: From ccd0343d131799985f18cf5ac6c3c963bed230aa Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Tue, 17 May 2011 00:04:14 +0100 Subject: [PATCH 2/5] Added multilingual wikipedia search capability. --- modules/wikipedia.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/modules/wikipedia.py b/modules/wikipedia.py index 30a23f3..4a4a11b 100755 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -10,9 +10,9 @@ http://inamidst.com/phenny/ import re, urllib import web -wikiuri = 'http://en.wikipedia.org/wiki/%s' -wikisearch = 'http://en.wikipedia.org/wiki/Special:Search?' \ - + 'search=%s&fulltext=Search' +wikiuri = 'http://%s.wikipedia.org/wiki/%s' +# wikisearch = 'http://%s.wikipedia.org/wiki/Special:Search?' \ +# + 'search=%s&fulltext=Search' r_tr = re.compile(r'(?ims)<tr[^>]*>.*?</tr>') r_paragraph = re.compile(r'(?ims)<p[^>]*>.*?</p>|<li(?!n)[^>]*>.*?</li>') @@ -59,30 +59,30 @@ def search(term): return uri[len('http://en.wikipedia.org/wiki/'):] else: return term -def wikipedia(term, last=False): +def wikipedia(term, language='en', last=False): global wikiuri if not '%' in term: if isinstance(term, unicode): t = term.encode('utf-8') else: t = term q = urllib.quote(t) - u = wikiuri % q + u = wikiuri % (language, q) bytes = web.get(u) - else: bytes = web.get(wikiuri % term) + else: bytes = web.get(wikiuri % (language, term)) bytes = r_tr.sub('', bytes) if not last: r = r_redirect.search(bytes[:4096]) if r: term = urllib.unquote(r.group(1)) - return wikipedia(term, last=True) + return wikipedia(term, language=language, last=True) paragraphs = r_paragraph.findall(bytes) if not paragraphs: if not last: term = search(term) - return wikipedia(term, last=True) + return wikipedia(term, language=language, last=True) return None # Pre-process @@ -115,7 +115,7 @@ def wikipedia(term, last=False): if not m: if not last: term = search(term) - return wikipedia(term, last=True) + return wikipedia(term, language=language, last=True) return None sentence = m.group(0) @@ -130,14 +130,14 @@ def wikipedia(term, last=False): or ('or add a request for it' in sentence)): if not last: term = search(term) - return wikipedia(term, last=True) + return wikipedia(term, language=language, last=True) return None sentence = '"' + sentence.replace('"', "'") + '"' sentence = sentence.decode('utf-8').encode('utf-8') wikiuri = wikiuri.decode('utf-8').encode('utf-8') term = term.decode('utf-8').encode('utf-8') - return sentence + ' - ' + (wikiuri % term) + return sentence + ' - ' + (wikiuri % (language, term)) def wik(phenny, input): origterm = input.groups()[1] @@ -146,12 +146,19 @@ def wik(phenny, input): origterm = origterm.encode('utf-8') term = urllib.unquote(origterm) + language = 'en' + if term.startswith(':') and (' ' in term): + a, b = term.split(' ', 1) + a = a.lstrip(':') + if a.isalpha(): + language, term = a, b term = term[0].upper() + 
term[1:] term = term.replace(' ', '_') - try: result = wikipedia(term) + try: result = wikipedia(term, language) except IOError: - error = "Can't connect to en.wikipedia.org (%s)" % (wikiuri % term) + args = (language, wikiuri % (language, term)) + error = "Can't connect to %s.wikipedia.org (%s)" % args return phenny.say(error) if result is not None: From 78ec2730460e8271e3a9d96056799785e6866f83 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 20 May 2011 19:11:55 +0100 Subject: [PATCH 3/5] Fixing a bug with regexp matching when the bot has a metachar name. --- bot.py | 4 ++-- modules/wikipedia.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) mode change 100755 => 100644 modules/wikipedia.py diff --git a/bot.py b/bot.py index e71aa96..3886a87 100755 --- a/bot.py +++ b/bot.py @@ -94,8 +94,8 @@ class Phenny(irc.Bot): def sub(pattern, self=self): # These replacements have significant order - pattern = pattern.replace('$nickname', self.nick) - return pattern.replace('$nick', r'%s[,:] +' % self.nick) + pattern = pattern.replace('$nickname', re.escape(self.nick)) + return pattern.replace('$nick', r'%s[,:] +' % re.escape(self.nick)) for name, func in self.variables.iteritems(): # print name, func diff --git a/modules/wikipedia.py b/modules/wikipedia.py old mode 100755 new mode 100644 index 4a4a11b..b476ba3 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -127,7 +127,8 @@ def wikipedia(term, language='en', last=False): sentence = ' '.join(words) + ' [...]' if (('using the Article Wizard if you wish' in sentence) - or ('or add a request for it' in sentence)): + or ('or add a request for it' in sentence) + or ('in existing articles' in sentence)): if not last: term = search(term) return wikipedia(term, language=language, last=True) From 12c8cd07f52883299ed628752b580462c31ce9f1 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 17 Jun 2011 16:49:37 +0100 Subject: [PATCH 4/5] Search shim, and an encoding fix. 
--- modules/calc.py | 2 +- modules/ping.py | 2 +- modules/search.py | 15 +++++++++++++++ modules/wikipedia.py | 0 4 files changed, 17 insertions(+), 2 deletions(-) mode change 100644 => 100755 modules/wikipedia.py diff --git a/modules/calc.py b/modules/calc.py index 88ac814..9d632ca 100755 --- a/modules/calc.py +++ b/modules/calc.py @@ -90,7 +90,7 @@ c.commands = ['c'] c.example = '.c 5 + 3' def py(phenny, input): - query = input.group(2) + query = input.group(2).encode('utf-8') uri = 'http://tumbolia.appspot.com/py/' answer = web.get(uri + web.urllib.quote(query)) if answer: diff --git a/modules/ping.py b/modules/ping.py index 97e41e1..23219ac 100755 --- a/modules/ping.py +++ b/modules/ping.py @@ -11,7 +11,7 @@ def hello(phenny, input): greeting = random.choice(('Hi', 'Hey', 'Hello')) punctuation = random.choice(('', '!')) phenny.say(greeting + ' ' + input.nick + punctuation) -hello.rule = r'(?i)(hi|hello|hey) $nickname\b' +hello.rule = r'(?i)(hi|hello|hey) $nickname[ \t]*$' def interjection(phenny, input): phenny.say(input.nick + '!') diff --git a/modules/search.py b/modules/search.py index 1067531..f99baf9 100755 --- a/modules/search.py +++ b/modules/search.py @@ -10,17 +10,31 @@ http://inamidst.com/phenny/ import re import web +class Grab(web.urllib.URLopener): + def __init__(self, *args): + self.version = 'Mozilla/5.0 (Phenny)' + web.urllib.URLopener.__init__(self, *args) + self.addheader('Referer', 'https://github.com/sbp/phenny') + def http_error_default(self, url, fp, errcode, errmsg, headers): + return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url) + def search(query): """Search using AjaxSearch, and return its JSON.""" uri = 'http://ajax.googleapis.com/ajax/services/search/web' args = '?v=1.0&safe=off&q=' + web.urllib.quote(query.encode('utf-8')) + handler = web.urllib._urlopener + web.urllib._urlopener = Grab() bytes = web.get(uri + args) + web.urllib._urlopener = handler return web.json(bytes) def result(query): results = search(query) 
try: return results['responseData']['results'][0]['unescapedUrl'] except IndexError: return None + except TypeError: + print results + return False def count(query): results = search(query) @@ -48,6 +62,7 @@ def g(phenny, input): if not hasattr(phenny.bot, 'last_seen_uri'): phenny.bot.last_seen_uri = {} phenny.bot.last_seen_uri[input.sender] = uri + elif uri is False: phenny.reply("Problem getting data from Google.") else: phenny.reply("No results found for '%s'." % query) g.commands = ['g'] g.priority = 'high' diff --git a/modules/wikipedia.py b/modules/wikipedia.py old mode 100644 new mode 100755 From 7dbddc8429d0b238f64acabab435d27472ced062 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 17 Jun 2011 16:56:49 +0100 Subject: [PATCH 5/5] Allow latin1 searches of etymonline, which doesn't seem to support utf-8. --- modules/etymology.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/etymology.py b/modules/etymology.py index 1480191..55c5deb 100755 --- a/modules/etymology.py +++ b/modules/etymology.py @@ -46,7 +46,7 @@ def etymology(word): raise ValueError("Word too long: %s[...]" % word[:10]) word = {'axe': 'ax/axe'}.get(word, word) - bytes = web.get(etyuri % word) + bytes = web.get(etyuri % web.urllib.quote(word)) definitions = r_definition.findall(bytes) if not definitions: @@ -77,7 +77,7 @@ def etymology(word): def f_etymology(self, origin, match, args): word = match.group(2) - try: result = etymology(word.encode('utf-8')) + try: result = etymology(word.encode('iso-8859-1')) except IOError: msg = "Can't connect to etymonline.com (%s)" % (etyuri % word) self.msg(origin.sender, msg) @@ -92,7 +92,7 @@ def f_etymology(self, origin, match, args): msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri) self.msg(origin.sender, msg) # @@ Cf. 
http://swhack.com/logs/2006-01-04#T01-50-22 -f_etymology.rule = (['ety'], r"([A-Za-z0-9' .-]+)$") +f_etymology.rule = (['ety'], r"(.+?)$") f_etymology.thread = True f_etymology.priority = 'high'