From f329429de4b0d7561527d419cd1307eaf38f1cb3 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 22 Jul 2011 15:43:50 +0100 Subject: [PATCH 1/3] Support for gzip encoding from Wikipedia. --- modules/dict.py | 1 - modules/wikipedia.py | 9 ++++++++- modules/wiktionary.py | 4 ++++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/dict.py b/modules/dict.py index 8f13e99..8fa6182 100755 --- a/modules/dict.py +++ b/modules/dict.py @@ -51,7 +51,6 @@ def dict(phenny, input): if result.endswith('-') and (len(result) < 30): phenny.reply('Sorry, no definition found.') else: phenny.say(result) -dict.commands = ['dict'] if __name__ == '__main__': print __doc__.strip() diff --git a/modules/wikipedia.py b/modules/wikipedia.py index b476ba3..4ad0113 100755 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -7,7 +7,7 @@ Licensed under the Eiffel Forum License 2. http://inamidst.com/phenny/ """ -import re, urllib +import re, urllib, gzip, StringIO import web wikiuri = 'http://%s.wikipedia.org/wiki/%s' @@ -69,6 +69,13 @@ def wikipedia(term, language='en', last=False): u = wikiuri % (language, q) bytes = web.get(u) else: bytes = web.get(wikiuri % (language, term)) + + if bytes.startswith('\x1f\x8b\x08\x00\x00\x00\x00\x00'): + f = StringIO.StringIO(bytes) + f.seek(0) + gzip_file = gzip.GzipFile(fileobj=f) + bytes = gzip_file.read() + bytes = r_tr.sub('', bytes) if not last: diff --git a/modules/wiktionary.py b/modules/wiktionary.py index 4a5f407..9229194 100755 --- a/modules/wiktionary.py +++ b/modules/wiktionary.py @@ -92,5 +92,9 @@ def w(phenny, input): w.commands = ['w'] w.example = '.w bailiwick' +def encarta(phenny, input): + return phenny.reply('Microsoft removed Encarta, try .w instead!') +encarta.commands = ['dict'] + if __name__ == '__main__': print __doc__.strip() From c493e7ca07cca403334eabcc069d5bc1cb468220 Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Fri, 22 Jul 2011 15:53:16 +0100 Subject: [PATCH 2/3] Better file management in Wikipedia module, and deleted the Encarta module. --- modules/dict.py | 56 -------------------------------------------- modules/wikipedia.py | 2 ++ 2 files changed, 2 insertions(+), 56 deletions(-) delete mode 100755 modules/dict.py diff --git a/modules/dict.py b/modules/dict.py deleted file mode 100755 index 8fa6182..0000000 --- a/modules/dict.py +++ /dev/null @@ -1,56 +0,0 @@ -#!/usr/bin/env python -""" -dict.py - Phenny Dictionary Module -Copyright 2008-9, Sean B. Palmer, inamidst.com -Licensed under the Eiffel Forum License 2. - -http://inamidst.com/phenny/ -""" - -import re, urllib -import web -from tools import deprecated - -r_li = re.compile(r'(?ims)
  • .*?
  • ') -r_tag = re.compile(r'<[^>]+>') -r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))') -r_word = re.compile(r'^[A-Za-z0-9\' -]+$') - -uri = 'http://encarta.msn.com/dictionary_/%s.html' -r_info = re.compile( - r'(?:ResultBody">

    (.*?) )|(?:(.*?))' -) - -def dict(phenny, input): - if not input.group(2): - return phenny.reply("Nothing to define.") - word = input.group(2) - word = urllib.quote(word.encode('utf-8')) - - def trim(thing): - if thing.endswith(' '): - thing = thing[:-6] - return thing.strip(' :.') - - bytes = web.get(uri % word) - results = {} - wordkind = None - for kind, sense in r_info.findall(bytes): - kind, sense = trim(kind), trim(sense) - if kind: wordkind = kind - elif sense: - results.setdefault(wordkind, []).append(sense) - result = input.group(2).encode('utf-8') + ' - ' - for key in sorted(results.keys()): - if results[key]: - result += (key or '') + ' 1. ' + results[key][0] - if len(results[key]) > 1: - result += ', 2. ' + results[key][1] - result += '; ' - result = result.rstrip('; ') - if result.endswith('-') and (len(result) < 30): - phenny.reply('Sorry, no definition found.') - else: phenny.say(result) - -if __name__ == '__main__': - print __doc__.strip() diff --git a/modules/wikipedia.py b/modules/wikipedia.py index 4ad0113..7d83893 100755 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -75,6 +75,8 @@ def wikipedia(term, language='en', last=False): f.seek(0) gzip_file = gzip.GzipFile(fileobj=f) bytes = gzip_file.read() + gzip_file.close() + f.close() bytes = r_tr.sub('', bytes) From 66edd833726d0a14400df65311999b1496b9f8cb Mon Sep 17 00:00:00 2001 From: "Sean B. Palmer" Date: Thu, 4 Aug 2011 15:53:55 +0100 Subject: [PATCH 3/3] DuckDuckGo support, and minor wa and wik fixes. --- modules/calc.py | 2 +- modules/search.py | 19 +++++++++++++++++++ modules/wikipedia.py | 2 +- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/modules/calc.py b/modules/calc.py index d8cad35..9a5b187 100755 --- a/modules/calc.py +++ b/modules/calc.py @@ -105,7 +105,7 @@ def wa(phenny, input): return phenny.reply("No search term.") query = input.group(2).encode('utf-8') uri = 'http://tumbolia.appspot.com/wa/' - answer = web.get(uri + web.urllib.quote(query)) + answer = web.get(uri + web.urllib.quote(query.replace('+', '%2B'))) if answer: phenny.say(answer) else: phenny.reply('Sorry, no result.') diff --git a/modules/search.py b/modules/search.py index 2751bb5..c8f9a6a 100755 --- a/modules/search.py +++ b/modules/search.py @@ -129,5 +129,24 @@ def bing(phenny, input): bing.commands = ['bing'] bing.example = '.bing swhack' +r_ddg = re.compile(r'nofollow" class="[^"]+" href="(.*?)">') + +def ddg(phenny, input): + query = input.group(2) + if not query: return phenny.reply('.ddg what?') + + query = web.urllib.quote(query.encode('utf-8')) + uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % query + bytes = web.get(uri) + m = r_ddg.search(bytes) + if m: + uri = m.group(1) + phenny.reply(uri) + if not hasattr(phenny.bot, 'last_seen_uri'): + phenny.bot.last_seen_uri = {} + phenny.bot.last_seen_uri[input.sender] = uri + else: phenny.reply("No results found for '%s'." % query) +ddg.commands = ['ddg'] + if __name__ == '__main__': print __doc__.strip() diff --git a/modules/wikipedia.py b/modules/wikipedia.py index 7d83893..ba9ce0b 100755 --- a/modules/wikipedia.py +++ b/modules/wikipedia.py @@ -24,7 +24,7 @@ r_redirect = re.compile( abbrs = ['etc', 'ca', 'cf', 'Co', 'Ltd', 'Inc', 'Mt', 'Mr', 'Mrs', 'Dr', 'Ms', 'Rev', 'Fr', 'St', 'Sgt', 'pron', 'approx', 'lit', - 'syn', 'transl', 'sess', 'fl', 'Op', 'Dec'] \ + 'syn', 'transl', 'sess', 'fl', 'Op', 'Dec', 'Brig', 'Gen'] \ + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') \ + list('abcdefghijklmnopqrstuvwxyz') t_sentence = r'^.{5,}?(?