From f329429de4b0d7561527d419cd1307eaf38f1cb3 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sbp@aldebaran.local>
Date: Fri, 22 Jul 2011 15:43:50 +0100
Subject: [PATCH 1/3] Support for gzip encoding from Wikipedia.

---
 modules/dict.py       | 1 -
 modules/wikipedia.py  | 9 ++++++++-
 modules/wiktionary.py | 4 ++++
 3 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/modules/dict.py b/modules/dict.py
index 8f13e99..8fa6182 100755
--- a/modules/dict.py
+++ b/modules/dict.py
@@ -51,7 +51,6 @@ def dict(phenny, input):
    if result.endswith('-') and (len(result) < 30): 
       phenny.reply('Sorry, no definition found.')
    else: phenny.say(result)
-dict.commands = ['dict']
 
 if __name__ == '__main__': 
    print __doc__.strip()
diff --git a/modules/wikipedia.py b/modules/wikipedia.py
index b476ba3..4ad0113 100755
--- a/modules/wikipedia.py
+++ b/modules/wikipedia.py
@@ -7,7 +7,7 @@ Licensed under the Eiffel Forum License 2.
 http://inamidst.com/phenny/
 """
 
-import re, urllib
+import re, urllib, gzip, StringIO
 import web
 
 wikiuri = 'http://%s.wikipedia.org/wiki/%s'
@@ -69,6 +69,13 @@ def wikipedia(term, language='en', last=False):
       u = wikiuri % (language, q)
       bytes = web.get(u)
    else: bytes = web.get(wikiuri % (language, term))
+
+   if bytes.startswith('\x1f\x8b\x08\x00\x00\x00\x00\x00'): 
+      f = StringIO.StringIO(bytes)
+      f.seek(0)
+      gzip_file = gzip.GzipFile(fileobj=f)
+      bytes = gzip_file.read()
+
    bytes = r_tr.sub('', bytes)
 
    if not last: 
diff --git a/modules/wiktionary.py b/modules/wiktionary.py
index 4a5f407..9229194 100755
--- a/modules/wiktionary.py
+++ b/modules/wiktionary.py
@@ -92,5 +92,9 @@ def w(phenny, input):
 w.commands = ['w']
 w.example = '.w bailiwick'
 
+def encarta(phenny, input):  # performs no lookup; only sends the fixed notice below
+   return phenny.reply('Microsoft removed Encarta, try .w instead!')
+encarta.commands = ['dict']  # same patch drops dict.commands = ['dict'] from modules/dict.py
+
 if __name__ == '__main__': 
    print __doc__.strip()

From c493e7ca07cca403334eabcc069d5bc1cb468220 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sbp@aldebaran.local>
Date: Fri, 22 Jul 2011 15:53:16 +0100
Subject: [PATCH 2/3] Better file management in Wikipedia module, and deleted
 the Encarta module.

---
 modules/dict.py      | 56 --------------------------------------------
 modules/wikipedia.py |  2 ++
 2 files changed, 2 insertions(+), 56 deletions(-)
 delete mode 100755 modules/dict.py

diff --git a/modules/dict.py b/modules/dict.py
deleted file mode 100755
index 8fa6182..0000000
--- a/modules/dict.py
+++ /dev/null
@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-"""
-dict.py - Phenny Dictionary Module
-Copyright 2008-9, Sean B. Palmer, inamidst.com
-Licensed under the Eiffel Forum License 2.
-
-http://inamidst.com/phenny/
-"""
-
-import re, urllib
-import web
-from tools import deprecated
-
-r_li = re.compile(r'(?ims)<li>.*?</li>')
-r_tag = re.compile(r'<[^>]+>')
-r_parens = re.compile(r'(?<=\()(?:[^()]+|\([^)]+\))*(?=\))')
-r_word = re.compile(r'^[A-Za-z0-9\' -]+$')
-
-uri = 'http://encarta.msn.com/dictionary_/%s.html'
-r_info = re.compile(
-   r'(?:ResultBody"><br /><br />(.*?)&nbsp;)|(?:<b>(.*?)</b>)'
-)
-
-def dict(phenny, input): 
-   if not input.group(2):
-      return phenny.reply("Nothing to define.")
-   word = input.group(2)
-   word = urllib.quote(word.encode('utf-8'))
-
-   def trim(thing): 
-      if thing.endswith('&nbsp;'): 
-         thing = thing[:-6]
-      return thing.strip(' :.')
-
-   bytes = web.get(uri % word)
-   results = {}
-   wordkind = None
-   for kind, sense in r_info.findall(bytes): 
-      kind, sense = trim(kind), trim(sense)
-      if kind: wordkind = kind
-      elif sense: 
-         results.setdefault(wordkind, []).append(sense)
-   result = input.group(2).encode('utf-8') + ' - '
-   for key in sorted(results.keys()): 
-      if results[key]: 
-         result += (key or '') + ' 1. ' + results[key][0]
-         if len(results[key]) > 1: 
-            result += ', 2. ' + results[key][1]
-         result += '; '
-   result = result.rstrip('; ')
-   if result.endswith('-') and (len(result) < 30): 
-      phenny.reply('Sorry, no definition found.')
-   else: phenny.say(result)
-
-if __name__ == '__main__': 
-   print __doc__.strip()
diff --git a/modules/wikipedia.py b/modules/wikipedia.py
index 4ad0113..7d83893 100755
--- a/modules/wikipedia.py
+++ b/modules/wikipedia.py
@@ -75,6 +75,8 @@ def wikipedia(term, language='en', last=False):
       f.seek(0)
       gzip_file = gzip.GzipFile(fileobj=f)
       bytes = gzip_file.read()
+      gzip_file.close()
+      f.close()
 
    bytes = r_tr.sub('', bytes)
 

From 66edd833726d0a14400df65311999b1496b9f8cb Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sean@miscoranda.com>
Date: Thu, 4 Aug 2011 15:53:55 +0100
Subject: [PATCH 3/3] DuckDuckGo support, and minor wa and wik fixes.

---
 modules/calc.py      |  2 +-
 modules/search.py    | 19 +++++++++++++++++++
 modules/wikipedia.py |  2 +-
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/modules/calc.py b/modules/calc.py
index d8cad35..9a5b187 100755
--- a/modules/calc.py
+++ b/modules/calc.py
@@ -105,7 +105,7 @@ def wa(phenny, input):
       return phenny.reply("No search term.")
    query = input.group(2).encode('utf-8')
    uri = 'http://tumbolia.appspot.com/wa/'
-   answer = web.get(uri + web.urllib.quote(query))
+   answer = web.get(uri + web.urllib.quote(query.replace('+', '%2B')))
    if answer: 
       phenny.say(answer)
    else: phenny.reply('Sorry, no result.')
diff --git a/modules/search.py b/modules/search.py
index 2751bb5..c8f9a6a 100755
--- a/modules/search.py
+++ b/modules/search.py
@@ -129,5 +129,24 @@ def bing(phenny, input):
 bing.commands = ['bing']
 bing.example = '.bing swhack'
 
+r_ddg = re.compile(r'nofollow" class="[^"]+" href="(.*?)">')
+
+def ddg(phenny, input): 
+   """Reply with the first r_ddg match in DuckDuckGo's HTML results page."""
+   query = input.group(2)
+   if not query: return phenny.reply('.ddg what?')
+   query = query.encode('utf-8')  # kept unquoted so the no-results reply is readable
+   uri = 'http://duckduckgo.com/html/?q=%s&kl=uk-en' % web.urllib.quote(query)
+   m = r_ddg.search(web.get(uri))
+   if m: 
+      uri = m.group(1)
+      phenny.reply(uri)
+      # Record the URI on phenny.bot keyed by input.sender, creating the dict on first use.
+      if not hasattr(phenny.bot, 'last_seen_uri'):
+         phenny.bot.last_seen_uri = {}
+      phenny.bot.last_seen_uri[input.sender] = uri
+   else: phenny.reply("No results found for '%s'." % query)
+ddg.commands = ['ddg']
+
 if __name__ == '__main__': 
    print __doc__.strip()
diff --git a/modules/wikipedia.py b/modules/wikipedia.py
index 7d83893..ba9ce0b 100755
--- a/modules/wikipedia.py
+++ b/modules/wikipedia.py
@@ -24,7 +24,7 @@ r_redirect = re.compile(
 
 abbrs = ['etc', 'ca', 'cf', 'Co', 'Ltd', 'Inc', 'Mt', 'Mr', 'Mrs', 
          'Dr', 'Ms', 'Rev', 'Fr', 'St', 'Sgt', 'pron', 'approx', 'lit', 
-         'syn', 'transl', 'sess', 'fl', 'Op', 'Dec'] \
+         'syn', 'transl', 'sess', 'fl', 'Op', 'Dec', 'Brig', 'Gen'] \
    + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ') \
    + list('abcdefghijklmnopqrstuvwxyz')
 t_sentence = r'^.{5,}?(?<!\b%s)(?:\.(?=[\[ ][A-Z0-9]|\Z)|\Z)'