From 482161e98b254c4d67a81def8c726dad7bf03af8 Mon Sep 17 00:00:00 2001
From: Steve Vaught <Steven.D.Vaught@gmail.com>
Date: Tue, 19 Apr 2011 00:50:58 -0400
Subject: [PATCH 1/5] Added error checking for empty parameters

---
 modules/calc.py       | 4 ++++
 modules/dict.py       | 2 ++
 modules/search.py     | 2 ++
 modules/validate.py   | 2 ++
 modules/wiktionary.py | 2 ++
 5 files changed, 12 insertions(+)
diff --git a/modules/calc.py b/modules/calc.py
index 88ac814..5a72e10 100755
--- a/modules/calc.py
+++ b/modules/calc.py
@@ -69,6 +69,8 @@ calc.example = '.calc 5 + 3'
 
 def c(phenny, input): 
    """Google calculator."""
+   if not input.group(2):
+      return phenny.reply("Nothing to calculate.")
    q = input.group(2).encode('utf-8')
    q = q.replace('\xcf\x95', 'phi') # utf-8 U+03D5
    q = q.replace('\xcf\x80', 'pi') # utf-8 U+03C0
@@ -99,6 +101,8 @@ def py(phenny, input):
 py.commands = ['py']
 
 def wa(phenny, input): 
+   if not input.group(2):
+      return phenny.reply("No search term.")
    query = input.group(2).encode('utf-8')
    uri = 'http://tumbolia.appspot.com/wa/'
    answer = web.get(uri + web.urllib.quote(query))
diff --git a/modules/dict.py b/modules/dict.py
index 125f686..8f13e99 100755
--- a/modules/dict.py
+++ b/modules/dict.py
@@ -22,6 +22,8 @@ r_info = re.compile(
 )
 
 def dict(phenny, input): 
+   if not input.group(2):
+      return phenny.reply("Nothing to define.")
    word = input.group(2)
    word = urllib.quote(word.encode('utf-8'))
 
diff --git a/modules/search.py b/modules/search.py
index 1067531..d83a47e 100755
--- a/modules/search.py
+++ b/modules/search.py
@@ -69,6 +69,8 @@ r_query = re.compile(
 )
 
 def gcs(phenny, input): 
+   if not input.group(2):
+      return phenny.reply("Nothing to compare.")
    queries = r_query.findall(input.group(2))
    if len(queries) > 6: 
       return phenny.reply('Sorry, can only compare up to six things.')
diff --git a/modules/validate.py b/modules/validate.py
index 185623f..85815d1 100755
--- a/modules/validate.py
+++ b/modules/validate.py
@@ -11,6 +11,8 @@ import web
 
 def val(phenny, input): 
    """Check a webpage using the W3C Markup Validator."""
+   if not input.group(2):
+      return phenny.reply("Nothing to validate.")
    uri = input.group(2)
    if not uri.startswith('http://'): 
       uri = 'http://' + uri
diff --git a/modules/wiktionary.py b/modules/wiktionary.py
index c8f665e..4a5f407 100755
--- a/modules/wiktionary.py
+++ b/modules/wiktionary.py
@@ -72,6 +72,8 @@ def format(word, definitions, number=2):
    return result.strip(' .,')
 
 def w(phenny, input): 
+   if not input.group(2):
+      return phenny.reply("Nothing to define.")
    word = input.group(2)
    etymology, definitions = wiktionary(word)
    if not definitions: 

From ccd0343d131799985f18cf5ac6c3c963bed230aa Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sean@miscoranda.com>
Date: Tue, 17 May 2011 00:04:14 +0100
Subject: [PATCH 2/5] Added multilingual wikipedia search capability.

---
 modules/wikipedia.py | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/modules/wikipedia.py b/modules/wikipedia.py
index 30a23f3..4a4a11b 100755
--- a/modules/wikipedia.py
+++ b/modules/wikipedia.py
@@ -10,9 +10,9 @@ http://inamidst.com/phenny/
 import re, urllib
 import web
 
-wikiuri = 'http://en.wikipedia.org/wiki/%s'
-wikisearch = 'http://en.wikipedia.org/wiki/Special:Search?' \
-                    + 'search=%s&fulltext=Search'
+wikiuri = 'http://%s.wikipedia.org/wiki/%s'
+# wikisearch = 'http://%s.wikipedia.org/wiki/Special:Search?' \
+#                     + 'search=%s&fulltext=Search'
 
 r_tr = re.compile(r'(?ims)<tr[^>]*>.*?</tr>')
 r_paragraph = re.compile(r'(?ims)<p[^>]*>.*?</p>|<li(?!n)[^>]*>.*?</li>')
@@ -59,30 +59,30 @@ def search(term):
       return uri[len('http://en.wikipedia.org/wiki/'):]
    else: return term
 
-def wikipedia(term, last=False): 
+def wikipedia(term, language='en', last=False): 
    global wikiuri
    if not '%' in term: 
       if isinstance(term, unicode): 
          t = term.encode('utf-8')
       else: t = term
       q = urllib.quote(t)
-      u = wikiuri % q
+      u = wikiuri % (language, q)
       bytes = web.get(u)
-   else: bytes = web.get(wikiuri % term)
+   else: bytes = web.get(wikiuri % (language, term))
    bytes = r_tr.sub('', bytes)
 
    if not last: 
       r = r_redirect.search(bytes[:4096])
       if r: 
          term = urllib.unquote(r.group(1))
-         return wikipedia(term, last=True)
+         return wikipedia(term, language=language, last=True)
 
    paragraphs = r_paragraph.findall(bytes)
 
    if not paragraphs: 
       if not last: 
          term = search(term)
-         return wikipedia(term, last=True)
+         return wikipedia(term, language=language, last=True)
       return None
 
    # Pre-process
@@ -115,7 +115,7 @@ def wikipedia(term, last=False):
    if not m: 
       if not last: 
          term = search(term)
-         return wikipedia(term, last=True)
+         return wikipedia(term, language=language, last=True)
       return None
    sentence = m.group(0)
 
@@ -130,14 +130,14 @@ def wikipedia(term, last=False):
     or ('or add a request for it' in sentence)): 
       if not last: 
          term = search(term)
-         return wikipedia(term, last=True)
+         return wikipedia(term, language=language, last=True)
       return None
 
    sentence = '"' + sentence.replace('"', "'") + '"'
    sentence = sentence.decode('utf-8').encode('utf-8')
    wikiuri = wikiuri.decode('utf-8').encode('utf-8')
    term = term.decode('utf-8').encode('utf-8')
-   return sentence + ' - ' + (wikiuri % term)
+   return sentence + ' - ' + (wikiuri % (language, term))
 
 def wik(phenny, input): 
    origterm = input.groups()[1]
@@ -146,12 +146,19 @@ def wik(phenny, input):
    origterm = origterm.encode('utf-8')
 
    term = urllib.unquote(origterm)
+   language = 'en'
+   if term.startswith(':') and (' ' in term): 
+      a, b = term.split(' ', 1)
+      a = a.lstrip(':')
+      if a.isalpha(): 
+         language, term = a, b
    term = term[0].upper() + term[1:]
    term = term.replace(' ', '_')
 
-   try: result = wikipedia(term)
+   try: result = wikipedia(term, language)
    except IOError: 
-      error = "Can't connect to en.wikipedia.org (%s)" % (wikiuri % term)
+      args = (language, wikiuri % (language, term))
+      error = "Can't connect to %s.wikipedia.org (%s)" % args
       return phenny.say(error)
 
    if result is not None: 

From 78ec2730460e8271e3a9d96056799785e6866f83 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sbp@aldebaran.local>
Date: Fri, 20 May 2011 19:11:55 +0100
Subject: [PATCH 3/5] Fixing a bug with regexp matching when the bot has a
 metachar name.

---
 bot.py               | 4 ++--
 modules/wikipedia.py | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)
 mode change 100755 => 100644 modules/wikipedia.py

diff --git a/bot.py b/bot.py
index e71aa96..3886a87 100755
--- a/bot.py
+++ b/bot.py
@@ -94,8 +94,8 @@ class Phenny(irc.Bot):
 
       def sub(pattern, self=self): 
          # These replacements have significant order
-         pattern = pattern.replace('$nickname', self.nick)
-         return pattern.replace('$nick', r'%s[,:] +' % self.nick)
+         pattern = pattern.replace('$nickname', re.escape(self.nick))
+         return pattern.replace('$nick', r'%s[,:] +' % re.escape(self.nick))
 
       for name, func in self.variables.iteritems(): 
          # print name, func
diff --git a/modules/wikipedia.py b/modules/wikipedia.py
old mode 100755
new mode 100644
index 4a4a11b..b476ba3
--- a/modules/wikipedia.py
+++ b/modules/wikipedia.py
@@ -127,7 +127,8 @@ def wikipedia(term, language='en', last=False):
       sentence = ' '.join(words) + ' [...]'
 
    if (('using the Article Wizard if you wish' in sentence)
-    or ('or add a request for it' in sentence)): 
+    or ('or add a request for it' in sentence)
+    or ('in existing articles' in sentence)): 
       if not last: 
          term = search(term)
          return wikipedia(term, language=language, last=True)

From 12c8cd07f52883299ed628752b580462c31ce9f1 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sbp@aldebaran.local>
Date: Fri, 17 Jun 2011 16:49:37 +0100
Subject: [PATCH 4/5] Search shim, and an encoding fix.

---
 modules/calc.py      |  2 +-
 modules/ping.py      |  2 +-
 modules/search.py    | 15 +++++++++++++++
 modules/wikipedia.py |  0
 4 files changed, 17 insertions(+), 2 deletions(-)
 mode change 100644 => 100755 modules/wikipedia.py

diff --git a/modules/calc.py b/modules/calc.py
index 88ac814..9d632ca 100755
--- a/modules/calc.py
+++ b/modules/calc.py
@@ -90,7 +90,7 @@ c.commands = ['c']
 c.example = '.c 5 + 3'
 
 def py(phenny, input): 
-   query = input.group(2)
+   query = input.group(2).encode('utf-8')
    uri = 'http://tumbolia.appspot.com/py/'
    answer = web.get(uri + web.urllib.quote(query))
    if answer: 
diff --git a/modules/ping.py b/modules/ping.py
index 97e41e1..23219ac 100755
--- a/modules/ping.py
+++ b/modules/ping.py
@@ -11,7 +11,7 @@ def hello(phenny, input):
    greeting = random.choice(('Hi', 'Hey', 'Hello'))
    punctuation = random.choice(('', '!'))
    phenny.say(greeting + ' ' + input.nick + punctuation)
-hello.rule = r'(?i)(hi|hello|hey) $nickname\b'
+hello.rule = r'(?i)(hi|hello|hey) $nickname[ \t]*$'
 
 def interjection(phenny, input): 
    phenny.say(input.nick + '!')
diff --git a/modules/search.py b/modules/search.py
index 1067531..f99baf9 100755
--- a/modules/search.py
+++ b/modules/search.py
@@ -10,17 +10,31 @@ http://inamidst.com/phenny/
 import re
 import web
 
+class Grab(web.urllib.URLopener):
+   def __init__(self, *args):
+      self.version = 'Mozilla/5.0 (Phenny)'
+      web.urllib.URLopener.__init__(self, *args)
+      self.addheader('Referer', 'https://github.com/sbp/phenny')
+   def http_error_default(self, url, fp, errcode, errmsg, headers):
+      return web.urllib.addinfourl(fp, [headers, errcode], "http:" + url)
+
 def search(query): 
    """Search using AjaxSearch, and return its JSON."""
    uri = 'http://ajax.googleapis.com/ajax/services/search/web'
    args = '?v=1.0&safe=off&q=' + web.urllib.quote(query.encode('utf-8'))
+   handler = web.urllib._urlopener
+   web.urllib._urlopener = Grab()
    bytes = web.get(uri + args)
+   web.urllib._urlopener = handler
    return web.json(bytes)
 
 def result(query): 
    results = search(query)
    try: return results['responseData']['results'][0]['unescapedUrl']
    except IndexError: return None
+   except TypeError: 
+      print results
+      return False
 
 def count(query): 
    results = search(query)
@@ -48,6 +62,7 @@ def g(phenny, input):
       if not hasattr(phenny.bot, 'last_seen_uri'):
          phenny.bot.last_seen_uri = {}
       phenny.bot.last_seen_uri[input.sender] = uri
+   elif uri is False: phenny.reply("Problem getting data from Google.")
    else: phenny.reply("No results found for '%s'." % query)
 g.commands = ['g']
 g.priority = 'high'
diff --git a/modules/wikipedia.py b/modules/wikipedia.py
old mode 100644
new mode 100755

From 7dbddc8429d0b238f64acabab435d27472ced062 Mon Sep 17 00:00:00 2001
From: "Sean B. Palmer" <sbp@aldebaran.local>
Date: Fri, 17 Jun 2011 16:56:49 +0100
Subject: [PATCH 5/5] Allow latin1 searches of etymonline, which doesn't seem
 to support utf-8.

---
 modules/etymology.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/etymology.py b/modules/etymology.py
index 1480191..55c5deb 100755
--- a/modules/etymology.py
+++ b/modules/etymology.py
@@ -46,7 +46,7 @@ def etymology(word):
       raise ValueError("Word too long: %s[...]" % word[:10])
    word = {'axe': 'ax/axe'}.get(word, word)
 
-   bytes = web.get(etyuri % word)
+   bytes = web.get(etyuri % web.urllib.quote(word))
    definitions = r_definition.findall(bytes)
 
    if not definitions: 
@@ -77,7 +77,7 @@ def etymology(word):
 def f_etymology(self, origin, match, args): 
    word = match.group(2)
 
-   try: result = etymology(word.encode('utf-8'))
+   try: result = etymology(word.encode('iso-8859-1'))
    except IOError: 
       msg = "Can't connect to etymonline.com (%s)" % (etyuri % word)
       self.msg(origin.sender, msg)
@@ -92,7 +92,7 @@ def f_etymology(self, origin, match, args):
       msg = 'Can\'t find the etymology for "%s". Try %s' % (word, uri)
       self.msg(origin.sender, msg)
 # @@ Cf. http://swhack.com/logs/2006-01-04#T01-50-22
-f_etymology.rule = (['ety'], r"([A-Za-z0-9' .-]+)$")
+f_etymology.rule = (['ety'], r"(.+?)$")
 f_etymology.thread = True
 f_etymology.priority = 'high'