added uri snarfing with automatic title reading

2011-03-05 19:51:52 -06:00 · 2011-03-05 19:51:52 -06:00 · ff2434db41
parent 18a24a8117
commit ff2434db41
1 changed files with 34 additions and 24 deletions
--- a/modules/head.py
+++ b/modules/head.py
@ -9,6 +9,7 @@ http://inamidst.com/phenny/
 import re, urllib, urllib2, httplib, urlparse, time, cookielib
 from htmlentitydefs import name2codepoint
 from string import join
 import web
 from tools import deprecated
@ -82,7 +83,32 @@ def f_title(self, origin, match, args):
      uri = self.last_seen_uri.get(origin.sender)
   if not uri: 
      return self.msg(origin.sender, 'I need a URI to give the title of...')
   title = gettitle(uri)
   if title:
      self.msg(origin.sender, origin.nick + ': ' + title)
   else: self.msg(origin.sender, origin.nick + ': No title found')
 f_title.commands = ['title']
 def noteuri(phenny, input): 
   uri = input.group(1).encode('utf-8')
   if not hasattr(phenny.bot, 'last_seen_uri'): 
      phenny.bot.last_seen_uri = {}
   phenny.bot.last_seen_uri[input.sender] = uri
 noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
 noteuri.priority = 'low'
 titlecommands = r'(?:' + join(f_title.commands, r'|') + r')'
 def snarfuri(phenny, input):
   if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
      return
   uri = input.group(1).encode('utf-8')
   title = gettitle(uri)
   if title:
      phenny.msg(input.sender, '[ ' + title + ' ]')
 snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
 snarfuri.priority = 'low'
 def gettitle(uri):
   if not ':' in uri: 
      uri = 'http://' + uri
   uri = uri.replace('#!', '?_escaped_fragment_=')
@ -98,7 +124,6 @@ def f_title(self, origin, match, args):
         u = urllib2.urlopen(req)
         info = u.info()
         u.close()
         # info = web.head(uri)
         if not isinstance(info, list): 
            status = '200'
@ -111,23 +136,19 @@ def f_title(self, origin, match, args):
         redirects += 1
         if redirects >= 25: 
-            self.msg(origin.sender, origin.nick + ": Too many redirects")
+            return None
            return
      try: mtype = info['content-type']
      except: 
-         err = ": Couldn't get the Content-Type, sorry"
+         return None
-         return self.msg(origin.sender, origin.nick + err)
+         if not (('/html' in mtype) or ('/xhtml' in mtype)): 
-      if not (('/html' in mtype) or ('/xhtml' in mtype)): 
+            return None
         self.msg(origin.sender, origin.nick + ": Document isn't HTML")
         return
      u = urllib2.urlopen(req)
      bytes = u.read(262144)
      u.close()
   except IOError: 
      self.msg(origin.sender, "Can't connect to %s" % uri)
      return
   m = r_title.search(bytes)
@ -161,21 +182,10 @@ def f_title(self, origin, match, args):
            try: title = title.decode('iso-8859-1').encode('utf-8')
            except: title = title.decode('cp1252').encode('utf-8')
         else: pass
-      else: title = '[The title is empty.]'
+         title = title.replace('\n', '')
-
+         title = title.replace('\r', '')
-      title = title.replace('\n', '')
+      else: title = None
-      title = title.replace('\r', '')
+   return title
      self.msg(origin.sender, origin.nick + ': ' + title)
   else: self.msg(origin.sender, origin.nick + ': No title found')
 f_title.commands = ['title']
 def noteuri(phenny, input): 
   uri = input.group(1).encode('utf-8')
   if not hasattr(phenny.bot, 'last_seen_uri'): 
      phenny.bot.last_seen_uri = {}
   phenny.bot.last_seen_uri[input.sender] = uri
 noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
 noteuri.priority = 'low'
 if __name__ == '__main__': 
   print __doc__.strip()