From ff2434db414603251cbcbe5f965c58c7e775ea92 Mon Sep 17 00:00:00 2001 From: David Moore Date: Sat, 5 Mar 2011 19:51:52 -0600 Subject: [PATCH] added uri snarfing with automatic title reading --- modules/head.py | 58 +++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/modules/head.py b/modules/head.py index 5231edb..096b9a9 100755 --- a/modules/head.py +++ b/modules/head.py @@ -9,6 +9,7 @@ http://inamidst.com/phenny/ import re, urllib, urllib2, httplib, urlparse, time, cookielib from htmlentitydefs import name2codepoint +from string import join import web from tools import deprecated @@ -82,7 +83,32 @@ def f_title(self, origin, match, args): uri = self.last_seen_uri.get(origin.sender) if not uri: return self.msg(origin.sender, 'I need a URI to give the title of...') + title = gettitle(uri) + if title: + self.msg(origin.sender, origin.nick + ': ' + title) + else: self.msg(origin.sender, origin.nick + ': No title found') +f_title.commands = ['title'] +def noteuri(phenny, input): + uri = input.group(1).encode('utf-8') + if not hasattr(phenny.bot, 'last_seen_uri'): + phenny.bot.last_seen_uri = {} + phenny.bot.last_seen_uri[input.sender] = uri +noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' +noteuri.priority = 'low' + +titlecommands = r'(?:' + join(f_title.commands, r'|') + r')' +def snarfuri(phenny, input): + if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): + return + uri = input.group(1).encode('utf-8') + title = gettitle(uri) + if title: + phenny.msg(input.sender, '[ ' + title + ' ]') +snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' +snarfuri.priority = 'low' + +def gettitle(uri): if not ':' in uri: uri = 'http://' + uri uri = uri.replace('#!', '?_escaped_fragment_=') @@ -98,7 +124,6 @@ def f_title(self, origin, match, args): u = urllib2.urlopen(req) info = u.info() u.close() - # info = web.head(uri) if not isinstance(info, list): status = '200' @@ -111,23 +136,19 @@ def f_title(self, origin, match, args): redirects += 1 if redirects >= 25: - self.msg(origin.sender, origin.nick + ": Too many redirects") - return + return None try: mtype = info['content-type'] except: - err = ": Couldn't get the Content-Type, sorry" - return self.msg(origin.sender, origin.nick + err) - if not (('/html' in mtype) or ('/xhtml' in mtype)): - self.msg(origin.sender, origin.nick + ": Document isn't HTML") - return + return None + if not (('/html' in mtype) or ('/xhtml' in mtype)): + return None u = urllib2.urlopen(req) bytes = u.read(262144) u.close() except IOError: - self.msg(origin.sender, "Can't connect to %s" % uri) return m = r_title.search(bytes) @@ -161,21 +182,10 @@ def f_title(self, origin, match, args): try: title = title.decode('iso-8859-1').encode('utf-8') except: title = title.decode('cp1252').encode('utf-8') else: pass - else: title = '[The title is empty.]' - - title = title.replace('\n', '') - title = title.replace('\r', '') - self.msg(origin.sender, origin.nick + ': ' + title) - else: self.msg(origin.sender, origin.nick + ': No title found') -f_title.commands = ['title'] - -def noteuri(phenny, input): - uri = input.group(1).encode('utf-8') - if not hasattr(phenny.bot, 'last_seen_uri'): - phenny.bot.last_seen_uri = {} - phenny.bot.last_seen_uri[input.sender] = uri -noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' -noteuri.priority = 'low' + title = title.replace('\n', '') + title = title.replace('\r', '') + else: title = None + return title if __name__ == '__main__': print __doc__.strip()