added uri snarfing with automatic title reading
parent
18a24a8117
commit
ff2434db41
|
@ -9,6 +9,7 @@ http://inamidst.com/phenny/
|
|||
|
||||
import re, urllib, urllib2, httplib, urlparse, time, cookielib
|
||||
from htmlentitydefs import name2codepoint
|
||||
from string import join
|
||||
import web
|
||||
from tools import deprecated
|
||||
|
||||
|
@ -82,7 +83,32 @@ def f_title(self, origin, match, args):
|
|||
uri = self.last_seen_uri.get(origin.sender)
|
||||
if not uri:
|
||||
return self.msg(origin.sender, 'I need a URI to give the title of...')
|
||||
title = gettitle(uri)
|
||||
if title:
|
||||
self.msg(origin.sender, origin.nick + ': ' + title)
|
||||
else: self.msg(origin.sender, origin.nick + ': No title found')
|
||||
f_title.commands = ['title']
|
||||
|
||||
def noteuri(phenny, input):
|
||||
uri = input.group(1).encode('utf-8')
|
||||
if not hasattr(phenny.bot, 'last_seen_uri'):
|
||||
phenny.bot.last_seen_uri = {}
|
||||
phenny.bot.last_seen_uri[input.sender] = uri
|
||||
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
|
||||
noteuri.priority = 'low'
|
||||
|
||||
titlecommands = r'(?:' + join(f_title.commands, r'|') + r')'
|
||||
def snarfuri(phenny, input):
|
||||
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
|
||||
return
|
||||
uri = input.group(1).encode('utf-8')
|
||||
title = gettitle(uri)
|
||||
if title:
|
||||
phenny.msg(input.sender, '[ ' + title + ' ]')
|
||||
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
|
||||
snarfuri.priority = 'low'
|
||||
|
||||
def gettitle(uri):
|
||||
if not ':' in uri:
|
||||
uri = 'http://' + uri
|
||||
uri = uri.replace('#!', '?_escaped_fragment_=')
|
||||
|
@ -98,7 +124,6 @@ def f_title(self, origin, match, args):
|
|||
u = urllib2.urlopen(req)
|
||||
info = u.info()
|
||||
u.close()
|
||||
# info = web.head(uri)
|
||||
|
||||
if not isinstance(info, list):
|
||||
status = '200'
|
||||
|
@ -111,23 +136,19 @@ def f_title(self, origin, match, args):
|
|||
|
||||
redirects += 1
|
||||
if redirects >= 25:
|
||||
self.msg(origin.sender, origin.nick + ": Too many redirects")
|
||||
return
|
||||
return None
|
||||
|
||||
try: mtype = info['content-type']
|
||||
except:
|
||||
err = ": Couldn't get the Content-Type, sorry"
|
||||
return self.msg(origin.sender, origin.nick + err)
|
||||
return None
|
||||
if not (('/html' in mtype) or ('/xhtml' in mtype)):
|
||||
self.msg(origin.sender, origin.nick + ": Document isn't HTML")
|
||||
return
|
||||
return None
|
||||
|
||||
u = urllib2.urlopen(req)
|
||||
bytes = u.read(262144)
|
||||
u.close()
|
||||
|
||||
except IOError:
|
||||
self.msg(origin.sender, "Can't connect to %s" % uri)
|
||||
return
|
||||
|
||||
m = r_title.search(bytes)
|
||||
|
@ -161,21 +182,10 @@ def f_title(self, origin, match, args):
|
|||
try: title = title.decode('iso-8859-1').encode('utf-8')
|
||||
except: title = title.decode('cp1252').encode('utf-8')
|
||||
else: pass
|
||||
else: title = '[The title is empty.]'
|
||||
|
||||
title = title.replace('\n', '')
|
||||
title = title.replace('\r', '')
|
||||
self.msg(origin.sender, origin.nick + ': ' + title)
|
||||
else: self.msg(origin.sender, origin.nick + ': No title found')
|
||||
f_title.commands = ['title']
|
||||
|
||||
def noteuri(phenny, input):
|
||||
uri = input.group(1).encode('utf-8')
|
||||
if not hasattr(phenny.bot, 'last_seen_uri'):
|
||||
phenny.bot.last_seen_uri = {}
|
||||
phenny.bot.last_seen_uri[input.sender] = uri
|
||||
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
|
||||
noteuri.priority = 'low'
|
||||
else: title = None
|
||||
return title
|
||||
|
||||
if __name__ == '__main__':
|
||||
print __doc__.strip()
|
||||
|
|
Loading…
Reference in New Issue