Added URI snarfing with automatic title reading
parent
18a24a8117
commit
ff2434db41
|
@ -9,6 +9,7 @@ http://inamidst.com/phenny/
|
||||||
|
|
||||||
import re, urllib, urllib2, httplib, urlparse, time, cookielib
|
import re, urllib, urllib2, httplib, urlparse, time, cookielib
|
||||||
from htmlentitydefs import name2codepoint
|
from htmlentitydefs import name2codepoint
|
||||||
|
from string import join
|
||||||
import web
|
import web
|
||||||
from tools import deprecated
|
from tools import deprecated
|
||||||
|
|
||||||
|
@ -82,7 +83,32 @@ def f_title(self, origin, match, args):
|
||||||
uri = self.last_seen_uri.get(origin.sender)
|
uri = self.last_seen_uri.get(origin.sender)
|
||||||
if not uri:
|
if not uri:
|
||||||
return self.msg(origin.sender, 'I need a URI to give the title of...')
|
return self.msg(origin.sender, 'I need a URI to give the title of...')
|
||||||
|
title = gettitle(uri)
|
||||||
|
if title:
|
||||||
|
self.msg(origin.sender, origin.nick + ': ' + title)
|
||||||
|
else: self.msg(origin.sender, origin.nick + ': No title found')
|
||||||
|
f_title.commands = ['title']
|
||||||
|
|
||||||
|
def noteuri(phenny, input):
    """Remember the most recent URI seen from a sender.

    The URI captured by ``noteuri.rule`` is stored, UTF-8 encoded, in
    ``phenny.bot.last_seen_uri`` keyed by ``input.sender``. The dictionary
    is created on the bot object the first time a URI is noted.

    phenny -- bot wrapper; only its ``bot`` attribute is used here.
    input  -- matched message; ``group(1)`` is the URI and ``sender`` the
              channel or nick the message came from.
    """
    # The character class in the rule is greedy and itself matches ',' and
    # '.', so the rule's optional trailing [,.] never gets to consume
    # sentence punctuation. Strip it here so "see http://example.org/a."
    # is remembered without the full stop.
    uri = input.group(1).rstrip(',.').encode('utf-8')
    if not hasattr(phenny.bot, 'last_seen_uri'):
        phenny.bot.last_seen_uri = {}
    phenny.bot.last_seen_uri[input.sender] = uri
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
noteuri.priority = 'low'
|
||||||
|
|
||||||
|
# Regex alternation of every command name registered for f_title, used by
# snarfuri to recognise an explicit title request. str.join replaces the
# deprecated string.join() helper and produces the same string.
titlecommands = r'(?:' + '|'.join(f_title.commands) + r')'

def snarfuri(phenny, input):
    """Announce the title of a URI mentioned in a message.

    The URI captured by ``snarfuri.rule`` is passed to ``gettitle``; when
    that returns a non-empty title it is sent to ``input.sender`` wrapped
    in ``[ ... ]``. Nothing is sent when no title is returned.

    phenny -- bot wrapper; ``config.prefix`` and ``msg`` are used.
    input  -- matched message; ``group()`` is the matched text,
              ``group(1)`` the URI and ``sender`` the reply target.
    """
    # Messages that start with the command prefix plus a title command are
    # left alone, so the title is not reported a second time by this hook.
    if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
        return
    # The greedy character class in the rule also swallows ',' and '.', so
    # the rule's trailing [,.]? never removes sentence punctuation. Strip it
    # before fetching, otherwise "http://example.org/a." is requested with
    # the full stop attached.
    uri = input.group(1).rstrip(',.').encode('utf-8')
    title = gettitle(uri)
    if title:
        phenny.msg(input.sender, '[ ' + title + ' ]')
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low'
|
||||||
|
|
||||||
|
def gettitle(uri):
|
||||||
if not ':' in uri:
|
if not ':' in uri:
|
||||||
uri = 'http://' + uri
|
uri = 'http://' + uri
|
||||||
uri = uri.replace('#!', '?_escaped_fragment_=')
|
uri = uri.replace('#!', '?_escaped_fragment_=')
|
||||||
|
@ -98,7 +124,6 @@ def f_title(self, origin, match, args):
|
||||||
u = urllib2.urlopen(req)
|
u = urllib2.urlopen(req)
|
||||||
info = u.info()
|
info = u.info()
|
||||||
u.close()
|
u.close()
|
||||||
# info = web.head(uri)
|
|
||||||
|
|
||||||
if not isinstance(info, list):
|
if not isinstance(info, list):
|
||||||
status = '200'
|
status = '200'
|
||||||
|
@ -111,23 +136,19 @@ def f_title(self, origin, match, args):
|
||||||
|
|
||||||
redirects += 1
|
redirects += 1
|
||||||
if redirects >= 25:
|
if redirects >= 25:
|
||||||
self.msg(origin.sender, origin.nick + ": Too many redirects")
|
return None
|
||||||
return
|
|
||||||
|
|
||||||
try: mtype = info['content-type']
|
try: mtype = info['content-type']
|
||||||
except:
|
except:
|
||||||
err = ": Couldn't get the Content-Type, sorry"
|
return None
|
||||||
return self.msg(origin.sender, origin.nick + err)
|
|
||||||
if not (('/html' in mtype) or ('/xhtml' in mtype)):
|
if not (('/html' in mtype) or ('/xhtml' in mtype)):
|
||||||
self.msg(origin.sender, origin.nick + ": Document isn't HTML")
|
return None
|
||||||
return
|
|
||||||
|
|
||||||
u = urllib2.urlopen(req)
|
u = urllib2.urlopen(req)
|
||||||
bytes = u.read(262144)
|
bytes = u.read(262144)
|
||||||
u.close()
|
u.close()
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
self.msg(origin.sender, "Can't connect to %s" % uri)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
m = r_title.search(bytes)
|
m = r_title.search(bytes)
|
||||||
|
@ -161,21 +182,10 @@ def f_title(self, origin, match, args):
|
||||||
try: title = title.decode('iso-8859-1').encode('utf-8')
|
try: title = title.decode('iso-8859-1').encode('utf-8')
|
||||||
except: title = title.decode('cp1252').encode('utf-8')
|
except: title = title.decode('cp1252').encode('utf-8')
|
||||||
else: pass
|
else: pass
|
||||||
else: title = '[The title is empty.]'
|
|
||||||
|
|
||||||
title = title.replace('\n', '')
|
title = title.replace('\n', '')
|
||||||
title = title.replace('\r', '')
|
title = title.replace('\r', '')
|
||||||
self.msg(origin.sender, origin.nick + ': ' + title)
|
else: title = None
|
||||||
else: self.msg(origin.sender, origin.nick + ': No title found')
|
return title
|
||||||
f_title.commands = ['title']
|
|
||||||
|
|
||||||
def noteuri(phenny, input):
    """Remember the most recent URI seen from a sender.

    The URI captured by ``noteuri.rule`` is stored, UTF-8 encoded, in
    ``phenny.bot.last_seen_uri`` keyed by ``input.sender``. The dictionary
    is created on the bot object the first time a URI is noted.

    phenny -- bot wrapper; only its ``bot`` attribute is used here.
    input  -- matched message; ``group(1)`` is the URI and ``sender`` the
              channel or nick the message came from.
    """
    # The character class in the rule is greedy and itself matches ',' and
    # '.', so the rule's optional trailing [,.] never gets to consume
    # sentence punctuation. Strip it here so "see http://example.org/a."
    # is remembered without the full stop.
    uri = input.group(1).rstrip(',.').encode('utf-8')
    if not hasattr(phenny.bot, 'last_seen_uri'):
        phenny.bot.last_seen_uri = {}
    phenny.bot.last_seen_uri[input.sender] = uri
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
noteuri.priority = 'low'
|
|
||||||
|
|
||||||
if __name__ == '__main__':
    # When run as a script, print the module docstring. __doc__ is None when
    # docstrings are stripped (python -OO), so fall back to an empty string
    # instead of raising AttributeError. The parenthesised single-argument
    # print is valid on both Python 2 and Python 3.
    print((__doc__ or '').strip())
|
||||||
|
|
Loading…
Reference in New Issue