Merge pull request #41 from hansenchris/master

Get titles from linx for additional functionality
master
mutantmonkey 2012-09-24 20:54:31 -07:00
commit 97a3f884bc
2 changed files with 15 additions and 101 deletions

View File

@ -14,15 +14,15 @@ import urllib.error
import http.client import http.client
import http.cookiejar import http.cookiejar
import time import time
from html.entities import name2codepoint
import web import web
from tools import deprecated from tools import deprecated
from modules.linx import check_posted_link from modules.linx import get_title
cj = http.cookiejar.LWPCookieJar() cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener) urllib.request.install_opener(opener)
def head(phenny, input): def head(phenny, input):
"""Provide HTTP HEAD information.""" """Provide HTTP HEAD information."""
uri = input.group(2) uri = input.group(2)
@ -87,7 +87,7 @@ def f_title(self, origin, match, args):
uri = self.last_seen_uri.get(origin.sender) uri = self.last_seen_uri.get(origin.sender)
if not uri: if not uri:
return self.msg(origin.sender, 'I need a URI to give the title of...') return self.msg(origin.sender, 'I need a URI to give the title of...')
title = gettitle(uri) title = get_title(uri)
if title: if title:
self.msg(origin.sender, origin.nick + ': ' + title) self.msg(origin.sender, origin.nick + ': ' + title)
else: self.msg(origin.sender, origin.nick + ': No title found') else: self.msg(origin.sender, origin.nick + ': No title found')
@ -109,98 +109,13 @@ def snarfuri(phenny, input):
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
return return
uri = input.group(1) uri = input.group(1)
title = gettitle(uri, input.sender) title = get_title(uri, input.sender)
if title: if title:
phenny.msg(input.sender, title) phenny.msg(input.sender, title)
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low' snarfuri.priority = 'low'
snarfuri.thread = True
def gettitle(uri, channel=None):
    """Fetch the HTML <title> of ``uri`` and format it for posting.

    Parameters:
        uri: address to look up; ``http://`` is prepended when it has no
            ``:`` in it, and ``#!`` is rewritten to ``?_escaped_fragment_=``.
        channel: optional channel name. For the channels listed below the
            result of ``check_posted_link(uri, channel)`` is appended to the
            title. Defaults to None so callers that only have a URI can
            still ask for a title.

    Returns:
        The title wrapped as ``"[ title ] "`` (plus the posted-link suffix
        for the listed channels), or None when the URI is refused, the
        redirect limit is hit, the content type is not (X)HTML, the fetch
        raises IOError, or no title is found.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    # Refuse to fetch pages from the local machine.
    localhost = (
        'http://localhost/', 'http://localhost:80/',
        'http://localhost:8080/', 'http://127.0.0.1/',
        'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
        'https://localhost/', 'https://localhost:80/',
        'https://localhost:8080/', 'https://127.0.0.1/',
        'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
    )
    if uri.startswith(localhost):
        # There is no bot object in scope here to reply through, so the
        # request is simply declined.
        return None

    try:
        # Follow redirects by hand, giving up after 25 hops.
        redirects = 0
        while True:
            info = web.head(uri)
            if not isinstance(info, list):
                status = '200'
            else:
                status = str(info[1])
                info = info[0]
            if status.startswith('3'):
                uri = urllib.parse.urljoin(uri, info['Location'])
            else:
                break
            redirects += 1
            if redirects >= 25:
                return None

        try:
            mtype = info['content-type']
        except Exception:
            # No usable content type: treat as "not a web page".
            return None
        if not mtype:
            return None
        if not (('/html' in mtype) or ('/xhtml' in mtype)):
            return None

        content = web.get(uri)
    except IOError:
        return None

    m = r_title.search(content)
    if not m:
        return None

    # Flatten the title onto one line and squeeze runs of spaces.
    title = m.group(1).strip()
    for ws in ('\t', '\r', '\n'):
        title = title.replace(ws, ' ')
    while '  ' in title:
        title = title.replace('  ', ' ')
    if len(title) > 200:
        title = title[:200] + '[...]'

    def e(m):
        """Decode one matched character entity; leave it as-is if invalid."""
        entity = m.group(0)
        try:
            if entity.startswith('&#x'):
                return chr(int(entity[3:-1], 16))
            if entity.startswith('&#'):
                return chr(int(entity[2:-1]))
            return chr(name2codepoint[entity[1:-1]])
        except (KeyError, ValueError, OverflowError):
            # Unknown name, malformed number, or code point out of range.
            return entity

    title = r_entity.sub(e, title)
    if not title:
        return None

    # Entity decoding may have reintroduced line breaks.
    title = title.replace('\n', '').replace('\r', '')

    channels = ['#vtluug', '#vtcsec']
    if channel in channels:
        return "[ " + title + " ] " + check_posted_link(uri, channel)
    return "[ " + title + " ] "
if __name__ == '__main__': if __name__ == '__main__':
print(__doc__.strip()) print(__doc__.strip())

View File

@ -1,7 +1,8 @@
#!/usr/bin/python3 #!/usr/bin/python3
""" """
linx.py - linx.li tools linx.py - linx.li tools
author: mutantmonkey <mutantmonkey@mutantmonkey.in>, andreim <andreim@andreim.net> author: andreim <andreim@andreim.net>
author: mutantmonkey <mutantmonkey@mutantmonkey.in>
""" """
from urllib.error import HTTPError from urllib.error import HTTPError
@ -10,6 +11,14 @@ import web
import json import json
def get_title(url, channel=None):
    """Have linx retrieve the (augmented) title for ``url``.

    Parameters:
        url: the address whose title is wanted.
        channel: optional channel name, sent along with the request when
            given. Defaults to None so callers that only have a URL can
            call ``get_title(url)``.

    Returns:
        Whatever ``web.post`` returns for the title endpoint, or None if
        the request raises.
    """
    params = {'url': url}
    if channel is not None:
        params['channel'] = channel
    try:
        return web.post("http://linx.li/vtluuggettitle", params)
    except Exception:
        # Best effort: a failed lookup just means "no title". Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def linx(phenny, input, short=False): def linx(phenny, input, short=False):
""".linx <url> - Upload a remote URL to linx.li.""" """.linx <url> - Upload a remote URL to linx.li."""
@ -88,15 +97,5 @@ def posted(phenny, input):
posted.rule = (['posted'], r'(.*)') posted.rule = (['posted'], r'(.*)')
def check_posted_link(url, channel):
    """Helper for gettitle(): ask linx whether ``url`` was posted before.

    Posts ``url`` and ``channel`` to the linx.li posted-url endpoint.

    Returns:
        Whatever ``web.post`` returns, or an empty string if the request
        raises, so the result can always be appended to a title.
    """
    try:
        return web.post("http://linx.li/vtluugpostedurl",
                        {'url': url, 'channel': channel})
    except Exception:
        # Best effort: on failure contribute nothing to the title. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return ""
if __name__ == '__main__': if __name__ == '__main__':
print(__doc__.strip()) print(__doc__.strip())