Gets title from linx for augmented titling capacity

master
AndreiM 2012-09-24 21:55:58 -04:00
parent 7f55eb7cc5
commit 7b87baeb7e
2 changed files with 14 additions and 101 deletions

View File

@ -14,15 +14,15 @@ import urllib.error
import http.client import http.client
import http.cookiejar import http.cookiejar
import time import time
from html.entities import name2codepoint
import web import web
from tools import deprecated from tools import deprecated
from modules.linx import check_posted_link from modules.linx import get_title
cj = http.cookiejar.LWPCookieJar() cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener) urllib.request.install_opener(opener)
def head(phenny, input): def head(phenny, input):
"""Provide HTTP HEAD information.""" """Provide HTTP HEAD information."""
uri = input.group(2) uri = input.group(2)
@ -87,7 +87,7 @@ def f_title(self, origin, match, args):
uri = self.last_seen_uri.get(origin.sender) uri = self.last_seen_uri.get(origin.sender)
if not uri: if not uri:
return self.msg(origin.sender, 'I need a URI to give the title of...') return self.msg(origin.sender, 'I need a URI to give the title of...')
title = gettitle(uri) title = get_title(uri)
if title: if title:
self.msg(origin.sender, origin.nick + ': ' + title) self.msg(origin.sender, origin.nick + ': ' + title)
else: self.msg(origin.sender, origin.nick + ': No title found') else: self.msg(origin.sender, origin.nick + ': No title found')
@ -109,98 +109,12 @@ def snarfuri(phenny, input):
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
return return
uri = input.group(1) uri = input.group(1)
title = gettitle(uri, input.sender) title = get_title(uri, input.sender)
if title: if title:
phenny.msg(input.sender, title) phenny.msg(input.sender, title)
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low' snarfuri.priority = 'low'
def _decode_entity(m):
    """Return the character for one matched HTML entity.

    Handles hexadecimal (``&#x..;``), decimal (``&#..;``) and named
    (``&name;``) references. An entity that cannot be decoded is returned
    unchanged rather than raising, so one bad entity does not lose the title.
    """
    entity = m.group(0)
    try:
        if entity.startswith('&#x'):
            return chr(int(entity[3:-1], 16))
        if entity.startswith('&#'):
            return chr(int(entity[2:-1]))
        return chr(name2codepoint[entity[1:-1]])
    except (KeyError, ValueError, OverflowError):
        return entity


def gettitle(uri, channel=None):
    """Fetch the HTML <title> of *uri* and format it for posting to IRC.

    uri     -- address to look up; 'http://' is prepended when it contains
               no ':' at all.
    channel -- channel the link was seen in. Optional, because callers such
               as f_title pass only the URI. For '#vtluug' and '#vtcsec' the
               result of check_posted_link() is appended to the title.

    Returns the title wrapped as "[ title ] " (plus the posted-link note for
    the channels above), or None when the URI is a blocked local address,
    redirects 25 times or more, is not (X)HTML, cannot be fetched, or has no
    usable title.

    NOTE(review): relies on the module-level names web, r_title, r_entity and
    check_posted_link, which are defined outside this block.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    # AJAX-crawling convention: ask for the escaped-fragment version.
    uri = uri.replace('#!', '?_escaped_fragment_=')

    localhost = (
        'http://localhost/', 'http://localhost:80/',
        'http://localhost:8080/', 'http://127.0.0.1/',
        'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
        'https://localhost/', 'https://localhost:80/',
        'https://localhost:8080/', 'https://127.0.0.1/',
        'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
    )
    if uri.startswith(localhost):
        # The old code called phenny.reply() here, but no phenny object is
        # in scope in this function, so it raised NameError. Refuse quietly.
        return None

    try:
        redirects = 0
        while True:
            info = web.head(uri)
            if not isinstance(info, list):
                status = '200'
            else:
                status = str(info[1])
                info = info[0]
            if not status.startswith('3'):
                break
            uri = urllib.parse.urljoin(uri, info['Location'])
            redirects += 1
            if redirects >= 25:
                return None

        try:
            mtype = info['content-type']
        except Exception:
            # Best effort: whatever the header container raises for a
            # missing content-type, treat it as "no title available".
            return None
        if not mtype or not ('/html' in mtype or '/xhtml' in mtype):
            return None

        page = web.get(uri)
    except IOError:
        return None

    m = r_title.search(page)
    if not m:
        return None

    title = m.group(1).strip()
    for ws in '\t\r\n':
        title = title.replace(ws, ' ')
    # Collapse runs of spaces; the search string must be TWO spaces,
    # otherwise this loop never terminates.
    while '  ' in title:
        title = title.replace('  ', ' ')
    if len(title) > 200:
        title = title[:200] + '[...]'

    title = r_entity.sub(_decode_entity, title)
    # Entity decoding can reintroduce line breaks (e.g. &#10;); drop them so
    # the title stays on a single IRC line.
    title = title.replace('\n', '').replace('\r', '')
    if not title:
        return None

    title = "[ " + title + " ] "
    if channel in ('#vtluug', '#vtcsec'):
        title += check_posted_link(uri, channel)
    return title
if __name__ == '__main__': if __name__ == '__main__':
print(__doc__.strip()) print(__doc__.strip())

View File

@ -1,7 +1,8 @@
#!/usr/bin/python3 #!/usr/bin/python3
""" """
linx.py - linx.li tools linx.py - linx.li tools
author: mutantmonkey <mutantmonkey@mutantmonkey.in>, andreim <andreim@andreim.net> author: andreim <andreim@andreim.net>
author: mutantmonkey <mutantmonkey@mutantmonkey.in>
""" """
from urllib.error import HTTPError from urllib.error import HTTPError
@ -10,6 +11,14 @@ import web
import json import json
def get_title(url, channel=None):
    """Have linx retrieve the (augmented) title for *url*.

    url     -- address whose title is wanted.
    channel -- channel the link was seen in. Optional, because f_title
               calls this with the URL only.

    Returns whatever web.post() returns for the linx.li request, or None if
    the request fails for any reason (best effort: a failed lookup must not
    take the bot down).
    """
    payload = {'url': url}
    if channel is not None:
        # NOTE(review): the key is omitted when no channel is known; confirm
        # the linx.li endpoint accepts a request without it.
        payload['channel'] = channel
    try:
        return web.post("http://linx.li/vtluuggettitle", payload)
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate.
        return None
def linx(phenny, input, short=False): def linx(phenny, input, short=False):
""".linx <url> - Upload a remote URL to linx.li.""" """.linx <url> - Upload a remote URL to linx.li."""
@ -88,15 +97,5 @@ def posted(phenny, input):
posted.rule = (['posted'], r'(.*)') posted.rule = (['posted'], r'(.*)')
def check_posted_link(url, channel):
    """Helper for gettitle(): ask linx.li about *url* in *channel*.

    Returns whatever web.post() returns for the request, or an empty string
    when the request fails, so the caller can always concatenate the result.
    """
    try:
        return web.post("http://linx.li/vtluugpostedurl", {'url': url, 'channel': channel})
    except Exception:
        # Best effort, but let KeyboardInterrupt/SystemExit propagate.
        return ""
if __name__ == '__main__': if __name__ == '__main__':
print(__doc__.strip()) print(__doc__.strip())