Gets title from linx for augmented titling capacity

master
AndreiM 2012-09-24 21:55:58 -04:00
parent 7f55eb7cc5
commit 7b87baeb7e
2 changed files with 14 additions and 101 deletions

View File

@ -14,15 +14,15 @@ import urllib.error
import http.client
import http.cookiejar
import time
from html.entities import name2codepoint
import web
from tools import deprecated
from modules.linx import check_posted_link
from modules.linx import get_title
cj = http.cookiejar.LWPCookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener)
def head(phenny, input):
"""Provide HTTP HEAD information."""
uri = input.group(2)
@ -87,7 +87,7 @@ def f_title(self, origin, match, args):
uri = self.last_seen_uri.get(origin.sender)
if not uri:
return self.msg(origin.sender, 'I need a URI to give the title of...')
title = gettitle(uri)
title = get_title(uri)
if title:
self.msg(origin.sender, origin.nick + ': ' + title)
else: self.msg(origin.sender, origin.nick + ': No title found')
@ -109,98 +109,12 @@ def snarfuri(phenny, input):
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
return
uri = input.group(1)
title = gettitle(uri, input.sender)
title = get_title(uri, input.sender)
if title:
phenny.msg(input.sender, title)
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low'
def gettitle(uri, channel):
if not ':' in uri:
uri = 'http://' + uri
uri = uri.replace('#!', '?_escaped_fragment_=')
title = None
localhost = [
'http://localhost/', 'http://localhost:80/',
'http://localhost:8080/', 'http://127.0.0.1/',
'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
'https://localhost/', 'https://localhost:80/',
'https://localhost:8080/', 'https://127.0.0.1/',
'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
]
for s in localhost:
if uri.startswith(s):
return phenny.reply('Sorry, access forbidden.')
try:
redirects = 0
while True:
info = web.head(uri)
if not isinstance(info, list):
status = '200'
else:
status = str(info[1])
info = info[0]
if status.startswith('3'):
uri = urllib.parse.urljoin(uri, info['Location'])
else: break
redirects += 1
if redirects >= 25:
return None
try: mtype = info['content-type']
except:
return None
if not (('/html' in mtype) or ('/xhtml' in mtype)):
return None
bytes = web.get(uri)
#bytes = u.read(262144)
#u.close()
except IOError:
return
m = r_title.search(bytes)
if m:
title = m.group(1)
title = title.strip()
title = title.replace('\t', ' ')
title = title.replace('\r', ' ')
title = title.replace('\n', ' ')
while ' ' in title:
title = title.replace(' ', ' ')
if len(title) > 200:
title = title[:200] + '[...]'
def e(m):
entity = m.group(0)
if entity.startswith('&#x'):
cp = int(entity[3:-1], 16)
return chr(cp)
elif entity.startswith('&#'):
cp = int(entity[2:-1])
return chr(cp)
else:
char = name2codepoint[entity[1:-1]]
return chr(char)
title = r_entity.sub(e, title)
if title:
title = title.replace('\n', '')
title = title.replace('\r', '')
channels = ['#vtluug', '#vtcsec']
if channel in channels:
title = "[ " + title + " ] " + check_posted_link(uri, channel)
else:
title = "[ " + title + " ] "
else: title = None
return title
if __name__ == '__main__':
print(__doc__.strip())

View File

@ -1,7 +1,8 @@
#!/usr/bin/python3
"""
linx.py - linx.li tools
author: mutantmonkey <mutantmonkey@mutantmonkey.in>, andreim <andreim@andreim.net>
author: andreim <andreim@andreim.net>
author: mutantmonkey <mutantmonkey@mutantmonkey.in>
"""
from urllib.error import HTTPError
@ -10,6 +11,14 @@ import web
import json
def get_title(url, channel):
""" Have linx retrieve the (augmented) title """
try:
return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel})
except:
return
def linx(phenny, input, short=False):
""".linx <url> - Upload a remote URL to linx.li."""
@ -88,15 +97,5 @@ def posted(phenny, input):
posted.rule = (['posted'], r'(.*)')
def check_posted_link(url, channel):
""" helper method for gettitle() """
try:
req = web.post("http://linx.li/vtluugpostedurl", {'url': url, 'channel': channel})
except:
req = ""
return req
if __name__ == '__main__':
print(__doc__.strip())