Gets title from linx for augmented titling capacity
parent
7f55eb7cc5
commit
7b87baeb7e
|
@ -14,15 +14,15 @@ import urllib.error
|
||||||
import http.client
|
import http.client
|
||||||
import http.cookiejar
|
import http.cookiejar
|
||||||
import time
|
import time
|
||||||
from html.entities import name2codepoint
|
|
||||||
import web
|
import web
|
||||||
from tools import deprecated
|
from tools import deprecated
|
||||||
from modules.linx import check_posted_link
|
from modules.linx import get_title
|
||||||
|
|
||||||
cj = http.cookiejar.LWPCookieJar()
|
cj = http.cookiejar.LWPCookieJar()
|
||||||
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
|
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
|
||||||
urllib.request.install_opener(opener)
|
urllib.request.install_opener(opener)
|
||||||
|
|
||||||
|
|
||||||
def head(phenny, input):
|
def head(phenny, input):
|
||||||
"""Provide HTTP HEAD information."""
|
"""Provide HTTP HEAD information."""
|
||||||
uri = input.group(2)
|
uri = input.group(2)
|
||||||
|
@ -87,7 +87,7 @@ def f_title(self, origin, match, args):
|
||||||
uri = self.last_seen_uri.get(origin.sender)
|
uri = self.last_seen_uri.get(origin.sender)
|
||||||
if not uri:
|
if not uri:
|
||||||
return self.msg(origin.sender, 'I need a URI to give the title of...')
|
return self.msg(origin.sender, 'I need a URI to give the title of...')
|
||||||
title = gettitle(uri)
|
title = get_title(uri)
|
||||||
if title:
|
if title:
|
||||||
self.msg(origin.sender, origin.nick + ': ' + title)
|
self.msg(origin.sender, origin.nick + ': ' + title)
|
||||||
else: self.msg(origin.sender, origin.nick + ': No title found')
|
else: self.msg(origin.sender, origin.nick + ': No title found')
|
||||||
|
@ -109,98 +109,12 @@ def snarfuri(phenny, input):
|
||||||
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
|
if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
|
||||||
return
|
return
|
||||||
uri = input.group(1)
|
uri = input.group(1)
|
||||||
title = gettitle(uri, input.sender)
|
title = get_title(uri, input.sender)
|
||||||
if title:
|
if title:
|
||||||
phenny.msg(input.sender, title)
|
phenny.msg(input.sender, title)
|
||||||
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
|
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
|
||||||
snarfuri.priority = 'low'
|
snarfuri.priority = 'low'
|
||||||
|
|
||||||
def gettitle(uri, channel):
|
|
||||||
if not ':' in uri:
|
|
||||||
uri = 'http://' + uri
|
|
||||||
uri = uri.replace('#!', '?_escaped_fragment_=')
|
|
||||||
|
|
||||||
title = None
|
|
||||||
localhost = [
|
|
||||||
'http://localhost/', 'http://localhost:80/',
|
|
||||||
'http://localhost:8080/', 'http://127.0.0.1/',
|
|
||||||
'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
|
|
||||||
'https://localhost/', 'https://localhost:80/',
|
|
||||||
'https://localhost:8080/', 'https://127.0.0.1/',
|
|
||||||
'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
|
|
||||||
]
|
|
||||||
for s in localhost:
|
|
||||||
if uri.startswith(s):
|
|
||||||
return phenny.reply('Sorry, access forbidden.')
|
|
||||||
|
|
||||||
try:
|
|
||||||
redirects = 0
|
|
||||||
while True:
|
|
||||||
info = web.head(uri)
|
|
||||||
|
|
||||||
if not isinstance(info, list):
|
|
||||||
status = '200'
|
|
||||||
else:
|
|
||||||
status = str(info[1])
|
|
||||||
info = info[0]
|
|
||||||
if status.startswith('3'):
|
|
||||||
uri = urllib.parse.urljoin(uri, info['Location'])
|
|
||||||
else: break
|
|
||||||
|
|
||||||
redirects += 1
|
|
||||||
if redirects >= 25:
|
|
||||||
return None
|
|
||||||
|
|
||||||
try: mtype = info['content-type']
|
|
||||||
except:
|
|
||||||
return None
|
|
||||||
|
|
||||||
if not (('/html' in mtype) or ('/xhtml' in mtype)):
|
|
||||||
return None
|
|
||||||
|
|
||||||
bytes = web.get(uri)
|
|
||||||
#bytes = u.read(262144)
|
|
||||||
#u.close()
|
|
||||||
|
|
||||||
except IOError:
|
|
||||||
return
|
|
||||||
|
|
||||||
m = r_title.search(bytes)
|
|
||||||
if m:
|
|
||||||
title = m.group(1)
|
|
||||||
title = title.strip()
|
|
||||||
title = title.replace('\t', ' ')
|
|
||||||
title = title.replace('\r', ' ')
|
|
||||||
title = title.replace('\n', ' ')
|
|
||||||
while ' ' in title:
|
|
||||||
title = title.replace(' ', ' ')
|
|
||||||
if len(title) > 200:
|
|
||||||
title = title[:200] + '[...]'
|
|
||||||
|
|
||||||
def e(m):
|
|
||||||
entity = m.group(0)
|
|
||||||
if entity.startswith('&#x'):
|
|
||||||
cp = int(entity[3:-1], 16)
|
|
||||||
return chr(cp)
|
|
||||||
elif entity.startswith('&#'):
|
|
||||||
cp = int(entity[2:-1])
|
|
||||||
return chr(cp)
|
|
||||||
else:
|
|
||||||
char = name2codepoint[entity[1:-1]]
|
|
||||||
return chr(char)
|
|
||||||
title = r_entity.sub(e, title)
|
|
||||||
|
|
||||||
if title:
|
|
||||||
title = title.replace('\n', '')
|
|
||||||
title = title.replace('\r', '')
|
|
||||||
|
|
||||||
channels = ['#vtluug', '#vtcsec']
|
|
||||||
if channel in channels:
|
|
||||||
title = "[ " + title + " ] " + check_posted_link(uri, channel)
|
|
||||||
else:
|
|
||||||
title = "[ " + title + " ] "
|
|
||||||
else: title = None
|
|
||||||
return title
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__doc__.strip())
|
print(__doc__.strip())
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
"""
|
"""
|
||||||
linx.py - linx.li tools
|
linx.py - linx.li tools
|
||||||
author: mutantmonkey <mutantmonkey@mutantmonkey.in>, andreim <andreim@andreim.net>
|
author: andreim <andreim@andreim.net>
|
||||||
|
author: mutantmonkey <mutantmonkey@mutantmonkey.in>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
|
@ -10,6 +11,14 @@ import web
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
|
def get_title(url, channel):
|
||||||
|
""" Have linx retrieve the (augmented) title """
|
||||||
|
try:
|
||||||
|
return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel})
|
||||||
|
except:
|
||||||
|
return
|
||||||
|
|
||||||
|
|
||||||
def linx(phenny, input, short=False):
|
def linx(phenny, input, short=False):
|
||||||
""".linx <url> - Upload a remote URL to linx.li."""
|
""".linx <url> - Upload a remote URL to linx.li."""
|
||||||
|
|
||||||
|
@ -88,15 +97,5 @@ def posted(phenny, input):
|
||||||
posted.rule = (['posted'], r'(.*)')
|
posted.rule = (['posted'], r'(.*)')
|
||||||
|
|
||||||
|
|
||||||
def check_posted_link(url, channel):
|
|
||||||
""" helper method for gettitle() """
|
|
||||||
|
|
||||||
try:
|
|
||||||
req = web.post("http://linx.li/vtluugpostedurl", {'url': url, 'channel': channel})
|
|
||||||
except:
|
|
||||||
req = ""
|
|
||||||
|
|
||||||
return req
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(__doc__.strip())
|
print(__doc__.strip())
|
||||||
|
|
Loading…
Reference in New Issue