diff --git a/modules/head.py b/modules/head.py index 3f4803b..47ff5b1 100644 --- a/modules/head.py +++ b/modules/head.py @@ -14,15 +14,15 @@ import urllib.error import http.client import http.cookiejar import time -from html.entities import name2codepoint import web from tools import deprecated -from modules.linx import check_posted_link +from modules.linx import get_title cj = http.cookiejar.LWPCookieJar() opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) + def head(phenny, input): """Provide HTTP HEAD information.""" uri = input.group(2) @@ -87,7 +87,7 @@ def f_title(self, origin, match, args): uri = self.last_seen_uri.get(origin.sender) if not uri: return self.msg(origin.sender, 'I need a URI to give the title of...') - title = gettitle(uri) + title = get_title(uri) if title: self.msg(origin.sender, origin.nick + ': ' + title) else: self.msg(origin.sender, origin.nick + ': No title found') @@ -109,98 +109,12 @@ def snarfuri(phenny, input): if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): return uri = input.group(1) - title = gettitle(uri, input.sender) + title = get_title(uri, input.sender) if title: phenny.msg(input.sender, title) snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' snarfuri.priority = 'low' -def gettitle(uri, channel): - if not ':' in uri: - uri = 'http://' + uri - uri = uri.replace('#!', '?_escaped_fragment_=') - - title = None - localhost = [ - 'http://localhost/', 'http://localhost:80/', - 'http://localhost:8080/', 'http://127.0.0.1/', - 'http://127.0.0.1:80/', 'http://127.0.0.1:8080/', - 'https://localhost/', 'https://localhost:80/', - 'https://localhost:8080/', 'https://127.0.0.1/', - 'https://127.0.0.1:80/', 'https://127.0.0.1:8080/', - ] - for s in localhost: - if uri.startswith(s): - return phenny.reply('Sorry, access forbidden.') - - try: - redirects = 0 - while True: - info = web.head(uri) - - if not isinstance(info, list): - status = '200' - else: - status = str(info[1]) - info = info[0] - if status.startswith('3'): - uri = urllib.parse.urljoin(uri, info['Location']) - else: break - - redirects += 1 - if redirects >= 25: - return None - - try: mtype = info['content-type'] - except: - return None - - if not (('/html' in mtype) or ('/xhtml' in mtype)): - return None - - bytes = web.get(uri) - #bytes = u.read(262144) - #u.close() - - except IOError: - return - - m = r_title.search(bytes) - if m: - title = m.group(1) - title = title.strip() - title = title.replace('\t', ' ') - title = title.replace('\r', ' ') - title = title.replace('\n', ' ') - while ' ' in title: - title = title.replace(' ', ' ') - if len(title) > 200: - title = title[:200] + '[...]' - - def e(m): - entity = m.group(0) - if entity.startswith('&#x'): - cp = int(entity[3:-1], 16) - return chr(cp) - elif entity.startswith('&#'): - cp = int(entity[2:-1]) - return chr(cp) - else: - char = name2codepoint[entity[1:-1]] - return chr(char) - title = r_entity.sub(e, title) - - if title: - title = title.replace('\n', '') - title = title.replace('\r', '') - - channels = ['#vtluug', '#vtcsec'] - if channel in channels: - title = "[ " + title + " ] " + check_posted_link(uri, channel) - else: - title = "[ " + title + " ] " - else: title = None - return title if __name__ == '__main__': print(__doc__.strip()) diff --git a/modules/linx.py b/modules/linx.py index e69f3d7..d458234 100644 --- a/modules/linx.py +++ b/modules/linx.py @@ -1,7 +1,8 @@ #!/usr/bin/python3 """ linx.py - linx.li tools -author: mutantmonkey , andreim +author: andreim +author: mutantmonkey """ from urllib.error import HTTPError @@ -10,6 +11,14 @@ import web import json +def get_title(url, channel): + """ Have linx retrieve the (augmented) title """ + try: + return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel}) + except: + return + + def linx(phenny, input, short=False): """.linx - Upload a remote URL to linx.li.""" @@ -88,15 +97,5 @@ def posted(phenny, input): posted.rule = (['posted'], r'(.*)') -def check_posted_link(url, channel): - """ helper method for gettitle() """ - - try: - req = web.post("http://linx.li/vtluugpostedurl", {'url': url, 'channel': channel}) - except: - req = "" - - return req - if __name__ == '__main__': print(__doc__.strip())