diff --git a/modules/head.py b/modules/head.py index 9f6f162..616bf5c 100644 --- a/modules/head.py +++ b/modules/head.py @@ -14,6 +14,7 @@ import urllib.error import http.client import http.cookiejar import time +from html.entities import name2codepoint import web from tools import deprecated from modules.linx import get_title @@ -23,22 +24,24 @@ opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) -def head(phenny, input): +def head(phenny, input): """Provide HTTP HEAD information.""" uri = input.group(2) uri = (uri or '') - if ' ' in uri: + if ' ' in uri: uri, header = uri.rsplit(' ', 1) - else: uri, header = uri, None + else: + uri, header = uri, None - if not uri and hasattr(phenny, 'last_seen_uri'): - try: uri = phenny.last_seen_uri[input.sender] - except KeyError: return phenny.say('?') + if not uri and hasattr(phenny, 'last_seen_uri'): + try: + uri = phenny.last_seen_uri[input.sender] + except KeyError: + return phenny.say('?') - if not uri.startswith('htt'): + if not uri.startswith('htt'): uri = 'http://' + uri # uri = uri.replace('#!', '?_escaped_fragment_=') - start = time.time() try: @@ -53,63 +56,73 @@ def head(phenny, input): resptime = time.time() - start - if header is None: + if header is None: data = [] - if 'Status' in info: + if 'Status' in info: data.append(info['Status']) - if 'content-type' in info: + if 'content-type' in info: data.append(info['content-type'].replace('; charset=', ', ')) - if 'last-modified' in info: + if 'last-modified' in info: modified = info['last-modified'] modified = time.strptime(modified, '%a, %d %b %Y %H:%M:%S %Z') data.append(time.strftime('%Y-%m-%d %H:%M:%S UTC', modified)) - if 'content-length' in info: + if 'content-length' in info: data.append(info['content-length'] + ' bytes') data.append('{0:1.2f} s'.format(resptime)) phenny.reply(', '.join(data)) - else: + else: headerlower = header.lower() - if headerlower in info: + if headerlower in info: 
phenny.say(header + ': ' + info.get(headerlower)) - else: + else: msg = 'There was no %s header in the response.' % header phenny.say(msg) head.commands = ['head'] head.example = '.head http://www.w3.org/' + @deprecated -def f_title(self, origin, match, args): +def f_title(self, origin, match, args): """.title - Return the title of URI.""" uri = match.group(2) uri = (uri or '') - if not uri and hasattr(self, 'last_seen_uri'): + if not uri and hasattr(self, 'last_seen_uri'): uri = self.last_seen_uri.get(origin.sender) - if not uri: + if not uri: return self.msg(origin.sender, 'I need a URI to give the title of...') - title = get_title(uri) + title = gettitle(uri) if title: self.msg(origin.sender, origin.nick + ': ' + title) - else: self.msg(origin.sender, origin.nick + ': No title found') + else: + self.msg(origin.sender, origin.nick + ': No title found') f_title.commands = ['title'] r_title = re.compile(r'(?ims)]*>(.*?)') r_entity = re.compile(r'&[A-Za-z0-9#]+;') -def noteuri(phenny, input): + +def noteuri(phenny, input): uri = input.group(1) - if not hasattr(phenny.bot, 'last_seen_uri'): + if not hasattr(phenny.bot, 'last_seen_uri'): phenny.bot.last_seen_uri = {} phenny.bot.last_seen_uri[input.sender] = uri noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' noteuri.priority = 'low' titlecommands = r'(?:' + r'|'.join(f_title.commands) + r')' + + def snarfuri(phenny, input): if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): return uri = input.group(1) - title = get_title(uri, input.sender) + + if phenny.config.linx_api_key != "": + title = get_title(phenny, uri, input.sender) + else: + title = gettitle(uri) + if title: phenny.msg(input.sender, title) snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' 
# URI prefixes that must never be fetched on behalf of a channel user.
_LOCAL_PREFIXES = (
    'http://localhost/', 'http://localhost:80/',
    'http://localhost:8080/', 'http://127.0.0.1/',
    'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
    'https://localhost/', 'https://localhost:80/',
    'https://localhost:8080/', 'https://127.0.0.1/',
    'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
)

# Upper bound on followed redirects before giving up.
_MAX_REDIRECTS = 25

# Titles longer than this many characters are truncated.
_MAX_TITLE_LENGTH = 200


def _decode_entity(match):
    """Return the character for one matched HTML entity.

    Handles hexadecimal (``&#x41;``), decimal (``&#65;``) and named
    (``&amp;``) entities.  Unknown names, malformed numbers and code
    points outside the valid range are returned unchanged instead of
    raising, so one bad entity cannot abort the whole title lookup.
    """
    entity = match.group(0)
    name = entity[1:-1]  # drop the leading '&' and trailing ';'
    try:
        if name[:2].lower() == '#x':
            return chr(int(name[2:], 16))
        if name.startswith('#'):
            return chr(int(name[1:]))
        return chr(name2codepoint[name])
    except (KeyError, ValueError, OverflowError):
        return entity


def gettitle(uri):
    """Fetch *uri* and return the text of its HTML title, or None.

    The URI gets an ``http://`` scheme when it contains no ``:`` and has
    ``#!`` rewritten to ``?_escaped_fragment_=``.  Redirects are followed
    up to ``_MAX_REDIRECTS`` times.  None is returned when:

    * the URI (or any redirect target) starts with one of the
      ``_LOCAL_PREFIXES``;
    * the redirect limit is reached;
    * the response has no HTML/XHTML content type;
    * an IOError occurs while fetching;
    * no title is matched by the module-level ``r_title`` pattern.

    The returned title has whitespace runs collapsed, is truncated to
    ``_MAX_TITLE_LENGTH`` characters plus ``'[...]'``, and has HTML
    entities decoded.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    try:
        redirects = 0
        while True:
            # Checked on every hop so a redirect cannot be used to reach
            # a local address.  There is no bot object in scope here, so
            # a refusal is reported to the caller as "no title".
            if uri.startswith(_LOCAL_PREFIXES):
                return None

            info = web.head(uri)

            # web.head appears to return either the headers alone or a
            # [headers, status] list -- TODO confirm against web.py.
            if not isinstance(info, list):
                status = '200'
            else:
                status = str(info[1])
                info = info[0]

            if not status.startswith('3'):
                break
            uri = urllib.parse.urljoin(uri, info['Location'])

            redirects += 1
            if redirects >= _MAX_REDIRECTS:
                return None

        try:
            mtype = info['content-type']
        except (LookupError, TypeError):
            return None

        if not mtype or not ('/html' in mtype or '/xhtml' in mtype):
            return None

        body = web.get(uri)
    except IOError:
        return None

    found = r_title.search(body)
    if not found:
        return None

    title = found.group(1).strip()
    for char in '\t\r\n':
        title = title.replace(char, ' ')
    # Collapse runs of spaces down to a single space.
    while '  ' in title:
        title = title.replace('  ', ' ')
    if len(title) > _MAX_TITLE_LENGTH:
        title = title[:_MAX_TITLE_LENGTH] + '[...]'

    title = r_entity.sub(_decode_entity, title)

    # Entity decoding can reintroduce line breaks (e.g. "&#10;"); strip
    # them so the title stays on a single line.
    title = title.replace('\n', '').replace('\r', '')
    return title or None


if __name__ == '__main__':
    print(__doc__.strip())
index d458234..7757fcf 100644 --- a/modules/linx.py +++ b/modules/linx.py @@ -11,10 +11,10 @@ import web import json -def get_title(url, channel): +def get_title(phenny, url, channel): """ Have linx retrieve the (augmented) title """ try: - return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel}) + return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel, 'api_key': phenny.config.linx_api_key}) except: return @@ -28,7 +28,7 @@ def linx(phenny, input, short=False): return try: - req = web.post("http://linx.li/vtluug", {'url': url, 'short': short}) + req = web.post("http://linx.li/vtluug", {'url': url, 'short': short, 'api_key': phenny.config.linx_api_key}) except (HTTPError, IOError): raise GrumbleError("THE INTERNET IS FUCKING BROKEN. Please try again later.") @@ -70,7 +70,7 @@ def lines(phenny, input): date = "today" try: - req = web.post("http://linx.li/vtluuglines", {'nickname': nickname, 'date': date, 'sender': input.nick, 'channel': input.sender}) + req = web.post("http://linx.li/vtluuglines", {'nickname': nickname, 'date': date, 'sender': input.nick, 'channel': input.sender, 'api_key': phenny.config.linx_api_key}) except (HTTPError, IOError): raise GrumbleError("THE INTERNET IS FUCKING BROKEN. Please try again later.") @@ -88,7 +88,7 @@ def posted(phenny, input): return try: - req = web.post("http://linx.li/vtluugposted", {'message': message, 'sender': input.nick, 'channel': input.sender}) + req = web.post("http://linx.li/vtluugposted", {'message': message, 'sender': input.nick, 'channel': input.sender, 'api_key': phenny.config.linx_api_key}) except (HTTPError, IOError): raise GrumbleError("THE INTERNET IS FUCKING BROKEN. 
Please try again later.") diff --git a/phenny b/phenny index 7accb33..d9eb607 100755 --- a/phenny +++ b/phenny @@ -38,6 +38,10 @@ def create_default_config(fn): # password = 'example' # serverpass = 'serverpass' + # linx-enabled features (.linx, .posted, .lines, snarfuri with special capabilities) + # leave the api key blank to not use them and be sure to add the 'linx' module to the ignore list. + linx_api_key = "" + # These are people who will be able to use admin.py's functions... admins = [owner, 'someoneyoutrust'] # But admin.py is disabled by default, as follows: