From 92ea5b531060c851255932172cd3ea22b9650d12 Mon Sep 17 00:00:00 2001 From: mutantmonkey Date: Wed, 28 Dec 2011 17:45:11 -0500 Subject: [PATCH] head: fix .head and add response time --- modules/head.py | 279 +++++++++++++++++++++++++----------------------- 1 file changed, 143 insertions(+), 136 deletions(-) diff --git a/modules/head.py b/modules/head.py index 5570999..36edf21 100644 --- a/modules/head.py +++ b/modules/head.py @@ -7,7 +7,13 @@ Licensed under the Eiffel Forum License 2. http://inamidst.com/phenny/ """ -import re, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, http.client, urllib.parse, time, http.cookiejar +import re +import urllib.request +import urllib.parse +import urllib.error +import http.client +import http.cookiejar +import time from html.entities import name2codepoint import web from tools import deprecated @@ -17,55 +23,56 @@ opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj)) urllib.request.install_opener(opener) def head(phenny, input): - """Provide HTTP HEAD information.""" - uri = input.group(2) - uri = (uri or '') - if ' ' in uri: - uri, header = uri.rsplit(' ', 1) - else: uri, header = uri, None + """Provide HTTP HEAD information.""" + uri = input.group(2) + uri = (uri or '') + if ' ' in uri: + uri, header = uri.rsplit(' ', 1) + else: uri, header = uri, None - if not uri and hasattr(phenny, 'last_seen_uri'): - try: uri = phenny.last_seen_uri[input.sender] - except KeyError: return phenny.say('?') + if not uri and hasattr(phenny, 'last_seen_uri'): + try: uri = phenny.last_seen_uri[input.sender] + except KeyError: return phenny.say('?') - if not uri.startswith('htt'): - uri = 'http://' + uri - # uri = uri.replace('#!', '?_escaped_fragment_=') + if not uri.startswith('htt'): + uri = 'http://' + uri + # uri = uri.replace('#!', '?_escaped_fragment_=') + + start = time.time() - try: info = web.head(uri) - except IOError: return phenny.say("Can't connect to %s" % uri) - except http.client.InvalidURL: return phenny.say("Not a valid URI, sorry.") + try: + info = web.head(uri) + info['status'] = '200' + except urllib.error.HTTPError as e: + return phenny.say(str(e.code)) + except http.client.InvalidURL: + return phenny.say("Not a valid URI, sorry.") + except IOError: + return phenny.say("Can't connect to %s" % uri) - if not isinstance(info, list): - try: info = dict(info) - except TypeError: - return phenny.reply('Try .head http://example.org/ [optional header]') - info['Status'] = '200' - else: - newInfo = dict(info[0]) - newInfo['Status'] = str(info[1]) - info = newInfo + resptime = time.time() - start - if header is None: - data = [] - if 'Status' in info: - data.append(info['Status']) - if 'content-type' in info: - data.append(info['content-type'].replace('; charset=', ', ')) - if 'last-modified' in info: - modified = info['last-modified'] - modified = time.strptime(modified, '%a, %d %b %Y %H:%M:%S %Z') - data.append(time.strftime('%Y-%m-%d %H:%M:%S UTC', modified)) - if 'content-length' in info: - data.append(info['content-length'] + ' bytes') - phenny.reply(', '.join(data)) - else: - headerlower = header.lower() - if headerlower in info: - phenny.say(header + ': ' + info.get(headerlower)) - else: - msg = 'There was no %s header in the response.' % header - phenny.say(msg) + if header is None: + data = [] + if 'Status' in info: + data.append(info['Status']) + if 'content-type' in info: + data.append(info['content-type'].replace('; charset=', ', ')) + if 'last-modified' in info: + modified = info['last-modified'] + modified = time.strptime(modified, '%a, %d %b %Y %H:%M:%S %Z') + data.append(time.strftime('%Y-%m-%d %H:%M:%S UTC', modified)) + if 'content-length' in info: + data.append(info['content-length'] + ' bytes') + data.append('{0:1.2f} s'.format(resptime)) + phenny.reply(', '.join(data)) + else: + headerlower = header.lower() + if headerlower in info: + phenny.say(header + ': ' + info.get(headerlower)) + else: + msg = 'There was no %s header in the response.' % header + phenny.say(msg) head.commands = ['head'] head.example = '.head http://www.w3.org/' @@ -74,118 +81,118 @@ r_entity = re.compile(r'&[A-Za-z0-9#]+;') @deprecated def f_title(self, origin, match, args): - """.title - Return the title of URI.""" - uri = match.group(2) - uri = (uri or '') + """.title - Return the title of URI.""" + uri = match.group(2) + uri = (uri or '') - if not uri and hasattr(self, 'last_seen_uri'): - uri = self.last_seen_uri.get(origin.sender) - if not uri: - return self.msg(origin.sender, 'I need a URI to give the title of...') - title = gettitle(uri) - if title: - self.msg(origin.sender, origin.nick + ': ' + title) - else: self.msg(origin.sender, origin.nick + ': No title found') + if not uri and hasattr(self, 'last_seen_uri'): + uri = self.last_seen_uri.get(origin.sender) + if not uri: + return self.msg(origin.sender, 'I need a URI to give the title of...') + title = gettitle(uri) + if title: + self.msg(origin.sender, origin.nick + ': ' + title) + else: self.msg(origin.sender, origin.nick + ': No title found') f_title.commands = ['title'] def noteuri(phenny, input): - uri = input.group(1) - if not hasattr(phenny.bot, 'last_seen_uri'): - phenny.bot.last_seen_uri = {} - phenny.bot.last_seen_uri[input.sender] = uri + uri = input.group(1) + if not hasattr(phenny.bot, 'last_seen_uri'): + phenny.bot.last_seen_uri = {} + phenny.bot.last_seen_uri[input.sender] = uri noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' noteuri.priority = 'low' titlecommands = r'(?:' + r'|'.join(f_title.commands) + r')' def snarfuri(phenny, input): - if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): - return - uri = input.group(1) - title = gettitle(uri) - if title: - phenny.msg(input.sender, '[ ' + title + ' ]') + if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()): + return + uri = input.group(1) + title = gettitle(uri) + if title: + phenny.msg(input.sender, '[ ' + title + ' ]') snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?' snarfuri.priority = 'low' def gettitle(uri): - if not ':' in uri: - uri = 'http://' + uri - uri = uri.replace('#!', '?_escaped_fragment_=') + if not ':' in uri: + uri = 'http://' + uri + uri = uri.replace('#!', '?_escaped_fragment_=') - title = None - localhost = [ - 'http://localhost/', 'http://localhost:80/', - 'http://localhost:8080/', 'http://127.0.0.1/', - 'http://127.0.0.1:80/', 'http://127.0.0.1:8080/', - 'https://localhost/', 'https://localhost:80/', - 'https://localhost:8080/', 'https://127.0.0.1/', - 'https://127.0.0.1:80/', 'https://127.0.0.1:8080/', - ] - for s in localhost: - if uri.startswith(s): - return phenny.reply('Sorry, access forbidden.') + title = None + localhost = [ + 'http://localhost/', 'http://localhost:80/', + 'http://localhost:8080/', 'http://127.0.0.1/', + 'http://127.0.0.1:80/', 'http://127.0.0.1:8080/', + 'https://localhost/', 'https://localhost:80/', + 'https://localhost:8080/', 'https://127.0.0.1/', + 'https://127.0.0.1:80/', 'https://127.0.0.1:8080/', + ] + for s in localhost: + if uri.startswith(s): + return phenny.reply('Sorry, access forbidden.') - try: - redirects = 0 - while True: - info = web.head(uri) + try: + redirects = 0 + while True: + info = web.head(uri) - if not isinstance(info, list): - status = '200' - else: - status = str(info[1]) - info = info[0] - if status.startswith('3'): - uri = urllib.parse.urljoin(uri, info['Location']) - else: break + if not isinstance(info, list): + status = '200' + else: + status = str(info[1]) + info = info[0] + if status.startswith('3'): + uri = urllib.parse.urljoin(uri, info['Location']) + else: break - redirects += 1 - if redirects >= 25: + redirects += 1 + if redirects >= 25: + return None + + try: mtype = info['content-type'] + except: return None + if not (('/html' in mtype) or ('/xhtml' in mtype)): + return None - try: mtype = info['content-type'] - except: - return None - if not (('/html' in mtype) or ('/xhtml' in mtype)): - return None + bytes = web.get(uri) + #bytes = u.read(262144) + #u.close() - bytes = web.get(uri) - #bytes = u.read(262144) - #u.close() + except IOError: + return - except IOError: - return + m = r_title.search(bytes) + if m: + title = m.group(1) + title = title.strip() + title = title.replace('\t', ' ') + title = title.replace('\r', ' ') + title = title.replace('\n', ' ') + while ' ' in title: + title = title.replace(' ', ' ') + if len(title) > 200: + title = title[:200] + '[...]' + + def e(m): + entity = m.group(0) + if entity.startswith('&#x'): + cp = int(entity[3:-1], 16) + return chr(cp) + elif entity.startswith('&#'): + cp = int(entity[2:-1]) + return chr(cp) + else: + char = name2codepoint[entity[1:-1]] + return chr(char) + title = r_entity.sub(e, title) - m = r_title.search(bytes) - if m: - title = m.group(1) - title = title.strip() - title = title.replace('\t', ' ') - title = title.replace('\r', ' ') - title = title.replace('\n', ' ') - while ' ' in title: - title = title.replace(' ', ' ') - if len(title) > 200: - title = title[:200] + '[...]' - - def e(m): - entity = m.group(0) - if entity.startswith('&#x'): - cp = int(entity[3:-1], 16) - return chr(cp) - elif entity.startswith('&#'): - cp = int(entity[2:-1]) - return chr(cp) - else: - char = name2codepoint[entity[1:-1]] - return chr(char) - title = r_entity.sub(e, title) - - if title: - title = title.replace('\n', '') - title = title.replace('\r', '') - else: title = None - return title + if title: + title = title.replace('\n', '') + title = title.replace('\r', '') + else: title = None + return title if __name__ == '__main__': - print(__doc__.strip()) + print(__doc__.strip())