head: fix .head and add response time

master
mutantmonkey 2011-12-28 17:45:11 -05:00
parent 9594c33266
commit 92ea5b5310
1 changed file with 143 additions and 136 deletions

View File

@ -7,7 +7,13 @@ Licensed under the Eiffel Forum License 2.
http://inamidst.com/phenny/ http://inamidst.com/phenny/
""" """
import re, urllib.request, urllib.parse, urllib.error, urllib.request, urllib.error, urllib.parse, http.client, urllib.parse, time, http.cookiejar import re
import urllib.request
import urllib.parse
import urllib.error
import http.client
import http.cookiejar
import time
from html.entities import name2codepoint from html.entities import name2codepoint
import web import web
from tools import deprecated from tools import deprecated
@ -17,55 +23,56 @@ opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
urllib.request.install_opener(opener) urllib.request.install_opener(opener)
def head(phenny, input):
    """Provide HTTP HEAD information."""
    # Usage: .head <uri> [header]
    #
    # With no header name, replies with a comma-separated summary: status,
    # content type, last-modified time, content length and the time the
    # request took.  With a header name, says that single header's value.
    # The docstring above is deliberately left as the original one-liner.
    # NOTE(review): the bot framework may surface it as help text - confirm.
    uri = input.group(2) or ''
    if ' ' in uri:
        # The last space-separated word is taken as the header to look up.
        uri, header = uri.rsplit(' ', 1)
    else:
        header = None

    if not uri and hasattr(phenny, 'last_seen_uri'):
        # Fall back to the most recent URI remembered for this channel.
        try:
            uri = phenny.last_seen_uri[input.sender]
        except KeyError:
            return phenny.say('?')

    if not uri:
        # Nothing to query: without this guard the bare string 'http://'
        # would be requested below.
        return phenny.reply('Try .head http://example.org/ [optional header]')

    # Test for a real scheme prefix; a bare 'htt' test wrongly treats hosts
    # such as 'httpbin.org' as already carrying a scheme.
    if not uri.startswith(('http://', 'https://')):
        uri = 'http://' + uri
    # uri = uri.replace('#!', '?_escaped_fragment_=')

    start = time.time()
    try:
        info = web.head(uri)
    except urllib.error.HTTPError as e:
        # Non-success responses are reported by their status code alone.
        return phenny.say(str(e.code))
    except http.client.InvalidURL:
        return phenny.say("Not a valid URI, sorry.")
    except IOError:
        return phenny.say("Can't connect to %s" % uri)
    resptime = time.time() - start

    # NOTE(review): web.head is defined elsewhere; this assumes it may hand
    # back None for URIs it refuses to fetch - confirm against web.py.
    if info is None:
        return phenny.reply('Try .head http://example.org/ [optional header]')

    # Reaching this point means no HTTPError was raised.  The key is written
    # and read in the same lowercase spelling as every other header lookup
    # here, so it also works when `info` is a case-sensitive mapping.
    info['status'] = '200'

    if header is None:
        data = []
        if 'status' in info:
            data.append(info['status'])
        if 'content-type' in info:
            data.append(info['content-type'].replace('; charset=', ', '))
        if 'last-modified' in info:
            modified = info['last-modified']
            try:
                parsed = time.strptime(modified, '%a, %d %b %Y %H:%M:%S %Z')
            except ValueError:
                # Unparseable date: show the raw header instead of crashing.
                data.append(modified)
            else:
                data.append(time.strftime('%Y-%m-%d %H:%M:%S UTC', parsed))
        if 'content-length' in info:
            data.append(info['content-length'] + ' bytes')
        data.append('{0:1.2f} s'.format(resptime))
        phenny.reply(', '.join(data))
    else:
        headerlower = header.lower()
        if headerlower in info:
            phenny.say(header + ': ' + info.get(headerlower))
        else:
            msg = 'There was no %s header in the response.' % header
            phenny.say(msg)
head.commands = ['head']
head.example = '.head http://www.w3.org/'
@ -74,118 +81,118 @@ r_entity = re.compile(r'&[A-Za-z0-9#]+;')
@deprecated
def f_title(self, origin, match, args):
    """.title <URI> - Return the title of URI."""
    # The URI comes from the command's second match group; when that is
    # empty, the last URI remembered for the sender's channel is used.
    target = match.group(2) or ''
    if not target and hasattr(self, 'last_seen_uri'):
        target = self.last_seen_uri.get(origin.sender)
    if not target:
        return self.msg(origin.sender, 'I need a URI to give the title of...')

    # Address the answer to the requesting nick, with a fixed fallback text
    # when gettitle() yields nothing.
    found = gettitle(target)
    answer = found if found else 'No title found'
    self.msg(origin.sender, origin.nick + ': ' + answer)
f_title.commands = ['title']
# Remember, per sender, the most recent URI matched by noteuri.rule.  The
# mapping lives on phenny.bot as `last_seen_uri` and is created on first use.
def noteuri(phenny, input):
    seen_uri = input.group(1)
    try:
        remembered = phenny.bot.last_seen_uri
    except AttributeError:
        remembered = phenny.bot.last_seen_uri = {}
    remembered[input.sender] = seen_uri
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
noteuri.priority = 'low'
# Non-capturing alternation of every command name that f_title answers to.
titlecommands = '(?:%s)' % '|'.join(f_title.commands)

# Announce the title of any URI matched by snarfuri.rule, unless the message
# is itself one of the explicit title commands.
def snarfuri(phenny, input):
    command_pattern = r'(?i)' + phenny.config.prefix + titlecommands
    if re.match(command_pattern, input.group()) is not None:
        return
    page_title = gettitle(input.group(1))
    if not page_title:
        return
    phenny.msg(input.sender, '[ ' + page_title + ' ]')
snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low'
# URI prefixes that must never be fetched on behalf of a channel user.
# NOTE(review): this is a plain prefix match, so other ports and spellings
# of the loopback host are not covered - consider matching on the hostname.
_LOCAL_PREFIXES = (
    'http://localhost/', 'http://localhost:80/',
    'http://localhost:8080/', 'http://127.0.0.1/',
    'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
    'https://localhost/', 'https://localhost:80/',
    'https://localhost:8080/', 'https://127.0.0.1/',
    'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
)


def _decode_entity(match):
    """Return the character for one matched HTML entity.

    An entity that cannot be decoded (unknown name, malformed number, code
    point out of range) is returned unchanged instead of raising.
    """
    entity = match.group(0)
    try:
        if entity[:3].lower() == '&#x':
            return chr(int(entity[3:-1], 16))
        if entity.startswith('&#'):
            return chr(int(entity[2:-1]))
        return chr(name2codepoint[entity[1:-1]])
    except (KeyError, ValueError, OverflowError):
        return entity


def gettitle(uri):
    """Fetch `uri` and return the text of its HTML title, or None.

    None is returned when the URI points at a blocked local address, when
    25 redirects have been followed without reaching a final response, when
    the response is not HTML/XHTML, when the request fails with IOError, or
    when no non-empty title is found.  The title is whitespace-normalised,
    capped at 200 characters (plus '[...]') and has its HTML entities
    decoded.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    # This function has no bot object to reply through, so a forbidden
    # address is simply reported as "no title".
    if uri.startswith(_LOCAL_PREFIXES):
        return None

    try:
        redirects = 0
        while True:
            info = web.head(uri)

            # web.head may yield either the headers alone or a
            # [headers, status] list; a bare result is treated as 200.
            if isinstance(info, list):
                status = str(info[1])
                info = info[0]
            else:
                status = '200'

            if not status.startswith('3'):
                break
            uri = urllib.parse.urljoin(uri, info['Location'])

            redirects += 1
            if redirects >= 25:
                return None

        try:
            mtype = info['content-type']
        except (KeyError, TypeError):
            # No such header, or no headers object at all.
            return None
        if not mtype or not ('/html' in mtype or '/xhtml' in mtype):
            return None

        page = web.get(uri)
    except IOError:
        return None

    m = r_title.search(page)
    if not m:
        return None

    title = m.group(1).strip()
    for whitespace in ('\t', '\r', '\n'):
        title = title.replace(whitespace, ' ')
    # Collapse every run of spaces down to a single space.
    title = re.sub(' {2,}', ' ', title)
    if len(title) > 200:
        title = title[:200] + '[...]'

    title = r_entity.sub(_decode_entity, title)

    # Decoded entities (e.g. &#10;) can reintroduce line breaks; strip them
    # so the title stays on one line.
    title = title.replace('\n', '').replace('\r', '')
    return title or None
if __name__ == '__main__':
    # Run directly, the module only prints its own docstring.
    module_doc = __doc__.strip()
    print(module_doc)