2008-02-21 07:06:33 -05:00
|
|
|
#!/usr/bin/env python
|
|
|
|
"""
|
|
|
|
head.py - Phenny HTTP Metadata Utilities
|
|
|
|
Copyright 2008, Sean B. Palmer, inamidst.com
|
|
|
|
Licensed under the Eiffel Forum License 2.
|
|
|
|
|
|
|
|
http://inamidst.com/phenny/
|
|
|
|
"""
|
|
|
|
|
2011-12-28 17:45:11 -05:00
|
|
|
import re
|
|
|
|
import urllib.parse
|
|
|
|
import time
|
2012-11-26 22:04:36 -05:00
|
|
|
from html.entities import name2codepoint
|
2008-02-21 07:06:33 -05:00
|
|
|
import web
|
|
|
|
from tools import deprecated
|
|
|
|
|
2012-09-24 21:55:58 -04:00
|
|
|
|
2012-11-26 22:04:36 -05:00
|
|
|
def head(phenny, input):
    """Provide HTTP HEAD information.

    The command argument (``input.group(2)``) is read as ``<uri> [header]``:
    when it contains a space, the text after the last space is taken as the
    name of a single response header to report. With no URI, the last URI
    remembered for ``input.sender`` in ``phenny.last_seen_uri`` is used, if
    that attribute exists.

    Without a header name, a summary line (status, content type, last
    modified time, content length, response time) is sent via
    ``phenny.reply``. With a header name, that header's value (or a note that
    it is missing) is sent via ``phenny.say``. HTTP and connection errors are
    reported via ``phenny.say`` as well.
    """
    args = input.group(2) or ''
    if ' ' in args:
        uri, header = args.rsplit(' ', 1)
    else:
        uri, header = args, None

    if not uri and hasattr(phenny, 'last_seen_uri'):
        try:
            uri = phenny.last_seen_uri[input.sender]
        except KeyError:
            return phenny.say('?')

    if not uri:
        # Nothing was given and nothing is remembered: answer the same way as
        # the "no URI seen in this channel" case instead of going on to
        # request the bare string 'http://'.
        return phenny.say('?')

    if not uri.startswith('htt'):
        uri = 'http://' + uri

    start = time.time()
    try:
        info = web.head(uri)
        info['status'] = '200'
    except web.HTTPError as e:
        # Report whichever status attribute this exception object carries.
        if hasattr(e, 'code'):
            return phenny.say(str(e.code))
        else:
            return phenny.say(str(e.response.status_code))
    except web.ConnectionError:
        return phenny.say("Can't connect to %s" % uri)
    resptime = time.time() - start

    if header is None:
        data = []
        # Look up the key exactly as it was stored above, so the status is
        # found whether or not the mapping is case-insensitive.
        if 'status' in info:
            data.append(info['status'])
        if 'content-type' in info:
            data.append(info['content-type'].replace('; charset=', ', '))
        if 'last-modified' in info:
            modified = info['last-modified']
            try:
                parsed = time.strptime(modified, '%a, %d %b %Y %H:%M:%S %Z')
            except ValueError:
                # Not in the expected date layout: show the raw header value
                # rather than failing the whole command.
                data.append(modified)
            else:
                data.append(time.strftime('%Y-%m-%d %H:%M:%S UTC', parsed))
        if 'content-length' in info:
            data.append(info['content-length'] + ' bytes')
        data.append('{0:1.2f} s'.format(resptime))
        phenny.reply(', '.join(data))
    else:
        headerlower = header.lower()
        if headerlower in info:
            phenny.say(header + ': ' + info.get(headerlower))
        else:
            msg = 'There was no %s header in the response.' % header
            phenny.say(msg)
head.commands = ['head']
head.example = '.head http://www.w3.org/'
|
2008-02-21 07:06:33 -05:00
|
|
|
|
2012-11-26 22:04:36 -05:00
|
|
|
|
2012-06-27 19:33:09 -04:00
|
|
|
# Matches an HTML <title> element; group 1 captures the text between the tags.
# The inline (?ims) flags make it case-insensitive, multi-line, and let '.'
# match newlines so titles spanning several lines are captured.
r_title = re.compile(r'(?ims)<title[^>]*>(.*?)</title\s*>')
|
|
|
|
# Matches a character reference: '&', then one or more letters, digits or '#',
# then ';' (e.g. &amp;, &#38;, &#x26;).
r_entity = re.compile(r'&[A-Za-z0-9#]+;')
|
|
|
|
|
2012-11-26 22:04:36 -05:00
|
|
|
|
|
|
|
def noteuri(phenny, input):
    """Remember the URI matched in this message, keyed by ``input.sender``.

    The mapping is stored on ``phenny.bot`` as ``last_seen_uri`` and is
    created the first time a URI is recorded.
    """
    seen = input.group(1)
    bot = phenny.bot
    if not hasattr(bot, 'last_seen_uri'):
        bot.last_seen_uri = {}
    bot.last_seen_uri[input.sender] = seen
noteuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
noteuri.priority = 'low'
|
|
|
|
|
2012-11-26 22:04:36 -05:00
|
|
|
|
|
|
|
|
2011-03-05 20:51:52 -05:00
|
|
|
def snarfuri(phenny, input):
    """Look up the title of the URI matched in a message and post it.

    The URI is the second group of the rule below. When ``gettitle`` yields
    a non-empty title it is sent to ``input.sender`` via ``phenny.msg``;
    otherwise nothing is sent.
    """
    found = gettitle(phenny, input, input.group(2))
    if not found:
        return
    phenny.msg(input.sender, found)
snarfuri.rule = r'([^\.].*)?(http[s]?://[^<> "\x01]+)[,.]?'
snarfuri.priority = 'low'
snarfuri.thread = True
|
2008-02-29 10:36:18 -05:00
|
|
|
|
2008-02-21 07:06:33 -05:00
|
|
|
|
2014-04-06 19:35:14 -04:00
|
|
|
def gettitle(phenny, input, uri):
    """Fetch *uri* and return its HTML title formatted for IRC.

    Returns ``"[ <title> ]"`` (with a ``(posted: ...)`` suffix when the
    ``posted`` module is loaded and reports the URI as already posted), or
    ``None`` when the page cannot be fetched, is not (X)HTML, or has no
    usable title. URIs starting with one of the listed localhost prefixes
    are refused with a reply instead, and the result of ``phenny.reply`` is
    returned.
    """
    if ':' not in uri:
        uri = 'http://' + uri
    uri = uri.replace('#!', '?_escaped_fragment_=')

    # NOTE(review): this is a plain prefix check on the listed spellings and
    # ports only; other spellings of a local address are not covered.
    local_prefixes = (
        'http://localhost/', 'http://localhost:80/',
        'http://localhost:8080/', 'http://127.0.0.1/',
        'http://127.0.0.1:80/', 'http://127.0.0.1:8080/',
        'https://localhost/', 'https://localhost:80/',
        'https://localhost:8080/', 'https://127.0.0.1/',
        'https://127.0.0.1:80/', 'https://127.0.0.1:8080/',
    )
    if uri.startswith(local_prefixes):
        return phenny.reply('Sorry, access forbidden.')

    try:
        redirects = 0
        while True:
            info = web.head(uri)

            # A list result is treated as (headers, status); anything else is
            # taken to be the headers of a successful response.
            if isinstance(info, list):
                status = str(info[1])
                info = info[0]
            else:
                status = '200'

            if not status.startswith('3'):
                break

            uri = urllib.parse.urljoin(uri, info['Location'])
            redirects += 1
            if redirects >= 25:
                return None

        # A missing content-type raises here and is handled below like any
        # other fetch problem: no title.
        mtype = info['content-type']
        if '/html' not in mtype and '/xhtml' not in mtype:
            return None

        page = web.get(uri)
    except Exception:
        # Title snarfing is best-effort: any failure means "no title".
        return None

    m = r_title.search(page)
    if not m:
        return None

    title = _clean_title(m.group(1))
    if not title:
        return None
    title = "[ {0} ]".format(title)

    if "posted" in phenny.variables:
        from modules.posted import check_posted

        posted = check_posted(phenny, input, uri)
        if posted:
            title = "{0} (posted: {1})".format(title, posted)

    return title


def _clean_title(raw):
    """Turn raw <title> text into a single line with entities decoded."""
    # Collapse every run of whitespace (tabs, newlines, repeated spaces) into
    # one space and trim the ends.
    title = ' '.join(raw.split())
    if len(title) > 200:
        title = title[:200] + '[...]'
    title = r_entity.sub(_decode_entity, title)
    # Decoded references such as &#10; can reintroduce line breaks; strip
    # them so the result stays on one line.
    return title.replace('\n', '').replace('\r', '')


def _decode_entity(match):
    """Return the character for one matched reference, or the text unchanged."""
    entity = match.group(0)
    name = entity[1:-1]
    try:
        if name.startswith(('#x', '#X')):
            return chr(int(name[2:], 16))
        if name.startswith('#'):
            return chr(int(name[1:]))
        return chr(name2codepoint[name])
    except (KeyError, ValueError, OverflowError):
        # Unknown name, malformed number or out-of-range code point: leave
        # the reference as written rather than failing the whole title.
        return entity
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Run directly as a script: print the module docstring as a banner.
    banner = __doc__.strip()
    print(banner)
|