Gets title from linx for augmented titling capacity

2012-09-24 21:55:58 -04:00
parent 7f55eb7cc5
commit 7b87baeb7e
2 changed files with 14 additions and 101 deletions
@@ -14,15 +14,15 @@ import urllib.error
 import http.client
 import http.cookiejar
 import time
-from html.entities import name2codepoint
 import web
 from tools import deprecated
-from modules.linx import check_posted_link
+from modules.linx import get_title

 cj = http.cookiejar.LWPCookieJar()
 opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
 urllib.request.install_opener(opener)

+
 def head(phenny, input): 
    """Provide HTTP HEAD information."""
    uri = input.group(2)
@@ -87,7 +87,7 @@ def f_title(self, origin, match, args):
        uri = self.last_seen_uri.get(origin.sender)
    if not uri: 
        return self.msg(origin.sender, 'I need a URI to give the title of...')
-    title = gettitle(uri)
+    title = get_title(uri)
    if title:
        self.msg(origin.sender, origin.nick + ': ' + title)
    else: self.msg(origin.sender, origin.nick + ': No title found')
@@ -109,98 +109,12 @@ def snarfuri(phenny, input):
    if re.match(r'(?i)' + phenny.config.prefix + titlecommands, input.group()):
        return
    uri = input.group(1)
-    title = gettitle(uri, input.sender)
+    title = get_title(uri, input.sender)
    if title:
        phenny.msg(input.sender, title)
 snarfuri.rule = r'.*(http[s]?://[^<> "\x01]+)[,.]?'
 snarfuri.priority = 'low'

-def gettitle(uri, channel):
-    if not ':' in uri: 
-        uri = 'http://' + uri
-    uri = uri.replace('#!', '?_escaped_fragment_=')
-
-    title = None
-    localhost = [
-        'http://localhost/', 'http://localhost:80/', 
-        'http://localhost:8080/', 'http://127.0.0.1/', 
-        'http://127.0.0.1:80/', 'http://127.0.0.1:8080/', 
-        'https://localhost/', 'https://localhost:80/', 
-        'https://localhost:8080/', 'https://127.0.0.1/', 
-        'https://127.0.0.1:80/', 'https://127.0.0.1:8080/', 
-    ]
-    for s in localhost: 
-        if uri.startswith(s): 
-            return phenny.reply('Sorry, access forbidden.')
-
-    try: 
-        redirects = 0
-        while True: 
-            info = web.head(uri)
-
-            if not isinstance(info, list): 
-                status = '200'
-            else: 
-                status = str(info[1])
-                info = info[0]
-            if status.startswith('3'): 
-                uri = urllib.parse.urljoin(uri, info['Location'])
-            else: break
-
-            redirects += 1
-            if redirects >= 25: 
-                return None
-
-        try: mtype = info['content-type']
-        except: 
-            return None
-
-        if not (('/html' in mtype) or ('/xhtml' in mtype)): 
-            return None
-
-        bytes = web.get(uri)
-        #bytes = u.read(262144)
-        #u.close()
-
-    except IOError: 
-        return
-
-    m = r_title.search(bytes)
-    if m: 
-        title = m.group(1)
-        title = title.strip()
-        title = title.replace('\t', ' ')
-        title = title.replace('\r', ' ')
-        title = title.replace('\n', ' ')
-        while '  ' in title: 
-            title = title.replace('  ', ' ')
-        if len(title) > 200: 
-            title = title[:200] + '[...]'
-        
-        def e(m): 
-            entity = m.group(0)
-            if entity.startswith('&#x'): 
-                cp = int(entity[3:-1], 16)
-                return chr(cp)
-            elif entity.startswith('&#'): 
-                cp = int(entity[2:-1])
-                return chr(cp)
-            else: 
-                char = name2codepoint[entity[1:-1]]
-                return chr(char)
-        title = r_entity.sub(e, title)
-
-        if title: 
-            title = title.replace('\n', '')
-            title = title.replace('\r', '')
-
-            channels = ['#vtluug', '#vtcsec']
-            if channel in channels:
-                title = "[ " + title + " ] " + check_posted_link(uri, channel)
-            else:
-                title = "[ " + title + " ] "
-        else: title = None
-    return title

 if __name__ == '__main__': 
    print(__doc__.strip())
@@ -1,7 +1,8 @@
 #!/usr/bin/python3
 """
 linx.py - linx.li tools
-author: mutantmonkey <mutantmonkey@mutantmonkey.in>, andreim <andreim@andreim.net>
+author: andreim <andreim@andreim.net>
+author: mutantmonkey <mutantmonkey@mutantmonkey.in>
 """

 from urllib.error import HTTPError
@@ -10,6 +11,14 @@ import web
 import json


+def get_title(url, channel=''):  # channel optional: f_title calls get_title(uri)
+    """Have linx retrieve the (augmented) title of url; return None on failure."""
+    try:
+        return web.post("http://linx.li/vtluuggettitle", {'url': url, 'channel': channel})
+    except Exception:  # best-effort lookup, but let KeyboardInterrupt/SystemExit through
+        return None
+
+
 def linx(phenny, input, short=False):
    """.linx <url> - Upload a remote URL to linx.li."""

@@ -88,15 +97,5 @@ def posted(phenny, input):
 posted.rule = (['posted'], r'(.*)')


-def check_posted_link(url, channel):
-    """ helper method for gettitle() """
-
-    try:
-        req = web.post("http://linx.li/vtluugpostedurl", {'url': url, 'channel': channel})
-    except:
-        req = ""
-
-    return req
-
 if __name__ == '__main__':
    print(__doc__.strip())