fix .u

2011-10-22 21:54:12 -04:00
parent d9a8cb9a12
commit 7b41ea29bd
1 changed files with 96 additions and 94 deletions
--- a/modules/codepoints.py
+++ b/modules/codepoints.py
@@ -11,124 +11,126 @@ import re, unicodedata
 from itertools import islice
 def about(u, cp=None, name=None): 
-   if cp is None: 
+    if cp is None: 
-      cp = ord(u)
+        cp = ord(u)
-   if name is None: 
+    if name is None: 
-      try: name = unicodedata.name(u)
+        try: name = unicodedata.name(u)
-      except ValueError: 
+        except ValueError: 
-         return 'U+%04X (No name found)' % cp
+            return 'U+%04X (No name found)' % cp
-   if not unicodedata.combining(u): 
+    if not unicodedata.combining(u): 
-      template = 'U+%04X %s (%s)'
+        template = 'U+%04X %s (%s)'
-   else: template = 'U+%04X %s (\xe2\x97\x8c%s)'
+    else: template = 'U+%04X %s (\xe2\x97\x8c%s)'
-   return template % (cp, name, u)
+    return template % (cp, name, u)
 def codepoint_simple(arg): 
-   arg = arg.upper()
+    arg = arg.upper()
-   r_label = re.compile('\\b' + arg.replace(' ', '.*\\b') + '\\b')
+    r_label = re.compile('\\b' + arg.replace(' ', '.*\\b') + '\\b')
-   results = []
+    results = []
-   for cp in range(0xFFFF): 
+    for cp in range(0xFFFF): 
-      u = chr(cp)
+        u = chr(cp)
-      try: name = unicodedata.name(u)
+        try: name = unicodedata.name(u)
-      except ValueError: continue
+        except ValueError: continue
-      if r_label.search(name): 
+        if r_label.search(name): 
         results.append((len(name), u, cp, name))
   if not results: 
      r_label = re.compile('\\b' + arg.replace(' ', '.*\\b'))
      for cp in range(0xFFFF): 
         u = chr(cp)
         try: name = unicodedata.name(u)
         except ValueError: continue
         if r_label.search(name): 
            results.append((len(name), u, cp, name))
    if not results: 
        r_label = re.compile('\\b' + arg.replace(' ', '.*\\b'))
        for cp in range(0xFFFF): 
            u = chr(cp)
            try: name = unicodedata.name(u)
            except ValueError: continue
-   if not results: 
+            if r_label.search(name): 
-      return None
+                results.append((len(name), u, cp, name))
-   length, u, cp, name = sorted(results)[0]
+    if not results: 
-   return about(u, cp, name)
+        return None
    length, u, cp, name = sorted(results)[0]
    return about(u, cp, name)
 def codepoint_extended(arg): 
-   arg = arg.upper()
+    arg = arg.upper()
-   try: r_search = re.compile(arg)
+    try: r_search = re.compile(arg)
-   except: raise ValueError('Broken regexp: %r' % arg)
+    except: raise ValueError('Broken regexp: %r' % arg)
-   for cp in range(1, 0x10FFFF): 
+    for cp in range(1, 0x10FFFF): 
-      u = chr(cp)
+        u = chr(cp)
-      name = unicodedata.name(u, '-')
+        name = unicodedata.name(u, '-')
-      if r_search.search(name): 
+        if r_search.search(name): 
-         yield about(u, cp, name)
+            yield about(u, cp, name)
 def u(phenny, input): 
-   """Look up unicode information."""
+    """Look up unicode information."""
-   arg = input.bytes[3:]
+    arg = input.bytes[3:]
-   # phenny.msg('#inamidst', '%r' % arg)
+    # phenny.msg('#inamidst', '%r' % arg)
-   if not arg: 
+    if not arg: 
-      return phenny.reply('You gave me zero length input.')
+        return phenny.reply('You gave me zero length input.')
-   elif not arg.strip(' '): 
+    elif not arg.strip(b' '): 
-      if len(arg) > 1: return phenny.reply('%s SPACEs (U+0020)' % len(arg))
+        if len(arg) > 1: return phenny.reply('%s SPACEs (U+0020)' % len(arg))
-      return phenny.reply('1 SPACE (U+0020)')
+        return phenny.reply('1 SPACE (U+0020)')
-   # @@ space
+    # @@ space
-   if set(arg.upper()) - set(
+    if set(arg.upper()) - set(
-      'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$'): 
+        b'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$'): 
-      printable = False
+        printable = False
-   elif len(arg) > 1: 
+    elif len(arg) > 1: 
-      printable = True
+        printable = True
-   else: printable = False
+    else: printable = False
-   if printable: 
+    if printable: 
-      extended = False
+        extended = False
-      for c in '.?+*{}[]\\/^$': 
+        for c in b'.?+*{}[]\\/^$': 
-         if c in arg: 
+            if c in arg: 
-            extended = True
+                extended = True
-            break
+                break
-      if len(arg) == 4: 
+        if len(arg) == 4: 
-         try: u = chr(int(arg, 16))
+            try: u = chr(int(arg, 16))
-         except ValueError: pass
+            except ValueError: pass
-         else: return phenny.say(about(u))
+            else: return phenny.say(about(u))
-      if extended: 
+        arg = arg.decode('utf-8')
-         # look up a codepoint with regexp
+
-         results = list(islice(codepoint_extended(arg), 4))
+        if extended: 
-         for i, result in enumerate(results): 
+            # look up a codepoint with regexp
-            if (i < 2) or ((i == 2) and (len(results) < 4)): 
+            results = list(islice(codepoint_extended(arg), 4))
-               phenny.say(result)
+            for i, result in enumerate(results): 
-            elif (i == 2) and (len(results) > 3): 
+                if (i < 2) or ((i == 2) and (len(results) < 4)): 
-               phenny.say(result + ' [...]')
+                    phenny.say(result)
-         if not results: 
+                elif (i == 2) and (len(results) > 3): 
-            phenny.reply('Sorry, no results')
+                    phenny.say(result + ' [...]')
-      else: 
+            if not results: 
-         # look up a codepoint freely
+                phenny.reply('Sorry, no results')
-         result = codepoint_simple(arg)
+        else: 
-         if result is not None: 
+            # look up a codepoint freely
-            phenny.say(result)
+            result = codepoint_simple(arg)
-         else: phenny.reply("Sorry, no results for %r." % arg)
+            if result is not None: 
-   else: 
+                phenny.say(result)
-      text = arg.decode('utf-8')
+            else: phenny.reply("Sorry, no results for %r." % arg)
-      # look up less than three podecoints
+    else: 
-      if len(text) <= 3: 
+        text = arg.decode('utf-8')
-         for u in text: 
+        # look up less than three podecoints
-            phenny.say(about(u))
+        if len(text) <= 3: 
-      # look up more than three podecoints
+            for u in text: 
-      elif len(text) <= 10: 
+                phenny.say(about(u))
-         phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
+        # look up more than three podecoints
-      else: phenny.reply('Sorry, your input is too long!')
+        elif len(text) <= 10: 
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else: phenny.reply('Sorry, your input is too long!')
 u.commands = ['u']
 u.example = '.u 203D'
 def bytes(phenny, input): 
-   """Show the input as pretty printed bytes."""
+    """Show the input as pretty printed bytes."""
-   b = input.bytes
+    b = input.bytes
-   phenny.reply('%r' % b[b.find(' ') + 1:])
+    phenny.reply('%r' % b[b.find(' ') + 1:])
 bytes.commands = ['bytes']
 bytes.example = '.bytes \xe3\x8b\xa1'
 if __name__ == '__main__': 
-   print(__doc__.strip())
+    print(__doc__.strip())