master
mutantmonkey 2011-10-22 21:54:12 -04:00
parent d9a8cb9a12
commit 7b41ea29bd
1 changed files with 96 additions and 94 deletions

View File

@ -11,124 +11,126 @@ import re, unicodedata
from itertools import islice from itertools import islice
def about(u, cp=None, name=None):
    """Describe a single character as ``U+XXXX NAME (char)``.

    Args:
        u: The one-character string to describe.
        cp: Code point of ``u``; computed with ``ord(u)`` when omitted.
        name: Unicode name of ``u``; looked up with ``unicodedata.name``
            when omitted.

    Returns:
        The formatted description. When the name has to be looked up and
        the character has none, ``'U+XXXX (No name found)'`` is returned
        instead.
    """
    if cp is None:
        cp = ord(u)
    if name is None:
        try:
            name = unicodedata.name(u)
        except ValueError:
            return 'U+%04X (No name found)' % cp
    if unicodedata.combining(u):
        # Prefix combining marks with U+25CC DOTTED CIRCLE so they have a
        # base character to attach to.  The escape must be a real code
        # point: the UTF-8 byte spelling '\xe2\x97\x8c' is three separate
        # Latin-1 characters in a Python 3 str.
        template = 'U+%04X %s (\u25cc%s)'
    else:
        template = 'U+%04X %s (%s)'
    return template % (cp, name, u)
def codepoint_simple(arg):
    """Find the character whose Unicode name best matches the words in *arg*.

    The words of ``arg`` (upper-cased, separated by single spaces) must
    appear in order in the character name, each starting at a word
    boundary.  A first pass also requires the last word to end at a word
    boundary; if that finds nothing, a second pass lets the last word be a
    prefix.  Only code points in ``range(0xFFFF)`` are scanned.

    Args:
        arg: Search words.  The text is inserted into a regular expression
            unescaped, so any regexp syntax in it is interpreted as such.

    Returns:
        The ``about()`` description of the match with the shortest name
        (ties broken by the character itself), or ``None`` when nothing
        matches.
    """
    prefix = '\\b' + arg.upper().replace(' ', '.*\\b')

    def scan(pattern):
        # Collect (name length, char, code point, name) for every named
        # character in the scanned range whose name matches ``pattern``.
        r_label = re.compile(pattern)
        found = []
        for cp in range(0xFFFF):
            char = chr(cp)
            try:
                name = unicodedata.name(char)
            except ValueError:
                # Unnamed code point: nothing to match against.
                continue
            if r_label.search(name):
                found.append((len(name), char, cp, name))
        return found

    # Strict whole-word match first; fall back to a prefix match on the
    # last word only when the strict pass comes up empty.
    results = scan(prefix + '\\b') or scan(prefix)
    if not results:
        return None
    length, char, cp, name = min(results)
    return about(char, cp, name)
def codepoint_extended(arg):
    """Yield descriptions of characters whose name matches the regexp *arg*.

    ``arg`` is upper-cased and compiled as a regular expression, then
    searched against the name of every code point in
    ``range(1, 0x10FFFF)``.  Code points without a name are given the
    placeholder name ``'-'``, so a pattern that matches ``'-'`` also yields
    unnamed code points.

    This is a generator: nothing runs, including the regexp check, until
    the first item is requested.

    Args:
        arg: Regular expression to search character names with.

    Yields:
        The ``about()`` description of each matching character, in code
        point order.

    Raises:
        ValueError: If ``arg`` cannot be compiled as a regular expression.
    """
    arg = arg.upper()
    try:
        r_search = re.compile(arg)
    except Exception as err:
        # Compilation can fail with more than re.error (e.g. an oversized
        # repeat count), so catch Exception -- but not BaseException, which
        # would also swallow KeyboardInterrupt and SystemExit.
        raise ValueError('Broken regexp: %r' % arg) from err
    for cp in range(1, 0x10FFFF):
        char = chr(cp)
        name = unicodedata.name(char, '-')
        if r_search.search(name):
            yield about(char, cp, name)
def u(phenny, input):
    """Look up unicode information."""
    # Everything after the first three bytes of the message is the query.
    arg = input.bytes[3:]
    if not arg:
        return phenny.reply('You gave me zero length input.')
    elif not arg.strip(b' '):
        # The query consists of nothing but spaces.
        if len(arg) > 1:
            return phenny.reply('%s SPACEs (U+0020)' % len(arg))
        return phenny.reply('1 SPACE (U+0020)')

    # @@ space
    # A "printable" query is longer than one byte and made up only of
    # ASCII letters, digits, space, '-' and regexp punctuation.  It is
    # treated as a name/regexp/hex search; anything else is treated as
    # literal characters to describe.
    unexpected = set(arg.upper()) - set(
        b'ABCDEFGHIJKLMNOPQRSTUVWYXYZ0123456789- .?+*{}[]\\/^$')
    printable = len(arg) > 1 and not unexpected

    if printable:
        # Any regexp punctuation switches to the regexp search.
        extended = any(c in arg for c in b'.?+*{}[]\\/^$')

        # Four bytes that parse as hexadecimal are taken as a code point.
        if len(arg) == 4:
            try:
                char = chr(int(arg, 16))
            except ValueError:
                pass
            else:
                return phenny.say(about(char))

        arg = arg.decode('utf-8')

        if extended:
            # look up a codepoint with regexp
            try:
                results = list(islice(codepoint_extended(arg), 4))
            except ValueError as err:
                # codepoint_extended() rejects regexps that do not compile;
                # tell the user instead of letting the error escape.
                return phenny.reply(str(err))
            # Show at most three results; flag the third when a fourth
            # exists so the user knows the list was cut short.
            for i, result in enumerate(results[:3]):
                if i == 2 and len(results) > 3:
                    result += ' [...]'
                phenny.say(result)
            if not results:
                phenny.reply('Sorry, no results')
        else:
            # look up a codepoint freely
            result = codepoint_simple(arg)
            if result is not None:
                phenny.say(result)
            else:
                phenny.reply("Sorry, no results for %r." % arg)
    else:
        try:
            text = arg.decode('utf-8')
        except UnicodeDecodeError:
            return phenny.reply('Sorry, your input is not valid UTF-8.')
        if len(text) <= 3:
            # up to three code points: describe each one in full
            for char in text:
                phenny.say(about(char))
        elif len(text) <= 10:
            # four to ten code points: list the code point numbers only
            phenny.reply(' '.join('U+%04X' % ord(c) for c in text))
        else:
            phenny.reply('Sorry, your input is too long!')
u.commands = ['u']
u.example = '.u 203D'
def bytes(phenny, input):
    """Show the input as pretty printed bytes."""
    b = input.bytes
    # ``b`` is a bytes object, so the separator passed to find() must be
    # bytes as well; a str argument raises TypeError in Python 3.
    # Everything after the first space is echoed back; when there is no
    # space, find() returns -1 and the whole message is echoed.
    phenny.reply('%r' % b[b.find(b' ') + 1:])
bytes.commands = ['bytes']
# U+32E1, written as a code point escape rather than as its UTF-8 bytes.
bytes.example = '.bytes \u32e1'
if __name__ == '__main__':
    # Running the module directly just prints its docstring.  __doc__ is
    # None when the module has no docstring, so fall back to an empty
    # string rather than failing on None.strip().
    print((__doc__ or '').strip())