Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(2)  |  Call(1)  |  Derive(0)  |  Import(1)
This function normalizes unicode characters to their base ASCII letters.
The output is an ASCII-encoded string (or single character) containing only
ASCII letters, digits, punctuation and whitespace characters. Case is preserved.

        def normalizeUnicode(text, encoding='humanascii'):
            """
            Normalize unicode characters in ``text`` to their base ASCII letters.

            Parameters:
                text:     unicode string, or a byte string that is decoded as UTF-8.
                encoding: 'humanascii' (default) keeps characters found in the
                          module-level ``allowed`` collection; 'identifier' keeps
                          characters found in ``allowedid`` and turns every other
                          ASCII character into '_'; any other value is used as a
                          codec name and characters encodable in it are kept as is.

            Characters that cannot be encoded are replaced, in this order, by
            their entry in the module-level ``mapping`` (keyed by code point),
            by the allowed characters of their NFKD decomposition with combining
            marks removed, or by the hexadecimal value of their code point.

            For 'identifier' the result has leading/trailing underscores removed,
            every run of underscores collapsed to a single one, and a '_' prefix
            added when it does not start with an ASCII letter (an empty result
            becomes '_').

            Returns a unicode string when ``text`` was unicode, otherwise the
            UTF-8 encoded byte string. Case is preserved.
            """
            unicodeinput = isinstance(text, unicode)
            if not unicodeinput:
                text = unicode(text, 'utf-8')

            identifier = encoding == 'identifier'
            if identifier or encoding == 'humanascii':
                enc = 'ascii'
            else:
                enc = encoding

            # Characters copied through untouched; only the two built-in modes
            # have such a whitelist.
            if identifier:
                passthrough = allowedid
            elif encoding == 'humanascii':
                passthrough = allowed
            else:
                passthrough = None
            # Whitelist applied to decomposed characters (custom encodings fall
            # back to ``allowed`` here, as they always did).
            decomposed_ok = allowedid if identifier else allowed

            parts = []
            for ch in text:
                if passthrough is not None and ch in passthrough:
                    parts.append(ch)
                    continue
                try:
                    ch.encode(enc, 'strict')
                except UnicodeEncodeError:
                    pass
                else:
                    # Encodable but not whitelisted: identifiers get a
                    # placeholder, every other mode keeps the character.
                    parts.append(u'_' if identifier else ch)
                    continue

                ordinal = ord(ch)
                if ordinal in mapping:
                    # try to apply custom mappings
                    parts.append(mapping[ordinal])
                    continue

                decomposed = normalize('NFKD', ch)
                if decomposition(ch) or len(decomposed) > 1:
                    base = u''.join(
                        [c for c in decomposed if not combining(c)]).strip()
                    # The decomposition may yield several characters and may
                    # contain non-letter characters; keep only the allowed ones.
                    parts.append(u''.join([c for c in base if c in decomposed_ok]))
                else:
                    # hex string instead of unknown char
                    parts.append(u'%x' % ordinal)

            res = u''.join(parts)
            if identifier:
                # Splitting on '_' and dropping the empty pieces strips the
                # underscores at both ends and collapses runs of any length.
                res = u'_'.join([piece for piece in res.split(u'_') if piece])
                # Guard against an empty result before looking at res[0].
                if not res or res[0] not in string.ascii_letters:
                    res = u'_' + res
            if unicodeinput:
                return res
            return res.encode('utf-8')
        


src/p/y/PyFileMaker-2.6/PyFileMaker/FMData.py   PyFileMaker(Download)
from re import compile
from FMError import FMError
from UnicodeNormalizer import normalizeUnicode
 
# Matches an optional "dd/dd/dddd" date, an optional single space, and an
# optional "dd:dd:dd" time (d = digit). Every part is optional, so the pattern
# also matches the empty string; unmatched groups are None.
# Groups: 1 = whole date, 2-4 = its three numeric fields,
#         5 = whole time, 6-8 = its three numeric fields.
# A raw string is used so the \d escapes reach the regex engine without
# relying on Python's deprecated handling of invalid string escapes.
reDateTime = compile(r'((\d{2})/(\d{2})/(\d{4}))? ?((\d{2}):(\d{2}):(\d{2}))?')
	new2old = {}
	for key in from_dict:
		k = normalizeUnicode(key,'identifier')
		if k != key:
			i = ''