Python 编码转换实验

发布时间: 2019-09-18 07:24:51 | 编辑: auto | 阅读: 1604

    Python 2.6.6 (r266:84292, Jul 23 2015, 15:22:56) 

    [GCC 4.4.7 20120313 (Red Hat 4.4.7-11)] on linux2

    Type "help", "copyright", "credits" or "license" for more information.

    >>> print ord('A')

    65

    >>>     

    ... 

    >>> a = {"a":"1","b","2"}

      File "<stdin>", line 1

        a = {"a":"1","b","2"}

                        ^

    SyntaxError: invalid syntax

    >>> a = {"a":"1","b":"2"}

    >>> str(a)

    "{'a': '1', 'b': '2'}"

    >>> print a

    {'a': '1', 'b': '2'}

    >>> print type(a)

    <type 'dict'>

    >>> print type(str(a))

    <type 'str'>

    >>> b = [1,2,3]

    >>> print type(b)

    <type 'list'>

    >>> print type(str(b))

    <type 'str'>

    >>> str(b)

    '[1, 2, 3]'

    >>> b.__class__

    <type 'list'>

    >>> str(b).__class__

    <type 'str'>

    >>> isinstance(a, str)

    False

    >>> isinstance(a, dict)

    True

    >>> isinstance(a, unicode)

    False

    >>> isinstance(a, utf-8)  

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'utf' is not defined

    >>> isinstance(a, 'utf-8')

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    TypeError: isinstance() arg 2 must be a class, type, or tuple of classes and types

    >>> isinstance(a, type)   

    False

    >>> isinstance(a, unicode)

    False

    >>> isinstance(a, unicode)

    False

    >>> import chardet

    >>> chardet.detect(a)

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect

        u.feed(aBuf)

      File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 74, in feed

        if aBuf[:3] == codecs.BOM:

    TypeError: unhashable type

    >>> chardet.detect(str(a))

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> chardet.detect(str(b))

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> c = ["我","是"]

    >>> chardet.detect(str(c))

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> print c

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> c.encode('unicode')

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'list' object has no attribute 'encode'

    >>> str(c).encode('unicode')

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    LookupError: unknown encoding: unicode

    >>> str(c).encode('utf-8')  

    "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"

    >>> d = str(c)

    >>> chardet.detect(d)

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> chardet.detect(c)

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect

        u.feed(aBuf)

      File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed

        if self._highBitDetector.search(aBuf):

    TypeError: expected string or buffer

    >>> chardet.detect(d)

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> print d

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print dc

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'dc' is not defined

    >>> print c

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print d.decode('ascii')

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(d.decode('ascii'))

    <type 'unicode'>

    >>> print d.decode('ascii')      

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> chardet.detect(c.decode('ascii')

    ... )

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'list' object has no attribute 'decode'

    >>> chardet.detect(d.decode('ascii'))

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect

        raise ValueError('Expected a bytes object, not a unicode object')

    ValueError: Expected a bytes object, not a unicode object

    >>> type(d)

    <type 'str'>

    >>> print type(d.decode('ascii'))

    <type 'unicode'>

    >>>  print d.decode('ascii') 

      File "<stdin>", line 1

        print d.decode('ascii') 

        ^

    IndentationError: unexpected indent

    >>> print d.decode('ascii') 

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print d.decode('ascii').encode('utf-8')

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print d.decode('ascii').encode('utf-8')[0]

    [

    >>> print d.decode('ascii')  

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> e = d.decode('ascii')  

    >>> print e

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> type(e)

    <type 'unicode'>

    >>> f = e.encode('utf-8')

    >>> f

    "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"

    >>> print f

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> type(f)

    <type 'str'>

    >>> print f.decode("unicode_escape")

    ['æ', 'æ¯']

    >>> print f.encode("raw_unicode_escape")

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print f.encode("raw_unicode_escape").decode('utf-8')

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print b

    [1, 2, 3]

    >>> print c

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(c)

    <type 'list'>

    >>> print type(d)

    <type 'str'>

    >>> print d

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> import syss

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    ImportError: No module named syss

    >>> import sys

    >>> reload(sys)

    <module 'sys' (built-in)>

    >>> sys.setdefaultencoding('utf-8')

    >>> print d

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(c)

    <type 'list'>

    >>> print type(d)

    <type 'str'>

    >>> cc = ["我","是"]

    >>> print cc

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(cc)

    <type 'list'>

    >>> dd = str(cc)

    >>> pirnt dd

      File "<stdin>", line 1

        pirnt dd

               ^

    SyntaxError: invalid syntax

    >>> print dd

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(dd)

    <type 'str'>

    >>> chardet.detect(d)

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> chardet.detect(dd)

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> sys.defaultencoding()

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencoding'

    >>> sys.defaultencoding

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencoding'

    >>> sys.defaultencode

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencode'

    >>> sys.defaultencode()

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencode'

    >>> sys.defaultencoding()

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencoding'

    >>> sys.defaultencode

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    AttributeError: 'module' object has no attribute 'defaultencode'

    >>> q = '中国'

    >>> type(q)

    <type 'str'>

    >>> chardet.detect(q0

    ... )

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'q0' is not defined

    >>> chardet.detect(q)

    {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}

    >>> p = ['中国', '复兴']

    >>> chardet.detect(p)   

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 30, in detect

        u.feed(aBuf)

      File "/usr/lib/python2.6/site-packages/chardet/universaldetector.py", line 108, in feed

        if self._highBitDetector.search(aBuf):

    TypeError: expected string or buffer

    >>> chardet.detect(str(p))

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> print type(dd)

    <type 'str'>

    >>> print dd.decode('unicode_escape')

    ['æ', 'æ¯']

    >>> print type(dd.decode('unicode_escape'))

    <type 'unicode'>

    >>> dd

    "['\\xe6\\x88\\x91', '\\xe6\\x98\\xaf']"

    >>> print dd

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print dd.encode('raw_unicode_escape')

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print type(dd.encode('raw_unicode_escape'))

    <type 'str'>

    >>> print type(dd.encode('raw_unicode_escape').decode('utf-8'))

    <type 'unicode'>

    >>> print type(dd.encode('raw_unicode_escape').decode('utf-8')

    ... )

    <type 'unicode'>

    >>> print dd

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print dd, type(dd)

    ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>

    >>> print dd.encode('raw_unicode_escape'), type(dd.encode('raw_unicode_escape'))

    ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'str'>

    >>> print dd.decode('utf-8'), type(dd.decode('utf-8')

    ... )

    ['\xe6\x88\x91', '\xe6\x98\xaf'] <type 'unicode'>

    >>> print dd.decode('utf-8')

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print dd

    ['\xe6\x88\x91', '\xe6\x98\xaf']

    >>> print ee

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'ee' is not defined

    >>> ee = u"dd"

    >>> ee = u"['\xe6\x88\x91', '\xe6\x98\xaf']"

    >>> print ee

    ['æ', 'æ¯']

    >>> ee

    u"['\xe6\x88\x91', '\xe6\x98\xaf']"

    >>> ee = [u'中国', u'复兴']

    >>> type(ee)

    <type 'list'>

    >>> print ee

    [u'\u4e2d\u56fd', u'\u590d\u5174']

    >>> print str(ee)

    [u'\u4e2d\u56fd', u'\u590d\u5174']

    >>> printee

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'printee' is not defined

    >>> print ee

    [u'\u4e2d\u56fd', u'\u590d\u5174']

    >>> print json.dumps(ee).decode('unicode_escape')

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'json' is not defined

    >>> import json

    >>> print json.dumps(ee).decode('unicode_escape')

    ["中国", "复兴"]

    >>> print str(ee).decode('unicode_escape')       

    [u'中国', u'复兴']

    >>> x = '中国'

    >>> print x

    中国

    >>> x

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> type(x)

    <type 'str'>

    >>> chardet.detect(x)

    {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}

    >>> y = x.decode('utf-8')

    >>> y

    u'\u4e2d\u56fd'

    >>> print y

    中国

    >>> chardet.detect(y)    

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect

        raise ValueError('Expected a bytes object, not a unicode object')

    ValueError: Expected a bytes object, not a unicode object

    >>> x

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> x = '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> print x

    中国

    >>> x

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> x = u'\xe4\xb8\xad\xe5\x9b\xbd'

    >>> print x

    -

    >>> x.decode('utf-8')

    u'\xe4\xb8\xad\xe5\x9b\xbd'

    >>> print x.decode('utf-8')

    -

    >>> chardet.detect(x)

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

      File "/usr/lib/python2.6/site-packages/chardet/__init__.py", line 25, in detect

        raise ValueError('Expected a bytes object, not a unicode object')

    ValueError: Expected a bytes object, not a unicode object

    >>> print type(x)

    <type 'unicode'>

    >>> x

    u'\xe4\xb8\xad\xe5\x9b\xbd'

    >>> pirnt x

      File "<stdin>", line 1

        pirnt x

              ^

    SyntaxError: invalid syntax

    >>> print x

    -

    >>> print x.encode('raw_unicode_escape')

    中国

    >>> y = x.encode('raw_unicode_escape')  

    >>> y

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> type y

      File "<stdin>", line 1

        type y

             ^

    SyntaxError: invalid syntax

    >>> type(y)

    <type 'str'>

    >>> print y

    中国

    >>> chardet.detect(y)

    {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}

    >>> z = y.encode('utf-8')

    >>> print z

    中国

    >>> z 

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> y

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> type(z)

    <type 'str'>

    >>> type(y)

    <type 'str'>

    >>> chardet.detect(y)    

    {'confidence': 0.75249999999999995, 'encoding': 'utf-8'}

    >>> y

    '\xe4\xb8\xad\xe5\x9b\xbd'

    >>> z = y.encode('utf-8')

    >>> z = y.decode('utf-8')

    >>> z

    u'\u4e2d\u56fd'

    >>> print z

    中国

    >>> type(z)

    <type 'unicode'>

    >>> x

    u'\xe4\xb8\xad\xe5\x9b\xbd'

    >>> f='\u53eb\u6211'  

    >>> print f

    \u53eb\u6211

    >>> f

    '\\u53eb\\u6211'

    >>> type(f)

    <type 'str'>

    >>> chardet.detect(f)

    {'confidence': 1.0, 'encoding': 'ascii'}

    >>> f.decode('ascii')

    u'\\u53eb\\u6211'

    >>> print f.decode('ascii')

    \u53eb\u6211

    >>> f.decode('unicode_escape')

    u'\u53eb\u6211'

    >>> print f.decode('unicode_escape')

    叫我

    >>> sys.getdefaultencoding()

    'utf-8'

    >>> dd = { 'name': u'功夫熊猫' }

    >>> print dd

    {'name': u'\u529f\u592b\u718a\u732b'}

    >>> dd

    {'name': u'\u529f\u592b\u718a\u732b'}

    >>> dd2 = { 'name': '功夫熊猫' }

    >>> dd2

    {'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}

    >>> print simplejson.dumps(dd, ensure_ascii=False)

    Traceback (most recent call last):

      File "<stdin>", line 1, in <module>

    NameError: name 'simplejson' is not defined

    >>> print json.dumps(dd, ensure_ascii=False)      

    {"name": "功夫熊猫"}

    >>> print json.dumps(dd2, ensure_ascii=False)

    {"name": "功夫熊猫"}

    >>> print dd2

    {'name': '\xe5\x8a\x9f\xe5\xa4\xab\xe7\x86\x8a\xe7\x8c\xab'}

    >>> 


关键字