#!/usr/bin/env python
# encoding: utf-8
#
# Copyright (c) 2010 Doug Hellmann.  All rights reserved.
#
"""Translating between encodings on the fly.

Round-trips a short Unicode string through ``codecs.EncodedFile``:
UTF-8 bytes are written through a wrapper that stores them as UTF-16,
then the UTF-16 bytes are read back through a second wrapper that
hands them out as UTF-8 again.  Each stage is printed as hex.
"""
#end_pymotw_header

from codecs_to_hex import to_hex

import codecs
from io import BytesIO


def _show_hex(label, raw_bytes, nbytes):
    """Print *label*, a space, and ``to_hex(raw_bytes, nbytes)``.

    A single-argument ``print(...)`` is used so the same line is valid
    (and produces the same text) under both the Python 2 print
    statement and the Python 3 print function.

    NOTE(review): ``nbytes`` is passed straight through to ``to_hex``;
    presumably it is the number of bytes per printed group -- confirm
    against ``codecs_to_hex``.
    """
    print('%s %s' % (label, to_hex(raw_bytes, nbytes)))


# Raw version of the original data.
data = u'pi: \u03c0'

# Manually encode it as UTF-8.
utf8 = data.encode('utf-8')
_show_hex('Start as UTF-8 :', utf8, 1)

# Set up an in-memory byte buffer, then wrap it as an EncodedFile.
# Callers of the wrapper deal in ``data_encoding`` (UTF-8) while the
# wrapped buffer holds ``file_encoding`` (UTF-16).
output = BytesIO()
encoded_file = codecs.EncodedFile(output, data_encoding='utf-8',
                                  file_encoding='utf-16')
encoded_file.write(utf8)

# Fetch the buffer contents as a UTF-16 encoded byte string.
utf16 = output.getvalue()
_show_hex('Encoded to UTF-16:', utf16, 2)

# Set up another buffer with the UTF-16 data for reading,
# and wrap it with another EncodedFile.  The name avoids ``buffer``,
# which is a builtin in Python 2.
utf16_buffer = BytesIO(utf16)
encoded_file = codecs.EncodedFile(utf16_buffer, data_encoding='utf-8',
                                  file_encoding='utf-16')

# Read the UTF-8 encoded version of the data.
recoded = encoded_file.read()
_show_hex('Back to UTF-8 :', recoded, 1)