Python unicode + xml.dom.minidom + write to file

# -*- coding: utf-8 -*-
import xml.dom.minidom, codecs
# Demo: build a tiny XML document containing non-ASCII text with
# xml.dom.minidom and write it to 'utf_test.xml' as UTF-8, showing two
# alternative ways of serialising the document.

# Build the document: <root><ascii>abc</ascii><utf>... 6000</utf></root>
impl = xml.dom.minidom.getDOMImplementation()
dom = impl.createDocument(None, "root", None)
root_el = dom.documentElement

# Child element holding plain ASCII text.
ascii_el = dom.createElement('ascii')
root_el.appendChild(ascii_el)
ascii_el.appendChild(dom.createTextNode('abc'))

# Child element holding non-ASCII text.  The u'' literal is decoded using
# the source-file encoding declaration, which yields the same text object
# as unicode('...', 'utf-8') on Python 2 and is also valid on Python 3.3+.
utf_el = dom.createElement('utf')
root_el.appendChild(utf_el)
utf_el.appendChild(dom.createTextNode(u'µ 6000'))

# There are a couple of ways of dealing with output; flip this flag to try
# the other one.  Both write a UTF-8 encoded file without a BOM.
USE_UNICODE_STRING = True

if USE_UNICODE_STRING:
    # Serialise to a text string (Python 2: unicode, Python 3: str).  The
    # XML declaration carries no encoding attribute in this mode.
    xmlstr = dom.toxml()
    # codecs.open() encodes the text to UTF-8 as it is written; the 'with'
    # block guarantees the file is closed even if the write fails.
    with codecs.open('utf_test.xml', 'w', 'utf-8') as f:
        f.write(xmlstr)
else:
    # Serialise straight to a UTF-8 encoded byte string.  The XML
    # declaration then reads <?xml version="1.0" encoding="utf-8"?>.
    xmlstr = dom.toxml('utf-8')
    # The data is already encoded, so write it in binary mode: text mode
    # could translate newlines on Windows and rejects bytes on Python 3.
    with open('utf_test.xml', 'wb') as f:
        f.write(xmlstr)

# Printing the text string lets the console layer pick the encoding;
# printing the byte string sends raw UTF-8 bytes, which may show up garbled
# on consoles that are not UTF-8 (the original author saw "??").
print(xmlstr)
print(type(xmlstr))

Leave a Reply