1 """
2 History:
3
4 Author xmlescape: Gabriel Genellina
5 Author xmlunescape: Leif K-Brooks, based on work by Aaron Swartz
6 Source http://www.thescripts.com/forum/thread594350.html
7
8 Modified by Paul Kienzle
9 """
10
11
12
import re

try:  # Python 3 location of the HTML entity tables
    from html.entities import codepoint2name, name2codepoint
except ImportError:  # Python 2 location
    from htmlentitydefs import codepoint2name, name2codepoint
15
try:
    _unichr = unichr  # Python 2: chr() would return a byte string there
except NameError:
    _unichr = chr  # Python 3: chr() already returns a text character

# Map every character that has a named entity in codepoint2name to its
# "&name;" spelling.  Code point 38 ("&") is deliberately left out:
# xmlescape() replaces ampersands itself, before any "&name;" sequence has
# been inserted into the text.
unichr2entity = dict((_unichr(code), u'&%s;' % name)
                     for code, name in codepoint2name.items() if code != 38)


def xmlescape(text, d=unichr2entity):
    """xmlstr = xmlescape(str)

    Convert text into a form suitable for inclusion in an XML file,
    with characters such as '&' replaced by '&amp;'.

    text: the string to escape.
    d: mapping from a substring to its replacement; defaults to
       unichr2entity, i.e. the entity names listed in codepoint2name.

    Returns the escaped string.  Every '&' in the input is escaped, so
    text that already contains entities is escaped a second time
    ('&lt;' becomes '&amp;lt;').
    """
    # Ampersands must go first; otherwise the '&' of every entity inserted
    # by the loop below would itself be escaped.
    if u"&" in text:
        text = text.replace(u"&", u"&amp;")
    # NOTE: a caller-supplied table should not contain '&' as a key, since
    # it would also rewrite the ampersands produced just above.
    for key, value in d.items():
        if key in text:
            text = text.replace(key, value)
    return text
31
32
try:
    _unichr = unichr  # Python 2: chr() would return a byte string there
except NameError:
    _unichr = chr  # Python 3: chr() already returns a text character


def _replace_entity(m):
    """regular expression character replacement function for xmlunescape.

    m: a match object produced by _entity_re; group(1) is the text
       between the '&' and the ';'.

    Returns the character the reference stands for.  Three forms are
    understood: decimal ('#65'), hexadecimal ('#x41' / '#X41') and the
    entity names listed in name2codepoint ('lt').  Anything that cannot
    be converted is returned unchanged, i.e. as m.group(0).
    """
    s = m.group(1)
    try:
        if s[0] == u'#':
            digits = s[1:]
            if digits[0] in u'xX':
                code = int(digits[1:], 16)
            else:
                code = int(digits)
        else:
            code = name2codepoint[s]
        return _unichr(code)
    except (ValueError, KeyError, OverflowError):
        # ValueError: malformed digits, or a code point outside the valid
        # range.  KeyError: unknown entity name.  OverflowError: a number
        # too large for the character conversion to accept at all.
        return m.group(0)


# '&', then an optional '#', an optional 'x'/'X', then either hex digits or
# up to eight word characters, then ';'.  Group 1 excludes the '&' and ';'.
_entity_re = re.compile(r"&(#?[xX]?(?:[0-9a-fA-F]+|\w{1,8}));")


def xmlunescape(s):
    """str = xmlunescape(xmlstr)

    Replace XML entities with original ISO characters.

    s: text that may contain named, decimal or hexadecimal references.

    Returns the text with every recognised reference replaced by its
    character; unrecognised or invalid references are left as they are.
    """
    return _entity_re.sub(_replace_entity, s)
60
61
if __name__ == "__main__":
    # Small round-trip demonstration: escape a sample string, unescape the
    # result, and show all three stages.
    s = "<>&;"
    xmls = xmlescape(s)
    uns = xmlunescape(xmls)
    # A parenthesised single argument prints the same thing whether print
    # is a statement (Python 2) or a function (Python 3).
    print("%s => %s => %s" % (s, xmls, uns))
67