forked from bortzmeyer/Web-LangTag
34 lines
830 B
Python
34 lines
830 B
Python
|
#!/usr/bin/env python3
|
||
|
|
||
|
""" Converts an UTF-8 text file to an ASCII file with hexadecimal
|
||
|
Numeric Character References (like &#x153;). """
|
||
|
|
||
|
import sys
|
||
|
import re
|
||
|
|
||
|
# Splits a file name into (stem, extension) at its last dot. The extension
# may only contain ASCII letters, digits, "_" and "-" (case-insensitive).
# A raw string is used so that "\." reaches the regex engine as written
# instead of being an invalid string escape sequence.
extension = re.compile(r"^(.*)\.([a-z0-9_-]+)$", re.IGNORECASE)
|
||
|
|
||
|
def convert(thematch) -> str:
    """Return the character whose code point is captured in group 1.

    Group 1 of *thematch* is parsed as a hexadecimal integer and passed to
    ``chr``.

    NOTE(review): nothing in the visible script calls this function; it
    looks like a ``re.sub`` callback for decoding NCRs back to characters.
    Confirm before removing.
    """
    codepoint = int(thematch.group(1), 16)
    return chr(codepoint)
|
||
|
|
||
|
def encode_ncr(text):
    """Return *text* with each non-ASCII character replaced by a hex NCR.

    Characters with a code point above 127 become ``&#x<hex>;`` (lowercase
    hexadecimal, no padding); all other characters are copied unchanged.
    """
    return "".join(
        ch if ord(ch) <= 127 else "&#x%x;" % ord(ch) for ch in text
    )


def output_name(ifilename):
    """Return the output file name derived from *ifilename*.

    ``name.ext`` becomes ``name-ncr.ext``; a name that the module-level
    ``extension`` pattern does not match simply gets ``-ncr`` appended.
    """
    match = extension.search(ifilename)
    if match:
        return match.group(1) + "-ncr." + match.group(2)
    return ifilename + "-ncr"


def main(argv=None):
    """Convert every file named in *argv* (default: ``sys.argv[1:]``).

    Each input file is read as UTF-8 and written next to the original under
    the name given by ``output_name``.
    """
    filenames = sys.argv[1:] if argv is None else argv
    for ifilename in filenames:
        print("Converting %s..." % ifilename)
        ofilename = output_name(ifilename)
        # Decode explicitly as UTF-8 rather than with the locale's default
        # encoding, which would mis-read the input on non-UTF-8 systems.
        with open(ifilename, "r", encoding="utf-8") as ifile:
            data = ifile.read()
        # After encoding, only ASCII characters remain to be written.
        with open(ofilename, "w", encoding="ascii") as ofile:
            ofile.write(encode_ncr(data))


if __name__ == "__main__":
    main()
|
||
|
|
||
|
|