forked from bortzmeyer/Web-LangTag
34 lines
830 B
Python
Executable File
34 lines
830 B
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
""" Converts a UTF-8 text file to an ASCII file with hexadecimal
Numeric Character References (like &#x153; for œ). """
|
|
|
|
import sys
|
|
import re
|
|
|
|
# Splits a file name into (stem, extension) at its last dot; the extension
# may only contain ASCII letters, digits, "_" and "-". Written as a raw
# string so that "\." reaches the regex engine untouched instead of being
# an invalid string escape sequence.
extension = re.compile(r"^(.*)\.([a-z0-9_-]+)$", re.IGNORECASE)
|
|
|
|
def convert(thematch):
    """Return the character whose code point is given by a regex match.

    ``thematch`` is a match object whose first group holds the code point
    written in hexadecimal.
    """
    hex_digits = thematch.group(1)
    return chr(int(hex_digits, base=16))
|
|
|
|
# Convert every file named on the command line. The result is written to a
# new file whose name has "-ncr" inserted before the extension, or appended
# when the name has no extension matching the `extension` pattern.
for ifilename in sys.argv[1:]:
    print("Converting %s..." % ifilename)
    match = extension.search(ifilename)
    if match:
        ofilename = match.group(1) + "-ncr." + match.group(2)
    else:
        ofilename = ifilename + "-ncr"
    # The input is declared to be UTF-8, so decode it explicitly instead of
    # relying on the locale's default encoding. Reading before the output
    # is opened also avoids leaving a truncated output file behind when the
    # input cannot be read or decoded.
    with open(ifilename, "r", encoding="utf-8") as ifile:
        data = ifile.read()
    # Every character above U+007F becomes a hexadecimal NCR, so the
    # converted text is pure ASCII.
    converted = "".join(
        ch if ord(ch) <= 127 else "&#x%x;" % ord(ch) for ch in data
    )
    with open(ofilename, "w", encoding="ascii") as ofile:
        ofile.write(converted)
|
|
|
|
|