Web-LangTag/registries/utf82ncr.py

34 lines
830 B
Python
Raw Normal View History

2023-06-09 10:02:30 +02:00
#!/usr/bin/env python3
""" Converts a UTF-8 text file to an ASCII file with hexadecimal
Numeric Character References (like &#x153;). """
import sys
import re
# Splits a file name into (stem, extension) at the LAST dot; the extension
# may only contain letters, digits, "_" and "-". Raw string so that "\." is
# a regex escape rather than an (invalid) Python string escape.
extension = re.compile(r"^(.*)\.([a-z0-9_-]+)$", re.IGNORECASE)
def convert(thematch: "re.Match[str]") -> str:
    """Return the character whose hexadecimal code point is in group 1.

    This is the reverse direction of the conversion done by the rest of the
    script (reference -> character). It is not called anywhere in the
    visible part of this file.

    Args:
        thematch: A regex match whose first capture group holds the code
            point as hexadecimal digits (e.g. ``"153"``).

    Returns:
        The single character with that code point.

    Raises:
        ValueError: If group 1 is not valid hexadecimal, or the value is
            outside the range accepted by ``chr``.
    """
    codepoint = int(thematch.group(1), 16)
    return chr(codepoint)
def ncr_encode(text: str) -> str:
    """Replace every non-ASCII character with a hexadecimal NCR.

    Characters with a code point of 127 or below are copied unchanged; any
    other character becomes ``&#x<lowercase hex code point>;``.

    Args:
        text: The text to convert.

    Returns:
        A string containing only ASCII characters.
    """
    return "".join(
        ch if ord(ch) <= 127 else "&#x%x;" % ord(ch)
        for ch in text
    )


def ncr_filename(ifilename: str) -> str:
    """Build the output file name for ``ifilename``.

    ``name.ext`` becomes ``name-ncr.ext``; a name in which the module-level
    ``extension`` pattern finds no extension simply gets ``-ncr`` appended.
    """
    match = extension.search(ifilename)
    if match:
        return match.group(1) + "-ncr." + match.group(2)
    return ifilename + "-ncr"


def main(argv=None):
    """Convert each file named on the command line.

    Args:
        argv: File names to convert; defaults to ``sys.argv[1:]``.

    Raises:
        OSError: If an input file cannot be read or an output file cannot
            be written.
        UnicodeDecodeError: If an input file is not valid UTF-8.
    """
    filenames = sys.argv[1:] if argv is None else argv
    for ifilename in filenames:
        print("Converting %s..." % ifilename)
        ofilename = ncr_filename(ifilename)
        # Read the whole input first so that a decoding error does not leave
        # a truncated output file behind. The encoding is explicit because
        # the input is UTF-8 by contract, whatever the locale says.
        with open(ifilename, "r", encoding="utf-8") as ifile:
            data = ifile.read()
        # After encoding, the text is pure ASCII by construction.
        with open(ofilename, "w", encoding="ascii") as ofile:
            ofile.write(ncr_encode(data))


if __name__ == "__main__":
    main()