# -*- coding: utf-8 -*-
#
# Origin: commit 1678049cf9a38a300e12a3132357f7d62a2f9cf1
# Author: Kazephil, Mon, 15 Aug 2022 14:45:44 +0900
# Subject: [PATCH] Collection of classes to handle translation memory (TMX)
#          XML data.
# Target path: omegat_tools/tmxhelpers.py (new file)
'''Helper classes for building translation memory (TMX) XML documents.'''

from lxml import etree


class TMX:
    '''Base class to define and manipulate simple TMX documents.

    An instance owns three lxml elements:

    * ``tmx``    -- the root ``<tmx>`` element,
    * ``header`` -- the ``<header>`` child of the root,
    * ``body``   -- the ``<body>`` child of the root, to which translation
      units are appended.
    '''

    # Default values
    default_doctype = ''
    default_version = '1.4'
    default_header = {
        'creationtool': 'Segment Extractor',
        'o-tmf': 'Unknown',
        'adminlang': 'EN-US',
        'datatype': 'plaintext',
        'creationtoolversion': '0.1',
        'segtype': 'sentence',
        'srclang': 'JA',
    }

    def __init__(self, header=default_header, version=default_version):
        '''Create an empty TMX document.

        header  -- mapping of attribute names to values for the <header>
                   element; None yields a header without attributes.
        version -- value of the "version" attribute on the <tmx> root.
        '''
        # Copy the mapping so the shared class-level default is never handed
        # out by reference.
        header_attrib = dict(header) if header is not None else {}

        self.tmx = etree.Element('tmx', attrib={'version': version})
        self.header = etree.SubElement(self.tmx, 'header', header_attrib)
        self.body = etree.SubElement(self.tmx, 'body')

    def add_tu(self, tu):
        '''Add a tu element to the TMX document.

        tu -- an lxml element; it is appended as the last child of <body>.
        '''
        # TODO: add code to ensure the tu is valid
        self.body.append(tu)


class OmegaT_TMX(TMX):
    '''Class for OmegaT-specific TMX documents.

    On construction a "Default translations" comment is appended to the
    body; insert_alt_comment() adds the matching "Alternative translations"
    comment.
    '''

    def __init__(self, header=TMX.default_header, version='1.4'):
        '''Create an empty OmegaT TMX document.

        Takes the same arguments as TMX.__init__.
        '''
        super().__init__(header, version)
        self.default_trans = etree.Comment('Default translations')
        self.body.append(self.default_trans)

    def find_alternative_translations(self):
        '''Check for alternative translations in the TMX document.

        Only the first <prop type="file"> found under any <tu> (in document
        order) is inspected.  If the node immediately following it carries a
        "type" attribute of "id", "prev" or "next", the enclosing <tu> is
        returned.  In every other case the method returns None.
        '''
        alt_expr = '(//tu/prop[@type="file"])[1]'
        prop_types = ('id', 'prev', 'next')

        matches = self.body.xpath(alt_expr)
        if not matches:
            return None

        file_prop = matches[0]
        following = file_prop.getnext()
        if following is None:
            # The file prop is the last child of its tu: there is nothing
            # after it to inspect, so there is no alternative translation.
            return None

        if following.attrib.get('type') in prop_types:
            return file_prop.getparent()
        return None

    def insert_alt_comment(self):
        '''Insert the alternative translation comment in the TMX document.

        A new "Alternative translations" comment is created on every call
        and stored in self.alt_trans.  It is placed directly before the tu
        returned by find_alternative_translations(), or appended to the end
        of the body when that method returns None.
        '''
        self.alt_trans = etree.Comment('Alternative translations')

        first_alt = self.find_alternative_translations()
        if first_alt is not None:
            first_alt.addprevious(self.alt_trans)
        else:
            self.body.append(self.alt_trans)


class TMXfile():
    '''Placeholder class; no behaviour is implemented yet.'''
    pass