Adapter la récupération de langues aux TMX multilingues

This commit is contained in:
Philippe Tourigny 2023-04-22 09:29:33 +09:00
parent 2e122ebfd6
commit f120766840
2 changed files with 127 additions and 69 deletions

View File

@ -1,9 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Dominique Meeùs, created 1-10-2013, version 0.9.
XSLT transformation of an Open Document Format spreadsheet in two columns
XSLT transformation of an Open Document Format spreadsheet in five columns
into a TMX translation memory exchange file.
Filter to install as an export filter for LibreOffice Calc. -->
<!-- Copyright 2013 Dominique Meeùs.
<!-- Dominique Meeùs, modified 21-5-2020, version 0.92.
Hardcoded languages of the columns are : nl-BE, fr-BE, de-DE, en-GB, es-ES. -->
<!-- Philippe Tourigny, modified 12-9-2022, version 0.94
Enable the filter to read the language code of each column from
its heading cell in the first row. Also, add a SYSTEM DOCTYPE
declaration to the output XML file. -->
<!-- Copyright 2013, 2020 Dominique Meeùs, and 2022 Philippe Tourigny.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation,
@ -20,50 +26,88 @@
xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0"
exclude-result-prefixes="office table text">
<!-- Namespaces needed to access parts of the document -->
<xsl:output method = "xml" indent = "yes" encoding = "UTF-8" omit-xml-declaration = "no"/>
<xsl:variable name="headerCell" select="//table:table/table:table-row[1]/table:table-cell"></xsl:variable>
<!-- Version 0.94: Add a SYSTEM DOCTYPE (system identifier "tmx14.dtd") to the output -->
<xsl:output method = "xml" indent = "yes" encoding = "UTF-8"
doctype-system="tmx14.dtd" omit-xml-declaration = "no"/>
<xsl:template match="/">
<!-- Root template: builds the output <tmx> document, i.e. the <header>
with its attributes and the <body> filled from the spreadsheet tables. -->
<!-- Get the source and target languages from the first and second column of the first row. Code based on this answer https://forum.openoffice.org/en/forum/viewtopic.php?p=56627&sid=d4ebce191acc01d99d587fef28377db4#p56627 from the Apache Office forum -->
<xsl:variable name="adminlang" select="$headerCell[2]/text:p"></xsl:variable>
<xsl:variable name="srclang" select="$headerCell[1]/text:p"></xsl:variable>
<xsl:variable name="tgtlang" select="$headerCell[2]/text:p"></xsl:variable>
<tmx version="1.4">
<!-- Define variables to make code easier to manage.
Cells in the first row are language code headings.
The first target language (second column) is selected as the
administrative language for the output TMX.
The first column identifies the source language. -->
<xsl:variable name="headingCell"
select="//table:table/table:table-row[1]/table:table-cell"/>
<xsl:variable name="adminlang"
select="$headingCell[2]/text:p"/>
<xsl:variable name="srclang"
select="$headingCell[1]/text:p"/>
<!-- Define the TMX header
Every header attribute is created with <xsl:attribute>; the
adminlang and srclang values are copied from the variables above
with <xsl:value-of>. (A bare $name written inside a literal
attribute is not evaluated; that would require an attribute
value template, i.e. the expression wrapped in curly braces.) -->
<header>
<xsl:attribute name="creationtool">ods2tmx filter</xsl:attribute>
<xsl:attribute name="creationtoolversion">0.93</xsl:attribute>
<xsl:attribute name="creationtool">TMX-export for LibreOffice</xsl:attribute>
<xsl:attribute name="creationtoolversion">0.94</xsl:attribute>
<xsl:attribute name="segtype">sentence</xsl:attribute>
<xsl:attribute name="o-tmf">application/vnd.oasis.opendocument.spreadsheet</xsl:attribute>
<xsl:attribute name="adminlang"><xsl:value-of select="$adminlang"></xsl:value-of></xsl:attribute>
<xsl:attribute name="srclang"><xsl:value-of select="$srclang"></xsl:value-of></xsl:attribute>
<xsl:attribute name="adminlang">
<xsl:value-of select="$adminlang"/>
</xsl:attribute>
<xsl:attribute name="srclang">
<xsl:value-of select="$srclang"/>
</xsl:attribute>
<xsl:attribute name="datatype">plaintext</xsl:attribute>
</header>
<!-- Define the TMX body
Additional templates are called to make the code
easier to modify and maintain. -->
<body>
<xsl:for-each select="//table:table-row[position()>1]">
<tu>
<xsl:for-each select="table:table-cell">
<xsl:choose>
<xsl:when test="position()=1">
<tuv>
<xsl:attribute name="xml:lang"><xsl:value-of select="$srclang"></xsl:value-of></xsl:attribute>
<seg><xsl:value-of select="text:p"/></seg>
</tuv>
</xsl:when>
<xsl:when test="position()=2">
<tuv>
<xsl:attribute name="xml:lang"><xsl:value-of select="$tgtlang"></xsl:value-of></xsl:attribute>
<seg><xsl:value-of select="text:p"/></seg>
</tuv>
</xsl:when>
</xsl:choose>
</xsl:for-each>
</tu>
</xsl:for-each>
<!-- Apply templates to every table in the document; the
table:table template inserts the <tu> elements. -->
<xsl:apply-templates select="//table:table"/>
</body>
</tmx>
</xsl:template>
<!-- Templates to populate the <tu> and <tuv> elements-->
<!-- Template for <tu> elements:
Loop through each row, skipping the first one. -->
<xsl:template match="table:table">
  <!-- Emit one <tu> per data row of this table.
       * The first row holds the language headings and is skipped.
       * Rows without any non-blank cell (e.g. the empty filler rows a
         spreadsheet appends) are skipped too, so that no empty <tu/>
         is written.
       * Templates are applied to the current row only, so each <tu>
         receives just the <tuv> elements of its own row. -->
  <xsl:for-each select="table:table-row[position()>1][table:table-cell[normalize-space(text:p) != '']]">
    <tu>
      <xsl:apply-templates select="."/>
    </tu>
  </xsl:for-each>
</xsl:template>
<!-- Template for the <tuv> elements:
Loop through each cell in the row, and assign the value
of the language for that column to the xml:lang attribute.
After that, add the <seg> element and populate it with the
text in the current cell. -->
<xsl:template match="table:table-row[position()>1]">
  <!-- Writes one <tuv> for every non-blank cell of the current data row.
       The xml:lang value is the text of the heading cell (first row of
       the same table) that sits in the same column as the cell. -->
  <!-- Heading cells of the table this row belongs to. Looked up once per
       row, and limited to the enclosing table so that a document with
       several sheets does not mix up their headings. -->
  <xsl:variable name="langHeading"
                select="ancestor::table:table[1]/table:table-row[1]/table:table-cell"/>
  <xsl:for-each select="table:table-cell">
    <!-- Real column number of this cell. A run of adjacent empty cells
         is stored as ONE table:table-cell carrying
         table:number-columns-repeated="N", so position() alone would
         fall behind after such a gap. Count each preceding cell once,
         or N times when it is repeated.
         NOTE(review): a repeated cell that itself contains text still
         yields a single <tuv> (for its first column) - confirm whether
         identical adjacent translations must be expanded. -->
    <xsl:variable name="currentColumn"
                  select="count(preceding-sibling::table:table-cell[not(@table:number-columns-repeated)])
                          + sum(preceding-sibling::table:table-cell/@table:number-columns-repeated)
                          + 1"/>
    <!-- Cells without text produce no <tuv> -->
    <xsl:if test="normalize-space(text:p) != ''">
      <tuv>
        <xsl:attribute name="xml:lang">
          <xsl:value-of select="$langHeading[position() = $currentColumn]/text:p"/>
        </xsl:attribute>
        <seg>
          <xsl:value-of select="text:p"/>
        </seg>
      </tuv>
    </xsl:if>
  </xsl:for-each>
</xsl:template>
</xsl:stylesheet>

View File

@ -4,7 +4,10 @@
XSLT transformation of a TMX translation memory exchange file
into an Open Document Format spreadsheet with one column per language.
Filter to install as an import filter for LibreOffice Calc. -->
<!-- Copyright 2013 Dominique Meeùs.
<!-- Philippe Tourigny, modified 12-9-2022, version 0.99
Allow the filter to retrieve the languages in the TMX from its
first <tu> element, and create a column for each language. -->
<!-- Copyright 2013 Dominique Meeùs, and 2022 Philippe Tourigny.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License
as published by the Free Software Foundation,
@ -42,65 +45,76 @@
office:version="1.0">
<office:automatic-styles>
<!-- Properties of the table -->
<!-- Table properties -->
<style:style style:name="ta1" style:family="table" style:master-page-name="Default">
<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
</style:style>
<!-- Properties of the columns -->
<!-- I consider the case of a two-languages TMX -->
<style:style style:name="co1" style:family="table-column"><!-- source language -->
<style:table-column-properties fo:break-before="auto" style:column-width="20.000cm"/>
</style:style>
<style:style style:name="co2" style:family="table-column"><!-- target language -->
<style:table-column-properties fo:break-before="auto" style:column-width="20.000cm"/>
<!-- Column properties (for all languages) -->
<style:style style:name="co1" style:family="table-column">
<style:table-column-properties fo:break-before="auto" style:column-width="14.000cm"/>
</style:style>
<!-- Properties of the rows -->
<!-- The rows are “optimal height” but do not expand for the “wrap option” of the cells -->
<!-- Row properties -->
<!-- All rows are set to “optimal height” -->
<style:style style:name="ro1" style:family="table-row">
<style:table-row-properties fo:break-before="auto" style:use-optimal-row-height="true"/>
</style:style>
<!-- Properties of the cells -->
<style:style style:name="ce1" style:family="table-cell" style:parent-style-name="Default">
<style:table-cell-properties fo:wrap-option="wrap"/>
<!-- Cell properties -->
<!-- Language code heading cells
The language codes are centered and set in bold
in the first row. -->
<style:style style:name="heading" style:family="table-cell"
style:parent-style-name="Default">
<style:table-cell-properties style:text-align-source="fix"
style:repeat-content="false" fo:wrap-option="wrap"/>
<style:paragraph-properties fo:text-align="center"/>
<style:text-properties fo:font-weight="bold"/>
</style:style>
<!-- Style for cells with the segment text -->
<style:style style:name="ce2" style:family="table-cell" style:parent-style-name="Default">
<style:table-cell-properties fo:wrap-option="wrap"/>
</style:style>
</office:automatic-styles>
<!-- Get the source and target languages from the TMX file -->
<xsl:variable name="srclang" select="tmx/body/tu/tuv[1]/@*[local-name() = 'lang']"></xsl:variable>
<xsl:variable name="tgtlang" select="/tmx/body/tu/tuv[2]/@*[local-name() = 'lang']"></xsl:variable>
<!-- Define variables used to identify the languages
in a multilingual TMX (three or more languages are supported).
All translation unit (<tu>) elements are assumed to contain the
same set of languages, and the first <tu> is used to identify them. -->
<!-- Todo: Use the <tu> with the highest number of <tuv> elements to identify all languages in a TMX file where some <tu> elements contain more languages than others. -->
<xsl:variable name="firstTU" select="tmx/body/tu[1]"/>
<xsl:variable name="numLangs" select="count($firstTU/tuv)"/>
<office:body>
<office:spreadsheet>
<table:table>
<!-- Format of the columns -->
<!-- How about a free number of columns ? -->
<table:table-column table:style-name="co1" table:default-cell-style-name="ce1"/>
<table:table-column table:style-name="co2" table:default-cell-style-name="ce2"/>
<table:table table:style-name="ta1">
<!-- Set the format for a number of columns equal to
the number of languages in the imported TMX file -->
<table:table-column table:style-name="co1" table:number-columns-repeated="{$numLangs}"
table:default-cell-style-name="ce2"/>
<!-- Put the source and target languages retrieved from
the TMX file in the first two columns of the first row -->
<table:table-row>
<table:table-cell>
<!-- Fill in the language headers in the first row
The use of the "local-name()" function enables
the filter to handle older versions that use the
"lang" attribute as well as recent versions that
use the "xml:lang" attribute. -->
<table:table-row table:style-name="ro1">
<xsl:for-each select="$firstTU/tuv">
<table:table-cell table:style-name="heading">
<text:p>
<xsl:value-of select="$srclang"></xsl:value-of>
</text:p>
</table:table-cell>
<table:table-cell>
<text:p>
<xsl:value-of select="$tgtlang"></xsl:value-of>
<xsl:value-of select="@*[local-name()='lang']"/>
</text:p>
</table:table-cell>
</xsl:for-each>
</table:table-row>
<!-- Process XML of the input TMX file: one row for each tu, one cell for the segment in each tuv -->
<!-- Process the <tu> and <tuv> elements in the TMX file:
One row per tu, one column per segment in each <tuv>. -->
<xsl:for-each select="tmx/body/tu">
<table:table-row>
<table:table-row table:style-name="ro1">
<xsl:for-each select="tuv">
<table:table-cell>
<text:p><xsl:value-of select="seg"/></text:p>