feature #29148 Load original file metadata when loading Xliff 1.2 files (eternoendless)

This PR was squashed before being merged into the 4.3-dev branch (closes #29148).

Discussion
----------

Load original file metadata when loading Xliff 1.2 files

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | no
| New feature?  | no
| BC breaks?    | no
| Deprecations? | no
| Tests pass?   | yes
| Fixed tickets | n/a
| License       | MIT
| Doc PR        | n/a

At PrestaShop, we maintain our translations catalog automatically using an internal tool based on our [TranslationToolsBundle](https://github.com/PrestaShop/TranslationToolsBundle), which is capable of reverse building a MessageCatalogue by parsing the source code, and then saving it to Xliff files.

Currently, this tool is only capable of building catalogs from scratch. We are currently moving to an incremental catalog where we only add new wordings, and keep old ones even if they are no longer present in the code (because of B/C). To do that, instead of starting from a clean MessageCatalogue, we load our current catalog using XliffLoader, and use that MessageCatalogue as a base. Easy peasy. But then we found a problem...

The Xliff 1.2 standard defines a list of `<trans-unit>` elements within a collection of `<file>` elements. The `<file>` element has a required attribute named `original`, which is supposed to contain the name of the file where the wordings are used in (at least in our case it does). **This attribute is currently ignored by XliffFileLoader**.

This means that it's currently impossible to read a Xliff 1.2 file using XliffFileloader, and save it back to Xliff without losing data.

This Pull Request adds a new `file` element to the messages' metadata (alongside `notes`, `target-attributes` and `id`). Right now, it only contains `original`, but it could be extended to contain all the other attributes from the `<file>` element if needed.

This required a small change in the loader where we loop through `<file>` elements before fetching their `<trans-unit>` children, instead of fetching all `<trans-unit>` elements at once.

Commits
-------

4073319d0f Load original file metadata when loading Xliff 1.2 files
This commit is contained in:
Fabien Potencier 2019-01-13 17:47:52 +01:00
commit 9e9506ebef
4 changed files with 138 additions and 29 deletions

View File

@ -1,6 +1,11 @@
CHANGELOG
=========
4.3.0
-----
* Improved Xliff 1.2 loader to load the original file's metadata
4.2.0
-----

View File

@ -82,38 +82,50 @@ class XliffFileLoader implements LoaderInterface
$xml = simplexml_import_dom($dom);
$encoding = strtoupper($dom->encoding);
$xml->registerXPathNamespace('xliff', 'urn:oasis:names:tc:xliff:document:1.2');
foreach ($xml->xpath('//xliff:trans-unit') as $translation) {
$attributes = $translation->attributes();
$namespace = 'urn:oasis:names:tc:xliff:document:1.2';
$xml->registerXPathNamespace('xliff', $namespace);
if (!(isset($attributes['resname']) || isset($translation->source))) {
continue;
}
foreach ($xml->xpath('//xliff:file') as $file) {
$fileAttributes = $file->attributes();
$source = isset($attributes['resname']) && $attributes['resname'] ? $attributes['resname'] : $translation->source;
// If the xlf file has another encoding specified, try to convert it because
// simple_xml will always return utf-8 encoded values
$target = $this->utf8ToCharset((string) (isset($translation->target) ? $translation->target : $translation->source), $encoding);
$file->registerXPathNamespace('xliff', $namespace);
$catalogue->set((string) $source, $target, $domain);
foreach ($file->xpath('.//xliff:trans-unit') as $translation) {
$attributes = $translation->attributes();
$metadata = array();
if ($notes = $this->parseNotesMetadata($translation->note, $encoding)) {
$metadata['notes'] = $notes;
}
if (isset($translation->target) && $translation->target->attributes()) {
$metadata['target-attributes'] = array();
foreach ($translation->target->attributes() as $key => $value) {
$metadata['target-attributes'][$key] = (string) $value;
if (!(isset($attributes['resname']) || isset($translation->source))) {
continue;
}
}
if (isset($attributes['id'])) {
$metadata['id'] = (string) $attributes['id'];
}
$source = isset($attributes['resname']) && $attributes['resname'] ? $attributes['resname'] : $translation->source;
// If the xlf file has another encoding specified, try to convert it because
// simple_xml will always return utf-8 encoded values
$target = $this->utf8ToCharset((string) ($translation->target ?? $translation->source), $encoding);
$catalogue->setMetadata((string) $source, $metadata, $domain);
$catalogue->set((string) $source, $target, $domain);
$metadata = array(
'file' => array(
'original' => (string) $fileAttributes['original'],
),
);
if ($notes = $this->parseNotesMetadata($translation->note, $encoding)) {
$metadata['notes'] = $notes;
}
if (isset($translation->target) && $translation->target->attributes()) {
$metadata['target-attributes'] = array();
foreach ($translation->target->attributes() as $key => $value) {
$metadata['target-attributes'][$key] = (string) $value;
}
}
if (isset($attributes['id'])) {
$metadata['id'] = (string) $attributes['id'];
}
$catalogue->setMetadata((string) $source, $metadata, $domain);
}
}
}

View File

@ -84,7 +84,16 @@ class XliffFileLoaderTest extends TestCase
$this->assertEquals(utf8_decode('föö'), $catalogue->get('bar', 'domain1'));
$this->assertEquals(utf8_decode('bär'), $catalogue->get('foo', 'domain1'));
$this->assertEquals(array('notes' => array(array('content' => utf8_decode('bäz'))), 'id' => '1'), $catalogue->getMetadata('foo', 'domain1'));
$this->assertEquals(
array(
'notes' => array(array('content' => utf8_decode('bäz'))),
'id' => '1',
'file' => array(
'original' => 'file.ext',
),
),
$catalogue->getMetadata('foo', 'domain1')
);
}
public function testTargetAttributesAreStoredCorrectly()
@ -164,11 +173,41 @@ class XliffFileLoaderTest extends TestCase
$loader = new XliffFileLoader();
$catalogue = $loader->load(__DIR__.'/../fixtures/withnote.xlf', 'en', 'domain1');
$this->assertEquals(array('notes' => array(array('priority' => 1, 'content' => 'foo')), 'id' => '1'), $catalogue->getMetadata('foo', 'domain1'));
$this->assertEquals(
array(
'notes' => array(array('priority' => 1, 'content' => 'foo')),
'id' => '1',
'file' => array(
'original' => 'file.ext',
),
),
$catalogue->getMetadata('foo', 'domain1')
);
// message without target
$this->assertEquals(array('notes' => array(array('content' => 'bar', 'from' => 'foo')), 'id' => '2'), $catalogue->getMetadata('extra', 'domain1'));
$this->assertEquals(
array(
'notes' => array(array('content' => 'bar', 'from' => 'foo')),
'id' => '2',
'file' => array(
'original' => 'file.ext',
),
),
$catalogue->getMetadata('extra', 'domain1')
);
// message with empty target
$this->assertEquals(array('notes' => array(array('content' => 'baz'), array('priority' => 2, 'from' => 'bar', 'content' => 'qux')), 'id' => '123'), $catalogue->getMetadata('key', 'domain1'));
$this->assertEquals(
array(
'notes' => array(
array('content' => 'baz'),
array('priority' => 2, 'from' => 'bar', 'content' => 'qux'),
),
'id' => '123',
'file' => array(
'original' => 'file.ext',
),
),
$catalogue->getMetadata('key', 'domain1')
);
}
public function testLoadVersion2()
@ -257,4 +296,30 @@ class XliffFileLoaderTest extends TestCase
$this->assertSame('processed', $metadata['notes'][0]['category']);
$this->assertSame('true', $metadata['notes'][0]['content']);
}
public function testLoadWithMultipleFileNodes()
{
$loader = new XliffFileLoader();
$catalogue = $loader->load(__DIR__.'/../fixtures/resources-multi-files.xlf', 'en', 'domain1');
$this->assertEquals(
array(
'id' => '1',
'file' => array(
'original' => 'file.ext',
),
),
$catalogue->getMetadata('foo', 'domain1')
);
$this->assertEquals(
array(
'notes' => array(array('content' => 'note')),
'id' => '4',
'file' => array(
'original' => 'otherfile.ext',
),
),
$catalogue->getMetadata('test', 'domain1')
);
}
}

View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<xliff xmlns="urn:oasis:names:tc:xliff:document:1.2" version="1.2">
<file source-language="en" datatype="plaintext" original="file.ext">
<body>
<trans-unit id="1">
<source>foo</source>
<target>bar</target>
</trans-unit>
</body>
</file>
<file source-language="en" datatype="plaintext" original="otherfile.ext">
<body>
<trans-unit id="2">
<source>extra</source>
</trans-unit>
<trans-unit id="3">
<source>key</source>
<target></target>
</trans-unit>
<trans-unit id="4">
<source>test</source>
<target>with</target>
<note>note</note>
</trans-unit>
</body>
</file>
</xliff>