feature #18294 [Yaml] dump non UTF-8 encoded strings as binary data (xabbuh)

This PR was merged into the 3.1-dev branch.

Discussion
----------

[Yaml] dump non UTF-8 encoded strings as binary data

| Q             | A
| ------------- | ---
| Branch?       | master
| Bug fix?      | yes
| New feature?  | no
| BC breaks?    | no
| Deprecations? | yes
| Tests pass?   | yes
| Fixed tickets | #18241
| License       | MIT
| Doc PR        |

Commits
-------

86e4a6f dump non UTF-8 encoded strings as binary data
This commit is contained in:
Fabien Potencier 2016-03-30 16:44:34 +02:00
commit 06eb52cb75
4 changed files with 15 additions and 18 deletions

View File

@@ -4,14 +4,14 @@ CHANGELOG
3.1.0
-----
* Strings that are not UTF-8 encoded will be dumped as base64 encoded binary
data.
* Added support for dumping multi line strings as literal blocks.
* Added support for parsing base64 encoded binary data when they are tagged
with the `!!binary` tag.
* Added support for dumping binary data as base64 encoded strings by passing
the `Yaml::DUMP_BASE64_BINARY_DATA` flag.
* Added support for parsing timestamps as `\DateTime` objects:
```php

View File

@@ -201,7 +201,7 @@ class Inline
return $repr;
case '' == $value:
return "''";
case Yaml::DUMP_BASE64_BINARY_DATA & $flags && self::isBinaryString($value):
case self::isBinaryString($value):
return '!!binary '.base64_encode($value);
case Escaper::requiresDoubleQuoting($value):
return Escaper::escapeWithDoubleQuotes($value);
@@ -627,7 +627,7 @@ class Inline
private static function isBinaryString($value)
{
return preg_match('/[^\x09-\x0d\x20-\xff]/', $value);
return !preg_match('//u', $value) || preg_match('/[^\x09-\x0d\x20-\xff]/', $value);
}
/**

View File

@@ -277,20 +277,18 @@ EOF;
);
}
public function testBinaryDataIsDumpedAsIsWithoutFlag()
{
$binaryData = file_get_contents(__DIR__.'/Fixtures/arrow.gif');
$expected = "{ data: '".str_replace("'", "''", $binaryData)."' }";
$this->assertSame($expected, $this->dumper->dump(array('data' => $binaryData)));
}
public function testBinaryDataIsDumpedBase64EncodedWithFlag()
public function testBinaryDataIsDumpedBase64Encoded()
{
$binaryData = file_get_contents(__DIR__.'/Fixtures/arrow.gif');
$expected = '{ data: !!binary '.base64_encode($binaryData).' }';
$this->assertSame($expected, $this->dumper->dump(array('data' => $binaryData), 0, 0, Yaml::DUMP_BASE64_BINARY_DATA));
$this->assertSame($expected, $this->dumper->dump(array('data' => $binaryData)));
}
public function testNonUtf8DataIsDumpedBase64Encoded()
{
// "f\xc3\x3fr" is not valid UTF-8: a \xc3 lead byte followed by "?" (a mangled "für"; true ISO-8859-1 would be "f\xfcr")
$this->assertSame('!!binary ZsM/cg==', $this->dumper->dump("f\xc3\x3fr"));
}
/**

View File

@@ -26,9 +26,8 @@ class Yaml
const PARSE_OBJECT_FOR_MAP = 8;
const DUMP_EXCEPTION_ON_INVALID_TYPE = 16;
const PARSE_DATETIME = 32;
const DUMP_BASE64_BINARY_DATA = 64;
const DUMP_OBJECT_AS_MAP = 128;
const DUMP_MULTI_LINE_LITERAL_BLOCK = 256;
const DUMP_OBJECT_AS_MAP = 64;
const DUMP_MULTI_LINE_LITERAL_BLOCK = 128;
/**
* Parses YAML into a PHP value.