Skip to content

Commit

Permalink
Add 'utf8' option
Browse files Browse the repository at this point in the history
With that YAML::PP will automatically decode input and encode output
when using load_string and dump_string.

Default is off for backwards compatibility.
  • Loading branch information
perlpunk committed Dec 8, 2023
1 parent 8a2ccd5 commit 6f5c47d
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 11 deletions.
29 changes: 24 additions & 5 deletions lib/YAML/PP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ my %YAML_VERSIONS = ('1.1' => 1, '1.2' => 1);
sub new {
my ($class, %args) = @_;

my $utf8 = delete $args{utf8};
my $bool = delete $args{boolean};
$bool = 'perl' unless defined $bool;
my $schemas = delete $args{schema} || ['+'];
Expand Down Expand Up @@ -63,6 +64,7 @@ sub new {
my $default_schema = $schemas{ $default_yaml_version };

my $loader = YAML::PP::Loader->new(
utf8 => $utf8,
schemas => \%schemas,
cyclic_refs => $cyclic_refs,
parser => $parser,
Expand All @@ -71,6 +73,7 @@ sub new {
duplicate_keys => $duplicate_keys,
);
my $dumper = YAML::PP::Dumper->new(
utf8 => $utf8,
schema => $default_schema,
emitter => $emitter,
header => $header,
Expand Down Expand Up @@ -524,6 +527,7 @@ L<https://perlpunk.github.io/YAML-PP-p5/test-suite.html>
my $ypp = YAML::PP->new( cyclic_refs => 'fatal' );
my $ypp = YAML::PP->new(
utf8 => 0,
boolean => 'JSON::PP',
schema => ['Core'],
cyclic_refs => 'fatal',
Expand All @@ -537,6 +541,21 @@ Options:
=over
=item utf8
Values: 0 or 1 (default: 0)
If true, then loading a string will assume that the string is utf8 encoded.
It will automatically decode it and croak on invalid utf8.
Dumping to a string will automatically encode the output as UTF-8.
If false, you may have to do the decoding/encoding yourself.
For historical reasons the default is false.
Note that this value has no influence on loading from or dumping to a file,
which will always read/write UTF-8 encoded data.
=item boolean
Values: C<perl> (currently default), C<JSON::PP>, C<boolean>, C<perl_experimental>
Expand Down Expand Up @@ -825,8 +844,8 @@ C<preserved_(scalar|mapping|sequence)> L<"METHODS"> below.
Input should be Unicode characters.
So if you read from a file, you should decode it, for example with
C<Encode::decode()>.
Set the C<utf8> option to automatically let YAML::PP do the decoding, or
use C<Encode::decode()>.
Note that in scalar context, C<load_string> and C<load_file> return the first
document (like L<YAML::Syck>), while L<YAML> and L<YAML::XS> return the
Expand All @@ -845,12 +864,12 @@ Strings will be loaded as unicode characters.
my $yaml = $ypp->dump_string($doc1, $doc2);
my $yaml = $ypp->dump_string(@docs);
Input strings should be Unicode characters.
Input data should be Unicode characters.
The returned output will consist of Unicode characters.
So if you want to write that to a file (or pass to YAML::XS, for example),
you typically encode it via C<Encode::encode()>.
Set the C<utf8> option to get utf8 encoded data back, or use
C<Encode::encode()>.
=head2 dump_file
Expand Down
6 changes: 5 additions & 1 deletion lib/YAML/PP/Dumper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ use YAML::PP::Common qw/
sub new {
my ($class, %args) = @_;

my $utf8 = delete $args{utf8};
$utf8 = 0 unless defined $utf8;
my $header = delete $args{header};
$header = 1 unless defined $header;
my $footer = delete $args{footer};
Expand All @@ -44,6 +46,7 @@ sub new {
die "Unexpected arguments: " . join ', ', sort keys %args;
}
my $self = bless {
utf8 => $utf8,
representer => YAML::PP::Representer->new(
schema => $schema,
preserve => $preserve,
Expand All @@ -62,6 +65,7 @@ sub new {
sub clone {
my ($self) = @_;
my $clone = {
utf8 => $self->{utf8},
representer => $self->representer->clone,
emitter => $self->emitter->clone,
version_directive => $self->version_directive,
Expand Down Expand Up @@ -223,7 +227,7 @@ sub _emit_node {

sub dump_string {
my ($self, @docs) = @_;
my $writer = YAML::PP::Writer->new;
my $writer = YAML::PP::Writer->new(utf8 => $self->{utf8});
$self->emitter->set_writer($writer);
my $output = $self->dump(@docs);
return $output;
Expand Down
3 changes: 3 additions & 0 deletions lib/YAML/PP/Lexer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ use Carp qw/ croak /;

sub new {
my ($class, %args) = @_;
if (keys %args) {
die "Unexpected arguments: " . join ', ', sort keys %args;
}
my $self = bless {
reader => $args{reader},
}, $class;
Expand Down
6 changes: 5 additions & 1 deletion lib/YAML/PP/Loader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ use YAML::PP::Reader;
sub new {
my ($class, %args) = @_;

my $utf8 = delete $args{utf8};
$utf8 = 0 unless defined $utf8;
my $cyclic_refs = delete $args{cyclic_refs} || 'fatal';
my $default_yaml_version = delete $args{default_yaml_version} || '1.2';
my $preserve = delete $args{preserve};
Expand Down Expand Up @@ -44,6 +46,7 @@ sub new {
die "Unexpected arguments: " . join ', ', sort keys %args;
}
my $self = bless {
utf8 => $utf8,
parser => $parser,
constructor => $constructor,
}, $class;
Expand All @@ -53,6 +56,7 @@ sub new {
sub clone {
my ($self) = @_;
my $clone = {
utf8 => $self->{utf8},
parser => $self->parser->clone,
constructor => $self->constructor->clone,
};
Expand All @@ -75,7 +79,7 @@ sub filename {

# Load YAML from an in-memory string.
#
# Arguments:
#   $yaml - the YAML text to parse. It is handed to YAML::PP::Reader together
#           with this loader's 'utf8' setting.
#
# Returns whatever $self->load() returns, evaluated in the caller's context.
sub load_string {
    my ($self, $yaml) = @_;

    # Build exactly one reader, passing the utf8 flag along. Previously a
    # reader without the flag was installed first and then immediately
    # replaced, which was redundant work.
    my $reader = YAML::PP::Reader->new(
        input => $yaml,
        utf8  => $self->{utf8},
    );
    $self->parser->set_reader($reader);

    # Kept as the final statement (no explicit return) so the caller's
    # context propagates to load() exactly as before.
    $self->load();
}

Expand Down
9 changes: 5 additions & 4 deletions lib/YAML/PP/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ sub new {
my ($class, %args) = @_;
my $reader = delete $args{reader} || YAML::PP::Reader->new;
my $default_yaml_version = delete $args{default_yaml_version};
my $receiver = delete $args{receiver};
if (keys %args) {
die "Unexpected arguments: " . join ', ', sort keys %args;
}
my $self = bless {
default_yaml_version => $default_yaml_version || '1.2',
lexer => YAML::PP::Lexer->new(
reader => $reader,
),
lexer => YAML::PP::Lexer->new(),
}, $class;
my $receiver = delete $args{receiver};
if ($receiver) {
$self->set_receiver($receiver);
}
Expand Down
9 changes: 9 additions & 0 deletions lib/YAML/PP/Reader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use strict;
use warnings;
package YAML::PP::Reader;
use Encode;

our $VERSION = '0.000'; # VERSION

Expand All @@ -11,6 +12,14 @@ sub set_input { $_[0]->{input} = $_[1] }
sub new {
my ($class, %args) = @_;
my $input = delete $args{input};
my $utf8 = delete $args{utf8};
$utf8 = 0 unless defined $utf8;
if (keys %args) {
die "Unexpected arguments: " . join ', ', sort keys %args;
}
if ($utf8) {
$input = decode 'UTF-8', $input, Encode::FB_CROAK;
}
return bless {
input => $input,
}, $class;
Expand Down
7 changes: 7 additions & 0 deletions lib/YAML/PP/Writer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
use strict;
use warnings;
package YAML::PP::Writer;
use Encode;

our $VERSION = '0.000'; # VERSION

Expand All @@ -10,15 +11,21 @@ sub set_output { $_[0]->{output} = $_[1] }

# Constructor for the in-memory writer.
#
# Recognised arguments:
#   utf8   - stored as-is; defaults to 0 when not defined
#   output - initial output buffer; defaults to the empty string
#
# Returns a hash-based object blessed into $class.
sub new {
    my ($class, %args) = @_;

    my $encode_flag = delete $args{utf8};
    my $buffer      = delete $args{output};

    my $self = {
        utf8   => (defined $encode_flag ? $encode_flag : 0),
        output => (defined $buffer      ? $buffer      : ''),
    };
    return bless $self, $class;
}

# Append one chunk of text to the output buffer.
#
# When the object's 'utf8' flag is true, the chunk is first encoded to UTF-8
# with Encode::FB_CROAK as the CHECK argument. The value of the final
# append expression is what the sub returns.
sub write {
    my ($self, $line) = @_;

    $line = encode('UTF-8', $line, Encode::FB_CROAK)
        if $self->{utf8};

    $self->{output} .= $line;
}

Expand Down
70 changes: 70 additions & 0 deletions t/58.utf8.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
#!/usr/bin/env perl
# Tests for the 'utf8' constructor option of YAML::PP: load_string and
# dump_string are exercised with utf8 => 1, utf8 => 0 and the default.
use strict;
use warnings;
use Test::More;
use Data::Dumper;
use YAML::PP;
use Encode;

# There is no 'use utf8' pragma in this file, so the non-ASCII literals below
# are raw byte strings (UTF-8 encoded, assuming the file is saved as UTF-8).
# The decode_utf8() calls turn them into character strings.

my $p_utf8 = YAML::PP->new(
    header => 0,
    utf8 => 1,
);
my $p_perl = YAML::PP->new(
    header => 0,
    utf8 => 0,
);
my $p_default = YAML::PP->new(header => 0);


# YAML document as bytes ...
my $utf8 = <<'EOM';
[bär]
EOM

# ... and the same document as decoded characters.
my $perl = decode_utf8 $utf8;

# Expected scalar value, as bytes and as decoded characters.
my $bear = "bär";
my $bear_perl = decode_utf8 $bear;

subtest 'load unicode' => sub {
    my $data = $p_utf8->load_string($utf8);
    is $data->[0], $bear_perl, 'load utf8';

    # Feeding already-decoded characters to the utf8 loader must fail.
    eval {
        $data = $p_utf8->load_string($perl);
    };
    my $err = $@;
    like $err, qr{does not map to Unicode}, 'load decoded with utf8 loader fails';

    $data = $p_perl->load_string($perl);
    is $data->[0], $bear_perl, 'load decoded with perl loader';

    $data = $p_perl->load_string($utf8);
    is $data->[0], $bear, 'load utf8 with perl loader';

    $data = $p_default->load_string($perl);
    is $data->[0], $bear_perl, 'load decoded with default loader';

    $data = $p_default->load_string($utf8);
    is $data->[0], 'bär', 'load utf8 with default loader';
};

subtest 'dump unicode' => sub {
    # Each case strips the leading "- " sequence marker and the trailing
    # newline so that only the scalar itself is compared.
    my $yaml = $p_utf8->dump_string([$bear_perl]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, $bear, 'dump perl data with utf8 dumper -> utf8';

    $yaml = $p_utf8->dump_string([$bear]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, encode_utf8($bear), 'dump utf8 data with utf8 dumper -> rubbish';

    $yaml = $p_perl->dump_string([$bear_perl]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';

    $yaml = $p_perl->dump_string([$bear]);
    $yaml =~ s/^- //; chomp $yaml;
    # Fixed: the 'is' call was missing, leaving a bare list in void context
    # so this case was never actually checked.
    is $yaml, $bear, 'dump utf8 data with perl dumper -> utf8';
};

done_testing;

0 comments on commit 6f5c47d

Please sign in to comment.