Split the writer's utf8 handling into utf8_in/utf8_out and convert the
collected output once in YAML::PP::Writer::output() instead of per write().
Debug output (warn/Devel::Peek/diag) is not part of this patch.

diff --git a/lib/YAML/PP.pm b/lib/YAML/PP.pm
index baac40c8..1fd72c58 100644
--- a/lib/YAML/PP.pm
+++ b/lib/YAML/PP.pm
@@ -842,10 +842,10 @@ C L<"METHODS"> below.
     my $doc = $ypp->load_string("foo: bar");
     my @docs = $ypp->load_string("foo: bar\n---\n- a");
 
-Input should be Unicode characters.
+Input should be Unicode characters by default.
 
-Set the C<utf8> option to automatically let YAML::PP do the decoding, or
-use C<load_file>.
+Set the C<utf8> option to pass utf8 encoded data and automatically let YAML::PP
+do the decoding, or use C<load_file>.
 
 Note that in scalar context, C<load_string> and C<load_file> return the first
 document (like L<YAML::Syck>), while L<YAML> and L<YAML::XS> return the
@@ -866,7 +866,7 @@ Strings will be loaded as unicode characters.
 
 Input data should be Unicode characters.
 
-Output will return Unicode characters.
+Output will return Unicode characters by default.
 
 Set the C<utf8> option to get utf8 encoded data back, or use C<dump_file>.
 
diff --git a/lib/YAML/PP/Dumper.pm b/lib/YAML/PP/Dumper.pm
index 43614a42..7a7b2152 100644
--- a/lib/YAML/PP/Dumper.pm
+++ b/lib/YAML/PP/Dumper.pm
@@ -227,8 +227,19 @@ sub _emit_node {
 
 sub dump_string {
     my ($self, @docs) = @_;
-    my $writer = YAML::PP::Writer->new(utf8 => $self->{utf8});
-    $self->emitter->set_writer($writer);
+    # Emitters providing new_writer() construct the writer themselves;
+    # any other emitter is handed a ready-made YAML::PP::Writer.
+    my $emitter = $self->emitter;
+    if ($emitter->can('new_writer')) {
+        $emitter->new_writer('YAML::PP::Writer' =>
+            utf8_out => $self->{utf8},
+        );
+    }
+    else {
+        $emitter->set_writer(YAML::PP::Writer->new(
+            utf8_out => $self->{utf8},
+        ));
+    }
     my $output = $self->dump(@docs);
     return $output;
 }
diff --git a/lib/YAML/PP/Emitter.pm b/lib/YAML/PP/Emitter.pm
index 9352a5e4..e7d0b4a5 100644
--- a/lib/YAML/PP/Emitter.pm
+++ b/lib/YAML/PP/Emitter.pm
@@ -43,6 +43,16 @@ sub column { return $_[0]->{column} }
 sub set_indent { $_[0]->{indent} = $_[1] }
 sub writer { $_[0]->{writer} }
 sub set_writer { $_[0]->{writer} = $_[1] }
+# Build a writer of class $class and install it on this emitter.
+# utf8_in is always 0 here; only utf8_out is taken from %args.
+sub new_writer {
+    my ($self, $class, %args) = @_;
+    my $writer = $class->new(
+        utf8_in => 0,
+        utf8_out => $args{utf8_out},
+    );
+    $self->{writer} = $writer;
+}
 sub tagmap { return $_[0]->{tagmap} }
 sub set_tagmap { $_[0]->{tagmap} = $_[1] }
 
diff --git a/lib/YAML/PP/Loader.pm b/lib/YAML/PP/Loader.pm
index 84eacc29..955917ce 100644
--- a/lib/YAML/PP/Loader.pm
+++ b/lib/YAML/PP/Loader.pm
@@ -87,15 +87,14 @@ sub load_string {
             input => $yaml,
             utf8_in => $self->{utf8},
         );
-        $self->load();
     }
     else {
         $self->parser->set_reader(YAML::PP::Reader->new(
             input => $yaml,
             utf8_in => $self->{utf8},
         ));
-        $self->load();
     }
+    $self->load();
 }
 
 sub load_file {
diff --git a/lib/YAML/PP/Reader.pm b/lib/YAML/PP/Reader.pm
index da7d2892..bb6a4240 100644
--- a/lib/YAML/PP/Reader.pm
+++ b/lib/YAML/PP/Reader.pm
@@ -2,7 +2,7 @@ use strict;
 use warnings;
 package YAML::PP::Reader;
 
-use Encode;
+use Encode qw/ decode encode /;
 
 our $VERSION = '0.000'; # VERSION
 
diff --git a/lib/YAML/PP/Writer.pm b/lib/YAML/PP/Writer.pm
index 2bc6ed90..8c7584df 100644
--- a/lib/YAML/PP/Writer.pm
+++ b/lib/YAML/PP/Writer.pm
@@ -6,36 +6,53 @@ use Encode;
 
 our $VERSION = '0.000'; # VERSION
 
-sub output { return $_[0]->{output} }
+# Return the collected output, converted at most once per init/finish cycle:
+# decoded when only utf8_in is set, UTF-8 encoded when only utf8_out is set.
+sub output {
+    my ($self) = @_;
+    my $output = $self->{output};
+    return $output if $self->{coded};
+    if ($self->{utf8_in} and ! $self->{utf8_out}) {
+        $output = decode 'UTF-8', $output, Encode::FB_CROAK;
+    }
+    elsif (not $self->{utf8_in} and $self->{utf8_out}) {
+        $output = encode 'UTF-8', $output, Encode::FB_CROAK;
+    }
+    $self->{output} = $output;
+    $self->{coded} = 1;
+    return $output;
+}
 sub set_output { $_[0]->{output} = $_[1] }
 
 sub new {
     my ($class, %args) = @_;
-    my $utf8 = delete $args{utf8};
-    $utf8 = 0 unless defined $utf8;
+    my $utf8_in = delete $args{utf8_in};
+    my $utf8_out = delete $args{utf8_out};
+    $utf8_in = 0 unless defined $utf8_in;
+    $utf8_out = 0 unless defined $utf8_out;
     my $output = delete $args{output};
     $output = '' unless defined $output;
     return bless {
-        utf8 => $utf8,
+        utf8_in => $utf8_in,
+        utf8_out => $utf8_out,
         output => $output,
     }, $class;
 }
 
 sub write {
     my ($self, $line) = @_;
-    if ($self->{utf8}) {
-        $line = encode 'UTF-8', $line, Encode::FB_CROAK;
-    }
     $self->{output} .= $line;
 }
 
 sub init {
     $_[0]->set_output('');
+    $_[0]->{coded} = 0;
 }
 
 sub finish {
     my ($self) = @_;
     $_[0]->set_output(undef);
+    $_[0]->{coded} = 0;
 }
 
 1;
diff --git a/t/58.utf8.t b/t/58.utf8.t
index e0ed9d64..09ca94b9 100644
--- a/t/58.utf8.t
+++ b/t/58.utf8.t
@@ -6,16 +6,6 @@ use Data::Dumper;
 use YAML::PP;
 use Encode;
 
-my $p_utf8 = YAML::PP->new(
-    header => 0,
-    utf8 => 1,
-);
-my $p_perl = YAML::PP->new(
-    header => 0,
-    utf8 => 0,
-);
-my $p_default = YAML::PP->new(header => 0);
-
 my $utf8 = <<'EOM';
 [bär]
 
@@ -26,45 +16,58 @@ my $perl = decode_utf8 $utf8;
 my $bear_utf8 = "bär";
 my $bear_perl = decode_utf8 $bear_utf8;
 
-subtest 'load unicode' => sub {
-    my $data = $p_utf8->load_string($utf8);
-    is $data->[0], $bear_perl, 'load utf8';
+subtest 'YAML::PP' => sub {
+    my $p_utf8 = YAML::PP->new(
+        header => 0,
+        utf8 => 1,
+    );
+    my $p_perl = YAML::PP->new(
+        header => 0,
+        utf8 => 0,
+    );
+    my $p_default = YAML::PP->new(header => 0);
+
 
-    eval {
-        $data = $p_utf8->load_string($perl);
-    };
-    my $err = $@;
-    like $err, qr{does not map to Unicode}, 'load decoded with utf8 loader fails';
+    subtest 'load unicode' => sub {
+        my $data = $p_utf8->load_string($utf8);
+        is $data->[0], $bear_perl, 'load utf8';
+
+        eval {
+            $data = $p_utf8->load_string($perl);
+        };
+        my $err = $@;
+        like $err, qr{does not map to Unicode}, 'load decoded with utf8 loader fails';
 
-    $data = $p_perl->load_string($perl);
-    is $data->[0], $bear_perl, 'load decoded with perl loader';
+        $data = $p_perl->load_string($perl);
+        is $data->[0], $bear_perl, 'load decoded with perl loader';
 
-    $data = $p_perl->load_string($utf8);
-    is $data->[0], $bear_utf8, 'load utf8 with perl loader';
+        $data = $p_perl->load_string($utf8);
+        is $data->[0], $bear_utf8, 'load utf8 with perl loader';
 
-    $data = $p_default->load_string($perl);
-    is $data->[0], $bear_perl, 'load decoded with default loader';
+        $data = $p_default->load_string($perl);
+        is $data->[0], $bear_perl, 'load decoded with default loader';
 
-    $data = $p_default->load_string($utf8);
-    is $data->[0], $bear_utf8, 'load utf8 with default loader';
-};
+        $data = $p_default->load_string($utf8);
+        is $data->[0], $bear_utf8, 'load utf8 with default loader';
+    };
 
-subtest 'dump unicode' => sub {
-    my $yaml = $p_utf8->dump_string([$bear_perl]);
-    $yaml =~ s/^- //; chomp $yaml;
-    is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8';
+    subtest 'dump unicode' => sub {
+        my $yaml = $p_utf8->dump_string([$bear_perl]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8';
 
-    $yaml = $p_utf8->dump_string([$bear_utf8]);
-    $yaml =~ s/^- //; chomp $yaml;
-    is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish';
+        $yaml = $p_utf8->dump_string([$bear_utf8]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish';
 
-    $yaml = $p_perl->dump_string([$bear_perl]);
-    $yaml =~ s/^- //; chomp $yaml;
-    is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';
+        $yaml = $p_perl->dump_string([$bear_perl]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';
 
-    $yaml = $p_perl->dump_string([$bear_utf8]);
-    $yaml =~ s/^- //; chomp $yaml;
-    $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8';
+        $yaml = $p_perl->dump_string([$bear_utf8]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8';
+    };
 };
 
 my $pplib = eval "use YAML::PP::LibYAML; 1";
@@ -100,6 +103,24 @@ subtest 'YAML::PP::LibYAML' => sub {
         $data = $p_default->load_string($utf8);
         is $data->[0], $bear_perl, 'load utf8 with default loader';
     };
+
+    subtest 'dump unicode' => sub {
+        my $yaml = $p_utf8->dump_string([$bear_perl]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8';
+
+        $yaml = $p_utf8->dump_string([$bear_utf8]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish';
+
+        $yaml = $p_perl->dump_string([$bear_perl]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';
+
+        $yaml = $p_perl->dump_string([$bear_utf8]);
+        $yaml =~ s/^- //; chomp $yaml;
+        is $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8';
+    };
 };