Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
perlpunk committed Dec 30, 2023
1 parent 1924924 commit 3a1215d
Show file tree
Hide file tree
Showing 7 changed files with 117 additions and 56 deletions.
8 changes: 4 additions & 4 deletions lib/YAML/PP.pm
Original file line number Diff line number Diff line change
Expand Up @@ -842,10 +842,10 @@ C<preserved_(scalar|mapping|sequence)> L<"METHODS"> below.
my $doc = $ypp->load_string("foo: bar");
my @docs = $ypp->load_string("foo: bar\n---\n- a");
Input should be Unicode characters.
Input should be Unicode characters by default.
Set the C<utf8> option to automatically let YAML::PP do the decoding, or
use C<Encode::decode()>.
Set the C<utf8> option to pass UTF-8 encoded data and let YAML::PP do the
decoding automatically, or decode the input yourself with C<Encode::decode()>.
Note that in scalar context, C<load_string> and C<load_file> return the first
document (like L<YAML::Syck>), while L<YAML> and L<YAML::XS> return the
Expand All @@ -866,7 +866,7 @@ Strings will be loaded as unicode characters.
Input data should be Unicode characters.
Output will return Unicode characters.
Output will return Unicode characters by default.
Set the C<utf8> option to get utf8 encoded data back, or use
C<Encode::encode()>.
Expand Down
15 changes: 13 additions & 2 deletions lib/YAML/PP/Dumper.pm
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,19 @@ sub _emit_node {

# Dump the given documents and return the result of $self->dump(@docs).
#
# Before dumping, a writer is configured on the emitter with utf8_out taken
# from this dumper's utf8 setting:
#
#  * an emitter that provides new_writer() is handed the writer class name
#    and the options and builds the writer itself;
#  * any other emitter receives a ready-made YAML::PP::Writer through
#    set_writer().
sub dump_string {
    my ($self, @docs) = @_;
    my $emitter = $self->emitter;
    # can() also finds an inherited new_writer() and, unlike a symbol table
    # lookup, does not require switching off strict refs.
    if ($emitter->can('new_writer')) {
        $emitter->new_writer('YAML::PP::Writer' =>
            utf8_out => $self->{utf8},
        );
    }
    else {
        $emitter->set_writer(YAML::PP::Writer->new(
            utf8_out => $self->{utf8},
        ));
    }
    my $output = $self->dump(@docs);
    return $output;
}
Expand Down
8 changes: 8 additions & 0 deletions lib/YAML/PP/Emitter.pm
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,14 @@ sub column { return $_[0]->{column} }
# Store a new value in the emitter's `indent` slot; returns the stored value.
sub set_indent {
    my ($self, $indent) = @_;
    return $self->{indent} = $indent;
}
# Return whatever is currently stored in the emitter's `writer` slot.
sub writer {
    my ($self) = @_;
    return $self->{writer};
}
# Store a writer in the emitter's `writer` slot; returns the stored value.
sub set_writer {
    my ($self, $writer) = @_;
    return $self->{writer} = $writer;
}
# Construct a writer of the given class and store it in the emitter's
# `writer` slot.
#
# Arguments: the writer class name, followed by key/value options. Only
# utf8_out is forwarded to the constructor; utf8_in is always passed as 0.
# Returns the newly created writer.
sub new_writer {
    my $self = shift;
    my $writer_class = shift;
    my %options = @_;
    return $self->{writer} = $writer_class->new(
        utf8_in  => 0,
        utf8_out => $options{utf8_out},
    );
}
# Return whatever is currently stored in the emitter's `tagmap` slot.
sub tagmap {
    my ($self) = @_;
    return $self->{tagmap};
}
# Store a new value in the emitter's `tagmap` slot; returns the stored value.
sub set_tagmap {
    my ($self, $tagmap) = @_;
    return $self->{tagmap} = $tagmap;
}

Expand Down
3 changes: 1 addition & 2 deletions lib/YAML/PP/Loader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,14 @@ sub load_string {
input => $yaml,
utf8_in => $self->{utf8},
);
$self->load();
}
else {
$self->parser->set_reader(YAML::PP::Reader->new(
input => $yaml,
utf8_in => $self->{utf8},
));
$self->load();
}
$self->load();
}

sub load_file {
Expand Down
2 changes: 1 addition & 1 deletion lib/YAML/PP/Reader.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
use strict;
use warnings;
package YAML::PP::Reader;
use Encode;
use Encode qw/ decode encode /;

our $VERSION = '0.000'; # VERSION

Expand Down
34 changes: 27 additions & 7 deletions lib/YAML/PP/Writer.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,56 @@ use Encode;

our $VERSION = '0.000'; # VERSION

# Return the accumulated output, converted once according to the writer's
# utf8_in / utf8_out flags:
#
#   utf8_in true,  utf8_out false : the buffer is decoded from UTF-8
#   utf8_in false, utf8_out true  : the buffer is encoded to UTF-8
#   both equal                    : the buffer is returned unchanged
#
# The converted value replaces the stored buffer and the `coded` flag is set,
# so repeated calls return the same value without converting it a second
# time. init() and finish() reset the flag.
#
# Encode::FB_CROAK makes malformed input die instead of being silently
# replaced.
sub output {
    my ($self) = @_;
    my $output = $self->{output};
    return $output if $self->{coded};
    if ($self->{utf8_in} and not $self->{utf8_out}) {
        $output = Encode::decode('UTF-8', $output, Encode::FB_CROAK);
    }
    elsif (not $self->{utf8_in} and $self->{utf8_out}) {
        $output = Encode::encode('UTF-8', $output, Encode::FB_CROAK);
    }
    $self->{output} = $output;
    $self->{coded} = 1;
    return $output;
}
# Replace the stored output buffer; returns the value that was stored.
sub set_output {
    my ($self, $output) = @_;
    return $self->{output} = $output;
}

# Constructor.
#
# Recognised options: utf8, utf8_in, utf8_out (each defaults to 0 when not
# defined) and output (defaults to the empty string). Any other option is
# ignored. Returns a hash-based object blessed into $class.
sub new {
    my $class = shift;
    my %args = @_;
    my %fields;
    for my $flag (qw/ utf8 utf8_in utf8_out /) {
        my $value = delete $args{$flag};
        $fields{$flag} = defined $value ? $value : 0;
    }
    my $initial = delete $args{output};
    $fields{output} = defined $initial ? $initial : '';
    return bless \%fields, $class;
}

# Append $line to the output buffer. When the writer's `utf8` flag is true
# the line is encoded to UTF-8 first (dying on characters that cannot be
# encoded). Returns the buffer after appending.
sub write {
    my ($self, $line) = @_;
    $line = encode 'UTF-8', $line, Encode::FB_CROAK
        if $self->{utf8};
    $self->{output} .= $line;
}

# Reset the writer: the output buffer becomes the empty string (through
# set_output) and the `coded` flag is cleared.
sub init {
    my ($self) = @_;
    $self->set_output('');
    $self->{coded} = 0;
}

# Release the buffer: output is set to undef (through set_output) and the
# `coded` flag is cleared.
sub finish {
    my $self = shift;
    $self->set_output(undef);
    $self->{coded} = 0;
}

1;
Expand Down
103 changes: 63 additions & 40 deletions t/58.utf8.t
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@ use Data::Dumper;
use YAML::PP;
use Encode;

my $p_utf8 = YAML::PP->new(
header => 0,
utf8 => 1,
);
my $p_perl = YAML::PP->new(
header => 0,
utf8 => 0,
);
my $p_default = YAML::PP->new(header => 0);


my $utf8 = <<'EOM';
[bär]
Expand All @@ -26,45 +16,58 @@ my $perl = decode_utf8 $utf8;
my $bear_utf8 = "bär";
my $bear_perl = decode_utf8 $bear_utf8;

# Load and dump round trips with the pure-Perl YAML::PP backend.
# $utf8 / $bear_utf8 hold UTF-8 encoded bytes, $perl / $bear_perl the decoded
# character strings (see the decode_utf8 calls above).
subtest 'YAML::PP' => sub {
    my $p_utf8 = YAML::PP->new(
        header => 0,
        utf8 => 1,
    );
    my $p_perl = YAML::PP->new(
        header => 0,
        utf8 => 0,
    );
    my $p_default = YAML::PP->new(header => 0);

    subtest 'load unicode' => sub {
        my $data = $p_utf8->load_string($utf8);
        is $data->[0], $bear_perl, 'load utf8';

        # Already-decoded input given to a utf8 loader must be rejected
        eval {
            $data = $p_utf8->load_string($perl);
        };
        my $err = $@;
        like $err, qr{does not map to Unicode}, 'load decoded with utf8 loader fails';

        $data = $p_perl->load_string($perl);
        is $data->[0], $bear_perl, 'load decoded with perl loader';

        $data = $p_perl->load_string($utf8);
        is $data->[0], $bear_utf8, 'load utf8 with perl loader';

        $data = $p_default->load_string($perl);
        is $data->[0], $bear_perl, 'load decoded with default loader';

        $data = $p_default->load_string($utf8);
        is $data->[0], $bear_utf8, 'load utf8 with default loader';
    };

    subtest 'dump unicode' => sub {
        my $yaml = $p_utf8->dump_string([$bear_perl]);
        $yaml =~ s/^- //; chomp $yaml;
        is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8';

        $yaml = $p_utf8->dump_string([$bear_utf8]);
        $yaml =~ s/^- //; chomp $yaml;
        is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish';

        $yaml = $p_perl->dump_string([$bear_perl]);
        $yaml =~ s/^- //; chomp $yaml;
        is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';

        # This comparison used to be a bare list without `is`, so it was
        # never actually checked.
        $yaml = $p_perl->dump_string([$bear_utf8]);
        $yaml =~ s/^- //; chomp $yaml;
        is $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8';
    };
};

my $pplib = eval "use YAML::PP::LibYAML; 1";
Expand Down Expand Up @@ -100,6 +103,26 @@ subtest 'YAML::PP::LibYAML' => sub {
$data = $p_default->load_string($utf8);
is $data->[0], $bear_perl, 'load utf8 with default loader';
};

my $bear_perl = decode_utf8 $bear_utf8;
# Dump checks for the enclosing backend section: every combination of
# character / UTF-8 byte input with the utf8 and non-utf8 dumper.
subtest 'dump unicode' => sub {
    my $yaml = $p_utf8->dump_string([$bear_perl]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, $bear_utf8, 'dump perl data with utf8 dumper -> utf8';

    $yaml = $p_utf8->dump_string([$bear_utf8]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, encode_utf8($bear_utf8), 'dump utf8 data with utf8 dumper -> rubbish';

    $yaml = $p_perl->dump_string([$bear_perl]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, $bear_perl, 'dump perl data with perl dumper -> perl';

    # This comparison used to be a bare list without `is`, so it was never
    # actually checked.
    $yaml = $p_perl->dump_string([$bear_utf8]);
    $yaml =~ s/^- //; chomp $yaml;
    is $yaml, $bear_utf8, 'dump utf8 data with perl dumper -> utf8';
};
};


Expand Down

0 comments on commit 3a1215d

Please sign in to comment.