From c65b360b8ef9cfffbe5a945d6a221eef06bdbe5c Mon Sep 17 00:00:00 2001 From: Christian Walde Date: Fri, 22 Jul 2022 10:34:30 +0200 Subject: [PATCH] proof-of-concept for feature-tracking and perl sub signatures (see #273) --- lib/PPI/Document.pm | 46 +++++++---- lib/PPI/Element.pm | 23 ++++++ lib/PPI/Lexer.pm | 14 +++- lib/PPI/Statement/Include.pm | 43 +++++++++++ lib/PPI/Token.pm | 1 + lib/PPI/Token/Signature.pm | 55 +++++++++++++ lib/PPI/Token/Whitespace.pm | 17 ++++- t/feature_tracking.t | 144 +++++++++++++++++++++++++++++++++++ 8 files changed, 324 insertions(+), 19 deletions(-) create mode 100644 lib/PPI/Token/Signature.pm create mode 100644 t/feature_tracking.t diff --git a/lib/PPI/Document.pm b/lib/PPI/Document.pm index 950c1250..7173ebfd 100644 --- a/lib/PPI/Document.pm +++ b/lib/PPI/Document.pm @@ -128,17 +128,22 @@ In all cases, the document is considered to be "anonymous" and not tied back to where it was created from. Specifically, if you create a PPI::Document from a filename, the document will B remember where it was created from. +Returns a C object, or C if parsing fails. +L objects can also be thrown if there are parsing problems. + The constructor also takes attribute flags. -At this time, the only available attribute is the C flag. +=head3 readonly -Setting C to true will allow various systems to provide -additional optimisations and caching. Note that because C is an -optimisation flag, it is off by default and you will need to explicitly -enable it. +Setting C to true will allow various systems to provide additional +optimisations and caching. Note that because C is an optimisation +flag, it is off by default and you will need to explicitly enable it. -Returns a C object, or C if parsing fails. -L objects can also be thrown if there are parsing problems. +=head3 feature_mods + +Setting feature_mods with a hashref allows defining perl parsing features to be +enabled for the whole document. (e.g. when the code is assumed to be run as a +oneliner) =cut @@ -181,25 +186,25 @@ sub new { my $document = $CACHE->get_document($file_contents); return $class->_setattr( $document, %attr ) if $document; - $document = PPI::Lexer->lex_source( $$file_contents ); + $document = PPI::Lexer->lex_source( $$file_contents, %attr ); if ( $document ) { # Save in the cache $CACHE->store_document( $document ); - return $class->_setattr( $document, %attr ); + return $document; } } else { - my $document = PPI::Lexer->lex_file( $source ); - return $class->_setattr( $document, %attr ) if $document; + my $document = PPI::Lexer->lex_file( $source, %attr ); + return $document if $document; } } elsif ( _SCALAR0($source) ) { - my $document = PPI::Lexer->lex_source( $$source ); - return $class->_setattr( $document, %attr ) if $document; + my $document = PPI::Lexer->lex_source( $$source, %attr ); + return $document if $document; } elsif ( _ARRAY0($source) ) { $source = join '', map { "$_\n" } @$source; - my $document = PPI::Lexer->lex_source( $source ); - return $class->_setattr( $document, %attr ) if $document; + my $document = PPI::Lexer->lex_source( $source, %attr ); + return $document if $document; } else { $class->_error("Unknown object or reference was passed to PPI::Document::new"); @@ -229,6 +234,7 @@ sub _setattr { my ($class, $document, %attr) = @_; $document->{readonly} = !! $attr{readonly}; $document->{filename} = $attr{filename}; + $document->{feature_mods} = $attr{feature_mods}; return $document; } @@ -344,6 +350,16 @@ sub tab_width { $self->{tab_width} = shift; } +=head2 feature_mods { feature_name => $enabled } + +=cut + +sub feature_mods { + my $self = shift; + return $self->{feature_mods} unless @_; + $self->{feature_mods} = shift; +} + =pod =head2 save diff --git a/lib/PPI/Element.pm b/lib/PPI/Element.pm index 2ee74d53..e0f42852 100644 --- a/lib/PPI/Element.pm +++ b/lib/PPI/Element.pm @@ -467,9 +467,32 @@ sub previous_token { } } +=head2 presumed_features +Returns a hash that indicates which features appear to be active for the given +element. +=cut + +sub presumed_features { + my ($self) = @_; + my @feature_mods; + my $walker = $self; + while ($walker) { + my $sib_walk = $walker; + while ($sib_walk) { + push @feature_mods, $sib_walk if $sib_walk->can("feature_mods"); + $sib_walk = $sib_walk->sprevious_sibling; + } + $walker = $walker->parent; + } + + my %feature_mods = map %{$_}, reverse grep defined, map $_->feature_mods, + @feature_mods; + + return \%feature_mods; +} ##################################################################### # Manipulation diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm index 631ffb58..815cd7ce 100644 --- a/lib/PPI/Lexer.pm +++ b/lib/PPI/Lexer.pm @@ -133,6 +133,8 @@ creates a L for the content and lexes the token stream produced by the tokenizer. Basically, a sort of all-in-one method for getting a L object from a file name. +Additional arguments are passed to the tokenizer as a hash. + Returns a L object, or C on error. =cut @@ -143,6 +145,7 @@ sub lex_file { unless ( defined $file ) { return $self->_error("Did not pass a filename to PPI::Lexer::lex_file"); } + my %args = @_; # Create the Tokenizer my $Tokenizer = eval { @@ -154,7 +157,7 @@ sub lex_file { return $self->_error( $errstr ); } - $self->lex_tokenizer( $Tokenizer ); + $self->lex_tokenizer( $Tokenizer, %args ); } =pod @@ -165,6 +168,8 @@ The C method takes a normal scalar string as argument. It creates a L object for the string, and then lexes the resulting token stream. +Additional arguments are passed to the tokenizer as a hash. + Returns a L object, or C on error. =cut @@ -175,6 +180,7 @@ sub lex_source { unless ( defined $source and not ref $source ) { return $self->_error("Did not pass a string to PPI::Lexer::lex_source"); } + my %args = @_; # Create the Tokenizer and hand off to the next method my $Tokenizer = eval { @@ -186,7 +192,7 @@ sub lex_source { return $self->_error( $errstr ); } - $self->lex_tokenizer( $Tokenizer ); + $self->lex_tokenizer( $Tokenizer, %args ); } =pod @@ -196,6 +202,8 @@ sub lex_source { The C takes as argument a L object. It lexes the token stream from the tokenizer into a L object. +Additional arguments are set on the L produced. + Returns a L object, or C on error. =cut @@ -206,9 +214,11 @@ sub lex_tokenizer { return $self->_error( "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer" ) unless $Tokenizer; + my %args = @_; # Create the empty document my $Document = PPI::Document->new; + ref($Document)->_setattr( $Document, %args ) if keys %args; $Tokenizer->_document($Document); # Lex the token stream into the document diff --git a/lib/PPI/Statement/Include.pm b/lib/PPI/Statement/Include.pm index ae8d3120..c1f697a1 100644 --- a/lib/PPI/Statement/Include.pm +++ b/lib/PPI/Statement/Include.pm @@ -45,6 +45,9 @@ L, L and L methods. =cut use strict; + +use version (); + use PPI::Statement (); use PPI::Statement::Include::Perl6 (); @@ -236,6 +239,46 @@ sub arguments { return @args; } +=head2 arguments + +Returns a hashref of features identified as enabled by the include, or undef if +the include does not enable features. + +=cut + +sub feature_mods { + my ($self) = @_; + + my %known = ( signatures => 1 ); + + return if $self->type eq "require"; + + if ( my $perl_version = $self->version ) { + ## crude proof of concept hack due to above + return { signatures => 1 } if version::parse($perl_version) >= 5.035; + + # # tried using feature.pm here, but it is impossible to install + # # future versions of it, so e.g. a 5.20 install cannot know about + # # 5.36 features + # $perl_version = join ".", # + # ( split /\./, $perl_version )[0], + # 0 + ( split /\./, $perl_version )[1]; + # my $bundle = $feature::feature_bundle{$perl_version}; + # return { map +( $_ => 1 ), %{$bundle} }; + } + + if ( $self->module eq "feature" ) { + my @features = grep $known{$_}, + map +( $_->can("literal") || $_->can("string") || die "???" )->($_), + map $_->isa("PPI::Structure::List") ? $_->children : $_, + $self->arguments; + my $on_or_off = $self->type eq "use" ? 1 : 0; + return { map +( $_ => $on_or_off ), @features } if @features; + } + + return; +} + 1; =pod diff --git a/lib/PPI/Token.pm b/lib/PPI/Token.pm index e261fb18..41609116 100644 --- a/lib/PPI/Token.pm +++ b/lib/PPI/Token.pm @@ -70,6 +70,7 @@ use PPI::Token::Separator (); use PPI::Token::Data (); use PPI::Token::End (); use PPI::Token::Prototype (); +use PPI::Token::Signature (); use PPI::Token::Attribute (); use PPI::Token::Unknown (); diff --git a/lib/PPI/Token/Signature.pm b/lib/PPI/Token/Signature.pm new file mode 100644 index 00000000..0ecf8556 --- /dev/null +++ b/lib/PPI/Token/Signature.pm @@ -0,0 +1,55 @@ +package PPI::Token::Signature; + +=pod + +=head1 NAME + +PPI::Token::Signature - A subroutine signature descriptor + +=head1 INHERITANCE + + PPI::Token::Signature + isa PPI::Token::Prototype + isa PPI::Token + isa PPI::Element + +=head1 SYNOPSIS + + TODO: document + +=head1 DESCRIPTION + + TODO: document + +=cut + +use strict; +use PPI::Token::Prototype (); + +our $VERSION = '1.276'; + +our @ISA = "PPI::Token::Prototype"; + +1; + +=pod + +=head1 SUPPORT + +See the L in the main module. + +=head1 AUTHOR + +Adam Kennedy Eadamk@cpan.orgE + +=head1 COPYRIGHT + +Copyright 2001 - 2011 Adam Kennedy. + +This program is free software; you can redistribute +it and/or modify it under the same terms as Perl itself. + +The full text of the license can be found in the +LICENSE file included with this module. + +=cut diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm index 2874b3da..c46f149c 100644 --- a/lib/PPI/Token/Whitespace.pm +++ b/lib/PPI/Token/Whitespace.pm @@ -212,8 +212,21 @@ sub __TOKENIZER__on_char { # 2. The one before that is the word 'sub'. # 3. The one before that is a 'structure' - # Get the three previous significant tokens - my @tokens = $t->_previous_significant_tokens(3); + # Get at least the three previous significant tokens, and extend the + # retrieval range to include at least one token that can walk the + # already generated tree. (i.e. has a parent) + my ( $tokens_to_get, @tokens ) = (3); + while ( !@tokens or ( $tokens[-1] and !$tokens[-1]->parent ) ) { + @tokens = $t->_previous_significant_tokens($tokens_to_get); + last if @tokens < $tokens_to_get; + $tokens_to_get++; + } + + my ($closest_parented_token) = grep $_->parent, @tokens; + die "no parented element found" unless # + $closest_parented_token ||= $t->_document; + return 'Signature' + if $closest_parented_token->presumed_features->{signatures}; # A normal subroutine declaration my $p1 = $tokens[1]; diff --git a/t/feature_tracking.t b/t/feature_tracking.t new file mode 100644 index 00000000..f7261c3e --- /dev/null +++ b/t/feature_tracking.t @@ -0,0 +1,144 @@ +#!/usr/bin/perl + +use lib 't/lib'; +use PPI::Test::pragmas; +use Test::More tests => 3 + ( $ENV{AUTHOR_TESTING} ? 1 : 0 ); + +use B 'perlstring'; + +use PPI (); + +#use DB::Skip subs => [ +# qw( PPI::Document::new PPI::Lexer::lex_source PPI::Lexer::new +# PPI::Lexer::_clear PPI::Lexer::(eval) PPI::Lexer::X_TOKENIZER +# PPI::Tokenizer::new PPI::Lexer::lex_tokenizer PPI::Node::new ), +# qr/^PPI::Tokenizer::__ANON__.*237.*$/ +#]; + +sub test_document; + +FEATURE_TRACKING: { + test_document + <<'END_PERL', + sub meep(&$) {} + use 5.035; + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Prototype' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Include' => 'use 5.035;', + 'PPI::Token::Word' => 'use', + 'PPI::Token::Number::Float' => '5.035', + 'PPI::Token::Structure' => ';', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Signature' => '($left, $right)', # !!!!!!!!!!!!!!!!!!!! + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "enabling of features"; +} + +DOCUMENT_FEATURES: { + test_document + <<'END_PERL', + sub meep(&$) {} + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Signature' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Signature' => '($left, $right)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "document-level default features", + feature_mods => { signatures => 1 }; +} + +DISABLE_FEATURE: { + test_document + <<'END_PERL', + sub meep(&$) {} + no feature 'signatures'; + sub marp($left, $right) {} +END_PERL + [ + 'PPI::Statement::Sub' => 'sub meep(&$) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'meep', + 'PPI::Token::Signature' => '(&$)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + 'PPI::Statement::Include' => q|no feature 'signatures';|, + 'PPI::Token::Word' => 'no', + 'PPI::Token::Word' => 'feature', + 'PPI::Token::Quote::Single' => q|'signatures'|, + 'PPI::Token::Structure' => ';', + 'PPI::Statement::Sub' => 'sub marp($left, $right) {}', + 'PPI::Token::Word' => 'sub', + 'PPI::Token::Word' => 'marp', + 'PPI::Token::Prototype' => '($left, $right)', + 'PPI::Structure::Block' => '{}', + 'PPI::Token::Structure' => '{', + 'PPI::Token::Structure' => '}', + ], + "disabling of features", + feature_mods => { signatures => 1 }; +} + +### TODO from ppi_token_unknown.t , deduplicate + +sub one_line_explain { + my ($data) = @_; + my @explain = explain $data; + s/\n//g for @explain; + return join "", @explain; +} + +sub main_level_line { + return "" if not $TODO; + my @outer_final; + my $level = 0; + while ( my @outer = caller( $level++ ) ) { + @outer_final = @outer; + } + return "l $outer_final[2] - "; +} + +sub test_document { + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ( $code, $expected, $msg, @args ) = @_; + $msg = perlstring $code if !defined $msg; + + my $d = PPI::Document->new( \$code, @args ); + my $tokens = $d->find( sub { $_[1]->significant } ); + $tokens = [ map { ref($_), $_->content } @$tokens ]; + + my $ok = is_deeply( $tokens, $expected, main_level_line . $msg ); + if ( !$ok ) { + diag ">>> $code -- $msg\n"; + diag one_line_explain $tokens; + diag one_line_explain $expected; + } + + return; +}