From c65b360b8ef9cfffbe5a945d6a221eef06bdbe5c Mon Sep 17 00:00:00 2001
From: Christian Walde <walde.christian@gmail.com>
Date: Fri, 22 Jul 2022 10:34:30 +0200
Subject: [PATCH] proof-of-concept for feature-tracking and perl sub signatures
 (see #273)

---
 lib/PPI/Document.pm          |  46 +++++++----
 lib/PPI/Element.pm           |  23 ++++++
 lib/PPI/Lexer.pm             |  14 +++-
 lib/PPI/Statement/Include.pm |  43 +++++++++++
 lib/PPI/Token.pm             |   1 +
 lib/PPI/Token/Signature.pm   |  55 +++++++++++++
 lib/PPI/Token/Whitespace.pm  |  17 ++++-
 t/feature_tracking.t         | 144 +++++++++++++++++++++++++++++++++++
 8 files changed, 324 insertions(+), 19 deletions(-)
 create mode 100644 lib/PPI/Token/Signature.pm
 create mode 100644 t/feature_tracking.t
diff --git a/lib/PPI/Document.pm b/lib/PPI/Document.pm
index 950c1250..7173ebfd 100644
--- a/lib/PPI/Document.pm
+++ b/lib/PPI/Document.pm
@@ -128,17 +128,22 @@ In all cases, the document is considered to be "anonymous" and not tied back
 to where it was created from. Specifically, if you create a PPI::Document from
 a filename, the document will B<not> remember where it was created from.
 
+Returns a C<PPI::Document> object, or C<undef> if parsing fails.
+L<PPI::Exception> objects can also be thrown if there are parsing problems.
+
 The constructor also takes attribute flags.
 
-At this time, the only available attribute is the C<readonly> flag.
+=head3 readonly
 
-Setting C<readonly> to true will allow various systems to provide
-additional optimisations and caching. Note that because C<readonly> is an
-optimisation flag, it is off by default and you will need to explicitly
-enable it.
+Setting C<readonly> to true will allow various systems to provide additional
+optimisations and caching. Note that because C<readonly> is an optimisation
+flag, it is off by default and you will need to explicitly enable it.
 
-Returns a C<PPI::Document> object, or C<undef> if parsing fails.
-L<PPI::Exception> objects can also be thrown if there are parsing problems.
+=head3 feature_mods
+
+Setting C<feature_mods> to a hashref of C<< feature_name => $enabled >> pairs
+defines Perl parsing features that are switched on or off for the whole
+document (e.g. when the code is assumed to be run as a one-liner).
 
 =cut
 
@@ -181,25 +186,25 @@ sub new {
 			my $document = $CACHE->get_document($file_contents);
 			return $class->_setattr( $document, %attr ) if $document;
 
-			$document = PPI::Lexer->lex_source( $$file_contents );
+			$document = PPI::Lexer->lex_source( $$file_contents, %attr );
 			if ( $document ) {
 				# Save in the cache
 				$CACHE->store_document( $document );
-				return $class->_setattr( $document, %attr );
+				return $document;
 			}
 		} else {
-			my $document = PPI::Lexer->lex_file( $source );
-			return $class->_setattr( $document, %attr ) if $document;
+			my $document = PPI::Lexer->lex_file( $source, %attr );
+			return $document if $document;
 		}
 
 	} elsif ( _SCALAR0($source) ) {
-		my $document = PPI::Lexer->lex_source( $$source );
-		return $class->_setattr( $document, %attr ) if $document;
+		my $document = PPI::Lexer->lex_source( $$source, %attr );
+		return $document if $document;
 
 	} elsif ( _ARRAY0($source) ) {
 		$source = join '', map { "$_\n" } @$source;
-		my $document = PPI::Lexer->lex_source( $source );
-		return $class->_setattr( $document, %attr ) if $document;
+		my $document = PPI::Lexer->lex_source( $source, %attr );
+		return $document if $document;
 
 	} else {
 		$class->_error("Unknown object or reference was passed to PPI::Document::new");
@@ -229,6 +234,7 @@ sub _setattr {
 	my ($class, $document, %attr) = @_;
 	$document->{readonly} = !! $attr{readonly};
 	$document->{filename} = $attr{filename};
+	$document->{feature_mods} = $attr{feature_mods};
 	return $document;
 }
 
@@ -344,6 +350,16 @@ sub tab_width {
 	$self->{tab_width} = shift;
 }
 
+=head2 feature_mods { feature_name => $enabled }
+
+=cut
+
+sub feature_mods {
+	my ( $self, @new ) = @_;
+	return $self->{feature_mods} if !@new;    # getter: no argument given
+	return $self->{feature_mods} = $new[0];   # setter: store and hand back
+}
+
 =pod
 
 =head2 save
diff --git a/lib/PPI/Element.pm b/lib/PPI/Element.pm
index 2ee74d53..e0f42852 100644
--- a/lib/PPI/Element.pm
+++ b/lib/PPI/Element.pm
@@ -467,9 +467,32 @@ sub previous_token {
 	}
 }
 
+=head2 presumed_features
 
+Returns a hashref that indicates which features appear to be active for the
+given element.
 
+=cut
+
+sub presumed_features {
+	my ($self) = @_;
 
+	# Collect feature-modifying elements nearest-first: this element and its
+	# preceding significant siblings, then the same for every ancestor.
+	my @sources;
+	for ( my $level = $self ; $level ; $level = $level->parent ) {
+		my $cursor = $level;
+		while ($cursor) {
+			push @sources, $cursor if $cursor->can("feature_mods");
+			$cursor = $cursor->sprevious_sibling;
+		}
+	}
+
+	# Merge outermost-first so that the nearest declaration wins.
+	my @mods = reverse grep defined, map $_->feature_mods, @sources;
+	my %features = map %{$_}, @mods;
+	return \%features;
+}
 
 #####################################################################
 # Manipulation
diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm
index 631ffb58..815cd7ce 100644
--- a/lib/PPI/Lexer.pm
+++ b/lib/PPI/Lexer.pm
@@ -133,6 +133,8 @@ creates a L<PPI::Tokenizer> for the content and lexes the token stream
 produced by the tokenizer. Basically, a sort of all-in-one method for
 getting a L<PPI::Document> object from a file name.
 
+Additional arguments are passed on to C<lex_tokenizer> as a hash.
+
 Returns a L<PPI::Document> object, or C<undef> on error.
 
 =cut
@@ -143,6 +145,7 @@ sub lex_file {
 	unless ( defined $file ) {
 		return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
 	}
+	my %args = @_;
 
 	# Create the Tokenizer
 	my $Tokenizer = eval {
@@ -154,7 +157,7 @@ sub lex_file {
 		return $self->_error( $errstr );
 	}
 
-	$self->lex_tokenizer( $Tokenizer );
+	$self->lex_tokenizer( $Tokenizer, %args );
 }
 
 =pod
@@ -165,6 +168,8 @@ The C<lex_source> method takes a normal scalar string as argument. It
 creates a L<PPI::Tokenizer> object for the string, and then lexes the
 resulting token stream.
 
+Additional arguments are passed on to C<lex_tokenizer> as a hash.
+
 Returns a L<PPI::Document> object, or C<undef> on error.
 
 =cut
@@ -175,6 +180,7 @@ sub lex_source {
 	unless ( defined $source and not ref $source ) {
 		return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
 	}
+	my %args = @_;
 
 	# Create the Tokenizer and hand off to the next method
 	my $Tokenizer = eval {
@@ -186,7 +192,7 @@ sub lex_source {
 		return $self->_error( $errstr );
 	}
 
-	$self->lex_tokenizer( $Tokenizer );
+	$self->lex_tokenizer( $Tokenizer, %args );
 }
 
 =pod
@@ -196,6 +202,8 @@ sub lex_source {
 The C<lex_tokenizer> takes as argument a L<PPI::Tokenizer> object. It
 lexes the token stream from the tokenizer into a L<PPI::Document> object.
 
+Additional arguments are set on the L<PPI::Document> produced.
+
 Returns a L<PPI::Document> object, or C<undef> on error.
 
 =cut
@@ -206,9 +214,11 @@ sub lex_tokenizer {
 	return $self->_error(
 		"Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
 	) unless $Tokenizer;
+	my %args = @_;
 
 	# Create the empty document
 	my $Document = PPI::Document->new;
+	ref($Document)->_setattr( $Document, %args ) if keys %args;
 	$Tokenizer->_document($Document);
 
 	# Lex the token stream into the document
diff --git a/lib/PPI/Statement/Include.pm b/lib/PPI/Statement/Include.pm
index ae8d3120..c1f697a1 100644
--- a/lib/PPI/Statement/Include.pm
+++ b/lib/PPI/Statement/Include.pm
@@ -45,6 +45,9 @@ L<PPI::Statement>, L<PPI::Node> and L<PPI::Element> methods.
 =cut
 
 use strict;
+
+use version ();
+
 use PPI::Statement                 ();
 use PPI::Statement::Include::Perl6 ();
 
@@ -236,6 +239,46 @@ sub arguments {
 	return @args;
 }
 
+=head2 feature_mods
+
+Returns a hashref mapping each known feature the include switches on or off to
+1 or 0, or nothing if the include does not affect any known feature.
+
+=cut
+
+sub feature_mods {
+	my ($self) = @_;
+	# Reports which known features this include switches on (1) or off (0).
+	my %known = ( signatures => 1 );
+
+	return if $self->type eq "require";    # only use/no are considered
+
+	if ( my $perl_version = $self->version ) {
+		## crude proof of concept hack (see below); parse() needs a class invocant
+		return { signatures => 1 } if version->parse($perl_version) >= 5.035;
+
+		# # tried using feature.pm here, but it is impossible to install
+		# # future versions of it, so e.g. a 5.20 install cannot know about
+		# # 5.36 features
+		# $perl_version = join ".",    #
+		#   ( split /\./, $perl_version )[0],
+		#   0 + ( split /\./, $perl_version )[1];
+		# my $bundle = $feature::feature_bundle{$perl_version};
+		# return { map +( $_ => 1 ), %{$bundle} };
+	}
+
+	if ( $self->module eq "feature" ) {
+		my @features = grep $known{$_},
+		  map +( $_->can("literal") || $_->can("string") || die "???" )->($_),
+		  map $_->isa("PPI::Structure::List") ? $_->children : $_,
+		  $self->arguments;
+		my $on_or_off = $self->type eq "use" ? 1 : 0;    # "no feature" => off
+		return { map +( $_ => $on_or_off ), @features } if @features;
+	}
+
+	return;
+}
+
 1;
 
 =pod
diff --git a/lib/PPI/Token.pm b/lib/PPI/Token.pm
index e261fb18..41609116 100644
--- a/lib/PPI/Token.pm
+++ b/lib/PPI/Token.pm
@@ -70,6 +70,7 @@ use PPI::Token::Separator             ();
 use PPI::Token::Data                  ();
 use PPI::Token::End                   ();
 use PPI::Token::Prototype             ();
+use PPI::Token::Signature             ();
 use PPI::Token::Attribute             ();
 use PPI::Token::Unknown               ();
 
diff --git a/lib/PPI/Token/Signature.pm b/lib/PPI/Token/Signature.pm
new file mode 100644
index 00000000..0ecf8556
--- /dev/null
+++ b/lib/PPI/Token/Signature.pm
@@ -0,0 +1,55 @@
+package PPI::Token::Signature;
+
+=pod
+
+=head1 NAME
+
+PPI::Token::Signature - A subroutine signature descriptor
+
+=head1 INHERITANCE
+
+  PPI::Token::Signature
+  isa PPI::Token::Prototype
+      isa PPI::Token
+          isa PPI::Element
+
+=head1 SYNOPSIS
+
+  use 5.035; sub marp($left, $right) {}   # "($left, $right)" is a Signature token
+
+=head1 DESCRIPTION
+
+The parenthesised list after a sub name, lexed while C<signatures> is presumed on.
+
+=cut
+
+use strict;
+use PPI::Token::Prototype ();
+
+our $VERSION = '1.276';
+
+our @ISA = "PPI::Token::Prototype";
+
+1;
+
+=pod
+
+=head1 SUPPORT
+
+See the L<support section|PPI/SUPPORT> in the main module.
+
+=head1 AUTHOR
+
+Adam Kennedy E<lt>adamk@cpan.orgE<gt>
+
+=head1 COPYRIGHT
+
+Copyright 2001 - 2011 Adam Kennedy.
+
+This program is free software; you can redistribute
+it and/or modify it under the same terms as Perl itself.
+
+The full text of the license can be found in the
+LICENSE file included with this module.
+
+=cut
diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm
index 2874b3da..c46f149c 100644
--- a/lib/PPI/Token/Whitespace.pm
+++ b/lib/PPI/Token/Whitespace.pm
@@ -212,8 +212,21 @@ sub __TOKENIZER__on_char {
 		# 2. The one before that is the word 'sub'.
 		# 3. The one before that is a 'structure'
 
-		# Get the three previous significant tokens
-		my @tokens = $t->_previous_significant_tokens(3);
+		# Get at least the three previous significant tokens, and extend the
+		# retrieval range to include at least one token that can walk the
+		# already generated tree. (i.e. has a parent)
+		my ( $tokens_to_get, @tokens ) = (3);
+		while ( !@tokens or ( $tokens[-1] and !$tokens[-1]->parent ) ) {
+			@tokens = $t->_previous_significant_tokens($tokens_to_get);
+			last if @tokens < $tokens_to_get;
+			$tokens_to_get++;
+		}
+
+		my ($closest_parented_token) = grep $_->parent, @tokens;
+		die "no parented element found" unless    #
+		  $closest_parented_token ||= $t->_document;
+		return 'Signature'
+		  if $closest_parented_token->presumed_features->{signatures};
 
 		# A normal subroutine declaration
 		my $p1 = $tokens[1];
diff --git a/t/feature_tracking.t b/t/feature_tracking.t
new file mode 100644
index 00000000..f7261c3e
--- /dev/null
+++ b/t/feature_tracking.t
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+
+use lib 't/lib';
+use PPI::Test::pragmas;
+use Test::More tests => 3 + ( $ENV{AUTHOR_TESTING} ? 1 : 0 );
+
+use B 'perlstring';
+
+use PPI ();
+
+#use DB::Skip subs => [
+#	qw( PPI::Document::new  PPI::Lexer::lex_source  PPI::Lexer::new
+#	  PPI::Lexer::_clear  PPI::Lexer::(eval)  PPI::Lexer::X_TOKENIZER
+#	  PPI::Tokenizer::new  PPI::Lexer::lex_tokenizer  PPI::Node::new  ),
+#	qr/^PPI::Tokenizer::__ANON__.*237.*$/
+#];
+
+sub test_document;
+
+FEATURE_TRACKING: {    # "use 5.035" makes the following sub lex a Signature
+	test_document
+	  <<'END_PERL',
+		sub meep(&$) {}
+		use 5.035;
+		sub marp($left, $right) {}
+END_PERL
+	  [
+		'PPI::Statement::Sub'       => 'sub meep(&$) {}',
+		'PPI::Token::Word'          => 'sub',
+		'PPI::Token::Word'          => 'meep',
+		'PPI::Token::Prototype'     => '(&$)',
+		'PPI::Structure::Block'     => '{}',
+		'PPI::Token::Structure'     => '{',
+		'PPI::Token::Structure'     => '}',
+		'PPI::Statement::Include'   => 'use 5.035;',
+		'PPI::Token::Word'          => 'use',
+		'PPI::Token::Number::Float' => '5.035',
+		'PPI::Token::Structure'     => ';',
+		'PPI::Statement::Sub'       => 'sub marp($left, $right) {}',
+		'PPI::Token::Word'          => 'sub',
+		'PPI::Token::Word'          => 'marp',
+		'PPI::Token::Signature'     => '($left, $right)', # the point of this test
+		'PPI::Structure::Block'     => '{}',
+		'PPI::Token::Structure'     => '{',
+		'PPI::Token::Structure'     => '}',
+	  ],
+	  "enabling of features";
+}
+
+DOCUMENT_FEATURES: {    # constructor feature_mods applies from the first sub on
+	test_document
+	  <<'END_PERL',
+		sub meep(&$) {}
+		sub marp($left, $right) {}
+END_PERL
+	  [
+		'PPI::Statement::Sub'   => 'sub meep(&$) {}',
+		'PPI::Token::Word'      => 'sub',
+		'PPI::Token::Word'      => 'meep',
+		'PPI::Token::Signature' => '(&$)',
+		'PPI::Structure::Block' => '{}',
+		'PPI::Token::Structure' => '{',
+		'PPI::Token::Structure' => '}',
+		'PPI::Statement::Sub'   => 'sub marp($left, $right) {}',
+		'PPI::Token::Word'      => 'sub',
+		'PPI::Token::Word'      => 'marp',
+		'PPI::Token::Signature' => '($left, $right)',
+		'PPI::Structure::Block' => '{}',
+		'PPI::Token::Structure' => '{',
+		'PPI::Token::Structure' => '}',
+	  ],
+	  "document-level default features",
+	  feature_mods => { signatures => 1 };
+}
+
+DISABLE_FEATURE: {    # "no feature" overrides the document default afterwards
+	test_document
+	  <<'END_PERL',
+		sub meep(&$) {}
+		no feature 'signatures';
+		sub marp($left, $right) {}
+END_PERL
+	  [
+		'PPI::Statement::Sub'       => 'sub meep(&$) {}',
+		'PPI::Token::Word'          => 'sub',
+		'PPI::Token::Word'          => 'meep',
+		'PPI::Token::Signature'     => '(&$)',
+		'PPI::Structure::Block'     => '{}',
+		'PPI::Token::Structure'     => '{',
+		'PPI::Token::Structure'     => '}',
+		'PPI::Statement::Include'   => q|no feature 'signatures';|,
+		'PPI::Token::Word'          => 'no',
+		'PPI::Token::Word'          => 'feature',
+		'PPI::Token::Quote::Single' => q|'signatures'|,
+		'PPI::Token::Structure'     => ';',
+		'PPI::Statement::Sub'       => 'sub marp($left, $right) {}',
+		'PPI::Token::Word'          => 'sub',
+		'PPI::Token::Word'          => 'marp',
+		'PPI::Token::Prototype'     => '($left, $right)',
+		'PPI::Structure::Block'     => '{}',
+		'PPI::Token::Structure'     => '{',
+		'PPI::Token::Structure'     => '}',
+	  ],
+	  "disabling of features",
+	  feature_mods => { signatures => 1 };
+}
+
+### TODO: helpers below are copied from ppi_token_unknown.t; deduplicate
+
+sub one_line_explain {
+	my ($data) = @_;
+	my $flat = join "", explain $data;
+	$flat =~ s/\n//g;    # same text as stripping each chunk before joining
+	return $flat;
+}
+
+sub main_level_line {
+	return "" unless $TODO;
+
+	# Walk up to the outermost call frame and remember its line number, so
+	# TODO output names the line of the top-level test invocation.
+	my ( $depth, $line ) = (0);
+	while ( my @frame = caller( $depth++ ) ) { $line = $frame[2] }
+	return "l $line - ";
+}
+
+sub test_document {
+	my ( $code, $expected, $msg, @args ) = @_;
+	local $Test::Builder::Level = $Test::Builder::Level + 1;
+	$msg = perlstring $code unless defined $msg;
+
+	# Flatten the parsed document into (class, content) pairs of all
+	# significant elements for comparison against $expected.
+	my $doc    = PPI::Document->new( \$code, @args );
+	my $found  = $doc->find( sub { $_[1]->significant } );
+	my @actual = map { ref($_), $_->content } @$found;
+
+	return if is_deeply( \@actual, $expected, main_level_line . $msg );
+	diag ">>> $code -- $msg\n";
+	diag one_line_explain \@actual;
+	diag one_line_explain $expected;
+
+	return;
+}