diff --git a/.gitignore b/.gitignore index 206eb924..61ba0712 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,5 @@ /inc /pm_to_blib /MANIFEST +/nytprof* +/cover_db diff --git a/Changes b/Changes index 12c17f8a..80fb3e42 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,32 @@ Revision history for Perl extension PPI +1.221_01 + Summary: + - support Perl 5.12 "package NAMESPACE VERSION BLOCK" syntax + - incompatible behavior fixes on PPI::Token::Attribute->parameters + + Details: + - support Perl 5.12 "package NAMESPACE VERSION BLOCK" syntax + (RT #67831, GitHub #70) (BDFOY, MOREGAN) + - Prevent package names like 'x' from being parsed as operators + (GitHub #75) (MOREGAN) + - Prevent package names like 'v10' from being parsed as version + strings (GitHub #76) (MOREGAN) + - Prevent sub names like 'v10' from being parsed as version + strings (RT #74527, GitHub #65) (JAE, MOREGAN) + - Prevent 'use' and 'no' package names from being parsed as + operators (MOREGAN) + - Prevent left side of fat comma from parsing as + operators (MOREGAN) + - Fix 1.218 regression where packages, subs, and words after + labels like /^x\d+/ would parse as x operator (GitHub #122) + (MOREGAN) + - Fix misparsing of sub attributes separated by whitespace + (GitHub #117) (MOREGAN) + - Make PPI::Token::Attribute->parameters return undef not empty + string when attribute has no parameters (GitHub #116) (MOREGAN) + - Unit tests for PPI::Token::Attribute (GitHub #115) (MOREGAN) + 1.220 Tue 11 Nov 2014 Summary: - incompatible behavior fixes on PPI::Statement::Sub->prototype diff --git a/Makefile.PL b/Makefile.PL index ce565279..2f1a6a8a 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -40,6 +40,7 @@ test_requires 'Test::More' => '0.86'; test_requires 'Test::NoWarnings' => '0.084'; test_requires 'Test::Object' => '0.07'; test_requires 'Test::SubCalls' => '1.07'; +test_requires 'Test::Deep'; # Force the existence of the weaken function # (which some distributions annoyingly don't have) diff --git 
a/README.md b/README.md index 4bb2c5ee..b1c30a15 100644 --- a/README.md +++ b/README.md @@ -1,793 +1,773 @@ -# NAME - -PPI - Parse, Analyze and Manipulate Perl (without perl) - -# SYNOPSIS - - use PPI; - - # Create a new empty document - my $Document = PPI::Document->new; - - # Create a document from source - $Document = PPI::Document->new(\'print "Hello World!\n"'); - - # Load a Document from a file - $Document = PPI::Document->new('Module.pm'); - - # Does it contain any POD? - if ( $Document->find_any('PPI::Token::Pod') ) { - print "Module contains POD\n"; - } - - # Get the name of the main package - $pkg = $Document->find_first('PPI::Statement::Package')->namespace; - - # Remove all that nasty documentation - $Document->prune('PPI::Token::Pod'); - $Document->prune('PPI::Token::Comment'); - - # Save the file - $Document->save('Module.pm.stripped'); - -# DESCRIPTION - -## About this Document - -This is the PPI manual. It describes its reason for existing, its general -structure, its use, an overview of the API, and provides a few -implementation samples. - -## Background - -The ability to read, and manipulate Perl (the language) programmatically -other than with perl (the application) was one that caused difficulty -for a long time. - -The cause of this problem was Perl's complex and dynamic grammar. -Although there is typically not a huge diversity in the grammar of most -Perl code, certain issues cause large problems when it comes to parsing. - -Indeed, quite early in Perl's history Tom Christiansen introduced the Perl -community to the quote _"Nothing but perl can parse Perl"_, or as it is -more often stated now as a truism: - -**"Only perl can parse Perl"** - -One example of the sorts of things the prevent Perl being easily parsed are -function signatures, as demonstrated by the following. - - @result = (dothis $foo, $bar); - - # Which of the following is it equivalent to? 
- @result = (dothis($foo), $bar); - @result = dothis($foo, $bar); - -The first line above can be interpreted in two different ways, depending -on whether the `&dothis` function is expecting one argument, or two, -or several. - -A "code parser" (something that parses for the purpose of execution) such -as perl needs information that is not found in the immediate vicinity of -the statement being parsed. - -The information might not just be elsewhere in the file, it might not even be -in the same file at all. It might also not be able to determine this -information without the prior execution of a `BEGIN {}` block, or the -loading and execution of one or more external modules. Or worse the &dothis -function may not even have been written yet. - -**When parsing Perl as code, you must also execute it** - -Even perl itself never really fully understands the structure of the source -code after and indeed **as** it processes it, and in that sense doesn't -"parse" Perl source into anything remotely like a structured document. -This makes it of no real use for any task that needs to treat the source -code as a document, and do so reliably and robustly. - -For more information on why it is impossible to parse perl, see Randal -Schwartz's seminal response to the question of "Why can't you parse Perl". - -[http://www.perlmonks.org/index.pl?node\_id=44722](http://www.perlmonks.org/index.pl?node_id=44722) - -The purpose of PPI is **not** to parse Perl _Code_, but to parse Perl -_Documents_. By treating the problem this way, we are able to parse a -single file containing Perl source code "isolated" from any other -resources, such as libraries upon which the code may depend, and -without needing to run an instance of perl alongside or inside the parser. - -Historically, using an embedded perl parser was widely considered to be -the most likely avenue for finding a solution to `Parse::Perl`. 
It was -investigated from time to time and attempts have generally failed or -suffered from sufficiently bad corner cases that they were abandoned. - -## What Does PPI Stand For? - -`PPI` is an acronym for the longer original module name -`Parse::Perl::Isolated`. And in the spirit or the silly acronym games -played by certain unnamed Open Source projects you may have _hurd_ of, -it also a reverse backronym of "I Parse Perl". - -Of course, I could just be lying and have just made that second bit up -10 minutes before the release of PPI 1.000. Besides, **all** the cool -Perl packages have TLAs (Three Letter Acronyms). It's a rule or something. - -Why don't you just think of it as the **Perl Parsing Interface** for simplicity. - -The original name was shortened to prevent the author (and you the users) -from contracting RSI by having to type crazy things like -`Parse::Perl::Isolated::Token::QuoteLike::Backtick` 100 times a day. - -In acknowledgment that someone may some day come up with a valid solution -for the grammar problem it was decided at the commencement of the project -to leave the `Parse::Perl` namespace free for any such effort. - -Since that time I've been able to prove to my own satisfaction that it -**is** truly impossible to accurately parse Perl as both code and document -at once. For the academics, parsing Perl suffers from the "Halting Problem". - -With this in mind `Parse::Perl` has now been co-opted as the title for -the SourceForge project that publishes PPI and a large collection of other -applications and modules related to the (document) parsing of Perl source -code. - -You can find this project at [http://sf.net/projects/parseperl](http://sf.net/projects/parseperl), -however we no longer use the SourceForge CVS server. Instead, the -current development version of PPI is available via SVN at -[http://svn.ali.as/cpan/trunk/PPI/](http://svn.ali.as/cpan/trunk/PPI/). - -## Why Parse Perl? 
- -Once you can accept that we will never be able to parse Perl well enough -to meet the standards of things that treat Perl as code, it is worth -re-examining `why` we want to "parse" Perl at all. - -What are the things that people might want a "Perl parser" for. - -- Documentation - - Analyzing the contents of a Perl document to automatically generate - documentation, in parallel to, or as a replacement for, POD documentation. - - Allow an indexer to locate and process all the comments and - documentation from code for "full text search" applications. - -- Structural and Quality Analysis - - Determine quality or other metrics across a body of code, and identify - situations relating to particular phrases, techniques or locations. - - Index functions, variables and packages within Perl code, and doing search - and graph (in the node/edge sense) analysis of large code bases. - -- Refactoring - - Make structural, syntax, or other changes to code in an automated manner, - either independently or in assistance to an editor. This sort of task list - includes backporting, forward porting, partial evaluation, "improving" code, - or whatever. All the sort of things you'd want from a [Perl::Editor](https://metacpan.org/pod/Perl::Editor). - -- Layout - - Change the layout of code without changing its meaning. This includes - techniques such as tidying (like [perltidy](https://metacpan.org/pod/perltidy)), obfuscation, compressing and - "squishing", or to implement formatting preferences or policies. - -- Presentation - - This includes methods of improving the presentation of code, without changing - the content of the code. Modify, improve, syntax colour etc the presentation - of a Perl document. Generating "IntelliText"-like functions. - -If we treat this as a baseline for the sort of things we are going to have -to build on top of Perl, then it becomes possible to identify a standard -for how good a Perl parser needs to be. 
- -## How good is Good Enough(TM) - -PPI seeks to be good enough to achieve all of the above tasks, or to provide -a sufficiently good API on which to allow others to implement modules in -these and related areas. - -However, there are going to be limits to this process. Because PPI cannot -adapt to changing grammars, any code written using source filters should not -be assumed to be parsable. - -At one extreme, this includes anything munged by [Acme::Bleach](https://metacpan.org/pod/Acme::Bleach), as well -as (arguably) more common cases like [Switch](https://metacpan.org/pod/Switch). We do not pretend to be -able to always parse code using these modules, although as long as it still -follows a format that looks like Perl syntax, it may be possible to extend -the lexer to handle them. - -The ability to extend PPI to handle lexical additions to the language is on -the drawing board to be done some time post-1.0 - -The goal for success was originally to be able to successfully parse 99% of -all Perl documents contained in CPAN. This means the entire file in each -case. - -PPI has succeeded in this goal far beyond the expectations of even the -author. At time of writing there are only 28 non-Acme Perl modules in CPAN -that PPI is incapable of parsing. Most of these are so badly broken they -do not compile as Perl code anyway. - -So unless you are actively going out of your way to break PPI, you should -expect that it will handle your code just fine. - -## Internationalisation - -PPI provides partial support for internationalisation and localisation. - -Specifically, it allows the use characters from the Latin-1 character -set to be used in quotes, comments, and POD. Primarily, this covers -languages from Europe and South America. - -PPI does **not** currently provide support for Unicode, although there -is an initial implementation available in a development branch from -CVS. 
- -If you need Unicode support, and would like to help stress test the -Unicode support so we can move it to the main branch and enable it -in the main release should contact the author. (contact details below) - -## Round Trip Safe - -When PPI parses a file it builds **everything** into the model, including -whitespace. This is needed in order to make the Document fully "Round Trip" -safe. - -The general concept behind a "Round Trip" parser is that it knows what it -is parsing is somewhat uncertain, and so **expects** to get things wrong -from time to time. In the cases where it parses code wrongly the tree -will serialize back out to the same string of code that was read in, -repairing the parser's mistake as it heads back out to the file. - -The end result is that if you parse in a file and serialize it back out -without changing the tree, you are guaranteed to get the same file you -started with. PPI does this correctly and reliably for 100% of all known -cases. - -**What goes in, will come out. Every time.** - -The one minor exception at this time is that if the newlines for your file -are wrong (meaning not matching the platform newline format), PPI will -localise them for you. (It isn't to be convenient, supporting -arbitrary newlines would make some of the code more complicated) - -Better control of the newline type is on the wish list though, and -anyone wanting to help out is encouraged to contact the author. - -# IMPLEMENTATION - -## General Layout - -PPI is built upon two primary "parsing" components, [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) -and [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer), and a large tree of about 50 classes which implement -the various the _Perl Document Object Model_ (PDOM). - -The PDOM is conceptually similar in style and intent to the regular DOM or -other code Abstract Syntax Trees (ASTs), but contains some differences -to handle perl-specific cases, and to assist in treating the code as a -document. 
Please note that it is **not** an implementation of the official -Document Object Model specification, only somewhat similar to it. - -On top of the Tokenizer, Lexer and the classes of the PDOM, sit a number -of classes intended to make life a little easier when dealing with PDOM -trees. - -Both the major parsing components were hand-coded from scratch with only -plain Perl code and a few small utility modules. There are no grammar or -patterns mini-languages, no YACC or LEX style tools and only a small number -of regular expressions. - -This is primarily because of the sheer volume of accumulated cruft that -exists in Perl. Not even perl itself is capable of parsing Perl documents -(remember, it just parses and executes it as code). - -As a result, PPI needed to be cruftier than perl itself. Feel free to -shudder at this point, and hope you never have to understand the Tokenizer -codebase. Speaking of which... - -## The Tokenizer - -The Tokenizer takes source code and converts it into a series of tokens. It -does this using a slow but thorough character by character manual process, -rather than using a pattern system or complex regexes. - -Or at least it does so conceptually. If you were to actually trace the code -you would find it's not truly character by character due to a number of -regexps and optimisations throughout the code. This lets the Tokenizer -"skip ahead" when it can find shortcuts, so it tends to jump around a line -a bit wildly at times. - -In practice, the number of times the Tokenizer will **actually** move the -character cursor itself is only about 5% - 10% higher than the number of -tokens contained in the file. This makes it about as optimal as it can be -made without implementing it in something other than Perl. - -In 2001 when PPI was started, this structure made PPI quite slow, and not -really suitable for interactive tasks. 
This situation has improved greatly -with multi-gigahertz processors, but can still be painful when working with -very large files. - -The target parsing rate for PPI is about 5000 lines per gigacycle. It is -currently believed to be at about 1500, and main avenue for making it to -the target speed has now become [PPI::XS](https://metacpan.org/pod/PPI::XS), a drop-in XS accelerator for -PPI. - -Since [PPI::XS](https://metacpan.org/pod/PPI::XS) has only just gotten off the ground and is currently only -at proof-of-concept stage, this may take a little while. Anyone interested -in helping out with [PPI::XS](https://metacpan.org/pod/PPI::XS) is **highly** encouraged to contact the -author. In fact, the design of [PPI::XS](https://metacpan.org/pod/PPI::XS) means it's possible to port -one function at a time safely and reliably. So every little bit will help. - -## The Lexer - -The Lexer takes a token stream, and converts it to a lexical tree. Because -we are parsing Perl **documents** this includes whitespace, comments, and -all number of weird things that have no relevance when code is actually -executed. - -An instantiated [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer) consumes [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) objects and -produces [PPI::Document](https://metacpan.org/pod/PPI::Document) objects. However you should probably never be -working with the Lexer directly. You should just be able to create -[PPI::Document](https://metacpan.org/pod/PPI::Document) objects and work with them directly. - -## The Perl Document Object Model - -The PDOM is a structured collection of data classes that together provide -a correct and scalable model for documents that follow the standard Perl -syntax. - -## The PDOM Class Tree - -The following lists all of the 67 current PDOM classes, listing with indentation -based on inheritance. 
- - PPI::Element - PPI::Node - PPI::Document - PPI::Document::Fragment - PPI::Statement - PPI::Statement::Package - PPI::Statement::Include - PPI::Statement::Sub - PPI::Statement::Scheduled - PPI::Statement::Compound - PPI::Statement::Break - PPI::Statement::Given - PPI::Statement::When - PPI::Statement::Data - PPI::Statement::End - PPI::Statement::Expression - PPI::Statement::Variable - PPI::Statement::Null - PPI::Statement::UnmatchedBrace - PPI::Statement::Unknown - PPI::Structure - PPI::Structure::Block - PPI::Structure::Subscript - PPI::Structure::Constructor - PPI::Structure::Condition - PPI::Structure::List - PPI::Structure::For - PPI::Structure::Given - PPI::Structure::When - PPI::Structure::Unknown - PPI::Token - PPI::Token::Whitespace - PPI::Token::Comment - PPI::Token::Pod - PPI::Token::Number - PPI::Token::Number::Binary - PPI::Token::Number::Octal - PPI::Token::Number::Hex - PPI::Token::Number::Float - PPI::Token::Number::Exp - PPI::Token::Number::Version - PPI::Token::Word - PPI::Token::DashedWord - PPI::Token::Symbol - PPI::Token::Magic - PPI::Token::ArrayIndex - PPI::Token::Operator - PPI::Token::Quote - PPI::Token::Quote::Single - PPI::Token::Quote::Double - PPI::Token::Quote::Literal - PPI::Token::Quote::Interpolate - PPI::Token::QuoteLike - PPI::Token::QuoteLike::Backtick - PPI::Token::QuoteLike::Command - PPI::Token::QuoteLike::Regexp - PPI::Token::QuoteLike::Words - PPI::Token::QuoteLike::Readline - PPI::Token::Regexp - PPI::Token::Regexp::Match - PPI::Token::Regexp::Substitute - PPI::Token::Regexp::Transliterate - PPI::Token::HereDoc - PPI::Token::Cast - PPI::Token::Structure - PPI::Token::Label - PPI::Token::Separator - PPI::Token::Data - PPI::Token::End - PPI::Token::Prototype - PPI::Token::Attribute - PPI::Token::Unknown - -To summarize the above layout, all PDOM objects inherit from the -[PPI::Element](https://metacpan.org/pod/PPI::Element) class. 
- -Under this are [PPI::Token](https://metacpan.org/pod/PPI::Token), strings of content with a known type, -and [PPI::Node](https://metacpan.org/pod/PPI::Node), syntactically significant containers that hold other -Elements. - -The three most important of these are the [PPI::Document](https://metacpan.org/pod/PPI::Document), the -[PPI::Statement](https://metacpan.org/pod/PPI::Statement) and the [PPI::Structure](https://metacpan.org/pod/PPI::Structure) classes. - -## The Document, Statement and Structure - -At the top of all complete PDOM trees is a [PPI::Document](https://metacpan.org/pod/PPI::Document) object. It -represents a complete file of Perl source code as you might find it on -disk. - -There are some specialised types of document, such as [PPI::Document::File](https://metacpan.org/pod/PPI::Document::File) -and [PPI::Document::Normalized](https://metacpan.org/pod/PPI::Document::Normalized) but for the purposes of the PDOM they are -all just considered to be the same thing. - -Each Document will contain a number of **Statements**, **Structures** and -**Tokens**. - -A [PPI::Statement](https://metacpan.org/pod/PPI::Statement) is any series of Tokens and Structures that are treated -as a single contiguous statement by perl itself. You should note that a -Statement is as close as PPI can get to "parsing" the code in the sense that -perl-itself parses Perl code when it is building the op-tree. - -Because of the isolation and Perl's syntax, it is provably impossible for -PPI to accurately determine precedence of operators or which tokens are -implicit arguments to a sub call. - -So rather than lead you on with a bad guess that has a strong chance of -being wrong, PPI does not attempt to determine precedence or sub parameters -at all. - -At a fundamental level, it only knows that this series of elements -represents a single Statement as perl sees it, but it can do so with -enough certainty that it can be trusted. 
- -However, for specific Statement types the PDOM is able to derive additional -useful information about their meaning. For the best, most useful, and most -heavily used example, see [PPI::Statement::Include](https://metacpan.org/pod/PPI::Statement::Include). - -A [PPI::Structure](https://metacpan.org/pod/PPI::Structure) is any series of tokens contained within matching braces. -This includes code blocks, conditions, function argument braces, anonymous -array and hash constructors, lists, scoping braces and all other syntactic -structures represented by a matching pair of braces, including (although it -may not seem obvious at first) `` braces. - -Each Structure contains none, one, or many Tokens and Structures (the rules -for which vary for the different Structure subclasses) - -Under the PDOM structure rules, a Statement can **never** directly contain -another child Statement, a Structure can **never** directly contain another -child Structure, and a Document can **never** contain another Document -anywhere in the tree. - -Aside from these three rules, the PDOM tree is extremely flexible. - -## The PDOM at Work - -To demonstrate the PDOM in use lets start with an example showing how the -tree might look for the following chunk of simple Perl code. - - #!/usr/bin/perl - - print( "Hello World!" ); - - exit(); - -Translated into a PDOM tree it would have the following structure (as shown -via the included [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper)). - - PPI::Document - PPI::Token::Comment '#!/usr/bin/perl\n' - PPI::Token::Whitespace '\n' - PPI::Statement - PPI::Token::Word 'print' - PPI::Structure::List ( ... ) - PPI::Token::Whitespace ' ' - PPI::Statement::Expression - PPI::Token::Quote::Double '"Hello World!"' - PPI::Token::Whitespace ' ' - PPI::Token::Structure ';' - PPI::Token::Whitespace '\n' - PPI::Token::Whitespace '\n' - PPI::Statement - PPI::Token::Word 'exit' - PPI::Structure::List ( ... 
) - PPI::Token::Structure ';' - PPI::Token::Whitespace '\n' - -Please note that in this example, strings are only listed for the -**actual** [PPI::Token](https://metacpan.org/pod/PPI::Token) that contains that string. Structures are listed -with the type of brace characters it represents noted. - -The [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper) module can be used to generate similar trees yourself. - -We can make that PDOM dump a little easier to read if we strip out all the -whitespace. Here it is again, sans the distracting whitespace tokens. - - PPI::Document - PPI::Token::Comment '#!/usr/bin/perl\n' - PPI::Statement - PPI::Token::Word 'print' - PPI::Structure::List ( ... ) - PPI::Statement::Expression - PPI::Token::Quote::Double '"Hello World!"' - PPI::Token::Structure ';' - PPI::Statement - PPI::Token::Word 'exit' - PPI::Structure::List ( ... ) - PPI::Token::Structure ';' - -As you can see, the tree can get fairly deep at time, especially when every -isolated token in a bracket becomes its own statement. This is needed to -allow anything inside the tree the ability to grow. It also makes the -search and analysis algorithms much more flexible. - -Because of the depth and complexity of PDOM trees, a vast number of very easy -to use methods have been added wherever possible to help people working with -PDOM trees do normal tasks relatively quickly and efficiently. - -## Overview of the Primary Classes - -The main PPI classes, and links to their own documentation, are listed -here in alphabetical order. - -- [PPI::Document](https://metacpan.org/pod/PPI::Document) - - The Document object, the root of the PDOM. - -- [PPI::Document::Fragment](https://metacpan.org/pod/PPI::Document::Fragment) - - A cohesive fragment of a larger Document. Although not of any real current - use, it is needed for use in certain internal tree manipulation - algorithms. - - For example, doing things like cut/copy/paste etc. 
Very similar to a - [PPI::Document](https://metacpan.org/pod/PPI::Document), but has some additional methods and does not represent - a lexical scope boundary. - - A document fragment is also non-serializable, and so cannot be written out - to a file. - -- [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper) - - A simple class for dumping readable debugging versions of PDOM structures, - such as in the demonstration above. - -- [PPI::Element](https://metacpan.org/pod/PPI::Element) - - The Element class is the abstract base class for all objects within the PDOM - -- [PPI::Find](https://metacpan.org/pod/PPI::Find) - - Implements an instantiable object form of a PDOM tree search. - -- [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer) - - The PPI Lexer. Converts Token streams into PDOM trees. - -- [PPI::Node](https://metacpan.org/pod/PPI::Node) - - The Node object, the abstract base class for all PDOM objects that can - contain other Elements, such as the Document, Statement and Structure - objects. - -- [PPI::Statement](https://metacpan.org/pod/PPI::Statement) - - The base class for all Perl statements. Generic "evaluate for side-effects" - statements are of this actual type. Other more interesting statement types - belong to one of its children. - - See it's own documentation for a longer description and list of all of the - different statement types and sub-classes. - -- [PPI::Structure](https://metacpan.org/pod/PPI::Structure) - - The abstract base class for all structures. A Structure is a language - construct consisting of matching braces containing a set of other elements. - - See the [PPI::Structure](https://metacpan.org/pod/PPI::Structure) documentation for a description and - list of all of the different structure types and sub-classes. - -- [PPI::Token](https://metacpan.org/pod/PPI::Token) - - A token is the basic unit of content. At its most basic, a Token is just - a string tagged with metadata (its class, and some additional flags in - some cases). 
- -- [PPI::Token::\_QuoteEngine](https://metacpan.org/pod/PPI::Token::_QuoteEngine) - - The [PPI::Token::Quote](https://metacpan.org/pod/PPI::Token::Quote) and [PPI::Token::QuoteLike](https://metacpan.org/pod/PPI::Token::QuoteLike) classes provide - abstract base classes for the many and varied types of quote and - quote-like things in Perl. However, much of the actual quote login is - implemented in a separate quote engine, based at - [PPI::Token::\_QuoteEngine](https://metacpan.org/pod/PPI::Token::_QuoteEngine). - - Classes that inherit from [PPI::Token::Quote](https://metacpan.org/pod/PPI::Token::Quote), [PPI::Token::QuoteLike](https://metacpan.org/pod/PPI::Token::QuoteLike) - and [PPI::Token::Regexp](https://metacpan.org/pod/PPI::Token::Regexp) are generally parsed only by the Quote Engine. - -- [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) - - The PPI Tokenizer. One Tokenizer consumes a chunk of text and provides - access to a stream of [PPI::Token](https://metacpan.org/pod/PPI::Token) objects. - - The Tokenizer is very very complicated, to the point where even the author - treads carefully when working with it. - - Most of the complication is the result of optimizations which have tripled - the tokenization speed, at the expense of maintainability. We cope with the - spaghetti by heavily commenting everything. - -- [PPI::Transform](https://metacpan.org/pod/PPI::Transform) - - The Perl Document Transformation API. Provides a standard interface and - abstract base class for objects and classes that manipulate Documents. - -# INSTALLING - -The core PPI distribution is pure Perl and has been kept as tight as -possible and with as few dependencies as possible. - -It should download and install normally on any platform from within -the CPAN and CPANPLUS applications, or directly using the distribution -tarball. If installing by hand, you may need to install a few small -utility modules first. The exact ones will depend on your version of -perl. 
- -There are no special install instructions for PPI, and the normal -`Perl Makefile.PL`, `make`, `make test`, `make install` instructions -apply. - -# EXTENDING - -The PPI namespace itself is reserved for the sole use of the modules under -the umbrella of the `Parse::Perl` SourceForge project. - -[http://sf.net/projects/parseperl](http://sf.net/projects/parseperl) - -You are recommended to use the PPIx:: namespace for PPI-specific -modifications or prototypes thereof, or Perl:: for modules which provide -a general Perl language-related functions. - -If what you wish to implement looks like it fits into PPIx:: namespace, -you should consider contacting the `Parse::Perl` mailing list (detailed on -the SourceForge site) first, as what you want may already be in progress, -or you may wish to consider joining the team and doing it within the -`Parse::Perl` project itself. - -# TO DO - -\- Many more analysis and utility methods for PDOM classes - -\- Creation of a PPI::Tutorial document - -\- Add many more key functions to PPI::XS - -\- We can **always** write more and better unit tests - -\- Complete the full implementation of ->literal (1.200) - -\- Full understanding of scoping (due 1.300) - -# SUPPORT - -This module is stored in an Open Repository at the following address. - -[http://svn.ali.as/cpan/trunk/PPI](http://svn.ali.as/cpan/trunk/PPI) - -Write access to the repository is made available automatically to any -published CPAN author, and to most other volunteers on request. - -If you are able to submit your bug report in the form of new (failing) -unit tests, or can apply your fix directly instead of submitting a patch, -you are **strongly** encouraged to do so, as the author currently maintains -over 100 modules and it can take some time to deal with non-"Critical" bug -reports or patches. - -This will also guarantee that your issue will be addressed in the next -release of the module. 
- -For large changes though, please consider creating a branch so that they -can be properly reviewed and trialed before being applied to the trunk. - -If you cannot provide a direct test or fix, or don't have time to do so, -then regular bug reports are still accepted and appreciated via the -GitHub bug tracker. - -[https://github.com/adamkennedy/PPI/issues](https://github.com/adamkennedy/PPI/issues) - -For other issues or questions, contact the `Parse::Perl` project mailing -list. - -For commercial or media-related enquiries, or to have your SVN commit bit -enabled, contact the author. - -# AUTHOR - -Adam Kennedy - -# ACKNOWLEDGMENTS - -A huge thank you to Phase N Australia ([http://phase-n.com/](http://phase-n.com/)) for -permitting the original open sourcing and release of this distribution -from what was originally several thousand hours of commercial work. - -Another big thank you to The Perl Foundation -([http://www.perlfoundation.org/](http://www.perlfoundation.org/)) for funding for the final big -refactoring and completion run. - -Also, to the various co-maintainers that have contributed both large and -small with tests and patches and especially to those rare few who have -deep-dived into the guts to (gasp) add a feature. - - - Dan Brook : PPIx::XPath, Acme::PerlML - - Audrey Tang : "Line Noise" Testing - - Arjen Laarhoven : Three-element ->location support - - Elliot Shank : Perl 5.10 support, five-element ->location - -And finally, thanks to those brave ( and foolish :) ) souls willing to dive -in and use, test drive and provide feedback on PPI before version 1.000, -in some cases before it made it to beta quality, and still did extremely -distasteful things (like eating 50 meg of RAM a second). - -I owe you all a beer. Corner me somewhere and collect at your convenience. -If I missed someone who wasn't in my email history, thank you too :) - - # In approximate order of appearance - - Claes Jacobsson - - Michael Schwern - - Jeff T. 
Parsons - - CPAN Author "CHOCOLATEBOY" - - Robert Rotherberg - - CPAN Author "PODMASTER" - - Richard Soderberg - - Nadim ibn Hamouda el Khemir - - Graciliano M. P. - - Leon Brocard - - Jody Belka - - Curtis Ovid - - Yuval Kogman - - Michael Schilli - - Slaven Rezic - - Lars Thegler - - Tony Stubblebine - - Tatsuhiko Miyagawa - - CPAN Author "CHROMATIC" - - Matisse Enzer - - Roy Fulbright - - Dan Brook - - Johnny Lee - - Johan Lindstrom - -And to single one person out, thanks go to Randal Schwartz who -spent a great number of hours in IRC over a critical 6 month period -explaining why Perl is impossibly unparsable and constantly shoving evil -and ugly corner cases in my face. He remained a tireless devil's advocate, -and without his support this project genuinely could never have been -completed. - -So for my schooling in the Deep Magiks, you have my deepest gratitude Randal. - -# COPYRIGHT - -Copyright 2001 - 2011 Adam Kennedy. - -This program is free software; you can redistribute -it and/or modify it under the same terms as Perl itself. - -The full text of the license can be found in the -LICENSE file included with this module. +# NAME + +PPI - Parse, Analyze and Manipulate Perl (without perl) + +# SYNOPSIS + + use PPI; + + # Create a new empty document + my $Document = PPI::Document->new; + + # Create a document from source + $Document = PPI::Document->new(\'print "Hello World!\n"'); + + # Load a Document from a file + $Document = PPI::Document->new('Module.pm'); + + # Does it contain any POD? + if ( $Document->find_any('PPI::Token::Pod') ) { + print "Module contains POD\n"; + } + + # Get the name of the main package + $pkg = $Document->find_first('PPI::Statement::Package')->namespace; + + # Remove all that nasty documentation + $Document->prune('PPI::Token::Pod'); + $Document->prune('PPI::Token::Comment'); + + # Save the file + $Document->save('Module.pm.stripped'); + +# DESCRIPTION + +## About this Document + +This is the PPI manual. 
It describes its reason for existing, its general +structure, its use, an overview of the API, and provides a few +implementation samples. + +## Background + +The ability to read, and manipulate Perl (the language) programmatically +other than with perl (the application) was one that caused difficulty +for a long time. + +The cause of this problem was Perl's complex and dynamic grammar. +Although there is typically not a huge diversity in the grammar of most +Perl code, certain issues cause large problems when it comes to parsing. + +Indeed, quite early in Perl's history Tom Christiansen introduced the Perl +community to the quote _"Nothing but perl can parse Perl"_, or as it is +more often stated now as a truism: + +**"Only perl can parse Perl"** + +One example of the sorts of things the prevent Perl being easily parsed are +function signatures, as demonstrated by the following. + + @result = (dothis $foo, $bar); + + # Which of the following is it equivalent to? + @result = (dothis($foo), $bar); + @result = dothis($foo, $bar); + +The first line above can be interpreted in two different ways, depending +on whether the `&dothis` function is expecting one argument, or two, +or several. + +A "code parser" (something that parses for the purpose of execution) such +as perl needs information that is not found in the immediate vicinity of +the statement being parsed. + +The information might not just be elsewhere in the file, it might not even be +in the same file at all. It might also not be able to determine this +information without the prior execution of a `BEGIN {}` block, or the +loading and execution of one or more external modules. Or worse the &dothis +function may not even have been written yet. 
+ +**When parsing Perl as code, you must also execute it** + +Even perl itself never really fully understands the structure of the source +code after and indeed **as** it processes it, and in that sense doesn't +"parse" Perl source into anything remotely like a structured document. +This makes it of no real use for any task that needs to treat the source +code as a document, and do so reliably and robustly. + +For more information on why it is impossible to parse perl, see Randal +Schwartz's seminal response to the question of "Why can't you parse Perl". + +[http://www.perlmonks.org/index.pl?node\_id=44722](http://www.perlmonks.org/index.pl?node_id=44722) + +The purpose of PPI is **not** to parse Perl _Code_, but to parse Perl +_Documents_. By treating the problem this way, we are able to parse a +single file containing Perl source code "isolated" from any other +resources, such as libraries upon which the code may depend, and +without needing to run an instance of perl alongside or inside the parser. + +Historically, using an embedded perl parser was widely considered to be +the most likely avenue for finding a solution to parsing Perl. It has been +investigated from time to time, but attempts have generally failed or +suffered from sufficiently bad corner cases that they were abandoned. + +## What Does PPI Stand For? + +`PPI` is an acronym for the longer original module name +`Parse::Perl::Isolated`. And in the spirit of the silly acronym games +played by certain unnamed Open Source projects you may have _hurd_ of, +it is also a reverse backronym of "I Parse Perl". + +Of course, I could just be lying and have just made that second bit up +10 minutes before the release of PPI 1.000. Besides, **all** the cool +Perl packages have TLAs (Three Letter Acronyms). It's a rule or something. + +Why don't you just think of it as the **Perl Parsing Interface** for simplicity.
+ +The original name was shortened to prevent the author (and you the users) +from contracting RSI by having to type crazy things like +`Parse::Perl::Isolated::Token::QuoteLike::Backtick` 100 times a day. + +In acknowledgment that someone may some day come up with a valid solution +for the grammar problem it was decided at the commencement of the project +to leave the `Parse::Perl` namespace free for any such effort. + +Since that time I've been able to prove to my own satisfaction that it +**is** truly impossible to accurately parse Perl as both code and document +at once. For the academics, parsing Perl suffers from the "Halting Problem". + +## Why Parse Perl? + +Once you can accept that we will never be able to parse Perl well enough +to meet the standards of things that treat Perl as code, it is worth +re-examining `why` we want to "parse" Perl at all. + +What are the things that people might want a "Perl parser" for. + +- Documentation + + Analyzing the contents of a Perl document to automatically generate + documentation, in parallel to, or as a replacement for, POD documentation. + + Allow an indexer to locate and process all the comments and + documentation from code for "full text search" applications. + +- Structural and Quality Analysis + + Determine quality or other metrics across a body of code, and identify + situations relating to particular phrases, techniques or locations. + + Index functions, variables and packages within Perl code, and doing search + and graph (in the node/edge sense) analysis of large code bases. + + [Perl::Critic](https://metacpan.org/pod/Perl::Critic), based on PPI, is a large, thriving tool for bug detection + and style analysis of Perl code. + +- Refactoring + + Make structural, syntax, or other changes to code in an automated manner, + either independently or in assistance to an editor. This sort of task list + includes backporting, forward porting, partial evaluation, "improving" code, + or whatever. 
All the sort of things you'd want from a [Perl::Editor](https://metacpan.org/pod/Perl::Editor). + +- Layout + + Change the layout of code without changing its meaning. This includes + techniques such as tidying (like [perltidy](https://metacpan.org/pod/perltidy)), obfuscation, compressing and + "squishing", or to implement formatting preferences or policies. + +- Presentation + + This includes methods of improving the presentation of code, without changing + the content of the code. Modify, improve, syntax colour etc the presentation + of a Perl document. Generating "IntelliText"-like functions. + +If we treat this as a baseline for the sort of things we are going to have +to build on top of Perl, then it becomes possible to identify a standard +for how good a Perl parser needs to be. + +## How good is Good Enough(TM) + +PPI seeks to be good enough to achieve all of the above tasks, or to provide +a sufficiently good API on which to allow others to implement modules in +these and related areas. + +However, there are going to be limits to this process. Because PPI cannot +adapt to changing grammars, any code written using source filters should not +be assumed to be parsable. + +At one extreme, this includes anything munged by [Acme::Bleach](https://metacpan.org/pod/Acme::Bleach), as well +as (arguably) more common cases like [Switch](https://metacpan.org/pod/Switch). We do not pretend to be +able to always parse code using these modules, although as long as it still +follows a format that looks like Perl syntax, it may be possible to extend +the lexer to handle them. + +The ability to extend PPI to handle lexical additions to the language is on +the drawing board to be done some time post-1.0 + +The goal for success was originally to be able to successfully parse 99% of +all Perl documents contained in CPAN. This means the entire file in each +case. + +PPI has succeeded in this goal far beyond the expectations of even the +author. 
At time of writing there are only 28 non-Acme Perl modules in CPAN +that PPI is incapable of parsing. Most of these are so badly broken they +do not compile as Perl code anyway. + +So unless you are actively going out of your way to break PPI, you should +expect that it will handle your code just fine. + +## Internationalisation + +PPI provides partial support for internationalisation and localisation. + +Specifically, it allows characters from the Latin-1 character +set to be used in quotes, comments, and POD. Primarily, this covers +languages from Europe and South America. + +PPI does **not** currently provide support for Unicode. +If you need Unicode support and would like to help, +contact the author. (contact details below) + +## Round Trip Safe + +When PPI parses a file it builds **everything** into the model, including +whitespace. This is needed in order to make the Document fully "Round Trip" +safe. + +The general concept behind a "Round Trip" parser is that it knows what it +is parsing is somewhat uncertain, and so **expects** to get things wrong +from time to time. In the cases where it parses code wrongly the tree +will serialize back out to the same string of code that was read in, +repairing the parser's mistake as it heads back out to the file. + +The end result is that if you parse in a file and serialize it back out +without changing the tree, you are guaranteed to get the same file you +started with. PPI does this correctly and reliably for 100% of all known +cases. + +**What goes in, will come out. Every time.** + +The one minor exception at this time is that if the newlines for your file +are wrong (meaning not matching the platform newline format), PPI will +localise them for you. (This isn't done for convenience; supporting +arbitrary newlines would make some of the code more complicated.) + +Better control of the newline type is on the wish list though, and +anyone wanting to help out is encouraged to contact the author.
+ +# IMPLEMENTATION + +## General Layout + +PPI is built upon two primary "parsing" components, [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) +and [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer), and a large tree of about 50 classes which implement +the _Perl Document Object Model_ (PDOM). + +The PDOM is conceptually similar in style and intent to the regular DOM or +other code Abstract Syntax Trees (ASTs), but contains some differences +to handle perl-specific cases, and to assist in treating the code as a +document. Please note that it is **not** an implementation of the official +Document Object Model specification, only somewhat similar to it. + +On top of the Tokenizer, Lexer and the classes of the PDOM, sit a number +of classes intended to make life a little easier when dealing with PDOM +trees. + +Both the major parsing components were hand-coded from scratch with only +plain Perl code and a few small utility modules. There are no grammar or +patterns mini-languages, no YACC or LEX style tools and only a small number +of regular expressions. + +This is primarily because of the sheer volume of accumulated cruft that +exists in Perl. Not even perl itself is capable of parsing Perl documents +(remember, it just parses and executes it as code). + +As a result, PPI needed to be cruftier than perl itself. Feel free to +shudder at this point, and hope you never have to understand the Tokenizer +codebase. Speaking of which... + +## The Tokenizer + +The Tokenizer takes source code and converts it into a series of tokens. It +does this using a slow but thorough character by character manual process, +rather than using a pattern system or complex regexes. + +Or at least it does so conceptually. If you were to actually trace the code +you would find it's not truly character by character due to a number of +regexps and optimisations throughout the code.
This lets the Tokenizer +"skip ahead" when it can find shortcuts, so it tends to jump around a line +a bit wildly at times. + +In practice, the number of times the Tokenizer will **actually** move the +character cursor itself is only about 5% - 10% higher than the number of +tokens contained in the file. This makes it about as optimal as it can be +made without implementing it in something other than Perl. + +In 2001 when PPI was started, this structure made PPI quite slow, and not +really suitable for interactive tasks. This situation has improved greatly +with multi-gigahertz processors, but can still be painful when working with +very large files. + +The target parsing rate for PPI is about 5000 lines per gigacycle. It is +currently believed to be at about 1500, and the main avenue for making it to +the target speed has now become [PPI::XS](https://metacpan.org/pod/PPI::XS), a drop-in XS accelerator for +PPI. + +Since [PPI::XS](https://metacpan.org/pod/PPI::XS) has only just gotten off the ground and is currently only +at proof-of-concept stage, this may take a little while. Anyone interested +in helping out with [PPI::XS](https://metacpan.org/pod/PPI::XS) is **highly** encouraged to contact the +author. In fact, the design of [PPI::XS](https://metacpan.org/pod/PPI::XS) means it's possible to port +one function at a time safely and reliably. So every little bit will help. + +## The Lexer + +The Lexer takes a token stream, and converts it to a lexical tree. Because +we are parsing Perl **documents** this includes whitespace, comments, and +all manner of weird things that have no relevance when code is actually +executed. + +An instantiated [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer) consumes [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) objects and +produces [PPI::Document](https://metacpan.org/pod/PPI::Document) objects. However you should probably never be +working with the Lexer directly.
You should just be able to create +[PPI::Document](https://metacpan.org/pod/PPI::Document) objects and work with them directly. + +## The Perl Document Object Model + +The PDOM is a structured collection of data classes that together provide +a correct and scalable model for documents that follow the standard Perl +syntax. + +## The PDOM Class Tree + +The following lists all of the 67 current PDOM classes, listing with indentation +based on inheritance. + + PPI::Element + PPI::Node + PPI::Document + PPI::Document::Fragment + PPI::Statement + PPI::Statement::Package + PPI::Statement::Include + PPI::Statement::Sub + PPI::Statement::Scheduled + PPI::Statement::Compound + PPI::Statement::Break + PPI::Statement::Given + PPI::Statement::When + PPI::Statement::Data + PPI::Statement::End + PPI::Statement::Expression + PPI::Statement::Variable + PPI::Statement::Null + PPI::Statement::UnmatchedBrace + PPI::Statement::Unknown + PPI::Structure + PPI::Structure::Block + PPI::Structure::Subscript + PPI::Structure::Constructor + PPI::Structure::Condition + PPI::Structure::List + PPI::Structure::For + PPI::Structure::Given + PPI::Structure::When + PPI::Structure::Unknown + PPI::Token + PPI::Token::Whitespace + PPI::Token::Comment + PPI::Token::Pod + PPI::Token::Number + PPI::Token::Number::Binary + PPI::Token::Number::Octal + PPI::Token::Number::Hex + PPI::Token::Number::Float + PPI::Token::Number::Exp + PPI::Token::Number::Version + PPI::Token::Word + PPI::Token::DashedWord + PPI::Token::Symbol + PPI::Token::Magic + PPI::Token::ArrayIndex + PPI::Token::Operator + PPI::Token::Quote + PPI::Token::Quote::Single + PPI::Token::Quote::Double + PPI::Token::Quote::Literal + PPI::Token::Quote::Interpolate + PPI::Token::QuoteLike + PPI::Token::QuoteLike::Backtick + PPI::Token::QuoteLike::Command + PPI::Token::QuoteLike::Regexp + PPI::Token::QuoteLike::Words + PPI::Token::QuoteLike::Readline + PPI::Token::Regexp + PPI::Token::Regexp::Match + PPI::Token::Regexp::Substitute + 
PPI::Token::Regexp::Transliterate + PPI::Token::HereDoc + PPI::Token::Cast + PPI::Token::Structure + PPI::Token::Label + PPI::Token::Separator + PPI::Token::Data + PPI::Token::End + PPI::Token::Prototype + PPI::Token::Attribute + PPI::Token::Unknown + +To summarize the above layout, all PDOM objects inherit from the +[PPI::Element](https://metacpan.org/pod/PPI::Element) class. + +Under this are [PPI::Token](https://metacpan.org/pod/PPI::Token), strings of content with a known type, +and [PPI::Node](https://metacpan.org/pod/PPI::Node), syntactically significant containers that hold other +Elements. + +The three most important of these are the [PPI::Document](https://metacpan.org/pod/PPI::Document), the +[PPI::Statement](https://metacpan.org/pod/PPI::Statement) and the [PPI::Structure](https://metacpan.org/pod/PPI::Structure) classes. + +## The Document, Statement and Structure + +At the top of all complete PDOM trees is a [PPI::Document](https://metacpan.org/pod/PPI::Document) object. It +represents a complete file of Perl source code as you might find it on +disk. + +There are some specialised types of document, such as [PPI::Document::File](https://metacpan.org/pod/PPI::Document::File) +and [PPI::Document::Normalized](https://metacpan.org/pod/PPI::Document::Normalized) but for the purposes of the PDOM they are +all just considered to be the same thing. + +Each Document will contain a number of **Statements**, **Structures** and +**Tokens**. + +A [PPI::Statement](https://metacpan.org/pod/PPI::Statement) is any series of Tokens and Structures that are treated +as a single contiguous statement by perl itself. You should note that a +Statement is as close as PPI can get to "parsing" the code in the sense that +perl-itself parses Perl code when it is building the op-tree. + +Because of the isolation and Perl's syntax, it is provably impossible for +PPI to accurately determine precedence of operators or which tokens are +implicit arguments to a sub call. 
+ +So rather than lead you on with a bad guess that has a strong chance of +being wrong, PPI does not attempt to determine precedence or sub parameters +at all. + +At a fundamental level, it only knows that this series of elements +represents a single Statement as perl sees it, but it can do so with +enough certainty that it can be trusted. + +However, for specific Statement types the PDOM is able to derive additional +useful information about their meaning. For the best, most useful, and most +heavily used example, see [PPI::Statement::Include](https://metacpan.org/pod/PPI::Statement::Include). + +A [PPI::Structure](https://metacpan.org/pod/PPI::Structure) is any series of tokens contained within matching braces. +This includes code blocks, conditions, function argument braces, anonymous +array and hash constructors, lists, scoping braces and all other syntactic +structures represented by a matching pair of braces, including (although it +may not seem obvious at first) `` braces. + +Each Structure contains none, one, or many Tokens and Structures (the rules +for which vary for the different Structure subclasses) + +Under the PDOM structure rules, a Statement can **never** directly contain +another child Statement, a Structure can **never** directly contain another +child Structure, and a Document can **never** contain another Document +anywhere in the tree. + +Aside from these three rules, the PDOM tree is extremely flexible. + +## The PDOM at Work + +To demonstrate the PDOM in use lets start with an example showing how the +tree might look for the following chunk of simple Perl code. + + #!/usr/bin/perl + + print( "Hello World!" ); + + exit(); + +Translated into a PDOM tree it would have the following structure (as shown +via the included [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper)). + + PPI::Document + PPI::Token::Comment '#!/usr/bin/perl\n' + PPI::Token::Whitespace '\n' + PPI::Statement + PPI::Token::Word 'print' + PPI::Structure::List ( ... 
) + PPI::Token::Whitespace ' ' + PPI::Statement::Expression + PPI::Token::Quote::Double '"Hello World!"' + PPI::Token::Whitespace ' ' + PPI::Token::Structure ';' + PPI::Token::Whitespace '\n' + PPI::Token::Whitespace '\n' + PPI::Statement + PPI::Token::Word 'exit' + PPI::Structure::List ( ... ) + PPI::Token::Structure ';' + PPI::Token::Whitespace '\n' + +Please note that in this example, strings are only listed for the +**actual** [PPI::Token](https://metacpan.org/pod/PPI::Token) that contains that string. Structures are listed +with the type of brace characters it represents noted. + +The [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper) module can be used to generate similar trees yourself. + +We can make that PDOM dump a little easier to read if we strip out all the +whitespace. Here it is again, sans the distracting whitespace tokens. + + PPI::Document + PPI::Token::Comment '#!/usr/bin/perl\n' + PPI::Statement + PPI::Token::Word 'print' + PPI::Structure::List ( ... ) + PPI::Statement::Expression + PPI::Token::Quote::Double '"Hello World!"' + PPI::Token::Structure ';' + PPI::Statement + PPI::Token::Word 'exit' + PPI::Structure::List ( ... ) + PPI::Token::Structure ';' + +As you can see, the tree can get fairly deep at time, especially when every +isolated token in a bracket becomes its own statement. This is needed to +allow anything inside the tree the ability to grow. It also makes the +search and analysis algorithms much more flexible. + +Because of the depth and complexity of PDOM trees, a vast number of very easy +to use methods have been added wherever possible to help people working with +PDOM trees do normal tasks relatively quickly and efficiently. + +## Overview of the Primary Classes + +The main PPI classes, and links to their own documentation, are listed +here in alphabetical order. + +- [PPI::Document](https://metacpan.org/pod/PPI::Document) + + The Document object, the root of the PDOM. 
+ +- [PPI::Document::Fragment](https://metacpan.org/pod/PPI::Document::Fragment) + + A cohesive fragment of a larger Document. Although not of any real current + use, it is needed for use in certain internal tree manipulation + algorithms. + + For example, doing things like cut/copy/paste etc. Very similar to a + [PPI::Document](https://metacpan.org/pod/PPI::Document), but has some additional methods and does not represent + a lexical scope boundary. + + A document fragment is also non-serializable, and so cannot be written out + to a file. + +- [PPI::Dumper](https://metacpan.org/pod/PPI::Dumper) + + A simple class for dumping readable debugging versions of PDOM structures, + such as in the demonstration above. + +- [PPI::Element](https://metacpan.org/pod/PPI::Element) + + The Element class is the abstract base class for all objects within the PDOM + +- [PPI::Find](https://metacpan.org/pod/PPI::Find) + + Implements an instantiable object form of a PDOM tree search. + +- [PPI::Lexer](https://metacpan.org/pod/PPI::Lexer) + + The PPI Lexer. Converts Token streams into PDOM trees. + +- [PPI::Node](https://metacpan.org/pod/PPI::Node) + + The Node object, the abstract base class for all PDOM objects that can + contain other Elements, such as the Document, Statement and Structure + objects. + +- [PPI::Statement](https://metacpan.org/pod/PPI::Statement) + + The base class for all Perl statements. Generic "evaluate for side-effects" + statements are of this actual type. Other more interesting statement types + belong to one of its children. + + See its own documentation for a longer description and list of all of the + different statement types and sub-classes. + +- [PPI::Structure](https://metacpan.org/pod/PPI::Structure) + + The abstract base class for all structures. A Structure is a language + construct consisting of matching braces containing a set of other elements. 
+ + See the [PPI::Structure](https://metacpan.org/pod/PPI::Structure) documentation for a description and + list of all of the different structure types and sub-classes. + +- [PPI::Token](https://metacpan.org/pod/PPI::Token) + + A token is the basic unit of content. At its most basic, a Token is just + a string tagged with metadata (its class, and some additional flags in + some cases). + +- [PPI::Token::\_QuoteEngine](https://metacpan.org/pod/PPI::Token::_QuoteEngine) + + The [PPI::Token::Quote](https://metacpan.org/pod/PPI::Token::Quote) and [PPI::Token::QuoteLike](https://metacpan.org/pod/PPI::Token::QuoteLike) classes provide + abstract base classes for the many and varied types of quote and + quote-like things in Perl. However, much of the actual quote logic is + implemented in a separate quote engine, based at + [PPI::Token::\_QuoteEngine](https://metacpan.org/pod/PPI::Token::_QuoteEngine). + + Classes that inherit from [PPI::Token::Quote](https://metacpan.org/pod/PPI::Token::Quote), [PPI::Token::QuoteLike](https://metacpan.org/pod/PPI::Token::QuoteLike) + and [PPI::Token::Regexp](https://metacpan.org/pod/PPI::Token::Regexp) are generally parsed only by the Quote Engine. + +- [PPI::Tokenizer](https://metacpan.org/pod/PPI::Tokenizer) + + The PPI Tokenizer. One Tokenizer consumes a chunk of text and provides + access to a stream of [PPI::Token](https://metacpan.org/pod/PPI::Token) objects. + + The Tokenizer is very very complicated, to the point where even the author + treads carefully when working with it. + + Most of the complication is the result of optimizations which have tripled + the tokenization speed, at the expense of maintainability. We cope with the + spaghetti by heavily commenting everything. + +- [PPI::Transform](https://metacpan.org/pod/PPI::Transform) + + The Perl Document Transformation API. Provides a standard interface and + abstract base class for objects and classes that manipulate Documents.
+ +# INSTALLING + +The core PPI distribution is pure Perl and has been kept as tight as +possible and with as few dependencies as possible. + +It should download and install normally on any platform from within +the CPAN and CPANPLUS applications, or directly using the distribution +tarball. If installing by hand, you may need to install a few small +utility modules first. The exact ones will depend on your version of +perl. + +There are no special install instructions for PPI, and the normal +`Perl Makefile.PL`, `make`, `make test`, `make install` instructions +apply. + +# EXTENDING + +The PPI namespace itself is reserved for use by PPI itself. +You are recommended to use the PPIx:: namespace for PPI-specific +modifications or prototypes thereof, or Perl:: for modules which provide +a general Perl language-related functions. + +If what you wish to implement looks like it fits into the PPIx:: namespace, +you should consider contacting the PPI maintainers on GitHub first, as what +you want may already be in progress, or you may wish to consider contributing +to PPI itself. + +# TO DO + +\- Many more analysis and utility methods for PDOM classes + +\- Creation of a PPI::Tutorial document + +\- Add many more key functions to PPI::XS + +\- We can **always** write more and better unit tests + +\- Complete the full implementation of ->literal (1.200) + +\- Full understanding of scoping (due 1.300) + +# SUPPORT + +The most recent version of PPI is available at the following address. + +[http://search.cpan.org/~mithaldu/PPI/](http://search.cpan.org/~mithaldu/PPI/) + +PPI source is maintained in a GitHub repository at the following address. + +[https://github.com/adamkennedy/PPI](https://github.com/adamkennedy/PPI) + +Contributions via GitHub pull request are welcome. + +Bug fixes in the form of pull requests or bug reports with +new (failing) unit tests have the best chance of being addressed +by busy maintainers, and are **strongly** encouraged. 
+ +If you cannot provide a test or fix, or don't have time to do so, +then regular bug reports are still accepted and appreciated via the +GitHub bug tracker. + +[https://github.com/adamkennedy/PPI/issues](https://github.com/adamkennedy/PPI/issues) + +The `ppidump` utility that is part of the [Perl::Critic](https://metacpan.org/pod/Perl::Critic) distribution +is a useful tool for demonstrating how PPI is parsing (or misparsing) +small code snippets, and for providing information for bug reports. + +For other issues, questions, or commercial or media-related enquiries, +contact the author. + +# AUTHOR + +Adam Kennedy + +# ACKNOWLEDGMENTS + +A huge thank you to Phase N Australia ([http://phase-n.com/](http://phase-n.com/)) for +permitting the original open sourcing and release of this distribution +from what was originally several thousand hours of commercial work. + +Another big thank you to The Perl Foundation +([http://www.perlfoundation.org/](http://www.perlfoundation.org/)) for funding for the final big +refactoring and completion run. + +Also, to the various co-maintainers that have contributed both large and +small with tests and patches and especially to those rare few who have +deep-dived into the guts to (gasp) add a feature. + + - Dan Brook : PPIx::XPath, Acme::PerlML + - Audrey Tang : "Line Noise" Testing + - Arjen Laarhoven : Three-element ->location support + - Elliot Shank : Perl 5.10 support, five-element ->location + +And finally, thanks to those brave ( and foolish :) ) souls willing to dive +in and use, test drive and provide feedback on PPI before version 1.000, +in some cases before it made it to beta quality, and still did extremely +distasteful things (like eating 50 meg of RAM a second). + +I owe you all a beer. Corner me somewhere and collect at your convenience. +If I missed someone who wasn't in my email history, thank you too :) + + # In approximate order of appearance + - Claes Jacobsson + - Michael Schwern + - Jeff T. 
Parsons + - CPAN Author "CHOCOLATEBOY" + - Robert Rotherberg + - CPAN Author "PODMASTER" + - Richard Soderberg + - Nadim ibn Hamouda el Khemir + - Graciliano M. P. + - Leon Brocard + - Jody Belka + - Curtis Ovid + - Yuval Kogman + - Michael Schilli + - Slaven Rezic + - Lars Thegler + - Tony Stubblebine + - Tatsuhiko Miyagawa + - CPAN Author "CHROMATIC" + - Matisse Enzer + - Roy Fulbright + - Dan Brook + - Johnny Lee + - Johan Lindstrom + +And to single one person out, thanks go to Randal Schwartz who +spent a great number of hours in IRC over a critical 6 month period +explaining why Perl is impossibly unparsable and constantly shoving evil +and ugly corner cases in my face. He remained a tireless devil's advocate, +and without his support this project genuinely could never have been +completed. + +So for my schooling in the Deep Magiks, you have my deepest gratitude Randal. + +# COPYRIGHT + +Copyright 2001 - 2011 Adam Kennedy. + +This program is free software; you can redistribute +it and/or modify it under the same terms as Perl itself. + +The full text of the license can be found in the +LICENSE file included with this module. diff --git a/lib/PPI.pm b/lib/PPI.pm index c0d866e1..bc9f1af3 100644 --- a/lib/PPI.pm +++ b/lib/PPI.pm @@ -139,8 +139,8 @@ resources, such as libraries upon which the code may depend, and without needing to run an instance of perl alongside or inside the parser. Historically, using an embedded perl parser was widely considered to be -the most likely avenue for finding a solution to C. It was -investigated from time to time and attempts have generally failed or +the most likely avenue for finding a solution to parsing Perl. It has been +investigated from time to time, but attempts have generally failed or suffered from sufficiently bad corner cases that they were abandoned. =head2 What Does PPI Stand For? 
@@ -168,16 +168,6 @@ Since that time I've been able to prove to my own satisfaction that it B<is> truly impossible to accurately parse Perl as both code and document at once. For the academics, parsing Perl suffers from the "Halting Problem". -With this in mind C has now been co-opted as the title for -the SourceForge project that publishes PPI and a large collection of other -applications and modules related to the (document) parsing of Perl source -code. - -You can find this project at L, -however we no longer use the SourceForge CVS server. Instead, the -current development version of PPI is available via SVN at -L. - =head2 Why Parse Perl? Once you can accept that we will never be able to parse Perl well enough @@ -204,6 +194,9 @@ situations relating to particular phrases, techniques or locations. Index functions, variables and packages within Perl code, and doing search and graph (in the node/edge sense) analysis of large code bases. +L<Perl::Critic>, based on PPI, is a large, thriving tool for bug detection +and style analysis of Perl code. + =item Refactoring Make structural, syntax, or other changes to code in an automated manner, @@ -268,13 +261,9 @@ Specifically, it allows the use characters from the Latin-1 character set to be used in quotes, comments, and POD. Primarily, this covers languages from Europe and South America. -PPI does B<not> currently provide support for Unicode, although there -is an initial implementation available in a development branch from -CVS. - -If you need Unicode support, and would like to help stress test the -Unicode support so we can move it to the main branch and enable it -in the main release should contact the author. (contact details below) +PPI does B<not> currently provide support for Unicode. +If you need Unicode support and would like to help, +contact the author. (contact details below) =head2 Round Trip Safe @@ -640,7 +629,7 @@ The base class for all Perl statements. Generic "evaluate for side-effects" statements are of this actual type. 
Other more interesting statement types belong to one of its children. -See it's own documentation for a longer description and list of all of the +See its own documentation for a longer description and list of all of the different statement types and sub-classes. =item L @@ -704,20 +693,15 @@ apply. =head1 EXTENDING -The PPI namespace itself is reserved for the sole use of the modules under -the umbrella of the C SourceForge project. - -L - +The PPI namespace itself is reserved for use by PPI itself. You are recommended to use the PPIx:: namespace for PPI-specific modifications or prototypes thereof, or Perl:: for modules which provide a general Perl language-related functions. -If what you wish to implement looks like it fits into PPIx:: namespace, -you should consider contacting the C mailing list (detailed on -the SourceForge site) first, as what you want may already be in progress, -or you may wish to consider joining the team and doing it within the -C project itself. +If what you wish to implement looks like it fits into the PPIx:: namespace, +you should consider contacting the PPI maintainers on GitHub first, as what +you want may already be in progress, or you may wish to consider contributing +to PPI itself. =head1 TO DO @@ -735,36 +719,32 @@ C project itself. =head1 SUPPORT -This module is stored in an Open Repository at the following address. +The most recent version of PPI is available at the following address. -L +L -Write access to the repository is made available automatically to any -published CPAN author, and to most other volunteers on request. +PPI source is maintained in a GitHub repository at the following address. -If you are able to submit your bug report in the form of new (failing) -unit tests, or can apply your fix directly instead of submitting a patch, -you are B encouraged to do so, as the author currently maintains -over 100 modules and it can take some time to deal with non-"Critical" bug -reports or patches. 
+L -This will also guarantee that your issue will be addressed in the next -release of the module. +Contributions via GitHub pull request are welcome. -For large changes though, please consider creating a branch so that they -can be properly reviewed and trialed before being applied to the trunk. +Bug fixes in the form of pull requests or bug reports with +new (failing) unit tests have the best chance of being addressed +by busy maintainers, and are B encouraged. -If you cannot provide a direct test or fix, or don't have time to do so, +If you cannot provide a test or fix, or don't have time to do so, then regular bug reports are still accepted and appreciated via the GitHub bug tracker. L<https://github.com/adamkennedy/PPI/issues> -For other issues or questions, contact the C project mailing -list. +The C<ppidump> utility that is part of the L<Perl::Critic> distribution +is a useful tool for demonstrating how PPI is parsing (or misparsing) +small code snippets, and for providing information for bug reports. -For commercial or media-related enquiries, or to have your SVN commit bit -enabled, contact the author. +For other issues, questions, or commercial or media-related enquiries, +contact the author. =head1 AUTHOR diff --git a/lib/PPI/Cache.pm b/lib/PPI/Cache.pm index fef77de0..ffd83ad8 100644 --- a/lib/PPI/Cache.pm +++ b/lib/PPI/Cache.pm @@ -225,7 +225,7 @@ sub _store { my ($dir, $file) = $self->_paths($md5hex); # Save the file - File::Path::mkpath( $dir, 0, 0755 ) unless -d $dir; + File::Path::mkpath( $dir, 0, 0777 ) unless -d $dir; if ( VMS ) { Storable::lock_nstore( $object, $file ); } else { @@ -268,7 +268,7 @@ sub _md5hex { my $it = _SCALAR($_[0]) ? PPI::Util::md5hex(${$_[0]}) : $_[0]; - return (defined $it and ! ref $it and $it =~ /^[a-f0-9]{32}\z/si) + return (defined $it and ! ref $it and $it =~ /^[[:xdigit:]]{32}\z/s) ? 
lc $it : undef; } diff --git a/lib/PPI/Document.pm b/lib/PPI/Document.pm index 90689705..19032694 100644 --- a/lib/PPI/Document.pm +++ b/lib/PPI/Document.pm @@ -48,8 +48,8 @@ C implements the necessary C and C hooks to provide native support for L, if you have it installed. -However if you want to clone clone a Document, you are highly recommended -to use the internal C<$Document-Eclone> method rather than Storable's +However if you want to clone a Document, you are highly recommended +to use the C<$Document-Eclone> method rather than Storable's C function (although C should still work). =head1 METHODS @@ -791,7 +791,7 @@ Returns a L object, or C on error. sub normalized { # The normalization process will utterly destroy and mangle # anything passed to it, so we are going to only give it a - # clone of ourself. + # clone of ourselves. PPI::Normal->process( $_[0]->clone ); } diff --git a/lib/PPI/Find.pm b/lib/PPI/Find.pm index bb56a648..92631858 100644 --- a/lib/PPI/Find.pm +++ b/lib/PPI/Find.pm @@ -288,7 +288,7 @@ stop the iteration prematurely. It resets the Find object and allows it to be safely reused. A Find object will be automatically finished when C returns false. -This means you should only need to call C when you stop +This means you should only need to call C when you stop iterating early. You may safely call this method even when not iterating and it will return diff --git a/lib/PPI/Lexer.pm b/lib/PPI/Lexer.pm index 9105c2d9..629c0cf6 100644 --- a/lib/PPI/Lexer.pm +++ b/lib/PPI/Lexer.pm @@ -292,7 +292,7 @@ sub _lex_document { # Is this the close of a structure. if ( $Token->__LEXER__closes ) { # Because we are at the top of the tree, this is an error. - # This means either a mis-parsing, or an mistake in the code. + # This means either a mis-parsing, or a mistake in the code. 
# To handle this, we create a "Naked Close" statement $self->_add_element( $Document, PPI::Statement::UnmatchedBrace->new($Token) @@ -440,6 +440,28 @@ sub _statement { # Is it a token in our known classes list my $class = $STATEMENT_CLASSES{$Token->content}; + if ( $class ) { + # Is the next significant token a => + # Read ahead to the next significant token + my $Next; + while ( $Next = $self->_get_token ) { + if ( !$Next->significant ) { + push @{$self->{delayed}}, $Next; + next; + } + + last if + !$Next->isa( 'PPI::Token::Operator' ) or $Next->content ne '=>'; + + # Got the next token + # Is an ordinary expression + $self->_rollback( $Next ); + return 'PPI::Statement'; + } + + # Rollback and continue + $self->_rollback( $Next ); + } # Handle potential barewords for subscripts if ( $Parent->isa('PPI::Structure::Subscript') ) { @@ -533,8 +555,16 @@ sub _statement { } # Found the next significant token. + if ( + $Next->isa('PPI::Token::Operator') + and + $Next->content eq '=>' + ) { + # Is an ordinary expression + $self->_rollback( $Next ); + return 'PPI::Statement'; # Is it a v6 use? - if ( $Next->content eq 'v6' ) { + } elsif ( $Next->content eq 'v6' ) { $self->_rollback( $Next ); return 'PPI::Statement::Include::Perl6'; } else { @@ -696,22 +726,23 @@ sub _continues { return ''; } - # Alrighty then, there are only five implied end statement types, - # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, and ::When - # statements. - unless ( ref($Statement) =~ /\b(?:Scheduled|Sub|Compound|Given|When)$/ ) { - return 1; - } - - # Of these five, ::Scheduled, ::Sub, ::Given, and ::When follow the same - # simple rule and can be handled first. + # Alrighty then, there are six implied-end statement types: + # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, ::When, + # and ::Package statements. 
+ return 1 + if ref($Statement) !~ /\b(?:Scheduled|Sub|Compound|Given|When|Package)$/; # parens required: !~ binds tighter than the named unary op ref + + # Of these six, ::Scheduled, ::Sub, ::Given, and ::When follow the same + # simple rule and can be handled first. The block form of ::Package + # follows the rule, too. (The non-block form of ::Package + # requires a statement terminator, and thus doesn't need to have + # an implied end detected.) my @part = $Statement->schildren; my $LastChild = $part[-1]; - unless ( $Statement->isa('PPI::Statement::Compound') ) { - # If the last significant element of the statement is a block, - # then a scheduled statement is done, no questions asked. - return ! $LastChild->isa('PPI::Structure::Block'); - } + # If the last significant element of the statement is a block, + # then an implied-end statement is done, no questions asked. + return !$LastChild->isa('PPI::Structure::Block') + if !$Statement->isa('PPI::Statement::Compound'); # Now we get to compound statements, which kind of suck (to lex). # However, of them all, the 'if' type, which includes unless, are @@ -1118,6 +1149,16 @@ sub _curly { and return 'PPI::Structure::Subscript'; } } + + # Are we the last argument of sub? + # E.g.: 'sub foo {}', 'sub foo ($) {}' + return 'PPI::Structure::Block' if $Parent->isa('PPI::Statement::Sub'); + + # Are we the second or third argument of package? + # E.g.: 'package Foo {}' or 'package Foo v1.2.3 {}' + return 'PPI::Structure::Block' + if $Parent->isa('PPI::Statement::Package'); + if ( $CURLY_CLASSES{$content} ) { # Known type return $CURLY_CLASSES{$content}; @@ -1309,7 +1350,7 @@ sub _get_token { # $self->{Tokenizer}->get_token; # } -# Delay the addition of a insignificant elements. +# Delay the addition of insignificant elements. # This ended up being inlined. 
# sub _delay_element { # my $self = shift; diff --git a/lib/PPI/Statement/End.pm b/lib/PPI/Statement/End.pm index 9fccf4f6..7bc4c7b6 100644 --- a/lib/PPI/Statement/End.pm +++ b/lib/PPI/Statement/End.pm @@ -12,7 +12,7 @@ PPI::Statement::End - Content after the __END__ of a module __END__ - This is part of an PPI::Statement::End statement + This is part of a PPI::Statement::End statement =pod diff --git a/lib/PPI/Statement/Package.pm b/lib/PPI/Statement/Package.pm index 52006ec8..19945e7e 100644 --- a/lib/PPI/Statement/Package.pm +++ b/lib/PPI/Statement/Package.pm @@ -47,6 +47,9 @@ BEGIN { @ISA = 'PPI::Statement'; } +# Lexer clues +sub __LEXER__normal() { '' } + =pod =head2 namespace diff --git a/lib/PPI/Statement/Sub.pm b/lib/PPI/Statement/Sub.pm index 4722e799..8c5ebd8e 100644 --- a/lib/PPI/Statement/Sub.pm +++ b/lib/PPI/Statement/Sub.pm @@ -165,6 +165,10 @@ Returns true if it is a special reserved subroutine, or false if not. sub reserved { my $self = shift; my $name = $self->name or return ''; + # perlsub is silent on whether reserveds can contain: + # - underscores; + # we allow them due to existing practice like CLONE_SKIP and __SUB__. + # - numbers; we allow them by PPI tradition. $name eq uc $name; } @@ -172,10 +176,6 @@ sub reserved { =pod -=head1 TO DO - -- Write unit tests for this package - =head1 SUPPORT See the L in the main module. diff --git a/lib/PPI/Token/Attribute.pm b/lib/PPI/Token/Attribute.pm index db9858c0..dfea2c92 100644 --- a/lib/PPI/Token/Attribute.pm +++ b/lib/PPI/Token/Attribute.pm @@ -79,7 +79,7 @@ Returns C if the attribute does not have parameters. sub parameters { my $self = shift; - $self->{content} =~ /\((.+)\)$/ ? $1 : undef; + $self->{content} =~ /\((.*)\)$/ ? 
$1 : undef; } diff --git a/lib/PPI/Token/HereDoc.pm b/lib/PPI/Token/HereDoc.pm index 7b351e7c..b9bab290 100644 --- a/lib/PPI/Token/HereDoc.pm +++ b/lib/PPI/Token/HereDoc.pm @@ -201,14 +201,10 @@ sub __TOKENIZER__on_char { return undef; } - # Define $line outside of the loop, so that if we encounter the - # end of the file, we have access to the last line still. - my $line; - # Suck in the HEREDOC - $token->{_heredoc} = []; + $token->{_heredoc} = \my @heredoc; my $terminator = $token->{_terminator} . "\n"; - while ( defined($line = $t->_get_line) ) { + while ( defined( my $line = $t->_get_line ) ) { if ( $line eq $terminator ) { # Keep the actual termination line for consistency # when we are re-assembling the file @@ -219,29 +215,30 @@ sub __TOKENIZER__on_char { } # Add the line - push @{$token->{_heredoc}}, $line; + push @heredoc, $line; } # End of file. # Error: Didn't reach end of here-doc before end of file. - # $line might be undef if we get NO lines. - if ( defined $line and $line eq $token->{_terminator} ) { - # If the last line matches the terminator - # but is missing the newline, we want to allow - # it anyway (like perl itself does). In this case - # perl would normally throw a warning, but we will - # also ignore that as well. - pop @{$token->{_heredoc}}; - $token->{_terminator_line} = $line; - } else { - # The HereDoc was not properly terminated. - $token->{_terminator_line} = undef; - # Trim off the trailing whitespace - if ( defined $token->{_heredoc}->[-1] and $t->{source_eof_chop} ) { - chop $token->{_heredoc}->[-1]; + # If the here-doc block is not empty, look at the last line to determine if + # the here-doc terminator is missing a newline (which Perl would fail to + # compile but is easy to detect) or if the here-doc block was just not + # terminated at all (which Perl would fail to compile as well). 
+ $token->{_terminator_line} = undef; + if ( @heredoc and defined $heredoc[-1] ) { + # See PPI::Tokenizer, the algorithm there adds a space at the end of the + # document that we need to make sure we remove. + if ( $t->{source_eof_chop} ) { + chop $heredoc[-1]; $t->{source_eof_chop} = ''; } + + # Check if the last line of the file matches the terminator without + # newline at the end. If so, remove it from the content and set it as + # the terminator line. + $token->{_terminator_line} = pop @heredoc + if $heredoc[-1] eq $token->{_terminator}; } # Set a hint for PPI::Document->serialize so it can diff --git a/lib/PPI/Token/Number/Hex.pm b/lib/PPI/Token/Number/Hex.pm index f7f59745..5237382d 100644 --- a/lib/PPI/Token/Number/Hex.pm +++ b/lib/PPI/Token/Number/Hex.pm @@ -76,7 +76,7 @@ sub __TOKENIZER__on_char { # Allow underscores straight through return 1 if $char eq '_'; - if ( $char =~ /[\da-f]/i ) { + if ( $char =~ /[[:xdigit:]]/ ) { return 1; } diff --git a/lib/PPI/Token/Number/Version.pm b/lib/PPI/Token/Number/Version.pm index fefc0090..daf69cda 100644 --- a/lib/PPI/Token/Number/Version.pm +++ b/lib/PPI/Token/Number/Version.pm @@ -102,17 +102,23 @@ sub __TOKENIZER__on_char { sub __TOKENIZER__commit { my $t = $_[1]; - # Get the rest of the line + # Capture the rest of the token pos $t->{line} = $t->{line_cursor}; - if ( $t->{line} !~ m/\G(v\d+(?:\.\d+)*)/gc ) { + if ( $t->{line} !~ m/\G(v\d+(?:\.\d+)+|v\d+\b)/gc ) { # This was not a v-string after all (it's a word) return PPI::Token::Word->__TOKENIZER__commit($t); } + my $content = $1; + + # If there are no periods this could be a word starting with v\d + # Forced to be a word. Done. 
+ return PPI::Token::Word->__TOKENIZER__commit($t) + if $content !~ /\./ and $t->__current_token_is_forced_word; + # This is a v-string - my $vstring = $1; - $t->{line_cursor} += length($vstring); - $t->_new_token('Number::Version', $vstring); + $t->{line_cursor} += length $content; + $t->_new_token( 'Number::Version', $content ); $t->_finalize_token->__TOKENIZER__on_char($t); } diff --git a/lib/PPI/Token/Quote.pm b/lib/PPI/Token/Quote.pm index a032f6e3..e3fefe3e 100644 --- a/lib/PPI/Token/Quote.pm +++ b/lib/PPI/Token/Quote.pm @@ -86,7 +86,7 @@ of the quotes. =head2 literal -The C method is provided by ::Quote:Literal and +The C method is provided by ::Quote::Literal and ::Quote::Single. This returns the value of the string as Perl sees it: without the quote marks and with C<\\> and C<\'> resolved to C<\> and C<'>. diff --git a/lib/PPI/Token/Quote/Double.pm b/lib/PPI/Token/Quote/Double.pm index c5d475de..b521ebad 100644 --- a/lib/PPI/Token/Quote/Double.pm +++ b/lib/PPI/Token/Quote/Double.pm @@ -80,6 +80,9 @@ string token in place, turning it into the equivalent single-quoted string. If the token is modified, it is reblessed into the L package. +Because the length of the content is not changed, there is no need +to call the document's C method. + The object itself is returned as a convenience. =cut diff --git a/lib/PPI/Token/QuoteLike/Words.pm b/lib/PPI/Token/QuoteLike/Words.pm index 7e15fd9b..2930580f 100644 --- a/lib/PPI/Token/QuoteLike/Words.pm +++ b/lib/PPI/Token/QuoteLike/Words.pm @@ -42,20 +42,27 @@ BEGIN { =head2 literal -Returns the words contained. Note that this method does not check the +Returns the words contained as a list. Note that this method does not check the context that the token is in; it always returns the list and not merely the last element if the token is in scalar context. 
=cut sub literal { - my $self = shift; - my $section = $self->{sections}->[0]; - return split ' ', substr( - $self->{content}, - $section->{position}, - $section->{size}, - ); + my ( $self ) = @_; + + my $content = $self->_section_content(0); + return if !defined $content; + + # Undo backslash escaping of '\', the left delimiter, + # and the right delimiter. The right delimiter will + # only exist with paired delimiters: qw() qw[] qw<> qw{}. + my ( $left, $right ) = ( $self->_delimiters, '', '' ); + $content =~ s/\\([\Q$left$right\\\E])/$1/g; + + my @words = split ' ', $content; + + return @words; } 1; diff --git a/lib/PPI/Token/Structure.pm b/lib/PPI/Token/Structure.pm index f5677de3..566bcf91 100644 --- a/lib/PPI/Token/Structure.pm +++ b/lib/PPI/Token/Structure.pm @@ -177,7 +177,7 @@ sub previous_token { # Anything that slips through to here is a structure # with a closing brace, but no opening brace, so we # just have to go with it, and continue as we would - # if we started with a opening brace. + # if we started with an opening brace. 
} # We can use the default implement, if we call it from the diff --git a/lib/PPI/Token/Unknown.pm b/lib/PPI/Token/Unknown.pm index a0bae548..26388ca9 100644 --- a/lib/PPI/Token/Unknown.pm +++ b/lib/PPI/Token/Unknown.pm @@ -90,7 +90,7 @@ sub __TOKENIZER__on_char { } elsif ( $p0->isa('PPI::Token::Structure') and - $p0->content =~ /^(?:\)|\])$/ + $p0->content =~ /^(?:\)|\]|\})$/ ) { $_class = 'Operator'; } else { @@ -290,7 +290,7 @@ sub __TOKENIZER__on_char { return $t->_finalize_token->__TOKENIZER__on_char( $t ); } - # It MIGHT be a label, but its probably the ?: trinary operator + # It MIGHT be a label, but it's probably the ?: trinary operator $t->{class} = $t->{token}->set_class( 'Operator' ); return $t->{class}->__TOKENIZER__on_char( $t ); } diff --git a/lib/PPI/Token/Whitespace.pm b/lib/PPI/Token/Whitespace.pm index 26e76aab..92ea009d 100644 --- a/lib/PPI/Token/Whitespace.pm +++ b/lib/PPI/Token/Whitespace.pm @@ -203,7 +203,8 @@ sub __TOKENIZER__on_line_start { sub __TOKENIZER__on_char { my $t = $_[1]; - my $char = ord substr $t->{line}, $t->{line_cursor}, 1; + my $c = substr $t->{line}, $t->{line_cursor}, 1; + my $char = ord $c; # Do we definitely know what something is? return $COMMITMAP[$char]->__TOKENIZER__commit($t) if $COMMITMAP[$char]; @@ -246,7 +247,7 @@ sub __TOKENIZER__on_char { return 'Prototype'; } - # An prototyped anonymous subroutine + # A prototyped anonymous subroutine my $p0 = $tokens->[0]; if ( $p0->isa('PPI::Token::Word') and $p0->content eq 'sub' # Maybe it's invoking a method named 'sub' @@ -368,7 +369,7 @@ sub __TOKENIZER__on_char { return 'Regexp::Match' if $prec eq ''; # What about the char after the slash? There's some things - # that would be highly illogical to see if its an operator. + # that would be highly illogical to see if it's an operator. 
my $next_char = substr $t->{line}, $t->{line_cursor} + 1, 1; if ( defined $next_char and length $next_char ) { if ( $next_char =~ /(?:\^|\[|\\)/ ) { @@ -407,9 +408,9 @@ sub __TOKENIZER__on_char { } } elsif ( $char >= 128 ) { # Outside ASCII - return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $t =~ /\w/; - return 'Whitespace' if $t =~ /\s/; - } + return 'PPI::Token::Word'->__TOKENIZER__commit($t) if $c =~ /\w/; + return 'Whitespace' if $c =~ /\s/; + } # All the whitespaces are covered, so what to do diff --git a/lib/PPI/Token/Word.pm b/lib/PPI/Token/Word.pm index 95c62eaa..ba8bae72 100644 --- a/lib/PPI/Token/Word.pm +++ b/lib/PPI/Token/Word.pm @@ -182,23 +182,30 @@ sub __TOKENIZER__on_char { } # We might be a subroutine attribute. - my $tokens = $t->_previous_significant_tokens(1); - if ( $tokens and $tokens->[0]->{_attribute} ) { + if ( __current_token_is_attribute($t) ) { $t->{class} = $t->{token}->set_class( 'Attribute' ); return $t->{class}->__TOKENIZER__commit( $t ); } - # Check for a quote like operator my $word = $t->{token}->{content}; - if ( $QUOTELIKE{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) { - $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} ); - return $t->{class}->__TOKENIZER__on_char( $t ); - } + if ( $KEYWORDS{$word} ) { + # Check for a Perl keyword that is forced to be a normal word instead + if ( $t->__current_token_is_forced_word ) { + $t->{class} = $t->{token}->set_class( 'Word' ); + return $t->{class}->__TOKENIZER__on_char( $t ); + } - # Or one of the word operators - if ( $OPERATOR{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) { - $t->{class} = $t->{token}->set_class( 'Operator' ); - return $t->_finalize_token->__TOKENIZER__on_char( $t ); + # Check for a quote like operator. %QUOTELIKE must be subset of %KEYWORDS + if ( $QUOTELIKE{$word} ) { + $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} ); + return $t->{class}->__TOKENIZER__on_char( $t ); + } + + # Or one of the word operators. 
%OPERATOR must be subset of %KEYWORDS + if ( $OPERATOR{$word} ) { + $t->{class} = $t->{token}->set_class( 'Operator' ); + return $t->_finalize_token->__TOKENIZER__on_char( $t ); + } } # Unless this is a simple identifier, at this point @@ -251,8 +258,7 @@ sub __TOKENIZER__commit { $t->{line_cursor} += length $word; # We might be a subroutine attribute. - my $tokens = $t->_previous_significant_tokens(1); - if ( $tokens and $tokens->[0]->{_attribute} ) { + if ( __current_token_is_attribute($t) ) { $t->_new_token( 'Attribute', $word ); return ($t->{line_cursor} >= $t->{line_length}) ? 0 : $t->{class}->__TOKENIZER__on_char($t); @@ -310,10 +316,10 @@ sub __TOKENIZER__commit { my $token_class; if ( $word =~ /\:/ ) { - # Since its not a simple identifier... + # Since it's not a simple identifier... $token_class = 'Word'; - } elsif ( $class->__TOKENIZER__literal($t, $word, $tokens) ) { + } elsif ( $KEYWORDS{$word} and $t->__current_token_is_forced_word ) { $token_class = 'Word'; } elsif ( $QUOTELIKE{$word} ) { @@ -327,11 +333,14 @@ sub __TOKENIZER__commit { $token_class = 'Operator'; } else { - # If the next character is a ':' then its a label... + # Get tokens early to be sure to not disturb state set up by pos and m//gc. + my $tokens = $t->_previous_significant_tokens(1); + + # If the next character is a ':' then it's a label... pos $t->{line} = $t->{line_cursor}; if ( $t->{line} =~ m/\G(\s*:)(?!:)/gc ) { if ( $tokens and $tokens->[0]->{content} eq 'sub' ) { - # ... UNLESS its after 'sub' in which + # ... UNLESS it's after 'sub' in which # case it is a sub name and an attribute # operator. # We COULD have checked this at the top @@ -363,44 +372,21 @@ sub __TOKENIZER__commit { $t->_finalize_token->__TOKENIZER__on_char($t); } -# Is the word in a "forced" context, and thus cannot be either an -# operator or a quote-like thing. This version is only useful -# during tokenization. 
-sub __TOKENIZER__literal { - my ($class, $t, $word, $tokens) = @_; - - # Is this a forced-word context? - # i.e. Would normally be seen as an operator. - unless ( $QUOTELIKE{$word} or $PPI::Token::Operator::OPERATOR{$word} ) { - return ''; - } - - # Check the cases when we have previous tokens - pos $t->{line} = $t->{line_cursor}; - if ( $tokens ) { - my $token = $tokens->[0] or return ''; - # We are forced if we are a method name - return 1 if $token->{content} eq '->'; - # We are forced if we are a sub name - return 1 if $token->isa('PPI::Token::Word') && $token->{content} eq 'sub'; - - # If we are contained in a pair of curly braces, - # we are probably a bareword hash key - if ( $token->{content} eq '{' and $t->{line} =~ /\G\s*\}/gc ) { - return 1; - } - } - - # In addition, if the word is followed by => it is probably - # also actually a word and not a regex. - if ( $t->{line} =~ /\G\s*=>/gc ) { - return 1; - } - - # Otherwise we probably aren't forced - ''; +# Is the current Word really a subroutine attribute? 
+sub __current_token_is_attribute { + my ( $t ) = @_; + my $tokens = $t->_previous_significant_tokens(1); + return ( + $tokens + and ( + # hint from tokenizer + $tokens->[0]->{_attribute} + # nothing between attribute and us except whitespace + or $tokens->[0]->isa('PPI::Token::Attribute') + ) + ); } 1; diff --git a/lib/PPI/Token/_QuoteEngine/Full.pm b/lib/PPI/Token/_QuoteEngine/Full.pm index 85d23717..7526af22 100644 --- a/lib/PPI/Token/_QuoteEngine/Full.pm +++ b/lib/PPI/Token/_QuoteEngine/Full.pm @@ -32,7 +32,7 @@ BEGIN { 's' => { operator => 's', braced => undef, separator => undef, _sections => 2, modifiers => 1 }, 'tr' => { operator => 'tr', braced => undef, separator => undef, _sections => 2, modifiers => 1 }, - # Y is the little used variant of tr + # Y is the little-used variant of tr 'y' => { operator => 'y', braced => undef, separator => undef, _sections => 2, modifiers => 1 }, '/' => { operator => undef, braced => 0, separator => '/', _sections => 1, modifiers => 1 }, @@ -42,7 +42,7 @@ BEGIN { # The final ( and kind of depreciated ) "first match only" one is not # used yet, since I'm not sure on the context differences between - # this and the trinary operator, but its here for completeness. + # this and the trinary operator, but it's here for completeness. '?' => { operator => undef, braced => 0, separator => '?', _sections => 1, modifiers => 1 }, ); } diff --git a/lib/PPI/Tokenizer.pm b/lib/PPI/Tokenizer.pm index 613ecfca..484c2ae9 100644 --- a/lib/PPI/Tokenizer.pm +++ b/lib/PPI/Tokenizer.pm @@ -102,7 +102,35 @@ my %X_CAN_FOLLOW_OPERATOR = map { $_ => 1 } qw( -- ++ ); # These are the exceptions. my %X_CAN_FOLLOW_STRUCTURE = map { $_ => 1 } qw( } ] \) ); - +# Something that looks like the x operator but follows a word +# is usually that word's argument. +# These are the exceptions. +# chop, chomp, dump are ambiguous because they can have either parms +# or no parms. 
+my %X_CAN_FOLLOW_WORD = map { $_ => 1 } qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + __SUB__ +); @@ -552,7 +580,7 @@ sub _process_next_char { return 0 if ++$self->{line_cursor} >= $self->{line_length}; # Pass control to the token class - my $result; + my $result; unless ( $result = $self->{class}->__TOKENIZER__on_char( $self ) ) { # undef is error. 0 is "Did stuff ourself, you don't have to do anything" return defined $result ? 1 : undef; @@ -736,7 +764,10 @@ my %OBVIOUS_CONTENT = ( '}' => 'operator', ); -# Try to determine operator/operand context, is possible. + +my %USUALLY_FORCES = map { $_ => 1 } qw( sub package use no ); + +# Try to determine operator/operand context, if possible. # Returns "operator", "operand", or "" if unknown. sub _opcontext { my $self = shift; @@ -768,9 +799,57 @@ sub _current_x_is_operator { $prev && (!$prev->isa('PPI::Token::Operator') || $X_CAN_FOLLOW_OPERATOR{$prev}) && (!$prev->isa('PPI::Token::Structure') || $X_CAN_FOLLOW_STRUCTURE{$prev}) + && (!$prev->isa('PPI::Token::Word') || $X_CAN_FOLLOW_WORD{$prev}) + && !$prev->isa('PPI::Token::Label') ; } + +# Assuming we are at the end of parsing the current token that could be a word, +# a wordlike operator, or a version string, try to determine whether context +# before or after it forces it to be a bareword. This method is only useful +# during tokenization. +sub __current_token_is_forced_word { + my ( $t ) = @_; + + # Check if forced by preceding tokens. + + my ( $prev, $prevprev ) = @{ $t->_previous_significant_tokens(2) }; + if ( !$prev ) { + pos $t->{line} = $t->{line_cursor}; + } + else { + my $content = $prev->{content}; + + # We are forced if we are a method name. + # '->' will always be an operator, so we don't check its type. 
+ return 1 if $content eq '->'; + + # If we are contained in a pair of curly braces, we are probably a + # forced bareword hash key. '{' is never a word or operator, so we + # don't check its type. + pos $t->{line} = $t->{line_cursor}; + return 1 if $content eq '{' and $t->{line} =~ /\G\s*\}/gc; + + # sub, package, use, and no all indicate that what immediately follows + # is a word not an operator or (in the case of sub and package) a + # version string. However, we don't want to be fooled by 'package + # package v10' or 'use no v10'. We're a forced package unless we're + # preceded by 'package sub', in which case we're a version string. + return ( !$prevprev || !$USUALLY_FORCES{$prevprev->content} ) + if $USUALLY_FORCES{$content}; + } + # pos on $t->{line} is guaranteed to be set at this point. + + # Check if forced by following tokens. + + # If the word is followed by => it is probably a word, not a regex. + return 1 if $t->{line} =~ /\G\s*=>/gc; + + # Otherwise we probably aren't forced + return ''; +} + 1; =pod @@ -810,7 +889,7 @@ called in whatever token class we are currently in, which will examine the character at the current position, and handle it. As the handler methods in the various token classes are called, they -build up a output token array for the source code. +build up an output token array for the source code. Various parts of the Tokenizer use look-ahead, arbitrary-distance look-behind (although currently the maximum is three significant tokens), diff --git a/lib/PPI/Transform.pm b/lib/PPI/Transform.pm index ffc2a928..3b38f125 100644 --- a/lib/PPI/Transform.pm +++ b/lib/PPI/Transform.pm @@ -123,7 +123,7 @@ sub document { The C method is used to apply the transform to something. The argument must be a L, or something which can be turned -into a one and then be written back to again. +into one and then be written back to again. 
Currently, this list is limited to a C reference, although a handler registration process is available for you to add support for diff --git a/t/01_compile.t b/t/01_compile.t index 507831fe..66f87e05 100644 --- a/t/01_compile.t +++ b/t/01_compile.t @@ -1,29 +1,12 @@ #!/usr/bin/perl -# Formal testing for PPI - # This test script only tests that the tree compiles -use strict; -use File::Spec::Functions ':ALL'; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -use Test::More tests => 19; -use Test::NoWarnings; - - - - +use t::lib::PPI::Test::pragmas; +use Test::More tests => 18; -# Check their perl version -ok( $] >= 5.006, "Your perl is new enough" ); -# Does the module load +# Do the modules load use_all_ok( qw{ PPI PPI::Tokenizer diff --git a/t/03_document.t b/t/03_document.t index 0ad563a1..14e6da19 100644 --- a/t/03_document.t +++ b/t/03_document.t @@ -2,47 +2,32 @@ # PPI::Document tests -use strict; -use File::Spec::Functions ':ALL'; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use PPI; - -# Execute the tests +use t::lib::PPI::Test::pragmas; use Test::More tests => 14; -use Test::NoWarnings; - -# Test file -my $file = catfile(qw{ t data 03_document test.dat }); -my $empty = catfile(qw{ t data 03_document empty.dat }); -ok( -f $file, 'Found test file' ); -ok( -f $empty, 'Found test file' ); - -# Test script -my $script = <<'END_PERL'; -#!/usr/bin/perl - -# A simple test script - -print "Hello World!\n"; -END_PERL - - +use File::Spec::Functions ':ALL'; +use PPI; ##################################################################### # Test a basic document # Parse a simple document in all possible ways -SCOPE: { +NEW: { + my $file = catfile(qw{ t data 03_document test.dat }); + ok( -f $file, 'Found test.dat' ); + my $doc1 = PPI::Document->new( $file ); isa_ok( $doc1, 'PPI::Document' ); + # Test script + my $script = 
<<'END_PERL'; +#!/usr/bin/perl + +# A simple test script + +print "Hello World!\n"; +END_PERL my $doc2 = PPI::Document->new( \$script ); isa_ok( $doc2, 'PPI::Document' ); @@ -61,7 +46,10 @@ SCOPE: { } # Repeat the above with a null document -SCOPE: { +NEW_EMPTY: { + my $empty = catfile(qw{ t data 03_document empty.dat }); + ok( -f $empty, 'Found empty.dat' ); + my $doc1 = PPI::Document->new( $empty ); isa_ok( $doc1, 'PPI::Document' ); diff --git a/t/04_element.t b/t/04_element.t index 433eb8cb..031389b6 100644 --- a/t/04_element.t +++ b/t/04_element.t @@ -5,20 +5,14 @@ # This does an empiric test that when we try to parse something, # something ( anything ) comes out the other side. -use strict; -use File::Spec::Functions ':ALL'; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use PPI::Lexer (); - -# Execute the tests +use t::lib::PPI::Test::pragmas; use Test::More tests => 221; -use Test::NoWarnings; + +use File::Spec::Functions ':ALL'; +use PPI; use Scalar::Util 'refaddr'; +use t::lib::PPI::Test 'pause'; + sub is_object { my ($left, $right, $message) = @_; @@ -48,13 +42,6 @@ sub omethod_fails { } } -sub pause { - local $@; - sleep 1 if !eval { require Time::HiRes; Time::HiRes::sleep(0.1); 1 }; -} - - - ##################################################################### @@ -274,7 +261,7 @@ SCOPE: { my $start = $doc->first_token; isa_ok( $start, 'PPI::Token::Structure' ); is( $start->content, '{', 'Got start token' ); - is( $start->previous_sibling, '', '->previous_sibling for an start opening brace returns false' ); + is( $start->previous_sibling, '', '->previous_sibling for a start opening brace returns false' ); my $braces = $doc->find_first( sub { $_[1]->isa('PPI::Structure') and $_[1]->braces eq '()' } ); diff --git a/t/05_lexer.t b/t/05_lexer.t index 8adab466..d420d256 100644 --- a/t/05_lexer.t +++ b/t/05_lexer.t @@ -1,36 +1,16 @@ #!/usr/bin/perl -# Compare a large number of specific 
constructs -# with the expected Lexer dumps. - -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use PPI::Lexer; -use PPI::Dumper; - - - - - -##################################################################### -# Prepare +# Compare a large number of specific code samples (.code) +# with the expected Lexer dumps (.dump). +use t::lib::PPI::Test::pragmas; use Test::More tests => 219; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; -use t::lib::PPI; - - - +use File::Spec::Functions ':ALL'; +use PPI::Lexer; +use t::lib::PPI::Test::Run; ##################################################################### # Code/Dump Testing -# ntests = 2 + 15 * nfiles -t::lib::PPI->run_testdir( catdir( 't', 'data', '05_lexer' ) ); +t::lib::PPI::Test::Run->run_testdir( catdir( 't', 'data', '05_lexer' ) ); diff --git a/t/06_round_trip.t b/t/06_round_trip.t index 5f43af81..f691f5e3 100644 --- a/t/06_round_trip.t +++ b/t/06_round_trip.t @@ -3,19 +3,12 @@ # Load ALL of the PPI files, lex them in, dump them # out, and verify that the code goes in and out cleanly. -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More; # Plan comes later -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI; - - +use t::lib::PPI::Test 'find_files'; @@ -104,17 +97,3 @@ sub roundtrip_ok { } } } - -# Find file names in named t/data dirs -sub find_files { - my $testdir = shift; - - # Does the test directory exist? 
- -e $testdir and -d $testdir and -r $testdir or die "Failed to find test directory $testdir"; - - # Find the .code test files - opendir( TESTDIR, $testdir ) or die "opendir: $!"; - my @perl = map { catfile( $testdir, $_ ) } sort grep { /\.(?:code|pm|t)$/ } readdir(TESTDIR); - closedir( TESTDIR ) or die "closedir: $!"; - return @perl; -} diff --git a/t/07_token.t b/t/07_token.t index 4d5e803d..05c7ba03 100644 --- a/t/07_token.t +++ b/t/07_token.t @@ -2,20 +2,12 @@ # Formal unit tests for specific PPI::Token classes -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -# Execute the tests +use t::lib::PPI::Test::pragmas; use Test::More tests => 447; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; -use t::lib::PPI; use PPI; +use t::lib::PPI::Test::Run; @@ -23,9 +15,8 @@ use PPI; ##################################################################### # Code/Dump Testing -# ntests = 2 + 12 * nfiles -t::lib::PPI->run_testdir( catdir( 't', 'data', '07_token' ) ); +t::lib::PPI::Test::Run->run_testdir( catdir( 't', 'data', '07_token' ) ); @@ -33,7 +24,7 @@ t::lib::PPI->run_testdir( catdir( 't', 'data', '07_token' ) ); ##################################################################### # PPI::Token::Symbol Unit Tests -# Note: braces and the symbol() method are tested in regression.t +# Note: braces and the symbol() method are tested in 08_regression.t SCOPE: { # Test both creation methods diff --git a/t/08_regression.t b/t/08_regression.t index 5b7bee8b..4e48b565 100644 --- a/t/08_regression.t +++ b/t/08_regression.t @@ -4,34 +4,19 @@ # Some other regressions tests are included here for simplicity. 
-use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -# For each new item in t/data/08_regression add another 15 tests +use t::lib::PPI::Test::pragmas; use Test::More tests => 932; -use Test::NoWarnings; -use t::lib::PPI; -use PPI; - -sub pause { - local $@; - sleep 1 if !eval { require Time::HiRes; Time::HiRes::sleep(0.1); 1 }; -} - +use PPI; +use t::lib::PPI::Test 'pause'; +use t::lib::PPI::Test::Run; ##################################################################### # Code/Dump Testing -# ntests = 2 + 14 * nfiles -t::lib::PPI->run_testdir(qw{ t data 08_regression }); +t::lib::PPI::Test::Run->run_testdir(qw{ t data 08_regression }); diff --git a/t/09_normal.t b/t/09_normal.t index 8935df11..296644e3 100644 --- a/t/09_normal.t +++ b/t/09_normal.t @@ -3,16 +3,9 @@ # Testing of the normalization functions. # (only very basic at this point) -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 14; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI; diff --git a/t/10_statement.t b/t/10_statement.t index 61ca1255..b7a196d0 100644 --- a/t/10_statement.t +++ b/t/10_statement.t @@ -2,49 +2,10 @@ # Test the various PPI::Statement packages -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -# Execute the tests -use Test::More tests => 12; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; -use Scalar::Util 'refaddr'; -use PPI::Lexer (); - - +use t::lib::PPI::Test::pragmas; +use Test::More tests => 6; - - -##################################################################### -# Tests for PPI::Statement::Package - -SCOPE: { - # Create a document with various example package statements - my $Document = PPI::Lexer->lex_source( <<'END_PERL' ); -package Foo; 
-SCOPE: { - package # comment - Bar::Baz; - 1; -} -1; -END_PERL - isa_ok( $Document, 'PPI::Document' ); - - # Check that both of the package statements are detected - my $packages = $Document->find('Statement::Package'); - is( scalar(@$packages), 2, 'Found 2 package statements' ); - is( $packages->[0]->namespace, 'Foo', 'Package 1 returns correct namespace' ); - is( $packages->[1]->namespace, 'Bar::Baz', 'Package 2 returns correct namespace' ); - is( $packages->[0]->file_scoped, 1, '->file_scoped returns true for package 1' ); - is( $packages->[1]->file_scoped, '', '->file_scoped returns false for package 2' ); -} +use PPI; diff --git a/t/11_util.t b/t/11_util.t index 9dcf3094..d802c1de 100644 --- a/t/11_util.t +++ b/t/11_util.t @@ -2,18 +2,10 @@ # Test the PPI::Util package -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 13; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; -use PPI::Lexer (); use PPI; use PPI::Util qw{_Document _slurp}; diff --git a/t/12_location.t b/t/12_location.t index 5aeabdbb..673e0881 100644 --- a/t/12_location.t +++ b/t/12_location.t @@ -2,19 +2,12 @@ # Tests the accuracy and features for location functionality -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 683; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; + use PPI; + my $test_source = <<'END_PERL'; my $foo = 'bar'; diff --git a/t/13_data.t b/t/13_data.t index 30659368..bca4d972 100644 --- a/t/13_data.t +++ b/t/13_data.t @@ -2,19 +2,13 @@ # Tests functionality relating to __DATA__ sections of files -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 8; 
-use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI; + my $module = catfile('t', 'data', '13_data', 'Foo.pm'); ok( -f $module, 'Test file exists' ); diff --git a/t/14_charsets.t b/t/14_charsets.t index d4d1745a..de8734ff 100644 --- a/t/14_charsets.t +++ b/t/14_charsets.t @@ -1,14 +1,7 @@ #!/usr/bin/perl -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More; - BEGIN { if ($] < 5.008007) { Test::More->import( skip_all => "Unicode support requires perl 5.8.7" ); @@ -17,9 +10,7 @@ BEGIN { plan( tests => 17 ); } -use Test::NoWarnings; -use utf8; -use File::Spec::Functions ':ALL'; +use utf8; # perl version check above says this is okay use Params::Util qw{_INSTANCE}; use PPI; diff --git a/t/15_transform.t b/t/15_transform.t index 662835fa..62b0e5fe 100644 --- a/t/15_transform.t +++ b/t/15_transform.t @@ -1,15 +1,8 @@ #!/usr/bin/perl -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More 0.86 tests => 24; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use File::Remove; use PPI; diff --git a/t/16_xml.t b/t/16_xml.t index 9f16f561..6895fd4c 100644 --- a/t/16_xml.t +++ b/t/16_xml.t @@ -1,16 +1,8 @@ #!/usr/bin/perl -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More 0.86 tests => 17; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; + use PPI; diff --git a/t/17_storable.t b/t/17_storable.t index 355850fb..88882987 100644 --- a/t/17_storable.t +++ b/t/17_storable.t @@ -2,14 +2,7 @@ # Test compatibility with Storable -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use 
t::lib::PPI::Test::pragmas; use Test::More; BEGIN { # Is Storable installed? @@ -21,7 +14,6 @@ BEGIN { } } -use Test::NoWarnings; use Scalar::Util 'refaddr'; use PPI; diff --git a/t/18_cache.t b/t/18_cache.t index 8f0251c8..749f2223 100644 --- a/t/18_cache.t +++ b/t/18_cache.t @@ -1,23 +1,17 @@ #!/usr/bin/perl -# Test compatibility with Storable - -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +# Test PPI::Cache +use t::lib::PPI::Test::pragmas; use Test::More tests => 43; -use Test::NoWarnings; + use File::Spec::Unix; use File::Spec::Functions ':ALL'; use Scalar::Util 'refaddr'; use File::Remove (); use PPI::Document (); use PPI::Cache (); +use Test::SubCalls; use constant VMS => !! ( $^O eq 'VMS' ); use constant FILE => VMS ? 'File::Spec::Unix' : 'File::Spec'; @@ -107,10 +101,7 @@ isa_ok( PPI::Document->get_cache, 'PPI::Cache' ); is( refaddr($Cache), refaddr(PPI::Document->get_cache), '->get_cache returns the same cache object' ); -SKIP: { - skip("Test::SubCalls requires >= 5.6", 7 ) if $] < 5.006; - require Test::SubCalls; - +SCOPE: { # Set the tracking on the Tokenizer constructor ok( Test::SubCalls::sub_track( 'PPI::Tokenizer::new' ), 'Tracking calls to PPI::Tokenizer::new' ); Test::SubCalls::sub_calls( 'PPI::Tokenizer::new', 0 ); @@ -130,9 +121,7 @@ SKIP: { 'PPI::Document->new with cache enabled returns two identical objects' ); } -SKIP: { - skip("Test::SubCalls requires >= 5.6", 8 ) if $] < 5.006; - +SCOPE: { # Done now, can we clear the cache? is( PPI::Document->set_cache(undef), 1, '->set_cache(undef) returns true' ); is( PPI::Document->get_cache, undef, '->get_cache returns undef' ); diff --git a/t/19_selftesting.t b/t/19_selftesting.t index 6ceb98cf..06957490 100644 --- a/t/19_selftesting.t +++ b/t/19_selftesting.t @@ -5,22 +5,16 @@ # Using PPI to analyse its own code at install-time? Fuck yeah! 
:) -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More; # Plan comes later -use Test::NoWarnings; + use Test::Object; use File::Spec::Functions ':ALL'; use Params::Util qw{_CLASS _ARRAY _INSTANCE _IDENTIFIER}; use Class::Inspector; use PPI; -use t::lib::PPI; +use t::lib::PPI::Test 'find_files'; +use t::lib::PPI::Test::Object; use constant CI => 'Class::Inspector'; @@ -42,7 +36,7 @@ my @files = sort values %tests; # Find all the testable perl files in t/data foreach my $dir ( '05_lexer', '08_regression', '11_util', '13_data', '15_transform' ) { - my @perl = find_files( $dir ); + my @perl = find_files( catdir('t', 'data', $dir) ); push @files, @perl; } @@ -84,7 +78,7 @@ is_deeply( $bad, [ 'Bad::Class1', 'Bad::Class2', 'Bad::Class3', 'Bad::Class4' ], foreach my $file ( @files ) { # MD5 the raw file my $md5a = PPI::Util::md5hex_file($file); - like( $md5a, qr/^[0-9a-f]{32}\z/, 'md5hex_file ok' ); + like( $md5a, qr/^[[:xdigit:]]{32}\z/, 'md5hex_file ok' ); # Load the file my $Document = PPI::Document->new($file); @@ -133,21 +127,6 @@ foreach my $file ( @files ) { ##################################################################### # Test Functions -# Find file names in named t/data dirs -sub find_files { - my $dir = shift; - my $testdir = catdir( 't', 'data', $dir ); - - # Does the test directory exist? - -e $testdir and -d $testdir and -r $testdir or die "Failed to find test directory $testdir"; - - # Find the .code test files - opendir( TESTDIR, $testdir ) or die "opendir: $!"; - my @perl = map { catfile( $testdir, $_ ) } sort grep { /\.(?:code|pm)$/ } readdir(TESTDIR); - closedir( TESTDIR ) or die "closedir: $!"; - return @perl; -} - # Check for accidental use of illegal or non-existant classes in # ->isa calls. This has happened at least once, presumably because # PPI has a LOT of classes and it can get confusing. 
diff --git a/t/20_tokenizer_regression.t b/t/20_tokenizer_regression.t deleted file mode 100644 index d9be7a26..00000000 --- a/t/20_tokenizer_regression.t +++ /dev/null @@ -1,145 +0,0 @@ -#!/usr/bin/perl - -# code/dump-style regression tests for known lexing problems. - -# Some other regressions tests are included here for simplicity. - -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -use File::Spec::Functions ':ALL'; - -use PPI::Lexer; -use PPI::Dumper; -use Carp 'croak'; -use Params::Util qw{_INSTANCE}; - -sub pause { - local $@; - sleep 1 if !eval { require Time::HiRes; Time::HiRes::sleep(0.1); 1 }; -} - - - - - -##################################################################### -# Prepare - -use vars qw{@FAILURES}; -BEGIN { - @FAILURES = ( - # Failed cases 3 chars or less - '!%:', '!%:', '!%:', '!%:', '!*:', '!@:', '%:', '%:,', - '%:;', '*:', '*:,', '*::', '*:;', '+%:', '+*:', '+@:', - '-%:', '-*:', '-@:', ';%:', ';*:', ';@:', '@:', '@:,', - '@::', '@:;', '\%:', '\&:', '\*:', '\@:', '~%:', '~*:', - '~@:', '(<', '(<', '=<', 'm(', 'm(', 'm<', 'm[', - 'm{', 'q(', 'q<', 'q[', 'q{', 's(', 's<', 's[', - 's{', 'y(', 'y<', 'y[', 'y{', '$\'0', '009', '0bB', - '0xX', '009;', '0bB;', '0xX;', "<<'", '<<"', '<<`', '&::', - '<s', 's<>-', - '*::0', '*::1', '*:::', '*::\'', '$::0', '$:::', '$::\'', - '@::0', '@::1', '@:::', '&::0', '&::\'', '%:::', '%::\'', - - # More-specific single cases thrown up during the heavy testing - '$:::z', '*:::z', "\\\@::'9:!", "} mz}~<\nV" - ); -} - -use Test::More tests => 1 + scalar(@FAILURES) * 3; -use Test::NoWarnings; - - - - - -##################################################################### -# Code/Dump Testing - -foreach my $code ( @FAILURES ) { - test_code( $code ); - - # Verify there are no stale %PARENT entries - my $quotable = quotable($code); - is( scalar(keys %PPI::Element::PARENT), 0, - "\"$quotable\": No stale %PARENT entries" ); - 
%PPI::Element::PARENT = %PPI::Element::PARENT; -} - -exit(0); - - - - - -##################################################################### -# Support Functions - -sub test_code { - my $code = shift; - my $quotable = quotable($code); - my $Document = eval { - # $SIG{__WARN__} = sub { croak('Triggered a warning') }; - PPI::Document->new(\$code); - }; - ok( _INSTANCE($Document, 'PPI::Document'), - "\"$quotable\": Document parses ok" ); - unless ( _INSTANCE($Document, 'PPI::Document') ) { - diag( "\"$quotable\": Parsing failed" ); - my $short = quotable(quickcheck($code)); - diag( "Shortest failing substring: \"$short\"" ); - return; - } - - # Version of the code for use in error messages - my $joined = $Document->serialize; - my $joined_quotable = quotable($joined); - is( $joined, $code, - "\"$quotable\": Document round-trips ok: \"$joined_quotable\"" ); -} - -# Find the shortest failing substring of known bad string -sub quickcheck { - my $code = shift; - my $fails = $code; - # $SIG{__WARN__} = sub { croak('Triggered a warning') }; - - while ( length $fails ) { - chop $code; - PPI::Document->new(\$code) or last; - $fails = $code; - } - - while ( length $fails ) { - substr( $code, 0, 1, '' ); - PPI::Document->new(\$code) or return $fails; - $fails = $code; - } - - return $fails; -} - -sub quotable { - my $quotable = shift; - $quotable =~ s/\\/\\\\/g; - $quotable =~ s/\t/\\t/g; - $quotable =~ s/\n/\\n/g; - $quotable =~ s/\$/\\\$/g; - $quotable =~ s/\@/\\\@/g; - return $quotable; -} diff --git a/t/21_exhaustive.t b/t/21_exhaustive.t index de89f86c..1ac9b317 100644 --- a/t/21_exhaustive.t +++ b/t/21_exhaustive.t @@ -2,16 +2,14 @@ # Exhaustively test all possible Perl programs to a particular length -use strict; -use Carp 'croak'; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; +use Test::More; # Plan comes later + +use Params::Util qw{_INSTANCE}; +use PPI; +use 
t::lib::PPI::Test 'quotable'; -use vars qw{$MAX_CHARS $ITERATIONS $LENGTH @ALL_CHARS}; +use vars qw{$MAX_CHARS $ITERATIONS $LENGTH @ALL_CHARS @FAILURES}; BEGIN { # When distributing, keep this in to verify the test script # is working correctly, but limit to 2 (maaaaybe 3) so we @@ -33,30 +31,38 @@ BEGIN { # '!', '~', '^', '*', '$', '@', '&', ':', '%', '#', ',', "'", '"', '`', # '\\', '/', '_', ' ', "\n", "\t", '-', # ); -} - - - - - -##################################################################### -# Prepare -use Test::More tests => ($MAX_CHARS + $ITERATIONS + 3); -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; -use Params::Util qw{_INSTANCE}; -use PPI; - - - - - -##################################################################### -# Retest Previous Failures - -test_code2( "( {8" ); + # Cases known to have failed in the past. + @FAILURES = ( + # Failed cases 3 chars or less + '!%:', '!%:', '!%:', '!%:', '!*:', '!@:', '%:', '%:,', + '%:;', '*:', '*:,', '*::', '*:;', '+%:', '+*:', '+@:', + '-%:', '-*:', '-@:', ';%:', ';*:', ';@:', '@:', '@:,', + '@::', '@:;', '\%:', '\&:', '\*:', '\@:', '~%:', '~*:', + '~@:', '(<', '(<', '=<', 'm(', 'm(', 'm<', 'm[', + 'm{', 'q(', 'q<', 'q[', 'q{', 's(', 's<', 's[', + 's{', 'y(', 'y<', 'y[', 'y{', '$\'0', '009', '0bB', + '0xX', '009;', '0bB;', '0xX;', "<<'", '<<"', '<<`', '&::', + '<s', 's<>-', + '*::0', '*::1', '*:::', '*::\'', '$::0', '$:::', '$::\'', + '@::0', '@::1', '@:::', '&::0', '&::\'', '%:::', '%::\'', + + # More-specific single cases thrown up during the heavy testing + '$:::z', '*:::z', "\\\@::'9:!", "} mz}~<\nV", + "( {8", + ); +} +use Test::More tests => ($MAX_CHARS + $ITERATIONS + @FAILURES + 1); @@ -64,15 +70,14 @@ test_code2( "( {8" ); ##################################################################### # Code/Dump Testing -my $failures = 0; my $last_index = scalar(@ALL_CHARS) - 1; LENGTHLOOP: foreach my $len ( 1 .. 
$MAX_CHARS ) { - # Initialise the char array and failure count - my $failures = 0; + # Initialise the char array my @chars = (0) x $len; # The main test loop + my $failures = 0; # simulate subtests CHARLOOP: while ( 1 ) { # Test the current set of chars @@ -80,7 +85,7 @@ foreach my $len ( 1 .. $MAX_CHARS ) { unless ( length($code) == $len ) { die "Failed sanity check. Error in the code generation mechanism"; } - test_code( $code ); + $failures += 1 if !compare_code( $code ); # Increment the last character $chars[$len - 1]++; @@ -115,19 +120,21 @@ for ( 1 .. $ITERATIONS ) { map { int(rand($last_index) + 1) } (1 .. $LENGTH) ); + ok( compare_code($code), "round trip successful" ); +} - # Test it as normal - test_code2( $code ); - # Verify there are no stale %PARENT entries - #my $quotable = quotable($code); - #is( scalar(keys %PPI::Element::PARENT), 0, - # "%PARENT is clean \"$quotable\"" ); + + +##################################################################### +# Test all the failures + +foreach my $code ( @FAILURES ) { + ok( compare_code($code), "round trip of old failure successful" ); } -is( scalar(keys %PPI::Element::PARENT), 0, - 'No stale \%PARENT entries at the end of testing' ); -%PPI::Element::PARENT = %PPI::Element::PARENT; + +exit(0); @@ -136,47 +143,65 @@ is( scalar(keys %PPI::Element::PARENT), 0, ##################################################################### # Support Functions -sub test_code2 { - $failures = 0; - my $string = shift; - my $quotable = quotable($string); - test_code( $string ); - is( $failures, 0, "String parses ok \"$quotable\"" ); +sub compare_code { + my ( $code ) = @_; + + my $round_tripped = round_trip_code($code); + my $ok = ($code eq $round_tripped); + if ( !$ok ) { + my $code_quoted = quotable($code); + diag( qq{input: "$code_quoted"} ); + my $round_tripped_quoted = quotable($round_tripped); + diag( qq{output: "$round_tripped_quoted"} ); + my $shortest = quotable(quickcheck($code)); + diag( qq{shorted failing substring: 
"$shortest"} ); + } + + if ( scalar(keys %PPI::Element::PARENT) != 0 ) { + $ok = 0; + my $code_quoted = quotable($code); + diag( qq{ Stale \%PARENT entries at the end of testing of "$code_quoted"} ); + } + %PPI::Element::PARENT = %PPI::Element::PARENT; + + return $ok; } -sub test_code { - my $code = shift; + +sub round_trip_code { + my ( $code ) = @_; + + my $result; + my $Document = eval { - # $SIG{__WARN__} = sub { croak('Triggered a warning') }; + # use Carp 'croak'; $SIG{__WARN__} = sub { croak('Triggered a warning') }; PPI::Document->new(\$code); }; + if ( _INSTANCE($Document, 'PPI::Document') ) { + $result = $Document->serialize; + } + + return $result; +} + - # Version of the code for use in error messages - my $quotable = quotable($code); - unless ( _INSTANCE($Document, 'PPI::Document') ) { - $failures++; - diag( "\"$quotable\": Parser did not return a Document" ); - return; +# Find the shortest failing substring of known bad string +sub quickcheck { + my $code = shift; + my $fails = $code; + # use Carp 'croak'; $SIG{__WARN__} = sub { croak('Triggered a warning') }; + + while ( length $fails ) { + chop $code; + PPI::Document->new(\$code) or last; + $fails = $code; } - my $joined = $Document->serialize; - my $joined_quotable = quotable($joined); - unless ( $joined eq $code ) { - $failures++; - diag( "\"$quotable\": Document round-trips ok" ); - diag( "\"$joined_quotable\" (round-trips to)" ); - return; + + while ( length $fails ) { + substr( $code, 0, 1, '' ); + PPI::Document->new(\$code) or return $fails; + $fails = $code; } -} -sub quotable { - my $quotable = shift; - $quotable =~ s/\\/\\\\/g; - $quotable =~ s/\t/\\t/g; - $quotable =~ s/\n/\\n/g; - $quotable =~ s/\$/\\\$/g; - $quotable =~ s/\@/\\\@/g; - $quotable =~ s/\"/\\\"/g; - return $quotable; + return $fails; } - -exit(0); diff --git a/t/22_readonly.t b/t/22_readonly.t index 966ada68..05bf9298 100644 --- a/t/22_readonly.t +++ b/t/22_readonly.t @@ -2,17 +2,9 @@ # Testing of readonly functionality -use 
strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 9; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; + use PPI::Document; diff --git a/t/23_file.t b/t/23_file.t index c6fb86d9..68138b60 100644 --- a/t/23_file.t +++ b/t/23_file.t @@ -2,16 +2,9 @@ # Testing of PPI::Document::File -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 5; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI::Document::File; diff --git a/t/24_v6.t b/t/24_v6.t index 5445f67b..a015276a 100644 --- a/t/24_v6.t +++ b/t/24_v6.t @@ -3,16 +3,9 @@ # Regression test of a Perl 5 grammar that exploded # with a "98 subroutine recursion" error in 1.201 -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 9; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI; diff --git a/t/25_increment.t b/t/25_increment.t index 205b9c7b..475741e0 100644 --- a/t/25_increment.t +++ b/t/25_increment.t @@ -5,21 +5,11 @@ # state between an empty document and the entire file to make sure # all of them parse as legal documents and don't crash the parser. 
-use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 3876; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; -use Params::Util qw{_INSTANCE}; -use PPI::Lexer; -use PPI::Dumper; -use t::lib::PPI; + +use PPI; +use t::lib::PPI::Test::Run; @@ -28,4 +18,4 @@ use t::lib::PPI; ##################################################################### # Code/Dump Testing -t::lib::PPI->increment_testdir(qw{ t data 08_regression }); +t::lib::PPI::Test::Run->increment_testdir(qw{ t data 08_regression }); diff --git a/t/26_bom.t b/t/26_bom.t index 9b9a03e9..134cd659 100644 --- a/t/26_bom.t +++ b/t/26_bom.t @@ -1,18 +1,9 @@ #!/usr/bin/perl -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -# For each new item in t/data/08_regression add another 14 tests +use t::lib::PPI::Test::pragmas; use Test::More tests => 21; -use Test::NoWarnings; -use t::lib::PPI; -use PPI; + +use t::lib::PPI::Test::Run; @@ -20,6 +11,5 @@ use PPI; ##################################################################### # Code/Dump Testing -# ntests = 2 + 14 * nfiles -t::lib::PPI->run_testdir(qw{ t data 26_bom }); +t::lib::PPI::Test::Run->run_testdir(qw{ t data 26_bom }); diff --git a/t/27_complete.t b/t/27_complete.t index be4f2f8d..559217ad 100644 --- a/t/27_complete.t +++ b/t/27_complete.t @@ -2,18 +2,12 @@ # Testing for the PPI::Document ->complete method -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More; -use Test::NoWarnings; + use File::Spec::Functions ':ALL'; use PPI; +use t::lib::PPI::Test 'find_files'; # This test uses a series of ordered files, containing test code. 
# The letter after the number acts as a boolean yes/no answer to @@ -41,23 +35,3 @@ foreach my $file ( @files ) { my $isnot = ($got == $expected) ? 'is' : 'is NOT'; is( $got, $expected, "File $file $isnot complete" ); } - - - - - -##################################################################### -# Support Functions - -sub find_files { - my $testdir = shift; - - # Does the test directory exist? - -e $testdir and -d $testdir and -r $testdir or die "Failed to find test directory $testdir"; - - # Find the .code test files - opendir( TESTDIR, $testdir ) or die "opendir: $!"; - my @perl = map { catfile( $testdir, $_ ) } sort grep { /\.(?:code|pm|t)$/ } readdir(TESTDIR); - closedir( TESTDIR ) or die "closedir: $!"; - return @perl; -} diff --git a/t/28_foreach_qw.t b/t/28_foreach_qw.t index 1b0d3fef..6c00dbf4 100644 --- a/t/28_foreach_qw.t +++ b/t/28_foreach_qw.t @@ -2,17 +2,10 @@ # Standalone tests to check "foreach qw{foo} {}" -use strict; -BEGIN { - no warnings 'once'; - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 13; -use Test::NoWarnings; -use File::Spec::Functions ':ALL'; + +#use File::Spec::Functions ':ALL'; use PPI; diff --git a/t/interactive.t b/t/interactive.t index 58bf31e2..519a2970 100644 --- a/t/interactive.t +++ b/t/interactive.t @@ -1,20 +1,14 @@ #!/usr/bin/perl # Script used to temporarily test the most recent parser bug. -# Testing it here is must more efficient than having to trace +# Testing it here is much more efficient than having to trace # down through the entire set of regression tests. 
-use strict; -use File::Spec::Functions ':ALL'; -BEGIN { - $| = 1; - $PPI::XS_DISABLE = 1; - $PPI::XS_DISABLE = 1; # Prevent warning -} +use t::lib::PPI::Test::pragmas; +use Test::More tests => 3; + use PPI; -# Execute the tests -use Test::More tests => 2; # Define the test code my $code = 'sub f:f('; diff --git a/t/lib/PPI/Test.pm b/t/lib/PPI/Test.pm new file mode 100644 index 00000000..32f4d0fe --- /dev/null +++ b/t/lib/PPI/Test.pm @@ -0,0 +1,50 @@ +package t::lib::PPI::Test; + +use warnings; +use strict; + +use File::Spec::Functions (); + +use vars qw{$VERSION @ISA @EXPORT_OK %EXPORT_TAGS}; +BEGIN { + $VERSION = '1.220'; + @ISA = 'Exporter'; + @EXPORT_OK = qw( find_files quotable pause ); +} + + +# Find file names in named t/data dirs +sub find_files { + my ( $testdir ) = @_; + + # Does the test directory exist? + die "Failed to find test directory $testdir" if !-e $testdir or !-d $testdir or !-r $testdir; + + # Find the .code test files + opendir my $TESTDIR, $testdir or die "opendir: $!"; + my @perl = map { File::Spec::Functions::catfile( $testdir, $_ ) } sort grep { /\.(?:code|pm|t)$/ } readdir $TESTDIR; + closedir $TESTDIR or die "closedir: $!"; + + return @perl; +} + + +sub quotable { + my ( $quotable ) = @_; + $quotable =~ s|\\|\\\\|g; + $quotable =~ s|\t|\\t|g; + $quotable =~ s|\n|\\n|g; + $quotable =~ s|\$|\\\$|g; + $quotable =~ s|\@|\\\@|g; + $quotable =~ s|\"|\\\"|g; + return $quotable; +} + + +sub pause { + local $@; + sleep 1 if !eval { require Time::HiRes; Time::HiRes::sleep(0.1); 1 }; +} + + +1; diff --git a/t/lib/PPI/Test/Object.pm b/t/lib/PPI/Test/Object.pm new file mode 100755 index 00000000..06207c5d --- /dev/null +++ b/t/lib/PPI/Test/Object.pm @@ -0,0 +1,192 @@ +package t::lib::PPI::Test::Object; + +use warnings; +use strict; + +use List::MoreUtils 'any'; +use Params::Util qw{_INSTANCE}; +use PPI::Dumper; +use Test::More; +use Test::Object; + +use vars qw{$VERSION}; +BEGIN { + $VERSION = '1.220'; +} + + + + + 
+##################################################################### +# PPI::Document Testing + +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&document_ok, +); + +sub document_ok { + my $doc = shift; + + # A document should have zero or more children that are either + # a statement or a non-significant child. + my @children = $doc->children; + my $good = grep { + _INSTANCE($_, 'PPI::Statement') + or ( + _INSTANCE($_, 'PPI::Token') and ! $_->significant + ) + } @children; + + is( $good, scalar(@children), + 'Document contains only statements and non-significant tokens' ); + + 1; +} + + + + + +##################################################################### +# Are there an unknowns + +Test::Object->register( + class => 'PPI::Document', + tests => 3, + code => \&unknown_objects, +); + +sub unknown_objects { + my $doc = shift; + + is( + $doc->find_any('Token::Unknown'), + '', + "Contains no PPI::Token::Unknown elements", + ); + is( + $doc->find_any('Structure::Unknown'), + '', + "Contains no PPI::Structure::Unknown elements", + ); + is( + $doc->find_any('Statement::Unknown'), + '', + "Contains no PPI::Statement::Unknown elements", + ); + + 1; +} + + + + + +##################################################################### +# Are there any invalid nestings? + +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&nested_statements, +); + +sub nested_statements { + my $doc = shift; + + ok( + ! $doc->find_any( sub { + _INSTANCE($_[1], 'PPI::Statement') + and + any { _INSTANCE($_, 'PPI::Statement') } $_[1]->children + } ), + 'Document contains no nested statements', + ); +} + +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&nested_structures, +); + +sub nested_structures { + my $doc = shift; + + ok( + ! 
$doc->find_any( sub { + _INSTANCE($_[1], 'PPI::Structure') + and + any { _INSTANCE($_, 'PPI::Structure') } $_[1]->children + } ), + 'Document contains no nested structures', + ); +} + +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&no_attribute_in_attribute, +); + +sub no_attribute_in_attribute { + my $doc = shift; + + ok( + ! $doc->find_any( sub { + _INSTANCE($_[1], 'PPI::Token::Attribute') + and + ! exists $_[1]->{_attribute} + } ), + 'No ->{_attribute} in PPI::Token::Attributes', + ); +} + + + + + +##################################################################### +# PPI::Statement Tests + +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&valid_compound_type, +); + +sub valid_compound_type { + my $document = shift; + my $compound = $document->find('PPI::Statement::Compound') || []; + is( + scalar( grep { not defined $_->type } @$compound ), + 0, 'All compound statements have defined ->type', + ); +} + + + + + +##################################################################### +# Does ->location work properly +# As an aside, fixes #23788: PPI::Statement::location() returns undef for C<({})>. 
+ +Test::Object->register( + class => 'PPI::Document', + tests => 1, + code => \&defined_location, +); + +sub defined_location { + my $document = shift; + my $bad = $document->find( sub { + not defined $_[1]->location + } ); + is( $bad, '', '->location always defined' ); +} + +1; diff --git a/t/lib/PPI.pm b/t/lib/PPI/Test/Run.pm old mode 100755 new mode 100644 similarity index 53% rename from t/lib/PPI.pm rename to t/lib/PPI/Test/Run.pm index e47359e9..e1be78cc --- a/t/lib/PPI.pm +++ b/t/lib/PPI/Test/Run.pm @@ -1,199 +1,19 @@ -package t::lib::PPI; - -use warnings; -use strict; +package t::lib::PPI::Test::Run; use File::Spec::Functions ':ALL'; +use Params::Util qw{_INSTANCE}; +use PPI::Document; +use PPI::Dumper; use Test::More; use Test::Object; -use Params::Util qw{_STRING _INSTANCE}; -use List::MoreUtils 'any'; -use PPI::Dumper; +use t::lib::PPI::Test::Object; use vars qw{$VERSION}; BEGIN { - $VERSION = '1.220'; + $VERSION = '1.218'; } - - - -##################################################################### -# PPI::Document Testing - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&document_ok, -); - -sub document_ok { - my $doc = shift; - - # A document should have zero or more children that are either - # a statement or a non-significant child. - my @children = $doc->children; - my $good = grep { - _INSTANCE($_, 'PPI::Statement') - or ( - _INSTANCE($_, 'PPI::Token') and ! 
$_->significant - ) - } @children; - - is( $good, scalar(@children), - 'Document contains only statements and non-significant tokens' ); - - 1; -} - - - - - -##################################################################### -# Are there an unknowns - -Test::Object->register( - class => 'PPI::Document', - tests => 3, - code => \&unknown_objects, -); - -sub unknown_objects { - my $doc = shift; - - is( - $doc->find_any('Token::Unknown'), - '', - "Contains no PPI::Token::Unknown elements", - ); - is( - $doc->find_any('Structure::Unknown'), - '', - "Contains no PPI::Structure::Unknown elements", - ); - is( - $doc->find_any('Statement::Unknown'), - '', - "Contains no PPI::Statement::Unknown elements", - ); - - 1; -} - - - - - -##################################################################### -# Are there any invalid nestings? - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&nested_statements, -); - -sub nested_statements { - my $doc = shift; - - ok( - ! $doc->find_any( sub { - _INSTANCE($_[1], 'PPI::Statement') - and - any { _INSTANCE($_, 'PPI::Statement') } $_[1]->children - } ), - 'Document contains no nested statements', - ); -} - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&nested_structures, -); - -sub nested_structures { - my $doc = shift; - - ok( - ! $doc->find_any( sub { - _INSTANCE($_[1], 'PPI::Structure') - and - any { _INSTANCE($_, 'PPI::Structure') } $_[1]->children - } ), - 'Document contains no nested structures', - ); -} - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&no_attribute_in_attribute, -); - -sub no_attribute_in_attribute { - my $doc = shift; - - ok( - ! $doc->find_any( sub { - _INSTANCE($_[1], 'PPI::Token::Attribute') - and - ! 
exists $_[1]->{_attribute} - } ), - 'No ->{_attribute} in PPI::Token::Attributes', - ); -} - - - - - -##################################################################### -# PPI::Statement Tests - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&valid_compound_type, -); - -sub valid_compound_type { - my $document = shift; - my $compound = $document->find('PPI::Statement::Compound') || []; - is( - scalar( grep { not defined $_->type } @$compound ), - 0, 'All compound statements have defined ->type', - ); -} - - - - - -##################################################################### -# Does ->location work properly -# As an aside, fixes #23788: PPI::Statement::location() returns undef for C<({})>. - -Test::Object->register( - class => 'PPI::Document', - tests => 1, - code => \&defined_location, -); - -sub defined_location { - my $document = shift; - my $bad = $document->find( sub { - not defined $_[1]->location - } ); - is( $bad, '', '->location always defined' ); -} - - - - - ##################################################################### # Process a .code/.dump file pair # plan: 2 + 14 * npairs @@ -206,7 +26,7 @@ sub run_testdir { ok( (-e $testdir and -d $testdir and -r $testdir), "Test directory $testdir found" ); # Find the .code test files - local *TESTDIR; + local *TESTDIR; opendir( TESTDIR, $testdir ) or die "opendir: $!"; my @code = map { catfile( $testdir, $_ ) } sort grep { /\.code$/ } readdir(TESTDIR); closedir( TESTDIR ) or die "closedir: $!"; @@ -286,7 +106,7 @@ sub increment_testdir { ok( (-e $testdir and -d $testdir and -r $testdir), "Test directory $testdir found" ); # Find the .code test files - local *TESTDIR; + local *TESTDIR; opendir( TESTDIR, $testdir ) or die "opendir: $!"; my @code = map { catfile( $testdir, $_ ) } sort grep { /\.code$/ } readdir(TESTDIR); closedir( TESTDIR ) or die "closedir: $!"; diff --git a/t/lib/PPI/Test/pragmas.pm b/t/lib/PPI/Test/pragmas.pm new file mode 100644 index 
00000000..07b6251e --- /dev/null +++ b/t/lib/PPI/Test/pragmas.pm @@ -0,0 +1,33 @@ +package t::lib::PPI::Test::pragmas; + +=head1 NAME + +PPI::Test::pragmas -- standard compiler/runtime setup for PPI tests + +=cut + +use 5.006; +use strict; +use warnings; + +use Test::NoWarnings; + +BEGIN { + select STDERR; ## no critic ( InputOutput::ProhibitOneArgSelect ) + $| = 1; + select STDOUT; ## no critic ( InputOutput::ProhibitOneArgSelect ) + + no warnings 'once'; ## no critic ( TestingAndDebugging::ProhibitNoWarnings ) + $PPI::XS_DISABLE = 1; + $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; +} + +sub import { + strict->import(); + warnings->import(); + Test::NoWarnings->import(); + return; +} + + +1; diff --git a/t/ppi_element.t b/t/ppi_element.t index b926e683..e3cf2427 100644 --- a/t/ppi_element.t +++ b/t/ppi_element.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Element -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 58; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_lexer.t b/t/ppi_lexer.t index 06315e8f..0cc30b82 100644 --- a/t/ppi_lexer.t +++ b/t/ppi_lexer.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Lexer -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 44; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_node.t b/t/ppi_node.t index 71edfa62..b93bef83 100644 --- a/t/ppi_node.t +++ b/t/ppi_node.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Node -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 3; -use Test::NoWarnings; + use PPI; @@ -19,6 +12,7 @@ PRUNE: { # Avoids a bug in old Perls relating to the detection of scripts # 
Known to occur in ActivePerl 5.6.1 and at least one 5.6.2 install. my $hashbang = reverse 'lrep/nib/rsu/!#'; + my $document = PPI::Document->new( \<<"END_PERL" ); $hashbang diff --git a/t/ppi_normal.t b/t/ppi_normal.t index 0831f4ee..c24ccc6b 100644 --- a/t/ppi_normal.t +++ b/t/ppi_normal.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Normal -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 28; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_statement.t b/t/ppi_statement.t index 076e609c..73e0eeb0 100644 --- a/t/ppi_statement.t +++ b/t/ppi_statement.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Statement -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 23; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_statement_compound.t b/t/ppi_statement_compound.t index ce3490c8..a7b8b0af 100644 --- a/t/ppi_statement_compound.t +++ b/t/ppi_statement_compound.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Statement::Compound -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 53; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_statement_include.t b/t/ppi_statement_include.t index 20abebea..d477da39 100644 --- a/t/ppi_statement_include.t +++ b/t/ppi_statement_include.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Statement::Include -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More tests => 64; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 12066; + use PPI; @@ -45,11 +38,13 @@ use No::Version; use 
No::Version::With::Argument 'x'; use No::Version::With::Arguments 1, 2; use 5.005; +use VString::Version v10; +use VString::Version::Decimal v1.5; END_PERL isa_ok( $document, 'PPI::Document' ); my $statements = $document->find('PPI::Statement::Include'); - is( scalar @{$statements}, 7, 'Found expected include statements.' ); + is( scalar @{$statements}, 9, 'Found expected include statements.' ); is( $statements->[0]->module_version, 1, 'Integer version' ); is( $statements->[1]->module_version, 1.5, 'Float version' ); is( $statements->[2]->module_version, 1, 'Version and argument' ); @@ -57,6 +52,8 @@ END_PERL is( $statements->[4]->module_version, undef, 'No version, with argument' ); is( $statements->[5]->module_version, undef, 'No version, with arguments' ); is( $statements->[6]->module_version, undef, 'Version include, no module' ); + is( $statements->[7]->module_version, 'v10', 'Version string' ); + is( $statements->[8]->module_version, 'v1.5', 'Version string with decimal' ); } @@ -235,3 +232,50 @@ END_PERL 'arguments with Test::More', ); } + + +KEYWORDS_AS_MODULE_NAMES: { + for my $name ( + # normal names + 'Foo', + 'Foo::Bar', + 'Foo::Bar::Baz', + 'version', + # Keywords must parse as Word and not influence lexing + # of subsequent curly braces. 
+ keys %PPI::Token::Word::KEYWORDS, + # Other weird and/or special words, just in case + '__PACKAGE__', + '__FILE__', + '__LINE__', + '__SUB__', + 'AUTOLOAD', + ) { + for my $include ( 'use', 'no' ) { # 'require' does not force tokens to be words + for my $version ( '', 'v1.2.3', '1.2.3', 'v10' ) { + my $code = "$include $name $version;"; + + my $Document = PPI::Document->new( \"$code 999;" ); + is( $Document->schildren(), 2, "$code number of statements in document" ); + isa_ok( $Document->schild(0), 'PPI::Statement::Include', $code ); + + # first child is the include statement + my $expected_tokens = [ + [ 'PPI::Token::Word', $include ], + [ 'PPI::Token::Word', $name ], + ]; + if ( $version ) { + push @$expected_tokens, [ 'PPI::Token::Number::Version', $version ]; + } + push @$expected_tokens, [ 'PPI::Token::Structure', ';' ]; + my $got_tokens = [ map { [ ref $_, "$_" ] } $Document->schild(0)->schildren() ]; + is_deeply( $got_tokens, $expected_tokens, "$code tokens as expected" ); + + # second child not swallowed up by the first + isa_ok( $Document->schild(1), 'PPI::Statement', "$code prior statement end recognized" ); + isa_ok( $Document->schild(1)->schild(0), 'PPI::Token::Number', $code ); + is( $Document->schild(1)->schild(0), '999', "$code number correct" ); + } + } + } +} diff --git a/t/ppi_statement_package.t b/t/ppi_statement_package.t index 57a3a45e..643d04c3 100644 --- a/t/ppi_statement_package.t +++ b/t/ppi_statement_package.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Statement::Package -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More tests => 3; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 14949; + use PPI; @@ -35,3 +28,111 @@ END_PERL diag $_->parent()->parent()->content() foreach @{$packages}; } } + + +INSIDE_SCOPE: { + # Create a document with various example package statements + my $Document = 
PPI::Document->new( \<<'END_PERL' ); +package Foo; +SCOPE: { + package # comment + Bar::Baz; + 1; +} +1; +END_PERL + isa_ok( $Document, 'PPI::Document' ); + + # Check that both of the package statements are detected + my $packages = $Document->find('Statement::Package'); + is( scalar(@$packages), 2, 'Found 2 package statements' ); + is( $packages->[0]->namespace, 'Foo', 'Package 1 returns correct namespace' ); + is( $packages->[1]->namespace, 'Bar::Baz', 'Package 2 returns correct namespace' ); + is( $packages->[0]->file_scoped, 1, '->file_scoped returns true for package 1' ); + is( $packages->[1]->file_scoped, '', '->file_scoped returns false for package 2' ); +} + + +PERL_5_12_SYNTAX: { + my @names = ( + # normal name + 'Foo', + # Keywords must parse as Word and not influence lexing + # of subsequent curly braces. + keys %PPI::Token::Word::KEYWORDS, + # regression: misparsed as version string + 'v10', + # regression GitHub #122: 'x' parsed as x operator + 'x64', + # Other weird and/or special words, just in case + '__PACKAGE__', + '__FILE__', + '__LINE__', + '__SUB__', + 'AUTOLOAD', + ); + my @versions = ( + [ 'v1.2.3 ', 'PPI::Token::Number::Version' ], + [ 'v1.2.3', 'PPI::Token::Number::Version' ], + [ '0.50 ', 'PPI::Token::Number::Float' ], + [ '0.50', 'PPI::Token::Number::Float' ], + [ '', '' ], # omit version, traditional + ); + my @blocks = ( + [ ';', 'PPI::Token::Structure' ], # traditional package syntax + [ '{ 1 }', 'PPI::Structure::Block' ], # 5.12 package syntax + ); + $_->[2] = strip_ws_padding( $_->[0] ) for @versions, @blocks; + + for my $name ( @names ) { + for my $version_pair ( @versions ) { + for my $block_pair ( @blocks ) { + my @test = prepare_package_test( $version_pair, $block_pair, $name ); + test_package_blocks( @test ); + } + } + } +} + +sub strip_ws_padding { + my ( $string ) = @_; + $string =~ s/(^\s+|\s+$)//g; + return $string; +} + +sub prepare_package_test { + my ( $version_pair, $block_pair, $name ) = @_; + + my ( $version, 
$version_type, $version_stripped ) = @{$version_pair}; + my ( $block, $block_type, $block_stripped ) = @{$block_pair}; + + my $code = "package $name $version$block"; + + my $expected_package_tokens = [ + [ 'PPI::Token::Word', 'package' ], + [ 'PPI::Token::Word', $name ], + ($version ne '') ? [ $version_type, $version_stripped ] : (), + [ $block_type, $block_stripped ], + ]; + + return ( $code, $expected_package_tokens ); +} + +sub test_package_blocks { + my ( $code, $expected_package_tokens ) = @_; + + my $Document = PPI::Document->new( \"$code 999;" ); + is( $Document->schildren, 2, "$code number of statements in document" ); + isa_ok( $Document->schild(0), 'PPI::Statement::Package', $code ); + + # first child is the package statement + my $got_tokens = [ map { [ ref $_, "$_" ] } $Document->schild(0)->schildren ]; + is_deeply( $got_tokens, $expected_package_tokens, "$code tokens as expected" ); + + # second child not swallowed up by the first + isa_ok( $Document->schild(1), 'PPI::Statement', "$code prior statement end recognized" ); + isa_ok( $Document->schild(1)->schild(0), 'PPI::Token::Number', $code ); + is( $Document->schild(1)->schild(0), '999', "$code number correct" ); + + return; +} diff --git a/t/ppi_statement_scheduled.t b/t/ppi_statement_scheduled.t index fe0e59ae..463bef51 100644 --- a/t/ppi_statement_scheduled.t +++ b/t/ppi_statement_scheduled.t @@ -2,19 +2,12 @@ # Test PPI::Statement::Scheduled -use strict; - -BEGIN { - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - +use t::lib::PPI::Test::pragmas; use Test::More tests => 241; -use Test::NoWarnings; + use PPI; + SUB_WORD_OPTIONAL: { for my $name ( qw( BEGIN CHECK UNITCHECK INIT END ) ) { for my $sub ( '', 'sub ' ) { diff --git a/t/ppi_statement_sub.t b/t/ppi_statement_sub.t index 0d5e4ce7..2774fe4b 100644 --- a/t/ppi_statement_sub.t +++ b/t/ppi_statement_sub.t @@ -2,19 +2,41 @@ # Test PPI::Statement::Sub -use strict; +use 
t::lib::PPI::Test::pragmas; +use Test::More tests => 6208; -BEGIN { - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} - -use Test::More tests => 131; -use Test::NoWarnings; use PPI; +NAME: { + for my $test ( + { code => 'sub foo {}', name => 'foo' }, + { code => 'sub foo{}', name => 'foo' }, + { code => 'sub FOO {}', name => 'FOO' }, + { code => 'sub _foo {}', name => '_foo' }, + { code => 'sub _0foo {}', name => '_0foo' }, + { code => 'sub _foo0 {}', name => '_foo0' }, + { code => 'sub ___ {}', name => '___' }, + { code => 'sub bar() {}', name => 'bar' }, + { code => 'sub baz : method{}', name => 'baz' }, + { code => 'sub baz : method lvalue{}', name => 'baz' }, + { code => 'sub baz : method:lvalue{}', name => 'baz' }, + { code => 'sub baz (*) : method : lvalue{}', name => 'baz' }, + { code => 'sub x64 {}', name => 'x64' }, # should not be parsed as x operator + ) { + my $code = $test->{code}; + my $name = $test->{name}; + + my $Document = PPI::Document->new( \$code ); + isa_ok( $Document, 'PPI::Document', "$code: got document" ); + + my ( $sub_statement, $dummy ) = $Document->schildren; + isa_ok( $sub_statement, 'PPI::Statement::Sub', "$code: document child is a sub" ); + is( $dummy, undef, "$code: document has exactly one child" ); + + is( $sub_statement->name, $name, "$code: name() correct" ); + } +} + SUB_WORD_OPTIONAL: { # 'sub' is optional for these special subs. Make sure they're # recognized as subs and sub declarations. 
@@ -69,6 +91,57 @@ PROTOTYPE: { } } +BLOCK_AND_FORWARD: { + for my $test ( + { code => 'sub foo {1;}', block => '{1;}' }, + { code => 'sub foo{2;};', block => '{2;}' }, + { code => "sub foo\n{3;};", block => '{3;}' }, + { code => 'sub foo;', block => '' }, + { code => 'sub foo', block => '' }, + ) { + my $code = $test->{code}; + my $block = $test->{block}; + + my $Document = PPI::Document->new( \$code ); + isa_ok( $Document, 'PPI::Document', "$code: got document" ); + + my ( $sub_statement, $dummy ) = $Document->schildren(); + isa_ok( $sub_statement, 'PPI::Statement::Sub', "$code: document child is a sub" ); + is( $dummy, undef, "$code: document has exactly one child" ); + is( $sub_statement->block, $block, "$code: block matches" ); + + is( !$sub_statement->block, !!$sub_statement->forward, "$code: block and forward are opposites" ); + } +} + +RESERVED: { + for my $test ( + { code => 'sub BEGIN {}', reserved => 1 }, + { code => 'sub CHECK {}', reserved => 1 }, + { code => 'sub UNITCHECK {}', reserved => 1 }, + { code => 'sub INIT {}', reserved => 1 }, + { code => 'sub END {}', reserved => 1 }, + { code => 'sub AUTOLOAD {}', reserved => 1 }, + { code => 'sub CLONE_SKIP {}', reserved => 1 }, + { code => 'sub __SUB__ {}', reserved => 1 }, + { code => 'sub _FOO {}', reserved => 1 }, + { code => 'sub FOO9 {}', reserved => 1 }, + { code => 'sub FO9O {}', reserved => 1 }, + { code => 'sub FOo {}', reserved => 0 }, + ) { + my $code = $test->{code}; + my $reserved = $test->{reserved}; + + my $Document = PPI::Document->new( \$code ); + isa_ok( $Document, 'PPI::Document', "$code: got document" ); + + my ( $sub_statement, $dummy ) = $Document->schildren(); + isa_ok( $sub_statement, 'PPI::Statement::Sub', "$code: document child is a sub" ); + is( $dummy, undef, "$code: document has exactly one child" ); + is( !!$sub_statement->reserved, !!$reserved, "$code: reserved matches" ); + } +} + sub test_sub_as { my ( $sub, $name, $followed_by ) = @_; @@ -92,3 +165,75 @@ sub test_sub_as 
{ return; } + +KEYWORDS_AS_SUB_NAMES: { + my @names = ( + # normal name + 'foo', + # Keywords must parse as Word and not influence lexing + # of subsequent curly braces. + keys %PPI::Token::Word::KEYWORDS, + # regression: misparsed as version string + 'v10', + # Other weird and/or special words, just in case + '__PACKAGE__', + '__FILE__', + '__LINE__', + '__SUB__', + 'AUTOLOAD', + ); + my @blocks = ( + [ ';', 'PPI::Token::Structure' ], + [ ' ;', 'PPI::Token::Structure' ], + [ '{ 1 }', 'PPI::Structure::Block' ], + [ ' { 1 }', 'PPI::Structure::Block' ], + ); + $_->[2] = strip_ws_padding( $_->[0] ) for @blocks; + + for my $name ( @names ) { + for my $block_pair ( @blocks ) { + my @test = prepare_sub_test( $block_pair, $name ); + test_subs( @test ); + } + } +} + +sub strip_ws_padding { + my ( $string ) = @_; + $string =~ s/(^\s+|\s+$)//g; + return $string; +} + +sub prepare_sub_test { + my ( $block_pair, $name ) = @_; + + my ( $block, $block_type, $block_stripped ) = @{$block_pair}; + + my $code = "sub $name $block"; + + my $expected_sub_tokens = [ + [ 'PPI::Token::Word', 'sub' ], + [ 'PPI::Token::Word', $name ], + [ $block_type, $block_stripped ], + ]; + + return ( $code, $expected_sub_tokens ); +} + +sub test_subs { + my ( $code, $expected_sub_tokens ) = @_; + + my $Document = PPI::Document->new( \"$code 999;" ); + is( $Document->schildren, 2, "$code number of statements in document" ); + isa_ok( $Document->schild(0), 'PPI::Statement::Sub', $code ); + + my $got_tokens = [ map { [ ref $_, "$_" ] } $Document->schild(0)->schildren ]; + is_deeply( $got_tokens, $expected_sub_tokens, "$code tokens as expected" ); + + # second child not swallowed up by the first + isa_ok( $Document->schild(1), 'PPI::Statement', "$code prior statement end recognized" ); + isa_ok( $Document->schild(1)->schild(0), 'PPI::Token::Number', $code ); + is( $Document->schild(1)->schild(0), '999', "$code number correct" ); + + return; +} diff --git a/t/ppi_statement_variable.t 
b/t/ppi_statement_variable.t index aeb5b539..c5015f6f 100644 --- a/t/ppi_statement_variable.t +++ b/t/ppi_statement_variable.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Statement::Variable -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More 'no_plan'; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 18; + use PPI; diff --git a/t/ppi_token__quoteengine_full.t b/t/ppi_token__quoteengine_full.t index 344458e8..8ec284ea 100644 --- a/t/ppi_token__quoteengine_full.t +++ b/t/ppi_token__quoteengine_full.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::_QuoteEngine::Full -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 94; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_attribute.t b/t/ppi_token_attribute.t new file mode 100644 index 00000000..1a2667e0 --- /dev/null +++ b/t/ppi_token_attribute.t @@ -0,0 +1,139 @@ +#!/usr/bin/perl + +# Unit testing for PPI::Token::Attribute + +use t::lib::PPI::Test::pragmas; +use Test::More tests => 1789; + +use PPI; +use Test::Deep; + +sub execute_test; +sub permute_test; + +PARSING_AND_METHODS: { + # no attribute + execute_test 'sub foo {}', []; + execute_test 'sub foo;', []; + + # perl allows there to be no attributes following the colon. 
+ execute_test 'sub foo:{}', []; + execute_test 'sub foo : {}', []; + + # Attribute with no parameters + permute_test 'foo', [ [ 'Attr1', undef ] ]; + permute_test 'foo', [ [ 'Attr1', undef ] ]; + permute_test 'foo', [ [ 'Attr1', undef ] ]; + permute_test 'method', [ [ 'Attr1', undef ] ]; + permute_test 'lvalue', [ [ 'Attr1', undef ] ]; + permute_test 'foo', [ [ '_', undef ] ]; + + # Attribute with parameters + permute_test 'foo', [ [ 'Attr1', '' ] ]; + permute_test 'foo', [ [ 'Attr1', ' ' ] ]; + permute_test 'foo', [ [ 'Attr1', ' () ' ] ]; + permute_test 'foo', [ [ 'Attr1', ' (()) ' ] ]; + permute_test 'foo', [ [ 'Attr1', ' \) ' ] ]; + permute_test 'foo', [ [ 'Attr1', ' \( ' ] ]; + permute_test 'foo', [ [ 'Attr1', '{' ] ]; + permute_test 'foo', [ [ '_', '' ] ]; + + # Multiple attributes, separated by colon+whitespace + permute_test 'foo', [ [ 'Attr1', undef ], [ 'Attr2', undef ] ]; + permute_test 'foo', [ [ 'Attr1', undef ], [ 'Attr2', undef ] ]; + permute_test 'foo', [ [ 'Attr1', undef ], [ 'Attr2', undef ] ]; + permute_test 'foo', [ [ 'Attr1', undef ], [ 'Attr2', undef ], [ 'Attr3', undef ] ]; + permute_test 'foo', [ [ 'Attr1', '' ], [ 'Attr2', '' ], [ 'Attr3', '' ] ]; + permute_test 'foo', [ [ 'Attr1', '' ], [ 'Attr2', '___' ], [ 'Attr3', '' ] ]; + + # Multiple attributes, separated by whitespace only + permute_test 'foo', [ [ 'Attr1', undef ], [ 'Attr2', undef ] ]; + permute_test 'foo', [ [ 'Attr1', 'a' ], [ 'Attr2', 'b' ] ]; + + # Examples from perldoc attributes + permute_test 'foo', [ [ 'switch', '10,foo(7,3)' ], [ 'expensive', undef ] ]; + permute_test 'foo', [ [ 'Ugly', '\'\\("' ], [ 'Bad', undef ] ]; + permute_test 'foo', [ [ '_5x5', undef ] ]; + permute_test 'foo', [ [ 'lvalue', undef ], [ 'method', undef ] ]; + + # Mixed separators + execute_test 'sub foo : Attr1(a) Attr2(b) : Attr3(c) Attr4(d) {}', [ [ 'Attr1', 'a' ], [ 'Attr2', 'b' ], [ 'Attr3', 'c' ], [ 'Attr4', 'd' ] ]; + + # When PPI supports anonymous subs, we'll need tests for + # attributes on 
them, too. +} + +sub execute_test { + my ( $code, $expected, $msg ) = @_; + $msg = $code if !defined $msg; + + my $Document = PPI::Document->new( \$code ); + isa_ok( $Document, 'PPI::Document', "$msg got document" ); + + my $attributes = $Document->find( 'PPI::Token::Attribute') || []; + is( scalar(@$attributes), scalar(@$expected), "$msg got expected number of attributes" ); + is_deeply( + [ map { [ $_->identifier, $_->parameters ] } @$attributes ], + $expected, + "$msg attribute properties as expected" + ); + + my $blocks = $Document->find( 'PPI::Structure::Block') || []; + my $blocks_expected = $code =~ m/{}$/ ? [ '{}' ] : []; + is_deeply( + [ map { $_->content } @$blocks ], + $blocks_expected, + "$msg blocks found as expected" + ); + + return; +} + +sub assemble_and_run { + my ( $name, $post_colon, $separator, $attributes, $post_attributes, $block ) = @_; + $block = '{}' if !defined $block; + + my $attribute_str = join $separator, map { defined $_->[1] ? "$_->[0]($_->[1])" : $_->[0] } @$attributes; + my $code = "sub $name :$post_colon$attribute_str$post_attributes$block"; + + my $msg = $code; + $msg =~ s|\x{b}|\\v|g; + $msg =~ s|\t|\\t|g; + $msg =~ s|\r|\\r|g; + $msg =~ s|\n|\\n|g; + $msg =~ s|\f|\\f|g; + + execute_test $code, $attributes, $msg; + + return; +} + +sub permute_test { + my ( $name, $attributes ) = @_; + + # Vertical tab \x{b} is whitespace since perl 5.20, but PPI currently + # (1.220) only supports it as whitespace when running on 5.20 + # or greater. 
+ + assemble_and_run $name, '', ':', $attributes, '', '{}'; + assemble_and_run $name, '', ':', $attributes, '', ';'; + assemble_and_run $name, ' ', ' ', $attributes, ' ', '{}'; + assemble_and_run $name, ' ', "\t", $attributes, ' ', '{}'; + assemble_and_run $name, ' ', "\r", $attributes, ' ', '{}'; + assemble_and_run $name, ' ', "\n", $attributes, ' ', '{}'; + assemble_and_run $name, ' ', "\f", $attributes, ' ', '{}'; + + assemble_and_run $name, "\t", "\t", $attributes, "\t", '{}'; + assemble_and_run $name, "\t", "\t", $attributes, "\t", ';'; + assemble_and_run $name, "\r", "\r", $attributes, "\r", '{}'; + assemble_and_run $name, "\n", "\n", $attributes, "\n", '{}'; + assemble_and_run $name, "\f", "\f", $attributes, "\f", '{}'; + assemble_and_run $name, "\f", "\f", $attributes, "\f", ';'; + + assemble_and_run $name, "\t", "\t:\t", $attributes, "\t", '{}'; + assemble_and_run $name, "\r", "\r:\r", $attributes, "\r", '{}'; + assemble_and_run $name, "\n", "\n:\n", $attributes, "\n", '{}'; + assemble_and_run $name, "\f", "\f:\f", $attributes, "\f", '{}'; + + return; +} diff --git a/t/ppi_token_dashedword.t b/t/ppi_token_dashedword.t index 509e5115..12595ec6 100644 --- a/t/ppi_token_dashedword.t +++ b/t/ppi_token_dashedword.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::DashedWord -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 10; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_heredoc.t b/t/ppi_token_heredoc.t new file mode 100644 index 00000000..0056d5a1 --- /dev/null +++ b/t/ppi_token_heredoc.t @@ -0,0 +1,164 @@ +#!/usr/bin/perl + +# Unit testing for PPI::Token::HereDoc + +use t::lib::PPI::Test::pragmas; +use Test::More tests => 12; + +use PPI; +use Test::Deep; + +# List of tests to perform. Each test requires the following information: +# - 'name': the name of the test in the output. 
+# - 'content': the Perl string to parse using PPI. +# - 'expected': a hashref with the keys being property names on the +# PPI::Token::HereDoc object, and the values being the expected value of +# that property after the heredoc block has been parsed. +my @tests = ( + + # Tests with a carriage return after the termination marker. + { + name => 'Bareword terminator.', + content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\nHERE\n", + expected => { + _terminator_line => "HERE\n", + _damaged => undef, + _terminator => 'HERE', + _mode => 'interpolate', + }, + }, + { + name => 'Single-quoted bareword terminator.', + content => "my \$heredoc = <<'HERE';\nLine 1\nLine 2\nHERE\n", + expected => { + _terminator_line => "HERE\n", + _damaged => undef, + _terminator => 'HERE', + _mode => 'literal', + }, + }, + { + name => 'Double-quoted bareword terminator.', + content => "my \$heredoc = <<\"HERE\";\nLine 1\nLine 2\nHERE\n", + expected => { + _terminator_line => "HERE\n", + _damaged => undef, + _terminator => 'HERE', + _mode => 'interpolate', + }, + }, + { + name => 'Command-quoted terminator.', + content => "my \$heredoc = <<`HERE`;\nLine 1\nLine 2\nHERE\n", + expected => { + _terminator_line => "HERE\n", + _damaged => undef, + _terminator => 'HERE', + _mode => 'command', + }, + }, + { + name => 'Legacy escaped bareword terminator.', + content => "my \$heredoc = <<\\HERE;\nLine 1\nLine 2\nHERE\n", + expected => { + _terminator_line => "HERE\n", + _damaged => undef, + _terminator => 'HERE', + _mode => 'literal', + }, + }, + + # Tests without a carriage return after the termination marker. 
+ { + name => 'Bareword terminator (no return).', + content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\nHERE", + expected => { + _terminator_line => 'HERE', + _damaged => 1, + _terminator => 'HERE', + _mode => 'interpolate', + }, + }, + { + name => 'Single-quoted bareword terminator (no return).', + content => "my \$heredoc = <<'HERE';\nLine 1\nLine 2\nHERE", + expected => { + _terminator_line => "HERE", + _damaged => 1, + _terminator => 'HERE', + _mode => 'literal', + }, + }, + { + name => 'Double-quoted bareword terminator (no return).', + content => "my \$heredoc = <<\"HERE\";\nLine 1\nLine 2\nHERE", + expected => { + _terminator_line => 'HERE', + _damaged => 1, + _terminator => 'HERE', + _mode => 'interpolate', + }, + }, + { + name => 'Command-quoted terminator (no return).', + content => "my \$heredoc = <<`HERE`;\nLine 1\nLine 2\nHERE", + expected => { + _terminator_line => 'HERE', + _damaged => 1, + _terminator => 'HERE', + _mode => 'command', + }, + }, + { + name => 'Legacy escaped bareword terminator (no return).', + content => "my \$heredoc = <<\\HERE;\nLine 1\nLine 2\nHERE", + expected => { + _terminator_line => 'HERE', + _damaged => 1, + _terminator => 'HERE', + _mode => 'literal', + }, + }, + + # Tests without a terminator. + { + name => 'Unterminated heredoc block.', + content => "my \$heredoc = <<HERE;\nLine 1\nLine 2\n", + expected => { + _terminator_line => undef, + _damaged => 1, + _terminator => 'HERE', + _mode => 'interpolate', + }, + } + +); + +for my $test ( @tests ) { + subtest( + $test->{name}, + sub { + plan tests => 6 + keys %{ $test->{expected} }; + + my $document = PPI::Document->new( \$test->{content} ); + isa_ok( $document, 'PPI::Document' ); + + my $heredocs = $document->find( 'Token::HereDoc' ); + is( ref $heredocs, 'ARRAY', 'Found heredocs.' ); + is( scalar @$heredocs, 1, 'Found 1 heredoc block.' 
); + + my $heredoc = $heredocs->[0]; + isa_ok( $heredoc, 'PPI::Token::HereDoc' ); + can_ok( $heredoc, 'heredoc' ); + + my @content = $heredoc->heredoc; + is_deeply( + \@content, + [ "Line 1\n", "Line 2\n", ], + 'The returned content does not include the heredoc terminator.', + ) or diag "heredoc() returned ", explain \@content; + + is( $heredoc->{$_}, $test->{expected}{$_}, "property '$_'" ) for keys %{ $test->{expected} }; + } + ); +} diff --git a/t/ppi_token_magic.t b/t/ppi_token_magic.t index 56c8643a..9b241bf8 100644 --- a/t/ppi_token_magic.t +++ b/t/ppi_token_magic.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Magic -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 39; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_number_version.t b/t/ppi_token_number_version.t index cda8b47a..b0a9f0cf 100644 --- a/t/ppi_token_number_version.t +++ b/t/ppi_token_number_version.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Number::Version -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More tests => 10; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 736; + use PPI; @@ -29,3 +22,82 @@ LITERAL: { is( length($literal2), 4, 'The literal length of doc1 is 4' ); is( $literal1, $literal2, 'Literals match for 1.2.3.4 vs v1.2.3.4' ); } + + +VSTRING_ENDS_CORRECTLY: { + my @tests = ( + ( + map { + { + desc=>"no . 
in 'v49$_', so not a version string", + code=>"v49$_", + expected=>[ 'PPI::Token::Word' => "v49$_" ], + } + } ( + 'x3', # not fooled by faux x operator with operand + 'e10', # not fooled by faux scientific notation + keys %PPI::Token::Word::KEYWORDS, + ), + ), + ( + map { + { + desc => "version string in 'v49.49$_' stops after number", + code => "v49.49$_", + expected => [ + 'PPI::Token::Number::Version' => 'v49.49', + get_class($_) => $_, + ], + }, + } ( + keys %PPI::Token::Word::KEYWORDS, + ), + ), + ( + map { + { + desc => "version string in '49.49.49$_' stops after number", + code => "49.49.49$_", + expected => [ + 'PPI::Token::Number::Version' => '49.49.49', + get_class($_) => $_, + ], + }, + } ( + keys %PPI::Token::Word::KEYWORDS, + ), + ), + { + desc => 'version string, x, and operand', + code => 'v49.49.49x3', + expected => [ + 'PPI::Token::Number::Version' => 'v49.49.49', + 'PPI::Token::Operator' => 'x', + 'PPI::Token::Number' => '3', + ], + }, + ); + for my $test ( @tests ) { + my $code = $test->{code}; + + my $d = PPI::Document->new( \$test->{code} ); + my $tokens = $d->find( sub { 1; } ); + $tokens = [ map { ref($_), $_->content() } @$tokens ]; + my $expected = $test->{expected}; + unshift @$expected, 'PPI::Statement', $test->{code}; + my $ok = is_deeply( $tokens, $expected, $test->{desc} ); + if ( !$ok ) { + diag "$test->{code} ($test->{desc})\n"; + diag explain $tokens; + diag explain $test->{expected}; + } + } +} + +sub get_class { + my ( $t ) = @_; + my $ql = $PPI::Token::Word::QUOTELIKE{$t}; + return "PPI::Token::$ql" if $ql; + return 'PPI::Token::Operator' if $PPI::Token::Word::OPERATOR{$t}; + return 'PPI::Token::Word'; +} diff --git a/t/ppi_token_operator.t b/t/ppi_token_operator.t index 86d456bc..13d30838 100644 --- a/t/ppi_token_operator.t +++ b/t/ppi_token_operator.t @@ -2,19 +2,9 @@ # Unit testing for PPI::Token::Operator -use strict; -BEGIN { - $| = 1; - select STDERR; - $| = 1; - select STDOUT; - $^W = 1; - no warnings 'once'; - 
$PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More tests => 398; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 1142; + use PPI; @@ -33,20 +23,6 @@ FIND_ONE_OP: { } -HEREDOC: { - my $source = '$a = <new( \$source ); - isa_ok( $doc, 'PPI::Document', "parsed '$source'" ); - my $ops = $doc->find( 'Token::HereDoc' ); - is( ref $ops, 'ARRAY', "found heredoc" ); - is( @$ops, 1, "heredoc found exactly once" ); - - $ops = $doc->find( 'Token::Operator' ); - is( ref $ops, 'ARRAY', "operator = found operators in heredoc test" ); - is( @$ops, 1, "operator = found exactly once in heredoc test" ); -} - - PARSE_ALL_OPERATORS: { foreach my $op ( sort keys %PPI::Token::Operator::OPERATOR ) { my $source = $op eq '<>' ? '<>;' : "\$foo $op 2;"; @@ -423,11 +399,22 @@ OPERATOR_X: { 'PPI::Token::Structure' => '}', ] }, + { + desc => 'label plus x', + code => 'LABEL: x64', + expected => [ + 'PPI::Statement::Compound' => 'LABEL:', + 'PPI::Token::Label' => 'LABEL:', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Statement' => 'x64', + 'PPI::Token::Word' => 'x64', + ] + }, ); # Exhaustively test when a preceding operator implies following # 'x' is word not an operator. This detects the regression in - # which '$obj->x86_convert()' was being parsed as an the x + # which '$obj->x86_convert()' was being parsed as an x # operator. my %operators = ( %PPI::Token::Operator::OPERATOR, @@ -478,6 +465,45 @@ OPERATOR_X: { push @tests, { desc => $desc, code => $code, expected => \@expected }; } + + # Test that Perl builtins known to have a null prototype do not + # force a following 'x' to be a word. 
+ my %noprotos = map { $_ => 1 } qw( + endgrent + endhostent + endnetent + endprotoent + endpwent + endservent + fork + getgrent + gethostent + getlogin + getnetent + getppid + getprotoent + getpwent + getservent + setgrent + setpwent + time + times + wait + wantarray + __SUB__ + ); + foreach my $noproto ( keys %noprotos ) { + my $code = "$noproto x3"; + my @expected = ( + 'PPI::Token::Word' => $noproto, + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Operator' => 'x', + 'PPI::Token::Number' => '3', + ); + my $desc = "builtin $noproto does not force following x to be a word"; + push @tests, { desc => "builtin $noproto does not force following x to be a word", code => $code, expected => \@expected }; + } + foreach my $test ( @tests ) { my $d = PPI::Document->new( \$test->{code} ); my $tokens = $d->find( sub { 1; } ); @@ -495,3 +521,95 @@ OPERATOR_X: { } } + +OPERATOR_FAT_COMMA: { + my @tests = ( + { + desc => 'integer with integer', + code => '1 => 2', + expected => [ + 'PPI::Token::Number' => '1', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Number' => '2', + ], + }, + { + desc => 'word with integer', + code => 'foo => 2', + expected => [ + 'PPI::Token::Word' => 'foo', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Number' => '2', + ], + }, + { + desc => 'dashed word with integer', + code => '-foo => 2', + expected => [ + 'PPI::Token::Word' => '-foo', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Whitespace' => ' ', + 'PPI::Token::Number' => '2', + ], + }, + ( map { { + desc=>$_, + code=>"$_=>2", + expected=>[ + 'PPI::Token::Word' => $_, + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Number' => '2', + ] + } } keys %PPI::Token::Word::KEYWORDS ), + ( map { { + desc=>$_, + code=>"($_=>2)", + expected=>[ + 'PPI::Structure::List' => "($_=>2)", + 'PPI::Token::Structure' => '(', + 
'PPI::Statement::Expression' => "$_=>2", + 'PPI::Token::Word' => $_, + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Number' => '2', + 'PPI::Token::Structure' => ')', + ] + } } keys %PPI::Token::Word::KEYWORDS ), + ( map { { + desc=>$_, + code=>"{$_=>2}", + expected=>[ + 'PPI::Structure::Constructor' => "{$_=>2}", + 'PPI::Token::Structure' => '{', + 'PPI::Statement::Expression' => "$_=>2", + 'PPI::Token::Word' => $_, + 'PPI::Token::Operator' => '=>', + 'PPI::Token::Number' => '2', + 'PPI::Token::Structure' => '}', + ] + } } keys %PPI::Token::Word::KEYWORDS ), + ); + + for my $test ( @tests ) { + my $code = $test->{code}; + + my $d = PPI::Document->new( \$test->{code} ); + my $tokens = $d->find( sub { 1; } ); + $tokens = [ map { ref($_), $_->content() } @$tokens ]; + my $expected = $test->{expected}; + if ( $expected->[0] !~ /^PPI::Statement/ ) { + unshift @$expected, 'PPI::Statement', $test->{code}; + } + my $ok = is_deeply( $tokens, $expected, $test->{desc} ); + if ( !$ok ) { + diag "$test->{code} ($test->{desc})\n"; + diag explain $tokens; + diag explain $test->{expected}; + } + } +} + diff --git a/t/ppi_token_pod.t b/t/ppi_token_pod.t index e0c24649..96f41110 100644 --- a/t/ppi_token_pod.t +++ b/t/ppi_token_pod.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Pod -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 9; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_prototype.t b/t/ppi_token_prototype.t index a0d4439a..8185482b 100644 --- a/t/ppi_token_prototype.t +++ b/t/ppi_token_prototype.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Prototype -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 801; -use Test::NoWarnings; + use PPI; diff --git 
a/t/ppi_token_quote.t b/t/ppi_token_quote.t index c037fc54..e624d6c9 100644 --- a/t/ppi_token_quote.t +++ b/t/ppi_token_quote.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Quote -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 16; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_quote_double.t b/t/ppi_token_quote_double.t index b83e9226..915fc5d0 100644 --- a/t/ppi_token_quote_double.t +++ b/t/ppi_token_quote_double.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Quote::Double -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 20; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_quote_interpolate.t b/t/ppi_token_quote_interpolate.t index 4dde28df..094e8930 100644 --- a/t/ppi_token_quote_interpolate.t +++ b/t/ppi_token_quote_interpolate.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Quote::Interpolate -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 9; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_quote_literal.t b/t/ppi_token_quote_literal.t index 14b71cf6..e740fbc1 100644 --- a/t/ppi_token_quote_literal.t +++ b/t/ppi_token_quote_literal.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Quote::Literal -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 13; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_quote_single.t b/t/ppi_token_quote_single.t index 6b11da53..119f2044 100644 --- a/t/ppi_token_quote_single.t +++ 
b/t/ppi_token_quote_single.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Quote::Single -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 25; -use Test::NoWarnings; + use PPI; diff --git a/t/ppi_token_quotelike_words.t b/t/ppi_token_quotelike_words.t index d983f092..db0d7faa 100644 --- a/t/ppi_token_quotelike_words.t +++ b/t/ppi_token_quotelike_words.t @@ -2,56 +2,137 @@ # Unit testing for PPI::Token::QuoteLike::Words -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} -use Test::More tests => 13; -use Test::NoWarnings; +use t::lib::PPI::Test::pragmas; +use Test::More tests => 1941; +use Test::Deep; + use PPI; +sub permute_test; +sub assemble_and_run; LITERAL: { - my $empty_list_document = PPI::Document->new(\<<'END_PERL'); -qw// -qw/ / -END_PERL - - isa_ok( $empty_list_document, 'PPI::Document' ); - my $empty_list_tokens = - $empty_list_document->find('PPI::Token::QuoteLike::Words'); - is( scalar @{$empty_list_tokens}, 2, 'Found expected empty word lists.' 
); - foreach my $token ( @{$empty_list_tokens} ) { - my @literal = $token->literal; - is( scalar @literal, 0, qq ); - } - - my $non_empty_list_document = PPI::Document->new(\<<'END_PERL'); -qw/foo bar baz/ -qw/ foo bar baz / -qw {foo bar baz} -END_PERL - my @expected = qw/ foo bar baz /; - - isa_ok( $non_empty_list_document, 'PPI::Document' ); - my $non_empty_list_tokens = - $non_empty_list_document->find('PPI::Token::QuoteLike::Words'); - is( - scalar(@$non_empty_list_tokens), - 3, - 'Found expected non-empty word lists.', - ); - foreach my $token ( @$non_empty_list_tokens ) { - my $literal = $token->literal; - is( - $literal, - scalar @expected, - qq, - ); - is_deeply( [ $token->literal ], \@expected, '->literal matches expected' ); - } + # empty + permute_test [], '/', '/', []; + permute_test [], '"', '"', []; + permute_test [], "'", "'", []; + permute_test [], '(', ')', []; + permute_test [], '{', '}', []; + permute_test [], '[', ']', []; + permute_test [], '<', '>', []; + + # words + permute_test ['a', 'b', 'c'], '/', '/', ['a', 'b', 'c']; + permute_test ['a,', 'b', 'c,'], '/', '/', ['a,', 'b', 'c,']; + permute_test ['a', ',', '#', 'c'], '/', '/', ['a', ',', '#', 'c']; + permute_test ['f_oo', 'b_ar'], '/', '/', ['f_oo', 'b_ar']; + + # it's allowed for both delims to be closers + permute_test ['a'], ')', ')', ['a']; + permute_test ['a'], '}', '}', ['a']; + permute_test ['a'], ']', ']', ['a']; + permute_test ['a'], '>', '>', ['a']; + + # containing things that sometimes are delimiters + permute_test ['/'], '(', ')', ['/']; + permute_test ['//'], '(', ')', ['//']; + permute_test ['qw()'], '(', ')', ['qw()']; + permute_test ['qw', '()'], '(', ')', ['qw', '()']; + permute_test ['qw//'], '(', ')', ['qw//']; + + # nested delimiters + permute_test ['()'], '(', ')', ['()']; + permute_test ['{}'], '{', '}', ['{}']; + permute_test ['[]'], '[', ']', ['[]']; + permute_test ['<>'], '<', '>', ['<>']; + permute_test ['((', ')', ')'], '(', ')', ['((', ')', ')']; + permute_test 
['{{', '}', '}'], '{', '}', ['{{', '}', '}']; + permute_test ['[[', ']', ']'], '[', ']', ['[[', ']', ']']; + permute_test ['<<', '>', '>'], '<', '>', ['<<', '>', '>']; + + my $bs = '\\'; # a single backslash character + + # escaped opening and closing + permute_test ["$bs)"], '(', ')', [')']; + permute_test ["$bs("], '(', ')', ['(']; + permute_test ["$bs}"], '{', '}', ['}']; + permute_test ["${bs}{"], '{', '}', ['{']; + permute_test ["$bs]"], '[', ']', [']']; + permute_test ["${bs}["], '[', ']', ['[']; + permute_test ["$bs<"], '<', '>', ['<']; + permute_test ["$bs>"], '<', '>', ['>']; + permute_test ["$bs/"], '/', '/', ['/']; + permute_test ["$bs'"], "'", "'", ["'"]; + permute_test [$bs.'"'], '"', '"', ['"']; + + # alphanum delims have to be separated from qw + assemble_and_run " ", ['a', "${bs}1"], '1', " ", " ", '1', ['a', '1']; + assemble_and_run " ", ["${bs}a"], 'a', " ", " ", 'a', ['a']; + assemble_and_run "\n", ["${bs}a"], 'a', "\n", "\n", 'a', ['a']; + + # '#' delims cannot be separated from qw + assemble_and_run '', ['a'], '#', '', ' ', '#', ['a']; + assemble_and_run '', ['a'], '#', ' ', ' ', '#', ['a']; + assemble_and_run '', ["$bs#"], '#', '', ' ', '#', ['#']; + assemble_and_run '', ["$bs#"], '#', ' ', ' ', '#', ['#']; + assemble_and_run '', ["$bs#"], '#', "\n", "\n", '#', ['#']; + + # a single backslash represents itself + assemble_and_run '', [$bs], '(', ' ', ' ', ')', [$bs]; + assemble_and_run '', [$bs], '(', "\n", ' ', ')', [$bs]; + + # a double backslash represents a single backslash + assemble_and_run '', ["$bs$bs"], '(', ' ', ' ', ')', [$bs]; + assemble_and_run '', ["$bs$bs"], '(', "\n", ' ', ')', [$bs]; + + # even backslash can be a delimiter, and when it is, backslashes + # can't be embedded or escaped. 
+ assemble_and_run '', [], $bs, ' ', ' ', $bs, []; + assemble_and_run '', [], $bs, "\n", "\n", $bs, []; + assemble_and_run '', ['a'], $bs, '', ' ', $bs, ['a']; + assemble_and_run ' ', ['a'], $bs, '', ' ', $bs, ['a']; + assemble_and_run "\n", ['a'], $bs, '', ' ', $bs, ['a']; +} + +sub execute_test { + my ( $code, $expected, $msg ) = @_; + + my $d = PPI::Document->new( \$code ); + isa_ok( $d, 'PPI::Document', $msg ); + my $found = $d->find( 'PPI::Token::QuoteLike::Words' ) || []; + is( @$found, 1, "$msg - exactly one qw" ); + is( $found->[0]->content, $code, "$msg content()" ); + is_deeply( [ $found->[0]->literal ], $expected, "$msg literal()" ); + + return; +} + +sub assemble_and_run { + my ( $pre_left_delim, $words_in, $left_delim, $delim_padding, $word_separator, $right_delim, $expected ) = @_; + + my $code = "qw$pre_left_delim$left_delim$delim_padding" . join(' ', @$words_in) . "$delim_padding$right_delim"; + execute_test $code, $expected, $code; + + return; +} + +sub permute_test { + my ( $words_in, $left_delim, $right_delim, $expected ) = @_; + + assemble_and_run "", $words_in, $left_delim, "", " ", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "", "\t", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "", "\n", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "", "\f", $right_delim, $expected; + + assemble_and_run "", $words_in, $left_delim, " ", " ", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "\t", "\t", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "\n", "\n", $right_delim, $expected; + assemble_and_run "", $words_in, $left_delim, "\f", "\f", $right_delim, $expected; + + assemble_and_run " ", $words_in, $left_delim, " ", " ", $right_delim, $expected; + assemble_and_run "\t", $words_in, $left_delim, "\t", "\t", $right_delim, $expected; + assemble_and_run "\n", $words_in, $left_delim, "\n", "\n", $right_delim, $expected; + 
assemble_and_run "\f", $words_in, $left_delim, "\f", "\f", $right_delim, $expected; + + return; } diff --git a/t/ppi_token_unknown.t b/t/ppi_token_unknown.t new file mode 100644 index 00000000..6f3cd4cb --- /dev/null +++ b/t/ppi_token_unknown.t @@ -0,0 +1,42 @@ +#!/usr/bin/perl + +# Unit testing for PPI::Token::Unknown + +use t::lib::PPI::Test::pragmas; +use Test::More tests => 2; + +use PPI; + + +OPERATOR_MULT_CAST: { + my @tests = ( + { + desc => 'multiply, not cast', + code => '$c{d}*$e', + expected => [ + 'PPI::Statement' => '$c{d}*$e', + 'PPI::Token::Symbol' => '$c', + 'PPI::Structure::Subscript' => '{d}', + 'PPI::Token::Structure' => '{', + 'PPI::Statement::Expression' => 'd', + 'PPI::Token::Word' => 'd', + 'PPI::Token::Structure' => '}', + 'PPI::Token::Operator' => '*', + 'PPI::Token::Symbol' => '$e', + ] + }, + ); + + for my $test ( @tests ) { + my $d = PPI::Document->new( \$test->{code} ); + my $tokens = $d->find( sub { 1 } ); + $tokens = [ map { ref $_, $_->content } @$tokens ]; + my $expected = $test->{expected}; + unshift @$expected, 'PPI::Statement', $test->{code} if $expected->[0] !~ /^PPI::Statement/; + next if is_deeply( $tokens, $expected, $test->{desc} ); + + diag "$test->{code} ($test->{desc})\n"; + diag explain $tokens; + diag explain $test->{expected}; + } +} diff --git a/t/ppi_token_word.t b/t/ppi_token_word.t index 74354265..7f2b91c8 100644 --- a/t/ppi_token_word.t +++ b/t/ppi_token_word.t @@ -2,16 +2,9 @@ # Unit testing for PPI::Token::Word -use strict; -BEGIN { - $| = 1; - $^W = 1; - no warnings 'once'; - $PPI::XS_DISABLE = 1; - $PPI::Lexer::X_TOKENIZER ||= $ENV{X_TOKENIZER}; -} +use t::lib::PPI::Test::pragmas; use Test::More tests => 1756; -use Test::NoWarnings; + use PPI; diff --git a/xt/api.t b/xt/api.t index 2aa31fb1..6a585e43 100644 --- a/xt/api.t +++ b/xt/api.t @@ -2,20 +2,16 @@ # Basic first pass API testing for PPI -use strict; +use t::lib::PPI::Test::pragmas; use Test::More; BEGIN { - $| = 1; - $PPI::XS_DISABLE = 1; - 
$PPI::XS_DISABLE = 1; # Prevent warning if ( $ENV{RELEASE_TESTING} ) { plan( tests => 2931 ); } else { - plan( skip_all => 'Author tests not required for installation' ); + plan( tests => 2931, skip_all => 'Author tests not required for installation' ); } } -use File::Spec::Functions ':ALL'; -use Test::NoWarnings; + use Test::ClassAPI; use PPI; use PPI::Dumper;