Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Escape square brackets in path #100

Closed
wants to merge 17 commits into from
Closed
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 60 additions & 1 deletion lib/URI.pm
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,22 @@ use warnings;

our $VERSION = '5.11';

# 1=version 5.10 and earlier; 0=version 5.11 and later
use constant HAS_RESERVED_SQUARE_BRACKETS => $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} ? 1 : 0;

our ($ABS_REMOTE_LEADING_DOTS, $ABS_ALLOW_RELATIVE_SCHEME, $DEFAULT_QUERY_FORM_DELIMITER);
our $RESERVED_SQUARE_BRACKETS = $ENV{URI_RESERVED_SQUARE_BRACKETS} || 0; # 1=version 5.10 and earlier; 0=version 5.11 and later
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved

my %implements; # mapping from scheme to implementor class

# Some "official" character classes

our $reserved = q(;/?:@&=+$,[]);
our $reserved = HAS_RESERVED_SQUARE_BRACKETS ? q(;/?:@&=+$,[]) : q(;/?:@&=+$,);
our $mark = q(-_.!~*'()); #'; emacs
our $unreserved = "A-Za-z0-9\Q$mark\E";
our $uric = quotemeta($reserved) . $unreserved . "%";
our $uric4host = $uric . ( HAS_RESERVED_SQUARE_BRACKETS ? '' : quotemeta( q([]) ) );
our $uric4user = quotemeta( q{!$'()*,;:._~%-+=%&} ) . "A-Za-z0-9" . ( HAS_RESERVED_SQUARE_BRACKETS ? quotemeta( q([]) ) : '' ); # RFC-3987: iuserinfo w/o UTF

our $scheme_re = '[a-zA-Z][a-zA-Z0-9.+\-]*';

Expand Down Expand Up @@ -86,10 +92,35 @@ sub _init
}


#-- Version: 5.11+
# Since the complete URI will be percent-encoded including '[' and ']',
# we selectively unescape square brackets from the authority/host part of the URI.
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved
# Derived modules that implement _uric_escape() should take this into account
# if they do not rely on URI::_uric_escape().
# No unescaping is performed for the userinfo@ part of the authority part.
# Restores literal '[' and ']' in the host portion of an already
# percent-encoded URI string. The string is passed in $_[0] and is modified
# in place (the caller's variable is changed); nothing useful is returned.
#
# Nothing is done when
#   - HAS_RESERVED_SQUARE_BRACKETS is true (legacy mode),
#   - the string contains no '%' at all, or
#   - the string has no non-empty "//authority" component (optionally
#     preceded by "scheme:").
#
# Only the host is touched: a leading "userinfo@" and a trailing ":port"
# are copied through unchanged.
sub _fix_uric_escape_for_host_part {
    return if HAS_RESERVED_SQUARE_BRACKETS;
    return if $_[0] !~ /%/;

    # The authority must be non-empty ('+'): for something like
    # "file:///a%20b" there is no host to fix up, so skip it entirely.
    if ($_[0] =~ m,^((?:$URI::scheme_re:)?)//([^/?\#]+)(.*)$,os) {
        my $orig = $2;
        # Remember where the authority sits before any further match
        # overwrites @- / @+.
        my ($auth_start, $auth_len) = ($-[2], $+[2] - $-[2]);

        my ($user, $host) = $orig =~ /^(.*@)?([^@]*)$/;
        $user ||= '';
        my $port = $host =~ s/(:\d+)$// ? $1 : '';
        #TODO: die() here if scheme indicates TCP/UDP and port is out of range [0..65535] ?
        #TODO: count substitutions and die() here if count('[') != count(']') > 1 ? (imbalanced or too many)
        $host =~ s/\%5B/[/gi;
        $host =~ s/\%5D/]/gi;

        # Splice by position instead of re-matching the authority text:
        # a pattern built from an empty string would be the "empty pattern",
        # which Perl resolves to the last successfully matched regex.
        substr($_[0], $auth_start, $auth_len, "$user$host$port");
    }
}


# Class method. Returns a copy of $text in which every character that is
# neither in the $uric character set nor a '#' has been replaced by the
# result of URI::Escape::escape_char(). Afterwards the host part is passed
# through _fix_uric_escape_for_host_part() and the result is handed to
# utf8::downgrade() before being returned.
sub _uric_escape
{
    my ($class, $text) = @_;

    $text =~ s{([^$uric\#])}{ URI::Escape::escape_char($1) }ego;
    _fix_uric_escape_for_host_part($text);
    utf8::downgrade($text);

    return $text;
}
Expand Down Expand Up @@ -1085,6 +1116,34 @@ examples:
This value can be set to ";" to have the query form C<key=value> pairs
delimited by ";" instead of "&" which is the default.

=back

=head1 ENVIRONMENT VARIABLES

=over 4

=item URI_HAS_RESERVED_SQUARE_BRACKETS
simbabque marked this conversation as resolved.
Show resolved Hide resolved

Before version 5.11, URI treated square brackets as reserved characters
throughout the whole URI string. However, these brackets are reserved
only within the authority/host part of the URI and nowhere else (RFC 3986).
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved

Starting with version 5.11, URI takes this distinction into account.
Setting the environment variable C<URI_HAS_RESERVED_SQUARE_BRACKETS>
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved
(programmatically or via the shell), restores the old behavior.

Note: This environment variable is just an initialiser and has to be set
before module URI is used/required. Changing it at run time has no effect.
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved

#-- restore 5.10 behavior programmatically
BEGIN {
$ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} = 1;
}
use URI ();
Perlbotics marked this conversation as resolved.
Show resolved Hide resolved


Its value can be checked programmatically by accessing the constant
C<URI::HAS_RESERVED_SQUARE_BRACKETS>.


=back

=head1 BUGS
Expand Down
41 changes: 39 additions & 2 deletions lib/URI/_generic.pm
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,41 @@ use Carp ();

our $VERSION = '5.11';

my $ACHAR = $URI::uric; $ACHAR =~ s,\\[/?],,g;
my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g;
my $ACHAR = URI::HAS_RESERVED_SQUARE_BRACKETS ? $URI::uric : $URI::uric4host; $ACHAR =~ s,\\[/?],,g;
my $PCHAR = $URI::uric; $PCHAR =~ s,\\[?],,g;

sub _no_scheme_ok { 1 }

# Compiled pattern matching a raw (un-bracketed) IPv6 address. It is not
# anchored; callers have to anchor it themselves.
our $IPv6_re;
{   #-- "borrowed" from SALVAs Regexp::IPv6 - https://metacpan.org/dist/Regexp-IPv6/source/lib/Regexp/IPv6.pm
    #TODO: Should be made a dependency of this module.

    # Dotted-quad IPv4 address, usable as the last 32 bits of an IPv6 address.
    my $IPv4 = "((25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))";
    # One group of 1-4 hex digits.
    my $G = "[0-9a-fA-F]{1,4}";

    # Alternative endings that may follow a given number of leading groups;
    # consumed in order by the loop below.
    my @tail = ( ":",
                 "(:($G)?|$IPv4)",
                 ":($IPv4|$G(:$G)?|)",
                 "(:$IPv4|:$G(:$IPv4|(:$G){0,2})|:)",
                 "((:$G){0,2}(:$IPv4|(:$G){1,2})|:)",
                 "((:$G){0,3}(:$IPv4|(:$G){1,2})|:)",
                 "((:$G){0,4}(:$IPv4|(:$G){1,2})|:)" );

    # Build the pattern inside-out: every pass prepends one more "group:" and
    # offers the matching tail as an alternative to the pattern built so far.
    $IPv6_re = $G;
    $IPv6_re = "$G:($IPv6_re|$_)" for @tail;
    # Addresses that start with a ':' are handled by a separate alternative.
    $IPv6_re = qq/:(:$G){0,5}((:$G){1,2}|:$IPv4)|$IPv6_re/;
    # Turn every group into a non-capturing one.
    $IPv6_re =~ s/\(/(?:/g;
    $IPv6_re = qr/$IPv6_re/;
}


# Returns 1 if the whole of $addr is a raw IPv6 address (no surrounding
# square brackets), 0 otherwise. An undefined, empty or otherwise false
# argument yields 0.
sub _looks_like_raw_ip6_address {
    my $addr = shift;
    #TODO: use Regexp::IPv6
    return 0 unless $addr;
    # \A ... \z instead of ^ ... $: '$' would also accept a trailing newline.
    return $addr =~ /\A$IPv6_re\z/ ? 1 : 0;
}


sub authority
{
my $self = shift;
Expand All @@ -26,6 +56,13 @@ sub authority
my $rest = $3;
if (defined $auth) {
$auth =~ s/([^$ACHAR])/ URI::Escape::escape_char($1)/ego;
if ( my ($user, $host) = $auth =~ /^(.*@)?([^@]+)$/ ) { #-- special escape userinfo part
$user ||= '';
$user =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego;
$user =~ s/%40$/\@/; # recover final '@'
$host = "[$host]" if _looks_like_raw_ip6_address( $host );
$auth = $user . $host;
}
utf8::downgrade($auth);
$$self .= "//$auth";
}
Expand Down
7 changes: 4 additions & 3 deletions lib/URI/_server.pm
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ sub _uric_escape {
}

sub _host_escape {
return unless $_[0] =~ /[^$URI::uric]/;
return if URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric]/;
return if !URI::HAS_RESERVED_SQUARE_BRACKETS and $_[0] !~ /[^$URI::uric4host]/;
eval {
require URI::_idna;
$_[0] = URI::_idna::encode($_[0]);
Expand Down Expand Up @@ -59,8 +60,8 @@ sub userinfo
$new =~ s/.*@//; # remove old stuff
my $ui = shift;
if (defined $ui) {
$ui =~ s/@/%40/g; # protect @
$new = "$ui\@$new";
$ui =~ s/([^$URI::uric4user])/ URI::Escape::escape_char($1)/ego;
$new = "$ui\@$new";
}
$self->authority($new);
}
Expand Down
7 changes: 6 additions & 1 deletion t/old-base.t
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,12 @@ sub escape_test {
is($all, $new, "uri_escape->uri_unescape"),

$url->path($all);
is($url->full_path, q(%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23$%&'()*+,-./0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF), ref($url) . '->as_string');

if ( URI::HAS_RESERVED_SQUARE_BRACKETS ) {
is($url->full_path, q(%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23$%&'()*+,-./0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ[%5C]%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF), ref($url) . '->as_string');
} else {
is($url->full_path, q(%00%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22%23$%&'()*+,-./0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F%80%81%82%83%84%85%86%87%88%89%8A%8B%8C%8D%8E%8F%90%91%92%93%94%95%96%97%98%99%9A%9B%9C%9D%9E%9F%A0%A1%A2%A3%A4%A5%A6%A7%A8%A9%AA%AB%AC%AD%AE%AF%B0%B1%B2%B3%B4%B5%B6%B7%B8%B9%BA%BB%BC%BD%BE%BF%C0%C1%C2%C3%C4%C5%C6%C7%C8%C9%CA%CB%CC%CD%CE%CF%D0%D1%D2%D3%D4%D5%D6%D7%D8%D9%DA%DB%DC%DD%DE%DF%E0%E1%E2%E3%E4%E5%E6%E7%E8%E9%EA%EB%EC%ED%EE%EF%F0%F1%F2%F3%F4%F5%F6%F7%F8%F9%FA%FB%FC%FD%FE%FF), ref($url) . '->as_string');
}

# test escaping uses uppercase (preferred by rfc1837)
$url = new URI::URL 'file://h/';
Expand Down
41 changes: 41 additions & 0 deletions t/sq-brackets-legacy.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
use strict;
use warnings;

use Test::More;

# Legacy-mode test: with URI_HAS_RESERVED_SQUARE_BRACKETS set, square brackets
# must survive canonicalisation unescaped everywhere in the URI.
#
# The variable is assigned in a BEGIN block so that it is already in place
# when the "use URI" below is compiled.
BEGIN {
    $ENV{URI_HAS_RESERVED_SQUARE_BRACKETS} = 1;
}

use URI ();

# Emit a dump of the given value as test diagnostics (used on failure).
sub show {
    diag explain("self: ", shift);
}


#-- test bugfix of https://github.com/libwww-perl/URI/issues/99


# Only the qw() warnings are silenced (the '#' of the fragment would otherwise
# be reported as a potential comment), and only for this one statement.
my @legacy_tests = do {
    no warnings 'qw';
    qw(
        ftp://[::1]/
        http://example.com/path_with_square_[brackets]
        http://[::1]/and_[%5Bmixed%5D]_stuff_in_path
        https://[::1]/path_with_square_[brackets]_and_query?par=value[1]&par=value[2]
        http://[::1]/path_with_square_[brackets]_and_query?par=value[1]#and_fragment[2]
        https://root[user]@[::1]/welcome.html
    );
};

is( URI::HAS_RESERVED_SQUARE_BRACKETS, 1, "constant indicates to treat square brackets as reserved characters (legacy)" );

# Each URI is expected to come back from canonical() exactly as given.
foreach my $same ( @legacy_tests ) {
    my $u = URI->new( $same );
    is( $u->canonical,
        $same,
        "legacy: reserved square brackets not escaped"
    ) or show($u);
}

done_testing;
Loading