-
Notifications
You must be signed in to change notification settings - Fork 14
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CAF::Download class: API proposal #110
base: master
Are you sure you want to change the base?
Changes from all commits
4e9a9c9
88d2920
b49b666
6a53db1
6f666e9
76d37b6
3a25691
71656f4
f78fe60
b001e2c
b72a514
4d3e18d
c79c0f3
eca2713
eebf13b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
declaration template quattor/types/download; | ||
|
||
include 'pan/types'; | ||
|
||
@documentation{ | ||
A string that represents a URL that can be handled by CAF::Download | ||
Format: [auth+][method+]protocol://location | ||
Protocols: http|https|file | ||
Methods: lwp|curl transport | ||
Auth: kinit|gssapi|x509 authentication | ||
Location: anything (for the file protocol it has to start with /) | ||
} | ||
type caf_url_string = string with { | ||
# split the [auth+][method+]protocol prefix from the location | ||
mp_l = split('://', SELF); | ||
|
||
# anything other than exactly two parts is rejected | ||
if (length(mp_l) != 2) { | ||
error('invalid URL string requires ://, got ' + SELF); | ||
return(false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. redundant - error() already exits |
||
}; | ||
|
||
# prefix is optional auth, optional method and a mandatory protocol, joined by + | ||
if(! match(mp_l[0], '^((kinit|gssapi|x509)\+)?((lwp|curl)\+)?(https?|file)$')) { | ||
error('invalid method+protocol for ' + mp_l[0]); | ||
return(false); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ditto |
||
}; | ||
|
||
# the protocol is the last +-separated element of the prefix | ||
a_m_p = split('\+', mp_l[0]); | ||
protocol = a_m_p[length(a_m_p)-1]; | ||
|
||
if (protocol == 'file' && (!match(mp_l[1], '^/'))) { | ||
error("location for file protocol has to start with /, got " + mp_l[1]); | ||
return(false); | ||
}; | ||
|
||
true; | ||
}; | ||
|
||
# similar to kerberos_principal_string in ncm-ccm | ||
# http://web.mit.edu/kerberos/krb5-1.4/krb5-1.4.3/doc/krb5-user/Kerberos-Glossary.html | ||
# primary: one or more word characters | ||
type kerberos_primary = string with match(SELF, '^\w+$'); | ||
# realm: an uppercase letter followed by uppercase letters, digits, '.', '_' or '-' | ||
# ('-' is kept last in the character class so it is a literal hyphen; written as '.-_' it is | ||
# a range from '.' to '_' that accepts characters such as '/' ':' '@' and rejects '-') | ||
type kerberos_realm = string with match(SELF, '^[A-Z][A-Z0-9._-]*$'); | ||
# instance: a word character followed by word characters, '.' or '-' | ||
type kerberos_instance = string with match(SELF, '^\w[\w.-]*$'); | ||
|
||
# TODO: What if you want to use the defaults for all 4 settings? | ||
@documentation{ | ||
CAF::Download kerberos configuration | ||
} | ||
type caf_url_krb5 = { | ||
# all four fields are optional | ||
'keytab' ? string | ||
'primary' ? kerberos_primary | ||
'realm' ? kerberos_realm | ||
'instances' ? kerberos_instance[] | ||
} with { | ||
# instances may only be given together with a primary | ||
if(exists(SELF['instances']) && ! exists(SELF['primary'])) { | ||
error("Cannot have krb5 instance(s) without primary"); | ||
}; | ||
true; | ||
}; | ||
|
||
@documentation{ | ||
CAF::Download X509 configuration | ||
} | ||
type caf_url_x509 = { | ||
# all four fields are optional | ||
'cacert' ? string | ||
'capath' ? string | ||
'cert' ? string | ||
'key' ? string | ||
} with { | ||
# cacert and capath are mutually exclusive | ||
if(exists(SELF['cacert']) && exists(SELF['capath'])) { | ||
error('Both X509 cacert and capath defined, cannot have both'); | ||
}; | ||
true; | ||
}; | ||
|
||
@documentation{ | ||
CAF::Download proxy configuration | ||
} | ||
type caf_url_proxy = { | ||
# server is mandatory, port and reverse are optional | ||
'server' : type_hostname | ||
'port' ? type_port | ||
'reverse' ? boolean # reverse proxy (default is false, i.e. forward) | ||
}; | ||
|
||
@documentation{ | ||
CAF::Download supported authentication: one of gssapi, kinit or lwp | ||
} | ||
type caf_url_auth = string with match(SELF, '^(gssapi|kinit|lwp)$'); # NOTE(review): caf_url_string uses x509 (not lwp) as auth prefix - confirm which name is intended | ||
|
||
@documentation{ | ||
CAF::Download supported download method: one of lwp or curl | ||
} | ||
type caf_url_method = string with match(SELF, '^(lwp|curl)$'); # same method names as accepted in the caf_url_string prefix | ||
|
||
@documentation{ | ||
A URL that can be handled by CAF::Download | ||
} | ||
type caf_url = { | ||
'auth' ? caf_url_auth[] | ||
'method' ? caf_url_method[] | ||
'proto' : string with match(SELF, '^(file|https?)$') | ||
'server' ? type_hostname | ||
'filename' : string | ||
|
||
#'version' ? string | ||
|
||
'timeout' : long(0..) = 600 # download timeout in seconds (600s * 1kB/s BW = 600kB document) | ||
'head_timeout' ? long(0..) # timeout in seconds for initial request which checks for changes/existence | ||
|
||
'retries' : long(0..) = 3 # number retries | ||
'retry_wait' : long(0..) = 30 # number of seconds to wait before a retry | ||
|
||
'krb5' ? caf_url_krb5 | ||
'x509' ? caf_url_x509 | ||
'proxy' ? caf_url_proxy | ||
} with { | ||
# server is simply ignored with file protocol, but is required for every other protocol | ||
if ((SELF['proto'] != 'file') && (!(exists(SELF['server'])))) { | ||
error("caf url: server is required when protocol is not file"); | ||
}; | ||
if(exists(SELF['auth'])) { | ||
foreach(idx; auth; SELF['auth']) { | ||
# kinit and gssapi are the kerberos based values allowed by caf_url_auth, | ||
# both need the krb5 configuration | ||
if((auth == 'kinit' || auth == 'gssapi') && | ||
! exists(SELF['krb5'])) { | ||
error(format('Cannot set auth %s without setting krb5', auth)); | ||
}; | ||
# lwp authentication needs the x509 configuration | ||
if((auth == 'lwp') && ! exists(SELF['x509'])) { | ||
error('Cannot set auth lwp without setting x509'); | ||
}; | ||
}; | ||
}; | ||
|
||
true; | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
# ${license-info} | ||
# ${developer-info} | ||
# ${author-info} | ||
# ${build-info} | ||
|
||
package CAF::Download; | ||
|
||
use strict; | ||
use warnings; | ||
|
||
# For re-export only | ||
use CAF::Download::URL qw(set_url_defaults); | ||
use parent qw(CAF::ObjectText Exporter CAF::Download::URL CAF::Download::Retrieve); | ||
|
||
use Readonly; | ||
use LC::Exception qw (SUCCESS); | ||
|
||
our @EXPORT_OK = qw(set_url_defaults); | ||
|
||
# TODO: dependencies on curl and kinit | ||
|
||
# Download method names (lwp, curl) listed per protocol (http, https, file). | ||
# NOTE(review): the per-entry comments below refer to a protocol fallback idea, | ||
# not to the method lists themselves - see the review discussion. | ||
Readonly::Hash my %DOWNLOAD_METHODS => { | ||
http => [qw(lwp curl)], # try https, if not, try http | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry, I don't follow the comment, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for a brief moment i tought about allowing the equivalent of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not obvious how that is related to lwp & curl. Falling back to an insecure method by default is bad but having it configurable is good. |
||
https => [qw(lwp curl)], # only https | ||
file => [qw(lwp)], | ||
}; | ||
|
||
# Protocol names, i.e. the sorted keys of %DOWNLOAD_METHODS. | ||
# NOTE(review): not referenced anywhere else in the visible part of this module. | ||
Readonly::Array my @DOWNLOAD_PROTOCOLS => sort keys %DOWNLOAD_METHODS; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is that sorted? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because it's an array, and for reproducability it should be sorted. but the sorting has no meaning, and it seems the current code doesn't use this anymore. |
||
|
||
# Authentication names grouped under the keys krb5 and x509. | ||
# TODO: can we mix and match x509/krb5 also for security like TLS? | ||
# The GSSAPI doesn't require TLS, it has encryption | ||
# gssapi here means use the perl GSSAPI bindings to generate the tokens etc | ||
# kinit means to use commandline tools like kinit/kdestroy | ||
# x509/lwp means have LWP handle X509 (TLS + X509 auth) | ||
# TODO: does kinit imply GSSAPI usage? | ||
Readonly::Hash my %DOWNLOAD_AUTHENTICATION => { | ||
krb5 => [qw(gssapi kinit)], | ||
x509 => [qw(lwp)], | ||
}; | ||
|
||
# Disclaimer: inspired by | ||
# NCM::Component::download (15.8) | ||
# EDG::WP4::CCM::Fetch (15.8) | ||
# File::Fetch (0.48) | ||
|
||
=pod | ||
|
||
=head1 NAME | ||
|
||
CAF::Download - Class for downloading content from remote servers. | ||
|
||
=head1 SYNOPSIS | ||
|
||
use CAF::Download; | ||
|
||
my $dl = CAF::Download->new(['https://somewhere/myfile']); | ||
print "$dl"; # stringification | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some detail on how the user/caller should handle errors would be helpful. e.g. how do I get a meaningful string to put in my call to $self->error() if I am supposed to do that myself? Don't other CAF::* methods do that on my behalf? (Sorry if I am a bit confused: the underlying classes look OK but I don't quite yet see a connection from here to them.) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i'll address error handling in the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No idea, sorry. I have never taken a close look at the LC exception stuff and how it should be used. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can we move this discussion to #120 ? |
||
|
||
$dl = CAF::Download->new(['https://somewhere/else']); | ||
# return CAF::FileWriter instance (downloaded text already added) | ||
my $fh = $dl->filewriter('/some/path'); | ||
die "Problem downloading the data" if (!defined($fh)); | ||
$fh->close(); | ||
|
||
=head1 DESCRIPTION | ||
|
||
This class simplifies the downloading of content located on remote servers. | ||
It handles things like authentication, decryption, creating the actual file, ... | ||
|
||
=head2 Methods | ||
|
||
=over | ||
|
||
=item C<_initialize> | ||
|
||
Initialize the download object. Arguments: | ||
|
||
=over | ||
|
||
=item urls | ||
|
||
An array reference of urls. Urls will be tried in order, the first successful | ||
one is used. Providing more than one url can thus be used for failover. | ||
|
||
See C<prepare_urls> method for a description of the C<urls>. | ||
|
||
=back | ||
|
||
It takes some extra optional arguments: | ||
|
||
=over | ||
|
||
=item C<log> | ||
|
||
A C<CAF::Reporter> object to log to. | ||
|
||
=item C<setup> | ||
|
||
Boolean to run the setup (or not). Default/undef is to run setup. | ||
|
||
=item C<cleanup> | ||
|
||
Boolean to run the cleanup (or not). Default/undef is to run cleanup. | ||
|
||
=item destination | ||
|
||
The destination of the download, e.g. a filename. This is in particular required | ||
for download methods that can write to file themselves, like C<curl>. | ||
|
||
=back | ||
|
||
=cut | ||
|
||
# Initialise the instance from the url list and the optional named arguments | ||
# (setup, cleanup, destination; the remaining options are handed to _initialize_textopts). | ||
# Returns SUCCESS. | ||
sub _initialize | ||
{ | ||
my ($self, $urls, %opts) = @_; | ||
|
||
# parse_urls is not defined in this file; presumably inherited from | ||
# CAF::Download::URL - TODO confirm (the POD refers to prepare_urls) | ||
$self->{urls} = $self->parse_urls($urls); | ||
|
||
# NOTE(review): looks like a no-op, an empty %opts is replaced by an empty list | ||
%opts = () if !%opts; | ||
|
||
$self->_initialize_textopts(%opts); | ||
|
||
# normalise both flags to 1/0; an undefined option counts as enabled | ||
$self->{setup} = (! defined($opts{setup}) || $opts{setup}) ? 1 : 0; | ||
$self->{cleanup} = (! defined($opts{cleanup}) || $opts{cleanup}) ? 1 : 0; | ||
$self->debug(1, "setup $self->{setup} cleanup $self->{cleanup}"); | ||
|
||
# destination is only processed when a true value is passed; | ||
# a false result of prepare_destination is reported as <UNDEF> | ||
if ($opts{destination}) { | ||
$self->{destination} = $self->prepare_destination($opts{destination}); | ||
$self->debug(1, "download destination set to " . ($self->{destination} || '<UNDEF>')); | ||
} | ||
|
||
return SUCCESS; | ||
} | ||
|
||
=pod | ||
|
||
=back | ||
|
||
=cut | ||
|
||
1; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https as well?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
well, yes and no. i'm confused myself about the naming with all the available combinations and tools.
there are a number of "scenarios" i want to support, but not sure splitting them up in protocol/method/auth makes sense:
- `kinit+file`, e.g. reading from afs (although i'm not sure if that isn't authorisation instead of authentication)
- HTTP `GET` etc etc (could require authentication, like 'Basic auth', but is pretty non-safe, so there's no authentication called `passwd` or something).
- `kinit+http` (or `gssapi+http`) and detect the `MS` scenario via the same trick that CCM uses (the `krbencrypt` encoding https://github.com/quattor/CCM/blob/master/src/main/perl/Fetch/Download.pm#L96; i guess `krbencrypt` is something MS made up or is it a standardised name?)
- `x509+http` is basically https (https without a valid client certificate is just not smart)
- `kinit+http` or `gssapi+http`, so a bit confusing with MS scenario, so i might call this `kinit+httpn` (`HTTP Negotiate` is a term used by microsoft for this, but they also support ntlm as authentication).
- `kinit+ipavault` or other custom tools used to manage sensitive data
the first 3 are currently supported by `ccm-fetch` and `ncm-download` (afai understand the code). the 4th one is to address quattor/CCM#54, the 5th one is there so we don't have to try to setup our own secure datastore and be able to use existing tools.
the difference between `kinit` and `gssapi` mainly lies in the way the token is generated/managed: `kinit/kdestroy` binary or perl `GSSAPI`, but they both mean 'use kerberos'.