diff --git a/aspen/algorithms/website.py b/aspen/algorithms/website.py index 9622e4d0d..bc5dc827c 100644 --- a/aspen/algorithms/website.py +++ b/aspen/algorithms/website.py @@ -41,6 +41,7 @@ from aspen.http.response import Response from aspen import typecasting from first import first as _first +from aspen.dispatcher import DispatchResult, DispatchStatus def parse_environ_into_request(environ): @@ -61,20 +62,39 @@ def raise_200_for_OPTIONS(request): def dispatch_request_to_filesystem(website, request): - dispatcher.dispatch(website, request) + + if website.list_directories: + directory_default = website.ours_or_theirs('autoindex.html.spt') + assert directory_default is not None # sanity check + else: + directory_default = None + + result = dispatcher.dispatch( indices = website.indices + , media_type_default = website.media_type_default + , pathparts = request.line.uri.path.parts + , uripath = request.line.uri.path.raw + , querystring = request.line.uri.querystring.raw + , startdir = website.www_root + , directory_default = directory_default + , favicon_default = website.find_ours('favicon.ico') + ) + request.fs = result.match + for k, v in result.wildcards.iteritems(): + request.line.uri.path[k] = v + return {'dispatch_result': result} def apply_typecasters_to_path(website, request): typecasting.apply_typecasters(website.typecasters, request.line.uri.path) -def get_resource_for_request(website, request): +def get_resource_for_request(website, request, dispatch_result): return {'resource': resources.get(website, request)} -def get_response_for_resource(request, resource=None): +def get_response_for_resource(request, dispatch_result, resource=None): if resource is not None: - return {'response': resource.respond(request)} + return {'response': resource.respond(request, dispatch_result)} def get_response_for_exception(website, exception): @@ -112,8 +132,9 @@ def delegate_error_to_simplate(website, request, response, resource=None): # Try to return an error that matches the type of the original resource. request.headers['Accept'] = resource.media_type + ', text/plain; q=0.1' resource = resources.get(website, request) + dispatch_result = DispatchResult(DispatchStatus.okay, fs, {}, 'Found.', {}) try: - response = resource.respond(request, response) + response = resource.respond(request, dispatch_result, response) except Response as response: if response.code != 406: raise diff --git a/aspen/dispatcher.py b/aspen/dispatcher.py index e57f89130..2caaf889c 100644 --- a/aspen/dispatcher.py +++ b/aspen/dispatcher.py @@ -13,7 +13,6 @@ import os from aspen import Response -from .backcompat import namedtuple def debug_noop(*args, **kwargs): @@ -51,13 +50,18 @@ def debug_ext(): return a, b -class DispatchStatus: +class DispatchStatus(object): okay, missing, non_leaf = range(3) -DispatchResult = namedtuple( 'DispatchResult' - , 'status match wildcards detail'.split() - ) +class DispatchResult(object): + def __init__(self, status, match, wildcards, detail, extra): + self.status = status + self.match = match + self.wildcards = wildcards + self.detail = detail + self.extra = extra + self.constrain_path = True def dispatch_abstract(listnodes, is_leaf, traverse, find_index, noext_matched, @@ -100,17 +104,22 @@ def get_wildleaf_fallback(): ext = lastnode_ext if lastnode_ext in wildleafs else None curnode, wildvals = wildleafs[ext] debug(lambda: "Wildcard leaf match %r and ext %r" % (curnode, ext)) - return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.") + return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {}) return None for depth, node in enumerate(nodepath): # check all the possibilities: # node.html, node.html.spt, node.spt, node.html/, %node.html/ %*.html.spt, %*.spt - subnodes = set([ n for n in listnodes(curnode) if not n.startswith('.') ]) # don't serve hidden files + + # don't serve hidden files + subnodes = set([ n for n in listnodes(curnode) if not n.startswith('.') ]) + node_noext, node_ext = splitext(node) - maybe_wild_nodes = [ n for n in sorted(subnodes) if n.startswith("%") ] # only maybe because non-spt files aren't wild + # only maybe because non-spt files aren't wild + maybe_wild_nodes = [ n for n in sorted(subnodes) if n.startswith("%") ] + wild_leaf_ns = [ n for n in maybe_wild_nodes if is_leaf_node(n) and is_spt(n) ] wild_nonleaf_ns = [ n for n in maybe_wild_nodes if not is_leaf_node(n) ] @@ -132,7 +141,8 @@ def get_wildleaf_fallback(): if node == '': # dir request debug(lambda: "...last node is empty") path_so_far = traverse(curnode, node) - # return either an index file or have the path end in '/' which means 404 or autoindex as appropriate + # return either an index file or have the path end in '/' which means 404 or + # autoindex as appropriate found_n = find_index(path_so_far) if found_n is None: found_n = "" @@ -141,17 +151,24 @@ def get_wildleaf_fallback(): curnode = traverse(curnode, found_n) node_name = found_n[1:-4] # strip leading % and trailing .spt wildvals[node_name] = node - return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.") + return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {}) elif node in subnodes and is_leaf_node(node): debug(lambda: "...found exact file, must be static") if is_spt(node): - return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node) + return DispatchResult( DispatchStatus.missing + , None + , None + , "Node %r Not Found" % node + , {} + ) else: found_n = node elif node + ".spt" in subnodes and is_leaf_node(node + ".spt"): debug(lambda: "...found exact spt") found_n = node + ".spt" - elif node_noext + ".spt" in subnodes and is_leaf_node(node_noext + ".spt") and node_ext: # node has an extension + elif node_noext + ".spt" in subnodes and is_leaf_node(node_noext + ".spt") \ + and node_ext: + # node has an extension debug(lambda: "...found indirect spt") # indirect match noext_matched(node) @@ -166,16 +183,31 @@ def get_wildleaf_fallback(): curnode = traverse(curnode, found_n) result = get_wildleaf_fallback() if not result: - return DispatchResult(DispatchStatus.non_leaf, curnode, None, "Tried to access non-leaf node as leaf.") + return DispatchResult( DispatchStatus.non_leaf + , curnode + , None + , "Tried to access non-leaf node as leaf." + , {} + ) return result elif node in subnodes: debug(lambda: "exact dirmatch") - return DispatchResult(DispatchStatus.non_leaf, curnode, None, "Tried to access non-leaf node as leaf.") + return DispatchResult( DispatchStatus.non_leaf + , curnode + , None + , "Tried to access non-leaf node as leaf." + , {} + ) else: debug(lambda: "fallthrough") result = get_wildleaf_fallback() if not result: - return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node) + return DispatchResult( DispatchStatus.missing + , None + , None + , "Node %r Not Found" % node + , {} + ) return result if not last_node: # not at last path seg in request @@ -185,7 +217,8 @@ def get_wildleaf_fallback(): debug(lambda: "Exact match " + repr(node)) curnode = traverse(curnode, found_n) elif wild_nonleaf_ns: - # need to match a wildnode, and we're not the last node, so we should match non-leaf first, then leaf + # need to match a wildnode, and we're not the last node, so we should match + # non-leaf first, then leaf found_n = wild_nonleaf_ns[0] wildvals[found_n[1:]] = node debug(lambda: "Wildcard match %r = %r " % (found_n, node)) @@ -194,10 +227,15 @@ def get_wildleaf_fallback(): debug(lambda: "No exact match for " + repr(node)) result = get_wildleaf_fallback() if not result: - return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node) + return DispatchResult( DispatchStatus.missing + , None + , None + , "Node %r Not Found" % node + , {} + ) return result - return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.") + return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {}) def match_index(indices, indir): @@ -219,35 +257,29 @@ def is_first_index(indices, basedir, name): return False -def update_neg_type(website, request, filename): +def update_neg_type(media_type_default, capture_accept, filename): media_type = mimetypes.guess_type(filename, strict=False)[0] if media_type is None: - media_type = website.media_type_default - request.headers['X-Aspen-Accept'] = media_type - debug(lambda: "set x-aspen-accept to %r" % media_type) + media_type = media_type_default + capture_accept['accept'] = media_type + debug(lambda: "set result.extra['accept'] to %r" % media_type) -def dispatch(website, request, pure_dispatch=False): +def dispatch(indices, media_type_default, pathparts, uripath, querystring, startdir, + directory_default, favicon_default): """Concretize dispatch_abstract. - - This is all side-effecty on the request object, setting, at the least, - request.fs, and at worst other random contents including but not limited - to: request.line.uri.path, request.headers. - """ - # Handle URI path parts - pathparts = request.line.uri.path.parts - # Set up the real environment for the dispatcher. # =============================================== + capture_accept = {} listnodes = os.listdir is_leaf = os.path.isfile traverse = os.path.join - find_index = lambda x: match_index(website.indices, x) - noext_matched = lambda x: update_neg_type(website, request, x) - startdir = website.www_root + find_index = lambda x: match_index(indices, x) + noext_matched = lambda x: update_neg_type(media_type_default, capture_accept, x) + # Dispatch! # ========= @@ -263,69 +295,60 @@ def dispatch(website, request, pure_dispatch=False): debug(lambda: "dispatch_abstract returned: " + repr(result)) + if 'accept' in capture_accept: + result.extra['accept'] = capture_accept['accept'] + if result.match: debug(lambda: "result.match is true" ) matchbase, matchname = result.match.rsplit(os.path.sep,1) - if pathparts[-1] != '' and matchname in website.indices and \ - is_first_index(website.indices, matchbase, matchname): + if pathparts[-1] != '' and matchname in indices and \ + is_first_index(indices, matchbase, matchname): # asked for something that maps to a default index file; redirect to / per issue #175 - debug(lambda: "found default index '%s' maps into %r" % (pathparts[-1], website.indices)) - uri = request.line.uri - location = uri.path.raw[:-len(pathparts[-1])] - if uri.querystring.raw: - location += '?' + uri.querystring.raw + debug( lambda: "found default index '%s' maps into %r" + % (pathparts[-1], indices) + ) + location = uripath[:-len(pathparts[-1])] + if querystring: + location += '?' + querystring raise Response(302, headers={'Location': location}) - if not pure_dispatch: - - # favicon.ico - # =========== - # Serve Aspen's favicon if there's not one. - - if request.line.uri.path.raw == '/favicon.ico': - if result.status != DispatchStatus.okay: - path = request.line.uri.path.raw[1:] - request.fs = website.find_ours(path) - return - - - # robots.txt - # ========== - # Don't let robots.txt be handled by anything other than an actual - # robots.txt file - - if request.line.uri.path.raw == '/robots.txt': - if result.status != DispatchStatus.missing: - if not result.match.endswith('robots.txt'): - raise Response(404) - # Handle returned states. # ======================= + if result.status != DispatchStatus.missing: + if uripath == '/robots.txt' and not result.match.endswith('robots.txt'): # robots.txt + # Don't let robots.txt be handled by anything other than an actual robots.txt file, + # because if you don't have a robots.txt but you do have a wildcard, then you end + # up with logspam. + raise Response(404) + if result.status == DispatchStatus.okay: - if result.match.endswith('/'): # autoindex - if not website.list_directories: + if result.match.endswith('/'): + if directory_default: # autoindex + result.extra['autoindexdir'] = result.match # order matters! + result.match = directory_default + result.wildcards = {} + result.detail = 'Directory default.' + result.constrain_path = False + else: raise Response(404) - autoindex = website.ours_or_theirs('autoindex.html.spt') - assert autoindex is not None # sanity check - request.headers['X-Aspen-AutoIndexDir'] = result.match - request.fs = autoindex - return # return so we skip the no-escape check - else: # normal match - request.fs = result.match - for k, v in result.wildcards.iteritems(): - request.line.uri.path[k] = v - - elif result.status == DispatchStatus.non_leaf: # trailing-slash redirect - uri = request.line.uri - location = uri.path.raw + '/' - if uri.querystring.raw: - location += '?' + uri.querystring.raw + + elif result.status == DispatchStatus.non_leaf: # trailing slash + location = uripath + '/' + if querystring: + location += '?' + querystring raise Response(302, headers={'Location': location}) - elif result.status == DispatchStatus.missing: # 404 - raise Response(404) + elif result.status == DispatchStatus.missing: # 404, but ... + if uripath == '/favicon.ico' and favicon_default: # favicon.ico + result.status = DispatchStatus.okay + result.match = favicon_default + result.wildcards = {} + result.detail = 'Favicon default.' + result.constrain_path = False + else: + raise Response(404) else: raise Response(500, "Unknown result status.") @@ -334,6 +357,7 @@ def dispatch(website, request, pure_dispatch=False): # Protect against escaping the www_root. # ====================================== - if not request.fs.startswith(startdir): + if result.constrain_path and not result.match.startswith(startdir): raise Response(404) + return result diff --git a/aspen/resources/dynamic_resource.py b/aspen/resources/dynamic_resource.py index 5dca15552..f60033141 100644 --- a/aspen/resources/dynamic_resource.py +++ b/aspen/resources/dynamic_resource.py @@ -37,7 +37,7 @@ def __init__(self, *a, **kw): self.pages = self.compile_pages(pages) - def respond(self, request, response=None): + def respond(self, request, dispatch_result, response=None): """Given a Request and maybe a Response, return or raise a Response. """ response = response or Response(charset=self.website.charset_dynamic) @@ -46,7 +46,7 @@ def respond(self, request, response=None): # Populate context. # ================= - context = self.populate_context(request, response) + context = self.populate_context(request, dispatch_result, response) # Exec page two. @@ -77,7 +77,7 @@ def respond(self, request, response=None): return response - def populate_context(self, request, response): + def populate_context(self, request, dispatch_result, response): """Factored out to support testing. """ dynamics = { 'body' : lambda: request.body } @@ -104,8 +104,9 @@ def __getitem__(self, key): # don't let the page override these context.update({ 'request' : request, - 'response': response, - 'resource': self + 'dispatch_result': dispatch_result, + 'resource': self, + 'response': response }) return context diff --git a/aspen/resources/negotiated_resource.py b/aspen/resources/negotiated_resource.py index a20a5e983..05bed35d8 100644 --- a/aspen/resources/negotiated_resource.py +++ b/aspen/resources/negotiated_resource.py @@ -73,9 +73,10 @@ def get_response(self, context): """Given a context dict, return a response object. """ request = context['request'] + dispatch_result = context['dispatch_result'] # find an Accept header - accept = request.headers.get('X-Aspen-Accept', None) + accept = dispatch_result.extra.get('accept', None) if accept is not None: # indirect negotiation failure = Response(404) else: # direct negotiation diff --git a/aspen/resources/static_resource.py b/aspen/resources/static_resource.py index f862bf93a..e454bac2d 100644 --- a/aspen/resources/static_resource.py +++ b/aspen/resources/static_resource.py @@ -19,7 +19,7 @@ def __init__(self, *a, **kw): if self.media_type == 'application/json': self.media_type = self.website.media_type_json - def respond(self, request, response=None): + def respond(self, request, dispatch_result, response=None): """Given a Request and maybe a Response, return or raise a Response. """ response = response or Response() diff --git a/aspen/www/autoindex.html.spt b/aspen/www/autoindex.html.spt index 232316a81..2e592d996 100644 --- a/aspen/www/autoindex.html.spt +++ b/aspen/www/autoindex.html.spt @@ -47,13 +47,14 @@ def _get_time(stats): """ return str(datetime.fromtimestamp(stats[stat.ST_MTIME])) +[----------------------------------------] # Get the directory to list. # ========================== # Support the case where we are in the directory where this file actually # lives! -fspath = request.headers.get('X-Aspen-AutoIndexDir', os.path.dirname(__file__)) +fspath = dispatch_result.extra.get('autoindexdir', os.path.dirname(__file__)) assert os.path.isdir(fspath) # sanity check urlpath = fspath[len(website.www_root):] + os.sep diff --git a/tests/test_dispatcher.py b/tests/test_dispatcher.py index 871e55aa1..61f400ee4 100644 --- a/tests/test_dispatcher.py +++ b/tests/test_dispatcher.py @@ -8,7 +8,6 @@ import aspen from aspen import dispatcher, Response -from aspen.http.request import Request # Helpers @@ -44,6 +43,70 @@ def assert_body(harness, uripath, expected_body): [-----] text/html