AspenWeb · chadwhitacre · Oct 3, 2014 · Oct 3, 2014 · Oct 3, 2014 · Oct 3, 2014
diff --git a/aspen/algorithms/website.py b/aspen/algorithms/website.py
@@ -41,6 +41,7 @@
 from aspen.http.response import Response
 from aspen import typecasting
 from first import first as _first
+from aspen.dispatcher import DispatchResult, DispatchStatus
 
 
 def parse_environ_into_request(environ):
@@ -61,20 +62,39 @@ def raise_200_for_OPTIONS(request):
 
 
 def dispatch_request_to_filesystem(website, request):
-    dispatcher.dispatch(website, request)
+    """Resolve the request URI on the filesystem; set request.fs and path wildcards."""
+    if website.list_directories:
+        directory_default = website.ours_or_theirs('autoindex.html.spt')
+        assert directory_default is not None  # sanity check
+    else:
+        directory_default = None  # falsy: dispatch() answers directory matches with 404
+
+    result = dispatcher.dispatch( indices               = website.indices
+                                , media_type_default    = website.media_type_default
+                                , pathparts             = request.line.uri.path.parts
+                                , uripath               = request.line.uri.path.raw
+                                , querystring           = request.line.uri.querystring.raw
+                                , startdir              = website.www_root
+                                , directory_default     = directory_default
+                                , favicon_default       = website.find_ours('favicon.ico')
+                                 )
+    request.fs = result.match
+    for k, v in result.wildcards.iteritems():  # store each wildcard value on the URI path by name
+        request.line.uri.path[k] = v
+    return {'dispatch_result': result}  # presumably feeds later steps' dispatch_result -- confirm
 
 
 def apply_typecasters_to_path(website, request):
     typecasting.apply_typecasters(website.typecasters, request.line.uri.path)
 
 
-def get_resource_for_request(website, request):
+def get_resource_for_request(website, request, dispatch_result):  # dispatch_result: unused in body
     return {'resource': resources.get(website, request)}
 
 
-def get_response_for_resource(request, resource=None):
+def get_response_for_resource(request, dispatch_result, resource=None):  # None if no resource
     if resource is not None:
-        return {'response': resource.respond(request)}
+        return {'response': resource.respond(request, dispatch_result)}
 
 
 def get_response_for_exception(website, exception):
@@ -112,8 +132,9 @@ def delegate_error_to_simplate(website, request, response, resource=None):
             # Try to return an error that matches the type of the original resource.
             request.headers['Accept'] = resource.media_type + ', text/plain; q=0.1'
         resource = resources.get(website, request)
+        dispatch_result = DispatchResult(DispatchStatus.okay, fs, {}, 'Found.', {})
         try:
-            response = resource.respond(request, response)
+            response = resource.respond(request, dispatch_result, response)
         except Response as response:
             if response.code != 406:
                 raise

diff --git a/aspen/dispatcher.py b/aspen/dispatcher.py
@@ -13,7 +13,6 @@
 import os
 
 from aspen import Response
-from .backcompat import namedtuple
 
 
 def debug_noop(*args, **kwargs):
@@ -51,13 +50,33 @@ def debug_ext():
     return a, b
 
 
-class DispatchStatus:
+class DispatchStatus(object):
     okay, missing, non_leaf = range(3)
 
 
-DispatchResult = namedtuple( 'DispatchResult'
-                           , 'status match wildcards detail'.split()
-                            )
+class DispatchResult(object):
+    """Mutable outcome of one dispatch.
+
+    status is a DispatchStatus value; match is the matched node, or None;
+    wildcards maps wildcard names to values, or is None; detail is a short
+    human-readable message; extra is a dict of additional data.
+    constrain_path starts out True; dispatch() consults it before its
+    www_root check.
+
+    """
+
+    def __init__(self, status, match, wildcards, detail, extra):
+        self.status = status
+        self.match = match
+        self.wildcards = wildcards
+        self.detail = detail
+        self.extra = extra
+        self.constrain_path = True
+
+    def __repr__(self):
+        # The namedtuple this class replaces had a field-by-field repr, and dispatch()
+        # logs repr(result); keep that debug output readable.
+        fields = (self.status, self.match, self.wildcards, self.detail, self.extra)
+        return "DispatchResult(status=%r, match=%r, wildcards=%r, detail=%r, extra=%r)" % fields
 
 
 def dispatch_abstract(listnodes, is_leaf, traverse, find_index, noext_matched,
@@ -100,17 +104,22 @@ def get_wildleaf_fallback():
             ext = lastnode_ext if lastnode_ext in wildleafs else None
             curnode, wildvals = wildleafs[ext]
             debug(lambda: "Wildcard leaf match %r and ext %r" % (curnode, ext))
-            return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+            return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {})
         return None
 
     for depth, node in enumerate(nodepath):
 
         # check all the possibilities:
         # node.html, node.html.spt, node.spt, node.html/, %node.html/ %*.html.spt, %*.spt
-        subnodes = set([ n for n in listnodes(curnode) if not n.startswith('.') ])  # don't serve hidden files
+
+        # don't serve hidden files
+        subnodes = set([ n for n in listnodes(curnode) if not n.startswith('.') ])
+
         node_noext, node_ext = splitext(node)
 
-        maybe_wild_nodes = [ n for n in sorted(subnodes) if n.startswith("%") ]  # only maybe because non-spt files aren't wild
+        # only maybe because non-spt files aren't wild
+        maybe_wild_nodes = [ n for n in sorted(subnodes) if n.startswith("%") ]
+
         wild_leaf_ns = [ n for n in maybe_wild_nodes if is_leaf_node(n) and is_spt(n) ]
         wild_nonleaf_ns = [ n for n in maybe_wild_nodes if not is_leaf_node(n) ]
 
@@ -132,7 +141,8 @@ def get_wildleaf_fallback():
             if node == '':  # dir request
                 debug(lambda: "...last node is empty")
                 path_so_far = traverse(curnode, node)
-                # return either an index file or have the path end in '/' which means 404 or autoindex as appropriate
+                # return either an index file or have the path end in '/' which means 404 or
+                # autoindex as appropriate
                 found_n = find_index(path_so_far)
                 if found_n is None:
                     found_n = ""
@@ -141,17 +151,24 @@ def get_wildleaf_fallback():
                         curnode = traverse(curnode, found_n)
                         node_name = found_n[1:-4]  # strip leading % and trailing .spt
                         wildvals[node_name] = node
-                        return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+                        return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {})
             elif node in subnodes and is_leaf_node(node):
                 debug(lambda: "...found exact file, must be static")
                 if is_spt(node):
-                    return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node)
+                    return DispatchResult( DispatchStatus.missing
+                                         , None
+                                         , None
+                                         , "Node %r Not Found" % node
+                                         , {}
+                                          )
                 else:
                     found_n = node
             elif node + ".spt" in subnodes and is_leaf_node(node + ".spt"):
                 debug(lambda: "...found exact spt")
                 found_n = node + ".spt"
-            elif node_noext + ".spt" in subnodes and is_leaf_node(node_noext + ".spt") and node_ext:  # node has an extension
+            elif node_noext + ".spt" in subnodes and is_leaf_node(node_noext + ".spt") \
+                    and node_ext:
+                # node has an extension
                 debug(lambda: "...found indirect spt")
                 # indirect match
                 noext_matched(node)
@@ -166,16 +183,31 @@ def get_wildleaf_fallback():
                 curnode = traverse(curnode, found_n)
                 result = get_wildleaf_fallback()
                 if not result:
-                    return DispatchResult(DispatchStatus.non_leaf, curnode, None, "Tried to access non-leaf node as leaf.")
+                    return DispatchResult( DispatchStatus.non_leaf
+                                         , curnode
+                                         , None
+                                         , "Tried to access non-leaf node as leaf."
+                                         , {}
+                                          )
                 return result
             elif node in subnodes:
                 debug(lambda: "exact dirmatch")
-                return DispatchResult(DispatchStatus.non_leaf, curnode, None, "Tried to access non-leaf node as leaf.")
+                return DispatchResult( DispatchStatus.non_leaf
+                                     , curnode
+                                     , None
+                                     , "Tried to access non-leaf node as leaf."
+                                     , {}
+                                      )
             else:
                 debug(lambda: "fallthrough")
                 result = get_wildleaf_fallback()
                 if not result:
-                    return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node)
+                    return DispatchResult( DispatchStatus.missing
+                                         , None
+                                         , None
+                                         , "Node %r Not Found" % node
+                                         , {}
+                                          )
                 return result
 
         if not last_node:  # not at last path seg in request
@@ -185,7 +217,8 @@ def get_wildleaf_fallback():
                 debug(lambda: "Exact match " + repr(node))
                 curnode = traverse(curnode, found_n)
             elif wild_nonleaf_ns:
-                # need to match a wildnode, and we're not the last node, so we should match non-leaf first, then leaf
+                # need to match a wildnode, and we're not the last node, so we should match
+                # non-leaf first, then leaf
                 found_n = wild_nonleaf_ns[0]
                 wildvals[found_n[1:]] = node
                 debug(lambda: "Wildcard match %r = %r " % (found_n, node))
@@ -194,10 +227,15 @@ def get_wildleaf_fallback():
                 debug(lambda: "No exact match for " + repr(node))
                 result = get_wildleaf_fallback()
                 if not result:
-                    return DispatchResult(DispatchStatus.missing, None, None, "Node %r Not Found" % node)
+                    return DispatchResult( DispatchStatus.missing
+                                         , None
+                                         , None
+                                         , "Node %r Not Found" % node
+                                         , {}
+                                          )
                 return result
 
-    return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+    return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {})
 
 
 def match_index(indices, indir):
@@ -219,35 +257,29 @@ def is_first_index(indices, basedir, name):
     return False
 
 
-def update_neg_type(website, request, filename):
+def update_neg_type(media_type_default, capture_accept, filename):
     media_type = mimetypes.guess_type(filename, strict=False)[0]
     if media_type is None:
-        media_type = website.media_type_default
-    request.headers['X-Aspen-Accept'] = media_type
-    debug(lambda: "set x-aspen-accept to %r" % media_type)
+        media_type = media_type_default  # guess_type could not determine a type
+    capture_accept['accept'] = media_type  # mutates the caller-supplied dict in place
+    debug(lambda: "set result.extra['accept'] to %r" % media_type)
 
 
-def dispatch(website, request, pure_dispatch=False):
+def dispatch(indices, media_type_default, pathparts, uripath, querystring, startdir,
+        directory_default, favicon_default):
     """Concretize dispatch_abstract.
-
-    This is all side-effecty on the request object, setting, at the least,
-    request.fs, and at worst other random contents including but not limited
-    to: request.line.uri.path, request.headers.
-
     """
 
-    # Handle URI path parts
-    pathparts = request.line.uri.path.parts
-
     # Set up the real environment for the dispatcher.
     # ===============================================
 
+    capture_accept = {}
     listnodes = os.listdir
     is_leaf = os.path.isfile
     traverse = os.path.join
-    find_index = lambda x: match_index(website.indices, x)
-    noext_matched = lambda x: update_neg_type(website, request, x)
-    startdir = website.www_root
+    find_index = lambda x: match_index(indices, x)
+    noext_matched = lambda x: update_neg_type(media_type_default, capture_accept, x)
+
 
     # Dispatch!
     # =========
@@ -263,69 +295,60 @@ def dispatch(website, request, pure_dispatch=False):
 
     debug(lambda: "dispatch_abstract returned: " + repr(result))
 
+    if 'accept' in capture_accept:
+        result.extra['accept'] = capture_accept['accept']
+
     if result.match:
         debug(lambda: "result.match is true" )
         matchbase, matchname = result.match.rsplit(os.path.sep,1)
-        if pathparts[-1] != '' and matchname in website.indices and \
-                is_first_index(website.indices, matchbase, matchname):
+        if pathparts[-1] != '' and matchname in indices and \
+                is_first_index(indices, matchbase, matchname):
             # asked for something that maps to a default index file; redirect to / per issue #175
-            debug(lambda: "found default index '%s' maps into %r" % (pathparts[-1], website.indices))
-            uri = request.line.uri
-            location = uri.path.raw[:-len(pathparts[-1])]
-            if uri.querystring.raw:
-                location += '?' + uri.querystring.raw
+            debug( lambda: "found default index '%s' maps into %r"
+                 % (pathparts[-1], indices)
+                  )
+            location = uripath[:-len(pathparts[-1])]
+            if querystring:
+                location += '?' + querystring
             raise Response(302, headers={'Location': location})
 
-    if not pure_dispatch:
-
-        # favicon.ico
-        # ===========
-        # Serve Aspen's favicon if there's not one.
-
-        if request.line.uri.path.raw == '/favicon.ico':
-            if result.status != DispatchStatus.okay:
-                path = request.line.uri.path.raw[1:]
-                request.fs = website.find_ours(path)
-                return
-
-
-        # robots.txt
-        # ==========
-        # Don't let robots.txt be handled by anything other than an actual
-        # robots.txt file
-
-        if request.line.uri.path.raw == '/robots.txt':
-            if result.status != DispatchStatus.missing:
-                if not result.match.endswith('robots.txt'):
-                    raise Response(404)
-
 
     # Handle returned states.
     # =======================
 
+    if result.status != DispatchStatus.missing:
+        if uripath == '/robots.txt' and not result.match.endswith('robots.txt'):  # robots.txt
+            # Don't let robots.txt be handled by anything other than an actual robots.txt file,
+            # because if you don't have a robots.txt but you do have a wildcard, then you end
+            # up with logspam.
+            raise Response(404)
+
     if result.status == DispatchStatus.okay:
-        if result.match.endswith('/'):              # autoindex
-            if not website.list_directories:
+        if result.match.endswith('/'):
+            if directory_default:                                                 # autoindex
+                result.extra['autoindexdir'] = result.match  # order matters!
+                result.match = directory_default
+                result.wildcards = {}
+                result.detail = 'Directory default.'
+                result.constrain_path = False
+            else:
                 raise Response(404)
-            autoindex = website.ours_or_theirs('autoindex.html.spt')
-            assert autoindex is not None # sanity check
-            request.headers['X-Aspen-AutoIndexDir'] = result.match
-            request.fs = autoindex
-            return  # return so we skip the no-escape check
-        else:                                       # normal match
-            request.fs = result.match
-            for k, v in result.wildcards.iteritems():
-                request.line.uri.path[k] = v
-
-    elif result.status == DispatchStatus.non_leaf:  # trailing-slash redirect
-        uri = request.line.uri
-        location = uri.path.raw + '/'
-        if uri.querystring.raw:
-            location += '?' + uri.querystring.raw
+
+    elif result.status == DispatchStatus.non_leaf:                                # trailing slash
+        location = uripath + '/'
+        if querystring:
+            location += '?' + querystring
         raise Response(302, headers={'Location': location})
 
-    elif result.status == DispatchStatus.missing:   # 404
-        raise Response(404)
+    elif result.status == DispatchStatus.missing:                                 # 404, but ...
+        if uripath == '/favicon.ico' and favicon_default:                         # favicon.ico
+            result.status = DispatchStatus.okay
+            result.match = favicon_default
+            result.wildcards = {}
+            result.detail = 'Favicon default.'
+            result.constrain_path = False
+        else:
+            raise Response(404)
 
     else:
         raise Response(500, "Unknown result status.")
@@ -334,6 +357,7 @@ def dispatch(website, request, pure_dispatch=False):
     # Protect against escaping the www_root.
     # ======================================
 
-    if not request.fs.startswith(startdir):
+    if result.constrain_path and not result.match.startswith(startdir):
         raise Response(404)
 
+    return result