Merge pull request #389 from gratipay/refactor-dispatch

Refactor dispatch
AspenWeb · Oct 3, 2014 · 78ec9a3
2 parents 7dba20a + c24da5f
commit 78ec9a3
Show file tree

Hide file tree

Showing 13 changed files with 288 additions and 174 deletions.
diff --git a/aspen/algorithms/website.py b/aspen/algorithms/website.py
@@ -41,6 +41,7 @@
 from aspen.http.response import Response
 from aspen import typecasting
 from first import first as _first
+from aspen.dispatcher import DispatchResult, DispatchStatus
 
 
 def parse_environ_into_request(environ):
@@ -61,20 +62,39 @@ def raise_200_for_OPTIONS(request):
 
 
 def dispatch_request_to_filesystem(website, request):
-    dispatcher.dispatch(website, request)
+
+    if website.list_directories:
+        directory_default = website.ours_or_theirs('autoindex.html.spt')
+        assert directory_default is not None  # sanity check
+    else:
+        directory_default = None
+
+    result = dispatcher.dispatch( indices               = website.indices
+                                , media_type_default    = website.media_type_default
+                                , pathparts             = request.line.uri.path.parts
+                                , uripath               = request.line.uri.path.raw
+                                , querystring           = request.line.uri.querystring.raw
+                                , startdir              = website.www_root
+                                , directory_default     = directory_default
+                                , favicon_default       = website.find_ours('favicon.ico')
+                                 )
+
+    for k, v in result.wildcards.iteritems():
+        request.line.uri.path[k] = v
+    return {'dispatch_result': result}
 
 
 def apply_typecasters_to_path(website, request):
     typecasting.apply_typecasters(website.typecasters, request.line.uri.path)
 
 
-def get_resource_for_request(website, request):
-    return {'resource': resources.get(website, request)}
+def get_resource_for_request(website, request, dispatch_result):
+    return {'resource': resources.get(website, dispatch_result.match)}
 
 
-def get_response_for_resource(request, resource=None):
+def get_response_for_resource(request, dispatch_result, resource=None):
     if resource is not None:
-        return {'response': resource.respond(request)}
+        return {'response': resource.respond(request, dispatch_result)}
 
 
 def get_response_for_exception(website, exception):
@@ -103,17 +123,17 @@ def delegate_error_to_simplate(website, request, response, resource=None):
 
     code = str(response.code)
     possibles = [code + ".spt", "error.spt"]
-    fs = _first(website.ours_or_theirs(errpage) for errpage in possibles)
+    fspath = _first(website.ours_or_theirs(errpage) for errpage in possibles)
 
-    if fs is not None:
-        request.fs = fs
+    if fspath is not None:
         request.original_resource = resource
         if resource is not None:
             # Try to return an error that matches the type of the original resource.
             request.headers['Accept'] = resource.media_type + ', text/plain; q=0.1'
-        resource = resources.get(website, request)
+        resource = resources.get(website, fspath)
+        dispatch_result = DispatchResult(DispatchStatus.okay, fspath, {}, 'Found.', {}, True)
         try:
-            response = resource.respond(request, response)
+            response = resource.respond(request, dispatch_result, response)
         except Response as response:
             if response.code != 406:
                 raise
@@ -130,7 +150,7 @@ def log_traceback_for_exception(website, exception):
     return {'response': response, 'exception': None}
 
 
-def log_result_of_request(website, request=None, response=None):
+def log_result_of_request(website, request=None, dispatch_result=None, response=None):
     """Log access. With our own format (not Apache's).
     """
 
@@ -144,14 +164,14 @@ def log_result_of_request(website, request=None, response=None):
     if request is None:
         msg = "(no request available)"
     else:
-        fs = getattr(request, 'fs', '')
-        if fs.startswith(website.www_root):
-            fs = fs[len(website.www_root):]
-            if fs:
-                fs = '.'+fs
+        fspath = getattr(dispatch_result, 'match', '')
+        if fspath.startswith(website.www_root):
+            fspath = fspath[len(website.www_root):]
+            if fspath:
+                fspath = '.' + fspath
         else:
-            fs = '...' + fs[-21:]
-        msg = "%-24s %s" % (request.line.uri.path.raw, fs)
+            fspath = '...' + fspath[-21:]
+        msg = "%-24s %s" % (request.line.uri.path.raw, fspath)
 
 
     # Where was response raised from?

diff --git a/aspen/dispatcher.py b/aspen/dispatcher.py
@@ -51,13 +51,11 @@ def debug_ext():
     return a, b
 
 
-class DispatchStatus:
+class DispatchStatus(object):
     okay, missing, non_leaf = range(3)
 
 
-DispatchResult = namedtuple( 'DispatchResult'
-                           , 'status match wildcards detail'.split()
-                            )
+DispatchResult = namedtuple('DispatchResult', 'status match wildcards detail extra constrain_path')
 
 
 def dispatch_abstract(listnodes, is_leaf, traverse, find_index, noext_matched,
@@ -100,7 +98,7 @@ def get_wildleaf_fallback():
             ext = lastnode_ext if lastnode_ext in wildleafs else None
             curnode, wildvals = wildleafs[ext]
             debug(lambda: "Wildcard leaf match %r and ext %r" % (curnode, ext))
-            return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+            return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {}, True)
         return None
 
     for depth, node in enumerate(nodepath):
@@ -147,14 +145,22 @@ def get_wildleaf_fallback():
                         curnode = traverse(curnode, found_n)
                         node_name = found_n[1:-4]  # strip leading % and trailing .spt
                         wildvals[node_name] = node
-                        return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+                        return DispatchResult( DispatchStatus.okay
+                                             , curnode
+                                             , wildvals
+                                             , "Found."
+                                             , {}
+                                             , True
+                                              )
             elif node in subnodes and is_leaf_node(node):
                 debug(lambda: "...found exact file, must be static")
                 if is_spt(node):
                     return DispatchResult( DispatchStatus.missing
                                          , None
                                          , None
                                          , "Node %r Not Found" % node
+                                         , {}
+                                         , True
                                           )
                 else:
                     found_n = node
@@ -182,6 +188,8 @@ def get_wildleaf_fallback():
                                          , curnode
                                          , None
                                          , "Tried to access non-leaf node as leaf."
+                                         , {}
+                                         , True
                                           )
                 return result
             elif node in subnodes:
@@ -190,6 +198,8 @@ def get_wildleaf_fallback():
                                      , curnode
                                      , None
                                      , "Tried to access non-leaf node as leaf."
+                                     , {}
+                                     , True
                                       )
             else:
                 debug(lambda: "fallthrough")
@@ -199,6 +209,8 @@ def get_wildleaf_fallback():
                                          , None
                                          , None
                                          , "Node %r Not Found" % node
+                                         , {}
+                                         , True
                                           )
                 return result
 
@@ -223,10 +235,12 @@ def get_wildleaf_fallback():
                                          , None
                                          , None
                                          , "Node %r Not Found" % node
+                                         , {}
+                                         , True
                                           )
                 return result
 
-    return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.")
+    return DispatchResult(DispatchStatus.okay, curnode, wildvals, "Found.", {}, True)
 
 
 def match_index(indices, indir):
@@ -248,35 +262,29 @@ def is_first_index(indices, basedir, name):
     return False
 
 
-def update_neg_type(website, request, filename):
+def update_neg_type(media_type_default, capture_accept, filename):
     media_type = mimetypes.guess_type(filename, strict=False)[0]
     if media_type is None:
-        media_type = website.media_type_default
-    request.headers['X-Aspen-Accept'] = media_type
-    debug(lambda: "set x-aspen-accept to %r" % media_type)
+        media_type = media_type_default
+    capture_accept['accept'] = media_type
+    debug(lambda: "set result.extra['accept'] to %r" % media_type)
 
 
-def dispatch(website, request, pure_dispatch=False):
+def dispatch(indices, media_type_default, pathparts, uripath, querystring, startdir,
+        directory_default, favicon_default):
     """Concretize dispatch_abstract.
-
-    This is all side-effecty on the request object, setting, at the least,
-    request.fs, and at worst other random contents including but not limited
-    to: request.line.uri.path, request.headers.
-
     """
 
-    # Handle URI path parts
-    pathparts = request.line.uri.path.parts
-
     # Set up the real environment for the dispatcher.
     # ===============================================
 
+    capture_accept = {}
     listnodes = os.listdir
     is_leaf = os.path.isfile
     traverse = os.path.join
-    find_index = lambda x: match_index(website.indices, x)
-    noext_matched = lambda x: update_neg_type(website, request, x)
-    startdir = website.www_root
+    find_index = lambda x: match_index(indices, x)
+    noext_matched = lambda x: update_neg_type(media_type_default, capture_accept, x)
+
 
     # Dispatch!
     # =========
@@ -292,71 +300,64 @@ def dispatch(website, request, pure_dispatch=False):
 
     debug(lambda: "dispatch_abstract returned: " + repr(result))
 
+    if 'accept' in capture_accept:
+        result.extra['accept'] = capture_accept['accept']
+
     if result.match:
         debug(lambda: "result.match is true" )
         matchbase, matchname = result.match.rsplit(os.path.sep,1)
-        if pathparts[-1] != '' and matchname in website.indices and \
-                is_first_index(website.indices, matchbase, matchname):
+        if pathparts[-1] != '' and matchname in indices and \
+                is_first_index(indices, matchbase, matchname):
             # asked for something that maps to a default index file; redirect to / per issue #175
             debug( lambda: "found default index '%s' maps into %r"
-                 % (pathparts[-1], website.indices)
+                 % (pathparts[-1], indices)
                   )
-            uri = request.line.uri
-            location = uri.path.raw[:-len(pathparts[-1])]
-            if uri.querystring.raw:
-                location += '?' + uri.querystring.raw
+            location = uripath[:-len(pathparts[-1])]
+            if querystring:
+                location += '?' + querystring
             raise Response(302, headers={'Location': location})
 
-    if not pure_dispatch:
-
-        # favicon.ico
-        # ===========
-        # Serve Aspen's favicon if there's not one.
-
-        if request.line.uri.path.raw == '/favicon.ico':
-            if result.status != DispatchStatus.okay:
-                path = request.line.uri.path.raw[1:]
-                request.fs = website.find_ours(path)
-                return
-
-
-        # robots.txt
-        # ==========
-        # Don't let robots.txt be handled by anything other than an actual
-        # robots.txt file
-
-        if request.line.uri.path.raw == '/robots.txt':
-            if result.status != DispatchStatus.missing:
-                if not result.match.endswith('robots.txt'):
-                    raise Response(404)
-
 
     # Handle returned states.
     # =======================
 
+    if result.status != DispatchStatus.missing:
+        if uripath == '/robots.txt' and not result.match.endswith('robots.txt'):  # robots.txt
+            # Don't let robots.txt be handled by anything other than an actual robots.txt file,
+            # because if you don't have a robots.txt but you do have a wildcard, then you end
+            # up with logspam.
+            raise Response(404)
+
     if result.status == DispatchStatus.okay:
-        if result.match.endswith('/'):              # autoindex
-            if not website.list_directories:
+        if result.match.endswith('/'):
+            if directory_default:                                                 # autoindex
+                result = DispatchResult( result.status
+                                       , directory_default
+                                       , {}
+                                       , 'Directory default.'
+                                       , {'autoindexdir': result.match}
+                                       , False
+                                        )
+            else:
                 raise Response(404)
-            autoindex = website.ours_or_theirs('autoindex.html.spt')
-            assert autoindex is not None # sanity check
-            request.headers['X-Aspen-AutoIndexDir'] = result.match
-            request.fs = autoindex
-            return  # return so we skip the no-escape check
-        else:                                       # normal match
-            request.fs = result.match
-            for k, v in result.wildcards.iteritems():
-                request.line.uri.path[k] = v
-
-    elif result.status == DispatchStatus.non_leaf:  # trailing-slash redirect
-        uri = request.line.uri
-        location = uri.path.raw + '/'
-        if uri.querystring.raw:
-            location += '?' + uri.querystring.raw
+
+    elif result.status == DispatchStatus.non_leaf:                                # trailing slash
+        location = uripath + '/'
+        if querystring:
+            location += '?' + querystring
         raise Response(302, headers={'Location': location})
 
-    elif result.status == DispatchStatus.missing:   # 404
-        raise Response(404)
+    elif result.status == DispatchStatus.missing:                                 # 404, but ...
+        if uripath == '/favicon.ico' and favicon_default:                         # favicon.ico
+            result = DispatchResult( DispatchStatus.okay
+                                   , favicon_default
+                                   , {}
+                                   , 'Favicon default.'
+                                   , {}
+                                   , False
+                                    )
+        else:
+            raise Response(404)
 
     else:
         raise Response(500, "Unknown result status.")
@@ -365,6 +366,7 @@ def dispatch(website, request, pure_dispatch=False):
     # Protect against escaping the www_root.
     # ======================================
 
-    if not request.fs.startswith(startdir):
+    if result.constrain_path and not result.match.startswith(startdir):
         raise Response(404)
 
+    return result