Merge pull request #30 from yougov/reoptimize

Revisit optimizations
chadwhitacre · May 11, 2016 · fa7b43d · fa7b43d
2 parents 0f59e96 + dba03e4
commit fa7b43d
Show file tree

Hide file tree

Showing 5 changed files with 11 additions and 102 deletions.
diff --git a/mongs.py b/mongs.py
@@ -144,3 +144,8 @@ def dt2age(dt):
     fmt = 'in {age}' if dt > now else '{age} ago'
     return fmt.format(age=age)
 
+def has_documents(coll):
+    """
+    Return True if coll.count() is nonzero (the collection holds documents).
+    """
+    return bool(coll.count())
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,4 @@
 aspen==0.42
 aspen-jinja2==0.4
-pymongo==3.2
+pymongo==3.2.2
 six
diff --git a/www/%server/%database/%collection/%filter/index.html.spt b/www/%server/%database/%collection/%filter/index.html.spt
@@ -57,25 +57,6 @@ class Pair:
         self.link = link
 
 
-class NamespacePair(Pair):
-    """Represent the single pair of values for a system.namespaces entry.
-    """
-    def __init__(self, base, _id, k, v, database):
-        parts = v.split('.$')
-        if len(parts) == 1:
-            parts += ['']
-        namepart, indexpart = parts
-        if indexpart:
-            indexpart = ".$" + indexpart
-        collection = namepart[len(database)+1:]
-        value = '%s.<a href="../../../%s/">%s</a>%s'
-        value %= (database, collection, collection, indexpart)
-
-        self.k = k
-        self.v = value
-        self.link = False
-
-
 [----]
 
 # Parse input
@@ -90,15 +71,6 @@ single = page is None
 filtered = single
 
 
-# Pair class.
-# ===========
-# If we are on system.namespaces we want to link to collections.
-
-if collection == 'system.namespaces':
-    # will insert hyperlinks to the collection
-    Pair = functools.partial(NamespacePair, database=database)
-
-
 # Compute base, filter, and page.
 # ===============================
 # This simplate is symlinked to be called in two contexts:
@@ -128,15 +100,6 @@ if not single:      # /server/database/collection/filter/page/
         # unfiltered collection, which appears to be O(1) instead of O(N).
         optimize_count = True
 
-elif collection == 'system.namespaces':
-                    # /server/database/system.namespaces/collection/
-
-    # Special case for system.namespaces. The _id is actually the
-    # fully-qualified name of a collection or index. Send them to the
-    # collection!
-    collection = filter.split('$')[0][len(database)+1:]
-    raise Response(301, headers=[('Location', '../../%s/' % collection)])
-
 else:               # /server/database/collection/_id/
     # Convert a request for a specific _id into a filter with one page.
     page = 1
@@ -148,28 +111,14 @@ else:               # /server/database/collection/_id/
         pass
     filter = {"_id": _id}
 
-if collection == 'system.namespaces':
-    # special case for MongoDB's collection of collections; docs have no _id
-    pass
-else:
-    if '_id' not in filter:
-        # I'm sorry, I forget why I did this. :^(
-        filter.update({'_id': {'$exists': True}})
-
 
 # Sort.
 # =====
 # The user passes sort in as part of the {filter} hash, which otherwise is a
 # MongoDB query spec. Pull sort out after the filter has been decoded but
 # before we actually use it.
 
-if 'sort' in filter:
-    sort = filter.pop('sort')
-elif collection == 'system.namespaces':
-    # special case; this has no _id, only name
-    sort = [("name", 1)]
-else:
-    sort = [("_id", 1)]
+sort = filter.pop('sort', None)
 
 
 # Load documents.
@@ -187,20 +136,7 @@ documents.rewind()
 # Sort.
 # =====
 
-if collection == 'system.namespaces' and ndocs > 1024:
-
-    # Special case. It is possible to index system.namespaces, but no one is
-    # really going to have done that. Only sort if there's a small number of
-    # collections.
-    #
-    # The next step would be to check for the presence of an index. I think
-    # MongoDB actually does this internally (refuses to sort if the result set
-    # is too large and there's no index), but I think it still degrades
-    # performance.
-
-    pass
-
-else:
+if sort:
     documents.sort(sort)
 
 

diff --git a/www/%server/%database/%collection/index.html.spt b/www/%server/%database/%collection/index.html.spt
@@ -2,6 +2,7 @@
 """
 import pymongo
 from aspen import Response
+import mongs
 
 [----]
 server = request.line.uri.path['server']
@@ -10,32 +11,9 @@ collection = request.line.uri.path['collection']
 
 coll = pymongo.MongoClient(server)[database][collection]
 
-if collection == 'system.namespaces':
-
-    # Special case. This collection holds the names of all other collections in
-    # this database. Users can access it directly for any database. For
-    # databases with more than a few collections we enforce that users browse
-    # collections using this interface rather than calling collection_names.
-
-    documents = coll.find()
-    if documents.count() < 1024:  # assume that system.namespaces isn't indexed
-        documents.sort('name', pymongo.ASCENDING)
-else:
-    # This is a query for a "normal" collection.
-    documents = coll.find({'_id': {'$exists': True}}).sort('_id', pymongo.ASCENDING)
-
-try:
-    document = documents.next()
-except StopIteration:
-    pass
-else:
-
+if mongs.has_documents(coll):
     # Redirect users to the first document in this collection.
-    # ========================================================
-    # Aspen as of 0.10.8 chokes on empty path parts, so we have to indicate
-    # an empty filter with explicit braces.
-
-    raise Response(302, headers={'Location': './{}/1/'})
+    raise Response(302, headers={'Location': './/1/'})
 
 [----]
 {% extends "base.html" %}

diff --git a/www/%server/%database/index.html.spt b/www/%server/%database/index.html.spt
@@ -86,16 +86,6 @@ database = request.line.uri.path['database']
 db = pymongo.MongoClient(server)[database]
 
 
-# optimize
-# ========
-# The collection_names API call is unsuitable for databases with many
-# collections. In such cases we require the user to browse the collections via
-# the system.namespaces meta-collection.
-
-if db['system.namespaces'].count() > 256:
-    raise Response(302, headers=[('Location', './system.namespaces/')])
-
-
 # dbsize
 # ======
 # We need the disk size of the database as a whole in order to calculate