INTPYTHON-348 add support for QuerySet.raw_aggregate() (#183)

* add intersphinx configuration to docs * INTPYTHON-348 add QuerySet.raw_aggregate() Co-authored-by: Tim Graham <[email protected]> --------- Co-authored-by: Tim Graham <[email protected]>
mongodb-labs · Nov 22, 2024 · 36c5718 · 36c5718
1 parent ca8ac6a
commit 36c5718
Show file tree

Hide file tree

Showing 9 changed files with 562 additions and 10 deletions.
diff --git a/.github/workflows/runtests.py b/.github/workflows/runtests.py
@@ -120,6 +120,7 @@
     "queries",
     "queries_",
     "queryset_pickle",
+    "raw_query_",
     "redirects_tests",
     "reserved_names",
     "reverse_lookup",

diff --git a/django_mongodb/managers.py b/django_mongodb/managers.py
@@ -0,0 +1,7 @@
+from django.db.models.manager import BaseManager
+
+from .queryset import MongoQuerySet
+
+
+class MongoManager(BaseManager.from_queryset(MongoQuerySet)):
+    """
+    Manager built from MongoQuerySet, so MongoQuerySet's methods (such as
+    raw_aggregate()) can be called on a model's manager.
+    """
+
+    pass
diff --git a/django_mongodb/queryset.py b/django_mongodb/queryset.py
@@ -0,0 +1,96 @@
+from itertools import chain
+
+from django.core.exceptions import FieldDoesNotExist
+from django.db import connections
+from django.db.models import QuerySet
+from django.db.models.query import RawModelIterable as BaseRawModelIterable
+from django.db.models.query import RawQuerySet as BaseRawQuerySet
+from django.db.models.sql.query import RawQuery as BaseRawQuery
+
+
+class MongoQuerySet(QuerySet):
+    """QuerySet subclass that adds MongoDB-specific query methods."""
+
+    def raw_aggregate(self, pipeline, using=None):
+        """
+        Return a RawQuerySet for ``pipeline`` bound to this queryset's model.
+
+        Only ``self.model`` is handed on; any filtering already applied to
+        this queryset isn't used. ``using`` is passed through unchanged.
+        """
+        return RawQuerySet(pipeline, model=self.model, using=using)
+
+
+class RawQuerySet(BaseRawQuerySet):
+    """Raw queryset driven by an aggregation pipeline instead of SQL."""
+
+    def __init__(self, pipeline, model=None, using=None):
+        # The pipeline is given to the superclass as its first positional
+        # argument.
+        super().__init__(pipeline, model=model, using=using)
+        # Replace the query with this module's RawQuery. self.db and
+        # self.model are read after the superclass initializer has run.
+        self.query = RawQuery(pipeline, using=self.db, model=self.model)
+        # Override the superclass's columns property which relies on PEP 249's
+        # cursor.description. Instead, RawModelIterable will set the columns
+        # based on the keys in the first result.
+        self.columns = None
+
+    def iterator(self):
+        """Yield everything produced by RawModelIterable for this queryset."""
+        yield from RawModelIterable(self)
+
+
+class RawQuery(BaseRawQuery):
+    """Raw query holding an aggregation pipeline instead of SQL."""
+
+    def __init__(self, pipeline, using, model):
+        self.pipeline = pipeline
+        # There is no SQL string for this query, so the superclass gets
+        # sql=None.
+        super().__init__(sql=None, using=using)
+        self.model = model
+
+    def _execute_query(self):
+        """
+        Run the pipeline against the collection named by the model's
+        db_table and store the returned cursor on self.cursor.
+        """
+        connection = connections[self.using]
+        collection = connection.get_collection(self.model._meta.db_table)
+        self.cursor = collection.aggregate(self.pipeline)
+
+    def __str__(self):
+        """Return the string form of the pipeline."""
+        return str(self.pipeline)
+
+
+class RawModelIterable(BaseRawModelIterable):
+    """Iterable that turns aggregation result documents into model instances."""
+
+    def __iter__(self):
+        """
+        This is copied from the superclass except for the part that sets
+        self.queryset.columns from the first result.
+
+        Yield one model instance per result document, or nothing if there
+        are no results. Raise FieldDoesNotExist if the model's primary key
+        isn't among the columns taken from the first result. The query's
+        cursor is closed when iteration ends.
+        """
+        db = self.queryset.db
+        query = self.queryset.query
+        connection = connections[db]
+        compiler = connection.ops.compiler("SQLCompiler")(query, connection, db)
+        query_iterator = iter(query)
+        try:
+            # Get the columns from the first result.
+            try:
+                first_result = next(query_iterator)
+            except StopIteration:
+                # No results.
+                return
+            self.queryset.columns = list(first_result.keys())
+            # Reset the iterator to include the first item.
+            query_iterator = self._make_result(chain([first_result], query_iterator))
+            (
+                model_init_names,
+                model_init_pos,
+                annotation_fields,
+            ) = self.queryset.resolve_model_init_order()
+            model_cls = self.queryset.model
+            if model_cls._meta.pk.attname not in model_init_names:
+                raise FieldDoesNotExist("Raw query must include the primary key")
+            # Columns that don't match a model field map to None here and so
+            # get no converter.
+            fields = [self.queryset.model_fields.get(c) for c in self.queryset.columns]
+            converters = compiler.get_converters(
+                [f.get_col(f.model._meta.db_table) if f else None for f in fields]
+            )
+            if converters:
+                query_iterator = compiler.apply_converters(query_iterator, converters)
+            for values in query_iterator:
+                # Associate fields to values
+                model_init_values = [values[pos] for pos in model_init_pos]
+                instance = model_cls.from_db(db, model_init_names, model_init_values)
+                if annotation_fields:
+                    # Non-field columns are attached as plain attributes.
+                    for column, pos in annotation_fields:
+                        setattr(instance, column, values[pos])
+                yield instance
+        finally:
+            query.cursor.close()
+
+    def _make_result(self, query):
+        """
+        Convert documents (dictionaries) to tuples as expected by the rest
+        of __iter__().
+
+        Values are looked up by name in the order of self.queryset.columns
+        (the keys of the first document) so that every tuple lines up with
+        the column positions used by __iter__(), even when a later document
+        orders its keys differently or omits one. An omitted key yields
+        None; a key that wasn't in the first document is ignored.
+        """
+        # __iter__() sets self.queryset.columns before creating this
+        # generator, so the columns are available here.
+        columns = self.queryset.columns
+        for result in query:
+            yield tuple(result.get(column) for column in columns)
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -17,11 +17,25 @@
 # -- General configuration ---------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
 
-extensions = []
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+add_module_names = False
+
+extensions = [
+    "sphinx.ext.intersphinx",
+]
 
 # templates_path = ["_templates"]
 exclude_patterns = []
 
+# Each entry maps a project name to a tuple of (base URL of its documentation,
+# location of its object inventory). None means the inventory is fetched from
+# the default objects.inv file under the base URL.
+intersphinx_mapping = {
+    "django": (
+        "https://docs.djangoproject.com/en/5.0/",
+        "https://docs.djangoproject.com/en/5.0/_objects/",
+    ),
+    "pymongo": ("https://pymongo.readthedocs.io/en/stable/", None),
+    "python": ("https://docs.python.org/3/", None),
+}
 
 # -- Options for HTML output -------------------------------------------------
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -1,16 +1,11 @@
-.. django_mongodb documentation master file, created by
-   sphinx-quickstart on Mon Apr 15 12:38:26 2024.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root ``toctree`` directive.
-
-Welcome to django_mongodb's documentation!
-==========================================
+django-mongodb 5.0.x documentation
+==================================
 
 .. toctree::
-   :maxdepth: 2
+   :maxdepth: 1
    :caption: Contents:
 
-
+   querysets
 
 Indices and tables
 ==================

diff --git a/docs/source/querysets.rst b/docs/source/querysets.rst
@@ -0,0 +1,69 @@
+``QuerySet`` API reference
+==========================
+
+Some MongoDB-specific ``QuerySet`` methods are available by adding a custom
+:class:`~django.db.models.Manager`, ``MongoManager``, to your model::
+
+    from django.db import models
+
+    from django_mongodb.managers import MongoManager
+
+
+    class MyModel(models.Model):
+        ...
+
+        objects = MongoManager()
+
+
+.. currentmodule:: django_mongodb.queryset.MongoQuerySet
+
+``raw_aggregate()``
+-------------------
+
+.. method:: raw_aggregate(pipeline, using=None)
+
+Similar to :meth:`QuerySet.raw()<django.db.models.query.QuerySet.raw>`, but
+instead of a raw SQL query, this method accepts a pipeline that will be passed
+to :meth:`pymongo.collection.Collection.aggregate`.
+
+For example, you could write custom match criteria::
+
+    Question.objects.raw_aggregate([{"$match": {"question_text": "What's up"}}])
+
+The pipeline may also return additional fields that will be added as
+annotations on the models::
+
+    >>> questions = Question.objects.raw_aggregate([{
+    ...     "$project": {
+    ...         "question_text": 1,
+    ...         "pub_date": 1,
+    ...         "year_published": {"$year": "$pub_date"}
+    ...     }
+    ... }])
+    >>> for q in questions:
+    ...     print(f"{q.question_text} was published in {q.year_published}.")
+    ...
+    What's up? was published in 2024.
+
+Fields may also be left out::
+
+    >>> Question.objects.raw_aggregate([{"$project": {"question_text": 1}}])
+
+The ``Question`` objects returned by this query will be deferred model instances
+(see :meth:`~django.db.models.query.QuerySet.defer()`). This means that the
+fields that are omitted from the query will be loaded on demand. For example::
+
+    >>> for q in Question.objects.raw_aggregate([{"$project": {"question_text": 1}}]):
+    ...     print(
+    ...         q.question_text,  # This will be retrieved by the original query.
+    ...         q.pub_date,       # This will be retrieved on demand.
+    ...     )
+    ...
+    What's new 2023-09-03 12:00:00+00:00
+    What's up 2024-08-23 20:57:30+00:00
+
+From outward appearances, this looks like the query has retrieved both the
+question text and published date. However, this example actually issued three
+queries. Only the question texts were retrieved by the ``raw_aggregate()``
+query -- the published dates were both retrieved on demand when they were
+printed.
diff --git a/tests/raw_query_/__init__.py b/tests/raw_query_/__init__.py
diff --git a/tests/raw_query_/models.py b/tests/raw_query_/models.py
@@ -0,0 +1,60 @@
+from django.db import models
+
+from django_mongodb.fields import ObjectIdAutoField
+from django_mongodb.managers import MongoManager
+
+
+class Author(models.Model):
+    """Test model whose __init__ asserts that every kwarg is a field."""
+
+    first_name = models.CharField(max_length=255)
+    last_name = models.CharField(max_length=255)
+    dob = models.DateField()
+
+    objects = MongoManager()
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Protect against annotations being passed to __init__ --
+        # this'll make the test suite get angry if annotations aren't
+        # treated differently than fields.
+        for k in kwargs:
+            assert k in [f.attname for f in self._meta.fields], (
+                "Author.__init__ got an unexpected parameter: %s" % k
+            )
+
+
+class Book(models.Model):
+    """Test model with a foreign key to Author."""
+
+    title = models.CharField(max_length=255)
+    author = models.ForeignKey(Author, models.CASCADE)
+    paperback = models.BooleanField(default=False)
+    opening_line = models.TextField()
+
+    objects = MongoManager()
+
+
+class BookFkAsPk(models.Model):
+    """
+    Test model whose primary key is a foreign key to Book, stored under a
+    db_column that differs from the default.
+    """
+
+    book = models.ForeignKey(Book, models.CASCADE, primary_key=True, db_column="not_the_default")
+
+    objects = MongoManager()
+
+
+class Coffee(models.Model):
+    """Test model whose ``brand`` field is stored under the column "name"."""
+
+    brand = models.CharField(max_length=255, db_column="name")
+    price = models.DecimalField(max_digits=10, decimal_places=2, default=0)
+
+    objects = MongoManager()
+
+
+class MixedCaseIDColumn(models.Model):
+    """Test model whose primary key uses a mixed-case db_column."""
+
+    id = ObjectIdAutoField(primary_key=True, db_column="MiXeD_CaSe_Id")
+
+    objects = MongoManager()
+
+
+class Reviewer(models.Model):
+    """Test model with a many-to-many relation to Book."""
+
+    reviewed = models.ManyToManyField(Book)
+
+    objects = MongoManager()
+
+
+class FriendlyAuthor(Author):
+    """Subclass of Author that declares nothing of its own."""
+
+    pass