Skip to content

Commit

Permalink
INTPYTHON-348 add support for QuerySet.raw_aggregate() (#183)
Browse files Browse the repository at this point in the history
* add intersphinx configuration to docs

* INTPYTHON-348 add QuerySet.raw_aggregate()

Co-authored-by: Tim Graham <[email protected]>

---------

Co-authored-by: Tim Graham <[email protected]>
  • Loading branch information
aclark4life and timgraham authored Nov 22, 2024
1 parent ca8ac6a commit 36c5718
Show file tree
Hide file tree
Showing 9 changed files with 562 additions and 10 deletions.
1 change: 1 addition & 0 deletions .github/workflows/runtests.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
"queries",
"queries_",
"queryset_pickle",
"raw_query_",
"redirects_tests",
"reserved_names",
"reverse_lookup",
Expand Down
7 changes: 7 additions & 0 deletions django_mongodb/managers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from django.db.models.manager import BaseManager

from .queryset import MongoQuerySet


# Manager whose queryset class is MongoQuerySet, so the methods defined on
# MongoQuerySet are available from the manager built by from_queryset().
class MongoManager(BaseManager.from_queryset(MongoQuerySet)):
    pass
96 changes: 96 additions & 0 deletions django_mongodb/queryset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from itertools import chain

from django.core.exceptions import FieldDoesNotExist
from django.db import connections
from django.db.models import QuerySet
from django.db.models.query import RawModelIterable as BaseRawModelIterable
from django.db.models.query import RawQuerySet as BaseRawQuerySet
from django.db.models.sql.query import RawQuery as BaseRawQuery


class MongoQuerySet(QuerySet):
    """QuerySet subclass that adds MongoDB-specific query methods."""

    def raw_aggregate(self, pipeline, using=None):
        """
        Return a RawQuerySet that runs ``pipeline`` as an aggregation on this
        model's collection.

        ``pipeline`` is handed to the RawQuerySet unchanged. ``using`` is the
        database alias to query; when omitted, the database this queryset is
        already bound to is used.
        """
        if using is None:
            # Honor a database selected earlier (e.g. through .using()), the
            # same way QuerySet.raw() does, instead of silently discarding it.
            using = self.db
        return RawQuerySet(pipeline, model=self.model, using=using)


class RawQuerySet(BaseRawQuerySet):
    """Raw queryset whose query is an aggregation pipeline."""

    # NOTE(review): this signature is narrower than the superclass's; any
    # inherited method that re-instantiates the class with extra keyword
    # arguments would presumably fail -- confirm against BaseRawQuerySet.
    def __init__(self, pipeline, model=None, using=None):
        super().__init__(pipeline, model=model, using=using)
        # Replace whatever query the superclass set up with the
        # pipeline-based RawQuery.
        self.query = RawQuery(pipeline, using=self.db, model=self.model)
        # Override the superclass's columns property which relies on PEP 249's
        # cursor.description. Instead, RawModelIterable will set the columns
        # based on the keys in the first result.
        self.columns = None

    def iterator(self):
        """Yield each item produced by RawModelIterable for this queryset."""
        yield from RawModelIterable(self)


class RawQuery(BaseRawQuery):
    """Raw query that holds an aggregation pipeline in place of SQL."""

    def __init__(self, pipeline, using, model):
        """Store the pipeline and model; the superclass gets no SQL."""
        # The pipeline is assigned before the superclass initializer runs.
        self.pipeline = pipeline
        super().__init__(sql=None, using=using)
        self.model = model

    def _execute_query(self):
        """Run the pipeline on the model's collection and keep the cursor."""
        db_table = self.model._meta.db_table
        self.cursor = (
            connections[self.using]
            .get_collection(db_table)
            .aggregate(self.pipeline)
        )

    def __str__(self):
        """Return the string form of the pipeline."""
        return str(self.pipeline)


class RawModelIterable(BaseRawModelIterable):
    """Iterable that turns aggregation result documents into model instances."""

    def __iter__(self):
        """
        Yield a model instance for every document returned by the query.

        This is copied from the superclass except for the part that sets
        self.queryset.columns from the first result.

        Raises FieldDoesNotExist if the results don't include the model's
        primary key.
        """
        db = self.queryset.db
        query = self.queryset.query
        connection = connections[db]
        compiler = connection.ops.compiler("SQLCompiler")(query, connection, db)
        query_iterator = iter(query)
        try:
            # Get the columns from the first result.
            try:
                first_result = next(query_iterator)
            except StopIteration:
                # No results.
                return
            self.queryset.columns = list(first_result.keys())
            # Reset the iterator to include the first item.
            query_iterator = self._make_result(chain([first_result], query_iterator))
            (
                model_init_names,
                model_init_pos,
                annotation_fields,
            ) = self.queryset.resolve_model_init_order()
            model_cls = self.queryset.model
            if model_cls._meta.pk.attname not in model_init_names:
                raise FieldDoesNotExist("Raw query must include the primary key")
            fields = [self.queryset.model_fields.get(c) for c in self.queryset.columns]
            converters = compiler.get_converters(
                [f.get_col(f.model._meta.db_table) if f else None for f in fields]
            )
            if converters:
                query_iterator = compiler.apply_converters(query_iterator, converters)
            for values in query_iterator:
                # Associate fields to values
                model_init_values = [values[pos] for pos in model_init_pos]
                instance = model_cls.from_db(db, model_init_names, model_init_values)
                if annotation_fields:
                    for column, pos in annotation_fields:
                        setattr(instance, column, values[pos])
                yield instance
        finally:
            # Only close a cursor that actually exists, so cleanup can never
            # raise and mask an exception from the body above.
            cursor = getattr(query, "cursor", None)
            if cursor is not None:
                cursor.close()

    def _make_result(self, query):
        """
        Convert documents (dictionaries) to tuples as expected by the rest
        of __iter__().

        Values are looked up by column name rather than taken positionally,
        so a document that lacks a key (it becomes None), orders its keys
        differently, or carries extra keys still lines up with
        self.queryset.columns.
        """
        columns = self.queryset.columns
        for result in query:
            yield tuple(result.get(column) for column in columns)
16 changes: 15 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,25 @@
# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# NOTE(review): this empty assignment is overwritten by the ``extensions``
# list assigned a few lines below; it looks like a leftover -- confirm.
extensions = []
# If true, the current module name will be prepended to all description
# unit titles (such as .. function::).
add_module_names = False

# Sphinx extensions to load.
extensions = [
"sphinx.ext.intersphinx",
]

# templates_path = ["_templates"]
# No paths are excluded.
exclude_patterns = []

# External projects whose documentation can be cross-referenced. Each value
# is a (base URL, inventory location) pair.
intersphinx_mapping = {
    "django": (
        "https://docs.djangoproject.com/en/5.0/",
        # Fetch the inventory over HTTPS, consistent with the base URL above.
        "https://docs.djangoproject.com/en/5.0/_objects/",
    ),
    "pymongo": ("https://pymongo.readthedocs.io/en/stable/", None),
    "python": ("https://docs.python.org/3/", None),
}

# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output
Expand Down
13 changes: 4 additions & 9 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
@@ -1,16 +1,11 @@
.. django_mongodb documentation master file, created by
sphinx-quickstart on Mon Apr 15 12:38:26 2024.
You can adapt this file completely to your liking, but it should at least
contain the root ``toctree`` directive.
Welcome to django_mongodb's documentation!
==========================================
django-mongodb 5.0.x documentation
==================================

.. toctree::
:maxdepth: 2
:maxdepth: 1
:caption: Contents:


querysets

Indices and tables
==================
Expand Down
69 changes: 69 additions & 0 deletions docs/source/querysets.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
``QuerySet`` API reference
==========================

Some MongoDB-specific ``QuerySet`` methods are available by adding a custom
:class:`~django.db.models.Manager`, ``MongoManager``, to your model::

from django.db import models

from django_mongodb.managers import MongoManager


class MyModel(models.Model):
...

objects = MongoManager()


.. currentmodule:: django_mongodb.queryset.MongoQuerySet

``raw_aggregate()``
-------------------

.. method:: raw_aggregate(pipeline, using=None)

Similar to :meth:`QuerySet.raw()<django.db.models.query.QuerySet.raw>`, but
instead of a raw SQL query, this method accepts a pipeline that will be passed
to :meth:`pymongo.collection.Collection.aggregate`.

For example, you could write custom match criteria::

Question.objects.raw_aggregate([{"$match": {"question_text": "What's up"}}])

The pipeline may also return additional fields that will be added as
annotations on the models::

>>> questions = Question.objects.raw_aggregate([{
... "$project": {
... "question_text": 1,
... "pub_date": 1,
... "year_published": {"$year": "$pub_date"}
... }
... }])
>>> for q in questions:
... print(f"{q.question_text} was published in {q.year_published}.")
...
What's up? was published in 2024.

Fields may also be left out:

>>> Question.objects.raw_aggregate([{"$project": {"question_text": 1}}])

The ``Question`` objects returned by this query will be deferred model instances
(see :meth:`~django.db.models.query.QuerySet.defer()`). This means that the
fields that are omitted from the query will be loaded on demand. For example::

>>> for q in Question.objects.raw_aggregate([{"$project": {"question_text": 1}}]):
...     print(
... q.question_text, # This will be retrieved by the original query.
... q.pub_date, # This will be retrieved on demand.
... )
...
What's new 2023-09-03 12:00:00+00:00
What's up 2024-08-23 20:57:30+00:00

From outward appearances, this looks like the query has retrieved both the
question text and published date. However, this example actually issued three
queries. Only the question texts were retrieved by the ``raw_aggregate()``
query -- the published dates were both retrieved on demand when they were
printed.
Empty file added tests/raw_query_/__init__.py
Empty file.
60 changes: 60 additions & 0 deletions tests/raw_query_/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from django.db import models

from django_mongodb.fields import ObjectIdAutoField
from django_mongodb.managers import MongoManager


class Author(models.Model):
    # Test model whose __init__ rejects keyword arguments that are not
    # concrete field attnames.
    first_name = models.CharField(max_length=255)
    last_name = models.CharField(max_length=255)
    dob = models.DateField()

    objects = MongoManager()

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Protect against annotations being passed to __init__ --
        # this'll make the test suite get angry if annotations aren't
        # treated differently than fields.
        # The valid names are collected once instead of once per keyword.
        field_attnames = {f.attname for f in self._meta.fields}
        for k in kwargs:
            assert k in field_attnames, (
                "Author.__init__ got an unexpected parameter: %s" % k
            )


class Book(models.Model):
    # Test model with a foreign key to Author; deleting the author cascades.
    title = models.CharField(max_length=255)
    author = models.ForeignKey(Author, models.CASCADE)
    paperback = models.BooleanField(default=False)
    opening_line = models.TextField()

    objects = MongoManager()


class BookFkAsPk(models.Model):
    # Test model whose primary key is a foreign key to Book, stored under a
    # column name that differs from the default.
    book = models.ForeignKey(Book, models.CASCADE, primary_key=True, db_column="not_the_default")

    objects = MongoManager()


class Coffee(models.Model):
    # Test model where the ``brand`` field is stored in a column called "name".
    brand = models.CharField(max_length=255, db_column="name")
    price = models.DecimalField(max_digits=10, decimal_places=2, default=0)

    objects = MongoManager()


class MixedCaseIDColumn(models.Model):
    # Test model whose primary key column name uses mixed case.
    id = ObjectIdAutoField(primary_key=True, db_column="MiXeD_CaSe_Id")

    objects = MongoManager()


class Reviewer(models.Model):
    # Test model with a many-to-many relation to Book.
    reviewed = models.ManyToManyField(Book)

    objects = MongoManager()


# Subclass of Author that adds no fields or methods of its own.
class FriendlyAuthor(Author):
    pass
Loading

0 comments on commit 36c5718

Please sign in to comment.