Merge pull request #117 from ilex/master

Add projection methods only(), exclude() and other to QuerySet
heynemann · Mar 3, 2016 · b700d88 · b700d88
2 parents d3538bd + 4c47d94
commit b700d88
Show file tree

Hide file tree

Showing 12 changed files with 1,353 additions and 32 deletions.
diff --git a/docs/source/getting-and-querying.rst b/docs/source/getting-and-querying.rst
@@ -124,3 +124,22 @@ In order to use raw queries, just pass the same object you would use in mongodb:
 
         io_loop.add_timeout(1, create_user)
         io_loop.start()
+
+Retrieving a subset of fields
+-----------------------------
+
+Sometimes only a subset of the fields on a Document is required, and for efficiency only those fields should be retrieved from the database. The methods below can be used to specify which fields to retrieve. Note that if a field that was not retrieved is accessed, its default value (or None if no default value is provided) is returned.
+
+Projections for reference fields (and for lists of reference fields) can be specified in the same way as for embedded fields. They are applied immediately if `lazy` is `False`; otherwise they are applied later, when `.load_reference()` is called.
+
+.. note:: You can use the `BlogPost.title` notation instead of the string value 'title' only for first-level fields. `BlogPost.author.name` will not work; use the string 'author.name' instead. Also, the `_id` field must always be specified as the string '_id'.
+
+.. note:: It is not possible to save a document that was retrieved with a projection. Attempting to do so raises `motorengine.errors.PartlyLoadedDocumentError`.
+
+.. automethod:: motorengine.queryset.QuerySet.only
+
+.. automethod:: motorengine.queryset.QuerySet.exclude
+
+.. automethod:: motorengine.queryset.QuerySet.all_fields
+
+.. automethod:: motorengine.queryset.QuerySet.fields
diff --git a/docs/source/modeling.rst b/docs/source/modeling.rst
@@ -124,7 +124,7 @@ Available Fields
 
 .. autoclass:: motorengine.fields.int_field.IntField
 
-.. autoclass:: motorengine.fields.int_field.BooleanField
+.. autoclass:: motorengine.fields.boolean_field.BooleanField
 
 .. autoclass:: motorengine.fields.float_field.FloatField
 

diff --git a/motorengine/document.py b/motorengine/document.py
@@ -8,24 +8,41 @@
 from motorengine.errors import InvalidDocumentError, LoadReferencesRequiredError
 
 
-AUTHORIZED_FIELDS = ['_id', '_values']
+AUTHORIZED_FIELDS = [
+    '_id', '_values', '_reference_loaded_fields', 'is_partly_loaded'
+]
 
 
 class BaseDocument(object):
-    def __init__(self, **kw):
+    def __init__(
+        self, _is_partly_loaded=False, _reference_loaded_fields=None, **kw
+    ):
+        """
+        :param _is_partly_loaded: is a flag that indicates if the document was
+        loaded partly (with `only`, `exclude`, `fields`). Default: False.
+        :param _reference_loaded_fields: dict that contains projections for
+        reference fields if any. Default: None.
+        :param kw: pairs of fields of the document and their values
+        """
         from motorengine.fields.dynamic_field import DynamicField
 
         self._id = kw.pop('_id', None)
         self._values = {}
+        self.is_partly_loaded = _is_partly_loaded
+
+        if _reference_loaded_fields:
+            self._reference_loaded_fields = _reference_loaded_fields
+        else:
+            self._reference_loaded_fields = {}
 
         for key, field in self._fields.items():
             if callable(field.default):
-                self._values[field.db_field] = field.default()
+                self._values[field.name] = field.default()
             else:
-                self._values[field.db_field] = field.default
+                self._values[field.name] = field.default
 
         for key, value in kw.items():
-            if key not in self._db_field_map:
+            if key not in self._fields:
                 self._fields[key] = DynamicField(db_field="_%s" % key.lstrip('_'))
             self._values[key] = value
 
@@ -51,7 +68,7 @@ def is_embedded_field(self, field):
         return isinstance(field, EmbeddedDocumentField) or (isinstance(field, type) and issubclass(field, EmbeddedDocumentField))
 
     @classmethod
-    def from_son(cls, dic):
+    def from_son(cls, dic, _is_partly_loaded=False, _reference_loaded_fields=None):
         field_values = {}
         for name, value in dic.items():
             field = cls.get_field_by_db_name(name)
@@ -60,7 +77,11 @@ def from_son(cls, dic):
             else:
                 field_values[name] = value
 
-        return cls(**field_values)
+        return cls(
+            _is_partly_loaded=_is_partly_loaded,
+            _reference_loaded_fields=_reference_loaded_fields,
+            **field_values
+        )
 
     def to_son(self):
         data = dict()
@@ -136,7 +157,7 @@ def fill_values_collection(self, collection, field_name, value):
         collection[field_name] = value
 
     def fill_list_values_collection(self, collection, field_name, value):
-        if not field_name in collection:
+        if field_name not in collection:
             collection[field_name] = []
         collection[field_name].append(value)
 
@@ -209,13 +230,23 @@ def find_references(self, document, fields=None, results=None):
 
         return results
 
+    def _get_load_function(self, document, field_name, document_type):
+        """Get appropriate method to load reference field of the document"""
+        if field_name in document._reference_loaded_fields:
+            # there is a projection for this field
+            fields = document._reference_loaded_fields[field_name]
+            return document_type.objects.fields(**fields).get
+        return document_type.objects.get
+
     def find_reference_field(self, document, results, field_name, field):
         if self.is_reference_field(field):
             value = document._values.get(field_name, None)
-
+            load_function = self._get_load_function(
+                document, field_name, field.reference_type
+            )
             if value is not None:
                 results.append([
-                    field.reference_type.objects.get,
+                    load_function,
                     value,
                     document._values,
                     field_name,
@@ -230,9 +261,12 @@ def find_list_field(self, document, results, field_name, field):
                 document_type = values[0].__class__
                 if isinstance(field._base_field, ReferenceField):
                     document_type = field._base_field.reference_type
+                    load_function = self._get_load_function(
+                        document, field_name, document_type
+                    )
                     for value in values:
                         results.append([
-                            document_type.objects.get,
+                            load_function,
                             value,
                             document._values,
                             field_name,
@@ -249,7 +283,7 @@ def find_embed_field(self, document, results, field_name, field):
                 self.find_references(document=value, results=results)
 
     def get_field_value(self, name):
-        if not name in self._fields:
+        if name not in self._fields:
             raise ValueError("Field %s not found in instance of %s." % (
                 name,
                 self.__class__.__name__
@@ -309,7 +343,7 @@ def get_fields(cls, name, fields=None):
         if fields is None:
             fields = []
 
-        if not '.' in name:
+        if '.' not in name:
             dyn_field = DynamicField(db_field="_%s" % name)
             fields.append(cls._fields.get(name, dyn_field))
             return fields

diff --git a/motorengine/errors.py b/motorengine/errors.py
@@ -12,6 +12,10 @@ class LoadReferencesRequiredError(RuntimeError):
     pass
 
 
+class PartlyLoadedDocumentError(ValueError):
+    pass
+
+
 # E11000 duplicate key error index: test.UniqueFieldDocument.$name_1  dup key: { : "test" }
 PYMONGO_ERROR_REGEX = re.compile(r"(?P<error_code>.+?)\s(?P<error_type>.+?):\s*(?P<index_name>.+?)\s+(?P<error>.+?)")
 

diff --git a/motorengine/fields/boolean_field.py b/motorengine/fields/boolean_field.py
@@ -7,9 +7,13 @@
 class BooleanField(BaseField):
     '''
     Field responsible for storing boolean values (:py:func:`bool`).
+
     Usage:
+
     .. testcode:: modeling_fields
+
         isActive = BooleanField(required=True)
+
     `BooleanField` has no additional arguments available (apart from those in `BaseField`).
     '''
     def __init__(self, *args, **kw):

diff --git a/motorengine/query/is_null.py b/motorengine/query/is_null.py
@@ -39,7 +39,8 @@ class User(Document):
         assert 'email' in query_result
         assert '$ne' in query_result['email']
         assert '$exists' in query_result['email']
-r   '''
+
+    '''
 
     def to_query(self, field_name, value):
         if value:

diff --git a/motorengine/query_builder/field_list.py b/motorengine/query_builder/field_list.py
@@ -0,0 +1,112 @@
+from motorengine.query_builder.transform import transform_field_list_query
+
+__all__ = ('QueryFieldList',)
+
+
+class QueryFieldList(object):
+    '''Object that handles combinations of .only() and .exclude() calls'''
+    ONLY = 1
+    EXCLUDE = 0
+
+    def __init__(
+        self, fields=None, value=ONLY, always_include=None, _only_called=False
+    ):
+        '''
+        The QueryFieldList builder
+
+        :param fields: A list of fields used in `.only()` or `.exclude()`
+        :param value: How to handle the fields; either `ONLY` or `EXCLUDE`
+        :param always_include: Any fields to always include, e.g. `_cls`
+        :param _only_called: Has `.only()` been called? If so, further `ONLY`
+            fields are unioned with the current ones; otherwise they replace them.
+        '''
+        self.value = value
+        self.fields = set(fields or [])
+        self.always_include = set(always_include or [])
+        self._id = None
+        self._only_called = _only_called
+        self.slice = {}
+
+    def __add__(self, f):
+        if isinstance(f.value, dict):
+            for field in f.fields:
+                self.slice[field] = f.value
+            if not self.fields:
+                self.fields = f.fields
+        elif not self.fields:
+            self.fields = f.fields
+            self.value = f.value
+            self.slice = {}
+        elif self.value is self.ONLY and f.value is self.ONLY:
+            self._clean_slice()
+            if self._only_called:
+                self.fields = self.fields.union(f.fields)
+            else:
+                self.fields = f.fields
+        elif self.value is self.EXCLUDE and f.value is self.EXCLUDE:
+            self.fields = self.fields.union(f.fields)
+            self._clean_slice()
+        elif self.value is self.ONLY and f.value is self.EXCLUDE:
+            self.fields -= f.fields
+            self._clean_slice()
+        elif self.value is self.EXCLUDE and f.value is self.ONLY:
+            self.value = self.ONLY
+            self.fields = f.fields - self.fields
+            self._clean_slice()
+
+        # _id is tracked separately to handle cases such as
+        # exclude('_id').only('other'): the code above removes _id from
+        # self.fields, which is the normal behavior for any other field but
+        # not for _id, because the _id field cannot be removed with only().
+        if '_id' in f.fields:
+            self._id = f.value
+
+        if self.always_include:
+            if self.value is self.ONLY and self.fields:
+                if sorted(self.slice.keys()) != sorted(self.fields):
+                    self.fields = self.fields.union(self.always_include)
+            else:
+                # if this is exclude - remove from fields values from
+                # always included fields
+                self.fields -= self.always_include
+
+        if getattr(f, '_only_called', False):
+            self._only_called = True
+        return self
+
+    # python2
+    def __nonzero__(self):
+        return bool(self.fields)
+
+    # python3
+    def __bool__(self):
+        return bool(self.fields)
+
+    def as_dict(self):
+        field_list = dict((field, self.value) for field in self.fields)
+
+        if self.slice:
+            field_list.update(self.slice)
+
+        if self._id is not None:
+            field_list['_id'] = self._id
+
+        return field_list
+
+    def to_query(self, document):
+        ''' Transform to query using db names for fields
+
+        :param document: class of the document
+        '''
+        return transform_field_list_query(document, self.as_dict())
+
+    def reset(self):
+        self.fields = set([])
+        self.slice = {}
+        self.value = self.ONLY
+        self._id = None
+
+    def _clean_slice(self):
+        if self.slice:
+            for field in set(self.slice.keys()) - self.fields:
+                del self.slice[field]
diff --git a/motorengine/query_builder/transform.py b/motorengine/query_builder/transform.py
@@ -105,3 +105,19 @@ def validate_fields(document, query):
             raise ValueError(
                 "Invalid filter '%s': Invalid operator (if this is a sub-property, "
                 "then it must be used in embedded document fields)." % key)
+
+
+def transform_field_list_query(document, query_field_list):
+    if not query_field_list:
+        return None
+
+    fields = {}
+    for key in query_field_list.keys():
+        if key == '_id':
+            fields[key] = query_field_list[key]
+        else:
+            fields_chain = document.get_fields(key)
+            field_db_name = '.'.join([field.db_field for field in fields_chain])
+            fields[field_db_name] = query_field_list[key]
+
+    return fields
-Original file line number
+Diff line change
@@ Expand Up / @@ -12,6 +12,10 @@ class LoadReferencesRequiredError(RuntimeError): @@
         pass
+    class PartlyLoadedDocumentError(ValueError):
+        pass
     # E11000 duplicate key error index: test.UniqueFieldDocument.$name_1  dup key: { : "test" }
     PYMONGO_ERROR_REGEX = re.compile(r"(?P<error_code>.+?)\s(?P<error_type>.+?):\s*(?P<index_name>.+?)\s+(?P<error>.+?)")
@@ Expand Down @@