blob: 1854ab54be618df9b51c72b68bdff07396d9d00d [file] [log] [blame]
Extensions to Django's model logic.
import django.core.exceptions
from django.db import connection
from django.db import connections
from django.db import models as dbmodels
from django.db import transaction
from django.db.models.sql import query
import django.db.models.sql.where
from autotest_lib.client.common_lib import error
from autotest_lib.frontend.afe import rdb_model_extensions
class ValidationError(django.core.exceptions.ValidationError):
    """Raised when data for adding or updating an object fails validation.

    The value carried by the exception is a dictionary that maps field
    names to error strings.
    """
def _quote_name(name):
    """Return name quoted via the module-level connection's quote_name()."""
    quote = connection.ops.quote_name
    return quote(name)
class LeasedHostManager(dbmodels.Manager):
    """Manager whose query set only contains unleased, unlocked hosts."""

    def get_query_set(self):
        """Return the parent query set filtered to leased=0 and locked=0."""
        base_query_set = super(LeasedHostManager, self).get_query_set()
        return base_query_set.filter(leased=0, locked=0)
class ExtendedManager(dbmodels.Manager):
Extended manager supporting subquery filtering.
class CustomQuery(query.Query):
def __init__(self, *args, **kwargs):
super(ExtendedManager.CustomQuery, self).__init__(*args, **kwargs)
self._custom_joins = []
def clone(self, klass=None, **kwargs):
obj = super(ExtendedManager.CustomQuery, self).clone(klass)
obj._custom_joins = list(self._custom_joins)
return obj
def combine(self, rhs, connector):
super(ExtendedManager.CustomQuery, self).combine(rhs, connector)
if hasattr(rhs, '_custom_joins'):
def add_custom_join(self, table, condition, join_type,
condition_values=(), alias=None):
if alias is None:
alias = table
join_dict = dict(table=table,
def convert_query(self, query_set):
Convert the query set's "query" attribute to a CustomQuery.
# Make a copy of the query set
query_set = query_set.all()
query_set.query = query_set.query.clone(
return query_set
class _WhereClause(object):
"""Object allowing us to inject arbitrary SQL into Django queries.
By using this instead of extra(where=...), we can still freely combine
queries with & and |.
def __init__(self, clause, values=()):
self._clause = clause
self._values = values
def as_sql(self, qn=None, connection=None):
return self._clause, self._values
def relabel_aliases(self, change_map):
def add_join(self, query_set, join_table, join_key, join_condition='',
join_condition_values=(), join_from_key=None, alias=None,
suffix='', exclude=False, force_left_join=False):
"""Add a join to query_set.
Join looks like this:
(INNER|LEFT) JOIN <join_table> AS <alias>
ON (<this table>.<join_from_key> = <join_table>.<join_key>
and <join_condition>)
@param join_table table to join to
@param join_key field referencing back to this model to use for the join
@param join_condition extra condition for the ON clause of the join
@param join_condition_values values to substitute into join_condition
@param join_from_key column on this model to join from.
@param alias alias to use for the join
@param suffix suffix to add to join_table for the join alias, if no
alias is provided
@param exclude if true, exclude rows that match this join (will use a
LEFT OUTER JOIN and an appropriate WHERE condition)
@param force_left_join - if true, a LEFT OUTER JOIN will be used
instead of an INNER JOIN regardless of other options
join_from_table = query_set.model._meta.db_table
if join_from_key is None:
join_from_key =
if alias is None:
alias = join_table + suffix
full_join_key = _quote_name(alias) + '.' + _quote_name(join_key)
full_join_condition = '%s = %s.%s' % (full_join_key,
if join_condition:
full_join_condition += ' AND (' + join_condition + ')'
if exclude or force_left_join:
join_type = query_set.query.LOUTER
join_type = query_set.query.INNER
query_set = self.CustomQuery.convert_query(query_set)
if exclude:
query_set = query_set.extra(where=[full_join_key + ' IS NULL'])
return query_set
def _info_for_many_to_one_join(self, field, join_to_query, alias):
@param field: the ForeignKey field on the related model
@param join_to_query: the query over the related model that we're
joining to
@param alias: alias of joined table
info = {}
rhs_table = join_to_query.model._meta.db_table
info['rhs_table'] = rhs_table
info['rhs_column'] = field.column
info['lhs_column'] = field.rel.get_related_field().column
rhs_where = join_to_query.query.where
rhs_where.relabel_aliases({rhs_table: alias})
compiler = join_to_query.query.get_compiler(using=join_to_query.db)
initial_clause, values = compiler.as_sql()
# initial_clause is compiled from `join_to_query`, which is a SELECT
# query returns at most one record. For it to be used in WHERE clause,
# it must be converted to a boolean value using EXISTS.
all_clauses = ('EXISTS (%s)' % initial_clause,)
if hasattr(join_to_query.query, 'extra_where'):
all_clauses += join_to_query.query.extra_where
info['where_clause'] = (
' AND '.join('(%s)' % clause for clause in all_clauses))
info['values'] = values
return info
def _info_for_many_to_many_join(self, m2m_field, join_to_query, alias,
@param m2m_field: a Django field representing the M2M relationship.
It uses a pivot table with the following structure:
this model table <---> M2M pivot table <---> joined model table
@param join_to_query: the query over the related model that we're
joining to.
@param alias: alias of joined table
if m2m_is_on_this_model:
# referenced field on this model
lhs_id_field =
# foreign key on the pivot table referencing lhs_id_field
m2m_lhs_column = m2m_field.m2m_column_name()
# foreign key on the pivot table referencing rhd_id_field
m2m_rhs_column = m2m_field.m2m_reverse_name()
# referenced field on related model
rhs_id_field = m2m_field.rel.get_related_field()
lhs_id_field = m2m_field.rel.get_related_field()
m2m_lhs_column = m2m_field.m2m_reverse_name()
m2m_rhs_column = m2m_field.m2m_column_name()
rhs_id_field =
info = {}
info['rhs_table'] = m2m_field.m2m_db_table()
info['rhs_column'] = m2m_lhs_column
info['lhs_column'] = lhs_id_field.column
# select the ID of related models relevant to this join. we can only do
# a single join, so we need to gather this information up front and
# include it in the join condition.
rhs_ids = join_to_query.values_list(rhs_id_field.attname, flat=True)
assert len(rhs_ids) == 1, ('Many-to-many custom field joins can only '
'match a single related object.')
rhs_id = rhs_ids[0]
info['where_clause'] = '%s.%s = %s' % (_quote_name(alias),
info['values'] = ()
return info
def join_custom_field(self, query_set, join_to_query, alias,
"""Join to a related model to create a custom field in the given query.
This method is used to construct a custom field on the given query based
on a many-valued relationship. join_to_query should be a simple query
(no joins) on the related model which returns at most one related row
per instance of this model.
For many-to-one relationships, the joined table contains the matching
row from the related model if one is related, NULL otherwise.
For many-to-many relationships, the joined table contains the matching
row if it's related, NULL otherwise.
relationship_type, field = self.determine_relationship(
if relationship_type == self.MANY_TO_ONE:
info = self._info_for_many_to_one_join(field, join_to_query, alias)
elif relationship_type == self.M2M_ON_RELATED_MODEL:
info = self._info_for_many_to_many_join(
m2m_field=field, join_to_query=join_to_query, alias=alias,
elif relationship_type ==self.M2M_ON_THIS_MODEL:
info = self._info_for_many_to_many_join(
m2m_field=field, join_to_query=join_to_query, alias=alias,
return self.add_join(query_set, info['rhs_table'], info['rhs_column'],
def add_where(self, query_set, where, values=()):
query_set = query_set.all()
query_set.query.where.add(self._WhereClause(where, values),
return query_set
def _get_quoted_field(self, table, field):
    """Return "<table>.<field>" with both parts passed through _quote_name."""
    quoted_parts = [_quote_name(table), _quote_name(field)]
    return '.'.join(quoted_parts)
def get_key_on_this_table(self, key_field=None):
if key_field is None:
# default to primary key
key_field =
return self._get_quoted_field(self.model._meta.db_table, key_field)
def escape_user_sql(self, sql):
    """Return sql with every '%' doubled to '%%'."""
    pieces = sql.split('%')
    return '%%'.join(pieces)
def _custom_select_query(self, query_set, selects):
    """Execute a custom select query.

    The SQL compiled for query_set is reused from its ' FROM' onwards; only
    the select list in front of it is replaced.

    @param query_set: query set as returned by query_objects.
    @param selects: Tables/Columns to select; the entries are joined with
            ',' to form the select list.

    @returns: Result of the query as returned by cursor.fetchall().
    """
    compiler = query_set.query.get_compiler(using=query_set.db)
    sql, params = compiler.as_sql()
    from_ = sql[sql.find(' FROM'):]

    # Emit DISTINCT only when the query set asked for it. Previously the
    # value was reset to '' unconditionally, so DISTINCT was never applied.
    distinct = 'DISTINCT ' if query_set.query.distinct else ''

    sql_query = 'SELECT ' + distinct + ','.join(selects) + from_

    # Choose the connection that's responsible for this type of object
    cursor = connections[query_set.db].cursor()
    cursor.execute(sql_query, params)
    return cursor.fetchall()
def _is_relation_to(self, field, model_class):
return field.rel and is model_class
# Sentinels for the relationship kinds returned by determine_relationship().
# Each one is a distinct object, so they can only ever compare equal to
# themselves. M2M_ON_RELATED_MODEL is returned by determine_relationship()
# and checked by its callers, so it has to be defined alongside the others.
MANY_TO_ONE = object()
M2M_ON_RELATED_MODEL = object()
M2M_ON_THIS_MODEL = object()
def determine_relationship(self, related_model):
    """Work out how related_model is related to this manager's model.

    related_model must have some sort of many-valued relationship to this
    manager's model. The lookups are tried in this order:
      1. a field on related_model relating to this model (MANY_TO_ONE)
      2. a many-to-many field on related_model relating to this model
         (M2M_ON_RELATED_MODEL)
      3. a many-to-many field on this model relating to related_model
         (M2M_ON_THIS_MODEL)

    @returns (relationship_type, field), where field is the Django field
            object for the relationship.
    @raises ValueError if none of the lookups finds a relation.
    """
    def first_relation(fields, target_model):
        # first field in `fields` that relates to target_model, else None
        for candidate in fields:
            if self._is_relation_to(candidate, target_model):
                return candidate
        return None

    foreign_key = first_relation(related_model._meta.fields, self.model)
    if foreign_key is not None:
        return self.MANY_TO_ONE, foreign_key

    m2m_on_related = first_relation(related_model._meta.many_to_many,
                                    self.model)
    if m2m_on_related is not None:
        return self.M2M_ON_RELATED_MODEL, m2m_on_related

    m2m_on_this = first_relation(self.model._meta.many_to_many,
                                 related_model)
    if m2m_on_this is not None:
        return self.M2M_ON_THIS_MODEL, m2m_on_this

    raise ValueError('%s has no relation to %s' %
                     (related_model, self.model))
def _get_pivot_iterator(self, base_objects_by_id, related_model):
Determine the relationship between this model and related_model, and
return a pivot iterator.
@param base_objects_by_id: dict of instances of this model indexed by
their IDs
@returns a pivot iterator, which yields a tuple (base_object,
related_object) for each relationship between a base object and a
related object. all base_object instances come from base_objects_by_id.
Note -- this depends on Django model internals.
relationship_type, field = self.determine_relationship(related_model)
if relationship_type == self.MANY_TO_ONE:
return self._many_to_one_pivot(base_objects_by_id,
related_model, field)
elif relationship_type == self.M2M_ON_RELATED_MODEL:
return self._many_to_many_pivot(
base_objects_by_id, related_model, field.m2m_db_table(),
field.m2m_reverse_name(), field.m2m_column_name())
assert relationship_type == self.M2M_ON_THIS_MODEL
return self._many_to_many_pivot(
base_objects_by_id, related_model, field.m2m_db_table(),
field.m2m_column_name(), field.m2m_reverse_name())
def _many_to_one_pivot(self, base_objects_by_id, related_model,
@returns a pivot iterator - see _get_pivot_iterator()
filter_data = { + '__pk__in':
for related_object in related_model.objects.filter(**filter_data):
# lookup base object in the dict, rather than grabbing it from the
# related object. we need to return instances from the dict, not
# fresh instances of the same models (and grabbing model instances
# from the related models incurs a DB query each time).
base_object_id = getattr(related_object, foreign_key_field.attname)
base_object = base_objects_by_id[base_object_id]
yield base_object, related_object
def _query_pivot_table(self, base_objects_by_id, pivot_table,
pivot_from_field, pivot_to_field, related_model):
@param id_list list of IDs of self.model objects to include
@param pivot_table the name of the pivot table
@param pivot_from_field a field name on pivot_table referencing
@param pivot_to_field a field name on pivot_table referencing the
related model.
@param related_model the related model
@returns pivot list of IDs (base_id, related_id)
query = """
SELECT %(from_field)s, %(to_field)s
FROM %(table)s
WHERE %(from_field)s IN (%(id_list)s)
""" % dict(from_field=pivot_from_field,
id_list=','.join(str(id_) for id_
in base_objects_by_id.iterkeys()))
# Chose the connection that's responsible for this type of object
# The databases for related_model and the current model will always
# be the same, related_model is just easier to obtain here because
# self is only a ExtendedManager, not the object.
cursor = connections[related_model.objects.db].cursor()
return cursor.fetchall()
def _many_to_many_pivot(self, base_objects_by_id, related_model,
pivot_table, pivot_from_field, pivot_to_field):
@param pivot_table: see _query_pivot_table
@param pivot_from_field: see _query_pivot_table
@param pivot_to_field: see _query_pivot_table
@returns a pivot iterator - see _get_pivot_iterator()
id_pivot = self._query_pivot_table(base_objects_by_id, pivot_table,
pivot_from_field, pivot_to_field,
all_related_ids = list(set(related_id for base_id, related_id
in id_pivot))
related_objects_by_id = related_model.objects.in_bulk(all_related_ids)
for base_id, related_id in id_pivot:
yield base_objects_by_id[base_id], related_objects_by_id[related_id]
def populate_relationships(self, base_objects, related_model,
For each instance of this model in base_objects, add a field named
related_list_name listing all the related objects of type related_model.
related_model must be in a many-to-one or many-to-many relationship with
this model.
@param base_objects - list of instances of this model
@param related_model - model class related to this model
@param related_list_name - attribute name in which to store the related
object list.
if not base_objects:
# if we don't bail early, we'll get a SQL error later
# The default maximum value of a host parameter number in SQLite is 999.
# Exceed this will get a DatabaseError later.
batch_size = 900
for i in xrange(0, len(base_objects), batch_size):
base_objects_batch = base_objects[i:i + batch_size]
base_objects_by_id = dict((base_object._get_pk_val(), base_object)
for base_object in base_objects_batch)
pivot_iterator = self._get_pivot_iterator(base_objects_by_id,
for base_object in base_objects_batch:
setattr(base_object, related_list_name, [])
for base_object, related_object in pivot_iterator:
getattr(base_object, related_list_name).append(related_object)
class ModelWithInvalidQuerySet(dbmodels.query.QuerySet):
QuerySet that handles delete() properly for models with an "invalid" bit
def delete(self):
for model in self:
class ModelWithInvalidManager(ExtendedManager):
    """Manager for objects with an "invalid" bit."""

    def get_query_set(self):
        """Return a ModelWithInvalidQuerySet built for this manager's model."""
        model = self.model
        return ModelWithInvalidQuerySet(model)
class ValidObjectsManager(ModelWithInvalidManager):
    """Manager returning only objects with invalid=False."""

    def get_query_set(self):
        """Return the parent manager's query set filtered to invalid=False."""
        return super(ValidObjectsManager, self).get_query_set().filter(
                invalid=False)
class ModelExtensions(rdb_model_extensions.ModelValidators):
Mixin with convenience functions for models, built on top of
the model validators in rdb_model_extensions.
# TODO: at least some of these functions really belong in a custom
# Manager class
To be able to send jobs and hosts to shards, it's necessary to find their
The most generic approach for this would be to traverse all relationships
to other objects recursively. This would list all objects that are related
in any way.
But this approach finds too many objects: If a host should be transferred,
all its relationships would be traversed. This would find an acl group.
If then the acl group's relationships are traversed, the relationship
would be followed backwards and many other hosts would be found.
This mapping tells that algorithm which relations to follow explicitly.
"""This set stores foreign keys which we don't want to follow, but
still want to include in the serialized dictionary. For
example, we follow the relationship `Host.hostattribute_set`,
but we do not want to follow `HostAttributes.host_id` back to
to Host, which would otherwise lead to a circle. However, we still
like to serialize HostAttribute.`host_id`."""
On deserialization, if the object to persist already exists, local fields
will only be updated, if their name is in this set.
def convert_human_readable_values(cls, data, to_human_readable=False):
Performs conversions on user-supplied field data, to make it
easier for users to pass human-readable data.
For all fields that have choice sets, convert their values
from human-readable strings to enum values, if necessary. This
allows users to pass strings instead of the corresponding
integer values.
For all foreign key fields, call smart_get with the supplied
data. This allows the user to pass either an ID value or
the name of the object as a string.
If to_human_readable=True, perform the inverse - i.e. convert
numeric values to human readable values.
This method modifies data in-place.
field_dict = cls.get_field_dict()
for field_name in data:
if field_name not in field_dict or data[field_name] is None:
field_obj = field_dict[field_name]
# convert enum values
if field_obj.choices:
for choice_data in field_obj.choices:
# choice_data is (value, name)
if to_human_readable:
from_val, to_val = choice_data
to_val, from_val = choice_data
if from_val == data[field_name]:
data[field_name] = to_val
# convert foreign key values
elif field_obj.rel:
dest_obj =[field_name],
if to_human_readable:
# parameterized_jobs do not have a name_field
if (field_name != 'parameterized_job' and
dest_obj.name_field is not None):
data[field_name] = getattr(dest_obj,
data[field_name] = dest_obj
def _validate_unique(self):
Validate that unique fields are unique. Django manipulators do
this too, but they're a huge pain to use manually. Trust me.
errors = {}
cls = type(self)
field_dict = self.get_field_dict()
manager = cls.get_valid_manager()
for field_name, field_obj in field_dict.iteritems():
if not field_obj.unique:
value = getattr(self, field_name)
if value is None and field_obj.auto_created:
# don't bother checking autoincrement fields about to be
# generated
existing_objs = manager.filter(**{field_name : value})
num_existing = existing_objs.count()
if num_existing == 0:
if num_existing == 1 and existing_objs[0].id ==
errors[field_name] = (
'This value must be unique (%s)' % (value))
return errors
def _validate(self):
First coerces all fields on this instance to their proper Python types.
Then runs validation on every field. Returns a dictionary of
field_name -> error_list.
Based on validate() from django.db.models.Model in Django 0.96, which
was removed in Django 1.0. It should reappear in a later version. See:
error_dict = {}
for f in self._meta.fields:
python_value = f.to_python(
getattr(self, f.attname, f.get_default()))
except django.core.exceptions.ValidationError, e:
error_dict[] = str(e)
if not f.blank and not python_value:
error_dict[] = 'This field is required.'
setattr(self, f.attname, python_value)
return error_dict
def do_validate(self):
    """Run field validation and uniqueness validation on this object.

    Errors from _validate() win: a uniqueness error is only recorded for a
    field that does not already have an error.

    @raises ValidationError carrying the field_name -> error dictionary if
            either check reported anything.
    """
    errors = self._validate()
    unique_errors = self._validate_unique()
    for field_name in unique_errors:
        if field_name not in errors:
            errors[field_name] = unique_errors[field_name]
    if not errors:
        return
    raise ValidationError(errors)
# actually (externally) useful methods follow
def add_object(cls, data=None, **kwargs):
    """
    Returns a new object created with the given data (a dictionary
    mapping field names to values). Merges any extra keyword args
    into data.

    The caller's data is copied, never modified. The merged data is passed
    through cls.prepare_data_args() and cls.provide_default_values() before
    being handed to the constructor.
    """
    data = dict(data or {})
    # Keyword args override entries of the same name in data.
    data.update(kwargs)
    data = cls.prepare_data_args(data)
    data = cls.provide_default_values(data)
    return cls(**data)
def update_object(self, data=None, **kwargs):
    """
    Updates the object with the given data (a dictionary mapping
    field names to values). Merges any extra keyword args into
    data.

    The caller's data is copied, never modified. The merged data is passed
    through self.prepare_data_args() and each resulting entry is set as an
    attribute on this object.
    """
    data = dict(data or {})
    # Keyword args override entries of the same name in data.
    data.update(kwargs)
    data = self.prepare_data_args(data)
    for field_name in data:
        setattr(self, field_name, data[field_name])
# see query_objects()
_SPECIAL_FILTER_KEYS = ('query_start', 'query_limit', 'sort_by',
'extra_args', 'extra_where', 'no_distinct')
def _extract_special_params(cls, filter_data):
@returns a tuple of dicts (special_params, regular_filters), where
special_params contains the parameters we handle specially and
regular_filters is the remaining data to be handled by Django.
regular_filters = dict(filter_data)
special_params = {}
for key in cls._SPECIAL_FILTER_KEYS:
if key in regular_filters:
special_params[key] = regular_filters.pop(key)
return special_params, regular_filters
def apply_presentation(cls, query, filter_data):
    """Apply presentation parameters -- sorting and paging -- to query.

    Reads 'sort_by', 'query_start' and 'query_limit' from filter_data.

    @returns new query with presentation applied
    @raises ValueError if query_start is given without query_limit
    """
    special_params, _ = cls._extract_special_params(filter_data)

    sort_by = special_params.get('sort_by')
    if sort_by:
        assert isinstance(sort_by, (list, tuple))
        query = query.extra(order_by=sort_by)

    start = special_params.get('query_start')
    limit = special_params.get('query_limit')
    if start is None:
        return query[start:limit]
    if limit is None:
        raise ValueError('Cannot pass query_start without query_limit')
    # query_limit is passed as a page size, so the slice ends at
    # limit + start
    return query[start:limit + start]
def query_objects(cls, filter_data, valid_only=True, initial_query=None,
Returns a QuerySet object for querying the given model_class
with the given filter_data. Optional special arguments in
filter_data include:
-query_start: index of first return to return
-query_limit: maximum number of results to return
-sort_by: list of fields to sort on. prefixing a '-' onto a
field name changes the sort to descending order.
-extra_args: keyword args to pass to query.extra() (see Django
DB layer documentation)
-extra_where: extra WHERE clause to append
-no_distinct: if True, a DISTINCT will not be added to the SELECT
special_params, regular_filters = cls._extract_special_params(
if initial_query is None:
if valid_only:
initial_query = cls.get_valid_manager()
initial_query = cls.objects
query = initial_query.filter(**regular_filters)
use_distinct = not special_params.get('no_distinct', False)
if use_distinct:
query = query.distinct()
extra_args = special_params.get('extra_args', {})
extra_where = special_params.get('extra_where', None)
if extra_where:
# escape %'s
extra_where = cls.objects.escape_user_sql(extra_where)
extra_args.setdefault('where', []).append(extra_where)
if extra_args:
query = query.extra(**extra_args)
# TODO: Use readonly connection for these queries.
# This has been disabled, because it's not used anyway, as the
# configured readonly user is the same as the real user anyway.
if apply_presentation:
query = cls.apply_presentation(query, filter_data)
return query
def query_count(cls, filter_data, initial_query=None):
    """
    Like query_objects, but retrieve only the count of results.

    The paging keys 'query_start' and 'query_limit' are dropped before
    querying. They are removed from a copy, so the caller's filter_data is
    left unchanged.

    @returns the value of count() on the query from query_objects.
    """
    count_filters = dict(filter_data)
    count_filters.pop('query_start', None)
    count_filters.pop('query_limit', None)
    query = cls.query_objects(count_filters, initial_query=initial_query)
    return query.count()
def clean_object_dicts(cls, field_dicts):
Take a list of dicts corresponding to object (as returned by
query.values()) and clean the data to be more suitable for
returning to the user.
for field_dict in field_dicts:
def list_objects(cls, filter_data, initial_query=None):
    """Like query_objects, but return a list of dictionaries.

    Each dictionary comes from get_object_dict() on a matching object, with
    the query's extra_select keys passed as extra_fields.
    """
    query = cls.query_objects(filter_data, initial_query=initial_query)
    extra_fields = query.query.extra_select.keys()
    field_dicts = []
    for model_object in query:
        field_dicts.append(
                model_object.get_object_dict(extra_fields=extra_fields))
    return field_dicts
def smart_get(cls, id_or_name, valid_only=True):
smart_get(integer) -> get object by ID
smart_get(string) -> get object by name_field
if valid_only:
manager = cls.get_valid_manager()
manager = cls.objects
if isinstance(id_or_name, (int, long)):
return manager.get(pk=id_or_name)
if isinstance(id_or_name, basestring) and hasattr(cls, 'name_field'):
return manager.get(**{cls.name_field : id_or_name})
raise ValueError(
'Invalid positional argument: %s (%s)' % (id_or_name,
def smart_get_bulk(cls, id_or_name_list):
invalid_inputs = []
result_objects = []
for id_or_name in id_or_name_list:
except cls.DoesNotExist:
if invalid_inputs:
raise cls.DoesNotExist('The following %ss do not exist: %s'
% (cls.__name__.lower(),
', '.join(invalid_inputs)))
return result_objects
def get_object_dict(self, extra_fields=None):
    """
    Return a dictionary mapping fields to this object's values.

    @param extra_fields: list of extra attribute names to include, in
            addition to the fields defined on this object.
    @returns dict of field name -> getattr(self, field name).
    """
    # Build a real list of names instead of extending whatever keys()
    # returns, so this does not depend on keys() being a mutable list.
    fields = list(self.get_field_dict())
    if extra_fields:
        fields.extend(extra_fields)
    return dict((field_name, getattr(self, field_name))
                for field_name in fields)
def _postprocess_object_dict(self, object_dict):
"""For subclasses to override."""
def get_valid_manager(cls):
    """Return the manager to use for valid objects; here, cls.objects."""
    manager = cls.objects
    return manager
def _record_attributes(self, attributes):
See on_attribute_changed.
assert not isinstance(attributes, basestring)
self._recorded_attributes = dict((attribute, getattr(self, attribute))
for attribute in attributes)
def _check_for_updated_attributes(self):
See on_attribute_changed.
for attribute, original_value in self._recorded_attributes.iteritems():
new_value = getattr(self, attribute)
if original_value != new_value:
self.on_attribute_changed(attribute, original_value)
def on_attribute_changed(self, attribute, old_value):
    """Called whenever an attribute is updated. To be overridden.

    To use this method, you must:
    * call _record_attributes() from __init__() (after making the super
      call) with a list of attributes for which you want to be notified.
    * call _check_for_updated_attributes() from save().

    @param attribute: name of the attribute that changed
    @param old_value: the value recorded before the change
    """
    return None
def serialize(self, include_dependencies=True):
"""Serializes the object with dependencies.
The variable SERIALIZATION_LINKS_TO_FOLLOW defines which dependencies
this function will serialize with the object.
@param include_dependencies: Whether or not to follow relations to
objects this object depends on.
This parameter is used when uploading
jobs from a shard to the master, as the
master already has all the dependent
@returns: Dictionary representation of the object.
serialized = {}
for field in self._meta.concrete_model._meta.local_fields:
if field.rel is None:
serialized[] = field._get_val_from_obj(self)
# attname will contain "_id" suffix for foreign keys,
# e.g. will be serialized as 'host_id'.
# Use it for easy deserialization.
serialized[field.attname] = field._get_val_from_obj(self)
if include_dependencies:
serialized[link] = self._serialize_relation(link)
return serialized
def _serialize_relation(self, link):
"""Serializes dependent objects given the name of the relation.
@param link: Name of the relation to take objects from.
@returns For To-Many relationships a list of the serialized related
objects, for To-One relationships the serialized related object.
attr = getattr(self, link)
except AttributeError:
# One-To-One relationships that point to None may raise this
return None
if attr is None:
return None
if hasattr(attr, 'all'):
return [obj.serialize() for obj in attr.all()]
return attr.serialize()
def _split_local_from_foreign_values(cls, data):
"""This splits local from foreign values in a serialized object.
@param data: The serialized object.
@returns A tuple of two lists, both containing tuples in the form
(link_name, link_value). The first list contains all links
for local fields, the second one contains those for foreign
links_to_local_values, links_to_related_values = [], []
for link, value in data.iteritems():
# It's a foreign key
links_to_related_values.append((link, value))
# It's a local attribute or a foreign key
# we don't want to follow.
links_to_local_values.append((link, value))
return links_to_local_values, links_to_related_values
def _filter_update_allowed_fields(cls, data):
"""Filters data and returns only files that updates are allowed on.
This is i.e. needed for syncing aborted bits from the master to shards.
Local links are only allowed to be updated, if they are in
Overwriting existing values is allowed in order to be able to sync i.e.
the aborted bit from the master to a shard.
The whitelisting mechanism is in place to prevent overwriting local
status: If all fields were overwritten, jobs would be completely be
set back to their original (unstarted) state.
@param data: List with tuples of the form (link_name, link_value), as
returned by _split_local_from_foreign_values.
@returns List of the same format as data, but only containing data for
fields that updates are allowed on.
return [pair for pair in data
def delete_matching_record(cls, **filter_args):
"""Delete records matching the filter.
@param filter_args: Arguments for the django filter
used to locate the record to delete.
existing_record = cls.objects.get(**filter_args)
except cls.DoesNotExist:
def _deserialize_local(self, data):
"""Set local attributes from a list of tuples.
@param data: List of tuples like returned by
if not data:
for link, value in data:
setattr(self, link, value)
# Overridden save() methods are prone to errors, so don't execute them.
# This is because:
# - the overwritten methods depend on ACL groups that don't yet exist
# and don't handle errors
# - the overwritten methods think this object already exists in the db
# because the id is already set
super(type(self), self).save()
def _deserialize_relations(self, data):
"""Set foreign attributes from a list of tuples.
This deserialized the related objects using their own deserialize()
function and then sets the relation.
@param data: List of tuples like returned by
for link, value in data:
self._deserialize_relation(link, value)
# See comment in _deserialize_local
super(type(self), self).save()
def get_record(cls, data):
    """Look up the record described by data.

    This implementation matches on data['id'] only.

    @param data: A dictionary containing the information to use in a query
            for data. If child models have different constraints of
            uniqueness they should override this method.

    @return: An object with matching data.

    @raises DoesNotExist: If a record with the given data doesn't exist.
    """
    lookup = cls.objects.get
    return lookup(id=data['id'])
def deserialize(cls, data):
"""Recursively deserializes and saves an object with it's dependencies.
This takes the result of the serialize method and creates objects
in the database that are just like the original.
If an object of the same type with the same id already exists, it's
local values will be left untouched, unless they are explicitly
Deserialize will always recursively propagate to all related objects
present in data though.
I.e. this is necessary to add users to an already existing acl-group.
@param data: Representation of an object and its dependencies, as
returned by serialize.
@returns: The object represented by data if it didn't exist before,
otherwise the object that existed before and has the same type
and id as the one described by data.
if data is None:
return None
local, related = cls._split_local_from_foreign_values(data)
instance = cls.get_record(data)
local = cls._filter_update_allowed_fields(local)
except cls.DoesNotExist:
instance = cls()
return instance
def sanity_check_update_from_shard(self, shard, updated_serialized,
                                   *args, **kwargs):
    """Check if an update sent from a shard is legitimate.

    This base implementation always raises; subclasses are expected to
    provide the actual check.

    @raises NotImplementedError always, naming the concrete class.
    @raises error.UnallowedRecordsSentToMaster (in subclass
            implementations) if an update is not legitimate.
    """
    # The message used to have two placeholders but only one argument, so
    # formatting failed with TypeError before NotImplementedError was ever
    # raised.
    raise NotImplementedError(
        'sanity_check_update_from_shard must be implemented by subclass %s'
        % (type(self),))
def update_from_serialized(self, serialized):
"""Updates local fields of an existing object from a serialized form.
This is different than the normal deserialize() in the way that it
does update local values, which deserialize doesn't, but doesn't
recursively propagate to related objects, which deserialize() does.
The use case of this function is to update job records on the master
after the jobs have been executed on a slave, as the master is not
interested in updates for users, labels, specialtasks, etc.
@param serialized: Representation of an object and its dependencies, as
returned by serialize.
@raises ValueError: if serialized contains related objects, i.e. not
only local fields.
local, related = (
if related:
raise ValueError('Serialized must not contain foreign '
'objects: %s' % related)
def custom_deserialize_relation(self, link, data):
    """Hook for subclasses that need their own relation deserialization.

    This default never succeeds: it raises NotImplementedError naming the
    concrete type and the relation that was requested.

    @param link: Name of the relation.
    @param data: Serialized representation of the related object(s);
                 not inspected by this default implementation.
    """
    template = ('custom_deserialize_relation must be implemented by subclass %s '
                'for relation %s')
    details = (type(self), link)
    raise NotImplementedError(template % details)
def _deserialize_relation(self, link, data):
"""Deserializes related objects and sets references on this object.
Relations that point to a list of objects are handled automatically.
For many-to-one or one-to-one relations custom_deserialize_relation
must be overridden by the subclass.
Related objects are deserialized using their deserialize() method.
Thereby they and their dependencies are created if they don't exist
and saved to the database.
@param link: Name of the relation.
@param data: Serialized representation of the related object(s).
This means a list of dictionaries for to-many relations,
just a dictionary for to-one relations.
field = getattr(self, link)
if field and hasattr(field, 'all'):
self._deserialize_2m_relation(link, data, field.model)
self.custom_deserialize_relation(link, data)
# Deserializes the entries of one to-many relation of this object.
# NOTE(review): this block is incomplete as shown -- the docstring has no
# closing delimiter, the statement that deletes the existing attribute rows
# is not visible, and the `for` loop has no body. Recover the missing lines
# from version control before changing the logic.
def _deserialize_2m_relation(self, link, data, related_class):
"""Deserialize related objects for one to-many relationship.
@param link: Name of the relation.
@param data: Serialized representation of the related objects.
This is a list of dictionaries.
@param related_class: A class representing a django model, with which
this class has a one-to-many relationship.
relation_set = getattr(self, link)
# Only the relation to the attribute model (see get_attribute_model) gets
# the clear-before-deserialize treatment described below.
if related_class == self.get_attribute_model():
# When deserializing a model together with
# its attributes, clear all the existing attributes to ensure
# db consistency. Note 'update' won't be sufficient, as we also
# want to remove any attributes that no longer exist in |data|.
# core_filters is a dictionary of filters, defines how
# RelatedManager would query for the 1-to-many relationship. E.g.
# Host.objects.get(
# id=20).hostattribute_set.core_filters = {host_id:20}
# We use it to delete objects related to the current object.
# NOTE(review): the loop body is not visible in this view; presumably each
# entry is deserialized and attached to relation_set -- confirm.
for serialized in data:
@classmethod
def get_attribute_model(cls):
    """Return the attribute model.

    Subclass with attribute-like model should override this to
    return the attribute model class. This method will be
    called by _deserialize_2m_relation to determine whether
    to clear the one-to-many relations first on deserialization of object.

    Declared as a classmethod because it takes `cls`; it can still be
    called on an instance, as _deserialize_2m_relation does.

    @returns: None in this base implementation, i.e. no attribute model.
    """
    return None
# Model base class that marks objects invalid in place of deleting them:
# delete() sets the "invalid" flag, and save() looks for a previously
# invalidated object with the same name when saving for the first time.
# NOTE(review): this class is incomplete as shown -- docstring delimiters
# are missing throughout, two expressions are damaged mid-line, and several
# statements (e.g. a `try:` for the `except` in save(), a body for
# clean_object()) are not visible. Recover the missing lines from version
# control before changing the logic.
class ModelWithInvalid(ModelExtensions):
Overrides model methods save() and delete() to support invalidation in
place of actual deletion. Subclasses must have a boolean "invalid"
def save(self, *args, **kwargs):
# NOTE(review): the left operand of "is None" is missing in this view;
# presumably the object's primary key -- confirm.
first_time = ( is None)
if first_time:
# see if this object was previously added and invalidated
# name_field is read from self; it names the attribute used for the lookup.
my_name = getattr(self, self.name_field)
filters = {self.name_field : my_name, 'invalid' : True}
# NOTE(review): the `except` below has no visible `try:`; presumably this
# lookup is the guarded statement -- confirm.
old_object = self.__class__.objects.get(**filters)
except self.DoesNotExist:
# no existing object
super(ModelWithInvalid, self).save(*args, **kwargs)
def resurrect_object(self, old_object):
Called when self is about to be saved for the first time and is actually
"undeleting" a previously deleted object. Can be overridden by
subclasses to copy data as desired from the deleted entry (but this
superclass implementation must normally be called).
# NOTE(review): the next line looks like the docstring terminator fused
# with a damaged assignment; its operands are not visible here.
""" =
def clean_object(self):
This method is called when an object is marked invalid.
Subclasses should override this to clean up relationships that
should no longer exist if the object were deleted.
def delete(self):
# NOTE(review): self-assignment; its intent is not evident from this block.
self.invalid = self.invalid
# Deleting an object that is already invalid is treated as an error.
assert not self.invalid
# Invalidate instead of removing the row.
# NOTE(review): no save() or clean_object() call is visible after this
# line in this view -- confirm against the full file.
self.invalid = True
def get_valid_manager(cls):
# Returns the class's `valid_objects` attribute.
return cls.valid_objects
class Manipulator(object):
Force default manipulators to look only at valid objects -
otherwise they will match against invalid objects when checking
def _prepare(cls, model):
super(ModelWithInvalid.Manipulator, cls)._prepare(model)
# Point the manipulator at the model's `valid_objects` manager.
cls.manager = model.valid_objects
class ModelWithAttributes(object):
    """
    Mixin class for models that have an attribute model associated with them.
    The attribute model is assumed to have its value field named "value".
    """

    def _get_attribute_model_and_args(self, attribute):
        """
        Subclasses should override this to return a tuple (attribute_model,
        keyword_args), where attribute_model is a model class and keyword_args
        is a dict of args to pass to attribute_model.objects.get() to get an
        instance of the given attribute on this object.
        """
        raise NotImplementedError

    def _is_replaced_by_static_attribute(self, attribute):
        """
        Subclasses could override this to indicate whether the given
        attribute is replaced by a static attribute. Static attributes
        cannot be set or deleted through this mixin.

        @returns: False in this base implementation.
        """
        return False

    def set_attribute(self, attribute, value):
        """
        Set the named attribute on this object, creating it if necessary.

        @param attribute: Name of the attribute.
        @param value: Value to store in the attribute's "value" field.

        @raises error.UnmodifiableAttributeException: if the attribute is
                static.
        """
        if self._is_replaced_by_static_attribute(attribute):
            raise error.UnmodifiableAttributeException(
                    'Failed to set attribute "%s" for host "%s" since it '
                    'is static. Use go/chromeos-skylab-inventory-tools to '
                    'modify this attribute.' % (attribute, self.hostname))

        attribute_model, get_args = self._get_attribute_model_and_args(
                attribute)
        attribute_object, _ = attribute_model.objects.get_or_create(**get_args)
        attribute_object.value = value
        # Assigning the field only changes the in-memory object; it has to be
        # saved for the new value to reach the database.
        attribute_object.save()

    def delete_attribute(self, attribute):
        """
        Delete the named attribute from this object, if it exists.

        @param attribute: Name of the attribute.

        @raises error.UnmodifiableAttributeException: if the attribute is
                static.
        """
        if self._is_replaced_by_static_attribute(attribute):
            raise error.UnmodifiableAttributeException(
                    'Failed to delete attribute "%s" for host "%s" since it '
                    'is static. Use go/chromeos-skylab-inventory-tools to '
                    'modify this attribute.' % (attribute, self.hostname))

        attribute_model, get_args = self._get_attribute_model_and_args(
                attribute)
        try:
            attribute_model.objects.get(**get_args).delete()
        except attribute_model.DoesNotExist:
            # Deleting an attribute that is not there is deliberately a no-op.
            pass

    def set_or_delete_attribute(self, attribute, value):
        """
        Set the attribute to value, or delete it when value is None.

        @param attribute: Name of the attribute.
        @param value: New value, or None to delete the attribute.
        """
        if value is None:
            self.delete_attribute(attribute)
        else:
            self.set_attribute(attribute, value)
class ModelWithHashManager(dbmodels.Manager):
    """Manager paired with the ModelWithHash abstract model class.

    Rows are meant to be obtained through get_or_create(); create() is
    disabled outright.
    """

    def create(self, **kwargs):
        """Always refuse: callers must go through get_or_create()."""
        message = ('ModelWithHash manager should use get_or_create() '
                   'instead of create()')
        raise Exception(message)

    def get_or_create(self, **kwargs):
        """Add the model-computed hash to the lookup, then defer to the base.

        The hash is derived from the caller's keyword arguments by the
        model's _compute_hash() and stored under the 'the_hash' key.
        """
        digest = self.model._compute_hash(**kwargs)
        kwargs['the_hash'] = digest
        base_manager = super(ModelWithHashManager, self)
        return base_manager.get_or_create(**kwargs)
class ModelWithHash(dbmodels.Model):
    """Superclass with methods for dealing with a hash column"""
    # Unique hash column; ModelWithHashManager.get_or_create() fills it in
    # from _compute_hash().
    the_hash = dbmodels.CharField(max_length=40, unique=True)

    objects = ModelWithHashManager()

    class Meta:
        abstract = True

    @classmethod
    def _compute_hash(cls, **kwargs):
        """Compute the hash for the given field values.

        Declared as a classmethod because the manager invokes it on the
        model class itself (self.model._compute_hash(**kwargs)), with no
        instance available.

        @raises NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError('Subclasses must override _compute_hash()')

    def save(self, force_insert=False, **kwargs):
        """Prevents saving the model in most cases

        We want these models to be immutable, so the generic save() operation
        will not work. These models should be instantiated through their
        model.objects.get_or_create() method instead.

        The exception is that save(force_insert=True) will be allowed, since
        that creates a new row. However, the preferred way to make instances of
        these models is through the get_or_create() method.

        @param force_insert: Must be true for the save to be allowed.

        @raises Exception: if force_insert is false.
        """
        if not force_insert:
            raise Exception('ModelWithHash is immutable')

        # Allow a forced insert to happen; if it's a duplicate, the unique
        # constraint will catch it later anyways
        super(ModelWithHash, self).save(force_insert=force_insert, **kwargs)