Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated bulk_create_with_history to support update_conflicts of bulk_create #1419

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Unreleased
- Made ``skip_history_when_saving`` work when creating an object - not just when
updating an object (gh-1262)
- Improved performance of the ``latest_of_each()`` history manager method (gh-1360)
- Updated ``bulk_create_with_history`` to support ``update_conflicts`` and
``update_fields`` of ``bulk_create()`` (gh-1323)

3.7.0 (2024-05-29)
------------------
Expand Down
12 changes: 11 additions & 1 deletion docs/common_issues.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,18 @@ history:
>>> Poll.history.count()
1000

You can also use ``bulk_create_with_history`` with Django's ``update_conflicts`` where
you can specify the fields to update in case of a conflict. This incurs an additional
query to refetch the object to update the history because the full object is not
returned. If a record is updated, the historical record will have a `history_type` of
``'~'``, meaning updated.

Note: if a record is updated, but there were no historical
records for that instance, the historical record will have a `history_type` of ``'+'``.


If you want to specify a change reason or history user for each record in the bulk create,
you can add `_change_reason`, `_history_user` or `_history_date` on each instance:
you can add `_change_reason`, `_history_user`, `_history_date` on each instance:

.. code-block:: pycon

Expand Down
9 changes: 6 additions & 3 deletions simple_history/manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from contextlib import ContextDecorator
from typing import Optional

from django.conf import settings
from django.db import models
from django.db.models import Exists, OuterRef, Q, QuerySet
Expand Down Expand Up @@ -225,9 +228,9 @@ def bulk_history_create(
if not getattr(settings, "SIMPLE_HISTORY_ENABLED", True):
return

history_type = "+"
default_history_type = "+"
if update:
history_type = "~"
default_history_type = "~"

historical_instances = []
for instance in objs:
Expand All @@ -243,7 +246,7 @@ def bulk_history_create(
history_user=history_user,
history_change_reason=get_change_reason_from_object(instance)
or default_change_reason,
history_type=history_type,
history_type=getattr(instance, "_history_type", default_history_type),
**{
field.attname: getattr(instance, field.attname)
for field in self.model.tracked_fields
Expand Down
8 changes: 8 additions & 0 deletions simple_history/tests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ class PollWithExcludedFieldsWithDefaults(models.Model):
)


class PollWithUniqueQuestionAndWithPlace(models.Model):
question = models.CharField(max_length=200, unique=True)
pub_date = models.DateTimeField("date published", null=True)
place = models.TextField(null=True)

history = HistoricalRecords()


class PollWithExcludedFKField(models.Model):
question = models.CharField(max_length=200)
pub_date = models.DateTimeField("date published")
Expand Down
10 changes: 10 additions & 0 deletions simple_history/tests/tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,16 @@ def test_bulk_history_create_with_change_reason(self):
)
)

def test_bulk_history_create_with_history_type(self):
for poll in self.data:
poll._history_type = "~"

Poll.history.bulk_history_create(self.data)

self.assertTrue(
all([history.history_type == "~" for history in Poll.history.all()])
)


class PrefetchingMethodsTestCase(TestCase):
def setUp(self):
Expand Down
65 changes: 64 additions & 1 deletion simple_history/tests/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
import django
from django.contrib.auth import get_user_model
from django.db import IntegrityError, transaction
from django.test import TestCase, TransactionTestCase, override_settings
from django.test import (
TestCase,
TransactionTestCase,
override_settings,
skipUnlessDBFeature,
)
from django.utils import timezone

from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError
Expand All @@ -26,6 +31,7 @@
PollWithSelfManyToMany,
PollWithSeveralManyToMany,
PollWithUniqueQuestion,
PollWithUniqueQuestionAndWithPlace,
Street,
)
from simple_history.utils import (
Expand Down Expand Up @@ -257,6 +263,63 @@ def test_bulk_create_history_with_duplicates_ignore_conflicts(self):
self.assertEqual(PollWithUniqueQuestion.objects.count(), 2)
self.assertEqual(PollWithUniqueQuestion.history.count(), 2)

@skipUnlessDBFeature(
"supports_update_conflicts", "supports_update_conflicts_with_target"
)
def test_bulk_create_history_with_duplicates_update_conflicts_create_only_field(
self,
):
poll1 = PollWithUniqueQuestionAndWithPlace.objects.create(
question="Question 1", pub_date=timezone.now(), place="earth"
)
PollWithUniqueQuestionAndWithPlace.objects.create(
question="Question 2", pub_date=timezone.now(), place="moon"
)
duplicates = [
# Reuse the same object that already exists
poll1,
# Match the unique field but with different values for other fields
PollWithUniqueQuestionAndWithPlace(
question="Question 2", pub_date=None, place="sun"
),
PollWithUniqueQuestionAndWithPlace(
question="Question 3", pub_date=None, place="saturn"
),
]

bulk_create_with_history(
duplicates,
PollWithUniqueQuestionAndWithPlace,
update_conflicts=True,
unique_fields=["question"],
update_fields=["pub_date"],
)
new1, new2, new3 = list(
PollWithUniqueQuestionAndWithPlace.objects.order_by("question")
)
# Confirm the first object was updated and has two historical records
self.assertEqual(new1.place, "earth")
self.assertIsNotNone(new1.pub_date)
new1_hist1, new1_hist2 = list(new1.history.all().order_by("history_id"))
self.assertEqual(new1_hist1.history_type, "+")
self.assertEqual(new1_hist2.history_type, "~")
self.assertIsNotNone(new1_hist2.pub_date)
self.assertIsNotNone(new1_hist2.place, "earth")

# This shouldn't change since it wasn't in update_fields
new2_hist1, new2_hist2 = list(new2.history.all().order_by("history_id"))
self.assertEqual(new2_hist1.history_type, "+")
self.assertEqual(new2_hist2.history_type, "~")
self.assertIsNone(new2_hist2.pub_date)
self.assertIsNotNone(new2_hist2.place, "moon")

# There should only be 1 addition record including saturn since place
# is inserted, but not updated.
new3_hist = new3.history.get()
self.assertEqual(new3_hist.history_type, "+")
self.assertIsNone(new2_hist2.pub_date)
self.assertIsNotNone(new2_hist2.place, "saturn")

def test_bulk_create_history_with_no_ids_return(self):
pub_date = timezone.now()
objects = [
Expand Down
134 changes: 105 additions & 29 deletions simple_history/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from itertools import chain

from django.db import transaction
from django.db.models import Case, ForeignKey, ManyToManyField, Q, When
from django.db.models import (
Case,
Exists,
ForeignKey,
ManyToManyField,
OuterRef,
Q,
Value,
When,
)
from django.forms.models import model_to_dict

from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError
Expand Down Expand Up @@ -86,6 +97,9 @@ def bulk_create_with_history(
model,
batch_size=None,
ignore_conflicts=False,
update_conflicts=False,
update_fields=None,
unique_fields=None,
default_user=None,
default_change_reason=None,
default_date=None,
Expand All @@ -94,12 +108,24 @@ def bulk_create_with_history(
"""
Bulk create the objects specified by objs while also bulk creating
their history (all in one transaction).

Because of not providing primary key attribute after bulk_create on any DB except
Postgres (https://docs.djangoproject.com/en/2.2/ref/models/querysets/#bulk-create)
Divide this process on two transactions for other DB's
this process on will use two transactions for other DB's.

If ``update_conflicts`` is used, the function will refetch records from the
database to ensure that the historical records match the exact state of the
model instances.

:param objs: List of objs (not yet saved to the db) of type model
:param model: Model class that should be created
:param batch_size: Number of objects that should be created in each batch
:param ignore_conflicts: Boolean passed through to Django ORM to ignore conflicts
:param update_conflicts: Boolean passed through to Django ORM to update on conflicts
:param update_fields: List of fields passed through to Django ORM to update on
conflict.
:param unique_fields: List of fields through to Django ORM to identify uniqueness
to update.
:param default_user: Optional user to specify as the history_user in each historical
record
:param default_change_reason: Optional change reason to specify as the change_reason
Expand All @@ -116,16 +142,46 @@ def bulk_create_with_history(
field.name
for field in model._meta.get_fields()
if isinstance(field, ManyToManyField)
or (
# If using either unique fields or update fields, then we can exclude
# anything not in those lists.
(unique_fields or update_fields)
and field.name not in set(chain(unique_fields or [], update_fields or []))
)
]

history_manager = get_history_manager_for_model(model)
model_manager = model._default_manager

second_transaction_required = True
with transaction.atomic(savepoint=False):
objs_with_id = model_manager.bulk_create(
objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts
objs,
batch_size=batch_size,
ignore_conflicts=ignore_conflicts,
update_conflicts=update_conflicts,
update_fields=update_fields,
unique_fields=unique_fields,
)
if objs_with_id and objs_with_id[0].pk and not ignore_conflicts:
if (
objs_with_id
and all(obj.pk for obj in objs_with_id)
and not ignore_conflicts
):
if update_conflicts:
# If we're updating on conflict, it's possible that some rows
# were updated and some were inserted. We need to refetch
# the data to make sure the historical record matches exactly
# since update_fields could be a subset of the fields. We
# also need to annotate the history_type to indicate if the
# record was updated or inserted.
objs_with_id = (
model_manager.filter(pk__in=[obj.pk for obj in objs_with_id])
.annotate(
_history_type=_history_type_annotation(history_manager, model)
)
.select_for_update()
)
second_transaction_required = False
history_manager.bulk_history_create(
objs_with_id,
Expand All @@ -137,34 +193,39 @@ def bulk_create_with_history(
)
if second_transaction_required:
with transaction.atomic(savepoint=False):
# Generate a common query to avoid n+1 selections
# https://github.com/jazzband/django-simple-history/issues/974
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs_with_id):
attributes = dict(
filter(
lambda x: x[1] is not None,
model_to_dict(obj, exclude=exclude_fields).items(),

if objs_with_id:
# Generate a common query to avoid n+1 selections
# https://github.com/jazzband/django-simple-history/issues/974
cumulative_filter = None
obj_when_list = []
for i, obj in enumerate(objs_with_id):
attributes = dict(
filter(
lambda x: x[1] is not None,
model_to_dict(obj, exclude=exclude_fields).items(),
)
)
)
q = Q(**attributes)
cumulative_filter = (cumulative_filter | q) if cumulative_filter else q
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
obj_list = (
list(
model_manager.filter(cumulative_filter).order_by(
Case(*obj_when_list)
q = Q(**attributes)
cumulative_filter = (
(cumulative_filter | q) if cumulative_filter else q
)
# https://stackoverflow.com/a/49625179/1960509
# DEV: If an attribute has `then` as a key
# then they'll also run into issues with `bulk_update`
# due to shared implementation
# https://github.com/django/django/blob/4.0.4/django/db/models/query.py#L624-L638
obj_when_list.append(When(**attributes, then=i))
qs = model_manager.filter(cumulative_filter).order_by(
Case(*obj_when_list)
)
if objs_with_id
else []
)
if update_conflicts:
qs = qs.annotate(
_history_type=_history_type_annotation(history_manager, model)
)
obj_list = list(qs)
else:
obj_list = []
history_manager.bulk_history_create(
obj_list,
batch_size=batch_size,
Expand Down Expand Up @@ -240,3 +301,18 @@ def get_change_reason_from_object(obj):
return getattr(obj, "_change_reason")

return None


def _history_type_annotation(history_manager, model):
"""
Calculate the next history_type based on a Model
QuerySet and whether there's an existing historical record.
"""
key_name = get_app_model_primary_key_name(model)
return Case(
When(
Exists(history_manager.filter(**{key_name: OuterRef(key_name)})),
then=Value("~"),
),
default=Value("+"),
)
Loading