Duplicating model instances and their related objects in Django / Algorithm for recursively duplicating an object
我有 Book、Chapter 和 Page 三个模型,它们都通过外键指向编写它们的用户(auth.User):
from django.db import models


# Example models from the question: every model carries its own `author`
# foreign key, and Page additionally points at both its Book and its Chapter.
class Book(models.Model):
    author = models.ForeignKey('auth.User')


class Chapter(models.Model):
    author = models.ForeignKey('auth.User')
    book = models.ForeignKey(Book)


class Page(models.Model):
    author = models.ForeignKey('auth.User')
    book = models.ForeignKey(Book)
    chapter = models.ForeignKey(Chapter)
我要做的是复制一个现有的 Book,并把它的 author 改为另一个用户,同时复制与该 Book 相关的所有模型实例——也就是它的所有 Chapter 和 Page。
当看到一个 Page 时,事情就变得棘手了——新的 Page 不仅需要更新 author 字段,还需要指向新的 Chapter 对象!
Django是否支持这种开箱即用的方式?复制模型的通用算法是什么样子的?
干杯,
约翰
更新:
上面给出的类只是一个例子来说明我遇到的问题!
这在Django 1.3中不再有效,因为CollectedObjects已被移除。见变更集14507
我把我的解决方案贴在了django的代码片段(djangosnippets)上。它主要基于用于删除对象的 django.db.models.query.CollectedObjects 代码:
from django.db.models.query import CollectedObjects
from django.db.models.fields.related import ForeignKey


def duplicate(obj, value, field):
    """
    Re-save `obj` and every object collected with it as new rows.

    Each collected object has its `id` cleared and `field` set to `value`
    before being saved. A foreign key whose target was collected as well is
    re-pointed at that target object. Returns the first object that was
    saved, which the author intends to be the copy of `obj`.
    """
    collected = CollectedObjects()
    obj._collect_sub_objects(collected)
    related_models = collected.keys()

    root_obj = None
    # Walk the collected models in the reverse of the deletion order.
    for model in reversed(related_models):
        # Foreign keys on `model` that target one of the collected models.
        fks = [
            f for f in model._meta.fields
            if isinstance(f, ForeignKey) and f.rel.to in related_models
        ]

        for _pk_val, instance in collected[model].iteritems():
            for fk in fks:
                target_pk = getattr(instance, "%s_id" % fk.name)
                candidates = collected[fk.rel.to]
                # The target was collected too: point at that object.
                if target_pk in candidates:
                    setattr(instance, fk.name, candidates[target_pk])

            # Clearing the id turns the following save() into an INSERT.
            instance.id = None
            setattr(instance, field, value)
            instance.save()
            if root_obj is None:
                root_obj = instance
    return root_obj
这里有一个简单的复制对象的方法。
基本上:
(1)将原始对象的ID设置为None:
book_to_copy.id = None
(2)更改"author"属性并保存对象:
book_to_copy.author = new_author
book_to_copy.save()
(3)执行插入而不是更新
(它不涉及更改页面中的作者——我同意有关重新构建模型的评论)
我没有在Django尝试过,但python的deepcopy可能只适合你。
编辑:
如果实现以下功能,则可以为模型定义自定义复制行为:
1 | __copy__() and __deepcopy__() |
这是http://www.djangosnippets.org/snippets/1282的编辑/
现在它与1.3中替换CollectedObjects的收集器兼容。
我并没有对它进行太大的测试,但用了一个包含20000个子对象的对象进行了测试,但只测试了大约三层外键深度。当然,使用的风险由你自己承担。
对于阅读本文的雄心勃勃的人,您应该考虑将collector子类化(或者复制整个类以消除对Django API中未发布部分的依赖性),将其复制到一个名为"DuplicateCollector"的类中,并编写一个.duplicate方法,该方法与.delete方法类似。这将真正解决这个问题。
from django.db.models.deletion import Collector
from django.db.models.fields.related import ForeignKey


def duplicate(obj, value=None, field=None, duplicate_order=None):
    """
    Re-save `obj` and every object collected with it as new rows.

    Each collected object has its `id` cleared before saving; when `field`
    is given it is also set to `value`. A foreign key whose target was
    collected as well is re-pointed at that target object. Returns the first
    object that was saved.

    `duplicate_order` is an iterable of model classes giving the order in
    which the models are saved. When omitted, the reverse of the collector's
    sorted order is used; for complex object graphs that may not be good
    enough, in which case pass the models in the order they must be saved.
    """
    collector = Collector({})
    collector.collect([obj])
    collector.sort()
    related_models = collector.data.keys()

    # For every model, map each instance's current pk to the instance itself.
    data_snapshot = {}
    for model_cls, instances in collector.data.items():
        data_snapshot[model_cls] = dict((item.pk, item) for item in instances)

    # Sometimes it's good enough just to save in reverse deletion order.
    if duplicate_order is None:
        duplicate_order = reversed(related_models)

    root_obj = None
    for model in duplicate_order:
        # Foreign keys on `model` that target one of the collected models.
        fks = [
            f for f in model._meta.fields
            if isinstance(f, ForeignKey) and f.rel.to in related_models
        ]

        # A caller-supplied order may name models that were not collected.
        if model not in collector.data:
            continue

        for instance in collector.data[model]:
            for fk in fks:
                target_pk = getattr(instance, "%s_id" % fk.name)
                candidates = data_snapshot[fk.rel.to]
                # The target was collected too: point at that object.
                if target_pk in candidates:
                    setattr(instance, fk.name, candidates[target_pk])

            # Clearing the id turns the following save() into an INSERT.
            instance.id = None
            if field is not None:
                setattr(instance, field, value)
            instance.save()
            if root_obj is None:
                root_obj = instance
    return root_obj
编辑:删除了调试"print"语句。
使用上面的CollectedObjects片段不再有效,但可以通过以下修改来完成:
1 2 | from django.contrib.admin.util import NestedObjects from django.db import DEFAULT_DB_ALIAS |
和
1 | collector = NestedObjects(using=DEFAULT_DB_ALIAS) |
而不是CollectedObjects
在Django 1.5中,这对我很有用:
1 2 3 | thing.id = None thing.pk = None thing.save() |
Django确实有一种内置的方式,可以通过管理员复制一个对象,如下所示:在django管理界面中,是否有复制项目的方法?
如果您正在构建的数据库中只有几个副本,我发现您可以使用管理界面中的后退按钮,更改必要字段,然后再次保存实例。这对我来说很有效,比如,我需要做一个"小金针"和一个"伏特加小金针"鸡尾酒,唯一的区别就是替换名称和配料。很明显,这需要对数据有一点预见性,而且并不像覆盖Django的copy/deepcopy那样强大——但这可能会给一些人带来好处。
简单非一般方法
提出的解决方案对我不起作用,所以我采取了简单而不明智的方式。这只对简单的情况有用。
对于具有以下结构的模型
1 2 3 4 5 6 7 | Book |__ CroppedFace |__ Photo |__ AwsReco |__ AwsLabel |__ AwsFace |__ AwsEmotion |
下面这样做是可行的:
def duplicate_book(book: Book, new_user: MyUser):
    """
    Copy `book` and its hard-coded tree of related rows for `new_user`.

    Every object is duplicated by clearing its pk and saving it again, so the
    instances passed in (and fetched here) end up referring to the new rows.
    The tree walked is Book -> CroppedFace, Book -> Photo -> AwsReco ->
    (AwsLabel, AwsFace -> AwsEmotion). Returns `book`, now the copy.

    Children are always fetched *before* their parent's pk is cleared,
    because reverse accessors are looked up with the parent's current pk.
    """
    # AwsEmotion, AwsFace, AwsLabel, AwsReco, Photo, CroppedFace, Book
    # Evaluate the querysets now, while `book` still has its original pk.
    old_cropped_faces = list(book.croppedface_set.all())
    old_photos = list(book.photo_set.all())

    book.pk = None
    book.user = new_user
    book.save()

    for cf in old_cropped_faces:
        cf.pk = None
        cf.book = book
        cf.save()

    for photo in old_photos:
        # Fetch the reco before the photo is re-keyed; once the photo has a
        # new pk the reverse lookup would be made for the copy, which has no
        # reco yet. NOTE(review): assumes `awsreco` is a reverse one-to-one
        # accessor that raises an AttributeError subclass when absent.
        reco = getattr(photo, 'awsreco', None)

        photo.pk = None
        photo.book = book
        photo.save()

        if reco is None:
            continue

        old_aws_labels = list(reco.awslabel_set.all())
        old_aws_faces = list(reco.awsface_set.all())

        reco.pk = None
        reco.photo = photo
        reco.save()

        for label in old_aws_labels:
            label.pk = None
            label.reco = reco
            label.save()

        for face in old_aws_faces:
            old_aws_emotions = list(face.awsemotion_set.all())

            face.pk = None
            face.reco = reco
            face.save()

            for emotion in old_aws_emotions:
                emotion.pk = None
                emotion.aws_face = face
                emotion.save()

    return book
在Django 2.1.2中,上面的答案对我都不起作用,所以我创建了一种通用的方法来对数据库模型执行深度复制,它主要基于上面发布的答案。
与上述答案的主要区别在于,ForeignKey不再有名为rel的属性,因此必须改用f.remote_field.model等写法。
此外,由于很难知道数据库模型的复制顺序,我创建了一个简单的排队系统,如果复制失败,将当前模型推到列表的末尾。代码如下:
import queue

from django.contrib.admin.utils import NestedObjects
from django.db.models.fields.related import ForeignKey


def duplicate(obj, field=None, value=None, max_retries=5):
    """
    Deep-copy `obj` together with every object the collector finds for it.

    Args:
        obj: the model instance to duplicate.
        field: optional attribute name to overwrite on every copied object.
        value: the value written to `field` (ignored when `field` is None).
        max_retries: how many times models may be pushed back onto the
            queue before giving up.

    Returns:
        The first object that was saved.

    Raises:
        DuplicationError: when a model still cannot be copied after the
            retries are used up, or when it is the last model in the queue.
    """
    # Use the NestedObjects collector to retrieve the related models.
    collector = NestedObjects(using='default')
    collector.collect([obj])
    related_models = list(collector.data.keys())

    # Per model, map each old primary key to the new one (None = not copied).
    data_snapshot = {}
    model_queue = queue.Queue()
    for key in related_models:
        data_snapshot[key] = {item.pk: None for item in collector.data[key]}
        model_queue.put(key)

    root_obj = None
    attempt_count = 0
    while not model_queue.empty():
        model = model_queue.get()
        root_obj, success = copy_instances(
            model, related_models, collector, data_snapshot, root_obj,
            field=field, value=value,
        )

        # A failure most likely means a related model has not been copied
        # yet, so the model goes to the back of the queue to be retried.
        if not success:
            # Give up if nothing else is queued or the retries are used up.
            if model_queue.empty() or attempt_count > max_retries:
                raise DuplicationError(model)
            model_queue.put(model)
            attempt_count += 1

    return root_obj


def copy_instances(model, related_models, collector, data_snapshot, root_obj,
                   field=None, value=None):
    """
    Copy all collected instances of `model`, remapping their foreign keys.

    Args:
        model: the model class whose instances are copied.
        related_models: every model class taking part in the duplication.
        collector: the collector holding the instances, keyed by model.
        data_snapshot: per-model dict of old pk -> new pk (None until copied).
        root_obj: the first copied object so far, or None.
        field: optional attribute name to overwrite on each copy.
        value: the value written to `field`.

    Returns:
        `(root_obj, success)`. `success` is False when an instance points at
        an object that is part of the duplication but has no copy yet.
        Instances copied before that point stay copied and are skipped if
        the model is retried.
    """
    # Foreign keys of the model that target another duplicated model.
    fks = [
        f for f in model._meta.fields
        if isinstance(f, ForeignKey) and f.remote_field.model in related_models
    ]

    # Copies made by an earlier, partially successful pass. Their instances
    # now carry the new pk and must not be duplicated a second time.
    already_copied = {
        new_pk for new_pk in data_snapshot[model].values() if new_pk is not None
    }

    for obj in collector.data[model]:
        if obj.pk in already_copied:
            continue

        # Resolve every foreign key first so that a failure leaves the
        # instance untouched.
        remapped = {}
        for fk in fks:
            pk_field = f"{fk.name}_id"
            fk_value = getattr(obj, pk_field)
            fk_rel_to = data_snapshot[fk.remote_field.model]
            if fk_value is not None and fk_value in fk_rel_to:
                dupe_pk = fk_rel_to[fk_value]
                # The related object has not been copied yet.
                if dupe_pk is None:
                    return root_obj, False
                remapped[pk_field] = dupe_pk

        for pk_field, dupe_pk in remapped.items():
            setattr(obj, pk_field, dupe_pk)

        # Saving without a primary key inserts a new row. `id` is cleared as
        # before; `pk` and `_state.adding` follow the Django documentation
        # on copying model instances.
        old_pk = obj.pk
        obj.pk = None
        obj.id = None
        obj._state.adding = True

        if field is not None:
            setattr(obj, field, value)

        obj.save()

        # Remember the new pk for objects that reference this one.
        data_snapshot[model][old_pk] = obj.pk

        if root_obj is None:
            root_obj = obj

    return root_obj, True
希望有什么帮助:)
复制错误只是一个简单的异常扩展:
class DuplicationError(Exception):
    """
    Signals that the objects of one database model could not be duplicated.

    Attributes:
        error_model -- the database model whose duplication failed
    """

    def __init__(self, model):
        super().__init__(model)
        self.error_model = model

    def __str__(self):
        failed = self.error_model
        return f'Was not able to duplicate database objects for model {failed}'
我在django 2.2/python 3.6中尝试了一些答案,但它们似乎既没有复制一对多相关对象,也没有复制多对多相关对象。此外,许多答案还对数据结构做了硬编码,或依赖对数据结构的预先了解。
我用一种更通用的方式编写了一个方法,处理一对多和多对多相关对象。包括评论,如果您有建议,我希望改进:
def duplicate_object(self):
    """
    Copy this instance along with the child objects that point at it.

    The work happens in a fixed order:
      1. Collect the one-to-many children and the many-to-many relations.
      2. Re-save this object without a pk, which turns it into the copy.
      3a. Re-save every collected child without a pk, attached to the copy.
      3b. Set the collected many-to-many relations on the copy.

    Progress is reported with print(). Returns `self`.
    """
    children = []
    m2m_by_field = {}

    # Step 1: look at every field of the parent for relations.
    for field in self._meta.get_fields():
        if field.one_to_many:
            # Backward relation: many children reference this parent. They
            # are remembered now and copied once the parent has been copied.
            print(f'Found a one-to-many field: {field.name}')
            # The field object is not iterable; the manager living on the
            # instance under the same name is what yields the objects.
            found = list(getattr(self, field.name).all())
            if found:
                print(f' - {len(found)} related objects to copy')
                children.extend(found)
        elif field.many_to_one:
            # Per the author's testing these survive the parent copy, so
            # nothing is collected for them.
            print(f'Found a many-to-one field: {field.name}')
        elif field.many_to_many:
            # The related objects themselves are not copied; only the
            # relationship is re-created on the copied parent.
            print(f'Found a many-to-many field: {field.name}')
            found = list(getattr(self, field.name).all())
            if found:
                print(f' - {len(found)} relations to set')
                m2m_by_field[field.name] = found

    # Step 2: duplicate the parent.
    self.pk = None
    self.save()
    print(f'Copied parent object ({str(self)})')

    # Step 3a: copy each child through every field that targets the
    # parent's class.
    parent_cls = self.__class__
    for child in children:
        for child_field in child._meta.fields:
            if child_field.related_model == parent_cls:
                child.pk = None
                setattr(child, child_field.name, self)
                child.save()

                label = str(child)
                if len(label) > 40:
                    label = label[:40] + '..'
                print(f'|- Copied child object ({label})')

    # Step 3b: re-create the many-to-many relations on the copy.
    for field_name, relations in m2m_by_field.items():
        getattr(self, field_name).set(relations)
        text_relations = [str(relation) for relation in relations]
        print(f'|- Set {len(relations)} many-to-many relations on {field_name} {text_relations}')

    return self
我认为使用一个更简单的数据模型你会更开心。
真的有一页是在某一章里,而不是在另一本书里吗?
# Illustration of how the question's models allow inconsistent ownership.
userMe = User(username="me")
userYou = User(username="you")

# The user must be passed as `author=`; a positional argument would be
# assigned to the model's first field instead.
bookMyA = Book(author=userMe)
bookYourB = Book(author=userYou)

chapterA1 = Chapter(book=bookMyA, author=userYou)  # "me" owns the Book, "you" owns the chapter?
chapterB2 = Chapter(book=bookYourB, author=userMe)  # "you" owns the book, "me" owns the chapter?

page1 = Page(book=bookMyA, chapter=chapterB2, author=userMe)  # Book and Author agree, chapter doesn't?
你的模型似乎太复杂了。
我想你会更喜欢简单的东西。我只是在猜测,因为我不知道你的全部问题。
# Simplified schema: only Page has an author, and each level points just at
# its direct parent. The `...` arguments are placeholders left by the author.
class Book(models.Model):
    name = models.CharField(...)


class Chapter(models.Model):
    name = models.CharField(...)
    book = models.ForeignKey(Book)


class Page(models.Model):
    author = models.ForeignKey('auth.User')
    chapter = models.ForeignKey(Chapter)
每一页都有不同的作者。因此,每一章都有一个作者集,这本书也是如此。现在,您可以复制书籍、章节和页面,将克隆的页面分配给新作者。
实际上,您可能希望页面和章节之间有多对多的关系,允许您只拥有页面的多个副本,而不需要克隆书籍和章节。
我试验了斯蒂芬·G·图吉的解决方案,我发现它非常聪明,但不幸的是,它在某些特殊情况下不起作用。
让我们假设以下场景:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 | class FattAqp(models.Model): descr = models.CharField('descrizione', max_length=200) ef = models.ForeignKey(Esercizio, ...) forn = models.ForeignKey(Fornitore, ...) class Periodo(models.Model): # id usato per identificare i documenti # periodo rilevato in fattura data_i_p = models.DateField('data inizio', blank=True) idfatt = models.ForeignKey(FattAqp, related_name='periodo') class Lettura(models.Model): mc_i = models.DecimalField(max_digits=7, ...) faqp = models.ForeignKey(FattAqp, related_name='lettura') an_im = models.ForeignKey('cnd.AnagImm', ..) class DettFAqp(models.Model): imponibile = models.DecimalField(...) voce = models.ForeignKey(VoceAqp, ...) periodo = models.ForeignKey(Periodo, related_name='dettfaqp') |
在这种情况下,如果我们试图深度复制一个FattAqp实例,ef、forn、an_im和voce字段将无法正确设置;而另一方面,idfatt、faqp、periodo则会被正确设置。
我通过在函数中再添加一个参数并稍微修改代码来解决这个问题。我用python 3.6和django 2.2测试过它这里是:
def duplicate_model_with_descendants(obj, whitelist, _new_parent_pk=None, static_fk=None):
    """
    Recursively copy `obj` and its whitelisted descendants.

    Args:
        obj: the model instance to copy.
        whitelist: names of the one-to-many relations whose children are
            copied as well; the same list is used at every level.
        _new_parent_pk: pk of the freshly created parent. Set by the
            recursive calls; when truthy it is written to every many-to-one
            field of the copy.
        static_fk: optional collection of `<field name>_id` names whose
            value is taken from `obj` unchanged, overriding
            `_new_parent_pk`. Omitting it means no such fields.

    Returns:
        The newly created and saved copy of `obj`.
    """
    from collections import OrderedDict

    # Allow the documented default: no static foreign keys.
    if static_fk is None:
        static_fk = ()

    kwargs = {}
    children_to_clone = OrderedDict()
    for field in obj._meta.get_fields():
        if field.name == "id":
            # Never copy the id; the copy gets its own on save().
            continue
        if field.one_to_many:
            if field.name in whitelist:
                these_children = list(getattr(obj, field.name).all())
                children_to_clone.setdefault(field.name, []).extend(these_children)
        elif field.many_to_one:
            name_with_id = field.name + '_id'
            if _new_parent_pk:
                kwargs[name_with_id] = _new_parent_pk
            # Static foreign keys keep the original target.
            if name_with_id in static_fk:
                kwargs[name_with_id] = getattr(obj, name_with_id)
        elif field.concrete:
            kwargs[field.name] = getattr(obj, field.name)

    new_instance = obj.__class__(**kwargs)
    new_instance.save()
    new_instance_pk = new_instance.pk

    for ky, children in children_to_clone.items():
        child_collection = getattr(new_instance, ky)
        for child in children:
            child_collection.add(
                duplicate_model_with_descendants(
                    child,
                    whitelist=whitelist,
                    _new_parent_pk=new_instance_pk,
                    static_fk=static_fk,
                )
            )
    # The recursive calls above and external callers both need the copy.
    return new_instance
示例用法:
original_record = FattAqp.objects.get(pk=4)
# One-to-many relation names (the related_name values) to descend into.
WHITELIST = ['lettura', 'periodo', 'dettfaqp']
# Foreign keys to keep pointing at their original target, written as
# "<field name>_id" (FattAqp.forn -> 'forn_id').
STATIC_FK = ['forn_id', 'ef_id', 'an_im_id', 'voce_id']
duplicate_record = duplicate_model_with_descendants(original_record, WHITELIST, static_fk=STATIC_FK)
这里有一个简单的解决方案。这不依赖于任何未记录的Django API。它假定您要复制一个父记录,以及它的子记录、孙子记录等。您将传入一个实际应该复制的类的白名单,形式是指向其子对象的每个父对象上的一对多关系的名称的
这个解决方案对上面的
关于这段代码还有一点:它确实是递归的,因为它为每个新级别的后代调用自己。
from collections import OrderedDict


def duplicate_model_with_descendants(obj, whitelist, _new_parent_pk=None):
    """
    Recursively copy `obj` and its whitelisted descendants.

    Args:
        obj: the model instance to copy.
        whitelist: names of the one-to-many relations whose children are
            copied as well; the same list is used at every level.
        _new_parent_pk: pk of the freshly created parent. Set by the
            recursive calls; when truthy it is written to every many-to-one
            field of the copy.

    Returns:
        The newly created and saved copy of `obj`.

    Note:
        Many-to-one fields are only populated from `_new_parent_pk`, so the
        top-level copy (created without a parent pk) gets none of the
        original's foreign key values.
    """
    kwargs = {}
    children_to_clone = OrderedDict()
    for field in obj._meta.get_fields():
        if field.name == "id":
            # Never copy the id; the copy gets its own on save().
            continue
        if field.one_to_many:
            if field.name in whitelist:
                these_children = list(getattr(obj, field.name).all())
                # `in`/`extend` replace the Python-2-only has_key() and the
                # `|=` operator, which lists do not support.
                children_to_clone.setdefault(field.name, []).extend(these_children)
        elif field.many_to_one:
            if _new_parent_pk:
                kwargs[field.name + '_id'] = _new_parent_pk
        elif field.concrete:
            kwargs[field.name] = getattr(obj, field.name)

    new_instance = obj.__class__(**kwargs)
    new_instance.save()
    new_instance_pk = new_instance.pk

    for ky, children in children_to_clone.items():
        child_collection = getattr(new_instance, ky)
        for child in children:
            child_collection.add(
                duplicate_model_with_descendants(
                    child, whitelist=whitelist, _new_parent_pk=new_instance_pk
                )
            )
    return new_instance
示例用法:
from django.db import models


class Book(models.Model):
    author = models.ForeignKey('auth.User')


class Chapter(models.Model):
    # author = models.ForeignKey('auth.User')
    book = models.ForeignKey(Book, related_name='chapters')


class Page(models.Model):
    # author = models.ForeignKey('auth.User')
    # book = models.ForeignKey(Book)
    chapter = models.ForeignKey(Chapter, related_name='pages')


# Names of the one-to-many relations (the related_name values) to descend into.
WHITELIST = ['books', 'chapters', 'pages']
# `Book` is the model defined above; `models` is django.db.models and has no
# `Book` attribute.
original_record = Book.objects.get(pk=1)
duplicate_record = duplicate_model_with_descendants(original_record, WHITELIST)