簡體   English   中英

Django的bulk_create()

[英]Django's bulk_create()

我已經閱讀了有關Django的bulk_create()的一些資料,並且我目前正在嘗試實現它,但是它無法正常運行。 最初,我的代碼沒有使用bulk_create,並且花了大約33秒的時間來導入6074行數據。 慢,但是行得通。

模型(Models):

class Building(models.Model):
    """A facility belonging to a community, with its location, type, size and audit status."""
    # Owning community.
    # NOTE(review): this related_name contains spaces, so the reverse accessor
    # on Community cannot be written as a normal attribute; Django documents
    # related_name as needing to be a valid Python identifier -- confirm and
    # consider renaming (e.g. 'buildings').
    community = models.ForeignKey('Community', related_name='Building Community Name')
    # Optional free-text descriptors; each may be NULL or blank.
    physical_location = models.CharField(max_length=80, null=True, blank=True)
    data_source = models.CharField(max_length=50, null=True, blank=True)
    facility_name = models.CharField(max_length=120, null=True, blank=True)
    facility_type = models.CharField(max_length=80, null=True, blank=True)
    # Optional decimal with up to 10 digits, 2 of them after the decimal point
    # (units are not stated in this file).
    size = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    # Tri-state flag: True, False or NULL.
    audited = models.NullBooleanField(blank=True)
    audit_notes = models.TextField(blank=True) 

class RetrofitData(models.Model):
    """Retrofit information recorded for one Building: pre/post figures per fuel type, cost and flags."""
    # The building this record belongs to.
    # NOTE(review): because the field itself is named ``building_id``, Django's
    # usual "<field>_id" naming would make the raw key attribute
    # ``building_id_id`` -- confirm, and consider naming the field ``building``.
    building_id = models.ForeignKey('Building')
    # Tri-state flag: True, False or NULL.
    retrofits_done = models.NullBooleanField(blank=True)
    retrofit_notes = models.TextField(blank=True)
    # Pre-retrofit figures, one optional decimal per fuel/energy type
    # (units are not stated in this file).
    fuel_oil_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    district_heating_oil_usage_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    electricity_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    natural_gas_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    propane_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    biomass_preretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    # Post-retrofit counterparts of the fields above.
    fuel_oil_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    district_heating_oil_usage_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    electricity_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    natural_gas_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    propane_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    biomass_postretrofit = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    retrofit_cost = models.DecimalField(null=True, blank=True, max_digits=10, decimal_places=2)
    # Tri-state flags: True, False or NULL.
    biomass_heat = models.NullBooleanField(blank=True)
    heat_recovery = models.NullBooleanField(blank=True)

原始代碼:

class BuildingInventoryImporter(dataimport.DataFileImporter):
    """Import Building and RetrofitData records from an Excel workbook.

    Every row after the first of the workbook's third sheet yields one
    ``Building`` (looked up by ``id`` equal to the row index) and one
    ``RetrofitData`` attached to it. Each record is saved individually.
    """

    models = [Building, RetrofitData]

    # Facility types whose missing name is recorded as "Unavailable".
    _UNNAMED_FACILITY_TYPES = ("Other", "Office", "Public Assembly", "Public Safety")

    # The dataset represents an unknown value as 999999999.
    _UNKNOWN_VALUE = 999999999

    def toTrueFalse(self, val):
        """Map "Yes" to True and "No" to False; any other value gives None."""
        if val == "Yes":
            return True
        if val == "No":
            return False
        return None

    def decCleaner(self, val):
        """Return None for an empty cell, otherwise the result of ``val2dec(val)``."""
        if val == '':
            return None
        return val2dec(val)

    def _facility_name(self, raw_name, facility_type, community_name):
        """Return the facility name, deriving a placeholder when the cell is empty.

        Returns None when the cell is empty and the facility type has no
        placeholder rule. Previously that case left the name unassigned, so
        the row either raised NameError or silently reused the previous
        row's name.
        """
        if raw_name != '':
            return raw_name
        if facility_type in self._UNNAMED_FACILITY_TYPES:
            return "Unavailable"
        if facility_type == "Health Care - Hospitals":
            return community_name + " Clinic"
        if facility_type == "Education - K - 12":
            return community_name + " School(s)"
        return None

    def _find_community(self, gnis, community_name):
        """Look a Community up by GNIS feature id, falling back to its name.

        A warning is recorded for every mismatch. Returns None when neither
        the GNIS id nor the name matches an existing Community.
        """
        try:
            return Community.objects.get(gnis_feature_id=gnis)
        except Community.DoesNotExist:
            self.warning("The value entered for the Community GNIS: {0} does not exist.".format(gnis))

        try:
            community = Community.objects.get(name=community_name)
        except Community.DoesNotExist:
            self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(community_name, gnis))
            return None

        # The message is formatted before being passed to warning(); the
        # earlier code called .format() on warning()'s return value instead.
        self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS".format(community_name))
        return community

    @transaction.commit_manually
    @rollback_on_exception
    def do_import(self):
        """Read the data file and save one Building and one RetrofitData per row.

        Rows whose community cannot be resolved are skipped with a warning.
        The transaction is rolled back when ``self.dry_run`` is true and
        committed otherwise.
        """
        book = xlrd.open_workbook(self.data_file.file.path,
            encoding_override='cp1252')
        sheet = book.sheet_by_index(2)

        # Row 0 is not imported (presumably a header row).
        for row_index in range(1, sheet.nrows):
            row = sheet.row_values(row_index)

            community_name = row[0]
            gnis = row[1]
            facility_type = row[5]
            facility_name = self._facility_name(row[4], facility_type, community_name)
            size = self.decCleaner(row[6])
            audited = self.toTrueFalse(row[7])

            # The row index doubles as the Building primary key.
            building, _ = self.get_or_new(Building, id=row_index)

            community = self._find_community(gnis, community_name)
            if community is None:
                continue

            building.community = community
            building.physical_location = row[2]
            building.data_source = row[3]
            building.facility_name = facility_name
            building.facility_type = facility_type
            building.size = size
            building.audited = audited
            building.audit_notes = row[8]
            building.save()

            retrofit_data, _ = self.get_or_new(RetrofitData, building_id=building)

            retrofit_data.retrofits_done = self.toTrueFalse(row[9])
            retrofit_data.retrofit_notes = row[10]
            retrofit_data.fuel_oil_preretrofit = self.decCleaner(row[11])

            if row[12] == self._UNKNOWN_VALUE:
                retrofit_data.district_heating_oil_usage_preretrofit = None
            else:
                retrofit_data.district_heating_oil_usage_preretrofit = self.decCleaner(row[12])

            retrofit_data.electricity_preretrofit = self.decCleaner(row[13])
            retrofit_data.natural_gas_preretrofit = self.decCleaner(row[14])
            retrofit_data.propane_preretrofit = self.decCleaner(row[15])
            retrofit_data.biomass_preretrofit = self.decCleaner(row[16])
            retrofit_data.fuel_oil_postretrofit = self.decCleaner(row[17])
            retrofit_data.district_heating_oil_usage_postretrofit = self.decCleaner(row[18])
            retrofit_data.electricity_postretrofit = self.decCleaner(row[19])
            retrofit_data.natural_gas_postretrofit = self.decCleaner(row[20])
            retrofit_data.propane_postretrofit = self.decCleaner(row[21])
            retrofit_data.biomass_postretrofit = self.decCleaner(row[22])
            retrofit_data.retrofit_cost = self.decCleaner(row[23])
            retrofit_data.biomass_heat = self.toTrueFalse(row[24])
            retrofit_data.heat_recovery = self.toTrueFalse(row[25])
            retrofit_data.save()

        if self.dry_run:
            transaction.rollback()
        else:
            transaction.commit()
dataimport.register(BuildingInventoryImporter)

在整個數據導入過程中,它需要訪問數據庫大約1200次,這導致導入速度緩慢。 因此,為了解決這個問題,我研究了如何使用bulk_create()。

修改后的代碼:

class BuildingInventoryImporterV2(dataimport.DataFileImporter):
    """Attempted bulk_create() variant of the building inventory import.

    The code is kept exactly as posted; the NOTE(review) comments mark the
    places that explain the DoesNotExist error reported for it.
    """

    models = [Building, RetrofitData]

    # NOTE(review): the dry_run parameter is never read; the body checks
    # self.dry_run instead.
    def do_import(self, dry_run=True):    
        """Build Building and RetrofitData objects per sheet row and bulk-insert them.

        The first loop collects Building objects, the second loop collects
        RetrofitData objects; each list is passed to bulk_create() only when
        self.dry_run is False.
        """
        book = xlrd.open_workbook(self.data_file.file.path, 
            encoding_override='cp1252')
            # NOTE(review): the next statement is indented deeper than the
            # statement before it, which Python rejects (IndentationError).
            sheet = book.sheet_by_index(2)

        building_bulk_list = []
        retrofit_bulk_list = [] 

        # First pass: one unsaved Building per row, starting at row 1.
        for row_index in range(1,sheet.nrows):
            row = sheet.row_values(row_index)

            temp_id= row_index
            community_name = row[0]
            gnis = row[1]
            facility_type = row[5]

            # Resolve the community by GNIS feature id, then by name.
            try:
                community = Community.objects.get(gnis_feature_id=gnis)
            except Community.DoesNotExist:
                self.warning("The value entered for the Community GNIS: {0} does not exist.".format(row[1]))

                try:
                    community = Community.objects.get(name=community_name)
                    # NOTE(review): .format() is applied to the return value of
                    # self.warning(), not to the message string.
                    self.warning("The Community name: {0} is in the db but does not match its associated Community GNIS").format(row[0])
                except Community.DoesNotExist:
                    self.warning("Neither the Community name: {0} nor the Community GNIS: {1} exist.".format(row[0], row[1])) 
                    # NOTE(review): a skipped row adds no Building, but the
                    # second loop below still adds a RetrofitData for it.
                    continue

            # NOTE(review): facility_name is not assigned when row[4] is empty
            # and facility_type matches none of the branches below.
            if row[4] == '':
                if facility_type =="Other" or facility_type == "Office" or facility_type == "Public Assembly" or facility_type == "Public Safety":
                    facility_name = "Unavailable"
                elif facility_type =="Health Care - Hospitals":
                    facility_name = community_name + " Clinic"
                elif facility_type == "Education - K - 12":
                    facility_name = community_name + " School(s)"
            else:
                facility_name = row[4]

            # NOTE(review): no id is passed here, so temp_id is not stored on
            # the Building. decCleaner/toTrueFalse are not defined in this
            # class; presumably they must come from the base class -- confirm.
            building_to_add = Building(    
                community=community,    
                physical_location=row[2],    
                data_source=row[3],    
                facility_name=facility_name,    
                facility_type=facility_type,    
                size=self.decCleaner(row[6]),    
                audited=self.toTrueFalse(row[7]),    
                audit_notes=row[8]    
            )
            building_bulk_list.append(building_to_add)
        # Buildings are only written when this is not a dry run.
        if self.dry_run is False:
            Building.objects.bulk_create(building_bulk_list)

        # Second pass: one unsaved RetrofitData per row.
        for row_index in range(1,sheet.nrows):
            row = sheet.row_values(row_index)
            # The dataset represents an unknown value as 999999999.

            if row[12] == 999999999:    
                district_heating_oil_usage_preretrofit = None    
            else:    
                district_heating_oil_usage_preretrofit = self.decCleaner(row[12]) 

            # NOTE(review): temp_id is not reassigned in this loop, so every
            # lookup uses the last value left by the first loop, and the
            # bulk-created Buildings were not given that id. This get() is the
            # call shown raising DoesNotExist in the reported traceback.
            retrofit_data_to_add = RetrofitData(    
                building_id=Building.objects.get(id=temp_id),    
                retrofits_done=self.toTrueFalse(row[9]),    
                retrofit_notes=row[10],    
                fuel_oil_preretrofit=self.decCleaner(row[11]),    
                district_heating_oil_usage_preretrofit=district_heating_oil_usage_preretrofit,    
                electricity_preretrofit=self.decCleaner(row[13]),    
                natural_gas_preretrofit=self.decCleaner(row[14]),    
                propane_preretrofit=self.decCleaner(row[15]),    
                biomass_preretrofit=self.decCleaner(row[16]),    
                fuel_oil_postretrofit=self.decCleaner(row[17]),    
                district_heating_oil_usage_postretrofit=self.decCleaner(row[18]),    
                electricity_postretrofit=self.decCleaner(row[19]),    
                natural_gas_postretrofit=self.decCleaner(row[20]),    
                propane_postretrofit=self.decCleaner(row[21]),    
                biomass_postretrofit=self.decCleaner(row[22]),    
                retrofit_cost=self.decCleaner(row[23]),    
                biomass_heat=self.toTrueFalse(row[24]),    
                heat_recovery=self.toTrueFalse(row[25])    
            )    
            retrofit_bulk_list.append(retrofit_data_to_add) 

        # NOTE(review): the list holds RetrofitData objects but is passed to
        # Building.objects; presumably RetrofitData.objects was intended.
        if self.dry_run is False:    
            Building.objects.bulk_create(retrofit_bulk_list)    
dataimport.register(BuildingInventoryImporterV2)

當代碼執行到第二個代碼塊、開始批量導入RetrofitData時,就出現了問題。 據我了解,bulk_create()在被調用時不會為AutoField主鍵(pk)賦值,因此需要先用bulk_create()把數據寫入數據庫,之后AutoField pk才會被分配。 但這似乎也不准確。 運行導入后,出現以下錯誤:

Traceback:
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/core/handlers/base.py" in get_response
      111.                     response = wrapped_callback(request, *callback_args, **callback_kwargs)
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/utils/decorators.py" in _wrapped_view
      105.                     response = view_func(request, *args, **kwargs)
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/views/decorators/cache.py" in _wrapped_view_func
      52.         response = view_func(request, *args, **kwargs)
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/admin/sites.py" in inner
      206.             return view(request, *args, **kwargs)
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/contrib/auth/decorators.py" in _wrapped_view
      21.                 return view_func(request, *args, **kwargs)
    File "/home/bhernandez/ISER/aedg/core/adminviews.py" in data_import
      465.                 results = importer.run()
    File "/home/bhernandez/ISER/aedg/core/dataimport/__init__.py" in run
      114.         self.do_import()
    File "/home/bhernandez/ISER/aedg/akw/dataimport/etc.py" in do_import
      656.                 building_id=Building.objects.get(id=temp_id),    
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/manager.py" in manager_method
      92.                 return getattr(self.get_queryset(), name)(*args, **kwargs)
    File "/home/bhernandez/ISER/virtualenvs/alaskawind/lib/python2.7/site-packages/django/db/models/query.py" in get
      357.                 self.model._meta.object_name)

    Exception Type: DoesNotExist at /admin/core/datafile/174/import/
    Exception Value: Building matching query does not exist.

但是,當我檢查“ 建築物”表時,該表已被填充...非常感謝您的幫助或建議。

因此,當您創建RetrofitData時,您需要知道剛剛創建的Building對象的ID。

可能是因為您使用的數據庫把ID字段設置為自動遞增,所以使用bulk_create創建的對象不會被分配PK。

我想可以利用building_bulk_list的長度,從數據庫中取出最后一批Building對象;但是為什么不改用更傳統的方式來創建building_bulk_list中的對象,即逐個調用save(),從而得到一個ID列表呢?

然后,使用該ID列表,您可以為RetrofitData運行bulk_create,遍歷該ID列表以設置與Building的關系。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM